Improve the chatpdf feature (改善chatpdf的功能)

commit 8ac9b454e3
parent 613be5509b
@@ -76,7 +76,6 @@ def get_crazy_functions():
     from crazy_functions.总结word文档 import 总结word文档
     from crazy_functions.批量翻译PDF文档_多线程 import 批量翻译PDF文档
     from crazy_functions.谷歌检索小助手 import 谷歌检索小助手
-    from crazy_functions.理解PDF文档内容 import 理解PDF文档内容
     from crazy_functions.理解PDF文档内容 import 理解PDF文档内容标准文件输入
     from crazy_functions.Latex全文润色 import Latex中文润色
     from crazy_functions.Latex全文翻译 import Latex中译英
@@ -108,11 +107,6 @@ def get_crazy_functions():
             "Color": "stop",
             "Function": HotReload(总结word文档)
         },
-        # "[测试功能] 理解PDF文档内容(Tk文件选择接口,仅本地)": {
-        #     # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
-        #     "AsButton": False,  # 加入下拉菜单中
-        #     "Function": HotReload(理解PDF文档内容)
-        # },
         "[测试功能] 理解PDF文档内容(通用接口,读取文件输入区)": {
             # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
             "Color": "stop",
@@ -131,7 +125,6 @@ def get_crazy_functions():
             "AsButton": False,  # 加入下拉菜单中
             "Function": HotReload(Latex中文润色)
         },
-
         "[测试功能] Latex项目全文中译英(输入路径或上传压缩包)": {
             # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
             "Color": "stop",
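Reviewer note: the three hunks above all edit the plugin-registration table built by get_crazy_functions(), which maps a display name to a dict of UI options plus a HotReload-wrapped handler. A minimal sketch of that shape, assuming HotReload is importable from toolbox as elsewhere in this repo:

from toolbox import HotReload  # assumed import location
from crazy_functions.理解PDF文档内容 import 理解PDF文档内容标准文件输入

function_plugins = {
    "[测试功能] 理解PDF文档内容(通用接口,读取文件输入区)": {
        "Color": "stop",    # button color style
        "AsButton": False,  # listed in the dropdown menu rather than as a button
        # HotReload: edits to the plugin module take effect without restarting the app
        "Function": HotReload(理解PDF文档内容标准文件输入),
    },
}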
@@ -360,3 +360,171 @@ def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
             # 这个中文的句号是故意的,作为一个标识而存在
             res = cut(txt.replace('.', '。\n'), must_break_at_empty_line=False)
             return [r.replace('。\n', '.') for r in res]
+
+
+
+def read_and_clean_pdf_text(fp):
+    """
+    这个函数用于分割pdf,用了很多trick,逻辑较乱,效果奇好
+
+    **输入参数说明**
+    - `fp`:需要读取和清理文本的pdf文件路径
+
+    **输出参数说明**
+    - `meta_txt`:清理后的文本内容字符串
+    - `page_one_meta`:第一页清理后的文本内容列表
+
+    **函数功能**
+    读取pdf文件并清理其中的文本内容,清理规则包括:
+    - 提取所有块元的文本信息,并合并为一个字符串
+    - 去除短块(字符数小于100)并替换为回车符
+    - 清理多余的空行
+    - 合并小写字母开头的段落块并替换为空格
+    - 清除重复的换行
+    - 将每个换行符替换为两个换行符,使每个段落之间有两个换行符分隔
+    """
+    import fitz, copy
+    import re
+    import numpy as np
+    from colorful import print亮黄, print亮绿
+    fc = 0
+    fs = 1
+    fb = 2
+    REMOVE_FOOT_NOTE = True
+    REMOVE_FOOT_FFSIZE_PERCENT = 0.95
+    def primary_ffsize(l):
+        fsize_statiscs = {}
+        for wtf in l['spans']:
+            if wtf['size'] not in fsize_statiscs: fsize_statiscs[wtf['size']] = 0
+            fsize_statiscs[wtf['size']] += len(wtf['text'])
+        return max(fsize_statiscs, key=fsize_statiscs.get)
+
+    def ffsize_same(a,b):
+        return abs((a-b)/max(a,b)) < 0.02
+    # file_content = ""
+    with fitz.open(fp) as doc:
+        meta_txt = []
+        meta_font = []
+
+        meta_line = []
+        meta_span = []
+        for index, page in enumerate(doc):
+            # file_content += page.get_text()
+            text_areas = page.get_text("dict")  # 获取页面上的文本信息
+            for t in text_areas['blocks']:
+                if 'lines' in t:
+                    pf = 998
+                    for l in t['lines']:
+                        txt_line = "".join([wtf['text'] for wtf in l['spans']])
+                        pf = primary_ffsize(l)
+                        meta_line.append([txt_line, pf, l['bbox'], l])
+                        for wtf in l['spans']: # for l in t['lines']:
+                            meta_span.append([wtf['text'], wtf['size'], len(wtf['text'])])
+                    # meta_line.append(["NEW_BLOCK", pf])
+            # 块元提取                           for each word segment with in line                       for each line         cross-line words                          for each block
+            meta_txt.extend([" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
+                '- ', '') for t in text_areas['blocks'] if 'lines' in t])
+            meta_font.extend([np.mean([np.mean([wtf['size'] for wtf in l['spans']])
+                             for l in t['lines']]) for t in text_areas['blocks'] if 'lines' in t])
+            if index == 0:
+                page_one_meta = [" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
+                    '- ', '') for t in text_areas['blocks'] if 'lines' in t]
+        # 获取正文主字体
+        fsize_statiscs = {}
+        for span in meta_span:
+            if span[1] not in fsize_statiscs: fsize_statiscs[span[1]] = 0
+            fsize_statiscs[span[1]] += span[2]
+        main_fsize = max(fsize_statiscs, key=fsize_statiscs.get)
+        if REMOVE_FOOT_NOTE:
+            give_up_fize_threshold = main_fsize * REMOVE_FOOT_FFSIZE_PERCENT
+
+        # 切分和重新整合
+        mega_sec = []
+        sec = []
+        for index, line in enumerate(meta_line):
+            if index == 0:
+                sec.append(line[fc])
+                continue
+            if REMOVE_FOOT_NOTE:
+                if meta_line[index][fs] <= give_up_fize_threshold:
+                    continue
+            if ffsize_same(meta_line[index][fs], meta_line[index-1][fs]):
+                # 尝试识别段落
+                if meta_line[index][fc].endswith('.') and\
+                    (meta_line[index-1][fc] != 'NEW_BLOCK') and \
+                    (meta_line[index][fb][2] - meta_line[index][fb][0]) < (meta_line[index-1][fb][2] - meta_line[index-1][fb][0]) * 0.7:
+                    sec[-1] += line[fc]
+                    sec[-1] += "\n\n"
+                else:
+                    sec[-1] += " "
+                    sec[-1] += line[fc]
+            else:
+                if (index+1 < len(meta_line)) and \
+                    meta_line[index][fs] > main_fsize:
+                    # 单行 + 字体大
+                    mega_sec.append(copy.deepcopy(sec))
+                    sec = []
+                    sec.append("# " + line[fc])
+                else:
+                    # 尝试识别section
+                    if meta_line[index-1][fs] > meta_line[index][fs]:
+                        sec.append("\n" + line[fc])
+                    else:
+                        sec.append(line[fc])
+        mega_sec.append(copy.deepcopy(sec))
+
+        finals = []
+        for ms in mega_sec:
+            final = " ".join(ms)
+            final = final.replace('- ', ' ')
+            finals.append(final)
+        meta_txt = finals
+
+        def 把字符太少的块清除为回车(meta_txt):
+            for index, block_txt in enumerate(meta_txt):
+                if len(block_txt) < 100:
+                    meta_txt[index] = '\n'
+            return meta_txt
+        meta_txt = 把字符太少的块清除为回车(meta_txt)
+
+        def 清理多余的空行(meta_txt):
+            for index in reversed(range(1, len(meta_txt))):
+                if meta_txt[index] == '\n' and meta_txt[index-1] == '\n':
+                    meta_txt.pop(index)
+            return meta_txt
+        meta_txt = 清理多余的空行(meta_txt)
+
+        def 合并小写开头的段落块(meta_txt):
+            def starts_with_lowercase_word(s):
+                pattern = r"^[a-z]+"
+                match = re.match(pattern, s)
+                if match:
+                    return True
+                else:
+                    return False
+            for _ in range(100):
+                for index, block_txt in enumerate(meta_txt):
+                    if starts_with_lowercase_word(block_txt):
+                        if meta_txt[index-1] != '\n':
+                            meta_txt[index-1] += ' '
+                        else:
+                            meta_txt[index-1] = ''
+                        meta_txt[index-1] += meta_txt[index]
+                        meta_txt[index] = '\n'
+            return meta_txt
+        meta_txt = 合并小写开头的段落块(meta_txt)
+        meta_txt = 清理多余的空行(meta_txt)
+
+        meta_txt = '\n'.join(meta_txt)
+        # 清除重复的换行
+        for _ in range(5):
+            meta_txt = meta_txt.replace('\n\n', '\n')
+
+        # 换行 -> 双换行
+        meta_txt = meta_txt.replace('\n', '\n\n')
+
+        for f in finals:
+            print亮黄(f)
+            print亮绿('***************************')
+
+    return meta_txt, page_one_meta
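Reviewer note: a minimal usage sketch for the helper added above. The PDF path here is hypothetical, and PyMuPDF (`pip install pymupdf`) provides the `fitz` module it imports:

from crazy_functions.crazy_utils import read_and_clean_pdf_text

# meta_txt: cleaned full text, paragraphs separated by blank lines;
# page_one_meta: list of cleaned text blocks from the first page.
meta_txt, page_one_meta = read_and_clean_pdf_text('paper.pdf')  # hypothetical path
print(meta_txt[:300])
print(page_one_meta[:3])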
@@ -2,174 +2,9 @@ from toolbox import CatchException, report_execption, write_results_to_file
 from toolbox import update_ui
 from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
 from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
+from .crazy_utils import read_and_clean_pdf_text
 from colorful import *
 
-def read_and_clean_pdf_text(fp):
-    """
-    这个函数用于分割pdf,用了很多trick,逻辑较乱,效果奇好,不建议任何人去读这个函数
-
-    **输入参数说明**
-    - `fp`:需要读取和清理文本的pdf文件路径
-
-    **输出参数说明**
-    - `meta_txt`:清理后的文本内容字符串
-    - `page_one_meta`:第一页清理后的文本内容列表
-
-    **函数功能**
-    读取pdf文件并清理其中的文本内容,清理规则包括:
-    - 提取所有块元的文本信息,并合并为一个字符串
-    - 去除短块(字符数小于100)并替换为回车符
-    - 清理多余的空行
-    - 合并小写字母开头的段落块并替换为空格
-    - 清除重复的换行
-    - 将每个换行符替换为两个换行符,使每个段落之间有两个换行符分隔
-    """
-    import fitz, copy
-    import re
-    import numpy as np
-    fc = 0
-    fs = 1
-    fb = 2
-    REMOVE_FOOT_NOTE = True
-    REMOVE_FOOT_FFSIZE_PERCENT = 0.95
-    def primary_ffsize(l):
-        fsize_statiscs = {}
-        for wtf in l['spans']:
-            if wtf['size'] not in fsize_statiscs: fsize_statiscs[wtf['size']] = 0
-            fsize_statiscs[wtf['size']] += len(wtf['text'])
-        return max(fsize_statiscs, key=fsize_statiscs.get)
-
-    def ffsize_same(a,b):
-        return abs((a-b)/max(a,b)) < 0.02
-    # file_content = ""
-    with fitz.open(fp) as doc:
-        meta_txt = []
-        meta_font = []
-
-        meta_line = []
-        meta_span = []
-        for index, page in enumerate(doc):
-            # file_content += page.get_text()
-            text_areas = page.get_text("dict")  # 获取页面上的文本信息
-            for t in text_areas['blocks']:
-                if 'lines' in t:
-                    pf = 998
-                    for l in t['lines']:
-                        txt_line = "".join([wtf['text'] for wtf in l['spans']])
-                        pf = primary_ffsize(l)
-                        meta_line.append([txt_line, pf, l['bbox'], l])
-                        for wtf in l['spans']: # for l in t['lines']:
-                            meta_span.append([wtf['text'], wtf['size'], len(wtf['text'])])
-                    # meta_line.append(["NEW_BLOCK", pf])
-            # 块元提取                           for each word segment with in line                       for each line         cross-line words                          for each block
-            meta_txt.extend([" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
-                '- ', '') for t in text_areas['blocks'] if 'lines' in t])
-            meta_font.extend([np.mean([np.mean([wtf['size'] for wtf in l['spans']])
-                             for l in t['lines']]) for t in text_areas['blocks'] if 'lines' in t])
-            if index == 0:
-                page_one_meta = [" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
-                    '- ', '') for t in text_areas['blocks'] if 'lines' in t]
-        # 获取正文主字体
-        fsize_statiscs = {}
-        for span in meta_span:
-            if span[1] not in fsize_statiscs: fsize_statiscs[span[1]] = 0
-            fsize_statiscs[span[1]] += span[2]
-        main_fsize = max(fsize_statiscs, key=fsize_statiscs.get)
-        if REMOVE_FOOT_NOTE:
-            give_up_fize_threshold = main_fsize * REMOVE_FOOT_FFSIZE_PERCENT
-
-        # 切分和重新整合
-        mega_sec = []
-        sec = []
-        for index, line in enumerate(meta_line):
-            if index == 0:
-                sec.append(line[fc])
-                continue
-            if REMOVE_FOOT_NOTE:
-                if meta_line[index][fs] <= give_up_fize_threshold:
-                    continue
-            if ffsize_same(meta_line[index][fs], meta_line[index-1][fs]):
-                # 尝试识别段落
-                if meta_line[index][fc].endswith('.') and\
-                    (meta_line[index-1][fc] != 'NEW_BLOCK') and \
-                    (meta_line[index][fb][2] - meta_line[index][fb][0]) < (meta_line[index-1][fb][2] - meta_line[index-1][fb][0]) * 0.7:
-                    sec[-1] += line[fc]
-                    sec[-1] += "\n\n"
-                else:
-                    sec[-1] += " "
-                    sec[-1] += line[fc]
-            else:
-                if (index+1 < len(meta_line)) and \
-                    meta_line[index][fs] > main_fsize:
-                    # 单行 + 字体大
-                    mega_sec.append(copy.deepcopy(sec))
-                    sec = []
-                    sec.append("# " + line[fc])
-                else:
-                    # 尝试识别section
-                    if meta_line[index-1][fs] > meta_line[index][fs]:
-                        sec.append("\n" + line[fc])
-                    else:
-                        sec.append(line[fc])
-        mega_sec.append(copy.deepcopy(sec))
-
-        finals = []
-        for ms in mega_sec:
-            final = " ".join(ms)
-            final = final.replace('- ', ' ')
-            finals.append(final)
-        meta_txt = finals
-
-        def 把字符太少的块清除为回车(meta_txt):
-            for index, block_txt in enumerate(meta_txt):
-                if len(block_txt) < 100:
-                    meta_txt[index] = '\n'
-            return meta_txt
-        meta_txt = 把字符太少的块清除为回车(meta_txt)
-
-        def 清理多余的空行(meta_txt):
-            for index in reversed(range(1, len(meta_txt))):
-                if meta_txt[index] == '\n' and meta_txt[index-1] == '\n':
-                    meta_txt.pop(index)
-            return meta_txt
-        meta_txt = 清理多余的空行(meta_txt)
-
-        def 合并小写开头的段落块(meta_txt):
-            def starts_with_lowercase_word(s):
-                pattern = r"^[a-z]+"
-                match = re.match(pattern, s)
-                if match:
-                    return True
-                else:
-                    return False
-            for _ in range(100):
-                for index, block_txt in enumerate(meta_txt):
-                    if starts_with_lowercase_word(block_txt):
-                        if meta_txt[index-1] != '\n':
-                            meta_txt[index-1] += ' '
-                        else:
-                            meta_txt[index-1] = ''
-                        meta_txt[index-1] += meta_txt[index]
-                        meta_txt[index] = '\n'
-            return meta_txt
-        meta_txt = 合并小写开头的段落块(meta_txt)
-        meta_txt = 清理多余的空行(meta_txt)
-
-        meta_txt = '\n'.join(meta_txt)
-        # 清除重复的换行
-        for _ in range(5):
-            meta_txt = meta_txt.replace('\n\n', '\n')
-
-        # 换行 -> 双换行
-        meta_txt = meta_txt.replace('\n', '\n\n')
-
-        for f in finals:
-            print亮黄(f)
-            print亮绿('***************************')
-
-    return meta_txt, page_one_meta
-
-
 @CatchException
 def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_prompt, web_port):
     import glob
@@ -1,142 +1,66 @@
 from toolbox import update_ui
 from toolbox import CatchException, report_execption
-import re
-import unicodedata
+from .crazy_utils import read_and_clean_pdf_text
 from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
 fast_debug = False
 
-def is_paragraph_break(match):
-    """
-    根据给定的匹配结果来判断换行符是否表示段落分隔。
-    如果换行符前为句子结束标志(句号,感叹号,问号),且下一个字符为大写字母,则换行符更有可能表示段落分隔。
-    也可以根据之前的内容长度来判断段落是否已经足够长。
-    """
-    prev_char, next_char = match.groups()
-
-    # 句子结束标志
-    sentence_endings = ".!?"
-
-    # 设定一个最小段落长度阈值
-    min_paragraph_length = 140
-
-    if prev_char in sentence_endings and next_char.isupper() and len(match.string[:match.start(1)]) > min_paragraph_length:
-        return "\n\n"
-    else:
-        return " "
-
-def normalize_text(text):
-    """
-    通过把连字(ligatures)等文本特殊符号转换为其基本形式来对文本进行归一化处理。
-    例如,将连字 "fi" 转换为 "f" 和 "i"。
-    """
-    # 对文本进行归一化处理,分解连字
-    normalized_text = unicodedata.normalize("NFKD", text)
-
-    # 替换其他特殊字符
-    cleaned_text = re.sub(r'[^\x00-\x7F]+', '', normalized_text)
-
-    return cleaned_text
-
-def clean_text(raw_text):
-    """
-    对从 PDF 提取出的原始文本进行清洗和格式化处理。
-    1. 对原始文本进行归一化处理。
-    2. 替换跨行的连词,例如 “Espe-\ncially” 转换为 “Especially”。
-    3. 根据 heuristic 规则判断换行符是否是段落分隔,并相应地进行替换。
-    """
-    # 对文本进行归一化处理
-    normalized_text = normalize_text(raw_text)
-
-    # 替换跨行的连词
-    text = re.sub(r'(\w+-\n\w+)', lambda m: m.group(1).replace('-\n', ''), normalized_text)
-
-    # 根据前后相邻字符的特点,找到原文本中的换行符
-    newlines = re.compile(r'(\S)\n(\S)')
-
-    # 根据 heuristic 规则,用空格或段落分隔符替换原换行符
-    final_text = re.sub(newlines, lambda m: m.group(1) + is_paragraph_break(m) + m.group(2), text)
-
-    return final_text.strip()
-
 def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
-    import time, glob, os, fitz
+    import tiktoken
     print('begin analysis on:', file_name)
-    with fitz.open(file_name) as doc:
-        file_content = ""
-        for page in doc:
-            file_content += page.get_text()
-        file_content = clean_text(file_content)
-        # print(file_content)
-    split_number = 10000
-    split_group = (len(file_content)//split_number)+1
-    for i in range(0,split_group):
-        if i==0:
-            prefix = "接下来请你仔细分析下面的论文,学习里面的内容(专业术语、公式、数学概念).并且注意:由于论文内容较多,将分批次发送,每次发送完之后,你只需要回答“接受完成”"
-            i_say = prefix + f'文件名是{file_name},文章内容第{i+1}部分是 ```{file_content[i*split_number:(i+1)*split_number]}```'
-            i_say_show_user = f'文件名是:\n{file_name},\n由于论文内容过长,将分批请求(共{len(file_content)}字符,将分为{split_group}批,每批{split_number}字符)。\n当前发送{i+1}/{split_group}部分'
-        elif i==split_group-1:
-            i_say = f'你只需要回答“所有论文接受完成,请进行下一步”。文章内容第{i+1}/{split_group}部分是 ```{file_content[i*split_number:]}```'
-            i_say_show_user = f'当前发送{i+1}/{split_group}部分'
-        else:
-            i_say = f'你只需要回答“接受完成”。文章内容第{i+1}/{split_group}部分是 ```{file_content[i*split_number:(i+1)*split_number]}```'
-            i_say_show_user = f'当前发送{i+1}/{split_group}部分'
-        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
-        gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, llm_kwargs, chatbot, history=[], sys_prompt="")   # 带超时倒计时
-        while "完成" not in gpt_say:
-            i_say = f'你只需要回答“接受完成”。文章内容第{i+1}/{split_group}部分是 ```{file_content[i*split_number:(i+1)*split_number]}```'
-            i_say_show_user = f'出现error,重新发送{i+1}/{split_group}部分'
-            gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, llm_kwargs, chatbot, history=[], sys_prompt="")   # 带超时倒计时
-            time.sleep(1)
-        chatbot[-1] = (i_say_show_user, gpt_say)
-        history.append(i_say_show_user); history.append(gpt_say)
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        time.sleep(2)
+    file_content, page_one = read_and_clean_pdf_text(file_name)
+
+    # 递归地切割PDF文件,每一块(尽量是完整的一个section,比如introduction,experiment等,必要时再进行切割)
+    # 的长度必须小于 2500 个 Token
+    TOKEN_LIMIT_PER_FRAGMENT = 2500
 
-    i_say = f'接下来,请你扮演一名专业的学术教授,利用你的所有知识并且结合这篇文章,回答我的问题。(请牢记:1.直到我说“退出”,你才能结束任务;2.所有问题需要紧密围绕文章内容;3.如果有公式,请使用tex渲染)'
-    chatbot.append((i_say, "[Local Message] waiting gpt response."))
-    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+    from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
+    from toolbox import get_conf
+    enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
+    def get_token_num(txt): return len(enc.encode(txt))
+    paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
+        txt=file_content,  get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)
+    page_one_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
+        txt=str(page_one), get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT//4)
+    # 为了更好的效果,我们剥离Introduction之后的部分(如果有)
+    paper_meta = page_one_fragments[0].split('introduction')[0].split('Introduction')[0].split('INTRODUCTION')[0]
 
-    # ** gpt request **
-    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say, llm_kwargs, chatbot, history=history, sys_prompt="")   # 带超时倒计时
-    chatbot[-1] = (i_say, gpt_say)
-    history.append(i_say); history.append(gpt_say)
-    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+    ############################## <第一步,从摘要中提取高价值信息,放到history中> ##################################
+    final_results = []
+    final_results.append(paper_meta)
 
+    ############################## <第二步,迭代地历遍整个文章,提取精炼信息> ##################################
+    i_say_show_user = f'首先你在英文语境下通读整篇论文。'; gpt_say = "[Local Message] 收到。"           # 用户提示
+    chatbot.append([i_say_show_user, gpt_say]); yield from update_ui(chatbot=chatbot, history=[])    # 更新UI
 
-@CatchException
-def 理解PDF文档内容(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
-    import glob, os
-
-    # 基本信息:功能、贡献者
-    chatbot.append([
-        "函数插件功能?",
-        "理解PDF论文内容,并且将结合上下文内容,进行学术解答。函数插件贡献者: Hanzoe。"])
-    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-
-    import tkinter as tk
-    from tkinter import filedialog
-
-    root = tk.Tk()
-    root.withdraw()
-    txt = filedialog.askopenfilename()
-
-    # 尝试导入依赖,如果缺少依赖,则给出安装建议
-    try:
-        import fitz
-    except:
-        report_execption(chatbot, history,
-            a = f"解析项目: {txt}",
-            b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pymupdf```。")
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        return
-
-    # 清空历史,以免输入溢出
-    history = []
-
-    # 开始正式执行任务
-    yield from 解析PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
+    iteration_results = []
+    last_iteration_result = paper_meta  # 初始值是摘要
+    MAX_WORD_TOTAL = 4096
+    n_fragment = len(paper_fragments)
+    if n_fragment >= 20: print('文章极长,不能达到预期效果')
+    for i in range(n_fragment):
+        NUM_OF_WORD = MAX_WORD_TOTAL // n_fragment
+        i_say = f"Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words: {paper_fragments[i]}"
+        i_say_show_user = f"[{i+1}/{n_fragment}] Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words: {paper_fragments[i][:200]}"
+        gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user,  # i_say=真正给chatgpt的提问, i_say_show_user=给用户看的提问
+                                                                           llm_kwargs, chatbot,
+                                                                           history=["The main idea of the previous section is?", last_iteration_result], # 迭代上一次的结果
+                                                                           sys_prompt="Extract the main idea of this section."  # 提示
+                                                                        )
+        iteration_results.append(gpt_say)
+        last_iteration_result = gpt_say
 
+    ############################## <第三步,整理history> ##################################
+    final_results.extend(iteration_results)
+    final_results.append(f'接下来,你是一名专业的学术教授,利用以上信息,使用中文回答我的问题。')
+    # 接下来两句话只显示在界面上,不起实际作用
+    i_say_show_user = f'接下来,你是一名专业的学术教授,利用以上信息,使用中文回答我的问题。'; gpt_say = "[Local Message] 收到。"
+    chatbot.append([i_say_show_user, gpt_say])
+
+    ############################## <第四步,设置一个token上限,防止回答时Token溢出> ##################################
+    from .crazy_utils import input_clipping
+    _, final_results = input_clipping("", final_results, max_token_limit=3200)
+    yield from update_ui(chatbot=chatbot, history=final_results) # 注意这里的历史记录被替代了
+
+
 @CatchException
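Reviewer note: the rewritten 解析PDF above follows a four-step pattern: split the cleaned text into token-bounded fragments, summarize each fragment while feeding the previous summary back in as history, collect the summaries into final_results, then clip the list to a token budget with input_clipping. A self-contained sketch of the splitting and rolling-summary core, where summarize is a stand-in for request_gpt_model_in_new_thread_with_ui_alive, the splitter is a simplified stand-in for breakdown_txt_to_satisfy_token_limit_for_pdf, and the model name is an assumption (the plugin reads it from get_conf('LLM_MODEL')):

import tiktoken

enc = tiktoken.encoding_for_model("gpt-3.5-turbo")  # model name assumed
def get_token_num(txt): return len(enc.encode(txt))

def split_by_token_limit(txt, limit):
    # Naive stand-in for breakdown_txt_to_satisfy_token_limit_for_pdf:
    # greedily pack lines until the token budget is reached.
    fragments, current = [], ""
    for line in txt.split('\n'):
        if current and get_token_num(current + line) > limit:
            fragments.append(current)
            current = ""
        current += line + '\n'
    if current: fragments.append(current)
    return fragments

def summarize(prompt, history):
    # Stand-in for the real GPT request helper.
    raise NotImplementedError

def rolling_summary(file_content, paper_meta, max_word_total=4096):
    paper_fragments = split_by_token_limit(file_content, limit=2500)
    final_results, last = [paper_meta], paper_meta
    num_of_word = max_word_total // max(len(paper_fragments), 1)
    for frag in paper_fragments:
        prompt = f"Read this section, recapitulate the content of this section with less than {num_of_word} words: {frag}"
        # Each call sees only the previous summary as history, keeping the context small.
        last = summarize(prompt, history=["The main idea of the previous section is?", last])
        final_results.append(last)
    return final_results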
@@ -146,7 +70,7 @@ def 理解PDF文档内容标准文件输入(txt, llm_kwargs, plugin_kwargs, chat
     # 基本信息:功能、贡献者
     chatbot.append([
         "函数插件功能?",
-        "理解PDF论文内容,并且将结合上下文内容,进行学术解答。函数插件贡献者: Hanzoe。"])
+        "理解PDF论文内容,并且将结合上下文内容,进行学术解答。函数插件贡献者: Hanzoe, binary-husky"])
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
 
     # 尝试导入依赖,如果缺少依赖,则给出安装建议
version (4 changes)
@@ -1,5 +1,5 @@
 {
-  "version": 2.67,
+  "version": 2.68,
   "show_feature": true,
-  "new_feature": "现可通过输入区更新临时api-key <-> 增强多线程稳定性(涉及代码解析、PDF翻译、自译解等) <-> 修复Token计数错误(解决PDF翻译的分割不合理的问题) <-> 如果一键更新失败,可前往github手动更新"
+  "new_feature": "改善理解pdf(chatpdf)功能 <-> 如果一键更新失败,可前往github手动更新"
 }