diff --git a/crazy_functional.py b/crazy_functional.py
index 3e53f54..028a2be 100644
--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -72,6 +72,7 @@ def get_crazy_functions():
     from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer
     from crazy_functions.总结word文档 import 总结word文档
     from crazy_functions.批量翻译PDF文档_多线程 import 批量翻译PDF文档
+    from crazy_functions.谷歌检索小助手 import 谷歌检索小助手
 
     function_plugins.update({
         "批量翻译PDF文档(多线程)": {
@@ -90,6 +91,11 @@ def get_crazy_functions():
             "AsButton": False,  # 加入下拉菜单中
             "Function": HotReload(批量总结PDF文档pdfminer)
         },
+        "谷歌学术检索助手(输入谷歌学术搜索页url)": {
+            "Color": "stop",
+            "AsButton": False,  # listed in the drop-down menu
+            "Function": HotReload(谷歌检索小助手)
+        },
         "批量总结Word文档": {
             "Color": "stop",
             "Function": HotReload(总结word文档)
diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py
index bdd6e2b..ac96896 100644
--- a/crazy_functions/crazy_utils.py
+++ b/crazy_functions/crazy_utils.py
@@ -1,4 +1,4 @@
-
+import traceback
 
 def request_gpt_model_in_new_thread_with_ui_alive(inputs, inputs_show_user, top_p, temperature, chatbot, history, sys_prompt, refresh_interval=0.2):
     import time
@@ -43,10 +43,16 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inp
     mutable = [["", time.time()] for _ in range(n_frag)]
 
     def _req_gpt(index, inputs, history, sys_prompt):
-        gpt_say = predict_no_ui_long_connection(
-            inputs=inputs, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt, observe_window=mutable[
-                index]
-        )
+        try:
+            gpt_say = predict_no_ui_long_connection(
+                inputs=inputs, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt, observe_window=mutable[index]
+            )
+        except Exception:
+            # Clean up: report the failure as this thread's answer instead of crashing.
+            tb_str = '```\n' + traceback.format_exc() + '```'
+            gpt_say = f"[Local Message] 线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
+            if mutable[index][0]:
+                gpt_say += "此线程失败前收到的回答:" + mutable[index][0]
         return gpt_say
     # 异步任务开始
     futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip(
diff --git a/crazy_functions/谷歌检索小助手.py b/crazy_functions/谷歌检索小助手.py
new file mode 100644
index 0000000..1aa915e
--- /dev/null
+++ b/crazy_functions/谷歌检索小助手.py
@@ -0,0 +1,144 @@
+from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+from toolbox import CatchException, report_execption, write_results_to_file
+
+# Papers sent to the model per request; keeps each prompt within limits.
+BATCH_SIZE = 10
+
+
+def get_meta_information(url, chatbot, history):
+    """Scrape a Google Scholar result page and collect metadata per paper.
+
+    Generator: after every paper the last chatbot entry is overwritten with
+    a progress message and ``(chatbot, [], "正常")`` is yielded so the UI can
+    refresh. The collected list is the generator's return value, so call it
+    with ``yield from``.
+
+    Args:
+        url: address of a Google Scholar search-result page.
+        chatbot: chat display list; its last entry is replaced in place.
+        history: unused; kept so the signature matches other helpers.
+
+    Returns:
+        list[dict]: one dict per paper with the keys ``title``, ``author``,
+        ``citation``, ``abstract`` and ``is_paper_in_arxiv``.
+    """
+    import requests
+    import arxiv
+    import difflib
+    from bs4 import BeautifulSoup
+    from toolbox import get_conf
+    proxies, = get_conf('proxies')
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36',
+    }
+    # Fetch the page; the timeout stops a dead proxy from hanging the plugin.
+    response = requests.get(url, proxies=proxies, headers=headers, timeout=30)
+
+    # Parse the HTML.
+    soup = BeautifulSoup(response.text, "html.parser")
+
+    def string_similar(s1, s2):
+        return difflib.SequenceMatcher(None, s1, s2).quick_ratio()
+
+    def text_of(node, default):
+        # select_one() returns None when nothing matches the selector.
+        return default if node is None else node.text
+
+    profile = []
+    # Each ".gs_ri" element is one search result.
+    for result in soup.select(".gs_ri"):
+        if result.a is None:
+            # Without any link there is no title to read; skip the entry.
+            continue
+        # Collapse line breaks and repeated blanks inside the title.
+        title = ' '.join(result.a.text.split())
+        author = text_of(result.select_one(".gs_a"), '')
+        # The citation count is the text of the "cites" link, when present.
+        citation = text_of(result.select_one(".gs_fl > a[href*='cites']"), 'cited by 0')
+        abstract = text_of(result.select_one(".gs_rs"), '').strip()
+        search = arxiv.Search(
+            query = title,
+            max_results = 1,
+            sort_by = arxiv.SortCriterion.Relevance,
+        )
+        # Query arXiv once; the default avoids StopIteration, which would
+        # surface as RuntimeError inside this generator, when nothing matches.
+        paper = next(search.results(), None)
+        is_paper_in_arxiv = paper is not None and string_similar(title, paper.title) > 0.90
+        if is_paper_in_arxiv:
+            # Same paper: arXiv's full summary beats Scholar's short snippet.
+            abstract = paper.summary.replace('\n', ' ')
+        print(title)
+        print(author)
+        print(citation)
+        profile.append({
+            'title':title,
+            'author':author,
+            'citation':citation,
+            'abstract':abstract,
+            'is_paper_in_arxiv':is_paper_in_arxiv,
+        })
+
+        chatbot[-1] = [chatbot[-1][0], title + f'\n\n是否在arxiv中(不在arxiv中无法获取完整摘要):{is_paper_in_arxiv}\n\n' + abstract]
+        msg = "正常"
+        yield chatbot, [], msg
+    return profile
+
+
+@CatchException
+def 谷歌检索小助手(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
+    """Plugin entry point: summarise every paper on a Google Scholar page.
+
+    Generator that yields ``(chatbot, history, status)`` tuples for the UI.
+
+    Args:
+        txt: URL of the Google Scholar search-result page.
+        top_p, temperature: sampling parameters passed to the model request.
+        chatbot: chat display list, appended to as the plugin progresses.
+        history: incoming history; reset and rebuilt from the model answers.
+        systemPromptTxt, WEB_PORT: not referenced in this function.
+    """
+    # Basic information: what the plugin does and who contributed it.
+    chatbot.append([
+        "函数插件功能?",
+        "分析用户提供的谷歌学术(google scholar)搜索页面中,出现的所有文章: binary-husky,插件初始化中..."])
+    yield chatbot, history, '正常'
+
+    # Optional dependencies: give install advice if they are missing.
+    try:
+        import arxiv
+        from bs4 import BeautifulSoup
+    except ImportError:
+        report_execption(chatbot, history,
+            a = f"解析项目: {txt}",
+            b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade beautifulsoup4 arxiv```。")
+        yield chatbot, history, '正常'
+        return
+
+    # Clear the history so the model input does not overflow.
+    history = []
+
+    meta_paper_info_list = yield from get_meta_information(txt, chatbot, history)
+
+    inputs_show_user = f"请分析此页面中出现的所有文章:{txt}"
+    # Process every batch, not only the first BATCH_SIZE papers.
+    for start in range(0, len(meta_paper_info_list), BATCH_SIZE):
+        batch = meta_paper_info_list[start:start + BATCH_SIZE]
+        i_say = "下面是一些学术文献的数据,请从中提取出以下内容。" + \
+            "1、英文题目;2、中文题目翻译;3、作者;4、arxiv公开(is_paper_in_arxiv);5、引用数量(citation);6、中文摘要翻译。" + \
+            f"以下是信息源:{str(batch)}"
+
+        gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
+            inputs=i_say, inputs_show_user=inputs_show_user,
+            top_p=top_p, temperature=temperature, chatbot=chatbot, history=[],
+            sys_prompt="你是一个学术翻译,请从数据中提取信息。你必须使用Markdown格式。你必须逐个文献进行处理。"
+        )
+
+        history.extend([f"第{start // BATCH_SIZE + 1}批", gpt_say])
+
+    chatbot.append(["状态?", "已经全部完成"])
+    msg = '正常'
+    yield chatbot, history, msg
+    res = write_results_to_file(history)
+    chatbot.append(("完成了吗?", res))
+    yield chatbot, history, msg
diff --git a/request_llm/bridge_chatgpt.py b/request_llm/bridge_chatgpt.py
index c27a4ec..3ffbd73 100644
--- a/request_llm/bridge_chatgpt.py
+++ b/request_llm/bridge_chatgpt.py
@@ -104,7 +104,13 @@ def predict_no_ui_long_connection(inputs, top_p, temperature, history=[], sys_pr
     result = ''
     while True:
         try: chunk = next(stream_response).decode()
-        except StopIteration: break
+        except StopIteration:
+            break
+        except requests.exceptions.ConnectionError:
+            # Retry once. A stream that already raised may be exhausted, so
+            # treat StopIteration here as the normal end instead of leaking it.
+            try: chunk = next(stream_response).decode()
+            except StopIteration: break
         if len(chunk)==0: continue
         if not chunk.startswith('data:'):
             error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode()
diff --git a/version b/version
index 8ad8971..62ccc9f 100644
--- a/version
+++ b/version
@@ -1,5 +1,5 @@
 {
-  "version": 2.4,
+  "version": 2.43,
   "show_feature": true,
-  "new_feature": "(1)新增PDF全文翻译功能; (2)新增输入区切换位置的功能; (3)新增垂直布局选项; (4)多线程函数插件优化。"
+  "new_feature": "(1)新增PDF全文翻译功能; (2)新增输入区切换位置的功能; (3)新增垂直布局选项; (4)多线程函数插件优化。(5) 改善多线程运行遇到网络问题时的处理"
 }