Merge branch 'master' into dev

This commit is contained in:
Your Name 2023-04-07 23:55:19 +08:00
commit 8dbae2c68a
5 changed files with 129 additions and 8 deletions

View File

@ -72,6 +72,7 @@ def get_crazy_functions():
from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer
from crazy_functions.总结word文档 import 总结word文档
from crazy_functions.批量翻译PDF文档_多线程 import 批量翻译PDF文档
from crazy_functions.谷歌检索小助手 import 谷歌检索小助手
function_plugins.update({
"批量翻译PDF文档多线程": {
@ -90,6 +91,11 @@ def get_crazy_functions():
"AsButton": False, # 加入下拉菜单中
"Function": HotReload(批量总结PDF文档pdfminer)
},
"谷歌学术检索助手输入谷歌学术搜索页url": {
"Color": "stop",
"AsButton": False, # 加入下拉菜单中
"Function": HotReload(谷歌检索小助手)
},
"批量总结Word文档": {
"Color": "stop",
"Function": HotReload(总结word文档)

View File

@ -1,4 +1,4 @@
import traceback
def request_gpt_model_in_new_thread_with_ui_alive(inputs, inputs_show_user, top_p, temperature, chatbot, history, sys_prompt, refresh_interval=0.2):
import time
@ -43,10 +43,16 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inp
mutable = [["", time.time()] for _ in range(n_frag)]
def _req_gpt(index, inputs, history, sys_prompt):
gpt_say = predict_no_ui_long_connection(
inputs=inputs, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt, observe_window=mutable[
index]
)
try:
gpt_say = predict_no_ui_long_connection(
inputs=inputs, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt, observe_window=mutable[index]
)
except:
# 收拾残局
tb_str = '```\n' + traceback.format_exc() + '```'
gpt_say = f"[Local Message] 线程{index}在执行过程中遭遇问题, Traceback\n\n{tb_str}\n\n"
if len(mutable[index][0]) > 0:
gpt_say += "此线程失败前收到的回答:" + mutable[index][0]
return gpt_say
# 异步任务开始
futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip(

View File

@ -0,0 +1,106 @@
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from toolbox import CatchException, report_execption, write_results_to_file
def get_meta_information(url, chatbot, history):
    """Scrape a Google Scholar result page and collect metadata for each paper.

    This is a generator. After every paper it overwrites the last ``chatbot``
    entry with that paper's title and abstract and yields
    ``(chatbot, [], msg)`` so the caller can refresh the UI. The collected
    metadata is delivered as the generator's return value (i.e. the value of
    ``yield from get_meta_information(...)``).

    Args:
        url: Address of the Google Scholar search-result page to download.
        chatbot: Chat log whose last ``[question, answer]`` entry is rewritten
            in place with progress information.
        history: Not used by this function; kept for signature compatibility.

    Returns:
        A list of dicts with the keys ``title``, ``author``, ``citation``,
        ``abstract`` and ``is_paper_in_arxiv``, one per parsed result.
    """
    import requests
    import arxiv
    import difflib
    from bs4 import BeautifulSoup
    from toolbox import get_conf

    proxies, = get_conf('proxies')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36',
    }
    # Send the GET request and parse the returned page.
    response = requests.get(url, proxies=proxies, headers=headers)
    soup = BeautifulSoup(response.text, "html.parser")

    def string_similar(s1, s2):
        # Cheap upper-bound similarity ratio between two strings.
        return difflib.SequenceMatcher(None, s1, s2).quick_ratio()

    def node_text(node, default=''):
        # select_one() yields None when nothing matches; avoid AttributeError.
        return node.text if node is not None else default

    profile = []
    # Walk over every search result and extract title, authors, citations.
    for result in soup.select(".gs_ri"):
        anchor = result.a
        if anchor is None:
            # No link in this entry, so there is no title to work with.
            continue
        # Collapse newlines and repeated blanks into single spaces.
        title = ' '.join(anchor.text.split())
        author = node_text(result.select_one(".gs_a"))
        # The citation count is the text of the "cites" link, when present.
        citation = node_text(
            result.select_one(".gs_fl > a[href*='cites']"), 'cited by 0')
        # The page only carries a snippet of the abstract in .gs_rs.
        abstract = node_text(result.select_one(".gs_rs")).strip()

        search = arxiv.Search(
            query = title,
            max_results = 1,
            sort_by = arxiv.SortCriterion.Relevance,
        )
        # A default is required: an exhausted iterator would raise
        # StopIteration, which becomes RuntimeError inside a generator.
        paper = next(search.results(), None)
        if paper is not None and string_similar(title, paper.title) > 0.90:
            # Same paper: prefer the full abstract published on arXiv.
            abstract = paper.summary.replace('\n', ' ')
            is_paper_in_arxiv = True
        else:
            # Different paper or no hit: keep the snippet from the page.
            is_paper_in_arxiv = False

        print(title)
        print(author)
        print(citation)
        profile.append({
            'title':title,
            'author':author,
            'citation':citation,
            'abstract':abstract,
            'is_paper_in_arxiv':is_paper_in_arxiv,
        })

        # Show the paper that was just processed in the last chat entry.
        chatbot[-1] = [chatbot[-1][0], title + f'\n\n是否在arxiv中不在arxiv中无法获取完整摘要:{is_paper_in_arxiv}\n\n' + abstract]
        msg = "正常"
        yield chatbot, [], msg
    return profile
@CatchException
def 谷歌检索小助手(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
    """Plugin entry point: analyse every paper on a Google Scholar result page.

    Generator that yields ``(chatbot, history, msg)`` after each step so the
    UI can refresh. The page metadata is gathered by
    ``get_meta_information`` and then sent to the model in batches of ten
    papers; each reply is appended to ``history``, which is finally passed to
    ``write_results_to_file``.

    Args:
        txt: User input, expected to be the URL of a Google Scholar search page.
        top_p: Sampling parameter forwarded to the model request.
        temperature: Sampling parameter forwarded to the model request.
        chatbot: Chat log that is appended to and yielded for display.
        history: Incoming conversation history; discarded before processing.
        systemPromptTxt: Not used by this plugin.
        WEB_PORT: Not used by this plugin.
    """
    # Basic information: what the plugin does and who contributed it.
    chatbot.append([
        "函数插件功能?",
        "分析用户提供的谷歌学术google scholar搜索页面中出现的所有文章: binary-husky插件初始化中..."])
    yield chatbot, history, '正常'

    # Probe the optional dependencies; give install advice if one is missing.
    try:
        import arxiv  # noqa: F401 -- imported only to check availability
        from bs4 import BeautifulSoup  # noqa: F401
    except ImportError:
        report_execption(chatbot, history,
            a = f"解析项目: {txt}",
            b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade beautifulsoup4 arxiv```。")
        yield chatbot, history, '正常'
        return

    # Drop the previous history so the model input does not overflow.
    history = []

    meta_paper_info_list = yield from get_meta_information(txt, chatbot, history)

    # Send the papers in batches so every paper on the page is handled,
    # not just the first ten.
    batch_size = 10
    batch_starts = range(0, len(meta_paper_info_list), batch_size)
    for batch_no, start in enumerate(batch_starts, start=1):
        batch = meta_paper_info_list[start:start + batch_size]
        i_say = "下面是一些学术文献的数据,请从中提取出以下内容。" + \
            "1、英文题目2、中文题目翻译3、作者4、arxiv公开is_paper_in_arxiv4、引用数量cite5、中文摘要翻译。" + \
            f"以下是信息源:{str(batch)}"

        inputs_show_user = f"请分析此页面中出现的所有文章:{txt}"
        gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
            inputs=i_say, inputs_show_user=inputs_show_user,
            top_p=top_p, temperature=temperature, chatbot=chatbot, history=[],
            sys_prompt="你是一个学术翻译请从数据中提取信息。你必须使用Markdown格式。你必须逐个文献进行处理。"
        )

        # Label each reply with its batch number.
        history.extend([f"第{batch_no}批", gpt_say])

    chatbot.append(["状态?", "已经全部完成"])
    msg = '正常'
    yield chatbot, history, msg
    # Hand the collected replies to write_results_to_file and show its result.
    res = write_results_to_file(history)
    chatbot.append(("完成了吗?", res))
    yield chatbot, history, msg

View File

@ -104,7 +104,10 @@ def predict_no_ui_long_connection(inputs, top_p, temperature, history=[], sys_pr
result = ''
while True:
try: chunk = next(stream_response).decode()
except StopIteration: break
except StopIteration:
break
except requests.exceptions.ConnectionError:
chunk = next(stream_response).decode() # 失败了,重试一次?再失败就没办法了。
if len(chunk)==0: continue
if not chunk.startswith('data:'):
error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode()

View File

@ -1,5 +1,5 @@
{
"version": 2.4,
"version": 2.43,
"show_feature": true,
"new_feature": "(1)新增PDF全文翻译功能; (2)新增输入区切换位置的功能; (3)新增垂直布局选项; (4)多线程函数插件优化。"
"new_feature": "(1)新增PDF全文翻译功能; (2)新增输入区切换位置的功能; (3)新增垂直布局选项; (4)多线程函数插件优化。(5) 改善多线程运行遇到网络问题时的处理"
}