From 31d5ee6cccd8cf370fef5fe92a8becd18773aad6 Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Thu, 7 Sep 2023 23:05:54 +0800 Subject: [PATCH 1/8] Update README.md --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0451262..5a055d9 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ Latex论文一键校对 | [函数插件] 仿Grammarly对Latex文章进行语法 ⭐ChatGLM2微调模型 | 支持加载ChatGLM2微调模型,提供ChatGLM2微调辅助插件 更多LLM模型接入,支持[huggingface部署](https://huggingface.co/spaces/qingxu98/gpt-academic) | 加入Newbing接口(新必应),引入清华[Jittorllms](https://github.com/Jittor/JittorLLMs)支持[LLaMA](https://github.com/facebookresearch/llama)和[盘古α](https://openi.org.cn/pangu/) ⭐[void-terminal](https://github.com/binary-husky/void-terminal) pip包 | 脱离GUI,在Python中直接调用本项目的所有函数插件(开发中) -⭐虚空终端插件 | 用自然语言,直接调度本项目其他插件 +⭐虚空终端插件 | [函数插件] 用自然语言,直接调度本项目其他插件 更多新功能展示 (图像生成等) …… | 见本文档结尾处 …… @@ -149,11 +149,14 @@ python main.py ### 安装方法II:使用Docker +[![fullcapacity](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-all-capacity.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-all-capacity.yml) + 1. 
仅ChatGPT(推荐大多数人选择,等价于docker-compose方案1) [![basic](https://github.com/binary-husky/gpt_academic/actions/workflows/build-without-local-llms.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-without-local-llms.yml) [![basiclatex](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-latex.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-latex.yml) [![basicaudio](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-audio-assistant.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-audio-assistant.yml) + ``` sh git clone --depth=1 https://github.com/binary-husky/gpt_academic.git # 下载项目 cd gpt_academic # 进入路径 From c176187d245b1a42dc1de4381cc7575f1cfaca7a Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Thu, 7 Sep 2023 23:46:54 +0800 Subject: [PATCH 2/8] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=9B=A0=E4=B8=BA?= =?UTF-8?q?=E5=87=BD=E6=95=B0=E8=BF=94=E5=9B=9E=E5=80=BC=E5=AF=BC=E8=87=B4?= =?UTF-8?q?=E7=9A=84=E4=B8=8D=E5=87=86=E7=A1=AE=E9=94=99=E8=AF=AF=E6=8F=90?= =?UTF-8?q?=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/Latex输出PDF结果.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py index 0ba9f19..8686f7e 100644 --- a/crazy_functions/Latex输出PDF结果.py +++ b/crazy_functions/Latex输出PDF结果.py @@ -109,7 +109,7 @@ def arxiv_download(chatbot, history, txt): url_ = txt # https://arxiv.org/abs/1707.06690 if not txt.startswith('https://arxiv.org/abs/'): - msg = f"解析arxiv网址失败, 期望格式例如: https://arxiv.org/abs/1707.06690。实际得到格式: {url_}" + msg = f"解析arxiv网址失败, 期望格式例如: https://arxiv.org/abs/1707.06690。实际得到格式: {url_}。" yield from update_ui_lastest_msg(msg, chatbot=chatbot, history=history) # 刷新界面 
return msg, None # <-------------- set format -------------> @@ -255,7 +255,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, project_folder = txt else: if txt == "": txt = '空空如也的输入栏' - report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}") + report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无法处理: {txt}") yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 return From bac6810e759b81f6a5ee54ad12939dc621581296 Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Fri, 8 Sep 2023 09:38:16 +0800 Subject: [PATCH 3/8] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=93=8D=E4=BD=9C?= =?UTF-8?q?=E6=8F=90=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/虚空终端.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crazy_functions/虚空终端.py b/crazy_functions/虚空终端.py index a51e79b..2e1b523 100644 --- a/crazy_functions/虚空终端.py +++ b/crazy_functions/虚空终端.py @@ -24,12 +24,12 @@ explain_msg = """ ## 虚空终端插件说明: 1. 请用**自然语言**描述您需要做什么。例如: - - 「请调用插件,为我翻译PDF论文,论文我刚刚放到上传区了。」 - - 「请调用插件翻译PDF论文,地址为https://www.nature.com/articles/s41586-019-1724-z.pdf」 - - 「生成一张图片,图中鲜花怒放,绿草如茵,用插件实现。」 + - 「请调用插件,为我翻译PDF论文,论文我刚刚放到上传区了」 + - 「请调用插件翻译PDF论文,地址为https://aaa/bbb/ccc.pdf」 + - 「把Arxiv论文翻译成中文PDF,arxiv论文的ID是1812.10695,记得用插件!」 + - 「生成一张图片,图中鲜花怒放,绿草如茵,用插件实现」 - 「用插件翻译README,Github网址是https://github.com/facebookresearch/co-tracker」 - - 「给爷翻译Arxiv论文,arxiv论文的ID是1812.10695,记得用插件,不要自己瞎搞!」 - - 「我不喜欢当前的界面颜色,修改配置,把主题THEME更换为THEME="High-Contrast"。」 + - 「我不喜欢当前的界面颜色,修改配置,把主题THEME更换为THEME="High-Contrast"」 - 「请问Transformer网络的结构是怎样的?」 2. 
您可以打开插件下拉菜单以了解本项目的各种能力。 From 13c9606af7cdee8f95eca79641bf6968a494c79e Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Fri, 8 Sep 2023 09:47:29 +0800 Subject: [PATCH 4/8] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E4=B8=8B=E8=BD=BDPDF?= =?UTF-8?q?=E5=A4=B1=E8=B4=A5=E6=97=B6=E4=BA=A7=E7=94=9F=E7=9A=84=E9=94=99?= =?UTF-8?q?=E8=AF=AF=E6=8F=90=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/crazy_utils.py | 5 ++++- crazy_functions/pdf_fns/parse_pdf.py | 7 ++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py index 8069703..5a314b3 100644 --- a/crazy_functions/crazy_utils.py +++ b/crazy_functions/crazy_utils.py @@ -593,7 +593,10 @@ def get_files_from_everything(txt, type): # type='.md' from toolbox import get_conf from toolbox import get_log_folder, gen_time_str proxies, = get_conf('proxies') - r = requests.get(txt, proxies=proxies) + try: + r = requests.get(txt, proxies=proxies) + except: + raise ConnectionRefusedError(f"无法下载资源{txt},请检查。") path = os.path.join(get_log_folder(plugin_name='web_download'), gen_time_str()+type) with open(path, 'wb+') as f: f.write(r.content) project_folder = get_log_folder(plugin_name='web_download') diff --git a/crazy_functions/pdf_fns/parse_pdf.py b/crazy_functions/pdf_fns/parse_pdf.py index 00016be..8a7117a 100644 --- a/crazy_functions/pdf_fns/parse_pdf.py +++ b/crazy_functions/pdf_fns/parse_pdf.py @@ -20,6 +20,11 @@ def get_avail_grobid_url(): def parse_pdf(pdf_path, grobid_url): import scipdf # pip install scipdf_parser if grobid_url.endswith('/'): grobid_url = grobid_url.rstrip('/') - article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url) + try: + article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url) + except GROBID_OFFLINE_EXCEPTION: + raise GROBID_OFFLINE_EXCEPTION("GROBID服务不可用,请修改config中的GROBID_URL,可修改成本地GROBID服务。") + except: + raise 
RuntimeError("解析PDF失败,请检查PDF是否损坏。") return article_dict From 77a6105a9a60b8aa6cc02a7ec5edc35eebb320ef Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Fri, 8 Sep 2023 09:52:29 +0800 Subject: [PATCH 5/8] =?UTF-8?q?=E4=BF=AE=E6=94=B9demo=E6=A1=88=E4=BE=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5a055d9..ef463bd 100644 --- a/README.md +++ b/README.md @@ -255,7 +255,7 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h 3. 虚空终端(从自然语言输入中,理解用户意图+自动调用其他插件) -- 步骤一:输入 “ 请调用插件翻译PDF论文,地址为https://www.nature.com/articles/s41586-019-1724-z.pdf ” +- 步骤一:输入 “ 请调用插件翻译PDF论文,地址为https://storage.googleapis.com/deepmind-media/alphago/AlphaGoNaturePaper.pdf ” - 步骤二:点击“虚空终端”
From 347124c96724e09471a2d7f8526be8de016b14ec Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Fri, 8 Sep 2023 10:43:20 +0800 Subject: [PATCH 6/8] update scipdf_parser dep --- crazy_functions/批量翻译PDF文档_多线程.py | 4 ++-- requirements.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crazy_functions/批量翻译PDF文档_多线程.py b/crazy_functions/批量翻译PDF文档_多线程.py index 456c773..0949f7e 100644 --- a/crazy_functions/批量翻译PDF文档_多线程.py +++ b/crazy_functions/批量翻译PDF文档_多线程.py @@ -24,10 +24,11 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst try: import fitz import tiktoken + import scipdf except: report_execption(chatbot, history, a=f"解析项目: {txt}", - b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pymupdf tiktoken```。") + b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pymupdf tiktoken scipdf_parser```。") yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 return @@ -58,7 +59,6 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst def 解析PDF_基于GROBID(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, grobid_url): import copy - import tiktoken TOKEN_LIMIT_PER_FRAGMENT = 1280 generated_conclusion_files = [] generated_html_files = [] diff --git a/requirements.txt b/requirements.txt index e6d27d2..5ff40cc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,4 +20,4 @@ arxiv rich pypdf2==2.12.1 websocket-client -scipdf_parser==0.3 +scipdf_parser>=0.3 From cce69beee9102a901d6208d7c3fa295aa2e5ff09 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Fri, 8 Sep 2023 11:08:02 +0800 Subject: [PATCH 7/8] update error message --- crazy_functions/批量翻译PDF文档_多线程.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crazy_functions/批量翻译PDF文档_多线程.py b/crazy_functions/批量翻译PDF文档_多线程.py index 0949f7e..6e9fe6a 100644 --- a/crazy_functions/批量翻译PDF文档_多线程.py +++ b/crazy_functions/批量翻译PDF文档_多线程.py @@ -66,7 +66,7 @@ def 
解析PDF_基于GROBID(file_manifest, project_folder, llm_kwargs, plugin_kwa for index, fp in enumerate(file_manifest): chatbot.append(["当前进度:", f"正在连接GROBID服务,请稍候: {grobid_url}\n如果等待时间过长,请修改config中的GROBID_URL,可修改成本地GROBID服务。"]); yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 article_dict = parse_pdf(fp, grobid_url) - print(article_dict) + if article_dict is None: raise RuntimeError("解析PDF失败,请检查PDF是否损坏。") prompt = "以下是一篇学术论文的基本信息:\n" # title title = article_dict.get('title', '无法获取 title'); prompt += f'title:{title}\n\n' From 2202cf3701ca3c5414145bbf389b174f2468a2b0 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Fri, 8 Sep 2023 11:11:53 +0800 Subject: [PATCH 8/8] remove proxy message --- check_proxy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/check_proxy.py b/check_proxy.py index 474988c..b6fe99f 100644 --- a/check_proxy.py +++ b/check_proxy.py @@ -5,7 +5,7 @@ def check_proxy(proxies): try: response = requests.get("https://ipapi.co/json/", proxies=proxies, timeout=4) data = response.json() - print(f'查询代理的地理位置,返回的结果是{data}') + # print(f'查询代理的地理位置,返回的结果是{data}') if 'country_name' in data: country = data['country_name'] result = f"代理配置 {proxies_https}, 代理所在地:{country}"