add comments

2023-03-26 19:13:58 +08:00 · 2023-03-26 19:13:58 +08:00 · f04d9755bf
commit f04d9755bf
parent 6505fea0b7
6 changed files with 73 additions and 25 deletions
--- a/README.md
+++ b/README.md
@ -60,7 +60,7 @@ chat分析报告生成 | [实验性功能] 运行后自动生成总结汇报
 ## 直接运行 (Windows or Linux or MacOS)
-```
+``` sh
 # 下载项目
 git clone https://github.com/binary-husky/chatgpt_academic.git
 cd chatgpt_academic
@ -73,9 +73,16 @@ python -m pip install -r requirements.txt
 python main.py
 # 测试实验性功能
-input区域 输入 ./crazy_functions/test_project/cpp/libJPG ， 然后点击 解析整个C++项目的头文件
+## 测试C++项目头文件分析
-input区域 输入 ./crazy_functions/test_project/latex/attention ， 然后点击 解读latex论文写摘要
+input区域 输入 ./crazy_functions/test_project/cpp/libJPG ， 然后点击 "[实验] 解析整个C++项目（input输入项目根路径）"
-input区域 输入 ./crazy_functions/test_project/python/dqn ， 然后点击 解析整个Python项目
+## 测试给Latex项目写摘要
 input区域 输入 ./crazy_functions/test_project/latex/attention ， 然后点击 "[实验] 读tex论文写摘要（input输入项目根路径）"
 ## 测试Python项目分析
 input区域 输入 ./crazy_functions/test_project/python/dqn ， 然后点击 "[实验] 解析整个py项目（input输入项目根路径）"
 ## 测试自我代码解读
 点击 "[实验] 请解析并解构此项目本身"
 ## 测试实验功能模板函数（要求gpt回答几个数的平方是什么），您可以根据此函数为模板，实现更复杂的功能
 点击 "[实验] 实验功能函数模板"
 ```
@ -93,9 +100,18 @@ docker build -t gpt-academic .
 docker run --rm -it --net=host gpt-academic
 # 测试实验性功能
-input区域 输入 ./crazy_functions/test_project/cpp/libJPG ， 然后点击 解析整个C++项目的头文件
+## 测试自我代码解读
-input区域 输入 ./crazy_functions/test_project/latex/attention ， 然后点击 解读latex论文写摘要
+点击 "[实验] 请解析并解构此项目本身"
-input区域 输入 ./crazy_functions/test_project/python/dqn ， 然后点击 解析整个Python项目
+## 测试实验功能模板函数（要求gpt回答几个数的平方是什么），您可以根据此函数为模板，实现更复杂的功能
 点击 "[实验] 实验功能函数模板"
 ##（请注意在docker中运行时，需要额外注意程序的文件访问权限问题）
 ## 测试C++项目头文件分析
 input区域 输入 ./crazy_functions/test_project/cpp/libJPG ， 然后点击 "[实验] 解析整个C++项目（input输入项目根路径）"
 ## 测试给Latex项目写摘要
 input区域 输入 ./crazy_functions/test_project/latex/attention ， 然后点击 "[实验] 读tex论文写摘要（input输入项目根路径）"
 ## 测试Python项目分析
 input区域 输入 ./crazy_functions/test_project/python/dqn ， 然后点击 "[实验] 解析整个py项目（input输入项目根路径）"
 ```
--- a/crazy_functions/解析项目源代码.py
+++ b/crazy_functions/解析项目源代码.py
@ -9,9 +9,9 @@ def 解析源代码(file_manifest, project_folder, top_p, temperature, chatbot,
        with open(fp, 'r', encoding='utf-8') as f:
            file_content = f.read()
-        前言 = "接下来请你逐文件分析下面的工程" if index==0 else ""
+        prefix = "接下来请你逐文件分析下面的工程" if index==0 else ""
-        i_say = 前言 + f'请对下面的程序文件做一个概述文件名是{os.path.relpath(fp, project_folder)}，文件代码是 ```{file_content}```'
+        i_say = prefix + f'请对下面的程序文件做一个概述文件名是{os.path.relpath(fp, project_folder)}，文件代码是 ```{file_content}```'
-        i_say_show_user = 前言 + f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {os.path.abspath(fp)}'
+        i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {os.path.abspath(fp)}'
        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
        yield chatbot, history, '正常'
@ -56,9 +56,9 @@ def 解析项目本身(txt, top_p, temperature, chatbot, history, systemPromptTx
        with open(fp, 'r', encoding='utf-8') as f:
            file_content = f.read()
-        前言 = "接下来请你分析自己的程序构成，别紧张，" if index==0 else ""
+        prefix = "接下来请你分析自己的程序构成，别紧张，" if index==0 else ""
-        i_say = 前言 + f'请对下面的程序文件做一个概述文件名是{fp}，文件代码是 ```{file_content}```'
+        i_say = prefix + f'请对下面的程序文件做一个概述文件名是{fp}，文件代码是 ```{file_content}```'
-        i_say_show_user = 前言 + f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {os.path.abspath(fp)}'
+        i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {os.path.abspath(fp)}'
        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
        yield chatbot, history, '正常'
--- a/crazy_functions/读文章写摘要.py
+++ b/crazy_functions/读文章写摘要.py
@ -10,9 +10,9 @@ def 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, hist
        with open(fp, 'r', encoding='utf-8') as f:
            file_content = f.read()
-        前言 = "接下来请你逐文件分析下面的论文文件，概括其内容" if index==0 else ""
+        prefix = "接下来请你逐文件分析下面的论文文件，概括其内容" if index==0 else ""
-        i_say = 前言 + f'请对下面的文章片段用中文做一个概述，文件名是{os.path.relpath(fp, project_folder)}，文章内容是 ```{file_content}```'
+        i_say = prefix + f'请对下面的文章片段用中文做一个概述，文件名是{os.path.relpath(fp, project_folder)}，文章内容是 ```{file_content}```'
-        i_say_show_user = 前言 + f'[{index}/{len(file_manifest)}] 请对下面的文章片段做一个概述: {os.path.abspath(fp)}'
+        i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的文章片段做一个概述: {os.path.abspath(fp)}'
        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
        print('[1] yield chatbot, history')
        yield chatbot, history, '正常'
--- a/main.py
+++ b/main.py
@ -1,11 +1,13 @@
-import os; os.environ['no_proxy'] = '*' 
+import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
 import gradio as gr 
 from predict import predict
 from toolbox import format_io, find_free_port
-try: from config_private import proxies, WEB_PORT # 放自己的秘密如API和代理网址 os.path.exists('config_private.py')
+# 建议您复制一个config_private.py放自己的秘密，如API和代理网址，避免不小心传github被别人看到
 try: from config_private import proxies, WEB_PORT 
 except: from config import proxies, WEB_PORT
 # 如果WEB_PORT是-1，则随机选取WEB端口
 PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT
 initial_prompt = "Serve me as a writing and programming assistant."
@ -13,20 +15,21 @@ title_html = """<h1 align="center">ChatGPT 学术优化</h1>"""
 import logging
 os.makedirs('gpt_log', exist_ok=True)
-logging.basicConfig(filename='gpt_log/chat_secrets.log', level=logging.INFO, encoding='utf-8')
+logging.basicConfig(filename='gpt_log/chat_secrets.log', level=logging.INFO, encoding='utf-8') # python 版本建议3.9+（越新越好）
 print('所有问询记录将自动保存在本地目录./gpt_log/chat_secrets.log，请注意自我隐私保护哦！')
-# 一些普通功能
+# 一些普通功能模块
 from functional import get_functionals
 functional = get_functionals()
-# 对一些丧心病狂的实验性功能进行测试
+# 对一些丧心病狂的实验性功能模块进行测试
 from functional_crazy import get_crazy_functionals
 crazy_functional = get_crazy_functionals()
 # 处理markdown文本格式的转变
 gr.Chatbot.postprocess = format_io
-with gr.Blocks() as demo:
+with gr.Blocks() as demo:   # 借助gradio框架，实现webUI
    gr.HTML(title_html)
    with gr.Row():
        with gr.Column(scale=2):
@ -66,7 +69,7 @@ with gr.Blocks() as demo:
        crazy_functional[k]["Button"].click(crazy_functional[k]["Function"], 
            [txt, top_p, temperature, chatbot, history, systemPromptTxt, gr.State(PORT)], [chatbot, history, statusDisplay])
-
+# 延迟函数，做一些准备工作，最后尝试打开浏览器
 def auto_opentab_delay():
    import threading, webbrowser, time
    print(f"URL http://localhost:{PORT}")
--- a/predict.py
+++ b/predict.py
@ -15,6 +15,9 @@ except: from config import proxies, API_URL, API_KEY, TIMEOUT_SECONDS, MAX_RETRY
 timeout_bot_msg = '[local] Request timeout, network error. please check proxy settings in config.py.'
 def get_full_error(chunk, stream_response):
    """
        获取完整的从Openai返回的报错
    """
    while True:
        try:
            chunk += next(stream_response)
@ -23,6 +26,16 @@ def get_full_error(chunk, stream_response):
    return chunk
 def predict_no_ui(inputs, top_p, temperature, history=[]):
    """
        发送至chatGPT，等待回复，一次性完成，不显示中间过程。
        predict函数的简化版。
        用于payload比较大的情况，或者用于实现多线程、带嵌套的复杂功能。
        inputs 是本次问询的输入
        top_p, temperature是chatGPT的内部调优参数
        history 是之前的对话列表
        （注意无论是inputs还是history，内容太长了都会触发token数量溢出的错误，然后raise ConnectionAbortedError）
    """
    headers, payload = generate_payload(inputs, top_p, temperature, history, system_prompt="", stream=False)
    retry = 0
@ -47,7 +60,15 @@ def predict_no_ui(inputs, top_p, temperature, history=[]):
 def predict(inputs, top_p, temperature, chatbot=[], history=[], system_prompt='', 
            stream = True, additional_fn=None):
-
+    """
        发送至chatGPT，流式获取输出。
        用于基础的对话功能。
        inputs 是本次问询的输入
        top_p, temperature是chatGPT的内部调优参数
        history 是之前的对话列表（注意无论是inputs还是history，内容太长了都会触发token数量溢出的错误）
        chatbot 为WebUI中显示的对话列表，修改它，然后yield出去，可以直接修改对话界面内容
        additional_fn代表点击的哪个按钮，按钮见functional.py
    """
    if additional_fn is not None:
        import functional
        importlib.reload(functional)
@ -115,6 +136,9 @@ def predict(inputs, top_p, temperature, chatbot=[], history=[], system_prompt=''
                    return
 def generate_payload(inputs, top_p, temperature, history, system_prompt, stream):
    """
        整合所有信息，选择LLM模型，生成http请求，为发送请求做准备
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_KEY}"
--- a/toolbox.py
+++ b/toolbox.py
@ -10,7 +10,10 @@ def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temp
    try: from config_private import TIMEOUT_SECONDS, MAX_RETRY
    except: from config import TIMEOUT_SECONDS, MAX_RETRY
    from predict import predict_no_ui
    # 多线程的时候，需要一个mutable结构在不同线程之间传递信息
    # list就是最简单的mutable结构，我们第一个位置放gpt输出，第二个位置传递报错信息
    mutable = [None, '']
    # multi-threading worker
    def mt(i_say, history): 
        while True:
            try:
@ -25,14 +28,16 @@ def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temp
                    mutable[1] = 'Warning! Input file is too long, cut into half. '
            except TimeoutError as e:
                mutable[0] = '[Local Message] Failed with timeout'
-
+    # 创建新线程发出http请求
    thread_name = threading.Thread(target=mt, args=(i_say, history)); thread_name.start()
    # 原来的线程则负责持续更新UI，实现一个超时倒计时，并等待新线程的任务完成
    cnt = 0
    while thread_name.is_alive():
        cnt += 1
        chatbot[-1] = (i_say_show_user, f"[Local Message] {mutable[1]}waiting gpt response {cnt}/{TIMEOUT_SECONDS*2*(MAX_RETRY+1)}"+''.join(['.']*(cnt%4)))
        yield chatbot, history, '正常'
        time.sleep(1)
    # 把gpt的输出从mutable中取出来
    gpt_say = mutable[0]
    return gpt_say