Merge pull request #502 from mrhblfx/new_code_fun

解析项目源代码（手动指定和筛选源代码文件类型）
2023-04-22 18:40:47 +08:00 · 2023-04-22 18:40:47 +08:00 · 7d14de79bf
commit 7d14de79bf
parent 15c6b52b5f bd62c6be68
4 changed files with 90 additions and 29 deletions
--- a/crazy_functional.py
+++ b/crazy_functional.py
@ -174,7 +174,7 @@ def get_crazy_functions():
            "AsButton": False,  # 加入下拉菜单中
            "Function": HotReload(Markdown英译中)
        },
-        
+
    })

    ###################### 第三组插件 ###########################
@ -187,7 +187,7 @@ def get_crazy_functions():
            "Function": HotReload(下载arxiv论文并翻译摘要)
        }
    })
-        
+
    from crazy_functions.联网的ChatGPT import 连接网络回答问题
    function_plugins.update({
        "连接网络回答问题（先输入问题，再点击按钮，需要访问谷歌）": {
@ -197,5 +197,16 @@ def get_crazy_functions():
        }
    })

+    from crazy_functions.解析项目源代码 import 解析任意code项目
+    function_plugins.update({
+        "解析项目源代码（手动指定和筛选源代码文件类型）": {
+            "Color": "stop",
+            "AsButton": False,
+            "AdvancedArgs": True, # 调用时，唤起高级参数输入区（默认False）
+            "ArgsReminder": "输入时用逗号隔开, *代表通配符, 加了^代表不匹配; 不输入代表全部匹配。例如: \"*.c, ^*.cpp, config.toml, ^*.toml\"", # 高级参数输入区的显示提示
+            "Function": HotReload(解析任意code项目)
+        },
+    })
+
    ###################### 第n组插件 ###########################
    return function_plugins
--- a/crazy_functions/解析项目源代码.py
+++ b/crazy_functions/解析项目源代码.py
@ -11,7 +11,7 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
    history_array = []
    sys_prompt_array = []
    report_part_1 = []
-    
+
    assert len(file_manifest) <= 512, "源文件太多（超过512个）, 请缩减输入文件的数量。或者，您也可以选择删除此行警告，并修改代码拆分file_manifest列表，从而实现分批次处理。"
    ############################## <第一步，逐个文件分析，多线程> ##################################
    for index, fp in enumerate(file_manifest):
@ -63,10 +63,10 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
        current_iteration_focus = ', '.join([os.path.relpath(fp, project_folder) for index, fp in enumerate(this_iteration_file_manifest)])
        i_say = f'根据以上分析，对程序的整体功能和构架重新做出概括。然后用一张markdown表格整理每个文件的功能（包括{previous_iteration_files_string}）。'
        inputs_show_user = f'根据以上分析，对程序的整体功能和构架重新做出概括，由于输入长度限制，可能需要分组处理，本组文件为 {current_iteration_focus} + 已经汇总的文件组。'
-        this_iteration_history = copy.deepcopy(this_iteration_gpt_response_collection) 
+        this_iteration_history = copy.deepcopy(this_iteration_gpt_response_collection)
        this_iteration_history.append(last_iteration_result)
        result = yield from request_gpt_model_in_new_thread_with_ui_alive(
-            inputs=i_say, inputs_show_user=inputs_show_user, llm_kwargs=llm_kwargs, chatbot=chatbot, 
+            inputs=i_say, inputs_show_user=inputs_show_user, llm_kwargs=llm_kwargs, chatbot=chatbot,
            history=this_iteration_history,   # 迭代之前的分析
            sys_prompt="你是一个程序架构分析师，正在分析一个项目的源代码。")
        report_part_2.extend([i_say, result])
@ -222,8 +222,8 @@ def 解析一个Golang项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
-    
-    
+
+
@CatchException
 def 解析一个Lua项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    history = []    # 清空历史，以免输入溢出
@ -243,9 +243,9 @@ def 解析一个Lua项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何lua文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
        return
-    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)    
-    
-        
+    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
+
+
@CatchException
 def 解析一个CSharp项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    history = []    # 清空历史，以免输入溢出
@ -263,4 +263,45 @@ def 解析一个CSharp项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何CSharp文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
        return
-    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)    
+    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
+
+
+@CatchException
+def 解析任意code项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    """Analyze a local project, selecting files by the user's include/exclude patterns.
+
+    plugin_kwargs["advanced_arg"]: comma-separated tokens; "*.c" includes, "^*.c" excludes a suffix, "^README.md" excludes a file name.
+    """
+    import glob, os, re
+    # A missing/None argument counts as empty; the full-width comma is accepted as a separator.
+    txt_pattern = (plugin_kwargs.get("advanced_arg") or "").replace("，", ",")
+    # Include patterns (e.g. *.c, config.toml); blank tokens are dropped so "*.c, " still works.
+    pattern_include = [t.strip() for t in txt_pattern.split(",") if t.strip() and not t.strip().startswith("^")]
+    if not pattern_include: pattern_include = ["*"]  # no explicit include pattern -> match everything
+    # Exclusion tokens: split on commas AND whitespace so "^*.c,^*.cpp" (no space) is understood.
+    except_tokens = [t for t in re.split(r"[,\s]+", txt_pattern) if t.startswith("^")]
+    # re.escape keeps user text literal in the regex (e.g. "c++"); archive suffixes are always excluded.
+    pattern_except_suffix = [re.escape(t.lstrip("^*.")) for t in except_tokens if t.startswith("^*.")] + ['zip', 'rar', '7z', 'tar', 'gz']
+    pattern_except_name = [re.escape(t.lstrip("^*")) for t in except_tokens if not t.startswith("^*.")]
+    pattern_except = r'/[^/]+\.(' + "|".join(pattern_except_suffix) + ')$'
+    if pattern_except_name: pattern_except += '|/(' + "|".join(pattern_except_name) + ')$'
+    history.clear()
+    if not os.path.exists(txt):
+        if txt == "": txt = '空空如也的输入栏'
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+    project_folder = txt
+    # If an archive was uploaded, search its extracted folder rather than the archive itself.
+    maybe_dir = [f for f in glob.glob(f'{project_folder}/*') if os.path.isdir(f)]
+    extract_folder_path = maybe_dir[0] if maybe_dir and maybe_dir[0].endswith('.extract') else project_folder
+    def _keep(f, pattern):  # an include pattern that names the excluded suffix overrides the exclusion
+        hit = re.search(pattern_except, f)
+        return os.path.isfile(f) and (hit is None or pattern.endswith('.' + hit.group().split('.')[-1]))
+    # dict.fromkeys drops files matched by several include patterns while keeping discovery order.
+    file_manifest = list(dict.fromkeys(f for pattern in pattern_include for f in glob.glob(f'{extract_folder_path}/**/{pattern}', recursive=True) if _keep(f, pattern)))
+    if not file_manifest:
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何文件: {txt}")
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
--- a/main.py
+++ b/main.py
@ -45,7 +45,7 @@ def main():

    gr_L1 = lambda: gr.Row().style()
    gr_L2 = lambda scale: gr.Column(scale=scale)
-    if LAYOUT == "TOP-DOWN": 
+    if LAYOUT == "TOP-DOWN":
        gr_L1 = lambda: DummyWith()
        gr_L2 = lambda scale: gr.Row()
        CHATBOT_HEIGHT /= 2
@ -88,9 +88,12 @@ def main():
                    with gr.Row():
                        with gr.Accordion("更多函数插件", open=True):
                            dropdown_fn_list = [k for k in crazy_fns.keys() if not crazy_fns[k].get("AsButton", True)]
-                            with gr.Column(scale=1):
+                            with gr.Row():
                                dropdown = gr.Dropdown(dropdown_fn_list, value=r"打开插件列表", label="").style(container=False)
-                            with gr.Column(scale=1):
+                            with gr.Row():
+                                plugin_advanced_arg = gr.Textbox(show_label=True, label="高级参数输入区", visible=False, 
+                                                                 placeholder="这里是特殊函数插件的高级参数输入区").style(container=False)
+                            with gr.Row():
                                switchy_bt = gr.Button(r"请先从插件列表中选择", variant="secondary")
                    with gr.Row():
                        with gr.Accordion("点击展开“文件上传区”。上传本地文件可供红色函数插件调用。", open=False) as area_file_up:
@ -100,7 +103,7 @@ def main():
                    top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
                    temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
                    max_length_sl = gr.Slider(minimum=256, maximum=4096, value=512, step=1, interactive=True, label="Local LLM MaxLength",)
-                    checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "底部输入区", "输入清除键"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区")
+                    checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "底部输入区", "输入清除键", "插件参数区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区")
                    md_dropdown = gr.Dropdown(AVAIL_LLM_MODELS, value=LLM_MODEL, label="更换LLM模型/请求源").style(container=False)

                    gr.Markdown(description)
@ -122,11 +125,12 @@ def main():
            ret.update({area_input_secondary: gr.update(visible=("底部输入区" in a))})
            ret.update({clearBtn: gr.update(visible=("输入清除键" in a))})
            ret.update({clearBtn2: gr.update(visible=("输入清除键" in a))})
+            ret.update({plugin_advanced_arg: gr.update(visible=("插件参数区" in a))})
            if "底部输入区" in a: ret.update({txt: gr.update(value="")})
            return ret
-        checkboxes.select(fn_area_visibility, [checkboxes], [area_basic_fn, area_crazy_fn, area_input_primary, area_input_secondary, txt, txt2, clearBtn, clearBtn2] )
+        checkboxes.select(fn_area_visibility, [checkboxes], [area_basic_fn, area_crazy_fn, area_input_primary, area_input_secondary, txt, txt2, clearBtn, clearBtn2, plugin_advanced_arg] )
        # 整理反复出现的控件句柄组合
-        input_combo = [cookies, max_length_sl, md_dropdown, txt, txt2, top_p, temperature, chatbot, history, system_prompt]
+        input_combo = [cookies, max_length_sl, md_dropdown, txt, txt2, top_p, temperature, chatbot, history, system_prompt, plugin_advanced_arg]
        output_combo = [cookies, chatbot, history, status]
        predict_args = dict(fn=ArgsGeneralWrapper(predict), inputs=input_combo, outputs=output_combo)
        # 提交按钮、重置按钮
@ -153,14 +157,19 @@ def main():
        # 函数插件-下拉菜单与随变按钮的互动
        def on_dropdown_changed(k):
            variant = crazy_fns[k]["Color"] if "Color" in crazy_fns[k] else "secondary"
-            return {switchy_bt: gr.update(value=k, variant=variant)}
-        dropdown.select(on_dropdown_changed, [dropdown], [switchy_bt] )
+            ret = {switchy_bt: gr.update(value=k, variant=variant)}  # the button takes the selected plugin's name and colour
+            if crazy_fns[k].get("AdvancedArgs", False): # show the advanced-argument box only for plugins that request it
+                ret.update({plugin_advanced_arg: gr.update(visible=True,  label=f"插件[{k}]的高级参数说明：" + crazy_fns[k].get("ArgsReminder", "没有提供高级参数功能说明"))})  # fallback must be a str: it is concatenated onto the label
+            else:
+                ret.update({plugin_advanced_arg: gr.update(visible=False, label=f"插件[{k}]不需要高级参数。")})  # hide the box for plugins without advanced arguments
+            return ret
+        dropdown.select(on_dropdown_changed, [dropdown], [switchy_bt, plugin_advanced_arg] )
        def on_md_dropdown_changed(k):
            return {chatbot: gr.update(label="当前模型："+k)}
        md_dropdown.select(on_md_dropdown_changed, [md_dropdown], [chatbot] )
        # 随变按钮的回调函数注册
        def route(k, *args, **kwargs):
-            if k in [r"打开插件列表", r"请先从插件列表中选择"]: return 
+            if k in [r"打开插件列表", r"请先从插件列表中选择"]: return
            yield from ArgsGeneralWrapper(crazy_fns[k]["Function"])(*args, **kwargs)
        click_handle = switchy_bt.click(route,[switchy_bt, *input_combo, gr.State(PORT)], output_combo)
        click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot])
@ -178,7 +187,7 @@ def main():
        print(f"如果浏览器没有自动打开，请复制并转到以下URL：")
        print(f"\t（亮色主题）: http://localhost:{PORT}")
        print(f"\t（暗色主题）: http://localhost:{PORT}/?__dark-theme=true")
-        def open(): 
+        def open():
            time.sleep(2)       # 打开浏览器
            webbrowser.open_new_tab(f"http://localhost:{PORT}/?__dark-theme=true")
        threading.Thread(target=open, name="open-browser", daemon=True).start()
--- a/toolbox.py
+++ b/toolbox.py
@ -24,23 +24,23 @@ def ArgsGeneralWrapper(f):
    """
    装饰器函数，用于重组输入参数，改变输入参数的顺序与结构。
    """
-    def decorated(cookies, max_length, llm_model, txt, txt2, top_p, temperature, chatbot, history, system_prompt, *args):
+    def decorated(cookies, max_length, llm_model, txt, txt2, top_p, temperature, chatbot, history, system_prompt, plugin_advanced_arg, *args):
        txt_passon = txt
        if txt == "" and txt2 != "": txt_passon = txt2
        # 引入一个有cookie的chatbot
        cookies.update({
-            'top_p':top_p, 
+            'top_p':top_p,
            'temperature':temperature,
        })
        llm_kwargs = {
            'api_key': cookies['api_key'],
            'llm_model': llm_model,
-            'top_p':top_p, 
+            'top_p':top_p,
            'max_length': max_length,
            'temperature':temperature,
        }
        plugin_kwargs = {
-            # 目前还没有
+            "advanced_arg": plugin_advanced_arg,
        }
        chatbot_with_cookie = ChatBotWithCookies(cookies)
        chatbot_with_cookie.write_list(chatbot)
@ -219,7 +219,7 @@ def markdown_convertion(txt):
            return content
        else:
            return tex2mathml_catch_exception(content)
-        
+
    def markdown_bug_hunt(content):
        """
        解决一个mdx_math的bug（单$包裹begin命令时多余<script>）
@ -227,7 +227,7 @@ def markdown_convertion(txt):
        content = content.replace('<script type="math/tex">\n<script type="math/tex; mode=display">', '<script type="math/tex; mode=display">')
        content = content.replace('</script>\n</script>', '</script>')
        return content
-    
+

    if ('$' in txt) and ('```' not in txt):  # 有$标识的公式符号，且没有代码段```的标识
        # convert everything to html format
@ -248,7 +248,7 @@ def markdown_convertion(txt):
 def close_up_code_segment_during_stream(gpt_reply):
    """
    在gpt输出代码的中途（输出了前面的```，但还没输出完后面的```），补上后面的```
-    
+
    Args:
        gpt_reply (str): GPT模型返回的回复字符串。

@ -511,7 +511,7 @@ class DummyWith():
    它的作用是……额……没用，即在代码结构不变得情况下取代其他的上下文管理器。
    上下文管理器是一种Python对象，用于与with语句一起使用，
    以确保一些资源在代码块执行期间得到正确的初始化和清理。
-    上下文管理器必须实现两个方法，分别为 __enter__()和 __exit__()。 
+    上下文管理器必须实现两个方法，分别为 __enter__()和 __exit__()。
    在上下文执行开始的情况下，__enter__()方法会在代码块被执行前被调用，
    而在上下文执行结束时，__exit__()方法则会被调用。
    """