From 2f9a4e1618463b45ffd3df4e43d6c3c64d50b3a7 Mon Sep 17 00:00:00 2001
From: mrhblfx <mrhblfx@gmail.com>
Date: Sun, 16 Apr 2023 23:00:45 +0800
Subject: [PATCH] Add plugin for parsing arbitrary code projects

---
 crazy_functional.py               | 12 ++++++--
 crazy_functions/解析项目源代码.py | 49 +++++++++++++++++++++++++------
 main.py                           | 27 ++++++++++++++---
 toolbox.py                        | 21 ++++++-------
 4 files changed, 84 insertions(+), 25 deletions(-)

diff --git a/crazy_functional.py b/crazy_functional.py
index af308b7..afc3217 100644
--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -162,7 +162,7 @@ def get_crazy_functions():
             "AsButton": False,  # 加入下拉菜单中
             "Function": HotReload(Markdown英译中)
         },
-        
+
     })
 
     ###################### 第三组插件 ###########################
@@ -179,8 +179,16 @@ def get_crazy_functions():
 
     except Exception as err:
         print(f'[下载arxiv论文并翻译摘要] 插件导入失败 {str(err)}')
-        
 
 
+    from crazy_functions.解析项目源代码 import 解析任意code项目
+    function_plugins.update({
+        "解析任意code项目": {
+            "Color": "stop",
+            "AsButton": False,
+            "Function": HotReload(解析任意code项目)
+        },
+    })
+
     ###################### 第n组插件 ###########################
     return function_plugins
diff --git a/crazy_functions/解析项目源代码.py b/crazy_functions/解析项目源代码.py
index 04e5128..283f103 100644
--- a/crazy_functions/解析项目源代码.py
+++ b/crazy_functions/解析项目源代码.py
@@ -11,7 +11,7 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
     history_array = []
     sys_prompt_array = []
     report_part_1 = []
-    
+
     assert len(file_manifest) <= 1024, "源文件太多（超过1024个）, 请缩减输入文件的数量。或者，您也可以选择删除此行警告，并修改代码拆分file_manifest列表，从而实现分批次处理。"
     ############################## <第一步，逐个文件分析，多线程> ##################################
     for index, fp in enumerate(file_manifest):
@@ -63,10 +63,10 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
         current_iteration_focus = ', '.join([os.path.relpath(fp, project_folder) for index, fp in enumerate(this_iteration_file_manifest)])
         i_say = f'根据以上分析，对程序的整体功能和构架重新做出概括。然后用一张markdown表格整理每个文件的功能（包括{previous_iteration_files_string}）。'
         inputs_show_user = f'根据以上分析，对程序的整体功能和构架重新做出概括，由于输入长度限制，可能需要分组处理，本组文件为 {current_iteration_focus} + 已经汇总的文件组。'
-        this_iteration_history = copy.deepcopy(this_iteration_gpt_response_collection) 
+        this_iteration_history = copy.deepcopy(this_iteration_gpt_response_collection)
         this_iteration_history.append(last_iteration_result)
         result = yield from request_gpt_model_in_new_thread_with_ui_alive(
-            inputs=i_say, inputs_show_user=inputs_show_user, llm_kwargs=llm_kwargs, chatbot=chatbot, 
+            inputs=i_say, inputs_show_user=inputs_show_user, llm_kwargs=llm_kwargs, chatbot=chatbot,
             history=this_iteration_history,   # 迭代之前的分析
             sys_prompt="你是一个程序架构分析师，正在分析一个项目的源代码。")
         report_part_2.extend([i_say, result])
@@ -222,8 +222,8 @@ def 解析一个Golang项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
     yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
-    
-    
+
+
 @CatchException
 def 解析一个Lua项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
     history = []    # 清空历史，以免输入溢出
@@ -243,9 +243,9 @@ def 解析一个Lua项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
         report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何lua文件: {txt}")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
-    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)    
-    
-        
+    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
+
+
 @CatchException
 def 解析一个CSharp项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
     history = []    # 清空历史，以免输入溢出
@@ -263,4 +263,35 @@ def 解析一个CSharp项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
         report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何CSharp文件: {txt}")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
-    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)    
+    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
+
+
+@CatchException
+def 解析任意code项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    """Analyse every file under folder `txt` whose suffix passes the user's include/except filters.
+
+    Both filters are raw strings of suffixes separated by whitespace and/or commas (ASCII or
+    full-width), leading dot optional. An empty include filter means every suffix is accepted.
+    """
+    import glob, os, re
+    def parse_suffixes(raw):
+        # A missing or None option counts as empty instead of raising AttributeError on split.
+        stripped = (token.lstrip(".") for token in re.split(r"[\s,，]+", raw or ""))
+        return {suffix for suffix in stripped if suffix}
+    pattern_include = parse_suffixes(plugin_kwargs.get("txt_include"))
+    pattern_except = parse_suffixes(plugin_kwargs.get("txt_except")) | {'zip', 'rar', '7z', 'tar', 'gz'}  # never parse uploaded archives
+    history = []    # start from an empty history so the model input does not overflow
+    if not os.path.exists(txt):
+        if txt == "": txt = '空空如也的输入栏'
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+    project_folder = txt
+    # glob's '*.*' guarantees a dot in the basename, so the text after the last dot is the suffix.
+    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.*', recursive=True) if os.path.isfile(f)
+                     and (not pattern_include or f.split(".")[-1] in pattern_include) and f.split(".")[-1] not in pattern_except]
+    if len(file_manifest) == 0:
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何文件: {txt}")
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
\ No newline at end of file
diff --git a/main.py b/main.py
index 2e4e275..73646dc 100644
--- a/main.py
+++ b/main.py
@@ -44,7 +44,7 @@ proxy_info = check_proxy(proxies)
 
 gr_L1 = lambda: gr.Row().style()
 gr_L2 = lambda scale: gr.Column(scale=scale)
-if LAYOUT == "TOP-DOWN": 
+if LAYOUT == "TOP-DOWN":
     gr_L1 = lambda: DummyWith()
     gr_L2 = lambda scale: gr.Row()
     CHATBOT_HEIGHT /= 2
@@ -83,9 +83,23 @@ with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=
                         variant = crazy_fns[k]["Color"] if "Color" in crazy_fns[k] else "secondary"
                         crazy_fns[k]["Button"] = gr.Button(k, variant=variant)
                         crazy_fns[k]["Button"].style(size="sm")
+                with gr.Row():
+                    with gr.Accordion("解析任意code项目", open=True):
+                        gr.Markdown("输入的文件后缀用空格或逗号隔开, 可混合使用空格逗号")
+                        with gr.Row():
+                            gr.Markdown("将要匹配文件的后缀, 不输入则代表解析所有文件")
+                            txt_pattern_include = gr.Textbox(show_label=False, placeholder="例如: .c .cpp .py").style(container=False)
+                        with gr.Row():
+                            gr.Markdown("将要忽略匹配文件的后缀")
+                            txt_pattern_except = gr.Textbox(show_label=False, placeholder="例如: .png, .jpg wav flac").style(container=False)
+                        code_plugin_name = "解析任意code项目"
+                        variant = crazy_fns[code_plugin_name]["Color"] if "Color" in crazy_fns[code_plugin_name] else "secondary"
+                        crazy_fns[code_plugin_name]["Button"] = gr.Button(code_plugin_name, variant=variant)
+                        crazy_fns[code_plugin_name]["Button"].style(size="sm")
                 with gr.Row():
                     with gr.Accordion("更多函数插件", open=True):
                         dropdown_fn_list = [k for k in crazy_fns.keys() if not crazy_fns[k].get("AsButton", True)]
+                        dropdown_fn_list.remove(code_plugin_name)
                         with gr.Column(scale=1):
                             dropdown = gr.Dropdown(dropdown_fn_list, value=r"打开插件列表", label="").style(container=False)
                         with gr.Column(scale=1):
@@ -118,7 +132,8 @@ with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=
         return ret
     checkboxes.select(fn_area_visibility, [checkboxes], [area_basic_fn, area_crazy_fn, area_input_primary, area_input_secondary, txt, txt2] )
     # 整理反复出现的控件句柄组合
-    input_combo = [cookies, txt, txt2, top_p, temperature, chatbot, history, system_prompt]
+    add_input_combo = (txt_pattern_include, txt_pattern_except)
+    input_combo = [cookies, txt, txt2, top_p, temperature, chatbot, history, system_prompt, *add_input_combo]
     output_combo = [cookies, chatbot, history, status]
     predict_args = dict(fn=ArgsGeneralWrapper(predict), inputs=input_combo, outputs=output_combo)
     # 提交按钮、重置按钮
@@ -140,6 +155,10 @@ with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=
         click_handle = crazy_fns[k]["Button"].click(ArgsGeneralWrapper(crazy_fns[k]["Function"]), [*input_combo, gr.State(PORT)], output_combo)
         click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot])
         cancel_handles.append(click_handle)
+    # 函数插件-解析任意code项目
+    click_handle = crazy_fns[code_plugin_name]["Button"].click(ArgsGeneralWrapper(crazy_fns[code_plugin_name]["Function"]), [*input_combo, gr.State(PORT)], output_combo)
+    click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot])
+    cancel_handles.append(click_handle)
     # 函数插件-下拉菜单与随变按钮的互动
     def on_dropdown_changed(k):
         variant = crazy_fns[k]["Color"] if "Color" in crazy_fns[k] else "secondary"
@@ -147,7 +166,7 @@ with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=
     dropdown.select(on_dropdown_changed, [dropdown], [switchy_bt] )
     # 随变按钮的回调函数注册
     def route(k, *args, **kwargs):
-        if k in [r"打开插件列表", r"请先从插件列表中选择"]: return 
+        if k in [r"打开插件列表", r"请先从插件列表中选择"]: return
         yield from ArgsGeneralWrapper(crazy_fns[k]["Function"])(*args, **kwargs)
     click_handle = switchy_bt.click(route,[switchy_bt, *input_combo, gr.State(PORT)], output_combo)
     click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot])
@@ -164,7 +183,7 @@ def auto_opentab_delay():
     print(f"如果浏览器没有自动打开，请复制并转到以下URL：")
     print(f"\t（亮色主题）: http://localhost:{PORT}")
     print(f"\t（暗色主题）: http://localhost:{PORT}/?__dark-theme=true")
-    def open(): 
+    def open():
         time.sleep(2)       # 打开浏览器
         webbrowser.open_new_tab(f"http://localhost:{PORT}/?__dark-theme=true")
     threading.Thread(target=open, name="open-browser", daemon=True).start()
diff --git a/toolbox.py b/toolbox.py
index 3ced653..8de3a8d 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -27,23 +27,24 @@ def ArgsGeneralWrapper(f):
     """
         装饰器函数，用于重组输入参数，改变输入参数的顺序与结构。
     """
-    def decorated(cookies, txt, txt2, top_p, temperature, chatbot, history, system_prompt, *args):
+    def decorated(cookies, txt, txt2, top_p, temperature, chatbot, history, system_prompt, txt_include, txt_except, *args):
         txt_passon = txt
         if txt == "" and txt2 != "": txt_passon = txt2
         # 引入一个有cookie的chatbot
         cookies.update({
-            'top_p':top_p, 
+            'top_p':top_p,
             'temperature':temperature,
         })
         llm_kwargs = {
             'api_key': cookies['api_key'],
             'llm_model': cookies['llm_model'],
-            'top_p':top_p, 
+            'top_p':top_p,
             'temperature':temperature,
         }
-        plugin_kwargs = {
-            # 目前还没有
-        }
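+        # Options handed to the wrapped function f as plugin_kwargs. Currently only the
+        # two suffix filters: txt_include (suffixes to match) and txt_except (suffixes
+        # to skip), forwarded as the raw strings received by this wrapper.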
+        plugin_kwargs = dict(txt_include = txt_include, txt_except = txt_except)
         chatbot_with_cookie = ChatBotWithCookies(cookies)
         chatbot_with_cookie.write_list(chatbot)
         yield from f(txt_passon, llm_kwargs, plugin_kwargs, chatbot_with_cookie, history, system_prompt, *args)
@@ -279,7 +280,7 @@ def markdown_convertion(txt):
             return content
         else:
             return tex2mathml_catch_exception(content)
-        
+
     def markdown_bug_hunt(content):
         """
         解决一个mdx_math的bug（单$包裹begin命令时多余<script>）
@@ -287,7 +288,7 @@ def markdown_convertion(txt):
         content = content.replace('<script type="math/tex">\n<script type="math/tex; mode=display">', '<script type="math/tex; mode=display">')
         content = content.replace('</script>\n</script>', '</script>')
         return content
-    
+
 
     if ('$' in txt) and ('```' not in txt):  # 有$标识的公式符号，且没有代码段```的标识
         # convert everything to html format
@@ -308,7 +309,7 @@ def markdown_convertion(txt):
 def close_up_code_segment_during_stream(gpt_reply):
     """
     在gpt输出代码的中途（输出了前面的```，但还没输出完后面的```），补上后面的```
-    
+
     Args:
         gpt_reply (str): GPT模型返回的回复字符串。
 
@@ -518,7 +519,7 @@ class DummyWith():
     它的作用是……额……没用，即在代码结构不变得情况下取代其他的上下文管理器。
     上下文管理器是一种Python对象，用于与with语句一起使用，
     以确保一些资源在代码块执行期间得到正确的初始化和清理。
-    上下文管理器必须实现两个方法，分别为 __enter__()和 __exit__()。 
+    上下文管理器必须实现两个方法，分别为 __enter__()和 __exit__()。
     在上下文执行开始的情况下，__enter__()方法会在代码块被执行前被调用，
     而在上下文执行结束时，__exit__()方法则会被调用。
     """