diff --git a/crazy_functional.py b/crazy_functional.py
index 6bb11b9..7be9355 100644
--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -29,7 +29,7 @@ def get_crazy_functions():
         "Color": "stop",    # 按钮颜色
         "Function": HotReload(解析一个C项目的头文件)
     },
-    "解析整个C++项目(.cpp/.h)": {
+    "解析整个C++项目(.cpp/.hpp/.c/.h)": {
         "Color": "stop",    # 按钮颜色
         "AsButton": False,  # 加入下拉菜单中
         "Function": HotReload(解析一个C项目)
diff --git a/crazy_functions/Latex全文润色.py b/crazy_functions/Latex全文润色.py
new file mode 100644
index 0000000..b669864
--- /dev/null
+++ b/crazy_functions/Latex全文润色.py
@@ -0,0 +1,70 @@
+from request_llm.bridge_chatgpt import predict_no_ui
+from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
+fast_debug = False
+
+
+def 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt):
+    import time, glob, os
+    print('begin analysis on:', file_manifest)
+    for index, fp in enumerate(file_manifest):
+        with open(fp, 'r', encoding='utf-8') as f:
+            file_content = f.read()
+
+        prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index==0 else ""
+        i_say = prefix + f'请对下面的文章片段用中文做一个概述,文件名是{os.path.relpath(fp, project_folder)},文章内容是 ```{file_content}```'
+        i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的文章片段做一个概述: {os.path.abspath(fp)}'
+        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
+        print('[1] yield chatbot, history')
+        yield chatbot, history, '正常'
+
+        if not fast_debug:
+            msg = '正常'
+            # ** gpt request **
+            gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[])   # 带超时倒计时
+
+            print('[2] end gpt req')
+            chatbot[-1] = (i_say_show_user, gpt_say)
+            history.append(i_say_show_user); history.append(gpt_say)
+            print('[3] yield chatbot, history')
+            yield chatbot, history, msg
+            print('[4] next')
+            if not fast_debug: time.sleep(2)
+
+    all_file = ', '.join([os.path.relpath(fp, project_folder) for index, fp in enumerate(file_manifest)])
+    i_say = f'根据以上你自己的分析,对全文进行概括,用学术性语言写一段中文摘要,然后再写一段英文摘要(包括{all_file})。'
+    chatbot.append((i_say, "[Local Message] waiting gpt response."))
+    yield chatbot, history, '正常'
+
+    if not fast_debug:
+        msg = '正常'
+        # ** gpt request **
+        gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say, chatbot, top_p, temperature, history=history)   # 带超时倒计时
+
+        chatbot[-1] = (i_say, gpt_say)
+        history.append(i_say); history.append(gpt_say)
+        yield chatbot, history, msg
+        res = write_results_to_file(history)
+        chatbot.append(("完成了吗?", res))
+        yield chatbot, history, msg
+
+
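+# These plugin functions are generators: they stream (chatbot, history, status)
+# tuples to the frontend, and every `yield` triggers one UI refresh (see the
+# `update_ui` helper added to toolbox.py in this same commit).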
+@CatchException
+def 读文章写摘要(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
+    history = []    # 清空历史,以免输入溢出
+    import glob, os
+    if os.path.exists(txt):
+        project_folder = txt
+    else:
+        if txt == "": txt = '空空如也的输入栏'
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
+        yield chatbot, history, '正常'
+        return
+    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] # + \
+        # [f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)] + \
+        # [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
+    if len(file_manifest) == 0:
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
+        yield chatbot, history, '正常'
+        return
+    yield from 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py
index ac96896..58d9776 100644
--- a/crazy_functions/crazy_utils.py
+++ b/crazy_functions/crazy_utils.py
@@ -1,19 +1,115 @@
 import traceback
+from toolbox import update_ui
 
-def request_gpt_model_in_new_thread_with_ui_alive(inputs, inputs_show_user, top_p, temperature, chatbot, history, sys_prompt, refresh_interval=0.2):
+def input_clipping(inputs, history, max_token_limit):
+    import tiktoken
+    import numpy as np
+    from toolbox import get_conf
+    enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
+    def get_token_num(txt): return len(enc.encode(txt))
+
+    mode = 'input-and-history'
+    # 当 输入部分的token占比 小于 全文的一半时,只裁剪历史
+    input_token_num = get_token_num(inputs)
+    if input_token_num < max_token_limit//2:
+        mode = 'only-history'
+        max_token_limit = max_token_limit - input_token_num
+
+    everything = [inputs] if mode == 'input-and-history' else ['']
+    everything.extend(history)
+    n_token = get_token_num('\n'.join(everything))
+    everything_token = [get_token_num(e) for e in everything]
+    delta = max(everything_token) // 16    # 截断时的颗粒度
+
+    while n_token > max_token_limit:
+        where = np.argmax(everything_token)
+        encoded = enc.encode(everything[where])
+        clipped_encoded = encoded[:len(encoded)-delta]
+        everything[where] = enc.decode(clipped_encoded)[:-1]    # -1 to remove the may-be illegal char
+        everything_token[where] = get_token_num(everything[where])
+        n_token = get_token_num('\n'.join(everything))
+
+    if mode == 'input-and-history':
+        inputs = everything[0]
+    history = everything[1:]
+    return inputs, history
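+# Illustrative usage (the limit value here is a placeholder, not from this commit):
+#     inputs, history = input_clipping(inputs, history, max_token_limit=4096-1024)
+# The longest fragment is clipped first, one sixteenth of its tokens at a time,
+# until input+history (or history alone) fits under the limit.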
+
+def request_gpt_model_in_new_thread_with_ui_alive(
+        inputs, inputs_show_user, top_p, temperature,
+        chatbot, history, sys_prompt, refresh_interval=0.2,
+        handle_token_exceed=True,
+        retry_times_at_unknown_error=2,
+        ):
+    """
+    Request GPT model,请求GPT模型同时维持用户界面活跃。
+
+    输入参数 Args:
+        inputs (string): Input (输入)
+        inputs_show_user (string): Input to show user(展现在报告中的输入,借助此参数,在汇总报告中隐藏啰嗦的真实输入,增强报告的可读性)
+        top_p (float): Top p value for sampling from model distribution (GPT参数,浮点数)
+        temperature (float): Temperature value for sampling from model distribution(GPT参数,浮点数)
+        chatbot: chatbot inputs and outputs (用户界面对话窗口句柄,用于数据流可视化)
+        history (list): List of chat history (历史,对话历史列表)
+        sys_prompt (string): System prompt (系统输入,用于输入给GPT的前提提示,比如你是翻译官怎样怎样)
+        refresh_interval (float, optional): Refresh interval for UI (default: 0.2) (刷新时间间隔频率,建议低于1,不可高于3,仅仅服务于视觉效果)
+        handle_token_exceed (bool, optional): 是否自动处理token溢出的情况,如果选择自动处理,则会在溢出时暴力截断,默认开启
+        retry_times_at_unknown_error (int, optional): 失败时的重试次数
+
+    输出 Returns:
+        GPT返回的结果 (string)
+    """
+    import time
+    from concurrent.futures import ThreadPoolExecutor
+    from request_llm.bridge_chatgpt import predict_no_ui_long_connection
     # 用户反馈
     chatbot.append([inputs_show_user, ""])
     msg = '正常'
-    yield chatbot, [], msg
+    yield from update_ui(chatbot=chatbot, history=[])
     executor = ThreadPoolExecutor(max_workers=16)
     mutable = ["", time.time()]
-    future = executor.submit(lambda:
-        predict_no_ui_long_connection(
-            inputs=inputs, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt, observe_window=mutable)
-    )
+    def _req_gpt(inputs, history, sys_prompt):
+        retry_op = retry_times_at_unknown_error
+        exceeded_cnt = 0
+        while True:
+            try:
+                # 【第一种情况】:顺利完成
+                result = predict_no_ui_long_connection(
+                    inputs=inputs, top_p=top_p, temperature=temperature,
+                    history=history, sys_prompt=sys_prompt, observe_window=mutable)
+                return result
+            except ConnectionAbortedError as token_exceeded_error:
+                # 【第二种情况】:Token溢出
+                if handle_token_exceed:
+                    exceeded_cnt += 1
+                    # 【选择处理】 尝试计算比例,尽可能多地保留文本
+                    from toolbox import get_reduce_token_percent
+                    p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
+                    MAX_TOKEN = 4096
+                    EXCEED_ALLO = 512 + 512 * exceeded_cnt
+                    inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
+                    mutable[0] += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
+                    continue    # 返回重试
+                else:
+                    # 【选择放弃】
+                    tb_str = '```\n' + traceback.format_exc() + '```'
+                    mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
+                    return mutable[0]    # 放弃
+            except:
+                # 【第三种情况】:其他错误
+                tb_str = '```\n' + traceback.format_exc() + '```'
+                mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
+                if retry_op > 0:
+                    retry_op -= 1
+                    mutable[0] += f"[Local Message] 重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}:\n\n"
+                    time.sleep(5)
+                    continue    # 返回重试
+                else:
+                    time.sleep(5)
+                    return mutable[0]    # 放弃
+
+    future = executor.submit(_req_gpt, inputs, history, sys_prompt)
     while True:
         # yield一次以刷新前端页面
         time.sleep(refresh_interval)
@@ -27,8 +123,42 @@ def request_gpt_model_in_new_thread_with_ui_alive(inputs, inputs_show_user, top_
     return future.result()
 
-def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inputs_array, inputs_show_user_array, top_p, temperature, chatbot, history_array, sys_prompt_array, refresh_interval=0.2, max_workers=10, scroller_max_len=30):
-    import time
+def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
+        inputs_array, inputs_show_user_array, top_p, temperature,
+        chatbot, history_array, sys_prompt_array,
+        refresh_interval=0.2, max_workers=10, scroller_max_len=30,
+        handle_token_exceed=True, show_user_at_complete=False,
+        retry_times_at_unknown_error=2,
+        ):
+    """
+    Request GPT model using multiple threads with UI and high efficiency
+    请求GPT模型的[多线程]版。
+    具备以下功能:
+        实时在UI上反馈远程数据流
+        使用线程池,可调节线程池的大小避免openai的流量限制错误
+        处理中途中止的情况
+        网络等出问题时,会把traceback和已经接收的数据转入输出
+
+    输入参数 Args (以_array结尾的输入变量都是列表,列表长度为子任务的数量,执行时,会把列表拆解,放到每个子线程中分别执行):
+        inputs_array (list): List of inputs (每个子任务的输入)
+        inputs_show_user_array (list): List of inputs to show user(每个子任务展现在报告中的输入,借助此参数,在汇总报告中隐藏啰嗦的真实输入,增强报告的可读性)
+        top_p (float): Top p value for sampling from model distribution (GPT参数,浮点数)
+        temperature (float): Temperature value for sampling from model distribution(GPT参数,浮点数)
+        chatbot: chatbot (用户界面对话窗口句柄,用于数据流可视化)
+        history_array (list): List of chat history (历史对话输入,双层列表,第一层列表是子任务分解,第二层列表是对话历史)
+        sys_prompt_array (list): List of system prompts (系统输入,列表,用于输入给GPT的前提提示,比如你是翻译官怎样怎样)
+        refresh_interval (float, optional): Refresh interval for UI (default: 0.2) (刷新时间间隔频率,建议低于1,不可高于3,仅仅服务于视觉效果)
+        max_workers (int, optional): Maximum number of threads (default: 10) (最大线程数,如果子任务非常多,需要用此选项防止高频地请求openai导致错误)
+        scroller_max_len (int, optional): Maximum length for scroller (default: 30)(数据流显示最后收到的多少个字符,仅仅服务于视觉效果)
+        handle_token_exceed (bool, optional): 是否自动处理token溢出的情况,如果选择自动处理,则会在溢出时暴力截断,默认开启
+        show_user_at_complete (bool, optional): 在结束时,把完整输入-输出结果显示在聊天框
+        retry_times_at_unknown_error (int, optional): 子任务失败时的重试次数
+
+    输出 Returns:
+        list: List of GPT model responses (每个子任务的输出汇总,如果某个子任务出错,response中会携带traceback报错信息,方便调试和定位问题。)
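+
+    用法示例 Usage (illustrative sketch; the argument values are placeholders):
+        responses = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
+            inputs_array=i_says, inputs_show_user_array=i_say_show_users,
+            history_array=[[] for _ in i_says], sys_prompt_array=["..."] * len(i_says),
+            top_p=top_p, temperature=temperature, chatbot=chatbot)
+        # responses 形如 [输入1, 输出1, 输入2, 输出2, ...]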
+    """
+    import time, random
+    from concurrent.futures import ThreadPoolExecutor
+    from request_llm.bridge_chatgpt import predict_no_ui_long_connection
     assert len(inputs_array) == len(history_array)
@@ -40,20 +170,61 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inp
     msg = '正常'
     yield chatbot, [], msg
     # 异步原子
-    mutable = [["", time.time()] for _ in range(n_frag)]
+    mutable = [["", time.time(), "等待中"] for _ in range(n_frag)]
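+    # Each mutable[index] is a three-slot observe window shared with its worker thread:
+    # [0] text streamed so far, [1] watchdog timestamp, [2] status string ("等待中", "执行中", "已成功", ...)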
     def _req_gpt(index, inputs, history, sys_prompt):
-        try:
-            gpt_say = predict_no_ui_long_connection(
-                inputs=inputs, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt, observe_window=mutable[index]
-            )
-        except:
-            # 收拾残局
-            tb_str = '```\n' + traceback.format_exc() + '```'
-            gpt_say = f"[Local Message] 线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
-            if len(mutable[index][0]) > 0:
-                gpt_say += "此线程失败前收到的回答:" + mutable[index][0]
-        return gpt_say
+        gpt_say = ""
+        retry_op = retry_times_at_unknown_error
+        exceeded_cnt = 0
+        mutable[index][2] = "执行中"
+        while True:
+            try:
+                # 【第一种情况】:顺利完成
+                # time.sleep(10); raise RuntimeError("测试")
+                gpt_say = predict_no_ui_long_connection(
+                    inputs=inputs, top_p=top_p, temperature=temperature, history=history,
+                    sys_prompt=sys_prompt, observe_window=mutable[index], console_slience=True
+                )
+                mutable[index][2] = "已成功"
+                return gpt_say
+            except ConnectionAbortedError as token_exceeded_error:
+                # 【第二种情况】:Token溢出
+                if handle_token_exceed:
+                    exceeded_cnt += 1
+                    # 【选择处理】 尝试计算比例,尽可能多地保留文本
+                    from toolbox import get_reduce_token_percent
+                    p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
+                    MAX_TOKEN = 4096
+                    EXCEED_ALLO = 512 + 512 * exceeded_cnt
+                    inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
+                    gpt_say += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
+                    mutable[index][2] = "截断重试"
+                    continue    # 返回重试
+                else:
+                    # 【选择放弃】
+                    tb_str = '```\n' + traceback.format_exc() + '```'
+                    gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
+                    if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
+                    mutable[index][2] = "输入过长已放弃"
+                    return gpt_say    # 放弃
+            except:
+                # 【第三种情况】:其他错误
+                tb_str = '```\n' + traceback.format_exc() + '```'
+                gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
+                if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
+                if retry_op > 0:
+                    retry_op -= 1
+                    wait = random.randint(5, 20)
+                    for i in range(wait):    # 也许等待十几秒后,情况会好转
+                        mutable[index][2] = f"等待重试 {wait-i}"; time.sleep(1)
+                    mutable[index][2] = f"重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}"
+                    continue    # 返回重试
+                else:
+                    mutable[index][2] = "已失败"
+                    time.sleep(5)
+                    return gpt_say    # 放弃
+
     # 异步任务开始
     futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip(
         range(len(inputs_array)), inputs_array, history_array, sys_prompt_array)]
@@ -68,6 +239,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inp
         break
     # 更好的UI视觉效果
     observe_win = []
+    # print([mutable[thread_index][2] for thread_index, _ in enumerate(worker_done)])
     # 每个线程都要“喂狗”(看门狗)
     for thread_index, _ in enumerate(worker_done):
         mutable[thread_index][1] = time.time()
@@ -77,10 +249,10 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inp
         print_something_really_funny = "[ ...`"+mutable[thread_index][0][-scroller_max_len:].\
             replace('\n', '').replace('```', '...').replace(
                 ' ', '.').replace('<br/>', '.....').replace('$', '.')+"`... ]"
         observe_win.append(print_something_really_funny)
-        stat_str = ''.join([f'执行中: {obs}\n\n' if not done else '已完成\n\n' for done, obs in zip(
-            worker_done, observe_win)])
-        chatbot[-1] = [chatbot[-1][0],
-                       f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
+        stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n'
+                            if not done else f'`{mutable[thread_index][2]}`\n\n'
+                            for thread_index, done, obs in zip(range(len(worker_done)), worker_done, observe_win)])
+        chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
         msg = "正常"
         yield chatbot, [], msg
     # 异步任务结束
@@ -88,9 +260,38 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inp
     for inputs_show_user, f in zip(inputs_show_user_array, futures):
         gpt_res = f.result()
         gpt_response_collection.extend([inputs_show_user, gpt_res])
+
+    if show_user_at_complete:
+        for inputs_show_user, f in zip(inputs_show_user_array, futures):
+            gpt_res = f.result()
+            chatbot.append([inputs_show_user, gpt_res])
+            yield chatbot, [], msg
+            time.sleep(1)
     return gpt_response_collection
 
 
+def WithRetry(f):
+    """
+    装饰器函数,用于自动重试。
+    """
+    def decorated(retry, res_when_fail, *args, **kwargs):
+        assert retry >= 0
+        while True:
+            try:
+                res = yield from f(*args, **kwargs)
+                return res
+            except:
+                retry -= 1
+                if retry < 0:
+                    print("达到最大重试次数")
+                    break
+                else:
+                    print("重试中……")
+                    continue
+        return res_when_fail
+    return decorated
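+# Illustrative use of WithRetry (`some_plugin` is a hypothetical generator function):
+#     robust = WithRetry(some_plugin)
+#     result = yield from robust(2, None, txt, top_p, temperature)  # retry=2, res_when_fail=None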
+
+
 def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
     def cut(txt_tocut, must_break_at_empty_line):  # 递归
         if get_token_fn(txt_tocut) <= limit:
diff --git a/crazy_functions/代码重写为全英文_多线程.py b/crazy_functions/代码重写为全英文_多线程.py
index ad23fea..028573e 100644
--- a/crazy_functions/代码重写为全英文_多线程.py
+++ b/crazy_functions/代码重写为全英文_多线程.py
@@ -58,11 +58,10 @@ def 全项目切换英文(txt, top_p, temperature, chatbot, history, sys_prompt, WEB_PORT):
 
     # 第5步:Token限制下的截断与处理
     MAX_TOKEN = 3000
-    from transformers import GPT2TokenizerFast
-    print('加载tokenizer中')
-    tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
-    get_token_fn = lambda txt: len(tokenizer(txt)["input_ids"])
-    print('加载tokenizer结束')
+    import tiktoken
+    from toolbox import get_conf
+    enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
+    def get_token_fn(txt): return len(enc.encode(txt))
 
 
     # 第6步:任务函数
diff --git a/crazy_functions/批量翻译PDF文档_多线程.py b/crazy_functions/批量翻译PDF文档_多线程.py
index 348ebb9..4dae32a 100644
--- a/crazy_functions/批量翻译PDF文档_多线程.py
+++ b/crazy_functions/批量翻译PDF文档_多线程.py
@@ -148,7 +148,8 @@ def 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt):
         file_content, page_one = read_and_clean_pdf_text(fp)
         # 递归地切割PDF文件
         from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
-        enc = tiktoken.get_encoding("gpt2")
+        from toolbox import get_conf
+        enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
         def get_token_num(txt): return len(enc.encode(txt))
         # 分解文本
         paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
diff --git a/crazy_functions/解析项目源代码.py b/crazy_functions/解析项目源代码.py
index ced0f26..a39ef08 100644
--- a/crazy_functions/解析项目源代码.py
+++ b/crazy_functions/解析项目源代码.py
@@ -2,92 +2,96 @@ from request_llm.bridge_chatgpt import predict_no_ui
 from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
 fast_debug = False
 
-def 解析源代码(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt):
-    import time, glob, os
-    print('begin analysis on:', file_manifest)
+
+def 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt):
+    import os, copy
+    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
+    from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, WithRetry
+    msg = '正常'
+    inputs_array = []
+    inputs_show_user_array = []
+    history_array = []
+    sys_prompt_array = []
+    report_part_1 = []
+
+    ############################## <第一步,逐个文件分析,多线程> ##################################
     for index, fp in enumerate(file_manifest):
         with open(fp, 'r', encoding='utf-8') as f:
             file_content = f.read()
-
         prefix = "接下来请你逐文件分析下面的工程" if index==0 else ""
         i_say = prefix + f'请对下面的程序文件做一个概述文件名是{os.path.relpath(fp, project_folder)},文件代码是 ```{file_content}```'
         i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {os.path.abspath(fp)}'
-        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
-        yield chatbot, history, '正常'
+        # 装载请求内容
+        inputs_array.append(i_say)
+        inputs_show_user_array.append(i_say_show_user)
+        history_array.append([])
+        sys_prompt_array.append("你是一个程序架构分析师,正在分析一个源代码项目。你的回答必须简单明了。")
 
-        if not fast_debug:
-            msg = '正常'
+    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
+        inputs_array = inputs_array,
+        inputs_show_user_array = inputs_show_user_array,
+        history_array = history_array,
+        sys_prompt_array = sys_prompt_array,
+        top_p = top_p,
+        temperature = temperature,
+        chatbot = chatbot,
+        show_user_at_complete = True
+    )
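+    # gpt_response_collection interleaves prompts and replies:
+    # [show_user_1, reply_1, show_user_2, reply_2, ...]; step 2 below therefore
+    # slices it in pairs (batchsize*2) and rewrites even entries to bare file names.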
 
-            # ** gpt request **
-            gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[])   # 带超时倒计时
+    report_part_1 = copy.deepcopy(gpt_response_collection)
+    history_to_return = report_part_1
+    res = write_results_to_file(report_part_1)
+    chatbot.append(("完成?", "逐个文件分析已完成。" + res + "\n\n正在开始汇总。"))
+    yield chatbot, history_to_return, msg
 
-            chatbot[-1] = (i_say_show_user, gpt_say)
-            history.append(i_say_show_user); history.append(gpt_say)
-            yield chatbot, history, msg
-        if not fast_debug: time.sleep(2)
-
-    all_file = ', '.join([os.path.relpath(fp, project_folder) for index, fp in enumerate(file_manifest)])
-    i_say = f'根据以上你自己的分析,对程序的整体功能和构架做出概括。然后用一张markdown表格整理每个文件的功能(包括{all_file})。'
-    chatbot.append((i_say, "[Local Message] waiting gpt response."))
-    yield chatbot, history, '正常'
-
-    if not fast_debug:
-        msg = '正常'
-        # ** gpt request **
-        gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say, chatbot, top_p, temperature, history=history)   # 带超时倒计时
-
-        chatbot[-1] = (i_say, gpt_say)
-        history.append(i_say); history.append(gpt_say)
-        yield chatbot, history, msg
-        res = write_results_to_file(history)
-        chatbot.append(("完成了吗?", res))
-        yield chatbot, history, msg
+    ############################## <第二步,综合,单线程,分组+迭代处理> ##################################
+    batchsize = 16    # 每组处理16个文件
+    report_part_2 = []
+    previous_iteration_files = []
+    while True:
+        if len(file_manifest) == 0: break
+        this_iteration_file_manifest = file_manifest[:batchsize]
+        this_iteration_gpt_response_collection = gpt_response_collection[:batchsize*2]
+        file_rel_path = [os.path.relpath(fp, project_folder) for index, fp in enumerate(this_iteration_file_manifest)]
+        # 把“请对下面的程序文件做一个概述”替换成精简的 "文件名:{all_file[index]}"
+        for index, content in enumerate(this_iteration_gpt_response_collection):
+            if index%2==0: this_iteration_gpt_response_collection[index] = f"文件名:{file_rel_path[index//2]}"
+        previous_iteration_files.extend([os.path.relpath(fp, project_folder) for index, fp in enumerate(this_iteration_file_manifest)])
+        previous_iteration_files_string = ', '.join(previous_iteration_files)
+        current_iteration_focus = ', '.join([os.path.relpath(fp, project_folder) for index, fp in enumerate(this_iteration_file_manifest)])
+        i_say = f'根据以上分析,对程序的整体功能和构架重新做出概括。然后用一张markdown表格整理每个文件的功能(包括{previous_iteration_files_string})。'
+        inputs_show_user = f'根据以上分析,对程序的整体功能和构架重新做出概括,由于输入长度限制,可能需要分组处理,本组文件为 {current_iteration_focus} + 已经汇总的文件组。'
+        this_iteration_history = copy.deepcopy(this_iteration_gpt_response_collection)
+        this_iteration_history.extend(report_part_2)
+        result = yield from request_gpt_model_in_new_thread_with_ui_alive(
+            inputs=i_say, inputs_show_user=inputs_show_user, top_p=top_p, temperature=temperature, chatbot=chatbot,
+            history=this_iteration_history,   # 迭代之前的分析
+            sys_prompt="你是一个程序架构分析师,正在分析一个源代码项目。")
+        report_part_2.extend([i_say, result])
+        file_manifest = file_manifest[batchsize:]
+        gpt_response_collection = gpt_response_collection[batchsize*2:]
+    ############################## <END> ##################################
+    history_to_return.extend(report_part_2)
+    res = write_results_to_file(history_to_return)
+    chatbot.append(("完成了吗?", res))
+    yield chatbot, history_to_return, msg
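+# The batched second pass above is a rolling summary: each iteration folds the
+# next batch of per-file digests plus all previous group summaries (report_part_2)
+# into one single-threaded request, keeping the prompt bounded for large projects.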
 
 @CatchException
 def 解析项目本身(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
     history = []    # 清空历史,以免输入溢出
-    import time, glob, os
+    import glob
     file_manifest = [f for f in glob.glob('./*.py') if ('test_project' not in f) and ('gpt_log' not in f)] + \
-                    [f for f in glob.glob('./crazy_functions/*.py') if ('test_project' not in f) and ('gpt_log' not in f)]
-    for index, fp in enumerate(file_manifest):
-        # if 'test_project' in fp: continue
-        with open(fp, 'r', encoding='utf-8') as f:
-            file_content = f.read()
-
-        prefix = "接下来请你分析自己的程序构成,别紧张," if index==0 else ""
-        i_say = prefix + f'请对下面的程序文件做一个概述文件名是{fp},文件代码是 ```{file_content}```'
-        i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {os.path.abspath(fp)}'
-        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
-        yield chatbot, history, '正常'
-
-        if not fast_debug:
-            # ** gpt request **
-            # gpt_say = predict_no_ui(inputs=i_say, top_p=top_p, temperature=temperature)
-            gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[], long_connection=True)   # 带超时倒计时
-
-            chatbot[-1] = (i_say_show_user, gpt_say)
-            history.append(i_say_show_user); history.append(gpt_say)
-            yield chatbot, history, '正常'
-            time.sleep(2)
-
-    i_say = f'根据以上你自己的分析,对程序的整体功能和构架做出概括。然后用一张markdown表格整理每个文件的功能(包括{file_manifest})。'
-    chatbot.append((i_say, "[Local Message] waiting gpt response."))
-    yield chatbot, history, '正常'
-
-    if not fast_debug:
-        # ** gpt request **
-        # gpt_say = predict_no_ui(inputs=i_say, top_p=top_p, temperature=temperature, history=history)
-        gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say, chatbot, top_p, temperature, history=history, long_connection=True)   # 带超时倒计时
-
-        chatbot[-1] = (i_say, gpt_say)
-        history.append(i_say); history.append(gpt_say)
-        yield chatbot, history, '正常'
-        res = write_results_to_file(history)
-        chatbot.append(("完成了吗?", res))
+                    [f for f in glob.glob('./crazy_functions/*.py') if ('test_project' not in f) and ('gpt_log' not in f)]+ \
+                    [f for f in glob.glob('./request_llm/*.py') if ('test_project' not in f) and ('gpt_log' not in f)]
+    project_folder = './'
+    if len(file_manifest) == 0:
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何python文件: {txt}")
         yield chatbot, history, '正常'
+        return
+    yield from 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
 
 @CatchException
 def 解析一个Python项目(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
@@ -105,7 +109,7 @@ def 解析一个Python项目(txt, top_p, temperature, chatbot, history, systemPr
         report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何python文件: {txt}")
         yield chatbot, history, '正常'
         return
-    yield from 解析源代码(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
+    yield from 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
 
 
 @CatchException
@@ -126,7 +130,7 @@ def 解析一个C项目的头文件(txt, top_p, temperature, chatbot, history, s
         report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.h头文件: {txt}")
         yield chatbot, history, '正常'
         return
-    yield from 解析源代码(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
+    yield from 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
 
 @CatchException
 def 解析一个C项目(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
@@ -147,7 +151,7 @@ def 解析一个C项目(txt, top_p, temperature, chatbot, history, systemPromptT
         report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.h头文件: {txt}")
         yield chatbot, history, '正常'
         return
-    yield from 解析源代码(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
+    yield from 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
 
 
 @CatchException
@@ -169,7 +173,7 @@ def 解析一个Java项目(txt, top_p, temperature, chatbot, history, systemProm
         report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何java文件: {txt}")
         yield chatbot, history, '正常'
         return
-    yield from 解析源代码(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
+    yield from 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
 
 
 @CatchException
@@ -192,7 +196,7 @@ def 解析一个Rect项目(txt, top_p, temperature, chatbot, history, systemProm
         report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何Rect文件: {txt}")
         yield chatbot, history, '正常'
         return
-    yield from 解析源代码(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
+    yield from 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
 
 
 @CatchException
@@ -211,4 +215,4 @@ def 解析一个Golang项目(txt, top_p, temperature, chatbot, history, systemPr
         report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何golang文件: {txt}")
         yield chatbot, history, '正常'
         return
-    yield from 解析源代码(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
+    yield from 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
diff --git a/objdump.tmp b/objdump.tmp
new file mode 100644
index 0000000..d5186a0
Binary files /dev/null and b/objdump.tmp differ
diff --git a/request_llm/bridge_chatgpt.py b/request_llm/bridge_chatgpt.py
index 3ffbd73..b3bba97 100644
--- a/request_llm/bridge_chatgpt.py
+++ b/request_llm/bridge_chatgpt.py
@@ -72,7 +72,7 @@ def predict_no_ui(inputs, top_p, temperature, history=[], sys_prompt=""):
         raise ConnectionAbortedError("Json解析不合常规,可能是文本过长" + response.text)
 
 
-def predict_no_ui_long_connection(inputs, top_p, temperature, history=[], sys_prompt="", observe_window=None):
+def predict_no_ui_long_connection(inputs, top_p, temperature, history=[], sys_prompt="", observe_window=None, console_slience=False):
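+    # console_slience (spelling as used throughout this codebase) mutes the
+    # token-by-token console echo; the multi-threaded caller in crazy_utils.py
+    # passes console_slience=True so concurrent streams don't interleave on stdout.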
     """
    发送至chatGPT,等待回复,一次性完成,不显示中间过程。但内部用stream的方法避免中途网线被掐。
    inputs:
@@ -121,7 +121,7 @@ def predict_no_ui_long_connection(inputs, top_p, temperature, history=[], sys_pr
             if "role" in delta: continue
             if "content" in delta:
                 result += delta["content"]
-                print(delta["content"], end='')
+                if not console_slience: print(delta["content"], end='')
                 if observe_window is not None:
                     # 观测窗,把已经获取的数据显示出去
                     if len(observe_window) >= 1: observe_window[0] += delta["content"]
@@ -264,8 +264,7 @@ def generate_payload(inputs, top_p, temperature, history, system_prompt, stream):
         "presence_penalty": 0,
         "frequency_penalty": 0,
     }
-
-    print(f" {LLM_MODEL} : {conversation_cnt} : {inputs}")
+    print(f" {LLM_MODEL} : {conversation_cnt} : {inputs[:100]}")
     return headers,payload
diff --git a/toolbox.py b/toolbox.py
index 7c329ed..ffd44ea 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -21,6 +21,8 @@ def ArgsGeneralWrapper(f):
         yield from f(txt_passon, *args, **kwargs)
     return decorated
 
+def update_ui(chatbot, history, msg='正常', *args, **kwargs):
+    yield chatbot, history, msg
 
 def get_reduce_token_percent(text):
     try:
diff --git a/version b/version
index 2f1e653..9916cb3 100644
--- a/version
+++ b/version
@@ -1,5 +1,5 @@
 {
-  "version": 2.5,
+  "version": 2.6,
   "show_feature": true,
-  "new_feature": "新增一键更新程序<->高亮代码<->高亮公式<->新增垂直布局选项"
+  "new_feature": "增强多线程稳定性(涉及代码解析、PDF翻译等)<->修复Token计数错误(解决PDF翻译的分割不合理的问题)"
 }