From 13ade8267778c9ce191593b7c89ab6edd859f3b3 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Sun, 9 Jul 2023 23:18:06 +0800 Subject: [PATCH] =?UTF-8?q?=E6=94=B9=E5=96=84=E8=AF=AD=E9=9F=B3=E8=BE=85?= =?UTF-8?q?=E5=8A=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/live_audio/aliyunASR.py | 25 ++++--- crazy_functions/live_audio/audio_io.py | 7 +- crazy_functions/辅助面试.py | 96 ++++++++++++++++++------- 3 files changed, 93 insertions(+), 35 deletions(-) diff --git a/crazy_functions/live_audio/aliyunASR.py b/crazy_functions/live_audio/aliyunASR.py index aa8ecc9..ac1ea34 100644 --- a/crazy_functions/live_audio/aliyunASR.py +++ b/crazy_functions/live_audio/aliyunASR.py @@ -4,31 +4,37 @@ import time, threading, json class AliyunASR(): def test_on_sentence_begin(self, message, *args): - print("test_on_sentence_begin:{}".format(message)) + # print("test_on_sentence_begin:{}".format(message)) + pass def test_on_sentence_end(self, message, *args): - print("test_on_sentence_end:{}".format(message)) + # print("test_on_sentence_end:{}".format(message)) message = json.loads(message) self.parsed_sentence = message['payload']['result'] self.event_on_entence_end.set() + print(self.parsed_sentence) def test_on_start(self, message, *args): - print("test_on_start:{}".format(message)) + # print("test_on_start:{}".format(message)) + pass def test_on_error(self, message, *args): - print("on_error args=>{}".format(args)) + # print("on_error args=>{}".format(args)) + pass def test_on_close(self, *args): - print("on_close: args=>{}".format(args)) + # print("on_close: args=>{}".format(args)) + pass def test_on_result_chg(self, message, *args): - print("test_on_chg:{}".format(message)) + # print("test_on_chg:{}".format(message)) message = json.loads(message) self.parsed_text = message['payload']['result'] self.event_on_result_chg.set() def test_on_completed(self, message, *args): - print("on_completed:args=>{} message=>{}".format(args, message)) + # print("on_completed:args=>{} message=>{}".format(args, message)) + pass def audio_convertion_thread(self, uuid): @@ -41,10 +47,11 @@ class AliyunASR(): from .audio_io import RealtimeAudioDistribution NEW_SAMPLERATE = 16000 rad = RealtimeAudioDistribution() + rad.clean_up() temp_folder = tempfile.gettempdir() TOKEN, APPKEY = get_conf('ALIYUN_TOKEN', 'ALIYUN_APPKEY') - URL="wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1" + URL="wss://nls-gateway.aliyuncs.com/ws/v1" sr = nls.NlsSpeechTranscriber( url=URL, token=TOKEN, @@ -74,7 +81,7 @@ class AliyunASR(): io.wavfile.write(temp_file, NEW_SAMPLERATE, dsdata) # read pcm binary with open(temp_file, "rb") as f: data = f.read() - print('audio len:', len(audio), '\t ds len:', len(dsdata), '\t need n send:', len(data)//640) + # print('audio len:', len(audio), '\t ds len:', len(dsdata), '\t need n send:', len(data)//640) slices = zip(*(iter(data),) * 640) # 640个字节为一组 for i in slices: sr.send_audio(bytes(i)) else: diff --git a/crazy_functions/live_audio/audio_io.py b/crazy_functions/live_audio/audio_io.py index 943bd52..3ff83a6 100644 --- a/crazy_functions/live_audio/audio_io.py +++ b/crazy_functions/live_audio/audio_io.py @@ -19,9 +19,12 @@ class RealtimeAudioDistribution(): self.max_len = 1024*1024 self.rate = 48000 # 只读,每秒采样数量 + def clean_up(self): + self.data = {} + def feed(self, uuid, audio): self.rate, audio_ = audio - print('feed', len(audio_), audio_[-25:]) + # print('feed', len(audio_), audio_[-25:]) if uuid not in self.data: self.data[uuid] = audio_ else: @@ -32,7 +35,7 @@ class RealtimeAudioDistribution(): def read(self, uuid): if uuid in self.data: res = self.data.pop(uuid) - print('read', len(res), res) + print('\r read-', len(res), '-', max(res), end='', flush=True) else: res = None return res diff --git a/crazy_functions/辅助面试.py b/crazy_functions/辅助面试.py index 9a70987..135c7c3 100644 --- a/crazy_functions/辅助面试.py +++ b/crazy_functions/辅助面试.py @@ -1,5 +1,5 @@ from toolbox import update_ui -from toolbox import CatchException, get_conf, write_results_to_file +from toolbox import CatchException, get_conf, markdown_convertion from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive from request_llm.bridge_all import predict_no_ui_long_connection import threading, time @@ -7,6 +7,31 @@ import numpy as np from .live_audio.aliyunASR import AliyunASR import json +class WatchDog(): + def __init__(self, timeout, bark_fn, interval=3, msg="") -> None: + self.last_feed = None + self.timeout = timeout + self.bark_fn = bark_fn + self.interval = interval + self.msg = msg + + def watch(self): + while True: + if time.time() - self.last_feed > self.timeout: + if len(self.msg) > 0: print(self.msg) + self.bark_fn() + break + time.sleep(self.interval) + + def begin_watch(self): + self.last_feed = time.time() + th = threading.Thread(target=self.watch) + th.daemon = True + th.start() + + def feed(self): + self.last_feed = time.time() + class AsyncGptTask(): @@ -16,7 +41,8 @@ class AsyncGptTask(): def gpt_thread_worker(self, i_say, llm_kwargs, history, sys_prompt, observe_window, index): try: - gpt_say_partial = predict_no_ui_long_connection(inputs=i_say, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=observe_window[index]) + gpt_say_partial = predict_no_ui_long_connection(inputs=i_say, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, + observe_window=observe_window[index], console_slience=True) except ConnectionAbortedError as token_exceed_err: print('至少一个线程任务Token溢出而失败', e) except Exception as e: @@ -34,7 +60,7 @@ class AsyncGptTask(): for of, ofci in zip(self.observe_future, self.observe_future_chatbot_index): try: chatbot[ofci] = list(chatbot[ofci]) - chatbot[ofci][1] = of[0] + chatbot[ofci][1] = markdown_convertion(of[0]) except: self.observe_future = [] self.observe_future_chatbot_index = [] @@ -45,67 +71,89 @@ class InterviewAssistant(AliyunASR): self.capture_interval = 0.5 # second self.stop = False self.parsed_text = "" + self.parsed_sentence = "" + self.buffered_sentence = "" self.event_on_result_chg = threading.Event() self.event_on_entence_end = threading.Event() + self.event_on_commit_question = threading.Event() + + def __del__(self): + self.stop = True def init(self, chatbot): # 初始化音频采集线程 self.captured_audio = np.array([]) self.keep_latest_n_second = 10 + self.commit_after_pause_n_second = 1.5 self.ready_audio_flagment = None self.stop = False - th1 = threading.Thread(target=self.audio_convertion_thread, args=(chatbot._cookies['uuid'],)) - th1.daemon = True - th1.start() + self.plugin_wd = WatchDog(timeout=5, bark_fn=self.__del__, msg="程序终止") + self.aut = threading.Thread(target=self.audio_convertion_thread, args=(chatbot._cookies['uuid'],)) + self.aut.daemon = True + self.aut.start() # th2 = threading.Thread(target=self.audio2txt_thread, args=(chatbot._cookies['uuid'],)) # th2.daemon = True # th2.start() - def gpt_answer(self, text, chatbot, history, llm_kwargs): - i_say = inputs_show_user = text - gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive( - inputs=i_say, inputs_show_user=inputs_show_user, - llm_kwargs=llm_kwargs, chatbot=chatbot, history=history, - sys_prompt="请回答问题。" # 你是求职者,正在参加面试, - ) - yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 - history.extend([i_say, gpt_say]) + def no_audio_for_a_while(self): + if len(self.buffered_sentence) < 7: # 如果一句话小于7个字,暂不提交 + self.commit_wd.begin_watch() + else: + self.event_on_commit_question.set() def begin(self, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt): # main plugin function self.init(chatbot) - chatbot.append(["", ""]) + chatbot.append(["[请讲话]", "[等待GPT响应]"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + self.plugin_wd.begin_watch() self.agt = AsyncGptTask() + self.commit_wd = WatchDog(timeout=self.commit_after_pause_n_second, bark_fn=self.no_audio_for_a_while, interval=0.2) + self.commit_wd.begin_watch() while True: self.event_on_result_chg.wait(timeout=0.25) # run once every 0.25 second chatbot = self.agt.update_chatbot(chatbot) # 将子线程的gpt结果写入chatbot - yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + self.plugin_wd.feed() if self.event_on_result_chg.is_set(): # update audio decode result self.event_on_result_chg.clear() chatbot[-1] = list(chatbot[-1]) - chatbot[-1][0] = self.parsed_text - yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + chatbot[-1][0] = self.buffered_sentence + self.parsed_text + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + self.commit_wd.feed() if self.event_on_entence_end.is_set(): # called when a sentence has ended self.event_on_entence_end.clear() + self.parsed_text = self.parsed_sentence + self.buffered_sentence += self.parsed_sentence + + if self.event_on_commit_question.is_set(): + # called when a question should be commited + self.event_on_commit_question.clear() + if len(self.buffered_sentence) == 0: raise RuntimeError + + self.commit_wd.begin_watch() chatbot[-1] = list(chatbot[-1]) - chatbot[-1] = [self.parsed_sentence, "[waiting gpt reply]"] + chatbot[-1] = [self.buffered_sentence, "[waiting gpt reply]"] yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # add gpt task 创建子线程请求gpt,避免线程阻塞 - self.agt.add_async_gpt_task(self.parsed_sentence, len(chatbot)-1, llm_kwargs, history, system_prompt) - chatbot.append(["", ""]) + self.agt.add_async_gpt_task(self.buffered_sentence, len(chatbot)-1, llm_kwargs, history, system_prompt) + + self.buffered_sentence = "" + chatbot.append(["[请讲话]", "[等待GPT响应]"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + + @CatchException def 辅助面试(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): # pip install -U openai-whisper - chatbot.append(["函数插件功能:辅助面试", "辅助面试助手, 正在监听音频 ..."]) + chatbot.append(["对话助手函数插件:使用时,双手离开鼠标键盘吧", "音频助手, 正在听您讲话 ..."]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 尝试导入依赖,如果缺少依赖,则给出安装建议 @@ -113,7 +161,7 @@ def 辅助面试(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt import nls from scipy import io except: - chatbot.append(["导入依赖失败", "使用该模块需要额外依赖, 安装方法:```pip install scipy git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git```"]) + chatbot.append(["导入依赖失败", "使用该模块需要额外依赖, 安装方法:```pip install --upgrade pyOpenSSL scipy git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git```"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 return