同步音频输入

2023-07-02 14:42:12 +08:00 · 2023-07-02 14:42:12 +08:00 · 322c4be145
commit 322c4be145
parent a3596ff60d
5 changed files with 134 additions and 36 deletions
--- a/crazy_functional.py
+++ b/crazy_functional.py
@ -390,6 +390,19 @@ def get_crazy_functions():
    except:
        print('Load function plugin failed')

+
+    try:
+        from crazy_functions.辅助面试 import 辅助面试
+        function_plugins.update({
+            "面试助手 [实时音频采集]": {
+                "Color": "stop",
+                "AsButton": False,
+                "Function": HotReload(辅助面试)
+            }
+        })
+    except:
+        print('Load function plugin failed')
+        
    # try:
    #     from crazy_functions.虚空终端 import 终端
    #     function_plugins.update({
--- a/crazy_functions/live_audio/audio_io.py
+++ b/crazy_functions/live_audio/audio_io.py
@ -0,0 +1,37 @@
+import numpy as np
+
+def Singleton(cls):
+    """
+    Class decorator that turns ``cls`` into a lazily created singleton.
+
+    The decorated name becomes a factory function: the first call builds
+    ``cls`` with the arguments it was given; every later call returns that
+    same object and ignores its arguments.
+    """
+    created = {}
+
+    def _singleton(*args, **kargs):
+        if cls in created:
+            return created[cls]
+        instance = created[cls] = cls(*args, **kargs)
+        return instance
+
+    return _singleton
+
+
+@Singleton
+class RealtimeAudioDistribution():
+    """
+    Shared buffer of streamed audio, keyed by a per-session uuid string.
+
+    ``feed`` appends a chunk for a session and ``read`` drains everything
+    buffered for it. The two are called from different threads, so every
+    access to ``self.data`` is serialised by ``self._lock``; without it a
+    ``read`` landing in the middle of a ``feed`` would have its drained audio
+    written back and delivered twice.
+    """
+    def __init__(self) -> None:
+        import threading
+        self.data = {}              # uuid string -> audio array not yet read
+        self.max_len = 1024*64      # at most this many trailing samples are kept per session
+        self.rate = 48000   # samples per second; treat as read-only outside (feed() refreshes it)
+        self._lock = threading.Lock()
+
+    def feed(self, uuid, audio):
+        """Append one ``(sample_rate, samples)`` chunk to the buffer of ``uuid``."""
+        print('feed')
+        self.rate, audio_ = audio
+        with self._lock:
+            if uuid not in self.data:
+                self.data[uuid] = audio_
+            else:
+                new_arr = np.concatenate((self.data[uuid], audio_))
+                # keep only the newest max_len samples so an unread buffer cannot grow without bound
+                if len(new_arr) > self.max_len: new_arr = new_arr[-self.max_len:]
+                self.data[uuid] = new_arr
+
+    def read(self, uuid):
+        """Remove and return everything buffered for ``uuid``, or None if nothing is pending."""
+        with self._lock:
+            res = self.data.pop(uuid, None)
+        if res is not None:
+            print('read', len(res))
+        return res
--- a/crazy_functions/辅助面试.py
+++ b/crazy_functions/辅助面试.py
@ -1,45 +1,87 @@
 from toolbox import update_ui
 from toolbox import CatchException, report_execption, write_results_to_file
 from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
-import threading
+import threading, time
+import numpy as np
+
+def take_audio_sentence_flagment(captured_audio):
+    """
+    Decide whether the audio has reached the end of a sentence and, if so,
+    cut that part off.
+
+    Placeholder for now: no detection is performed, so the ready part is
+    always None and the whole input is handed back as the remainder.
+    """
+    return None, captured_audio

 class InterviewAssistent():
+    """
+    State and worker threads of the interview-assistant plugin.
+
+    ``init`` starts two daemon threads for the current session, one collecting
+    audio and one for speech-to-text; ``begin`` polls for a ready audio
+    fragment and asks the LLM to answer it.
+    """
-
    def __init__(self):
+        self.capture_interval = 1.0 # seconds between two polls
+        self.stop = False           # set to True to make the worker loops exit
        pass

-    
-    # def audio_capture_thread(self):
+    def init(self, chatbot):
+        # Reset the capture state and start the worker threads
+        self.captured_audio = np.array([])
+        self.keep_latest_n_second = 10
+        self.ready_audio_flagment = None
+        self.stop = False
+        th1 = threading.Thread(target=self.audio_capture_thread, args=(chatbot._cookies['uuid'],))
+        th1.daemon = True
+        th1.start()
+        th2 = threading.Thread(target=self.audio2txt_thread, args=(chatbot._cookies['uuid'],))
+        th2.daemon = True
+        th2.start()

-        # 第7步：所有线程同时开始执行任务函数
-        # handles = [ for index, fp in enumerate(file_manifest)]
+    def audio_capture_thread(self, uuid):
+        # Background thread: once per capture_interval, drain the audio newly buffered for this session
+        from .live_audio.audio_io import RealtimeAudioDistribution
+        rad = RealtimeAudioDistribution()
+        while not self.stop:
+            time.sleep(self.capture_interval)
+            new_audio = rad.read(uuid.hex)
+            # read() returns None when nothing arrived; concatenating None raises and would end this thread
+            if new_audio is None: continue
+            self.captured_audio = np.concatenate((self.captured_audio, new_audio))
+            # keep only the most recent keep_latest_n_second seconds of samples
+            max_samples = self.keep_latest_n_second * rad.rate
+            if len(self.captured_audio) > max_samples:
+                self.captured_audio = self.captured_audio[-max_samples:]

+    def audio2txt_thread(self, llm_kwargs):
+        import whisper
+        # Background thread: speech-to-text
+        # NOTE(review): init() passes the session uuid as the only argument, not llm_kwargs - confirm the intended signature
+        # NOTE(review): this transcribes the fixed path "audio.mp3", not self.captured_audio, and the result is unused - looks unfinished
+        model = None
+        while not self.stop:
+            time.sleep(1)
+            if len(self.captured_audio) > 0:
+                # load the model on first use only, instead of on every poll
+                if model is None: model = whisper.load_model("base")
+                result = model.transcribe("audio.mp3", language='Chinese')

-
-
-    def init(self):
-        self.captured_words = ""
-        # threading.Thread(target=self.audio_capture_thread, args=(self, 1))
-
+    def gpt_answer(self, text, chatbot, history, llm_kwargs):
+        # Send the text to the LLM as the interviewer's question, then append the exchange to history
+        i_say = inputs_show_user = text
+        gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
+            inputs=i_say, inputs_show_user=inputs_show_user,
+            llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
+            sys_prompt="你是求职者，正在参加面试，请回答问题。"
+        )
+        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
+        history.extend([i_say, gpt_say])

    def begin(self, llm_kwargs, plugin_kwargs, chatbot, history):
+        # Main loop of the interview plugin
+        self.init(chatbot)
        while True:
-            break
-            # yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
-
-
-
-
-
-
+            time.sleep(self.capture_interval)
+            # Compare with None explicitly: a multi-element numpy fragment would make a bare truth test raise ValueError
+            if self.ready_audio_flagment is not None:
+                audio_for_whisper = self.ready_audio_flagment
+                # NOTE(review): no audio2txt method is defined on this class in this change; this call would raise AttributeError - confirm where it is meant to come from
+                text = self.audio2txt(audio_for_whisper, llm_kwargs)
+                yield from self.gpt_answer(text, chatbot, history, llm_kwargs)
+                self.ready_audio_flagment = None

 @CatchException
 def 辅助面试(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    """
+    Plugin entry point for the interview assistant.
+
+    Posts a "warming up" message, loads the whisper "base" model once, posts a
+    "listening" message, then delegates to InterviewAssistent.begin. The UI is
+    refreshed after each message. txt, system_prompt and web_port are not used
+    in this body.
+    """
-    pass
    # pip install -U openai-whisper
-    # while True:
-    #     time.sleep(4)
-    #     print(plugin_kwargs)
-    # ia = InterviewAssistent()
-    # yield from ia.begin(llm_kwargs, plugin_kwargs, chatbot, history)
+    chatbot.append(["函数插件功能：辅助面试", "正在预热本地音频转文字模型 ..."])
+    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+
+    import whisper
+    # The returned model is discarded - presumably this only warms the download/cache; verify
+    whisper.load_model("base")
+    chatbot.append(["预热本地音频转文字模型完成", "辅助面试助手, 正在监听音频 ..."])
+
+    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+    ia = InterviewAssistent()
+    yield from ia.begin(llm_kwargs, plugin_kwargs, chatbot, history)

--- a/main.py
+++ b/main.py
@ -19,7 +19,7 @@ def main():
    description =  """代码开源和更新[地址🚀](https://github.com/binary-husky/chatgpt_academic)，感谢热情的[开发者们❤️](https://github.com/binary-husky/chatgpt_academic/graphs/contributors)"""

    # 问询记录, python 版本建议3.9+（越新越好）
-    import logging
+    import logging, uuid
    os.makedirs("gpt_log", exist_ok=True)
    try:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO, encoding="utf-8")
    except:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO)
@ -57,7 +57,9 @@ def main():
        cookies = gr.State({'api_key': API_KEY, 'llm_model': LLM_MODEL})
        with gr_L1():
            with gr_L2(scale=2):
-                if ENABLE_AUDIO: audio = gr.Audio(source="microphone", streaming=True)
+                if ENABLE_AUDIO: 
+                    audio_mic = gr.Audio(source="microphone", type="numpy", streaming=True)
+
                chatbot = gr.Chatbot(label=f"当前模型：{LLM_MODEL}")
                chatbot.style(height=CHATBOT_HEIGHT)
                history = gr.State([])
@ -134,7 +136,6 @@ def main():
        checkboxes.select(fn_area_visibility, [checkboxes], [area_basic_fn, area_crazy_fn, area_input_primary, area_input_secondary, txt, txt2, clearBtn, clearBtn2, plugin_advanced_arg] )
        # 整理反复出现的控件句柄组合
        input_combo = [cookies, max_length_sl, md_dropdown, txt, txt2, top_p, temperature, chatbot, history, system_prompt, plugin_advanced_arg]
-        if ENABLE_AUDIO: input_combo.append(audio)
        output_combo = [cookies, chatbot, history, status]
        predict_args = dict(fn=ArgsGeneralWrapper(predict), inputs=input_combo, outputs=output_combo)
        # 提交按钮、重置按钮
@ -188,7 +189,18 @@ def main():
        stopBtn.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles)
        stopBtn2.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles)

-        demo.load()
+        def init_cookie(cookies, chatbot):
+            # Tag every visiting session with its own unique uuid (chatbot is accepted but not used)
+            cookies['uuid'] = uuid.uuid4()
+            return cookies
+        demo.load(init_cookie, inputs=[cookies, chatbot], outputs=[cookies])
+
+        if ENABLE_AUDIO: 
+            from crazy_functions.live_audio.audio_io import RealtimeAudioDistribution
+            rad = RealtimeAudioDistribution()
+            def deal_audio(audio, cookies):
+                # Forward each streamed microphone chunk into the buffer of the calling session
+                session_key = cookies['uuid'].hex
+                rad.feed(session_key, audio)
+            audio_mic.stream(deal_audio, inputs=[audio_mic, cookies])

    # gradio的inbrowser触发不太稳定，回滚代码到原始的浏览器打开函数
    def auto_opentab_delay():
--- a/theme/green.py
+++ b/theme/green.py
@ -73,12 +73,6 @@ def adjust_theme():
            chatbot_code_background_color_dark="*neutral_950",
        )
        js = ''
-        # if ADD_CHUANHU:
-        #     with open("./docs/assets/custom.js", "r", encoding="utf-8") as f, \
-        #             open("./docs/assets/external-scripts.js", "r", encoding="utf-8") as f1:
-        #         customJS = f.read()
-        #         externalScripts = f1.read()
-        #     js += f'<script>{customJS}</script><script async>{externalScripts}</script>'
        # 添加一个萌萌的看板娘
        if ADD_WAIFU:
            js += """