505030475 2023-07-02 22:54:05 +08:00
parent 322c4be145
commit a330d6636e
5 changed files with 115 additions and 51 deletions

View File

@@ -396,7 +396,7 @@ def get_crazy_functions():
     function_plugins.update({
         "面试助手 [实时音频采集]": {
             "Color": "stop",
-            "AsButton": False,
+            "AsButton": True,
             "Function": HotReload(辅助面试)
         }
     })

View File

@@ -0,0 +1,81 @@
import time, threading

class AliyunASR():
    def __init__(self):
        self.event_on_result_chg = threading.Event()
        self.event_on_entence_end = threading.Event()

    def test_on_sentence_begin(self, message, *args):
        print("test_on_sentence_begin:{}".format(message))

    def test_on_sentence_end(self, message, *args):
        print("test_on_sentence_end:{}".format(message))
        self.event_on_entence_end.set()

    def test_on_start(self, message, *args):
        print("test_on_start:{}".format(message))

    def test_on_error(self, message, *args):
        print("on_error args=>{}".format(args))

    def test_on_close(self, *args):
        print("on_close: args=>{}".format(args))

    def test_on_result_chg(self, message, *args):
        print("test_on_chg:{}".format(message))
        self.parsed_text = message['payload']['result']
        self.event_on_result_chg.set()

    def test_on_completed(self, message, *args):
        print("on_completed:args=>{} message=>{}".format(args, message))

    def audio_convertion_thread(self, uuid):
        # capture and transcribe audio in a background thread
        import nls  # pip install git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git
        from scipy import io
        from .audio_io import change_sample_rate
        from .audio_io import RealtimeAudioDistribution
        NEW_SAMPLERATE = 16000
        rad = RealtimeAudioDistribution()
        import tempfile
        temp_folder = tempfile.gettempdir()
        URL = "wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1"
        TOKEN = "f37f30e0f9934c34a992f6f64f7eba4f"  # see https://help.aliyun.com/document_detail/450255.html for how to obtain a token
        APPKEY = "RoPlZrM88DnAFkZK"                 # get an Appkey from the console: https://nls-portal.console.aliyun.com/applist
        sr = nls.NlsSpeechTranscriber(
            url=URL,
            token=TOKEN,
            appkey=APPKEY,
            on_sentence_begin=self.test_on_sentence_begin,
            on_sentence_end=self.test_on_sentence_end,
            on_start=self.test_on_start,
            on_result_changed=self.test_on_result_chg,
            on_completed=self.test_on_completed,
            on_error=self.test_on_error,
            on_close=self.test_on_close,
            callback_args=[uuid.hex]
        )
        r = sr.start(aformat="pcm",
                     enable_intermediate_result=True,
                     enable_punctuation_prediction=True,
                     enable_inverse_text_normalization=True)

        while not self.stop:
            # time.sleep(self.capture_interval)
            audio = rad.read(uuid.hex)
            if audio is not None:
                # convert to a pcm file
                temp_file = f'{temp_folder}/{uuid.hex}.pcm'
                dsdata = change_sample_rate(audio, rad.rate, NEW_SAMPLERATE)  # 48000 --> 16000
                io.wavfile.write(temp_file, NEW_SAMPLERATE, dsdata)
                # read pcm binary
                with open(temp_file, "rb") as f: data = f.read()
                print('audio len:', len(audio), '\t ds len:', len(dsdata), '\t need n send:', len(data)//640)
                slices = zip(*(iter(data),) * 640)  # group into 640-byte chunks
                for i in slices: sr.send_audio(bytes(i))
            else:
                time.sleep(0.1)

        r = sr.stop()
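
Note on the send loop above: `zip(*(iter(data),) * 640)` regroups the PCM byte string into 640-byte frames, which at 16 kHz, 16-bit mono is 320 samples, i.e. 20 ms of audio per `send_audio` call, and it silently drops any trailing partial frame. A minimal stand-alone sketch of the same framing (the `iter_pcm_frames` helper and the `pcm_bytes` argument are illustrative, not part of this commit):

def iter_pcm_frames(pcm_bytes: bytes, frame_bytes: int = 640):
    # 640 bytes = 320 int16 samples = 20 ms of 16 kHz mono audio
    for offset in range(0, len(pcm_bytes) - frame_bytes + 1, frame_bytes):
        yield pcm_bytes[offset:offset + frame_bytes]
    # like zip(*(iter(data),) * 640), any trailing partial frame is dropped

# hypothetical usage with the transcriber from the loop above:
# for frame in iter_pcm_frames(data):
#     sr.send_audio(frame)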

View File

@@ -1,4 +1,5 @@
 import numpy as np
+from scipy import interpolate

 def Singleton(cls):
     _instance = {}
@@ -15,12 +16,12 @@ def Singleton(cls):
 class RealtimeAudioDistribution():
     def __init__(self) -> None:
         self.data = {}
-        self.max_len = 1024*64
+        self.max_len = 1024*1024
         self.rate = 48000  # read-only: samples per second

     def feed(self, uuid, audio):
-        print('feed')
         self.rate, audio_ = audio
+        print('feed', len(audio_), audio_[-25:])
         if uuid not in self.data:
             self.data[uuid] = audio_
         else:
@@ -31,7 +32,17 @@ class RealtimeAudioDistribution():
     def read(self, uuid):
         if uuid in self.data:
             res = self.data.pop(uuid)
-            print('read', len(res))
+            print('read', len(res), res)
         else:
             res = None
         return res
+
+def change_sample_rate(audio, old_sr, new_sr):
+    duration = audio.shape[0] / old_sr
+    time_old = np.linspace(0, duration, audio.shape[0])
+    time_new = np.linspace(0, duration, int(audio.shape[0] * new_sr / old_sr))
+    interpolator = interpolate.interp1d(time_old, audio.T)
+    new_audio = interpolator(time_new).T
+    return new_audio.astype(np.int16)
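
The new `change_sample_rate` helper is a plain linear-interpolation resampler built on `scipy.interpolate.interp1d`; the ASR thread uses it to turn 48 kHz microphone audio into the 16 kHz PCM the gateway expects. A self-contained usage sketch under that assumption (the synthetic 440 Hz tone is illustrative only):

import numpy as np
from scipy import interpolate

def change_sample_rate(audio, old_sr, new_sr):
    # same linear-interpolation resampling as the helper added above
    duration = audio.shape[0] / old_sr
    time_old = np.linspace(0, duration, audio.shape[0])
    time_new = np.linspace(0, duration, int(audio.shape[0] * new_sr / old_sr))
    interpolator = interpolate.interp1d(time_old, audio.T)
    return interpolator(time_new).T.astype(np.int16)

# one second of a 440 Hz tone captured at 48 kHz, downsampled to 16 kHz
tone = (np.sin(2 * np.pi * 440 * np.linspace(0, 1, 48000)) * 10000).astype(np.int16)
downsampled = change_sample_rate(tone, 48000, 16000)
print(tone.shape, downsampled.shape)  # (48000,) (16000,)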

View File

@@ -3,20 +3,15 @@ from toolbox import CatchException, report_execption, write_results_to_file
 from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
 import threading, time
 import numpy as np
+from .live_audio.aliyunASR import AliyunASR

-def take_audio_sentence_flagment(captured_audio):
-    """
-    Decide whether the audio has reached the end of a sentence; if so, cut that segment out.
-    """
-    ready_part = None
-    other_part = captured_audio
-    return ready_part, other_part

-class InterviewAssistent():
+class InterviewAssistant(AliyunASR):
     def __init__(self):
-        self.capture_interval = 1.0  # second
+        super(InterviewAssistant, self).__init__()
+        self.capture_interval = 0.5  # second
         self.stop = False
-        pass
+        self.parsed_text = ""

     def init(self, chatbot):
         # initialize the audio-capture thread
@@ -24,31 +19,9 @@ class InterviewAssistent():
         self.keep_latest_n_second = 10
         self.ready_audio_flagment = None
         self.stop = False
-        th1 = threading.Thread(target=self.audio_capture_thread, args=(chatbot._cookies['uuid'],))
+        th1 = threading.Thread(target=self.audio_convertion_thread, args=(chatbot._cookies['uuid'],))
         th1.daemon = True
         th1.start()
-        th2 = threading.Thread(target=self.audio2txt_thread, args=(chatbot._cookies['uuid'],))
-        th2.daemon = True
-        th2.start()
-
-    def audio_capture_thread(self, uuid):
-        # capture audio in a background thread
-        from .live_audio.audio_io import RealtimeAudioDistribution
-        rad = RealtimeAudioDistribution()
-        while not self.stop:
-            time.sleep(self.capture_interval)
-            self.captured_audio = np.concatenate((self.captured_audio, rad.read(uuid.hex)))
-            if len(self.captured_audio) > self.keep_latest_n_second * rad.rate:
-                self.captured_audio = self.captured_audio[-self.keep_latest_n_second * rad.rate:]
-
-    def audio2txt_thread(self, llm_kwargs):
-        import whisper
-        # convert audio to text in a background thread
-        while not self.stop:
-            time.sleep(1)
-            if len(self.captured_audio) > 0:
-                model = whisper.load_model("base")
-                result = model.transcribe("audio.mp3", language='Chinese')

     def gpt_answer(self, text, chatbot, history, llm_kwargs):
         i_say = inputs_show_user = text
@@ -63,25 +36,24 @@ class InterviewAssistent():
     def begin(self, llm_kwargs, plugin_kwargs, chatbot, history):
         # main loop of the interview plugin
         self.init(chatbot)
+        chatbot.append(["", ""])
+        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
         while True:
-            time.sleep(self.capture_interval)
-            if self.ready_audio_flagment:
-                audio_for_whisper = self.ready_audio_flagment
-                text = self.audio2txt(audio_for_whisper, llm_kwargs)
-                yield from self.gpt_answer(text, chatbot, history, llm_kwargs)
-                self.ready_audio_flagment = None
+            self.event_on_result_chg.wait()
+            chatbot[-1][0] = self.parsed_text
+            yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
+            # if self.event_on_entence_end
+            # yield from self.gpt_answer(text, chatbot, history, llm_kwargs)
+            # self.ready_audio_flagment = None

 @CatchException
 def 辅助面试(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
     # pip install -U openai-whisper
-    chatbot.append(["函数插件功能:辅助面试", "正在预热本地音频转文字模型 ..."])
+    chatbot.append(["函数插件功能:辅助面试", "辅助面试助手, 正在监听音频 ..."])
     yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-    import whisper
-    whisper.load_model("base")
-    chatbot.append(["预热本地音频转文字模型完成", "辅助面试助手, 正在监听音频 ..."])
     yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-    ia = InterviewAssistent()
+    ia = InterviewAssistant()
     yield from ia.begin(llm_kwargs, plugin_kwargs, chatbot, history)
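
The reworked begin() loop is event-driven: the AliyunASR callback thread writes the latest transcript into self.parsed_text and sets event_on_result_chg, and the generator in begin() wakes on wait() and pushes that text into the last chatbot row. A stripped-down sketch of that hand-off under the same assumptions (ResultBus, producer and consumer are illustrative names; the clear() call is something the sketch adds so each wait() blocks until the next update, which the committed code does not do):

import threading, time

class ResultBus:
    # illustrative stand-in for the event/state pair on AliyunASR
    def __init__(self):
        self.event_on_result_chg = threading.Event()
        self.parsed_text = ""

def producer(bus):
    # plays the role of test_on_result_chg: publish text, then signal the UI loop
    for i in range(3):
        time.sleep(0.2)
        bus.parsed_text = f"partial transcript {i}"
        bus.event_on_result_chg.set()

def consumer(bus):
    # plays the role of the begin() loop: wait for a change, render, re-arm the event
    for _ in range(3):
        bus.event_on_result_chg.wait()
        bus.event_on_result_chg.clear()  # re-arm so the next wait() blocks again
        print("chatbot[-1][0] =", bus.parsed_text)

bus = ResultBus()
threading.Thread(target=producer, args=(bus,), daemon=True).start()
consumer(bus)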

View File

@@ -58,7 +58,7 @@ def main():
         with gr_L1():
             with gr_L2(scale=2):
                 if ENABLE_AUDIO:
-                    audio_mic = gr.Audio(source="microphone", type="numpy", streaming=True)
+                    audio_mic = gr.Audio(source="microphone", type="numpy")
                 chatbot = gr.Chatbot(label=f"当前模型:{LLM_MODEL}")
                 chatbot.style(height=CHATBOT_HEIGHT)