error
This commit is contained in:
parent
322c4be145
commit
a330d6636e
@ -396,7 +396,7 @@ def get_crazy_functions():
|
|||||||
function_plugins.update({
|
function_plugins.update({
|
||||||
"面试助手 [实时音频采集]": {
|
"面试助手 [实时音频采集]": {
|
||||||
"Color": "stop",
|
"Color": "stop",
|
||||||
"AsButton": False,
|
"AsButton": True,
|
||||||
"Function": HotReload(辅助面试)
|
"Function": HotReload(辅助面试)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
81
crazy_functions/live_audio/aliyunASR.py
Normal file
81
crazy_functions/live_audio/aliyunASR.py
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
import time, threading
|
||||||
|
|
||||||
|
|
||||||
|
class AliyunASR():
    """Stream live audio to Alibaba Cloud real-time speech transcription.

    The class is meant to be used as a base/mixin: ``audio_convertion_thread``
    runs in a worker thread, pulls captured audio for one session, resamples
    it and pushes it to the service, while the ``test_on_*`` callbacks publish
    recognition results through ``parsed_text`` and two ``threading.Event``
    objects that a consumer can wait on.
    """

    # Number of bytes handed to ``send_audio`` per call.
    CHUNK_BYTES = 640

    def __init__(self):
        # Set every time an intermediate recognition result arrives.
        self.event_on_result_chg = threading.Event()
        # Set when the service reports the end of a sentence.
        # NOTE: "entence" is a misspelling of "sentence"; the attribute name
        # is kept unchanged because code outside this class may refer to it.
        self.event_on_entence_end = threading.Event()
        # Latest recognised text; written by ``test_on_result_chg``.
        self.parsed_text = ""
        # Setting this to True makes ``audio_convertion_thread`` leave its loop.
        self.stop = False

    @staticmethod
    def _decode_message(message):
        """Return *message* as a dict, parsing it first when it is JSON text.

        A dict is returned unchanged so both call styles keep working.
        Raises ``json.JSONDecodeError`` if text is given that is not valid JSON.
        """
        import json
        if isinstance(message, (str, bytes, bytearray)):
            return json.loads(message)
        return message

    def test_on_sentence_begin(self, message, *args):
        """Log the sentence-begin event."""
        print("test_on_sentence_begin:{}".format(message))

    def test_on_sentence_end(self, message, *args):
        """Log the sentence-end event and signal ``event_on_entence_end``."""
        print("test_on_sentence_end:{}".format(message))
        self.event_on_entence_end.set()

    def test_on_start(self, message, *args):
        """Log the session-start event."""
        print("test_on_start:{}".format(message))

    def test_on_error(self, message, *args):
        """Log the extra callback arguments of an error event."""
        print("on_error args=>{}".format(args))

    def test_on_close(self, *args):
        """Log the callback arguments of the connection-close event."""
        print("on_close: args=>{}".format(args))

    def test_on_result_chg(self, message, *args):
        """Store the intermediate result text and signal ``event_on_result_chg``.

        *message* may be a dict or the JSON text of the event; the text is
        read from ``message['payload']['result']``.
        """
        print("test_on_chg:{}".format(message))
        # Indexing the raw JSON text with a string key would raise TypeError,
        # so decode it before reading the payload.
        self.parsed_text = self._decode_message(message)['payload']['result']
        self.event_on_result_chg.set()

    def test_on_completed(self, message, *args):
        """Log the completion event."""
        print("on_completed:args=>{} message=>{}".format(args, message))

    def audio_convertion_thread(self, uuid):
        """Forward captured audio for session *uuid* to the transcriber.

        Runs until ``self.stop`` becomes true. *uuid* must expose a ``hex``
        attribute, which is used as the key into the audio distributor and as
        the extra argument passed to every callback.
        """
        # Collect audio in an asynchronous thread.
        import os
        import nls  # pip install git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git
        from .audio_io import change_sample_rate
        from .audio_io import RealtimeAudioDistribution
        NEW_SAMPLERATE = 16000
        rad = RealtimeAudioDistribution()

        URL = "wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1"
        # NOTE(review): the literals below are credentials committed to source
        # control; they should be rotated and moved into configuration. Until
        # then the environment variables take precedence when they are set.
        # See https://help.aliyun.com/document_detail/450255.html for obtaining a token.
        TOKEN = os.environ.get("ALIYUN_TOKEN", "f37f30e0f9934c34a992f6f64f7eba4f")
        # The appkey is obtained from the console: https://nls-portal.console.aliyun.com/applist
        APPKEY = os.environ.get("ALIYUN_APPKEY", "RoPlZrM88DnAFkZK")
        sr = nls.NlsSpeechTranscriber(
            url=URL,
            token=TOKEN,
            appkey=APPKEY,
            on_sentence_begin=self.test_on_sentence_begin,
            on_sentence_end=self.test_on_sentence_end,
            on_start=self.test_on_start,
            on_result_changed=self.test_on_result_chg,
            on_completed=self.test_on_completed,
            on_error=self.test_on_error,
            on_close=self.test_on_close,
            callback_args=[uuid.hex]
        )

        sr.start(aformat="pcm",
                 enable_intermediate_result=True,
                 enable_punctuation_prediction=True,
                 enable_inverse_text_normalization=True)

        chunk = self.CHUNK_BYTES
        try:
            while not self.stop:
                audio = rad.read(uuid.hex)
                if audio is None:
                    # Nothing captured yet; avoid a busy loop.
                    time.sleep(0.1)
                    continue
                dsdata = change_sample_rate(audio, rad.rate, NEW_SAMPLERATE)  # e.g. 48000 --> 16000
                # Serialise the samples directly as little-endian 16-bit PCM.
                # Going through a WAV file would prepend a RIFF header that
                # the service would then treat as audio samples.
                data = dsdata.astype('<i2').tobytes()
                print('audio len:', len(audio), '\t ds len:', len(dsdata), '\t need n send:', -(-len(data) // chunk))
                # Slice by offset so the final, shorter chunk is sent as well.
                for offset in range(0, len(data), chunk):
                    sr.send_audio(data[offset:offset + chunk])
        finally:
            # Always close the session, even if the loop raised.
            sr.stop()
|
@ -1,4 +1,5 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
from scipy import interpolate
|
||||||
|
|
||||||
def Singleton(cls):
|
def Singleton(cls):
|
||||||
_instance = {}
|
_instance = {}
|
||||||
@ -15,12 +16,12 @@ def Singleton(cls):
|
|||||||
class RealtimeAudioDistribution():
|
class RealtimeAudioDistribution():
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self.data = {}
|
self.data = {}
|
||||||
self.max_len = 1024*64
|
self.max_len = 1024*1024
|
||||||
self.rate = 48000 # 只读,每秒采样数量
|
self.rate = 48000 # 只读,每秒采样数量
|
||||||
|
|
||||||
def feed(self, uuid, audio):
|
def feed(self, uuid, audio):
|
||||||
print('feed')
|
|
||||||
self.rate, audio_ = audio
|
self.rate, audio_ = audio
|
||||||
|
print('feed', len(audio_), audio_[-25:])
|
||||||
if uuid not in self.data:
|
if uuid not in self.data:
|
||||||
self.data[uuid] = audio_
|
self.data[uuid] = audio_
|
||||||
else:
|
else:
|
||||||
@ -31,7 +32,17 @@ class RealtimeAudioDistribution():
|
|||||||
def read(self, uuid):
|
def read(self, uuid):
|
||||||
if uuid in self.data:
|
if uuid in self.data:
|
||||||
res = self.data.pop(uuid)
|
res = self.data.pop(uuid)
|
||||||
print('read', len(res))
|
print('read', len(res), res)
|
||||||
else:
|
else:
|
||||||
res = None
|
res = None
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
def change_sample_rate(audio, old_sr, new_sr):
    """Resample *audio* from ``old_sr`` to ``new_sr`` samples per second.

    The first axis of *audio* is treated as time. The signal is interpolated
    with ``scipy.interpolate.interp1d`` (default settings) onto an evenly
    spaced grid of ``int(len * new_sr / old_sr)`` points covering the same
    time span, and the result is cast to ``numpy.int16``.
    """
    n_old = audio.shape[0]
    n_new = int(n_old * new_sr / old_sr)
    total_seconds = n_old / old_sr

    # Both grids start at 0 and end at the same instant, so the first and
    # last input samples line up with the first and last output samples.
    src_times = np.linspace(0, total_seconds, n_old)
    dst_times = np.linspace(0, total_seconds, n_new)

    # interp1d interpolates along the last axis, hence the transposes.
    resample = interpolate.interp1d(src_times, audio.T)
    return resample(dst_times).T.astype(np.int16)
|
@ -3,20 +3,15 @@ from toolbox import CatchException, report_execption, write_results_to_file
|
|||||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||||
import threading, time
|
import threading, time
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from .live_audio.aliyunASR import AliyunASR
|
||||||
|
|
||||||
def take_audio_sentence_flagment(captured_audio):
|
|
||||||
"""
|
|
||||||
判断音频是否到达句尾,如果到了,截取片段
|
|
||||||
"""
|
|
||||||
ready_part = None
|
|
||||||
other_part = captured_audio
|
|
||||||
return ready_part, other_part
|
|
||||||
|
|
||||||
class InterviewAssistent():
|
class InterviewAssistant(AliyunASR):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.capture_interval = 1.0 # second
|
super(InterviewAssistant, self).__init__()
|
||||||
|
self.capture_interval = 0.5 # second
|
||||||
self.stop = False
|
self.stop = False
|
||||||
pass
|
self.parsed_text = ""
|
||||||
|
|
||||||
def init(self, chatbot):
|
def init(self, chatbot):
|
||||||
# 初始化音频采集线程
|
# 初始化音频采集线程
|
||||||
@ -24,31 +19,9 @@ class InterviewAssistent():
|
|||||||
self.keep_latest_n_second = 10
|
self.keep_latest_n_second = 10
|
||||||
self.ready_audio_flagment = None
|
self.ready_audio_flagment = None
|
||||||
self.stop = False
|
self.stop = False
|
||||||
th1 = threading.Thread(target=self.audio_capture_thread, args=(chatbot._cookies['uuid'],))
|
th1 = threading.Thread(target=self.audio_convertion_thread, args=(chatbot._cookies['uuid'],))
|
||||||
th1.daemon = True
|
th1.daemon = True
|
||||||
th1.start()
|
th1.start()
|
||||||
th2 = threading.Thread(target=self.audio2txt_thread, args=(chatbot._cookies['uuid'],))
|
|
||||||
th2.daemon = True
|
|
||||||
th2.start()
|
|
||||||
|
|
||||||
def audio_capture_thread(self, uuid):
|
|
||||||
# 在一个异步线程中采集音频
|
|
||||||
from .live_audio.audio_io import RealtimeAudioDistribution
|
|
||||||
rad = RealtimeAudioDistribution()
|
|
||||||
while not self.stop:
|
|
||||||
time.sleep(self.capture_interval)
|
|
||||||
self.captured_audio = np.concatenate((self.captured_audio, rad.read(uuid.hex)))
|
|
||||||
if len(self.captured_audio) > self.keep_latest_n_second * rad.rate:
|
|
||||||
self.captured_audio = self.captured_audio[-self.keep_latest_n_second * rad.rate:]
|
|
||||||
|
|
||||||
def audio2txt_thread(self, llm_kwargs):
|
|
||||||
import whisper
|
|
||||||
# 在一个异步线程中音频转文字
|
|
||||||
while not self.stop:
|
|
||||||
time.sleep(1)
|
|
||||||
if len(self.captured_audio) > 0:
|
|
||||||
model = whisper.load_model("base")
|
|
||||||
result = model.transcribe("audio.mp3", language='Chinese')
|
|
||||||
|
|
||||||
def gpt_answer(self, text, chatbot, history, llm_kwargs):
|
def gpt_answer(self, text, chatbot, history, llm_kwargs):
|
||||||
i_say = inputs_show_user = text
|
i_say = inputs_show_user = text
|
||||||
@ -63,25 +36,24 @@ class InterviewAssistent():
|
|||||||
def begin(self, llm_kwargs, plugin_kwargs, chatbot, history):
|
def begin(self, llm_kwargs, plugin_kwargs, chatbot, history):
|
||||||
# 面试插件主函数
|
# 面试插件主函数
|
||||||
self.init(chatbot)
|
self.init(chatbot)
|
||||||
|
chatbot.append(["", ""])
|
||||||
|
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||||
while True:
|
while True:
|
||||||
time.sleep(self.capture_interval)
|
self.event_on_result_chg.wait()
|
||||||
if self.ready_audio_flagment:
|
chatbot[-1][0] = self.parsed_text
|
||||||
audio_for_whisper = self.ready_audio_flagment
|
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||||
text = self.audio2txt(audio_for_whisper, llm_kwargs)
|
# if self.event_on_entence_end
|
||||||
yield from self.gpt_answer(text, chatbot, history, llm_kwargs)
|
|
||||||
self.ready_audio_flagment = None
|
# yield from self.gpt_answer(text, chatbot, history, llm_kwargs)
|
||||||
|
# self.ready_audio_flagment = None
|
||||||
|
|
||||||
@CatchException
|
@CatchException
|
||||||
def 辅助面试(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
|
def 辅助面试(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
|
||||||
# pip install -U openai-whisper
|
# pip install -U openai-whisper
|
||||||
chatbot.append(["函数插件功能:辅助面试", "正在预热本地音频转文字模型 ..."])
|
chatbot.append(["函数插件功能:辅助面试", "辅助面试助手, 正在监听音频 ..."])
|
||||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||||
|
|
||||||
import whisper
|
|
||||||
whisper.load_model("base")
|
|
||||||
chatbot.append(["预热本地音频转文字模型完成", "辅助面试助手, 正在监听音频 ..."])
|
|
||||||
|
|
||||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||||
ia = InterviewAssistent()
|
ia = InterviewAssistant()
|
||||||
yield from ia.begin(llm_kwargs, plugin_kwargs, chatbot, history)
|
yield from ia.begin(llm_kwargs, plugin_kwargs, chatbot, history)
|
||||||
|
|
||||||
|
2
main.py
2
main.py
@ -58,7 +58,7 @@ def main():
|
|||||||
with gr_L1():
|
with gr_L1():
|
||||||
with gr_L2(scale=2):
|
with gr_L2(scale=2):
|
||||||
if ENABLE_AUDIO:
|
if ENABLE_AUDIO:
|
||||||
audio_mic = gr.Audio(source="microphone", type="numpy", streaming=True)
|
audio_mic = gr.Audio(source="microphone", type="numpy")
|
||||||
|
|
||||||
chatbot = gr.Chatbot(label=f"当前模型:{LLM_MODEL}")
|
chatbot = gr.Chatbot(label=f"当前模型:{LLM_MODEL}")
|
||||||
chatbot.style(height=CHATBOT_HEIGHT)
|
chatbot.style(height=CHATBOT_HEIGHT)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user