Merge branch 'master' into frontier
commit 8faf69c41e

This merge adds the iFlytek Spark v3 model ("sparkv3") end to end: it is documented in the model whitelist, registered in the model registry, and wired to the v3.1 websocket endpoint with its own request domain. It also tightens the ChatGLM-FT quantization guard and marks the Newbing component as no longer maintained.
```diff
@@ -87,7 +87,7 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5",
                     "api2d-gpt-3.5-turbo", 'api2d-gpt-3.5-turbo-16k', "api2d-gpt-4",
                     "gpt-4", "gpt-4-32k", "azure-gpt-4", "chatglm", "moss", "newbing", "stack-claude"]
 # P.S. other available models include ["qianfan", "llama2", "qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-random"
-# "spark", "sparkv2", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"]
+# "spark", "sparkv2", "sparkv3", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"]


 # Baidu Qianfan (LLM_MODEL="qianfan")
```
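The comment above is the documentation side of the change: models listed there can be switched on by adding their name to `AVAIL_LLM_MODELS`, and registration elsewhere is gated on membership in that list. A minimal sketch of the whitelist-gating pattern (the `register_model` helper is hypothetical; only the `AVAIL_LLM_MODELS` and `model_info` names come from the diff):

```python
# Hypothetical sketch of whitelist-gated registration: only models the user
# has listed in AVAIL_LLM_MODELS get an entry in the model_info registry.
AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "sparkv3"]
model_info = {}

def register_model(name, **entry):
    """Register a model only if it has been whitelisted."""
    if name in AVAIL_LLM_MODELS:
        model_info[name] = entry

register_model("sparkv3", endpoint=None, max_token=4096)
register_model("llama2", endpoint=None, max_token=4096)  # skipped: not whitelisted

print(sorted(model_info))  # ['sparkv3']
```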
```diff
@@ -451,6 +451,22 @@ if "sparkv2" in AVAIL_LLM_MODELS:   # iFlytek Spark cognitive LLM (讯飞星火认知大模型)
         })
     except:
         print(trimmed_format_exc())
+if "sparkv3" in AVAIL_LLM_MODELS:   # iFlytek Spark cognitive LLM (讯飞星火认知大模型)
+    try:
+        from .bridge_spark import predict_no_ui_long_connection as spark_noui
+        from .bridge_spark import predict as spark_ui
+        model_info.update({
+            "sparkv3": {
+                "fn_with_ui": spark_ui,
+                "fn_without_ui": spark_noui,
+                "endpoint": None,
+                "max_token": 4096,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            }
+        })
+    except:
+        print(trimmed_format_exc())
 if "llama2" in AVAIL_LLM_MODELS:   # llama2
     try:
         from .bridge_llama2 import predict_no_ui_long_connection as llama2_noui
```
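The new `sparkv3` entry follows the same registry pattern as every other backend: each model name maps to a streaming entry point for the web UI, a blocking one for plugins, and tokenizer metadata, with the registration wrapped in `try/except` so a broken optional dependency degrades to a log line instead of crashing startup. A self-contained sketch of how callers then dispatch through the registry (the predict functions here are hypothetical stubs, not the real `bridge_spark` code):

```python
# Hypothetical stubs standing in for the real bridge_spark entry points.
def spark_ui(inputs, llm_kwargs):
    yield f"(streamed) {inputs}"      # generator consumed by the web UI

def spark_noui(inputs, llm_kwargs):
    return f"(blocking) {inputs}"     # plain call used by batch plugins

model_info = {
    "sparkv3": {
        "fn_with_ui": spark_ui,
        "fn_without_ui": spark_noui,
        "endpoint": None,             # Spark talks over a websocket, not HTTP
        "max_token": 4096,
    },
}

# Dispatch: callers look the model up by name and pick an entry point.
fn = model_info["sparkv3"]["fn_without_ui"]
print(fn("hello", llm_kwargs={}))     # (blocking) hello
```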
```diff
@@ -87,7 +87,7 @@ class GetGLMFTHandle(Process):
                 new_prefix_state_dict[k[len("transformer.prefix_encoder."):]] = v
             model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)

-            if model_args['quantization_bit'] is not None:
+            if model_args['quantization_bit'] is not None and model_args['quantization_bit'] != 0:
                 print(f"Quantized to {model_args['quantization_bit']} bit")
                 model = model.quantize(model_args['quantization_bit'])
             model = model.cuda()
```
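The extra `!= 0` matters because ChatGLM fine-tuning configs conventionally use `quantization_bit: 0` to mean "no quantization"; the old guard only skipped `None`, so a zero value would reach `model.quantize(0)`. A sketch of the corrected logic in isolation (assuming that zero-means-disabled convention):

```python
# Sketch of the corrected guard, assuming the convention that
# quantization_bit == 0 (like None) means "do not quantize".
def maybe_quantize(model, model_args):
    qb = model_args.get('quantization_bit')
    if qb is not None and qb != 0:
        print(f"Quantized to {qb} bit")
        model = model.quantize(qb)  # ChatGLM models expose .quantize(bits)
    return model
```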
```diff
@@ -141,10 +141,10 @@ class NewBingHandle(Process):
         except:
             self.success = False
             tb_str = '\n```\n' + trimmed_format_exc() + '\n```\n'
-            self.child.send(f'[Local Message] Cannot load the Newbing component. {tb_str}')
+            self.child.send(f'[Local Message] Cannot load the Newbing component; note that Newbing is no longer maintained. {tb_str}')
             self.child.send('[Fail]')
             self.child.send('[Finish]')
-            raise RuntimeError(f"Cannot load the Newbing component.")
+            raise RuntimeError(f"Cannot load the Newbing component; note that Newbing is no longer maintained.")

         self.success = True
         try:
```
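Beyond the reworded messages, this failure path shows the sentinel protocol the handle uses: the worker process streams text back to the parent over a pipe and terminates the stream with literal `'[Fail]'` and `'[Finish]'` markers. A minimal runnable sketch of that protocol (heavily simplified; the real `NewBingHandle` carries much more state):

```python
# Minimal sketch of the parent/child sentinel protocol implied by the
# '[Fail]' / '[Finish]' messages in the diff.
from multiprocessing import Process, Pipe

def worker(child):
    try:
        raise ImportError("simulated load failure")  # stands in for a failed import
    except Exception as e:
        child.send(f"[Local Message] cannot load component: {e}")
        child.send("[Fail]")    # tells the parent the request failed
        child.send("[Finish]")  # tells the parent to stop reading

if __name__ == "__main__":
    parent, child = Pipe()
    p = Process(target=worker, args=(child,))
    p.start()
    while True:
        msg = parent.recv()
        if msg == "[Finish]":
            break
        print(msg)
    p.join()
```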
```diff
@@ -64,6 +64,7 @@ class SparkRequestInstance():
         self.api_key = XFYUN_API_KEY
         self.gpt_url = "ws://spark-api.xf-yun.com/v1.1/chat"
         self.gpt_url_v2 = "ws://spark-api.xf-yun.com/v2.1/chat"
+        self.gpt_url_v3 = "ws://spark-api.xf-yun.com/v3.1/chat"

         self.time_to_yield_event = threading.Event()
         self.time_to_exit_event = threading.Event()
```
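The three endpoints differ only in the version segment of the websocket path, so the model-to-URL mapping can be expressed as a single template; a sketch (the template and helper names are hypothetical; the URLs are the ones from the diff):

```python
# Sketch: derive the versioned Spark websocket URL from the model name,
# equivalent to the three attributes set in __init__ above.
SPARK_URL_TEMPLATE = "ws://spark-api.xf-yun.com/v{major}.1/chat"
SPARK_API_MAJOR = {"spark": 1, "sparkv2": 2, "sparkv3": 3}

def spark_ws_url(llm_model: str) -> str:
    return SPARK_URL_TEMPLATE.format(major=SPARK_API_MAJOR[llm_model])

assert spark_ws_url("sparkv3") == "ws://spark-api.xf-yun.com/v3.1/chat"
```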
```diff
@@ -87,6 +88,8 @@ class SparkRequestInstance():
     def create_blocking_request(self, inputs, llm_kwargs, history, system_prompt):
         if llm_kwargs['llm_model'] == 'sparkv2':
             gpt_url = self.gpt_url_v2
+        elif llm_kwargs['llm_model'] == 'sparkv3':
+            gpt_url = self.gpt_url_v3
         else:
             gpt_url = self.gpt_url

```
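Each new Spark generation currently adds another `elif` arm here. A dict lookup, mirroring the `domains` mapping introduced in `gen_params` below, would keep URL selection table-driven; a hypothetical alternative (the attribute names match those assigned in `__init__` above):

```python
# Hypothetical table-driven alternative to the growing if/elif chain.
class SparkUrlPicker:
    def __init__(self):
        self.gpt_url    = "ws://spark-api.xf-yun.com/v1.1/chat"
        self.gpt_url_v2 = "ws://spark-api.xf-yun.com/v2.1/chat"
        self.gpt_url_v3 = "ws://spark-api.xf-yun.com/v3.1/chat"

    def pick(self, llm_model: str) -> str:
        urls = {
            "spark": self.gpt_url,
            "sparkv2": self.gpt_url_v2,
            "sparkv3": self.gpt_url_v3,
        }
        # Unknown names fall back to v1, matching the diff's `else` branch.
        return urls.get(llm_model, self.gpt_url)

assert SparkUrlPicker().pick("sparkv2") == "ws://spark-api.xf-yun.com/v2.1/chat"
```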
```diff
@@ -168,6 +171,11 @@ def gen_params(appid, inputs, llm_kwargs, history, system_prompt):
     """
     Generate the request parameters from the appid and the user's question
     """
+    domains = {
+        "spark": "general",
+        "sparkv2": "generalv2",
+        "sparkv3": "generalv3",
+    }
     data = {
         "header": {
             "app_id": appid,
@@ -175,7 +183,7 @@ def gen_params(appid, inputs, llm_kwargs, history, system_prompt):
         },
         "parameter": {
             "chat": {
-                "domain": "generalv2" if llm_kwargs['llm_model'] == 'sparkv2' else "general",
+                "domain": domains[llm_kwargs['llm_model']],
                 "temperature": llm_kwargs["temperature"],
                 "random_threshold": 0.5,
                 "max_tokens": 4096,
```
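One behavioral note on the new lookup: `domains[llm_kwargs['llm_model']]` raises `KeyError` for a model name outside the mapping, whereas the old ternary silently fell back to `"general"`; that is acceptable here because only `spark*` models route into `gen_params`. A sketch of the request body the function now assembles for `sparkv3` (the appid value is hypothetical; the structure follows the diff):

```python
# Sketch of the payload gen_params builds for sparkv3.
domains = {"spark": "general", "sparkv2": "generalv2", "sparkv3": "generalv3"}

llm_kwargs = {"llm_model": "sparkv3", "temperature": 0.7}
data = {
    "header": {"app_id": "my-appid"},  # hypothetical appid
    "parameter": {
        "chat": {
            "domain": domains[llm_kwargs["llm_model"]],  # -> "generalv3"
            "temperature": llm_kwargs["temperature"],
            "random_threshold": 0.5,
            "max_tokens": 4096,
        }
    },
}
print(data["parameter"]["chat"]["domain"])  # generalv3
```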