' + " ".join([re.sub(pattern2, sub, r) for r in result.split('\n') if r.startswith('[')]) + ''
+ return result
+
+
+def close_up_code_segment_during_stream(gpt_reply):
+ """
+ While GPT is midway through emitting a code block (the opening ``` has been output but the closing ``` has not), append the closing ```.
+
+ Args:
+ gpt_reply (str): The reply string returned by the GPT model.
+
+ Returns:
+ str: The reply with a closing ``` appended when a code fence is still open; otherwise the reply unchanged.
+
+ """
+ if '```' not in gpt_reply:
+ return gpt_reply
+ if gpt_reply.endswith('```'):
+ return gpt_reply
+
+ # With the two cases above ruled out, count the ``` markers: an odd count means a code fence is still open
+ segments = gpt_reply.split('```')
+ n_mark = len(segments) - 1
+ if n_mark % 2 == 1:
+ # print('Currently outputting a code segment!')
+ return gpt_reply+'\n```'
+ else:
+ return gpt_reply
+
+import markdown
+from latex2mathml.converter import convert as tex2mathml
+from functools import wraps, lru_cache
+def markdown_convertion(txt):
+ """
+ Convert Markdown-formatted text to HTML. If it contains math formulas, the formulas are converted to HTML first.
+ """
+ pre = '
'
+ suf = '
'
+ if txt.startswith(pre) and txt.endswith(suf): # NOTE(review): the pre/suf literals above appear truncated — confirm their intended values
+ # print('Warning: the input string has already been converted; converting it a second time may cause problems')
+ return txt # already converted; no need to convert again
+
+ markdown_extension_configs = {
+ 'mdx_math': {
+ 'enable_dollar_delimiter': True,
+ 'use_gitlab_delimiters': False,
+ },
+ }
+ find_equation_pattern = r'\n', '') # NOTE(review): this line looks truncated — the pattern literal and the helpers used below (markdown_bug_hunt, replace_math_no_render, replace_math_render) are not visible here; confirm against the original file
+ return content
+
+
+ if ('$' in txt) and ('```' not in txt): # contains $ formula delimiters and no ``` code-fence marker
+ # convert everything to html format
+ split = markdown.markdown(text='---')
+ convert_stage_1 = markdown.markdown(text=txt, extensions=['mdx_math', 'fenced_code', 'tables', 'sane_lists'], extension_configs=markdown_extension_configs)
+ convert_stage_1 = markdown_bug_hunt(convert_stage_1)
+ # re.DOTALL: Make the '.' special character match any character at all, including a newline; without this flag, '.' will match anything except a newline. Corresponds to the inline flag (?s).
+ # 1. convert to easy-to-copy tex (do not render math)
+ convert_stage_2_1, n = re.subn(find_equation_pattern, replace_math_no_render, convert_stage_1, flags=re.DOTALL)
+ # 2. convert to rendered equation
+ convert_stage_2_2, n = re.subn(find_equation_pattern, replace_math_render, convert_stage_1, flags=re.DOTALL)
+ # cat them together
+ return pre + convert_stage_2_1 + f'{split}' + convert_stage_2_2 + suf
+ else:
+ return pre + markdown.markdown(txt, extensions=['fenced_code', 'codehilite', 'tables', 'sane_lists']) + suf
+
+
+sample = preprocess_newbing_out(sample) # run the newbing output preprocessing; `sample` is presumably defined earlier in this script (not visible here) — confirm
+sample = close_up_code_segment_during_stream(sample) # append a closing ``` if a code fence is still open
+sample = markdown_convertion(sample) # convert the Markdown text to HTML
+with open('tmp.html', 'w', encoding='utf8') as f: # write a header string followed by the converted sample to tmp.html
+ f.write("""
+
+
+ My Website
+
+
+
+ """)
+ f.write(sample)
diff --git a/request_llm/bridge_newbing.py b/request_llm/bridge_newbing.py
index 66db8b6..2fa4761 100644
--- a/request_llm/bridge_newbing.py
+++ b/request_llm/bridge_newbing.py
@@ -27,12 +27,12 @@ def preprocess_newbing_out(s):
sub = lambda m: '\['+m.group(1)+'\]' # 将匹配到的数字作为替换值
result = re.sub(pattern, sub, s) # 替换操作
if '[1]' in result:
- result += '\n\n\n\n' + "\n\n".join(['`'+r+'`' for r in result.split('\n') if r.startswith('[')])
+ result += '\n\n```reference\n' + "\n".join([r for r in result.split('\n') if r.startswith('[')]) + '\n```\n'
return result
def preprocess_newbing_out_simple(result):
if '[1]' in result:
- result += '\n\n```\n' + "\n".join([r for r in result.split('\n') if r.startswith('[')]) + '\n```\n'
+ result += '\n\n```reference\n' + "\n".join([r for r in result.split('\n') if r.startswith('[')]) + '\n```\n'
return result
class NewBingHandle(Process):
diff --git a/toolbox.py b/toolbox.py
index af28e8d..c09ea74 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -271,8 +271,14 @@ def markdown_convertion(txt):
content = content.replace('\n', '')
return content
+ def no_code(txt):
+ if '```' not in txt:
+ return True
+ else:
+ if '```reference' in txt: return True # newbing
+ else: return False
- if ('$' in txt) and ('```' not in txt): # 有$标识的公式符号,且没有代码段```的标识
+ if ('$' in txt) and no_code(txt): # 有$标识的公式符号,且没有代码段```的标识
# convert everything to html format
split = markdown.markdown(text='---')
convert_stage_1 = markdown.markdown(text=txt, extensions=['mdx_math', 'fenced_code', 'tables', 'sane_lists'], extension_configs=markdown_extension_configs)