From ec60a85cac6c68ce74b21cdd646f534a390a3e78 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Tue, 5 Dec 2023 00:15:17 +0800 Subject: [PATCH] new vector store establishment --- crazy_functional.py | 4 +- crazy_functions/crazy_utils.py | 85 +---- .../vector_fns/general_file_loader.py | 70 ++++ crazy_functions/vector_fns/vector_database.py | 349 ++++++++++++++++++ .../{Langchain知识库.py => 知识库问答.py} | 2 +- docs/translate_english.json | 2 +- docs/translate_japanese.json | 2 +- docs/translate_std.json | 2 +- docs/translate_traditionalchinese.json | 2 +- tests/test_plugins.py | 6 +- 10 files changed, 430 insertions(+), 94 deletions(-) create mode 100644 crazy_functions/vector_fns/general_file_loader.py create mode 100644 crazy_functions/vector_fns/vector_database.py rename crazy_functions/{Langchain知识库.py => 知识库问答.py} (98%) diff --git a/crazy_functional.py b/crazy_functional.py index c3ee50a..0d665f1 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -440,7 +440,7 @@ def get_crazy_functions(): print('Load function plugin failed') try: - from crazy_functions.Langchain知识库 import 知识库问答 + from crazy_functions.知识库问答 import 知识库问答 function_plugins.update({ "构建知识库(先上传文件素材,再运行此插件)": { "Group": "对话", @@ -456,7 +456,7 @@ def get_crazy_functions(): print('Load function plugin failed') try: - from crazy_functions.Langchain知识库 import 读取知识库作答 + from crazy_functions.知识库问答 import 读取知识库作答 function_plugins.update({ "知识库问答(构建知识库后,再运行此插件)": { "Group": "对话", diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py index afe079f..9778053 100644 --- a/crazy_functions/crazy_utils.py +++ b/crazy_functions/crazy_utils.py @@ -1,4 +1,4 @@ -from toolbox import update_ui, get_conf, trimmed_format_exc, get_max_token +from toolbox import update_ui, get_conf, trimmed_format_exc, get_max_token, Singleton import threading import os import logging @@ -631,89 +631,6 @@ def get_files_from_everything(txt, type): # type='.md' - -def Singleton(cls): - _instance = {} - - def _singleton(*args, **kargs): - if cls not in _instance: - _instance[cls] = cls(*args, **kargs) - return _instance[cls] - - return _singleton - - -@Singleton -class knowledge_archive_interface(): - def __init__(self) -> None: - self.threadLock = threading.Lock() - self.current_id = "" - self.kai_path = None - self.qa_handle = None - self.text2vec_large_chinese = None - - def get_chinese_text2vec(self): - if self.text2vec_large_chinese is None: - # < -------------------预热文本向量化模组--------------- > - from toolbox import ProxyNetworkActivate - print('Checking Text2vec ...') - from langchain.embeddings.huggingface import HuggingFaceEmbeddings - with ProxyNetworkActivate('Download_LLM'): # 临时地激活代理网络 - self.text2vec_large_chinese = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese") - - return self.text2vec_large_chinese - - - def feed_archive(self, file_manifest, id="default"): - self.threadLock.acquire() - # import uuid - self.current_id = id - from zh_langchain import construct_vector_store - self.qa_handle, self.kai_path = construct_vector_store( - vs_id=self.current_id, - files=file_manifest, - sentence_size=100, - history=[], - one_conent="", - one_content_segmentation="", - text2vec = self.get_chinese_text2vec(), - ) - self.threadLock.release() - - def get_current_archive_id(self): - return self.current_id - - def get_loaded_file(self): - return self.qa_handle.get_loaded_file() - - def answer_with_archive_by_id(self, txt, id): - self.threadLock.acquire() - if not self.current_id == id: - self.current_id = id - from 
zh_langchain import construct_vector_store - self.qa_handle, self.kai_path = construct_vector_store( - vs_id=self.current_id, - files=[], - sentence_size=100, - history=[], - one_conent="", - one_content_segmentation="", - text2vec = self.get_chinese_text2vec(), - ) - VECTOR_SEARCH_SCORE_THRESHOLD = 0 - VECTOR_SEARCH_TOP_K = 4 - CHUNK_SIZE = 512 - resp, prompt = self.qa_handle.get_knowledge_based_conent_test( - query = txt, - vs_path = self.kai_path, - score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD, - vector_search_top_k=VECTOR_SEARCH_TOP_K, - chunk_conent=True, - chunk_size=CHUNK_SIZE, - text2vec = self.get_chinese_text2vec(), - ) - self.threadLock.release() - return resp, prompt @Singleton class nougat_interface(): diff --git a/crazy_functions/vector_fns/general_file_loader.py b/crazy_functions/vector_fns/general_file_loader.py new file mode 100644 index 0000000..a512c48 --- /dev/null +++ b/crazy_functions/vector_fns/general_file_loader.py @@ -0,0 +1,70 @@ +# From project chatglm-langchain + + +from langchain.document_loaders import UnstructuredFileLoader +from langchain.text_splitter import CharacterTextSplitter +import re +from typing import List + +class ChineseTextSplitter(CharacterTextSplitter): + def __init__(self, pdf: bool = False, sentence_size: int = None, **kwargs): + super().__init__(**kwargs) + self.pdf = pdf + self.sentence_size = sentence_size + + def split_text1(self, text: str) -> List[str]: + if self.pdf: + text = re.sub(r"\n{3,}", "\n", text) + text = re.sub('\s', ' ', text) + text = text.replace("\n\n", "") + sent_sep_pattern = re.compile('([﹒﹔﹖﹗.。!?]["’”」』]{0,2}|(?=["‘“「『]{1,2}|$))') # del :; + sent_list = [] + for ele in sent_sep_pattern.split(text): + if sent_sep_pattern.match(ele) and sent_list: + sent_list[-1] += ele + elif ele: + sent_list.append(ele) + return sent_list + + def split_text(self, text: str) -> List[str]: ##此处需要进一步优化逻辑 + if self.pdf: + text = re.sub(r"\n{3,}", r"\n", text) + text = re.sub('\s', " ", text) + text = re.sub("\n\n", "", text) + + text = re.sub(r'([;;.!?。!?\?])([^”’])', r"\1\n\2", text) # 单字符断句符 + text = re.sub(r'(\.{6})([^"’”」』])', r"\1\n\2", text) # 英文省略号 + text = re.sub(r'(\…{2})([^"’”」』])', r"\1\n\2", text) # 中文省略号 + text = re.sub(r'([;;!?。!?\?]["’”」』]{0,2})([^;;!?,。!?\?])', r'\1\n\2', text) + # 如果双引号前有终止符,那么双引号才是句子的终点,把分句符\n放到双引号后,注意前面的几句都小心保留了双引号 + text = text.rstrip() # 段尾如果有多余的\n就去掉它 + # 很多规则中会考虑分号;,但是这里我把它忽略不计,破折号、英文双引号等同样忽略,需要的再做些简单调整即可。 + ls = [i for i in text.split("\n") if i] + for ele in ls: + if len(ele) > self.sentence_size: + ele1 = re.sub(r'([,,.]["’”」』]{0,2})([^,,.])', r'\1\n\2', ele) + ele1_ls = ele1.split("\n") + for ele_ele1 in ele1_ls: + if len(ele_ele1) > self.sentence_size: + ele_ele2 = re.sub(r'([\n]{1,}| {2,}["’”」』]{0,2})([^\s])', r'\1\n\2', ele_ele1) + ele2_ls = ele_ele2.split("\n") + for ele_ele2 in ele2_ls: + if len(ele_ele2) > self.sentence_size: + ele_ele3 = re.sub('( ["’”」』]{0,2})([^ ])', r'\1\n\2', ele_ele2) + ele2_id = ele2_ls.index(ele_ele2) + ele2_ls = ele2_ls[:ele2_id] + [i for i in ele_ele3.split("\n") if i] + ele2_ls[ + ele2_id + 1:] + ele_id = ele1_ls.index(ele_ele1) + ele1_ls = ele1_ls[:ele_id] + [i for i in ele2_ls if i] + ele1_ls[ele_id + 1:] + + id = ls.index(ele) + ls = ls[:id] + [i for i in ele1_ls if i] + ls[id + 1:] + return ls + +def load_file(filepath, sentence_size): + loader = UnstructuredFileLoader(filepath, mode="elements") + textsplitter = ChineseTextSplitter(pdf=False, sentence_size=sentence_size) + docs = loader.load_and_split(text_splitter=textsplitter) + # 
write_check_file(filepath, docs) + return docs + diff --git a/crazy_functions/vector_fns/vector_database.py b/crazy_functions/vector_fns/vector_database.py new file mode 100644 index 0000000..2fa2cee --- /dev/null +++ b/crazy_functions/vector_fns/vector_database.py @@ -0,0 +1,349 @@ +# From project chatglm-langchain + +import threading +from toolbox import Singleton +import os +import shutil +import os +import uuid +import tqdm +from langchain.vectorstores import FAISS +from langchain.docstore.document import Document +from typing import List, Tuple +import numpy as np +from crazy_functions.vector_fns.general_file_loader import load_file + +embedding_model_dict = { + "ernie-tiny": "nghuyong/ernie-3.0-nano-zh", + "ernie-base": "nghuyong/ernie-3.0-base-zh", + "text2vec-base": "shibing624/text2vec-base-chinese", + "text2vec": "GanymedeNil/text2vec-large-chinese", +} + +# Embedding model name +EMBEDDING_MODEL = "text2vec" + +# Embedding running device +EMBEDDING_DEVICE = "cpu" + +VS_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "vector_store") + +UPLOAD_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "content") + +# 基于上下文的prompt模版,请务必保留"{question}"和"{context}" +PROMPT_TEMPLATE = """已知信息: +{context} + +根据上述已知信息,简洁和专业的来回答用户的问题。如果无法从中得到答案,请说 “根据已知信息无法回答该问题” 或 “没有提供足够的相关信息”,不允许在答案中添加编造成分,答案请使用中文。 问题是:{question}""" + +# 文本分句长度 +SENTENCE_SIZE = 100 + +# 匹配后单段上下文长度 +CHUNK_SIZE = 250 + +# LLM input history length +LLM_HISTORY_LEN = 3 + +# return top-k text chunk from vector store +VECTOR_SEARCH_TOP_K = 5 + +# 知识检索内容相关度 Score, 数值范围约为0-1100,如果为0,则不生效,经测试设置为小于500时,匹配结果更精准 +VECTOR_SEARCH_SCORE_THRESHOLD = 0 + +NLTK_DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "nltk_data") + +FLAG_USER_NAME = uuid.uuid4().hex + +# 是否开启跨域,默认为False,如果需要开启,请设置为True +# is open cross domain +OPEN_CROSS_DOMAIN = False + +def similarity_search_with_score_by_vector( + self, embedding: List[float], k: int = 4 +) -> List[Tuple[Document, float]]: + + def seperate_list(ls: List[int]) -> List[List[int]]: + lists = [] + ls1 = [ls[0]] + for i in range(1, len(ls)): + if ls[i - 1] + 1 == ls[i]: + ls1.append(ls[i]) + else: + lists.append(ls1) + ls1 = [ls[i]] + lists.append(ls1) + return lists + + scores, indices = self.index.search(np.array([embedding], dtype=np.float32), k) + docs = [] + id_set = set() + store_len = len(self.index_to_docstore_id) + for j, i in enumerate(indices[0]): + if i == -1 or 0 < self.score_threshold < scores[0][j]: + # This happens when not enough docs are returned. 
+ continue + _id = self.index_to_docstore_id[i] + doc = self.docstore.search(_id) + if not self.chunk_conent: + if not isinstance(doc, Document): + raise ValueError(f"Could not find document for id {_id}, got {doc}") + doc.metadata["score"] = int(scores[0][j]) + docs.append(doc) + continue + id_set.add(i) + docs_len = len(doc.page_content) + for k in range(1, max(i, store_len - i)): + break_flag = False + for l in [i + k, i - k]: + if 0 <= l < len(self.index_to_docstore_id): + _id0 = self.index_to_docstore_id[l] + doc0 = self.docstore.search(_id0) + if docs_len + len(doc0.page_content) > self.chunk_size: + break_flag = True + break + elif doc0.metadata["source"] == doc.metadata["source"]: + docs_len += len(doc0.page_content) + id_set.add(l) + if break_flag: + break + if not self.chunk_conent: + return docs + if len(id_set) == 0 and self.score_threshold > 0: + return [] + id_list = sorted(list(id_set)) + id_lists = seperate_list(id_list) + for id_seq in id_lists: + for id in id_seq: + if id == id_seq[0]: + _id = self.index_to_docstore_id[id] + doc = self.docstore.search(_id) + else: + _id0 = self.index_to_docstore_id[id] + doc0 = self.docstore.search(_id0) + doc.page_content += " " + doc0.page_content + if not isinstance(doc, Document): + raise ValueError(f"Could not find document for id {_id}, got {doc}") + doc_score = min([scores[0][id] for id in [indices[0].tolist().index(i) for i in id_seq if i in indices[0]]]) + doc.metadata["score"] = int(doc_score) + docs.append(doc) + return docs + + +class LocalDocQA: + llm: object = None + embeddings: object = None + top_k: int = VECTOR_SEARCH_TOP_K + chunk_size: int = CHUNK_SIZE + chunk_conent: bool = True + score_threshold: int = VECTOR_SEARCH_SCORE_THRESHOLD + + def init_cfg(self, + top_k=VECTOR_SEARCH_TOP_K, + ): + + self.llm = None + self.top_k = top_k + + def init_knowledge_vector_store(self, + filepath, + vs_path: str or os.PathLike = None, + sentence_size=SENTENCE_SIZE, + text2vec=None): + loaded_files = [] + failed_files = [] + if isinstance(filepath, str): + if not os.path.exists(filepath): + print("路径不存在") + return None + elif os.path.isfile(filepath): + file = os.path.split(filepath)[-1] + try: + docs = load_file(filepath, sentence_size) + print(f"{file} 已成功加载") + loaded_files.append(filepath) + except Exception as e: + print(e) + print(f"{file} 未能成功加载") + return None + elif os.path.isdir(filepath): + docs = [] + for file in tqdm(os.listdir(filepath), desc="加载文件"): + fullfilepath = os.path.join(filepath, file) + try: + docs += load_file(fullfilepath, sentence_size) + loaded_files.append(fullfilepath) + except Exception as e: + print(e) + failed_files.append(file) + + if len(failed_files) > 0: + print("以下文件未能成功加载:") + for file in failed_files: + print(f"{file}\n") + + else: + docs = [] + for file in filepath: + try: + docs += load_file(file) + print(f"{file} 已成功加载") + loaded_files.append(file) + except Exception as e: + print(e) + print(f"{file} 未能成功加载") + + if len(docs) > 0: + print("文件加载完毕,正在生成向量库") + if vs_path and os.path.isdir(vs_path): + self.vector_store = FAISS.load_local(vs_path, text2vec) + self.vector_store.add_documents(docs) + else: + if not vs_path: assert False + self.vector_store = FAISS.from_documents(docs, text2vec) # docs 为Document列表 + + self.vector_store.save_local(vs_path) + return vs_path, loaded_files + else: + self.vector_store = FAISS.load_local(vs_path, text2vec) + return vs_path, loaded_files + + def get_loaded_file(self): + ds = self.vector_store.docstore + return 
set([ds._dict[k].metadata['source'].split(UPLOAD_ROOT_PATH)[-1] for k in ds._dict]) + + + # query 查询内容 + # vs_path 知识库路径 + # chunk_conent 是否启用上下文关联 + # score_threshold 搜索匹配score阈值 + # vector_search_top_k 搜索知识库内容条数,默认搜索5条结果 + # chunk_sizes 匹配单段内容的连接上下文长度 + def get_knowledge_based_conent_test(self, query, vs_path, chunk_conent, + score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD, + vector_search_top_k=VECTOR_SEARCH_TOP_K, chunk_size=CHUNK_SIZE, + text2vec=None): + self.vector_store = FAISS.load_local(vs_path, text2vec) + self.vector_store.chunk_conent = chunk_conent + self.vector_store.score_threshold = score_threshold + self.vector_store.chunk_size = chunk_size + + embedding = self.vector_store.embedding_function(query) + related_docs_with_score = similarity_search_with_score_by_vector(self.vector_store, embedding, k=vector_search_top_k) + + if not related_docs_with_score: + response = {"query": query, + "source_documents": []} + return response, "" + # prompt = f"{query}. You should answer this question using information from following documents: \n\n" + prompt = f"{query}. 你必须利用以下文档中包含的信息回答这个问题: \n\n---\n\n" + prompt += "\n\n".join([f"({k}): " + doc.page_content for k, doc in enumerate(related_docs_with_score)]) + prompt += "\n\n---\n\n" + prompt = prompt.encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars + # print(prompt) + response = {"query": query, "source_documents": related_docs_with_score} + return response, prompt + + + + +def construct_vector_store(vs_id, files, sentence_size, history, one_conent, one_content_segmentation, text2vec): + for file in files: + assert os.path.exists(file), "输入文件不存在" + import nltk + if NLTK_DATA_PATH not in nltk.data.path: nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path + local_doc_qa = LocalDocQA() + local_doc_qa.init_cfg() + vs_path = os.path.join(VS_ROOT_PATH, vs_id) + filelist = [] + if not os.path.exists(os.path.join(UPLOAD_ROOT_PATH, vs_id)): + os.makedirs(os.path.join(UPLOAD_ROOT_PATH, vs_id)) + if isinstance(files, list): + for file in files: + file_name = file.name if not isinstance(file, str) else file + filename = os.path.split(file_name)[-1] + shutil.copyfile(file_name, os.path.join(UPLOAD_ROOT_PATH, vs_id, filename)) + filelist.append(os.path.join(UPLOAD_ROOT_PATH, vs_id, filename)) + vs_path, loaded_files = local_doc_qa.init_knowledge_vector_store(filelist, vs_path, sentence_size, text2vec) + else: + vs_path, loaded_files = local_doc_qa.one_knowledge_add(vs_path, files, one_conent, one_content_segmentation, + sentence_size, text2vec) + if len(loaded_files): + file_status = f"已添加 {'、'.join([os.path.split(i)[-1] for i in loaded_files if i])} 内容至知识库,并已加载知识库,请开始提问" + else: + pass + # file_status = "文件未成功加载,请重新上传文件" + # print(file_status) + return local_doc_qa, vs_path + +@Singleton +class knowledge_archive_interface(): + def __init__(self) -> None: + self.threadLock = threading.Lock() + self.current_id = "" + self.kai_path = None + self.qa_handle = None + self.text2vec_large_chinese = None + + def get_chinese_text2vec(self): + if self.text2vec_large_chinese is None: + # < -------------------预热文本向量化模组--------------- > + from toolbox import ProxyNetworkActivate + print('Checking Text2vec ...') + from langchain.embeddings.huggingface import HuggingFaceEmbeddings + with ProxyNetworkActivate('Download_LLM'): # 临时地激活代理网络 + self.text2vec_large_chinese = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese") + + return self.text2vec_large_chinese + + + def feed_archive(self, file_manifest, id="default"): + 
self.threadLock.acquire() + # import uuid + self.current_id = id + from zh_langchain import construct_vector_store + self.qa_handle, self.kai_path = construct_vector_store( + vs_id=self.current_id, + files=file_manifest, + sentence_size=100, + history=[], + one_conent="", + one_content_segmentation="", + text2vec = self.get_chinese_text2vec(), + ) + self.threadLock.release() + + def get_current_archive_id(self): + return self.current_id + + def get_loaded_file(self): + return self.qa_handle.get_loaded_file() + + def answer_with_archive_by_id(self, txt, id): + self.threadLock.acquire() + if not self.current_id == id: + self.current_id = id + from zh_langchain import construct_vector_store + self.qa_handle, self.kai_path = construct_vector_store( + vs_id=self.current_id, + files=[], + sentence_size=100, + history=[], + one_conent="", + one_content_segmentation="", + text2vec = self.get_chinese_text2vec(), + ) + VECTOR_SEARCH_SCORE_THRESHOLD = 0 + VECTOR_SEARCH_TOP_K = 4 + CHUNK_SIZE = 512 + resp, prompt = self.qa_handle.get_knowledge_based_conent_test( + query = txt, + vs_path = self.kai_path, + score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD, + vector_search_top_k=VECTOR_SEARCH_TOP_K, + chunk_conent=True, + chunk_size=CHUNK_SIZE, + text2vec = self.get_chinese_text2vec(), + ) + self.threadLock.release() + return resp, prompt \ No newline at end of file diff --git a/crazy_functions/Langchain知识库.py b/crazy_functions/知识库问答.py similarity index 98% rename from crazy_functions/Langchain知识库.py rename to crazy_functions/知识库问答.py index 8433895..9d53848 100644 --- a/crazy_functions/Langchain知识库.py +++ b/crazy_functions/知识库问答.py @@ -68,7 +68,7 @@ def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro # chatbot.append(['知识库构建成功', "正在将知识库存储至cookie中"]) # yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # chatbot._cookies['langchain_plugin_embedding'] = kai.get_current_archive_id() - # chatbot._cookies['lock_plugin'] = 'crazy_functions.Langchain知识库->读取知识库作答' + # chatbot._cookies['lock_plugin'] = 'crazy_functions.知识库问答->读取知识库作答' # chatbot.append(['完成', "“根据知识库作答”函数插件已经接管问答系统, 提问吧! 
但注意, 您接下来不能再使用其他插件了,刷新页面即可以退出知识库问答模式。"]) chatbot.append(['构建完成', f"当前知识库内的有效文件:\n\n---\n\n{kai_files}\n\n---\n\n请切换至“知识库问答”插件进行知识库访问, 或者使用此插件继续上传更多文件。"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新 diff --git a/docs/translate_english.json b/docs/translate_english.json index 955dcaf..622a1d6 100644 --- a/docs/translate_english.json +++ b/docs/translate_english.json @@ -1666,7 +1666,7 @@ "连接bing搜索回答问题": "ConnectBingSearchAnswerQuestion", "联网的ChatGPT_bing版": "OnlineChatGPT_BingEdition", "Markdown翻译指定语言": "TranslateMarkdownToSpecifiedLanguage", - "Langchain知识库": "LangchainKnowledgeBase", + "知识库问答": "LangchainKnowledgeBase", "Latex英文纠错加PDF对比": "CorrectEnglishInLatexWithPDFComparison", "Latex输出PDF结果": "OutputPDFFromLatex", "Latex翻译中文并重新编译PDF": "TranslateChineseToEnglishInLatexAndRecompilePDF", diff --git a/docs/translate_japanese.json b/docs/translate_japanese.json index 2f80792..2184d4a 100644 --- a/docs/translate_japanese.json +++ b/docs/translate_japanese.json @@ -1487,7 +1487,7 @@ "数学动画生成manim": "GenerateMathematicalAnimationManim", "Markdown翻译指定语言": "TranslateMarkdownSpecifiedLanguage", "知识库问答": "KnowledgeBaseQuestionAnswer", - "Langchain知识库": "LangchainKnowledgeBase", + "知识库问答": "LangchainKnowledgeBase", "读取知识库作答": "ReadKnowledgeBaseAnswer", "交互功能模板函数": "InteractiveFunctionTemplateFunction", "交互功能函数模板": "InteractiveFunctionFunctionTemplate", diff --git a/docs/translate_std.json b/docs/translate_std.json index ee8b2c6..0c2a7bd 100644 --- a/docs/translate_std.json +++ b/docs/translate_std.json @@ -15,7 +15,7 @@ "删除所有本地对话历史记录": "DeleteAllLocalConversationHistoryRecords", "批量Markdown翻译": "BatchTranslateMarkdown", "连接bing搜索回答问题": "ConnectBingSearchAnswerQuestion", - "Langchain知识库": "LangchainKnowledgeBase", + "知识库问答": "LangchainKnowledgeBase", "Latex输出PDF结果": "OutputPDFFromLatex", "把字符太少的块清除为回车": "ClearBlocksWithTooFewCharactersToNewline", "Latex精细分解与转化": "DecomposeAndConvertLatex", diff --git a/docs/translate_traditionalchinese.json b/docs/translate_traditionalchinese.json index 9ca7cba..f0338fc 100644 --- a/docs/translate_traditionalchinese.json +++ b/docs/translate_traditionalchinese.json @@ -1463,7 +1463,7 @@ "数学动画生成manim": "GenerateMathematicalAnimationsWithManim", "Markdown翻译指定语言": "TranslateMarkdownToSpecifiedLanguage", "知识库问答": "KnowledgeBaseQA", - "Langchain知识库": "LangchainKnowledgeBase", + "知识库问答": "LangchainKnowledgeBase", "读取知识库作答": "ReadKnowledgeBaseAndAnswerQuestions", "交互功能模板函数": "InteractiveFunctionTemplateFunctions", "交互功能函数模板": "InteractiveFunctionFunctionTemplates", diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 8470895..aeefc19 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -48,11 +48,11 @@ if __name__ == "__main__": # for lang in ["English", "French", "Japanese", "Korean", "Russian", "Italian", "German", "Portuguese", "Arabic"]: # plugin_test(plugin='crazy_functions.批量Markdown翻译->Markdown翻译指定语言', main_input="README.md", advanced_arg={"advanced_arg": lang}) - # plugin_test(plugin='crazy_functions.Langchain知识库->知识库问答', main_input="./") + # plugin_test(plugin='crazy_functions.知识库问答->知识库问答', main_input="./") - # plugin_test(plugin='crazy_functions.Langchain知识库->读取知识库作答', main_input="What is the installation method?") + # plugin_test(plugin='crazy_functions.知识库问答->读取知识库作答', main_input="What is the installation method?") - # plugin_test(plugin='crazy_functions.Langchain知识库->读取知识库作答', main_input="远程云服务器部署?") + # plugin_test(plugin='crazy_functions.知识库问答->读取知识库作答', main_input="远程云服务器部署?") # 
plugin_test(plugin='crazy_functions.Latex输出PDF结果->Latex翻译中文并重新编译PDF', main_input="2210.03629")
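
A minimal usage sketch of the new crazy_functions.vector_fns.vector_database module added by this patch, assuming the repository root is on sys.path and the langchain, unstructured, faiss and sentence-transformers dependencies are installed; the input file and query below are illustrative only, and the snippet drives construct_vector_store and get_knowledge_based_conent_test directly rather than through the plugin UI:

    from langchain.embeddings.huggingface import HuggingFaceEmbeddings
    from crazy_functions.vector_fns.vector_database import construct_vector_store

    # Same embedding model that the patch preloads in get_chinese_text2vec()
    text2vec = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")

    # Build (or extend) the FAISS store for the knowledge-base id "default";
    # inputs are copied under crazy_functions/content/<vs_id> and the index is
    # saved under crazy_functions/vector_store/<vs_id>
    qa_handle, vs_path = construct_vector_store(
        vs_id="default",
        files=["./README.md"],        # illustrative input file
        sentence_size=100,            # sentence length used by ChineseTextSplitter
        history=[],
        one_conent="",
        one_content_segmentation="",
        text2vec=text2vec,
    )

    # Retrieve related chunks and assemble the final prompt for the LLM
    resp, prompt = qa_handle.get_knowledge_based_conent_test(
        query="远程云服务器部署?",    # illustrative query, as in tests/test_plugins.py
        vs_path=vs_path,
        chunk_conent=True,            # expand context around each matched chunk
        score_threshold=0,            # 0 disables the relevance-score filter
        vector_search_top_k=4,
        chunk_size=512,
        text2vec=text2vec,
    )
    print(prompt)                     # resp["source_documents"] holds the matched chunks

This is the same retrieval path that knowledge_archive_interface.answer_with_archive_by_id ultimately returns to the 知识库问答 plugin.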