test vector store on docker

This commit is contained in:
binary-husky 2023-12-08 22:22:01 +08:00
parent 21bccf69d2
commit 0cb7dd5280
5 changed files with 43 additions and 7 deletions

View File

@ -23,11 +23,13 @@ RUN pip3 install -r requirements.txt
# 装载项目文件,安装剩余依赖(必要)
RUN pip3 install torch --index-url https://download.pytorch.org/whl/cpu
RUN pip3 install langchain sentence-transformers unstructured[local-inference] faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk
COPY .cache /root/.cache
COPY . .
RUN pip3 install -r requirements.txt
# 非必要步骤,用于预热模块(可以删除)
RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
# 启动(必要)
CMD ["python3", "-u", "tests/test_vector_plugins.py"]
CMD ["python3", "-u", "main.py"]

View File

@ -159,7 +159,15 @@ def warm_up_modules():
enc.encode("模块预热", disallowed_special=())
enc = model_info["gpt-4"]['tokenizer']
enc.encode("模块预热", disallowed_special=())
def warm_up_vectordb():
print('正在执行一些模块的预热 ...')
from toolbox import ProxyNetworkActivate
with ProxyNetworkActivate("Warmup_Modules"):
import nltk
with ProxyNetworkActivate("Warmup_Modules"): nltk.download("punkt")
if __name__ == '__main__':
import os
os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染

View File

@ -242,7 +242,7 @@ class LocalDocQA:
def construct_vector_store(vs_id, vs_path, files, sentence_size, history, one_conent, one_content_segmentation, text2vec):
for file in files:
assert os.path.exists(file), "输入文件不存在"
assert os.path.exists(file), "输入文件不存在" + file
import nltk
if NLTK_DATA_PATH not in nltk.data.path: nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
local_doc_qa = LocalDocQA()

View File

@ -51,7 +51,7 @@ def 知识库文件注入(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
for sp in spl:
_, file_manifest_tmp, _ = get_files_from_everything(txt, type=f'.{sp}')
file_manifest += file_manifest_tmp
if len(file_manifest) == 0:
chatbot.append(["没有找到任何可读取文件", "当前支持的格式包括: txt, md, docx, pptx, pdf, json等"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

View File

@ -0,0 +1,26 @@
# 此Dockerfile适用于“无本地模型”的环境构建如果需要使用chatglm等本地模型请参考 docs/Dockerfile+ChatGLM
# 如何构建: 先修改 `config.py` 然后 docker build -t gpt-academic-nolocal-vs -f docs/GithubAction+NoLocal+Vectordb .
# 如何运行: docker run --rm -it --net=host gpt-academic-nolocal-vs
FROM python:3.11
# 指定路径
WORKDIR /gpt
# 装载项目文件
COPY . .
# 安装依赖
RUN pip3 install -r requirements.txt
# 安装知识库插件的额外依赖
RUN apt-get update && apt-get install libgl1 -y
RUN pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cpu
RUN pip3 install transformers protobuf langchain sentence-transformers faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk --upgrade
RUN pip3 install unstructured[all-docs] --upgrade
# 可选步骤,用于预热模块
RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
RUN python3 -c 'from check_proxy import warm_up_vectordb; warm_up_vectordb()'
# 启动
CMD ["python3", "-u", "main.py"]