Merge branch 'master' into frontier

commit d3f7267a63

.github/workflows/build-with-all-capacity-beta.yml (new file, vendored, 44 lines)
@@ -0,0 +1,44 @@
+# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
+name: build-with-all-capacity-beta
+
+on:
+  push:
+    branches:
+      - 'master'
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}_with_all_capacity_beta
+
+jobs:
+  build-and-push-image:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      - name: Log in to the Container registry
+        uses: docker/login-action@v2
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@v4
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v4
+        with:
+          context: .
+          push: true
+          file: docs/GithubAction+AllCapacityBeta
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}

@@ -404,7 +404,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
         result_pdf = pj(work_folder_modified, f'merge_diff.pdf')    # get pdf path
         promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot)    # promote file to web UI
     if modified_pdf_success:
-        yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history)    # refresh the Gradio frontend
+        yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 正在尝试生成对比PDF, 请稍后 ...', chatbot, history)    # refresh the Gradio frontend
         result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf')    # get pdf path
         origin_pdf = pj(work_folder_original, f'{main_file_original}.pdf')    # get pdf path
         if os.path.exists(pj(work_folder, '..', 'translation')):
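
The reworded message reflects that, after a successful compile, the plugin goes on to build a comparison PDF (the merge_diff.pdf promoted above). A rough sketch of how such a comparison PDF is typically produced with latexdiff (illustrative only; the function and variable names here are not from the plugin):

    import os
    import subprocess

    def make_diff_pdf(original_tex, modified_tex, out_dir):
        # latexdiff writes a marked-up source showing the differences ...
        diff_tex = os.path.join(out_dir, 'merge_diff.tex')
        with open(diff_tex, 'w') as f:
            subprocess.run(['latexdiff', original_tex, modified_tex], stdout=f, check=True)
        # ... which is then compiled like any other .tex file
        subprocess.run(['pdflatex', '-interaction=nonstopmode', 'merge_diff.tex'], cwd=out_dir, check=True)
        return os.path.join(out_dir, 'merge_diff.pdf')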

docs/GithubAction+AllCapacityBeta (new file, 53 lines)
@@ -0,0 +1,53 @@
+# docker build -t gpt-academic-all-capacity -f docs/GithubAction+AllCapacity --network=host --build-arg http_proxy=http://localhost:10881 --build-arg https_proxy=http://localhost:10881 .
+# docker build -t gpt-academic-all-capacity -f docs/GithubAction+AllCapacityBeta --network=host .
+# docker run -it --net=host gpt-academic-all-capacity bash
+
+# Build from an NVIDIA base image to support GPUs (the CUDA version in the host's nvidia-smi must be >= 11.3)
+FROM fuqingxu/11.3.1-runtime-ubuntu20.04-with-texlive:latest
+
+# use python3 as the system default python
+WORKDIR /gpt
+RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.8
+
+# # Optional step: switch the pip mirror (the following three lines can be deleted)
+# RUN echo '[global]' > /etc/pip.conf && \
+#     echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \
+#     echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf
+
+# Install pytorch
+RUN python3 -m pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu113
+# Prepare pip dependencies
+RUN python3 -m pip install openai numpy arxiv rich
+RUN python3 -m pip install colorama Markdown pygments pymupdf
+RUN python3 -m pip install python-docx moviepy pdfminer
+RUN python3 -m pip install zh_langchain==0.2.1 pypinyin
+RUN python3 -m pip install rarfile py7zr
+RUN python3 -m pip install aliyun-python-sdk-core==2.13.3 pyOpenSSL webrtcvad scipy git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git
+# Clone the repository
+WORKDIR /gpt
+RUN git clone --depth=1 https://github.com/binary-husky/gpt_academic.git
+WORKDIR /gpt/gpt_academic
+RUN git clone --depth=1 https://github.com/OpenLMLab/MOSS.git request_llms/moss
+
+RUN python3 -m pip install -r requirements.txt
+RUN python3 -m pip install -r request_llms/requirements_moss.txt
+RUN python3 -m pip install -r request_llms/requirements_qwen.txt
+RUN python3 -m pip install -r request_llms/requirements_chatglm.txt
+RUN python3 -m pip install -r request_llms/requirements_newbing.txt
+RUN python3 -m pip install nougat-ocr
+
+# Warm up the Tiktoken module
+RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
+
+# Install extra dependencies for the knowledge-base plugin
+RUN apt-get update && apt-get install libgl1 -y
+RUN pip3 install transformers protobuf langchain sentence-transformers faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk --upgrade
+RUN pip3 install unstructured[all-docs] --upgrade
+RUN python3 -c 'from check_proxy import warm_up_vectordb; warm_up_vectordb()'
+RUN rm -rf /usr/local/lib/python3.8/dist-packages/tests
+
+
+# COPY .cache /root/.cache
+# COPY config_private.py config_private.py
+# Launch
+CMD ["python3", "-u", "main.py"]
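
The two warm-up RUN steps bake cached assets into the image so the first request at runtime does not stall on downloads. As a minimal sketch of what the tiktoken warm-up amounts to (illustrative; the actual logic lives in check_proxy.py, which is not shown in this diff):

    # Fetch and cache a tokenizer's BPE files at build time instead of on first use.
    import tiktoken

    def warm_up_modules():
        enc = tiktoken.get_encoding('cl100k_base')  # downloads and caches the encoding
        enc.encode('warm up')                       # forces full initialization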

@@ -17,10 +17,10 @@ RUN apt-get update && apt-get install libgl1 -y
 RUN pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cpu
 RUN pip3 install transformers protobuf langchain sentence-transformers faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk --upgrade
 RUN pip3 install unstructured[all-docs] --upgrade
+RUN python3 -c 'from check_proxy import warm_up_vectordb; warm_up_vectordb()'
 
 # Optional step: warm up the modules
 RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
-RUN python3 -c 'from check_proxy import warm_up_vectordb; warm_up_vectordb()'
 
 # Launch
 CMD ["python3", "-u", "main.py"]

@@ -6,5 +6,3 @@ sentencepiece
 numpy
 onnxruntime
 sentencepiece
-streamlit
-streamlit-chat

@@ -5,5 +5,4 @@ accelerate
 matplotlib
 huggingface_hub
 triton
-streamlit
 

@@ -3,6 +3,7 @@ pypdf2==2.12.1
 tiktoken>=0.3.3
 requests[socks]
+pydantic==1.10.11
 protobuf==3.18
 transformers>=4.27.1
 scipdf_parser>=0.52
 python-markdown-math

@@ -1007,14 +1007,19 @@ def clip_history(inputs, history, tokenizer, max_token_limit):
     def get_token_num(txt):
         return len(tokenizer.encode(txt, disallowed_special=()))
     input_token_num = get_token_num(inputs)
+
+    if max_token_limit < 5000: output_token_expect = 256    # 4k & 2k models
+    elif max_token_limit < 9000: output_token_expect = 512  # 8k models
+    else: output_token_expect = 1024                        # 16k & 32k models
+
     if input_token_num < max_token_limit * 3 / 4:
         # when the input takes less than 3/4 of the limit, clip as follows:
         # 1. reserve room for the input
         max_token_limit = max_token_limit - input_token_num
         # 2. reserve room for the output
-        max_token_limit = max_token_limit - 128
+        max_token_limit = max_token_limit - output_token_expect
         # 3. if the remaining budget is too small, clear the history outright
-        if max_token_limit < 128:
+        if max_token_limit < output_token_expect:
             history = []
             return history
     else:
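
This hunk replaces the hard-coded 128-token output reserve with a budget scaled to the model's context size. A worked example of the new arithmetic (numbers are illustrative; the logic is taken directly from the hunk above):

    max_token_limit = 8192       # an 8k model
    input_token_num = 1000
    output_token_expect = 512    # 5000 <= 8192 < 9000 -> the 8k-model branch
    # input is under 3/4 of the limit (1000 < 6144), so reserve input + output:
    budget = max_token_limit - input_token_num - output_token_expect   # 6680
    # 6680 >= output_token_expect, so the history is clipped to 6680 tokens
    # rather than cleared; under the old code the output reserve was only 128.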