diff --git a/.github/workflows/build-with-all-capacity-beta.yml b/.github/workflows/build-with-all-capacity-beta.yml new file mode 100644 index 0000000..5a2a1a5 --- /dev/null +++ b/.github/workflows/build-with-all-capacity-beta.yml @@ -0,0 +1,44 @@ +# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages +name: build-with-all-capacity-beta + +on: + push: + branches: + - 'master' + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }}_with_all_capacity_beta + +jobs: + build-and-push-image: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Log in to the Container registry + uses: docker/login-action@v2 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v4 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + + - name: Build and push Docker image + uses: docker/build-push-action@v4 + with: + context: . + push: true + file: docs/GithubAction+AllCapacityBeta + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/crazy_functions/latex_fns/latex_actions.py b/crazy_functions/latex_fns/latex_actions.py index 113a278..b43d7d2 100644 --- a/crazy_functions/latex_fns/latex_actions.py +++ b/crazy_functions/latex_fns/latex_actions.py @@ -404,7 +404,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f result_pdf = pj(work_folder_modified, f'merge_diff.pdf') # get pdf path promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI if modified_pdf_success: - yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history) # 刷新Gradio前端界面 + yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 正在尝试生成对比PDF, 请稍后 ...', chatbot, history) # 刷新Gradio前端界面 result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path origin_pdf = pj(work_folder_original, f'{main_file_original}.pdf') # get pdf path if os.path.exists(pj(work_folder, '..', 'translation')): diff --git a/docs/GithubAction+AllCapacityBeta b/docs/GithubAction+AllCapacityBeta new file mode 100644 index 0000000..d3a06ee --- /dev/null +++ b/docs/GithubAction+AllCapacityBeta @@ -0,0 +1,53 @@ +# docker build -t gpt-academic-all-capacity -f docs/GithubAction+AllCapacity --network=host --build-arg http_proxy=http://localhost:10881 --build-arg https_proxy=http://localhost:10881 . +# docker build -t gpt-academic-all-capacity -f docs/GithubAction+AllCapacityBeta --network=host . +# docker run -it --net=host gpt-academic-all-capacity bash + +# 从NVIDIA源,从而支持显卡(检查宿主的nvidia-smi中的cuda版本必须>=11.3) +FROM fuqingxu/11.3.1-runtime-ubuntu20.04-with-texlive:latest + +# use python3 as the system default python +WORKDIR /gpt +RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.8 + +# # 非必要步骤,更换pip源 (以下三行,可以删除) +# RUN echo '[global]' > /etc/pip.conf && \ +# echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \ +# echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf + +# 下载pytorch +RUN python3 -m pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu113 +# 准备pip依赖 +RUN python3 -m pip install openai numpy arxiv rich +RUN python3 -m pip install colorama Markdown pygments pymupdf +RUN python3 -m pip install python-docx moviepy pdfminer +RUN python3 -m pip install zh_langchain==0.2.1 pypinyin +RUN python3 -m pip install rarfile py7zr +RUN python3 -m pip install aliyun-python-sdk-core==2.13.3 pyOpenSSL webrtcvad scipy git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git +# 下载分支 +WORKDIR /gpt +RUN git clone --depth=1 https://github.com/binary-husky/gpt_academic.git +WORKDIR /gpt/gpt_academic +RUN git clone --depth=1 https://github.com/OpenLMLab/MOSS.git request_llms/moss + +RUN python3 -m pip install -r requirements.txt +RUN python3 -m pip install -r request_llms/requirements_moss.txt +RUN python3 -m pip install -r request_llms/requirements_qwen.txt +RUN python3 -m pip install -r request_llms/requirements_chatglm.txt +RUN python3 -m pip install -r request_llms/requirements_newbing.txt +RUN python3 -m pip install nougat-ocr + +# 预热Tiktoken模块 +RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()' + +# 安装知识库插件的额外依赖 +RUN apt-get update && apt-get install libgl1 -y +RUN pip3 install transformers protobuf langchain sentence-transformers faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk --upgrade +RUN pip3 install unstructured[all-docs] --upgrade +RUN python3 -c 'from check_proxy import warm_up_vectordb; warm_up_vectordb()' +RUN rm -rf /usr/local/lib/python3.8/dist-packages/tests + + +# COPY .cache /root/.cache +# COPY config_private.py config_private.py +# 启动 +CMD ["python3", "-u", "main.py"] diff --git a/docs/GithubAction+NoLocal+Vectordb b/docs/GithubAction+NoLocal+Vectordb index 98595e3..45074d9 100644 --- a/docs/GithubAction+NoLocal+Vectordb +++ b/docs/GithubAction+NoLocal+Vectordb @@ -17,10 +17,10 @@ RUN apt-get update && apt-get install libgl1 -y RUN pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cpu RUN pip3 install transformers protobuf langchain sentence-transformers faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk --upgrade RUN pip3 install unstructured[all-docs] --upgrade +RUN python3 -c 'from check_proxy import warm_up_vectordb; warm_up_vectordb()' # 可选步骤,用于预热模块 RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()' -RUN python3 -c 'from check_proxy import warm_up_vectordb; warm_up_vectordb()' # 启动 CMD ["python3", "-u", "main.py"] diff --git a/request_llms/requirements_chatglm_onnx.txt b/request_llms/requirements_chatglm_onnx.txt index 5481147..2cd11f6 100644 --- a/request_llms/requirements_chatglm_onnx.txt +++ b/request_llms/requirements_chatglm_onnx.txt @@ -6,5 +6,3 @@ sentencepiece numpy onnxruntime sentencepiece -streamlit -streamlit-chat diff --git a/request_llms/requirements_moss.txt b/request_llms/requirements_moss.txt index c27907c..544b25f 100644 --- a/request_llms/requirements_moss.txt +++ b/request_llms/requirements_moss.txt @@ -5,5 +5,4 @@ accelerate matplotlib huggingface_hub triton -streamlit diff --git a/requirements.txt b/requirements.txt index a5782f7..e253415 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ pypdf2==2.12.1 tiktoken>=0.3.3 requests[socks] pydantic==1.10.11 +protobuf==3.18 transformers>=4.27.1 scipdf_parser>=0.52 python-markdown-math diff --git a/toolbox.py b/toolbox.py index 8d91035..bb4ec66 100644 --- a/toolbox.py +++ b/toolbox.py @@ -1007,14 +1007,19 @@ def clip_history(inputs, history, tokenizer, max_token_limit): def get_token_num(txt): return len(tokenizer.encode(txt, disallowed_special=())) input_token_num = get_token_num(inputs) + + if max_token_limit < 5000: output_token_expect = 256 # 4k & 2k models + elif max_token_limit < 9000: output_token_expect = 512 # 8k models + else: output_token_expect = 1024 # 16k & 32k models + if input_token_num < max_token_limit * 3 / 4: # 当输入部分的token占比小于限制的3/4时,裁剪时 # 1. 把input的余量留出来 max_token_limit = max_token_limit - input_token_num # 2. 把输出用的余量留出来 - max_token_limit = max_token_limit - 128 + max_token_limit = max_token_limit - output_token_expect # 3. 如果余量太小了,直接清除历史 - if max_token_limit < 128: + if max_token_limit < output_token_expect: history = [] return history else: