Merge branch 'master' into frontier

commit d3f7267a63

.github/workflows/build-with-all-capacity-beta.yml (new file, vendored, 44 lines)
@@ -0,0 +1,44 @@
+# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
+name: build-with-all-capacity-beta
+
+on:
+  push:
+    branches:
+      - 'master'
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}_with_all_capacity_beta
+
+jobs:
+  build-and-push-image:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      - name: Log in to the Container registry
+        uses: docker/login-action@v2
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@v4
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v4
+        with:
+          context: .
+          push: true
+          file: docs/GithubAction+AllCapacityBeta
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}

@@ -404,7 +404,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
         result_pdf = pj(work_folder_modified, f'merge_diff.pdf')    # get pdf path
         promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot)    # promote file to web UI
     if modified_pdf_success:
-        yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history)    # refresh the Gradio frontend
+        yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 正在尝试生成对比PDF, 请稍后 ...', chatbot, history)    # refresh the Gradio frontend
         result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf')    # get pdf path
         origin_pdf = pj(work_folder_original, f'{main_file_original}.pdf')    # get pdf path
         if os.path.exists(pj(work_folder, '..', 'translation')):
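
The reworded message reflects that, after a successful compile, the plugin goes on to build a comparison PDF (the merge_diff.pdf promoted above). A rough sketch of how such a comparison PDF is typically produced with latexdiff (illustrative only; the function and variable names here are not from the plugin):

    import os
    import subprocess

    def make_diff_pdf(original_tex, modified_tex, out_dir):
        # latexdiff writes a marked-up source showing the differences ...
        diff_tex = os.path.join(out_dir, 'merge_diff.tex')
        with open(diff_tex, 'w') as f:
            subprocess.run(['latexdiff', original_tex, modified_tex], stdout=f, check=True)
        # ... which is then compiled like any other .tex file
        subprocess.run(['pdflatex', '-interaction=nonstopmode', 'merge_diff.tex'], cwd=out_dir, check=True)
        return os.path.join(out_dir, 'merge_diff.pdf')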

docs/GithubAction+AllCapacityBeta (new file, 53 lines)
@@ -0,0 +1,53 @@
+# docker build -t gpt-academic-all-capacity -f docs/GithubAction+AllCapacity --network=host --build-arg http_proxy=http://localhost:10881 --build-arg https_proxy=http://localhost:10881 .
+# docker build -t gpt-academic-all-capacity -f docs/GithubAction+AllCapacityBeta --network=host .
+# docker run -it --net=host gpt-academic-all-capacity bash
+
+# Build from an NVIDIA base image to support GPUs (the CUDA version in the host's nvidia-smi must be >= 11.3)
+FROM fuqingxu/11.3.1-runtime-ubuntu20.04-with-texlive:latest
+
+# use python3 as the system default python
+WORKDIR /gpt
+RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.8
+
+# # Optional step: switch the pip mirror (the following three lines can be deleted)
+# RUN echo '[global]' > /etc/pip.conf && \
+#     echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \
+#     echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf
+
+# Install pytorch
+RUN python3 -m pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu113
+# Prepare pip dependencies
+RUN python3 -m pip install openai numpy arxiv rich
+RUN python3 -m pip install colorama Markdown pygments pymupdf
+RUN python3 -m pip install python-docx moviepy pdfminer
+RUN python3 -m pip install zh_langchain==0.2.1 pypinyin
+RUN python3 -m pip install rarfile py7zr
+RUN python3 -m pip install aliyun-python-sdk-core==2.13.3 pyOpenSSL webrtcvad scipy git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git
+# Clone the repository
+WORKDIR /gpt
+RUN git clone --depth=1 https://github.com/binary-husky/gpt_academic.git
+WORKDIR /gpt/gpt_academic
+RUN git clone --depth=1 https://github.com/OpenLMLab/MOSS.git request_llms/moss
+
+RUN python3 -m pip install -r requirements.txt
+RUN python3 -m pip install -r request_llms/requirements_moss.txt
+RUN python3 -m pip install -r request_llms/requirements_qwen.txt
+RUN python3 -m pip install -r request_llms/requirements_chatglm.txt
+RUN python3 -m pip install -r request_llms/requirements_newbing.txt
+RUN python3 -m pip install nougat-ocr
+
+# Warm up the Tiktoken module
+RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
+
+# Install extra dependencies for the knowledge-base plugin
+RUN apt-get update && apt-get install libgl1 -y
+RUN pip3 install transformers protobuf langchain sentence-transformers faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk --upgrade
+RUN pip3 install unstructured[all-docs] --upgrade
+RUN python3 -c 'from check_proxy import warm_up_vectordb; warm_up_vectordb()'
+RUN rm -rf /usr/local/lib/python3.8/dist-packages/tests
+
+
+# COPY .cache /root/.cache
+# COPY config_private.py config_private.py
+# Launch
+CMD ["python3", "-u", "main.py"]
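
The two warm-up RUN steps bake cached assets into the image so the first request at runtime does not stall on downloads. As a minimal sketch of what the tiktoken warm-up amounts to (illustrative; the actual logic lives in check_proxy.py, which is not shown in this diff):

    # Fetch and cache a tokenizer's BPE files at build time instead of on first use.
    import tiktoken

    def warm_up_modules():
        enc = tiktoken.get_encoding('cl100k_base')  # downloads and caches the encoding
        enc.encode('warm up')                       # forces full initialization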

@@ -17,10 +17,10 @@ RUN apt-get update && apt-get install libgl1 -y
 RUN pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cpu
 RUN pip3 install transformers protobuf langchain sentence-transformers faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk --upgrade
 RUN pip3 install unstructured[all-docs] --upgrade
+RUN python3 -c 'from check_proxy import warm_up_vectordb; warm_up_vectordb()'
 
 # Optional step: warm up the modules
 RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
-RUN python3 -c 'from check_proxy import warm_up_vectordb; warm_up_vectordb()'
 
 # Launch
 CMD ["python3", "-u", "main.py"]

@@ -6,5 +6,3 @@ sentencepiece
 numpy
 onnxruntime
 sentencepiece
-streamlit
-streamlit-chat

@@ -5,5 +5,4 @@ accelerate
 matplotlib
 huggingface_hub
 triton
-streamlit
 

@@ -3,6 +3,7 @@ pypdf2==2.12.1
 tiktoken>=0.3.3
 requests[socks]
+pydantic==1.10.11
 protobuf==3.18
 transformers>=4.27.1
 scipdf_parser>=0.52
 python-markdown-math

@@ -1007,14 +1007,19 @@ def clip_history(inputs, history, tokenizer, max_token_limit):
     def get_token_num(txt):
         return len(tokenizer.encode(txt, disallowed_special=()))
     input_token_num = get_token_num(inputs)
+
+    if max_token_limit < 5000: output_token_expect = 256    # 4k & 2k models
+    elif max_token_limit < 9000: output_token_expect = 512  # 8k models
+    else: output_token_expect = 1024                        # 16k & 32k models
+
     if input_token_num < max_token_limit * 3 / 4:
         # when the input takes less than 3/4 of the limit, clip as follows:
         # 1. reserve room for the input
         max_token_limit = max_token_limit - input_token_num
         # 2. reserve room for the output
-        max_token_limit = max_token_limit - 128
+        max_token_limit = max_token_limit - output_token_expect
         # 3. if the remaining budget is too small, clear the history outright
-        if max_token_limit < 128:
+        if max_token_limit < output_token_expect:
             history = []
             return history
     else:
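
This hunk replaces the hard-coded 128-token output reserve with a budget scaled to the model's context size. A worked example of the new arithmetic (numbers are illustrative; the logic is taken directly from the hunk above):

    max_token_limit = 8192       # an 8k model
    input_token_num = 1000
    output_token_expect = 512    # 5000 <= 8192 < 9000 -> the 8k-model branch
    # input is under 3/4 of the limit (1000 < 6144), so reserve input + output:
    budget = max_token_limit - input_token_num - output_token_expect   # 6680
    # 6680 >= output_token_expect, so the history is clipped to 6680 tokens
    # rather than cleared; under the old code the output reserve was only 128.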