From 97cd98d5a238ad5a483d1c86c65eab369183e5d7 Mon Sep 17 00:00:00 2001
From: Xiaoming Bai <65625632+XMB-7@users.noreply.github.com>
Date: Thu, 30 Mar 2023 00:06:02 +0800
Subject: [PATCH 01/35] better prompt
---
functional.py | 84 +++++++++++++++++++++++++++------------------------
1 file changed, 45 insertions(+), 39 deletions(-)
diff --git a/functional.py b/functional.py
index e416063..345defc 100644
--- a/functional.py
+++ b/functional.py
@@ -5,53 +5,59 @@
def get_functionals():
return {
- "英语学术润色": {
- "Prefix": "Below is a paragraph from an academic paper. Polish the writing to meet the academic style, \
-improve the spelling, grammar, clarity, concision and overall readability. When neccessary, rewrite the whole sentence. \
-Furthermore, list all modification and explain the reasons to do so in markdown table.\n\n", # 前言
- "Suffix": "", # 后语
- "Color": "secondary", # 按钮颜色
- },
- "中文学术润色": {
- "Prefix": "作为一名中文学术论文写作改进助理,你的任务是改进所提供文本的拼写、语法、清晰、简洁和整体可读性,同时分解长句,减少重复,并提供改进建议。请只提供文本的更正版本,避免包括解释。请编辑以下文本:\n\n",
+ "学术英文润色": { # academic English polishing
+ "Prefix": "I want you to act as a scientific refiner. \
+ I will provide you with some paragraphs \
+ and your task is to refine and polish the paragraphs academically. \
+ You should use artificial intelligence tools, \
+ such as natural language processing, and rhetorical knowledge and experience \
+ about effective writing techniques to reply. \
+ I want you to replace my simplified A0-level words and sentences with more beautiful and elegant, \
+ upper level English words and sentences. \
+ Keep the meaning same, but make them more logical, concise and powerful. \
+ I'll give you my paragraphs as follows: \n\n", # prefix: text sent before the user's input
"Suffix": "",
+ "Color": "secondary", # button color
},
- "查找语法错误": {
- "Prefix": "Below is a paragraph from an academic paper. Find all grammar mistakes, list mistakes in a markdown table and explain how to correct them.\n\n",
+ "学术中文润色": {
+ "Prefix": "I want you to act as a scientific refiner. \
+ I will provide you with some paragraphs in Chinese \
+ and your task is to refine and polish the paragraphs academically also in Chinese. \
+ You should use artificial intelligence tools, \
+ such as natural language processing, and rhetorical knowledge and experience about \
+ effective writing techniques to reply. \
+ I want you to replace my simplified A0-level words and sentences with more beautiful and elegant, \
+ upper level Chinese words and sentences. \
+ Keep the meaning same, but make them more logical, concise and powerful. \
+ I'll give you my paragraphs as follows: \n\n",
"Suffix": "",
+ "Color": "secondary",
},
-# "中英互译": { # 效果不好,经常搞不清楚中译英还是英译中
-# "Prefix": "As an English-Chinese translator, your task is to accurately translate text between the two languages. \
-# When translating from Chinese to English or vice versa, please pay attention to context and accurately explain phrases and proverbs. \
-# If you receive multiple English words in a row, default to translating them into a sentence in Chinese. \
-# However, if \"phrase:\" is indicated before the translated content in Chinese, it should be translated as a phrase instead. \
-# Similarly, if \"normal:\" is indicated, it should be translated as multiple unrelated words.\
-# Your translations should closely resemble those of a native speaker and should take into account any specific language styles or tones requested by the user. \
-# Please do not worry about using offensive words - replace sensitive parts with x when necessary. \
-# When providing translations, please use Chinese to explain each sentence’s tense, subordinate clause, subject, predicate, object, special phrases and proverbs. \
-# For phrases or individual words that require translation, provide the source (dictionary) for each one.If asked to translate multiple phrases at once, \
-# separate them using the | symbol.Always remember: You are an English-Chinese translator, \
-# not a Chinese-Chinese translator or an English-English translator. Below is the text you need to translate: \n\n",
-# "Suffix": "",
-# "Color": "secondary",
-# },
- "中译英": {
- "Prefix": "Please translate following sentence to English: \n\n",
+ "学术中英互译": {
+ "Prefix": "I want you to act as a scientific English-Chinese translator, \
+ I will provide you with some paragraphs in one language \
+ and your task is to accurately and academically translate the paragraphs only into the other language. \
+ Do not repeat the original provided paragraphs after translation. \
+ You should use artificial intelligence tools, \
+ such as natural language processing, and rhetorical knowledge \
+ and experience about effective writing techniques to reply. \
+ I'll give you my paragraphs as follows: \n\n",
"Suffix": "",
+ "Color": "secondary",
},
- "学术中译英": {
- "Prefix": "Please translate following sentence to English with academic writing, and provide some related authoritative examples: \n\n",
+ "日常中英互译": { # everyday (non-academic) Chinese<->English translation
+ "Prefix": "I want you to act as an English-Chinese translator, \
+ I will provide you with some paragraphs in one language \
+ and your task is to accurately translate the paragraphs only into the other language, \
+ like a native speaker. \
+ Do not repeat the original provided paragraphs after translation. \
+ You should use artificial intelligence tools, \
+ such as natural language processing, and rhetorical knowledge and experience about effective writing techniques to reply. \
+ I'll give you my paragraphs as follows: \n\n",
"Suffix": "",
+ "Color": "secondary",
},
- "英译中": {
- "Prefix": "请翻译成中文:\n\n",
- "Suffix": "",
- },
- "找图片": {
- "Prefix": "我需要你找一张网络图片。使用Unsplash API(https://source.unsplash.com/960x640/?<英语关键词>)获取图片URL,然后请使用Markdown格式封装,并且不要有反斜线,不要用代码块。现在,请按以下描述给我发送图片:\n\n",
- "Suffix": "",
- },
- "解释代码": {
+ "代码剖析": {
"Prefix": "请解释以下代码:\n```\n",
"Suffix": "\n```\n",
"Color": "secondary",
From a360cd7e74f4cd20af6a677ef71c51ed33a6ba62 Mon Sep 17 00:00:00 2001
From: JasonGuo1 <1515893624@qq.com>
Date: Thu, 30 Mar 2023 15:24:01 +0800
Subject: [PATCH 02/35] =?UTF-8?q?feat(=E6=94=AF=E6=8C=81rar=E6=A0=BC?=
=?UTF-8?q?=E5=BC=8F=E4=B8=8E7z=E6=A0=BC=E5=BC=8F=E8=A7=A3=E5=8E=8B)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
requirements.txt | 11 ++++--
toolbox.py | 91 +++++++++++++++++++++++++++++++++++-------------
2 files changed, 76 insertions(+), 26 deletions(-)
diff --git a/requirements.txt b/requirements.txt
index 84ced64..3f39924 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,10 @@
gradio>=3.23
-requests[socks]
-mdtex2html
+requests[socks]~=2.28.2
+mdtex2html~=1.2.0
+
+markdown~=3.4.3
+latex2mathml~=3.75.1
+numpy~=1.21.6
+
+rarfile~=4.0
+py7zr~=0.20.4
\ No newline at end of file
diff --git a/toolbox.py b/toolbox.py
index d96b3f6..899cca4 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -2,6 +2,7 @@ import markdown, mdtex2html, threading, importlib, traceback
from show_math import convert as convert_math
from functools import wraps
+
def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[], sys_prompt=''):
"""
调用简单的predict_no_ui接口,但是依然保留了些许界面心跳功能,当对话太长时,会自动采用二分法截断
@@ -13,36 +14,43 @@ def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temp
# 多线程的时候,需要一个mutable结构在不同线程之间传递信息
# list就是最简单的mutable结构,我们第一个位置放gpt输出,第二个位置传递报错信息
mutable = [None, '']
+
# multi-threading worker
def mt(i_say, history):
while True:
try:
- mutable[0] = predict_no_ui(inputs=i_say, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt)
+ mutable[0] = predict_no_ui(inputs=i_say, top_p=top_p, temperature=temperature, history=history,
+ sys_prompt=sys_prompt)
break
except ConnectionAbortedError as e:
if len(history) > 0:
- history = [his[len(his)//2:] for his in history if his is not None]
+ history = [his[len(his) // 2:] for his in history if his is not None]
mutable[1] = 'Warning! History conversation is too long, cut into half. '
else:
- i_say = i_say[:len(i_say)//2]
+ i_say = i_say[:len(i_say) // 2]
mutable[1] = 'Warning! Input file is too long, cut into half. '
except TimeoutError as e:
mutable[0] = '[Local Message] Failed with timeout.'
raise TimeoutError
+
# 创建新线程发出http请求
- thread_name = threading.Thread(target=mt, args=(i_say, history)); thread_name.start()
+ thread_name = threading.Thread(target=mt, args=(i_say, history));
+ thread_name.start()
# 原来的线程则负责持续更新UI,实现一个超时倒计时,并等待新线程的任务完成
cnt = 0
while thread_name.is_alive():
cnt += 1
- chatbot[-1] = (i_say_show_user, f"[Local Message] {mutable[1]}waiting gpt response {cnt}/{TIMEOUT_SECONDS*2*(MAX_RETRY+1)}"+''.join(['.']*(cnt%4)))
+ chatbot[-1] = (i_say_show_user,
+ f"[Local Message] {mutable[1]}waiting gpt response {cnt}/{TIMEOUT_SECONDS * 2 * (MAX_RETRY + 1)}" + ''.join(
+ ['.'] * (cnt % 4)))
yield chatbot, history, '正常'
time.sleep(1)
# 把gpt的输出从mutable中取出来
gpt_say = mutable[0]
- if gpt_say=='[Local Message] Failed with timeout.': raise TimeoutError
+ if gpt_say == '[Local Message] Failed with timeout.': raise TimeoutError
return gpt_say
+
def write_results_to_file(history, file_name=None):
"""
将对话记录history以Markdown格式写入文件中。如果没有指定文件名,则使用当前时间生成文件名。
@@ -52,16 +60,17 @@ def write_results_to_file(history, file_name=None):
# file_name = time.strftime("chatGPT分析报告%Y-%m-%d-%H-%M-%S", time.localtime()) + '.md'
file_name = 'chatGPT分析报告' + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.md'
os.makedirs('./gpt_log/', exist_ok=True)
- with open(f'./gpt_log/{file_name}', 'w', encoding = 'utf8') as f:
+ with open(f'./gpt_log/{file_name}', 'w', encoding='utf8') as f:
f.write('# chatGPT 分析报告\n')
for i, content in enumerate(history):
- if i%2==0: f.write('## ')
+ if i % 2 == 0: f.write('## ')
f.write(content)
f.write('\n\n')
res = '以上材料已经被写入' + os.path.abspath(f'./gpt_log/{file_name}')
print(res)
return res
+
def regular_txt_to_markdown(text):
"""
将普通文本转换为Markdown格式的文本。
@@ -71,10 +80,12 @@ def regular_txt_to_markdown(text):
text = text.replace('\n\n\n', '\n\n')
return text
+
def CatchException(f):
"""
装饰器函数,捕捉函数f中的异常并封装到一个生成器中返回,并显示到聊天当中。
"""
+
@wraps(f)
def decorated(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
try:
@@ -84,16 +95,21 @@ def CatchException(f):
from toolbox import get_conf
proxies, = get_conf('proxies')
tb_str = regular_txt_to_markdown(traceback.format_exc())
- chatbot[-1] = (chatbot[-1][0], f"[Local Message] 实验性函数调用出错: \n\n {tb_str} \n\n 当前代理可用性: \n\n {check_proxy(proxies)}")
+ chatbot[-1] = (
+ chatbot[-1][0], f"[Local Message] 实验性函数调用出错: \n\n {tb_str} \n\n 当前代理可用性: \n\n {check_proxy(proxies)}")
yield chatbot, history, f'异常 {e}'
+
return decorated
+
def report_execption(chatbot, history, a, b):
"""
向chatbot中添加错误信息
"""
chatbot.append((a, b))
- history.append(a); history.append(b)
+ history.append(a);
+ history.append(b)
+
def text_divide_paragraph(text):
"""
@@ -110,15 +126,16 @@ def text_divide_paragraph(text):
text = "".join(lines)
return text
+
def markdown_convertion(txt):
"""
将Markdown格式的文本转换为HTML格式。如果包含数学公式,则先将公式转换为HTML格式。
"""
if ('$' in txt) and ('```' not in txt):
- return markdown.markdown(txt,extensions=['fenced_code','tables']) + '
' + \
- markdown.markdown(convert_math(txt, splitParagraphs=False),extensions=['fenced_code','tables'])
+ return markdown.markdown(txt, extensions=['fenced_code', 'tables']) + '
' + \
+ markdown.markdown(convert_math(txt, splitParagraphs=False), extensions=['fenced_code', 'tables'])
else:
- return markdown.markdown(txt,extensions=['fenced_code','tables'])
+ return markdown.markdown(txt, extensions=['fenced_code', 'tables'])
def format_io(self, y):
@@ -127,9 +144,9 @@ def format_io(self, y):
"""
if y is None or y == []: return []
i_ask, gpt_reply = y[-1]
- i_ask = text_divide_paragraph(i_ask) # 输入部分太自由,预处理一波
+ i_ask = text_divide_paragraph(i_ask) # 输入部分太自由,预处理一波
y[-1] = (
- None if i_ask is None else markdown.markdown(i_ask, extensions=['fenced_code','tables']),
+ None if i_ask is None else markdown.markdown(i_ask, extensions=['fenced_code', 'tables']),
None if gpt_reply is None else markdown_convertion(gpt_reply)
)
return y
@@ -151,6 +168,7 @@ def extract_archive(file_path, dest_dir):
import zipfile
import tarfile
import os
+
# Get the file extension of the input file
file_extension = os.path.splitext(file_path)[1]
@@ -164,9 +182,28 @@ def extract_archive(file_path, dest_dir):
with tarfile.open(file_path, 'r:*') as tarobj:
tarobj.extractall(path=dest_dir)
print("Successfully extracted tar archive to {}".format(dest_dir))
+
+ elif file_extension == '.rar':
+ # 这是个第三方库,需要预先pip install rarfile
+ # 此外,Windows上还需要安装winrar软件,配置其Path环境变量,如"C:\Program Files\WinRAR"才可以正常运行
+ try:
+ import rarfile
+ with rarfile.RarFile(file_path) as rf:
+ rf.extractall(path=dest_dir)
+ print("Successfully extracted rar archive to {}".format(dest_dir))
+ except:
+ print("rar格式需要安装额外依赖")
+ elif file_extension == '.7z':
+ try:
+ import py7zr
+ with py7zr.SevenZipFile(file_path, mode='r') as f:
+ f.extractall(path=dest_dir)
+ except:
+ print("7z格式需要安装额外依赖")
else:
return
+
def find_recent_files(directory):
"""
me: find files that is created with in one minutes under a directory with python, write a function
@@ -193,19 +230,21 @@ def on_file_uploaded(files, chatbot, txt):
if len(files) == 0: return chatbot, txt
import shutil, os, time, glob
from toolbox import extract_archive
- try: shutil.rmtree('./private_upload/')
- except: pass
+ try:
+ shutil.rmtree('./private_upload/')
+ except:
+ pass
time_tag = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
os.makedirs(f'private_upload/{time_tag}', exist_ok=True)
for file in files:
file_origin_name = os.path.basename(file.orig_name)
shutil.copy(file.name, f'private_upload/{time_tag}/{file_origin_name}')
- extract_archive(f'private_upload/{time_tag}/{file_origin_name}',
+ extract_archive(f'private_upload/{time_tag}/{file_origin_name}',
dest_dir=f'private_upload/{time_tag}/{file_origin_name}.extract')
moved_files = [fp for fp in glob.glob('private_upload/**/*', recursive=True)]
txt = f'private_upload/{time_tag}'
moved_files_str = '\t\n\n'.join(moved_files)
- chatbot.append(['我上传了文件,请查收',
+ chatbot.append(['我上传了文件,请查收',
f'[Local Message] 收到以下文件: \n\n{moved_files_str}\n\n调用路径参数已自动修正到: \n\n{txt}\n\n现在您点击任意实验功能时,以上文件将被作为输入参数'])
return chatbot, txt
@@ -218,21 +257,25 @@ def on_report_generated(files, chatbot):
chatbot.append(['汇总报告如何远程获取?', '汇总报告已经添加到右侧文件上传区,请查收。'])
return report_files, chatbot
+
def get_conf(*args):
# 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
res = []
for arg in args:
- try: r = getattr(importlib.import_module('config_private'), arg)
- except: r = getattr(importlib.import_module('config'), arg)
+ try:
+ r = getattr(importlib.import_module('config_private'), arg)
+ except:
+ r = getattr(importlib.import_module('config'), arg)
res.append(r)
# 在读取API_KEY时,检查一下是不是忘了改config
- if arg=='API_KEY' and len(r) != 51:
+ if arg == 'API_KEY' and len(r) != 51:
assert False, "正确的API_KEY密钥是51位,请在config文件中修改API密钥, 添加海外代理之后再运行。" + \
- "(如果您刚更新过代码,请确保旧版config_private文件中没有遗留任何新增键值)"
+ "(如果您刚更新过代码,请确保旧版config_private文件中没有遗留任何新增键值)"
return res
+
def clear_line_break(txt):
txt = txt.replace('\n', ' ')
txt = txt.replace(' ', ' ')
txt = txt.replace(' ', ' ')
- return txt
\ No newline at end of file
+ return txt
From e470ee1f7f7d82a1f5dfbf12701402c6c41f3b3b Mon Sep 17 00:00:00 2001
From: JasonGuo1 <1515893624@qq.com>
Date: Thu, 30 Mar 2023 15:45:58 +0800
Subject: [PATCH 03/35] =?UTF-8?q?feat(toolbox):=20=E6=94=AF=E6=8C=81rar?=
=?UTF-8?q?=E6=A0=BC=E5=BC=8F=E4=B8=8E7z=E6=A0=BC=E5=BC=8F=E8=A7=A3?=
=?UTF-8?q?=E5=8E=8B=EF=BC=8C=E4=BF=AE=E6=94=B9=E4=BA=86=E4=B8=8B=E6=B3=A8?=
=?UTF-8?q?=E9=87=8A?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
config.py | 1 +
toolbox.py | 2 ++
2 files changed, 3 insertions(+)
diff --git a/config.py b/config.py
index a513f44..b445fa7 100644
--- a/config.py
+++ b/config.py
@@ -14,6 +14,7 @@ if USE_PROXY:
# 代理网络的地址,打开你的科学上网软件查看代理的协议(socks5/http)、地址(localhost)和端口(11284)
proxies = { "http": "socks5h://localhost:11284", "https": "socks5h://localhost:11284", }
+
print('网络代理状态:运行。')
else:
proxies = None
diff --git a/toolbox.py b/toolbox.py
index 899cca4..43fafd3 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -193,11 +193,13 @@ def extract_archive(file_path, dest_dir):
print("Successfully extracted rar archive to {}".format(dest_dir))
except:
print("rar格式需要安装额外依赖")
+
elif file_extension == '.7z':
try:
import py7zr
with py7zr.SevenZipFile(file_path, mode='r') as f:
f.extractall(path=dest_dir)
+ print("Successfully extracted 7z archive to {}".format(dest_dir))
except:
print("7z格式需要安装额外依赖")
else:
From d57d529aa1f92d37d48511e1333e854e8a9fed56 Mon Sep 17 00:00:00 2001
From: JasonGuo1 <1515893624@qq.com>
Date: Thu, 30 Mar 2023 15:47:18 +0800
Subject: [PATCH 04/35] =?UTF-8?q?feat(toolbox):=20=E6=94=AF=E6=8C=81rar?=
=?UTF-8?q?=E6=A0=BC=E5=BC=8F=E4=B8=8E7z=E6=A0=BC=E5=BC=8F=E8=A7=A3?=
=?UTF-8?q?=E5=8E=8B=EF=BC=8C=E4=BF=AE=E6=94=B9=E4=BA=86=E4=B8=8B=E6=B3=A8?=
=?UTF-8?q?=E9=87=8A?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
toolbox.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/toolbox.py b/toolbox.py
index 43fafd3..3813709 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -202,6 +202,7 @@ def extract_archive(file_path, dest_dir):
print("Successfully extracted 7z archive to {}".format(dest_dir))
except:
print("7z格式需要安装额外依赖")
+
else:
return
From 6d8c8cd3f0b9d2b6fe8d412b83f902cbd43fa0bd Mon Sep 17 00:00:00 2001
From: JasonGuo1 <1515893624@qq.com>
Date: Thu, 30 Mar 2023 15:48:00 +0800
Subject: [PATCH 05/35] =?UTF-8?q?feat(toolbox):=20=E6=94=AF=E6=8C=81rar?=
=?UTF-8?q?=E6=A0=BC=E5=BC=8F=E4=B8=8E7z=E6=A0=BC=E5=BC=8F=E8=A7=A3?=
=?UTF-8?q?=E5=8E=8B=EF=BC=8C=E4=BF=AE=E6=94=B9=E4=BA=86=E4=B8=8B=E6=B3=A8?=
=?UTF-8?q?=E9=87=8A?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
config.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/config.py b/config.py
index b445fa7..a513f44 100644
--- a/config.py
+++ b/config.py
@@ -14,7 +14,6 @@ if USE_PROXY:
# 代理网络的地址,打开你的科学上网软件查看代理的协议(socks5/http)、地址(localhost)和端口(11284)
proxies = { "http": "socks5h://localhost:11284", "https": "socks5h://localhost:11284", }
-
print('网络代理状态:运行。')
else:
proxies = None
From 80e0c4e388dbaf39033819815ba57098c799801a Mon Sep 17 00:00:00 2001
From: JasonGuo1 <1515893624@qq.com>
Date: Thu, 30 Mar 2023 15:48:55 +0800
Subject: [PATCH 06/35] =?UTF-8?q?feat(toolbox):=20=E6=94=AF=E6=8C=81rar?=
=?UTF-8?q?=E6=A0=BC=E5=BC=8F=E4=B8=8E7z=E6=A0=BC=E5=BC=8F=E8=A7=A3?=
=?UTF-8?q?=E5=8E=8B=EF=BC=8C=E4=BF=AE=E6=94=B9=E4=BA=86=E4=B8=8B=E6=B3=A8?=
=?UTF-8?q?=E9=87=8A?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
toolbox.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/toolbox.py b/toolbox.py
index 3813709..968ca60 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -185,7 +185,7 @@ def extract_archive(file_path, dest_dir):
elif file_extension == '.rar':
# 这是个第三方库,需要预先pip install rarfile
- # 此外,Windows上还需要安装winrar软件,配置其Path环境变量,如"C:\Program Files\WinRAR"才可以正常运行
+ # 此外,Windows上还需要安装winrar软件,配置其Path环境变量,如"C:\Program Files\WinRAR"才可以
try:
import rarfile
with rarfile.RarFile(file_path) as rf:
From b073477905fbb9feb6c30f8f081226225d09b6a7 Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Thu, 30 Mar 2023 18:01:06 +0800
Subject: [PATCH 07/35] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E7=83=AD=E6=9B=B4?=
=?UTF-8?q?=E6=96=B0=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
README.md | 2 +-
functional_crazy.py | 22 +++++++++++++---------
toolbox.py | 13 ++++++++++++-
3 files changed, 26 insertions(+), 11 deletions(-)
diff --git a/README.md b/README.md
index a281e4f..26640f7 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ https://github.com/polarwinkel/mdtex2html
>
> 1.请注意只有“红颜色”标识的函数插件(按钮)才支持读取文件。目前暂不能完善地支持pdf格式文献的翻译解读,尚不支持word格式文件的读取。
>
-> 2.本项目中每个文件的功能都在`project_self_analysis.md`详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。
+> 2.本项目中每个文件的功能都在`project_self_analysis.md`详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自译解报告。
>
> 3.如果您不太习惯部分中文命名的函数,您可以随时点击相关函数插件,调用GPT一键生成纯英文的项目源代码。
diff --git a/functional_crazy.py b/functional_crazy.py
index 3f13853..889e242 100644
--- a/functional_crazy.py
+++ b/functional_crazy.py
@@ -1,9 +1,12 @@
+from toolbox import HotReload  # HotReload is defined in toolbox.py (not functools); it makes plugin edits take effect without restarting the program
+
# UserVisibleLevel是过滤器参数。
# 由于UI界面空间有限,所以通过这种方式决定UI界面中显示哪些插件
# 默认函数插件 VisibleLevel 是 0
# 当 UserVisibleLevel >= 函数插件的 VisibleLevel 时,该函数插件才会被显示出来
UserVisibleLevel = 1
+
def get_crazy_functionals():
from crazy_functions.读文章写摘要 import 读文章写摘要
from crazy_functions.生成函数注释 import 批量生成函数注释
@@ -16,33 +19,34 @@ def get_crazy_functionals():
function_plugins = {
"请解析并解构此项目本身": {
- "Function": 解析项目本身
+ # HotReload 的意思是热更新,修改函数插件后,不需要重启程序,代码直接生效
+ "Function": HotReload(解析项目本身)
},
"解析整个py项目": {
"Color": "stop", # 按钮颜色
- "Function": 解析一个Python项目
+ "Function": HotReload(解析一个Python项目)
},
"解析整个C++项目头文件": {
"Color": "stop", # 按钮颜色
- "Function": 解析一个C项目的头文件
+ "Function": HotReload(解析一个C项目的头文件)
},
"解析整个C++项目": {
"Color": "stop", # 按钮颜色
- "Function": 解析一个C项目
+ "Function": HotReload(解析一个C项目)
},
"读tex论文写摘要": {
"Color": "stop", # 按钮颜色
- "Function": 读文章写摘要
+ "Function": HotReload(读文章写摘要)
},
"批量生成函数注释": {
"Color": "stop", # 按钮颜色
- "Function": 批量生成函数注释
+ "Function": HotReload(批量生成函数注释)
},
"[多线程demo] 把本项目源代码切换成全英文": {
- "Function": 全项目切换英文
+ "Function": HotReload(全项目切换英文)
},
"[函数插件模板demo] 历史上的今天": {
- "Function": 高阶功能模板函数
+ "Function": HotReload(高阶功能模板函数)
},
}
@@ -52,7 +56,7 @@ def get_crazy_functionals():
function_plugins.update({
"[仅供开发调试] 批量总结PDF文档": {
"Color": "stop",
- "Function": 批量总结PDF文档
+ "Function": HotReload(批量总结PDF文档)
},
})
diff --git a/toolbox.py b/toolbox.py
index d96b3f6..e50b973 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -1,4 +1,4 @@
-import markdown, mdtex2html, threading, importlib, traceback
+import markdown, mdtex2html, threading, importlib, traceback, importlib, inspect
from show_math import convert as convert_math
from functools import wraps
@@ -88,6 +88,17 @@ def CatchException(f):
yield chatbot, history, f'异常 {e}'
return decorated
+def HotReload(f):
+ """
+ Decorator for hot-reloading function plugins: each call reloads the module that defines f and delegates to the reloaded function of the same name.
+ """
+ @wraps(f)
+ def decorated(*args, **kwargs):
+ fn_name = f.__name__  # the attribute looked up on the reloaded module
+ f_hot_reload = getattr(importlib.reload(inspect.getmodule(f)), fn_name)  # reload f's defining module, then fetch the fresh function object
+ yield from f_hot_reload(*args, **kwargs)  # `yield from` makes `decorated` a generator that forwards whatever the plugin yields
+ return decorated
+
def report_execption(chatbot, history, a, b):
"""
向chatbot中添加错误信息
From 363e45508b0fdf00fea2a8647cdee772d86d3e16 Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Thu, 30 Mar 2023 18:04:20 +0800
Subject: [PATCH 08/35] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E7=83=AD=E6=9B=B4?=
=?UTF-8?q?=E6=96=B0=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
README.md | 2 +-
functional_crazy.py | 11 ++++++++---
toolbox.py | 13 ++++++++++++-
3 files changed, 21 insertions(+), 5 deletions(-)
diff --git a/README.md b/README.md
index a281e4f..26640f7 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ https://github.com/polarwinkel/mdtex2html
>
> 1.请注意只有“红颜色”标识的函数插件(按钮)才支持读取文件。目前暂不能完善地支持pdf格式文献的翻译解读,尚不支持word格式文件的读取。
>
-> 2.本项目中每个文件的功能都在`project_self_analysis.md`详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。
+> 2.本项目中每个文件的功能都在`project_self_analysis.md`详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自译解报告。
>
> 3.如果您不太习惯部分中文命名的函数,您可以随时点击相关函数插件,调用GPT一键生成纯英文的项目源代码。
diff --git a/functional_crazy.py b/functional_crazy.py
index 3f13853..67c3c4b 100644
--- a/functional_crazy.py
+++ b/functional_crazy.py
@@ -1,3 +1,5 @@
+from toolbox import HotReload  # HotReload is defined in toolbox.py (not functools); it makes plugin edits take effect without restarting the program
+
# UserVisibleLevel是过滤器参数。
# 由于UI界面空间有限,所以通过这种方式决定UI界面中显示哪些插件
# 默认函数插件 VisibleLevel 是 0
@@ -39,10 +41,12 @@ def get_crazy_functionals():
"Function": 批量生成函数注释
},
"[多线程demo] 把本项目源代码切换成全英文": {
- "Function": 全项目切换英文
+ # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
+ "Function": HotReload(全项目切换英文)
},
"[函数插件模板demo] 历史上的今天": {
- "Function": 高阶功能模板函数
+ # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
+ "Function": HotReload(高阶功能模板函数)
},
}
@@ -52,7 +56,8 @@ def get_crazy_functionals():
function_plugins.update({
"[仅供开发调试] 批量总结PDF文档": {
"Color": "stop",
- "Function": 批量总结PDF文档
+ # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
+ "Function": HotReload(批量总结PDF文档)
},
})
diff --git a/toolbox.py b/toolbox.py
index d96b3f6..e50b973 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -1,4 +1,4 @@
-import markdown, mdtex2html, threading, importlib, traceback
+import markdown, mdtex2html, threading, importlib, traceback, importlib, inspect
from show_math import convert as convert_math
from functools import wraps
@@ -88,6 +88,17 @@ def CatchException(f):
yield chatbot, history, f'异常 {e}'
return decorated
+def HotReload(f):
+ """
+ Decorator for hot-reloading function plugins: each call reloads the module that defines f and delegates to the reloaded function of the same name.
+ """
+ @wraps(f)
+ def decorated(*args, **kwargs):
+ fn_name = f.__name__  # the attribute looked up on the reloaded module
+ f_hot_reload = getattr(importlib.reload(inspect.getmodule(f)), fn_name)  # reload f's defining module, then fetch the fresh function object
+ yield from f_hot_reload(*args, **kwargs)  # `yield from` makes `decorated` a generator that forwards whatever the plugin yields
+ return decorated
+
def report_execption(chatbot, history, a, b):
"""
向chatbot中添加错误信息
From ba0c17ba53a7d128b2834d6479851dce29dc45fb Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Thu, 30 Mar 2023 18:21:17 +0800
Subject: [PATCH 09/35] =?UTF-8?q?=E8=87=AA=E8=AF=91=E8=A7=A3=E6=8A=A5?=
=?UTF-8?q?=E5=91=8A?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
project_self_analysis.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/project_self_analysis.md b/project_self_analysis.md
index c817421..f0a544e 100644
--- a/project_self_analysis.md
+++ b/project_self_analysis.md
@@ -1,5 +1,5 @@
-# chatgpt-academic项目分析报告
-(Author补充:以下分析均由本项目调用ChatGPT一键生成,如果有不准确的地方全怪GPT)
+# chatgpt-academic项目自译解报告
+(Author补充:以下分析均由本项目调用ChatGPT一键生成,如果有不准确的地方,全怪GPT😄)
## [0/10] 程序摘要: check_proxy.py
From 44e77dc741dc434dd301e4f419ada512005c0a65 Mon Sep 17 00:00:00 2001
From: JasonGuo1 <1515893624@qq.com>
Date: Thu, 30 Mar 2023 20:28:15 +0800
Subject: [PATCH 10/35] =?UTF-8?q?feat(toolbox):=E8=B0=83=E6=95=B4=E4=BA=86?=
=?UTF-8?q?=E7=A9=BA=E6=A0=BC=E7=9A=84=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
requirements.txt | 11 ++-----
toolbox.py | 78 +++++++++++++++++-------------------------------
2 files changed, 29 insertions(+), 60 deletions(-)
diff --git a/requirements.txt b/requirements.txt
index 3f39924..265a3cb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,3 @@
gradio>=3.23
-requests[socks]~=2.28.2
-mdtex2html~=1.2.0
-
-markdown~=3.4.3
-latex2mathml~=3.75.1
-numpy~=1.21.6
-
-rarfile~=4.0
-py7zr~=0.20.4
\ No newline at end of file
+requests[socks]
+mdtex2html
\ No newline at end of file
diff --git a/toolbox.py b/toolbox.py
index 968ca60..30399de 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -2,7 +2,6 @@ import markdown, mdtex2html, threading, importlib, traceback
from show_math import convert as convert_math
from functools import wraps
-
def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[], sys_prompt=''):
"""
调用简单的predict_no_ui接口,但是依然保留了些许界面心跳功能,当对话太长时,会自动采用二分法截断
@@ -14,43 +13,36 @@ def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temp
# 多线程的时候,需要一个mutable结构在不同线程之间传递信息
# list就是最简单的mutable结构,我们第一个位置放gpt输出,第二个位置传递报错信息
mutable = [None, '']
-
# multi-threading worker
def mt(i_say, history):
while True:
try:
- mutable[0] = predict_no_ui(inputs=i_say, top_p=top_p, temperature=temperature, history=history,
- sys_prompt=sys_prompt)
+ mutable[0] = predict_no_ui(inputs=i_say, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt)
break
except ConnectionAbortedError as e:
if len(history) > 0:
- history = [his[len(his) // 2:] for his in history if his is not None]
+ history = [his[len(his)//2:] for his in history if his is not None]
mutable[1] = 'Warning! History conversation is too long, cut into half. '
else:
- i_say = i_say[:len(i_say) // 2]
+ i_say = i_say[:len(i_say)//2]
mutable[1] = 'Warning! Input file is too long, cut into half. '
except TimeoutError as e:
mutable[0] = '[Local Message] Failed with timeout.'
raise TimeoutError
-
# 创建新线程发出http请求
- thread_name = threading.Thread(target=mt, args=(i_say, history));
- thread_name.start()
+ thread_name = threading.Thread(target=mt, args=(i_say, history)); thread_name.start()
# 原来的线程则负责持续更新UI,实现一个超时倒计时,并等待新线程的任务完成
cnt = 0
while thread_name.is_alive():
cnt += 1
- chatbot[-1] = (i_say_show_user,
- f"[Local Message] {mutable[1]}waiting gpt response {cnt}/{TIMEOUT_SECONDS * 2 * (MAX_RETRY + 1)}" + ''.join(
- ['.'] * (cnt % 4)))
+ chatbot[-1] = (i_say_show_user, f"[Local Message] {mutable[1]}waiting gpt response {cnt}/{TIMEOUT_SECONDS*2*(MAX_RETRY+1)}"+''.join(['.']*(cnt%4)))
yield chatbot, history, '正常'
time.sleep(1)
# 把gpt的输出从mutable中取出来
gpt_say = mutable[0]
- if gpt_say == '[Local Message] Failed with timeout.': raise TimeoutError
+ if gpt_say=='[Local Message] Failed with timeout.': raise TimeoutError
return gpt_say
-
def write_results_to_file(history, file_name=None):
"""
将对话记录history以Markdown格式写入文件中。如果没有指定文件名,则使用当前时间生成文件名。
@@ -60,17 +52,16 @@ def write_results_to_file(history, file_name=None):
# file_name = time.strftime("chatGPT分析报告%Y-%m-%d-%H-%M-%S", time.localtime()) + '.md'
file_name = 'chatGPT分析报告' + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.md'
os.makedirs('./gpt_log/', exist_ok=True)
- with open(f'./gpt_log/{file_name}', 'w', encoding='utf8') as f:
+ with open(f'./gpt_log/{file_name}', 'w', encoding = 'utf8') as f:
f.write('# chatGPT 分析报告\n')
for i, content in enumerate(history):
- if i % 2 == 0: f.write('## ')
+ if i%2==0: f.write('## ')
f.write(content)
f.write('\n\n')
res = '以上材料已经被写入' + os.path.abspath(f'./gpt_log/{file_name}')
print(res)
return res
-
def regular_txt_to_markdown(text):
"""
将普通文本转换为Markdown格式的文本。
@@ -80,12 +71,10 @@ def regular_txt_to_markdown(text):
text = text.replace('\n\n\n', '\n\n')
return text
-
def CatchException(f):
"""
装饰器函数,捕捉函数f中的异常并封装到一个生成器中返回,并显示到聊天当中。
"""
-
@wraps(f)
def decorated(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
try:
@@ -95,21 +84,16 @@ def CatchException(f):
from toolbox import get_conf
proxies, = get_conf('proxies')
tb_str = regular_txt_to_markdown(traceback.format_exc())
- chatbot[-1] = (
- chatbot[-1][0], f"[Local Message] 实验性函数调用出错: \n\n {tb_str} \n\n 当前代理可用性: \n\n {check_proxy(proxies)}")
+ chatbot[-1] = (chatbot[-1][0], f"[Local Message] 实验性函数调用出错: \n\n {tb_str} \n\n 当前代理可用性: \n\n {check_proxy(proxies)}")
yield chatbot, history, f'异常 {e}'
-
return decorated
-
def report_execption(chatbot, history, a, b):
"""
向chatbot中添加错误信息
"""
chatbot.append((a, b))
- history.append(a);
- history.append(b)
-
+ history.append(a); history.append(b)
def text_divide_paragraph(text):
"""
@@ -126,16 +110,15 @@ def text_divide_paragraph(text):
text = "".join(lines)
return text
-
def markdown_convertion(txt):
"""
将Markdown格式的文本转换为HTML格式。如果包含数学公式,则先将公式转换为HTML格式。
"""
if ('$' in txt) and ('```' not in txt):
- return markdown.markdown(txt, extensions=['fenced_code', 'tables']) + '
' + \
- markdown.markdown(convert_math(txt, splitParagraphs=False), extensions=['fenced_code', 'tables'])
+ return markdown.markdown(txt,extensions=['fenced_code','tables']) + '
' + \
+ markdown.markdown(convert_math(txt, splitParagraphs=False),extensions=['fenced_code','tables'])
else:
- return markdown.markdown(txt, extensions=['fenced_code', 'tables'])
+ return markdown.markdown(txt,extensions=['fenced_code','tables'])
def format_io(self, y):
@@ -144,9 +127,9 @@ def format_io(self, y):
"""
if y is None or y == []: return []
i_ask, gpt_reply = y[-1]
- i_ask = text_divide_paragraph(i_ask) # 输入部分太自由,预处理一波
+ i_ask = text_divide_paragraph(i_ask) # 输入部分太自由,预处理一波
y[-1] = (
- None if i_ask is None else markdown.markdown(i_ask, extensions=['fenced_code', 'tables']),
+ None if i_ask is None else markdown.markdown(i_ask, extensions=['fenced_code','tables']),
None if gpt_reply is None else markdown_convertion(gpt_reply)
)
return y
@@ -168,7 +151,6 @@ def extract_archive(file_path, dest_dir):
import zipfile
import tarfile
import os
-
# Get the file extension of the input file
file_extension = os.path.splitext(file_path)[1]
@@ -183,17 +165,18 @@ def extract_archive(file_path, dest_dir):
tarobj.extractall(path=dest_dir)
print("Successfully extracted tar archive to {}".format(dest_dir))
+ # 第三方库,需要预先pip install rarfile
+ # 此外,Windows上还需要安装winrar软件,配置其Path环境变量,如"C:\Program Files\WinRAR"才可以
elif file_extension == '.rar':
- # 这是个第三方库,需要预先pip install rarfile
- # 此外,Windows上还需要安装winrar软件,配置其Path环境变量,如"C:\Program Files\WinRAR"才可以
try:
import rarfile
with rarfile.RarFile(file_path) as rf:
rf.extractall(path=dest_dir)
print("Successfully extracted rar archive to {}".format(dest_dir))
except:
- print("rar格式需要安装额外依赖")
+ print("Rar format requires additional dependencies to install")
+ # 第三方库,需要预先pip install py7zr
elif file_extension == '.7z':
try:
import py7zr
@@ -201,12 +184,11 @@ def extract_archive(file_path, dest_dir):
f.extractall(path=dest_dir)
print("Successfully extracted 7z archive to {}".format(dest_dir))
except:
- print("7z格式需要安装额外依赖")
+ print("7z format requires additional dependencies to install")
else:
return
-
def find_recent_files(directory):
"""
me: find files that is created with in one minutes under a directory with python, write a function
@@ -233,10 +215,8 @@ def on_file_uploaded(files, chatbot, txt):
if len(files) == 0: return chatbot, txt
import shutil, os, time, glob
from toolbox import extract_archive
- try:
- shutil.rmtree('./private_upload/')
- except:
- pass
+ try: shutil.rmtree('./private_upload/')
+ except: pass
time_tag = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
os.makedirs(f'private_upload/{time_tag}', exist_ok=True)
for file in files:
@@ -260,25 +240,21 @@ def on_report_generated(files, chatbot):
chatbot.append(['汇总报告如何远程获取?', '汇总报告已经添加到右侧文件上传区,请查收。'])
return report_files, chatbot
-
def get_conf(*args):
# 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
res = []
for arg in args:
- try:
- r = getattr(importlib.import_module('config_private'), arg)
- except:
- r = getattr(importlib.import_module('config'), arg)
+ try: r = getattr(importlib.import_module('config_private'), arg)
+ except: r = getattr(importlib.import_module('config'), arg)
res.append(r)
# 在读取API_KEY时,检查一下是不是忘了改config
- if arg == 'API_KEY' and len(r) != 51:
+ if arg=='API_KEY' and len(r) != 51:
assert False, "正确的API_KEY密钥是51位,请在config文件中修改API密钥, 添加海外代理之后再运行。" + \
- "(如果您刚更新过代码,请确保旧版config_private文件中没有遗留任何新增键值)"
+ "(如果您刚更新过代码,请确保旧版config_private文件中没有遗留任何新增键值)"
return res
-
def clear_line_break(txt):
txt = txt.replace('\n', ' ')
txt = txt.replace(' ', ' ')
txt = txt.replace(' ', ' ')
- return txt
+ return txt
\ No newline at end of file
From ac4fce05cfab8d9d2671aca6b1323ee04327927c Mon Sep 17 00:00:00 2001
From: JasonGuo1 <1515893624@qq.com>
Date: Thu, 30 Mar 2023 23:23:41 +0800
Subject: [PATCH 11/35] =?UTF-8?q?feat(=E6=80=BB=E7=BB=93word=E6=96=87?=
=?UTF-8?q?=E6=A1=A3):=E5=A2=9E=E5=8A=A0=E8=AF=BB=E5=8F=96docx=E3=80=81doc?=
=?UTF-8?q?=E6=A0=BC=E5=BC=8F=E7=9A=84=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
crazy_functions/总结word文档.py | 123 ++++++++++++++++++++++++++++++++
functional_crazy.py | 4 ++
2 files changed, 127 insertions(+)
create mode 100644 crazy_functions/总结word文档.py
diff --git a/crazy_functions/总结word文档.py b/crazy_functions/总结word文档.py
new file mode 100644
index 0000000..b7cef5b
--- /dev/null
+++ b/crazy_functions/总结word文档.py
@@ -0,0 +1,123 @@
+from predict import predict_no_ui
+from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
+fast_debug = False
+
+
+def 解析docx(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt):
+    """
+    Summarise every Word document in ``file_manifest`` with GPT, one request per file.
+
+    This is a generator: it yields ``(chatbot, history, status)`` after each step so the
+    caller can refresh its display. ``chatbot`` and ``history`` are mutated in place.
+
+    Dependencies: ``pip install python-docx`` for .docx (cross-platform) and
+    ``pip install pywin32`` for .doc (Windows only, drives Word through COM).
+
+    :param file_manifest: paths of the .docx / .doc files to process.
+    :param project_folder: root folder; file names in the prompt are relative to it.
+    :param top_p: forwarded to ``predict_no_ui_but_counting_down``.
+    :param temperature: forwarded to ``predict_no_ui_but_counting_down``.
+    :param chatbot: list of (question, answer) pairs; one pair is appended per file.
+    :param history: flat list receiving each question followed by its answer.
+    :param systemPromptTxt: accepted for signature compatibility; not used here.
+    """
+    import time, os
+
+    print('begin analysis on:', file_manifest)
+    for index, fp in enumerate(file_manifest):
+        if os.path.splitext(fp)[1].lower() == ".docx":
+            from docx import Document
+            doc = Document(fp)
+            file_content = "\n".join([para.text for para in doc.paragraphs])
+        else:
+            import win32com.client
+            word = win32com.client.Dispatch("Word.Application")
+            try:
+                word.visible = False
+                # Hand Word an absolute path. abspath() is also correct when fp is already
+                # absolute, which prefixing os.getcwd() by hand is not.
+                doc = word.Documents.Open(os.path.abspath(fp))
+                try:
+                    file_content = doc.Range().Text
+                finally:
+                    doc.Close()
+            finally:
+                # Quit in a finally so a failure while reading does not skip the shutdown.
+                word.Quit()
+
+        print(file_content)
+
+        prefix = "接下来请你逐文件分析下面的论文文件," if index == 0 else ""
+        # File names under private_upload tend to be garbled after unpacking a zip (rar and
+        # 7z are fine), so the file name could be dropped and only the content analysed.
+        i_say = prefix + f'请对下面的文章片段用中英文做概述,文件名是{os.path.relpath(fp, project_folder)},' \
+                         f'文章内容是 ```{file_content}```'
+        i_say_show_user = prefix + f'[{index+1}/{len(file_manifest)}] 假设你是论文审稿专家,请对下面的文章片段做概述: {os.path.abspath(fp)}'
+        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
+        yield chatbot, history, '正常'
+
+        if not fast_debug:
+            msg = '正常'
+            # ** gpt request ** -- each file is sent on its own, without earlier history
+            gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature,
+                                                                 history=[])  # with timeout countdown
+            chatbot[-1] = (i_say_show_user, gpt_say)
+            history.append(i_say_show_user)
+            history.append(gpt_say)
+            yield chatbot, history, msg
+            time.sleep(2)
+
+    # NOTE: the follow-up prompts that used to sit here (overall abstract, writing advice)
+    # were disabled, yet the request after them was still sent. It reused the stale `i_say`
+    # of the last file, so that whole document was submitted a second time and its chat
+    # entry was overwritten. The stale request is dropped; to add a follow-up, build a new
+    # prompt, append it to `chatbot` and send it with history=history before the report.
+    if not fast_debug:
+        res = write_results_to_file(history)
+        chatbot.append(("完成了吗?", res))
+        yield chatbot, history, '正常'
+
+
+@CatchException
+def 总结word文档(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
+    """
+    Plugin entry point: summarise every .docx / .doc file found under the folder ``txt``.
+
+    Generator yielding ``(chatbot, history, status)``. Problems (missing dependency,
+    missing path, no matching files) are reported in the chat and end the generator.
+    """
+    import glob, os
+
+    yield chatbot, history, '正常'
+
+    # Try to import the dependency; if it is missing, tell the user how to install it.
+    try:
+        from docx import Document
+    except ImportError:
+        report_execption(chatbot, history,
+                         a=f"解析项目: {txt}",
+                         b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade python-docx```。")
+        yield chatbot, history, '正常'
+        return
+
+    # Clear the history so the accumulated input does not overflow.
+    history = []
+
+    # Validate the input: stop when the given path does not exist.
+    if os.path.exists(txt):
+        project_folder = txt
+    else:
+        if txt == "": txt = '空空如也的输入栏'
+        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
+        yield chatbot, history, '正常'
+        return
+
+    # Collect the files to process (recursive search); stop when there are none.
+    file_manifest = glob.glob(f'{project_folder}/**/*.docx', recursive=True) + \
+                    glob.glob(f'{project_folder}/**/*.doc', recursive=True)
+    if not file_manifest:
+        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.docx或doc文件: {txt}")
+        yield chatbot, history, '正常'
+        return
+
+    yield from 解析docx(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
diff --git a/functional_crazy.py b/functional_crazy.py
index 3f13853..5bab039 100644
--- a/functional_crazy.py
+++ b/functional_crazy.py
@@ -13,6 +13,7 @@ def get_crazy_functionals():
from crazy_functions.解析项目源代码 import 解析一个C项目
from crazy_functions.高级功能函数模板 import 高阶功能模板函数
from crazy_functions.代码重写为全英文_多线程 import 全项目切换英文
+ from crazy_functions.总结word文档 import 总结word文档
function_plugins = {
"请解析并解构此项目本身": {
@@ -44,6 +45,9 @@ def get_crazy_functionals():
"[函数插件模板demo] 历史上的今天": {
"Function": 高阶功能模板函数
},
+ "[总结word文档demo] 解析word文档": {
+ "Function": 总结word文档
+ },
}
# VisibleLevel=1 经过测试,但功能未达到理想状态
From 285fa4690c8da500ae1ceebec94d65972546f1a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=AC=A7=E7=8E=AE=E6=9D=B0?=
Date: Fri, 31 Mar 2023 00:54:01 +0800
Subject: [PATCH 12/35] feature(read pdf paper then write summary):
add a func called readPdf in toolbox, which can read pdf paper to str. then use bs4.BeautifulSoup to clean content.
---
crazy_functions/读文章写摘要.py | 20 ++++++++----
functional_crazy.py | 2 +-
requirements.txt | 11 +++++--
toolbox.py | 58 ++++++++++++++++++++++++++++++++-
4 files changed, 80 insertions(+), 11 deletions(-)
diff --git a/crazy_functions/读文章写摘要.py b/crazy_functions/读文章写摘要.py
index dc92256..51ae683 100644
--- a/crazy_functions/读文章写摘要.py
+++ b/crazy_functions/读文章写摘要.py
@@ -1,14 +1,19 @@
from predict import predict_no_ui
-from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
+from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down, readPdf
fast_debug = False
+from bs4 import BeautifulSoup
def 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt):
import time, glob, os
print('begin analysis on:', file_manifest)
for index, fp in enumerate(file_manifest):
- with open(fp, 'r', encoding='utf-8') as f:
- file_content = f.read()
+ if ".tex" in fp:
+ with open(fp, 'r', encoding='utf-8') as f:
+ file_content = f.read()
+ if ".pdf" in fp:
+ file_content = readPdf(fp)
+ file_content = BeautifulSoup(''.join(file_content), features="lxml").body.text.encode('gbk', 'ignore').decode('gbk')
prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index==0 else ""
i_say = prefix + f'请对下面的文章片段用中文做一个概述,文件名是{os.path.relpath(fp, project_folder)},文章内容是 ```{file_content}```'
@@ -17,7 +22,7 @@ def 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, hist
print('[1] yield chatbot, history')
yield chatbot, history, '正常'
- if not fast_debug:
+ if not fast_debug:
msg = '正常'
# ** gpt request **
gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[]) # 带超时倒计时
@@ -35,7 +40,7 @@ def 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, hist
chatbot.append((i_say, "[Local Message] waiting gpt response."))
yield chatbot, history, '正常'
- if not fast_debug:
+ if not fast_debug:
msg = '正常'
# ** gpt request **
gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say, chatbot, top_p, temperature, history=history) # 带超时倒计时
@@ -60,11 +65,12 @@ def 读文章写摘要(txt, top_p, temperature, chatbot, history, systemPromptTx
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield chatbot, history, '正常'
return
- file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] # + \
+ file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] + \
+ [f for f in glob.glob(f'{project_folder}/**/*.pdf', recursive=True)] # + \
# [f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)] + \
# [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
if len(file_manifest) == 0:
- report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
+ report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex或pdf文件: {txt}")
yield chatbot, history, '正常'
return
yield from 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
diff --git a/functional_crazy.py b/functional_crazy.py
index 3f13853..4b90af4 100644
--- a/functional_crazy.py
+++ b/functional_crazy.py
@@ -30,7 +30,7 @@ def get_crazy_functionals():
"Color": "stop", # 按钮颜色
"Function": 解析一个C项目
},
- "读tex论文写摘要": {
+ "读tex or pdf论文写摘要": {
"Color": "stop", # 按钮颜色
"Function": 读文章写摘要
},
diff --git a/requirements.txt b/requirements.txt
index 84ced64..56c5b23 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,10 @@
gradio>=3.23
-requests[socks]
-mdtex2html
+requests[socks]~=2.28.2
+mdtex2html~=1.2.0
+Markdown~=3.4.3
+latex2mathml~=3.75.1
+bs4~=0.0.1
+lxml~=4.6.4
+beautifulsoup4~=4.12.0
+numpy~=1.24.2
+pdfminer.six
\ No newline at end of file
diff --git a/toolbox.py b/toolbox.py
index d96b3f6..b30c255 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -1,6 +1,14 @@
import markdown, mdtex2html, threading, importlib, traceback
from show_math import convert as convert_math
from functools import wraps
+import pdfminer
+from pdfminer.pdfparser import PDFParser
+from pdfminer.pdfdocument import PDFDocument
+from pdfminer.pdfpage import PDFPage, PDFTextExtractionNotAllowed
+from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
+from pdfminer.pdfdevice import PDFDevice
+from pdfminer.layout import LAParams
+from pdfminer.converter import PDFPageAggregator
def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[], sys_prompt=''):
"""
@@ -235,4 +243,52 @@ def clear_line_break(txt):
txt = txt.replace('\n', ' ')
txt = txt.replace(' ', ' ')
txt = txt.replace(' ', ' ')
- return txt
\ No newline at end of file
+ return txt
+
+def readPdf(pdfPath):
+    """
+    Read a PDF file with pdfminer and return the text it contains.
+
+    Only horizontal text boxes (``LTTextBoxHorizontal``) are collected; every other
+    layout object on a page is skipped.
+
+    :param pdfPath: path of the PDF file to read.
+    :return: list of str, one entry per text box, in the order the pages yield them.
+    :raises PDFTextExtractionNotAllowed: if the document does not allow text extraction.
+    """
+    # Set parameters for the layout analysis.
+    laparams = LAParams(
+        char_margin=10.0,
+        line_margin=0.2,
+        boxes_flow=0.2,
+        all_texts=False,
+    )
+    # Create a PDF resource manager object that stores shared resources.
+    rsrcmgr = PDFResourceManager()
+    device = PDFPageAggregator(rsrcmgr, laparams=laparams)
+    interpreter = PDFPageInterpreter(rsrcmgr, device)
+
+    outTextList = []
+    # Keep the file open while the pages are processed; the with-block closes it
+    # afterwards, even when an exception is raised.
+    with open(pdfPath, 'rb') as fp:
+        # Create a PDF parser object associated with the file object.
+        parser = PDFParser(fp)
+        # Create a PDF document object that stores the document structure.
+        # A password could be supplied as the 2nd parameter.
+        document = PDFDocument(parser)
+        # Check if the document allows text extraction. If not, abort.
+        if not document.is_extractable:
+            raise PDFTextExtractionNotAllowed
+
+        # Loop over all pages in the document.
+        for page in PDFPage.create_pages(document):
+            # Read the page into a layout object.
+            interpreter.process_page(page)
+            layout = device.get_result()
+            # A layout container is iterable, no need to reach into its private _objs.
+            for obj in layout:
+                if isinstance(obj, pdfminer.layout.LTTextBoxHorizontal):
+                    outTextList.append(obj.get_text())
+
+    return outTextList
\ No newline at end of file
From 125fa7c378a4b4f8a0c30f7de1231c12e0054bfb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=AC=A7=E7=8E=AE=E6=9D=B0?=
Date: Fri, 31 Mar 2023 10:03:10 +0800
Subject: [PATCH 13/35] =?UTF-8?q?fix(fix=20"gbk"=20encode=20error=20in=20?=
=?UTF-8?q?=E6=89=B9=E9=87=8F=E6=80=BB=E7=BB=93PDF=E6=96=87=E6=A1=A3=20lin?=
=?UTF-8?q?e14):?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
由于不可编码字符,导致报错,添加软解码,处理原始文本。
---
crazy_functions/批量总结PDF文档.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/crazy_functions/批量总结PDF文档.py b/crazy_functions/批量总结PDF文档.py
index 102bc9e..bf7fe6f 100644
--- a/crazy_functions/批量总结PDF文档.py
+++ b/crazy_functions/批量总结PDF文档.py
@@ -11,6 +11,7 @@ def 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, histor
file_content = ""
for page in doc:
file_content += page.get_text()
+ file_content = file_content.encode('gbk', 'ignore').decode('gbk')
print(file_content)
prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index==0 else ""
From db8c8afd74e75fe8bfbeecdf978e34f3ef8ba994 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=AC=A7=E7=8E=AE=E6=9D=B0?=
Date: Fri, 31 Mar 2023 10:26:40 +0800
Subject: [PATCH 14/35] fix(the ".PDF" file can not be recognized):
---
crazy_functions/读文章写摘要.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/crazy_functions/读文章写摘要.py b/crazy_functions/读文章写摘要.py
index 51ae683..4144d11 100644
--- a/crazy_functions/读文章写摘要.py
+++ b/crazy_functions/读文章写摘要.py
@@ -11,7 +11,7 @@ def 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, hist
if ".tex" in fp:
with open(fp, 'r', encoding='utf-8') as f:
file_content = f.read()
- if ".pdf" in fp:
+ if ".pdf" in fp.lower():
file_content = readPdf(fp)
file_content = BeautifulSoup(''.join(file_content), features="lxml").body.text.encode('gbk', 'ignore').decode('gbk')
From cec44805a57fb2f10f65181ba64a9a169e64f2df Mon Sep 17 00:00:00 2001
From: fulyaec
Date: Fri, 31 Mar 2023 16:24:40 +0800
Subject: [PATCH 15/35] refactor and enhance
---
main.py | 5 ++---
toolbox.py | 9 +++------
2 files changed, 5 insertions(+), 9 deletions(-)
diff --git a/main.py b/main.py
index c69795a..3033e39 100644
--- a/main.py
+++ b/main.py
@@ -10,7 +10,7 @@ proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION = \
# 如果WEB_PORT是-1, 则随机选取WEB端口
PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT
-AUTHENTICATION = None if AUTHENTICATION == [] else AUTHENTICATION
+if not AUTHENTICATION: AUTHENTICATION = None
initial_prompt = "Serve me as a writing and programming assistant."
title_html = """ChatGPT 学术优化
"""
@@ -105,8 +105,7 @@ def auto_opentab_delay():
def open():
time.sleep(2)
webbrowser.open_new_tab(f'http://localhost:{PORT}')
- t = threading.Thread(target=open)
- t.daemon = True; t.start()
+ threading.Thread(target=open, name="open-browser", daemon=True).start()
auto_opentab_delay()
demo.title = "ChatGPT 学术优化"
diff --git a/toolbox.py b/toolbox.py
index d96b3f6..326740a 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -226,13 +226,10 @@ def get_conf(*args):
except: r = getattr(importlib.import_module('config'), arg)
res.append(r)
# 在读取API_KEY时,检查一下是不是忘了改config
- if arg=='API_KEY' and len(r) != 51:
- assert False, "正确的API_KEY密钥是51位,请在config文件中修改API密钥, 添加海外代理之后再运行。" + \
+ assert arg != 'API_KEY' or len(r) == 51, "正确的API_KEY密钥是51位,请在config文件中修改API密钥, 添加海外代理之后再运行。" \
"(如果您刚更新过代码,请确保旧版config_private文件中没有遗留任何新增键值)"
return res
def clear_line_break(txt):
- txt = txt.replace('\n', ' ')
- txt = txt.replace(' ', ' ')
- txt = txt.replace(' ', ' ')
- return txt
\ No newline at end of file
+ import re
+ return re.sub(r"\s+", " ", txt)
\ No newline at end of file
From 0b03c797bc14a6eb26e39ff4493ce5cb4162a02d Mon Sep 17 00:00:00 2001
From: Jia Xinglong
Date: Fri, 31 Mar 2023 17:38:39 +0800
Subject: [PATCH 16/35] =?UTF-8?q?=E4=BD=BF=E7=94=A8=20re=20=E6=A8=A1?=
=?UTF-8?q?=E5=9D=97=E7=9A=84=20match=20=E5=87=BD=E6=95=B0=E5=8F=AF?=
=?UTF-8?q?=E4=BB=A5=E6=9B=B4=E7=B2=BE=E5=87=86=E7=9A=84=E5=8C=B9=E9=85=8D?=
=?UTF-8?q?=E5=92=8C=E7=A1=AE=E8=AE=A4=20API=5FKEY=20=E6=98=AF=E5=90=A6?=
=?UTF-8?q?=E6=AD=A3=E7=A1=AE?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
toolbox.py | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/toolbox.py b/toolbox.py
index d96b3f6..75dd8bc 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -1,6 +1,7 @@
import markdown, mdtex2html, threading, importlib, traceback
from show_math import convert as convert_math
from functools import wraps
+import re
def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[], sys_prompt=''):
"""
@@ -226,9 +227,14 @@ def get_conf(*args):
except: r = getattr(importlib.import_module('config'), arg)
res.append(r)
# 在读取API_KEY时,检查一下是不是忘了改config
- if arg=='API_KEY' and len(r) != 51:
- assert False, "正确的API_KEY密钥是51位,请在config文件中修改API密钥, 添加海外代理之后再运行。" + \
- "(如果您刚更新过代码,请确保旧版config_private文件中没有遗留任何新增键值)"
+ if arg=='API_KEY':
+ # 正确的 API_KEY 是 "sk-" + 48 位大小写字母数字的组合
+ API_MATCH = re.match(r"sk-[a-zA-Z0-9]{48}$", r)
+ if API_MATCH:
+ print("您的 API_KEY 是: ", r, "\nAPI_KEY 导入成功")
+ else:
+ assert False, "正确的 API_KEY 是 'sk-' + '48 位大小写字母数字' 的组合,请在config文件中修改API密钥, 添加海外代理之后再运行。" + \
+ "(如果您刚更新过代码,请确保旧版config_private文件中没有遗留任何新增键值)"
return res
def clear_line_break(txt):
From 60506eff9f855d8346d357499ca96759c2c8abb3 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Fri, 31 Mar 2023 19:46:01 +0800
Subject: [PATCH 17/35] revert toolbox
---
toolbox.py | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/toolbox.py b/toolbox.py
index d657c1f..e50b973 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -237,10 +237,13 @@ def get_conf(*args):
except: r = getattr(importlib.import_module('config'), arg)
res.append(r)
# 在读取API_KEY时,检查一下是不是忘了改config
- assert arg != 'API_KEY' or len(r) == 51, "正确的API_KEY密钥是51位,请在config文件中修改API密钥, 添加海外代理之后再运行。" \
+ if arg=='API_KEY' and len(r) != 51:
+ assert False, "正确的API_KEY密钥是51位,请在config文件中修改API密钥, 添加海外代理之后再运行。" + \
"(如果您刚更新过代码,请确保旧版config_private文件中没有遗留任何新增键值)"
return res
def clear_line_break(txt):
- import re
- return re.sub(r"\s+", " ", txt)
\ No newline at end of file
+ txt = txt.replace('\n', ' ')
+ txt = txt.replace(' ', ' ')
+ txt = txt.replace(' ', ' ')
+ return txt
\ No newline at end of file
From fa7464ae443061c013831748b63b39702b96836e Mon Sep 17 00:00:00 2001
From: Your Name
Date: Fri, 31 Mar 2023 20:02:12 +0800
Subject: [PATCH 18/35] =?UTF-8?q?config=E6=96=B0=E5=A2=9E=E8=AF=B4?=
=?UTF-8?q?=E6=98=8E?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
config.py | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)
diff --git a/config.py b/config.py
index 7fc73db..72309e7 100644
--- a/config.py
+++ b/config.py
@@ -1,16 +1,15 @@
-# API_KEY = "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" 此key无效
+# [step 1]>> 例如: API_KEY = "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" (此key无效)
API_KEY = "sk-此处填API密钥"
-API_URL = "https://api.openai.com/v1/chat/completions"
-# 改为True应用代理
+
+# [step 2]>> 改为True应用代理,如果直接在海外服务器部署,此处不修改
USE_PROXY = False
if USE_PROXY:
-
- # 填写格式是 [协议]:// [地址] :[端口] ,
+ # 填写格式是 [协议]:// [地址] :[端口],填写之前不要忘记把USE_PROXY改成True,如果直接在海外服务器部署,此处不修改
# 例如 "socks5h://localhost:11284"
- # [协议] 常见协议无非socks5h/http,例如 v2*** 和 s** 的默认本地协议是socks5h,cl**h 的默认本地协议是http
+ # [协议] 常见协议无非socks5h/http; 例如 v2**y 和 ss* 的默认本地协议是socks5h; 而cl**h 的默认本地协议是http
# [地址] 懂的都懂,不懂就填localhost或者127.0.0.1肯定错不了(localhost意思是代理软件安装在本机上)
- # [端口] 在代理软件的设置里,不同的代理软件界面不一样,但端口号都应该在最显眼的位置上
+ # [端口] 在代理软件的设置里找。虽然不同的代理软件界面不一样,但端口号都应该在最显眼的位置上
# 代理网络的地址,打开你的科学上网软件查看代理的协议(socks5/http)、地址(localhost)和端口(11284)
proxies = { "http": "socks5h://localhost:11284", "https": "socks5h://localhost:11284", }
@@ -19,6 +18,9 @@ else:
proxies = None
print('网络代理状态:未配置。无代理状态下很可能无法访问。')
+
+# [step 3]>> 以下配置可以优化体验,但大部分场合下并不需要修改
+
# 发送请求到OpenAI后,等待多久判定为超时
TIMEOUT_SECONDS = 25
@@ -28,9 +30,12 @@ WEB_PORT = -1
# 如果OpenAI不响应(网络卡顿、代理失败、KEY失效),重试的次数限制
MAX_RETRY = 2
-# 选择的OpenAI模型是(gpt4现在只对申请成功的人开放)
+# OpenAI模型选择是(gpt4现在只对申请成功的人开放)
LLM_MODEL = "gpt-3.5-turbo"
+# OpenAI的API_URL
+API_URL = "https://api.openai.com/v1/chat/completions"
+
# 设置并行使用的线程数
CONCURRENT_COUNT = 100
From 72f23cbbef3639cb24afedb308fe1ff41914dcd4 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Fri, 31 Mar 2023 20:05:31 +0800
Subject: [PATCH 19/35] fix import error
---
functional_crazy.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/functional_crazy.py b/functional_crazy.py
index 861aae5..6f455ec 100644
--- a/functional_crazy.py
+++ b/functional_crazy.py
@@ -1,4 +1,4 @@
-from functools import HotReload # HotReload 的意思是热更新,修改函数插件后,不需要重启程序,代码直接生效
+from toolbox import HotReload # HotReload 的意思是热更新,修改函数插件后,不需要重启程序,代码直接生效
# UserVisibleLevel是过滤器参数。
# 由于UI界面空间有限,所以通过这种方式决定UI界面中显示哪些插件
From 87c09368da7932c29db6690582bbf0a62f0aa426 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Fri, 31 Mar 2023 20:12:27 +0800
Subject: [PATCH 20/35] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=96=87=E6=9C=AC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
crazy_functions/高级功能函数模板.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/crazy_functions/高级功能函数模板.py b/crazy_functions/高级功能函数模板.py
index 4cf1cb9..ec14e74 100644
--- a/crazy_functions/高级功能函数模板.py
+++ b/crazy_functions/高级功能函数模板.py
@@ -5,7 +5,7 @@ import datetime
@CatchException
def 高阶功能模板函数(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
history = [] # 清空历史,以免输入溢出
- chatbot.append(("这是什么功能?", "[Local Message] 请注意,您正在调用一个函数模板,该函数面向希望实现更多有趣功能的开发者,它可以作为创建新功能函数的模板。为了做到简单易读,该函数只有25行代码,不会实时反馈文字流或心跳,请耐心等待程序输出完成。另外您若希望分享新的功能模组,请不吝PR!"))
+ chatbot.append(("这是什么功能?", "[Local Message] 请注意,您正在调用一个[函数插件]的模板,该函数面向希望实现更多有趣功能的开发者,它可以作为创建新功能函数的模板。为了做到简单易读,该函数只有25行代码,所以不会实时反馈文字流或心跳,请耐心等待程序输出完成。此外我们也提供可同步处理大量文件的多线程Demo供您参考。您若希望分享新的功能模组,请不吝PR!"))
yield chatbot, history, '正常' # 由于请求gpt需要一段时间,我们先及时地做一次状态显示
for i in range(5):
From 167be41621c950188f8d0ce10dd79da787869818 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Fri, 31 Mar 2023 21:03:12 +0800
Subject: [PATCH 21/35] =?UTF-8?q?pdfminer=E6=95=B4=E5=90=88=E5=88=B0?=
=?UTF-8?q?=E4=B8=80=E4=B8=AA=E6=96=87=E4=BB=B6=E4=B8=AD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
crazy_functions/批量总结PDF文档.py | 1 -
crazy_functions/批量总结PDF文档pdfminer.py | 151 +++++++++++++++++++++
crazy_functions/读文章写摘要.py | 20 +--
functional_crazy.py | 10 +-
requirements.txt | 13 +-
toolbox.py | 58 +-------
6 files changed, 171 insertions(+), 82 deletions(-)
create mode 100644 crazy_functions/批量总结PDF文档pdfminer.py
diff --git a/crazy_functions/批量总结PDF文档.py b/crazy_functions/批量总结PDF文档.py
index bf7fe6f..102bc9e 100644
--- a/crazy_functions/批量总结PDF文档.py
+++ b/crazy_functions/批量总结PDF文档.py
@@ -11,7 +11,6 @@ def 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, histor
file_content = ""
for page in doc:
file_content += page.get_text()
- file_content = file_content.encode('gbk', 'ignore').decode('gbk')
print(file_content)
prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index==0 else ""
diff --git a/crazy_functions/批量总结PDF文档pdfminer.py b/crazy_functions/批量总结PDF文档pdfminer.py
new file mode 100644
index 0000000..060187c
--- /dev/null
+++ b/crazy_functions/批量总结PDF文档pdfminer.py
@@ -0,0 +1,151 @@
+from predict import predict_no_ui
+from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
+
+fast_debug = False
+
+def readPdf(pdfPath):
+    """
+    Read a PDF file with pdfminer and return the text it contains.
+
+    Only horizontal text boxes (``LTTextBoxHorizontal``) are collected; every other
+    layout object on a page is skipped.
+
+    :param pdfPath: path of the PDF file to read.
+    :return: list of str, one entry per text box, in the order the pages yield them.
+    :raises PDFTextExtractionNotAllowed: if the document does not allow text extraction.
+    """
+    # Imported here rather than at module level, so importing this module does not
+    # require pdfminer.
+    from pdfminer.pdfparser import PDFParser
+    from pdfminer.pdfdocument import PDFDocument
+    from pdfminer.pdfpage import PDFPage, PDFTextExtractionNotAllowed
+    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
+    from pdfminer.layout import LAParams, LTTextBoxHorizontal
+    from pdfminer.converter import PDFPageAggregator
+
+    # Set parameters for the layout analysis.
+    laparams = LAParams(
+        char_margin=10.0,
+        line_margin=0.2,
+        boxes_flow=0.2,
+        all_texts=False,
+    )
+    # Create a PDF resource manager object that stores shared resources.
+    rsrcmgr = PDFResourceManager()
+    device = PDFPageAggregator(rsrcmgr, laparams=laparams)
+    interpreter = PDFPageInterpreter(rsrcmgr, device)
+
+    outTextList = []
+    # Keep the file open while the pages are processed; the with-block closes it
+    # afterwards, even when an exception is raised.
+    with open(pdfPath, 'rb') as fp:
+        # Create a PDF parser object associated with the file object.
+        parser = PDFParser(fp)
+        # Create a PDF document object that stores the document structure.
+        # A password could be supplied as the 2nd parameter.
+        document = PDFDocument(parser)
+        # Check if the document allows text extraction. If not, abort.
+        if not document.is_extractable:
+            raise PDFTextExtractionNotAllowed
+
+        # Loop over all pages in the document.
+        for page in PDFPage.create_pages(document):
+            # Read the page into a layout object.
+            interpreter.process_page(page)
+            layout = device.get_result()
+            # A layout container is iterable, no need to reach into its private _objs.
+            for obj in layout:
+                if isinstance(obj, LTTextBoxHorizontal):
+                    outTextList.append(obj.get_text())
+
+    return outTextList
+
+
+def 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt):
+    """
+    Summarise each .tex / .pdf file with GPT, then request an overall abstract.
+    Generator yielding ``(chatbot, history, status)``; ``chatbot`` and ``history`` are
+    mutated in place. ``systemPromptTxt`` is accepted but not used.
+    """
+    import time, os
+    from bs4 import BeautifulSoup
+    print('begin analysis on:', file_manifest)
+    for index, fp in enumerate(file_manifest):
+        # Decide by the real extension (case-insensitive), not by a substring of the path.
+        if os.path.splitext(fp)[1].lower() == ".pdf":
+            # ``.body`` is None when nothing could be parsed (e.g. no text was extracted).
+            body = BeautifulSoup(''.join(readPdf(fp)), features="lxml").body
+            # The GBK round trip discards every character GBK cannot encode.
+            file_content = ("" if body is None else body.text).encode('gbk', 'ignore').decode('gbk')
+        else:
+            with open(fp, 'r', encoding='utf-8') as f:
+                file_content = f.read()
+
+        prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index==0 else ""
+        i_say = prefix + f'请对下面的文章片段用中文做一个概述,文件名是{os.path.relpath(fp, project_folder)},文章内容是 ```{file_content}```'
+        i_say_show_user = prefix + f'[{index+1}/{len(file_manifest)}] 请对下面的文章片段做一个概述: {os.path.abspath(fp)}'
+        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
+        yield chatbot, history, '正常'
+        if not fast_debug:
+            msg = '正常'
+            # ** gpt request ** -- each file is sent on its own, without earlier history
+            gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[])   # with timeout countdown
+            chatbot[-1] = (i_say_show_user, gpt_say)
+            history.append(i_say_show_user); history.append(gpt_say)
+            yield chatbot, history, msg
+            time.sleep(2)
+
+    all_file = ', '.join([os.path.relpath(fp, project_folder) for fp in file_manifest])
+    i_say = f'根据以上你自己的分析,对全文进行概括,用学术性语言写一段中文摘要,然后再写一段英文摘要(包括{all_file})。'
+    chatbot.append((i_say, "[Local Message] waiting gpt response."))
+    yield chatbot, history, '正常'
+    if not fast_debug:
+        msg = '正常'
+        # ** gpt request ** -- this time the per-file questions and answers are sent along
+        gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say, chatbot, top_p, temperature, history=history)   # with timeout countdown
+        chatbot[-1] = (i_say, gpt_say)
+        history.append(i_say); history.append(gpt_say)
+        yield chatbot, history, msg
+        res = write_results_to_file(history)
+        chatbot.append(("完成了吗?", res))
+        yield chatbot, history, msg
+
+
+
+@CatchException
+def 批量总结PDF文档pdfminer(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
+    """Plugin entry point: summarise every .tex / .pdf file found under the folder ``txt``.
+
+    Generator yielding ``(chatbot, history, status)``; problems are reported in the chat.
+    """
+    history = []    # clear the history so the input does not overflow
+    import glob, os
+    # Basic information: what the plugin does and who contributed it.
+    chatbot.append([
+        "函数插件功能?",
+        "批量总结PDF文档,此版本使用pdfminer插件,带token约简功能。函数插件贡献者: Euclid-Jie。"])
+    yield chatbot, history, '正常'
+    # Try to import the dependencies (lxml is the parser handed to BeautifulSoup).
+    try:
+        import pdfminer, bs4, lxml
+    except ImportError:
+        report_execption(chatbot, history,
+            a = f"解析项目: {txt}",
+            b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pdfminer.six beautifulsoup4 lxml```。")
+        yield chatbot, history, '正常'
+        return
+    if os.path.exists(txt):
+        project_folder = txt
+    else:
+        if txt == "": txt = '空空如也的输入栏'
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
+        yield chatbot, history, '正常'
+        return
+    file_manifest = glob.glob(f'{project_folder}/**/*.tex', recursive=True) + \
+                    glob.glob(f'{project_folder}/**/*.pdf', recursive=True)
+    if not file_manifest:
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex或pdf文件: {txt}")
+        yield chatbot, history, '正常'
+        return
+    yield from 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
+
diff --git a/crazy_functions/读文章写摘要.py b/crazy_functions/读文章写摘要.py
index 4144d11..dc92256 100644
--- a/crazy_functions/读文章写摘要.py
+++ b/crazy_functions/读文章写摘要.py
@@ -1,19 +1,14 @@
from predict import predict_no_ui
-from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down, readPdf
+from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
fast_debug = False
-from bs4 import BeautifulSoup
def 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt):
import time, glob, os
print('begin analysis on:', file_manifest)
for index, fp in enumerate(file_manifest):
- if ".tex" in fp:
- with open(fp, 'r', encoding='utf-8') as f:
- file_content = f.read()
- if ".pdf" in fp.lower():
- file_content = readPdf(fp)
- file_content = BeautifulSoup(''.join(file_content), features="lxml").body.text.encode('gbk', 'ignore').decode('gbk')
+ with open(fp, 'r', encoding='utf-8') as f:
+ file_content = f.read()
prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index==0 else ""
i_say = prefix + f'请对下面的文章片段用中文做一个概述,文件名是{os.path.relpath(fp, project_folder)},文章内容是 ```{file_content}```'
@@ -22,7 +17,7 @@ def 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, hist
print('[1] yield chatbot, history')
yield chatbot, history, '正常'
- if not fast_debug:
+ if not fast_debug:
msg = '正常'
# ** gpt request **
gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[]) # 带超时倒计时
@@ -40,7 +35,7 @@ def 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, hist
chatbot.append((i_say, "[Local Message] waiting gpt response."))
yield chatbot, history, '正常'
- if not fast_debug:
+ if not fast_debug:
msg = '正常'
# ** gpt request **
gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say, chatbot, top_p, temperature, history=history) # 带超时倒计时
@@ -65,12 +60,11 @@ def 读文章写摘要(txt, top_p, temperature, chatbot, history, systemPromptTx
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield chatbot, history, '正常'
return
- file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] + \
- [f for f in glob.glob(f'{project_folder}/**/*.pdf', recursive=True)] # + \
+ file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] # + \
# [f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)] + \
# [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
if len(file_manifest) == 0:
- report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex或pdf文件: {txt}")
+ report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
yield chatbot, history, '正常'
return
yield from 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
diff --git a/functional_crazy.py b/functional_crazy.py
index 4b90af4..ef5b6c2 100644
--- a/functional_crazy.py
+++ b/functional_crazy.py
@@ -30,7 +30,7 @@ def get_crazy_functionals():
"Color": "stop", # 按钮颜色
"Function": 解析一个C项目
},
- "读tex or pdf论文写摘要": {
+ "读tex论文写摘要": {
"Color": "stop", # 按钮颜色
"Function": 读文章写摘要
},
@@ -55,7 +55,13 @@ def get_crazy_functionals():
"Function": 批量总结PDF文档
},
})
-
+ from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer
+ function_plugins.update({
+ "[仅供开发调试] 批量总结PDF文档pdfminer": {
+ "Color": "stop",
+ "Function": 批量总结PDF文档pdfminer
+ },
+ })
# VisibleLevel=2 尚未充分测试的函数插件,放在这里
if UserVisibleLevel >= 2:
function_plugins.update({
diff --git a/requirements.txt b/requirements.txt
index 56c5b23..0e1d7db 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,5 @@
gradio>=3.23
-requests[socks]~=2.28.2
-mdtex2html~=1.2.0
-Markdown~=3.4.3
-latex2mathml~=3.75.1
-bs4~=0.0.1
-lxml~=4.6.4
-beautifulsoup4~=4.12.0
-numpy~=1.24.2
-pdfminer.six
\ No newline at end of file
+requests[socks]
+mdtex2html
+Markdown
+latex2mathml
\ No newline at end of file
diff --git a/toolbox.py b/toolbox.py
index b30c255..d96b3f6 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -1,14 +1,6 @@
import markdown, mdtex2html, threading, importlib, traceback
from show_math import convert as convert_math
from functools import wraps
-import pdfminer
-from pdfminer.pdfparser import PDFParser
-from pdfminer.pdfdocument import PDFDocument
-from pdfminer.pdfpage import PDFPage, PDFTextExtractionNotAllowed
-from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
-from pdfminer.pdfdevice import PDFDevice
-from pdfminer.layout import LAParams
-from pdfminer.converter import PDFPageAggregator
def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[], sys_prompt=''):
"""
@@ -243,52 +235,4 @@ def clear_line_break(txt):
txt = txt.replace('\n', ' ')
txt = txt.replace(' ', ' ')
txt = txt.replace(' ', ' ')
- return txt
-
-def readPdf(pdfPath):
- """
- 读取pdf文件,返回文本内容
- """
- fp = open(pdfPath, 'rb')
-
- # Create a PDF parser object associated with the file object
- parser = PDFParser(fp)
-
- # Create a PDF document object that stores the document structure.
- # Password for initialization as 2nd parameter
- document = PDFDocument(parser)
- # Check if the document allows text extraction. If not, abort.
- if not document.is_extractable:
- raise PDFTextExtractionNotAllowed
-
- # Create a PDF resource manager object that stores shared resources.
- rsrcmgr = PDFResourceManager()
-
- # Create a PDF device object.
- # device = PDFDevice(rsrcmgr)
-
- # BEGIN LAYOUT ANALYSIS.
- # Set parameters for analysis.
- laparams = LAParams(
- char_margin=10.0,
- line_margin=0.2,
- boxes_flow=0.2,
- all_texts=False,
- )
- # Create a PDF page aggregator object.
- device = PDFPageAggregator(rsrcmgr, laparams=laparams)
- # Create a PDF interpreter object.
- interpreter = PDFPageInterpreter(rsrcmgr, device)
-
- # loop over all pages in the document
- outTextList = []
- for page in PDFPage.create_pages(document):
- # read the page into a layout object
- interpreter.process_page(page)
- layout = device.get_result()
- for obj in layout._objs:
- if isinstance(obj, pdfminer.layout.LTTextBoxHorizontal):
- # print(obj.get_text())
- outTextList.append(obj.get_text())
-
- return outTextList
\ No newline at end of file
+ return txt
\ No newline at end of file
From 16caf34800b1183a642a76b73abe4b68e4223f51 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Fri, 31 Mar 2023 21:05:18 +0800
Subject: [PATCH 22/35] =?UTF-8?q?=E6=95=B4=E5=90=88?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
functional_crazy.py | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/functional_crazy.py b/functional_crazy.py
index d48c1a7..d820b14 100644
--- a/functional_crazy.py
+++ b/functional_crazy.py
@@ -55,20 +55,19 @@ def get_crazy_functionals():
# VisibleLevel=1 经过测试,但功能未达到理想状态
if UserVisibleLevel >= 1:
from crazy_functions.批量总结PDF文档 import 批量总结PDF文档
+ from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer
function_plugins.update({
"[仅供开发调试] 批量总结PDF文档": {
"Color": "stop",
# HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
"Function": HotReload(批量总结PDF文档)
},
- })
- from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer
- function_plugins.update({
"[仅供开发调试] 批量总结PDF文档pdfminer": {
"Color": "stop",
- "Function": 批量总结PDF文档pdfminer
+ "Function": HotReload(批量总结PDF文档pdfminer)
},
})
+
# VisibleLevel=2 尚未充分测试的函数插件,放在这里
if UserVisibleLevel >= 2:
function_plugins.update({
From ab879ca4b7efabeb14cc3a74ceaaedfb13edbd7d Mon Sep 17 00:00:00 2001
From: Siyuan Feng
Date: Fri, 31 Mar 2023 21:26:55 +0800
Subject: [PATCH 23/35] feat: clean pdf fitz text
---
crazy_functions/批量总结PDF文档.py | 3 +-
toolbox.py | 58 +++++++++++++++++++++++++++++-
2 files changed, 59 insertions(+), 2 deletions(-)
diff --git a/crazy_functions/批量总结PDF文档.py b/crazy_functions/批量总结PDF文档.py
index 102bc9e..29ffb25 100644
--- a/crazy_functions/批量总结PDF文档.py
+++ b/crazy_functions/批量总结PDF文档.py
@@ -1,5 +1,5 @@
from predict import predict_no_ui
-from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
+from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down, clean_text
fast_debug = False
@@ -11,6 +11,7 @@ def 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, histor
file_content = ""
for page in doc:
file_content += page.get_text()
+ file_content = clean_text(file_content)
print(file_content)
prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index==0 else ""
diff --git a/toolbox.py b/toolbox.py
index d96b3f6..be0a188 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -235,4 +235,60 @@ def clear_line_break(txt):
txt = txt.replace('\n', ' ')
txt = txt.replace(' ', ' ')
txt = txt.replace(' ', ' ')
- return txt
\ No newline at end of file
+ return txt
+
+import re
+import unicodedata
+
+def is_paragraph_break(match):
+ """
+ 根据给定的匹配结果来判断换行符是否表示段落分隔。
+ 如果换行符前为句子结束标志(句号,感叹号,问号),且下一个字符为大写字母,则换行符更有可能表示段落分隔。
+ 也可以根据之前的内容长度来判断段落是否已经足够长。
+ """
+ prev_char, next_char = match.groups()
+
+ # 句子结束标志
+ sentence_endings = ".!?"
+
+ # 设定一个最小段落长度阈值
+ min_paragraph_length = 140
+
+ if prev_char in sentence_endings and next_char.isupper() and len(match.string[:match.start(1)]) > min_paragraph_length:
+ return "\n\n"
+ else:
+ return " "
+
+def normalize_text(text):
+ """
+ 通过把连字(ligatures)等文本特殊符号转换为其基本形式来对文本进行归一化处理。
+ 例如,将连字 "fi" 转换为 "f" 和 "i"。
+ """
+ # 对文本进行归一化处理,分解连字
+ normalized_text = unicodedata.normalize("NFKD", text)
+
+ # 替换其他特殊字符
+ cleaned_text = re.sub(r'[^\x00-\x7F]+', '', normalized_text)
+
+ return cleaned_text
+
+def clean_text(raw_text):
+ """
+ 对从 PDF 提取出的原始文本进行清洗和格式化处理。
+ 1. 对原始文本进行归一化处理。
+ 2. 替换跨行的连词,例如 “Espe-\ncially” 转换为 “Especially”。
+ 3. 根据 heuristic 规则判断换行符是否是段落分隔,并相应地进行替换。
+ """
+ # 对文本进行归一化处理
+ normalized_text = normalize_text(raw_text)
+
+ # 替换跨行的连词
+ text = re.sub(r'(\w+-\n\w+)', lambda m: m.group(1).replace('-\n', ''), normalized_text)
+
+ # 根据前后相邻字符的特点,找到原文本中的换行符
+ newlines = re.compile(r'(\S)\n(\S)')
+
+ # 根据 heuristic 规则,用空格或段落分隔符替换原换行符
+ final_text = re.sub(newlines, lambda m: m.group(1) + is_paragraph_break(m) + m.group(2), text)
+
+ return final_text.strip()
\ No newline at end of file
From a87ce5bb774a169019955c2ed4bf63f6d296f950 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Fri, 31 Mar 2023 21:37:46 +0800
Subject: [PATCH 24/35] JasonGuo1
---
crazy_functions/总结word文档.py | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/crazy_functions/总结word文档.py b/crazy_functions/总结word文档.py
index 324d7bd..a117fb3 100644
--- a/crazy_functions/总结word文档.py
+++ b/crazy_functions/总结word文档.py
@@ -82,6 +82,10 @@ def 解析docx(file_manifest, project_folder, top_p, temperature, chatbot, histo
def 总结word文档(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
import glob, os
+ # 基本信息:功能、贡献者
+ chatbot.append([
+ "函数插件功能?",
+ "批量总结Word文档。函数插件贡献者: JasonGuo1"])
yield chatbot, history, '正常'
# 尝试导入依赖,如果缺少依赖,则给出安装建议
From 6fc2423ae35625a02a21e29ab78e5e572efce786 Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Fri, 31 Mar 2023 21:41:17 +0800
Subject: [PATCH 25/35] add contributor
---
crazy_functions/批量总结PDF文档.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/crazy_functions/批量总结PDF文档.py b/crazy_functions/批量总结PDF文档.py
index 29ffb25..ab9ba83 100644
--- a/crazy_functions/批量总结PDF文档.py
+++ b/crazy_functions/批量总结PDF文档.py
@@ -59,7 +59,7 @@ def 批量总结PDF文档(txt, top_p, temperature, chatbot, history, systemPromp
# 基本信息:功能、贡献者
chatbot.append([
"函数插件功能?",
- "批量总结PDF文档。函数插件贡献者: ValeriaWong"])
+ "批量总结PDF文档。函数插件贡献者: ValeriaWong,Eralien"])
yield chatbot, history, '正常'
# 尝试导入依赖,如果缺少依赖,则给出安装建议
From 14a7d00037cb92ff71ebd7dc03948c5e71272e14 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Fri, 31 Mar 2023 21:46:47 +0800
Subject: [PATCH 26/35] =?UTF-8?q?=E7=A7=BB=E5=8A=A8=E5=87=BD=E6=95=B0?=
=?UTF-8?q?=E5=88=B0=E8=B0=83=E7=94=A8=E6=A8=A1=E7=BB=84?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
crazy_functions/批量总结PDF文档.py | 56 +++++++++++++++++++++++++++++-
toolbox.py | 56 ------------------------------
2 files changed, 55 insertions(+), 57 deletions(-)
diff --git a/crazy_functions/批量总结PDF文档.py b/crazy_functions/批量总结PDF文档.py
index ab9ba83..7270263 100644
--- a/crazy_functions/批量总结PDF文档.py
+++ b/crazy_functions/批量总结PDF文档.py
@@ -1,7 +1,61 @@
from predict import predict_no_ui
-from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down, clean_text
+from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
+import re
+import unicodedata
fast_debug = False
+def is_paragraph_break(match):
+ """
+ 根据给定的匹配结果来判断换行符是否表示段落分隔。
+ 如果换行符前为句子结束标志(句号,感叹号,问号),且下一个字符为大写字母,则换行符更有可能表示段落分隔。
+ 也可以根据之前的内容长度来判断段落是否已经足够长。
+ """
+ prev_char, next_char = match.groups()
+
+ # 句子结束标志
+ sentence_endings = ".!?"
+
+ # 设定一个最小段落长度阈值
+ min_paragraph_length = 140
+
+ if prev_char in sentence_endings and next_char.isupper() and len(match.string[:match.start(1)]) > min_paragraph_length:
+ return "\n\n"
+ else:
+ return " "
+
+def normalize_text(text):
+ """
+ 通过把连字(ligatures)等文本特殊符号转换为其基本形式来对文本进行归一化处理。
+ 例如,将连字 "fi" 转换为 "f" 和 "i"。
+ """
+ # 对文本进行归一化处理,分解连字
+ normalized_text = unicodedata.normalize("NFKD", text)
+
+ # 替换其他特殊字符
+ cleaned_text = re.sub(r'[^\x00-\x7F]+', '', normalized_text)
+
+ return cleaned_text
+
+def clean_text(raw_text):
+ """
+ 对从 PDF 提取出的原始文本进行清洗和格式化处理。
+ 1. 对原始文本进行归一化处理。
+ 2. 替换跨行的连词,例如 “Espe-\ncially” 转换为 “Especially”。
+ 3. 根据 heuristic 规则判断换行符是否是段落分隔,并相应地进行替换。
+ """
+ # 对文本进行归一化处理
+ normalized_text = normalize_text(raw_text)
+
+ # 替换跨行的连词
+ text = re.sub(r'(\w+-\n\w+)', lambda m: m.group(1).replace('-\n', ''), normalized_text)
+
+ # 根据前后相邻字符的特点,找到原文本中的换行符
+ newlines = re.compile(r'(\S)\n(\S)')
+
+ # 根据 heuristic 规则,用空格或段落分隔符替换原换行符
+ final_text = re.sub(newlines, lambda m: m.group(1) + is_paragraph_break(m) + m.group(2), text)
+
+ return final_text.strip()
def 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt):
import time, glob, os, fitz
diff --git a/toolbox.py b/toolbox.py
index ef15803..7374002 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -280,59 +280,3 @@ def clear_line_break(txt):
txt = txt.replace(' ', ' ')
txt = txt.replace(' ', ' ')
return txt
-
-import re
-import unicodedata
-
-def is_paragraph_break(match):
- """
- 根据给定的匹配结果来判断换行符是否表示段落分隔。
- 如果换行符前为句子结束标志(句号,感叹号,问号),且下一个字符为大写字母,则换行符更有可能表示段落分隔。
- 也可以根据之前的内容长度来判断段落是否已经足够长。
- """
- prev_char, next_char = match.groups()
-
- # 句子结束标志
- sentence_endings = ".!?"
-
- # 设定一个最小段落长度阈值
- min_paragraph_length = 140
-
- if prev_char in sentence_endings and next_char.isupper() and len(match.string[:match.start(1)]) > min_paragraph_length:
- return "\n\n"
- else:
- return " "
-
-def normalize_text(text):
- """
- 通过把连字(ligatures)等文本特殊符号转换为其基本形式来对文本进行归一化处理。
- 例如,将连字 "fi" 转换为 "f" 和 "i"。
- """
- # 对文本进行归一化处理,分解连字
- normalized_text = unicodedata.normalize("NFKD", text)
-
- # 替换其他特殊字符
- cleaned_text = re.sub(r'[^\x00-\x7F]+', '', normalized_text)
-
- return cleaned_text
-
-def clean_text(raw_text):
- """
- 对从 PDF 提取出的原始文本进行清洗和格式化处理。
- 1. 对原始文本进行归一化处理。
- 2. 替换跨行的连词,例如 “Espe-\ncially” 转换为 “Especially”。
- 3. 根据 heuristic 规则判断换行符是否是段落分隔,并相应地进行替换。
- """
- # 对文本进行归一化处理
- normalized_text = normalize_text(raw_text)
-
- # 替换跨行的连词
- text = re.sub(r'(\w+-\n\w+)', lambda m: m.group(1).replace('-\n', ''), normalized_text)
-
- # 根据前后相邻字符的特点,找到原文本中的换行符
- newlines = re.compile(r'(\S)\n(\S)')
-
- # 根据 heuristic 规则,用空格或段落分隔符替换原换行符
- final_text = re.sub(newlines, lambda m: m.group(1) + is_paragraph_break(m) + m.group(2), text)
-
- return final_text.strip()
\ No newline at end of file
From 9593b0d09d0c6e480726d994c11c0f6f1710c0d2 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Fri, 31 Mar 2023 22:36:46 +0800
Subject: [PATCH 27/35] =?UTF-8?q?=E4=BC=98=E5=8C=96=E8=87=AA=E8=AF=91?=
=?UTF-8?q?=E8=A7=A3=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
config.py | 7 +-
crazy_functions/解析项目源代码.py | 7 +-
project_self_analysis.md | 181 +++++++++++++++++++-----------
toolbox.py | 47 ++++++--
4 files changed, 164 insertions(+), 78 deletions(-)
diff --git a/config.py b/config.py
index 72309e7..6bb421e 100644
--- a/config.py
+++ b/config.py
@@ -1,7 +1,6 @@
# [step 1]>> 例如: API_KEY = "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" (此key无效)
API_KEY = "sk-此处填API密钥"
-
# [step 2]>> 改为True应用代理,如果直接在海外服务器部署,此处不修改
USE_PROXY = False
if USE_PROXY:
@@ -12,7 +11,11 @@ if USE_PROXY:
# [端口] 在代理软件的设置里找。虽然不同的代理软件界面不一样,但端口号都应该在最显眼的位置上
# 代理网络的地址,打开你的科学上网软件查看代理的协议(socks5/http)、地址(localhost)和端口(11284)
- proxies = { "http": "socks5h://localhost:11284", "https": "socks5h://localhost:11284", }
+ proxies = {
+ # [协议]:// [地址] :[端口]
+ "http": "socks5h://localhost:11284",
+ "https": "socks5h://localhost:11284",
+ }
print('网络代理状态:运行。')
else:
proxies = None
diff --git a/crazy_functions/解析项目源代码.py b/crazy_functions/解析项目源代码.py
index a239d96..9ae53a7 100644
--- a/crazy_functions/解析项目源代码.py
+++ b/crazy_functions/解析项目源代码.py
@@ -50,7 +50,8 @@ def 解析源代码(file_manifest, project_folder, top_p, temperature, chatbot,
def 解析项目本身(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
history = [] # 清空历史,以免输入溢出
import time, glob, os
- file_manifest = [f for f in glob.glob('*.py')]
+ file_manifest = [f for f in glob.glob('./*.py') if ('test_project' not in f) and ('gpt_log' not in f)] + \
+ [f for f in glob.glob('./crazy_functions/*.py') if ('test_project' not in f) and ('gpt_log' not in f)]
for index, fp in enumerate(file_manifest):
# if 'test_project' in fp: continue
with open(fp, 'r', encoding='utf-8') as f:
@@ -65,7 +66,7 @@ def 解析项目本身(txt, top_p, temperature, chatbot, history, systemPromptTx
if not fast_debug:
# ** gpt request **
# gpt_say = predict_no_ui(inputs=i_say, top_p=top_p, temperature=temperature)
- gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[]) # 带超时倒计时
+ gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[], long_connection=True) # 带超时倒计时
chatbot[-1] = (i_say_show_user, gpt_say)
history.append(i_say_show_user); history.append(gpt_say)
@@ -79,7 +80,7 @@ def 解析项目本身(txt, top_p, temperature, chatbot, history, systemPromptTx
if not fast_debug:
# ** gpt request **
# gpt_say = predict_no_ui(inputs=i_say, top_p=top_p, temperature=temperature, history=history)
- gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say, chatbot, top_p, temperature, history=history) # 带超时倒计时
+ gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say, chatbot, top_p, temperature, history=history, long_connection=True) # 带超时倒计时
chatbot[-1] = (i_say, gpt_say)
history.append(i_say); history.append(gpt_say)
diff --git a/project_self_analysis.md b/project_self_analysis.md
index f0a544e..b1f5302 100644
--- a/project_self_analysis.md
+++ b/project_self_analysis.md
@@ -1,88 +1,85 @@
# chatgpt-academic项目自译解报告
(Author补充:以下分析均由本项目调用ChatGPT一键生成,如果有不准确的地方,全怪GPT😄)
-## [0/10] 程序摘要: check_proxy.py
+## [0/18] 程序摘要: functional_crazy.py
-这个程序是一个用来检查代理服务器是否有效的 Python 程序代码。程序文件名为 check_proxy.py。其中定义了一个函数 check_proxy,该函数接收一个代理配置信息 proxies,使用 requests 库向一个代理服务器发送请求,获取该代理的所在地信息并返回。如果请求超时或者异常,该函数将返回一个代理无效的结果。
+这是一个功能扩展的程序,文件名为 `functional_crazy.py`。代码的主要功能是通过提供一系列函数插件,增强程序的功能,让用户可以通过界面中的按钮,快速调用对应的函数插件实现相应的操作。代码中使用了 `HotReload` 函数插件,可以在不重启程序的情况下更新函数插件的代码,让其生效。同时,通过 `UserVisibleLevel` 变量的设置,可以控制哪些插件会在UI界面显示出来。函数插件列表包括了以下功能:解析项目本身、解析一个Python项目、解析一个C++项目头文件、解析一个C++项目、读取文章并生成摘要、批量生成函数注释、全项目切换成英文、批量总结PDF文档、批量总结PDF文档pdfminer、批量总结Word文档、高阶功能模板函数、以及其他未经充分测试的函数插件。
-程序代码分为两个部分,首先是 check_proxy 函数的定义部分,其次是程序文件的入口部分,在该部分代码中,程序从 config_private.py 文件或者 config.py 文件中加载代理配置信息,然后调用 check_proxy 函数来检测代理服务器是否有效。如果配置文件 config_private.py 存在,则会加载其中的代理配置信息,否则会从 config.py 文件中读取。
+## [1/18] 程序摘要: main.py
-## [1/10] 程序摘要: config.py
+该程序是一个基于Gradio构建的对话生成模型的Web界面示例,包含了以下主要功能:
-本程序文件名为config.py,主要功能是存储应用所需的常量和配置信息。
+1.加载模型并对用户输入进行响应;
+2.通过调用外部函数库来获取用户的输入,并在模型生成的过程中进行处理;
+3.支持用户上传本地文件,供外部函数库调用;
+4.支持停止当前的生成过程;
+5.保存用户的历史记录,并将其记录在本地日志文件中,以供后续分析和使用。
-其中,包含了应用所需的OpenAI API密钥、API接口地址、网络代理设置、超时设置、网络端口和OpenAI模型选择等信息,在运行应用前需要进行相应的配置。在未配置网络代理时,程序给出了相应的警告提示。
+该程序需要依赖于一些外部库和软件包,如Gradio、torch等。用户需要确保这些依赖项已经安装,并且在运行该程序前对config_private.py配置文件进行相应的修改。
-此外,还包含了一个检查函数,用于检查是否忘记修改API密钥。
+## [2/18] 程序摘要: functional.py
-总之,config.py文件是应用中的一个重要配置文件,用来存储应用所需的常量和配置信息,需要在应用运行前进行相应的配置。
+该文件定义了一个名为“get_functionals”的函数,函数的作用是返回一个包含多个字典(键值对)的字典,每个键值对表示一种功能。该字典的键值由功能名称和对应的数据组成。其中的每个字典最多包含4个键值对,分别为“Prefix”、“Suffix”、“Color”和“PreProcess”,分别表示前缀、后缀、按钮颜色和预处理函数。如果某些键值对没有给出,那么程序会使用相应的默认值,如按钮颜色默认为“secondary”等。每个功能描述了不同的学术润色/翻译/其他服务,如“英语学术润色”、“中文学术润色”、“查找语法错误”等。函数还引用了一个名为“clear_line_break”的函数,用于预处理修改前的文本。
-## [2/10] 程序摘要: config_private.py
+## [3/18] 程序摘要: show_math.py
-该文件是一个配置文件,命名为config_private.py。它是一个Python脚本,用于配置OpenAI的API密钥、模型和其它相关设置。该配置文件还可以设置是否使用代理。如果使用代理,需要设置代理协议、地址和端口。在设置代理之后,该文件还包括一些用于测试代理是否正常工作的代码。该文件还包括超时时间、随机端口、重试次数等设置。在文件末尾,还有一个检查代码,如果没有更改API密钥,则抛出异常。
+该程序文件名为show_math.py,主要用途是将Markdown和LaTeX混合格式转换成带有MathML的HTML格式。该程序通过递归地处理LaTeX和Markdown混合段落逐一转换成HTML/MathML标记出来,并在LaTeX公式创建中进行错误处理。在程序文件中定义了3个变量,分别是incomplete,convError和convert,其中convert函数是用来执行转换的主要函数。程序使用正则表达式进行LaTeX格式和Markdown段落的分割,从而实现转换。如果在Latex转换过程中发生错误,程序将输出相应的错误信息。
-## [3/10] 程序摘要: functional.py
+## [4/18] 程序摘要: predict.py
-该程序文件名为 functional.py,其中包含一个名为 get_functionals 的函数,该函数返回一个字典,该字典包含了各种翻译、校对等功能的名称、前缀、后缀以及默认按钮颜色等信息。具体功能包括:英语学术润色、中文学术润色、查找语法错误、中英互译、中译英、学术中译英、英译中、解释代码等。该程序的作用为提供各种翻译、校对等功能的模板,以便后续程序可以直接调用。
+本程序文件的文件名为"./predict.py",主要包含三个函数:
-(Author补充:这个文件汇总了模块化的Prompt调用,如果发现了新的好用Prompt,别藏着哦^_^速速PR)
+1. predict:正常对话时使用,具备完备的交互功能,不可多线程;
+2. predict_no_ui:高级实验性功能模块调用,不会实时显示在界面上,参数简单,可以多线程并行,方便实现复杂的功能逻辑;
+3. predict_no_ui_long_connection:在实验过程中发现调用predict_no_ui处理长文档时,和openai的连接容易断掉,这个函数用stream的方式解决这个问题,同样支持多线程。
+其中,predict函数用于基础的对话功能,发送至chatGPT,流式获取输出,根据点击的哪个按钮,进行对话预处理等额外操作;predict_no_ui函数用于payload比较大的情况,或者用于实现多线、带嵌套的复杂功能;predict_no_ui_long_connection实现调用predict_no_ui处理长文档时,避免连接断掉的情况,支持多线程。
-## [4/10] 程序摘要: functional_crazy.py
+## [5/18] 程序摘要: check_proxy.py
-这个程序文件 functional_crazy.py 导入了一些 python 模块,并提供了一个函数 get_crazy_functionals(),该函数返回不同实验功能的描述和函数。其中,使用的的模块包括:
+该程序文件名为check_proxy.py,主要功能是检查代理服务器的可用性并返回代理服务器的地理位置信息或错误提示。具体实现方式如下:
-- crazy_functions.读文章写摘要 中的 读文章写摘要
-- crazy_functions.生成函数注释 中的 批量生成函数注释
-- crazy_functions.解析项目源代码 中的 解析项目本身、解析一个Python项目、解析一个C项目的头文件、解析一个C项目
-- crazy_functions.高级功能函数模板 中的 高阶功能模板函数
+首先使用requests模块向指定网站(https://ipapi.co/json/)发送GET请求,请求结果以JSON格式返回。如果代理服务器参数(proxies)是有效的且没有指明'https'代理,则用默认字典值'无'替代。
-返回的实验功能函数包括:
+然后,程序会解析返回的JSON数据,并根据数据中是否包含国家名字字段来判断代理服务器的地理位置。如果有国家名字字段,则将其打印出来并返回代理服务器的相关信息。如果没有国家名字字段,但有错误信息字段,则返回其他错误提示信息。
-- "[实验] 请解析并解构此项目本身",包含函数:解析项目本身
-- "[实验] 解析整个py项目(配合input输入框)",包含函数:解析一个Python项目
-- "[实验] 解析整个C++项目头文件(配合input输入框)",包含函数:解析一个C项目的头文件
-- "[实验] 解析整个C++项目(配合input输入框)",包含函数:解析一个C项目
-- "[实验] 读tex论文写摘要(配合input输入框)",包含函数:读文章写摘要
-- "[实验] 批量生成函数注释(配合input输入框)",包含函数:批量生成函数注释
-- "[实验] 实验功能函数模板",包含函数:高阶功能模板函数
+在程序执行前,程序会先设置环境变量no_proxy,并使用toolbox模块中的get_conf函数从配置文件中读取代理参数。
-这些函数用于系统开发和测试,方便开发者进行特定程序语言后台功能开发的测试和实验,增加系统可靠稳定性和用户友好性。
+最后,检测程序会输出检查结果并返回对应的结果字符串。
-(Author补充:这个文件汇总了模块化的函数,如此设计以方便任何新功能的加入)
+## [6/18] 程序摘要: config_private.py
-## [5/10] 程序摘要: main.py
+本程序文件名为`config_private.py`,其功能为配置私有信息以便在主程序中使用。主要功能包括:
-该程序是一个基于Gradio框架的聊天机器人应用程序。用户可以通过输入问题来获取答案,并与聊天机器人进行对话。该应用程序还集成了一些实验性功能模块,用户可以通过上传本地文件或点击相关按钮来使用这些模块。程序还可以生成对话日志,并且具有一些外观上的调整。在运行时,它会自动打开一个网页并在本地启动服务器。
+- 配置OpenAI API的密钥和API URL
+- 配置是否使用代理,如果使用代理配置代理地址和端口
+- 配置发送请求的超时时间和失败重试次数的限制
+- 配置并行使用线程数和用户名密码
+- 提供检查功能以确保API密钥已经正确设置
+其中,需要特别注意的是:最后一个检查功能要求在运行之前必须将API密钥正确设置,否则程序会直接退出。
-## [6/10] 程序摘要: predict.py
+## [7/18] 程序摘要: config.py
-该程序文件名为predict.py,主要是针对一个基于ChatGPT的聊天机器人进行交互和预测。
+该程序文件是一个配置文件,用于配置OpenAI的API参数和优化体验的相关参数,具体包括以下几个步骤:
-第一部分是导入所需的库和配置文件。
+1.设置OpenAI的API密钥。
-第二部分是一个用于获取Openai返回的完整错误信息的函数。
+2.选择是否使用代理,如果使用则需要设置代理地址和端口等参数。
-第三部分是用于一次性完成向ChatGPT发送请求和等待回复的函数。
+3.设置请求OpenAI后的超时时间、网页的端口、重试次数、选择的OpenAI模型、API的网址等。
-第四部分是用于基础的对话功能的函数,通过stream参数可以选择是否显示中间的过程。
+4.设置并行使用的线程数和用户名密码。
-第五部分是用于整合所需信息和选择LLM模型生成的HTTP请求。
+该程序文件的作用为在使用OpenAI API时进行相关参数的配置,以保证请求的正确性和速度,并且优化使用体验。
-(Author补充:主要是predict_no_ui和predict两个函数。前者不用stream,方便、高效、易用。后者用stream,展现效果好。)
+## [8/18] 程序摘要: theme.py
-## [7/10] 程序摘要: show_math.py
+该程序是一个自定义Gradio主题的Python模块。主题文件名为"./theme.py"。程序引入了Gradio模块,并定义了一个名为"adjust_theme()"的函数。该函数根据输入值调整Gradio的默认主题,返回一个包含所需自定义属性的主题对象。主题属性包括颜色、字体、过渡、阴影、按钮边框和渐变等。主题颜色列表包括石板色、灰色、锌色、中性色、石头色、红色、橙色、琥珀色、黄色、酸橙色、绿色、祖母绿、青蓝色、青色、天蓝色、蓝色、靛蓝色、紫罗兰色、紫色、洋红色、粉红色和玫瑰色。如果Gradio版本较旧,则不能自定义字体和颜色。
-这是一个名为show_math.py的Python程序文件,主要用于将Markdown-LaTeX混合文本转换为HTML格式,并包括MathML数学公式。程序使用latex2mathml.converter库将LaTeX公式转换为MathML格式,并使用正则表达式递归地翻译输入的Markdown-LaTeX混合文本。程序包括转换成双美元符号($$)形式、转换成单美元符号($)形式、转换成\[\]形式以及转换成\(\)形式的LaTeX数学公式。如果转换中出现错误,程序将返回相应的错误消息。
+## [9/18] 程序摘要: toolbox.py
-## [8/10] 程序摘要: theme.py
-
-这是一个名为theme.py的程序文件,用于设置Gradio界面的颜色和字体主题。该文件中定义了一个名为adjust_theme()的函数,其作用是返回一个Gradio theme对象,设置了Gradio界面的颜色和字体主题。在该函数里面,使用了Graido可用的颜色列表,主要参数包括primary_hue、neutral_hue、font和font_mono等,用于设置Gradio界面的主题色调、字体等。另外,该函数还实现了一些参数的自定义,如input_background_fill_dark、button_transition、button_shadow_hover等,用于设置Gradio界面的渐变、阴影等特效。如果Gradio版本过于陈旧,该函数会抛出异常并返回None。
-
-## [9/10] 程序摘要: toolbox.py
-
-该文件为Python程序文件,文件名为toolbox.py。主要功能包括:
+该程序文件包含了一系列函数,用于实现聊天程序所需的各种功能,如预测对话、将对话记录写入文件、将普通文本转换为Markdown格式文本、装饰器函数CatchException和HotReload等。其中一些函数用到了第三方库,如Python-Markdown、mdtex2html、zipfile、tarfile、rarfile和py7zr。除此之外,还有一些辅助函数,如get_conf、clear_line_break和extract_archive等。主要功能包括:
1. 导入markdown、mdtex2html、threading、functools等模块。
2. 定义函数predict_no_ui_but_counting_down,用于生成对话。
@@ -99,24 +96,80 @@
13. 定义函数on_file_uploaded,用于处理上传文件的操作。
14. 定义函数on_report_generated,用于处理生成报告文件的操作。
-## 程序的整体功能和构架做出概括。然后用一张markdown表格整理每个文件的功能。
-这是一个基于Gradio框架的聊天机器人应用,支持通过文本聊天来获取答案,并可以使用一系列实验性功能模块,例如生成函数注释、解析项目源代码、读取Latex论文写摘要等。 程序架构分为前端和后端两个部分。前端使用Gradio实现,包括用户输入区域、应答区域、按钮、调用方式等。后端使用Python实现,包括聊天机器人模型、实验性功能模块、模板模块、管理模块、主程序模块等。
+## [10/18] 程序摘要: crazy_functions/生成函数注释.py
-每个程序文件的功能如下:
+该程序文件是一个Python脚本,文件名为“生成函数注释.py”,位于“./crazy_functions/”目录下。该程序实现了一个批量生成函数注释的功能,可以对指定文件夹下的所有Python和C++源代码文件中的所有函数进行注释,使用Markdown表格输出注释结果。
-| 文件名 | 功能描述 |
-|:----:|:----:|
-| check_proxy.py | 检查代理服务器是否有效 |
-| config.py | 存储应用所需的常量和配置信息 |
-| config_private.py | 存储Openai的API密钥、模型和其他相关设置 |
-| functional.py | 提供各种翻译、校对等实用模板 |
-| functional_crazy.py | 提供一些实验性质的高级功能 |
-| main.py | 基于Gradio框架的聊天机器人应用程序的主程序 |
-| predict.py | 用于chatbot预测方案创建,向ChatGPT发送请求和获取回复 |
-| show_math.py | 将Markdown-LaTeX混合文本转换为HTML格式,并包括MathML数学公式 |
-| theme.py | 设置Gradio界面的颜色和字体主题 |
-| toolbox.py | 定义一系列工具函数,用于对输入输出进行格式转换、文件操作、异常捕捉和处理等 |
+该程序引用了predict.py和toolbox.py两个模块,其中predict.py实现了一个基于GPT模型的文本生成功能,用于生成函数注释,而toolbox.py实现了一些工具函数,包括异常处理函数、文本写入函数等。另外,该程序还定义了两个函数,一个是“生成函数注释”函数,用于处理单个文件的注释生成;另一个是“批量生成函数注释”函数,用于批量处理多个文件的注释生成。
-这些程序文件共同组成了一个聊天机器人应用程序的前端和后端实现,使用户可以方便地进行聊天,并可以使用相应的实验功能模块。
+## [11/18] 程序摘要: crazy_functions/读文章写摘要.py
+
+这个程序文件是一个名为“读文章写摘要”的函数。该函数的输入包括待解析项目的本地路径、top_p(生成文本时选择最可能的词语的概率阈值)、temperature(控制生成文本的随机性的因子)、对话历史等参数,以及一个聊天机器人和一个系统提示的文本。该函数的主要工作是解析一组.tex文件,然后生成一段学术性语言的中文和英文摘要。在解析过程中,该函数使用一个名为“toolbox”的模块中的辅助函数和一个名为“predict”的模块中的函数来向GPT模型发起请求,然后将结果返回给聊天机器人。另外,该程序还包括一个名为“fast_debug”的bool型变量,用于调试和测试。
+
+## [12/18] 程序摘要: crazy_functions/代码重写为全英文_多线程.py
+
+该程序文件实现了一个多线程操作,用于将指定目录下的所有 Python 文件中的中文转化为英文,并将转化后的文件存入另一个目录中。具体实现过程如下:
+
+1. 集合目标文件路径并清空历史记录。
+2. 循环目标文件,对每个文件启动一个线程进行任务操作。
+3. 各个线程同时开始执行任务函数,并在任务完成后将转化后的文件写入指定目录,最终生成一份任务执行报告。
+
+## [13/18] 程序摘要: crazy_functions/高级功能函数模板.py
+
+该程序文件名为高级功能函数模板.py,它包含了一个名为“高阶功能模板函数”的函数,这个函数可以作为开发新功能函数的模板。该函数引用了predict.py和toolbox.py文件中的函数。在该函数内部,它首先清空了历史记录,然后对于今天和今天以后的四天,它问用户历史中哪些事件发生在这些日期,并列举两条事件并发送相关的图片。在向用户询问问题时,使用了GPT进行响应。由于请求GPT需要一定的时间,所以函数会在重新显示状态之前等待一段时间。在每次与用户的互动中,使用yield关键字生成器函数来输出聊天机器人的当前状态,包括聊天消息、历史记录和状态('正常')。最后,程序调用write_results_to_file函数将聊天的结果写入文件,以供后续的评估和分析。
+
+## [14/18] 程序摘要: crazy_functions/总结word文档.py
+
+该程序文件名为总结word文档.py,主要功能是批量总结Word文档。具体实现过程是解析docx格式和doc格式文件,生成文件内容,然后使用自然语言处理工具对文章内容做中英文概述,最后给出建议。该程序需要依赖python-docx和pywin32,如果没有安装,会给出安装建议。
+
+## [15/18] 程序摘要: crazy_functions/批量总结PDF文档pdfminer.py
+
+该程序文件名为批量总结PDF文档pdfminer.py,位于./crazy_functions/目录下。程序实现了批量读取PDF文件,并使用pdfminer解析PDF文件内容。此外,程序还根据解析得到的文本内容,调用机器学习模型生成对每篇文章的概述,最终生成全文摘要。程序中还对模块依赖进行了导入检查,若缺少依赖,则会提供安装建议。
+
+## [16/18] 程序摘要: crazy_functions/解析项目源代码.py
+
+这个程序文件中包含了几个函数,分别是:
+
+1. `解析源代码(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)`:通过输入文件路径列表对程序文件进行逐文件分析,根据分析结果做出整体功能和构架的概括,并生成包括每个文件功能的markdown表格。
+2. `解析项目本身(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT)`:对当前文件夹下的所有Python文件及其子文件夹进行逐文件分析,并生成markdown表格。
+3. `解析一个Python项目(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT)`:对指定路径下的所有Python文件及其子文件夹进行逐文件分析,并生成markdown表格。
+4. `解析一个C项目的头文件(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT)`:对指定路径下的所有头文件进行逐文件分析,并生成markdown表格。
+5. `解析一个C项目(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT)`:对指定路径下的所有.h、.cpp、.c文件及其子文件夹进行逐文件分析,并生成markdown表格。
+
+程序中还包含了一些辅助函数和变量,如CatchException装饰器函数,report_execption函数、write_results_to_file函数等。在执行过程中还会调用其他模块中的函数,如toolbox模块的函数和predict模块的函数。
+
+## [17/18] 程序摘要: crazy_functions/批量总结PDF文档.py
+
+这个程序文件是一个名为“批量总结PDF文档”的函数插件。它导入了predict和toolbox模块,并定义了一些函数,包括is_paragraph_break,normalize_text和clean_text。这些函数是对输入文本进行预处理和清洗的功能函数。主要的功能函数是解析PDF,它打开每个PDF文件并将其内容存储在file_content变量中,然后传递给聊天机器人,以产生一句话的概括。在解析PDF文件之后,该函数连接了所有文件的摘要,以产生一段学术语言和英文摘要。最后,函数批量处理目标文件夹中的所有PDF文件,并输出结果。
+
+## 根据以上你自己的分析,对程序的整体功能和构架做出概括。然后用一张markdown表格整理每个文件的功能。
+
+该程序是一个聊天机器人,使用了OpenAI的GPT语言模型以及一些特殊的辅助功能去处理各种学术写作和科研润色任务。整个程序由一些函数组成,每个函数都代表了不同的学术润色/翻译/其他服务。
+
+下面是程序中每个文件的功能列表:
+
+| 文件名 | 功能 |
+|--------|--------|
+| functional_crazy.py | 实现高级功能函数模板和其他一些辅助功能函数 |
+| main.py | 程序的主要入口,负责程序的启动和UI的展示 |
+| functional.py | 定义各种功能按钮的颜色和响应函数 |
+| show_math.py | 将Markdown与LaTeX混合文本转换为带MathML的HTML格式 |
+| predict.py | 基础的对话功能,用于与chatGPT进行交互 |
+| check_proxy.py | 检查代理设置的正确性 |
+| config_private.py | 配置程序的API密钥和其他私有信息 |
+| config.py | 配置OpenAI的API参数和程序的其他属性 |
+| theme.py | 设置程序主题样式 |
+| toolbox.py | 存放一些辅助函数供程序使用 |
+| crazy_functions/生成函数注释.py | 生成Python文件中所有函数的注释 |
+| crazy_functions/读文章写摘要.py | 解析文章文本,生成中英文摘要 |
+| crazy_functions/代码重写为全英文_多线程.py | 将中文代码内容转化为英文 |
+| crazy_functions/高级功能函数模板.py | 实现高级功能函数模板 |
+| crazy_functions/总结word文档.py | 解析Word文件,生成文章内容的概要 |
+| crazy_functions/批量总结PDF文档pdfminer.py | 解析PDF文件,生成文章内容的概要(使用pdfminer库) |
+| crazy_functions/批量总结PDF文档.py | 解析PDF文件,生成文章内容的概要(使用PyMuPDF库) |
+| crazy_functions/解析项目源代码.py | 解析项目本身、Python项目或C/C++项目的源代码,生成markdown表格 |
+| crazy_functions/批量总结PDF文档.py | 对PDF文件进行批量摘要生成 |
+
+总的来说,该程序提供了一系列的学术润色和翻译的工具,支持对各种类型的文件进行分析和处理。同时也提供了对话式用户界面,便于用户使用和交互。
diff --git a/toolbox.py b/toolbox.py
index 7374002..3e9ec17 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -1,14 +1,34 @@
-import markdown, mdtex2html, threading, importlib, traceback, importlib, inspect
+import markdown, mdtex2html, threading, importlib, traceback, importlib, inspect, re
from show_math import convert as convert_math
from functools import wraps
-import re
-def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[], sys_prompt=''):
+def get_reduce_token_percent(e):
+ try:
+ # text = "maximum context length is 4097 tokens. However, your messages resulted in 4870 tokens"
+ pattern = r"(\d+)\s+tokens\b"
+ match = re.findall(pattern, text)
+ eps = 50 # 稍微留一点余地, 确保下次别再超过token
+ max_limit = float(match[0]) - eps
+ current_tokens = float(match[1])
+ ratio = max_limit/current_tokens
+ assert ratio > 0 and ratio < 1
+ return ratio
+ except:
+ return 0.5
+
+def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[], sys_prompt='', long_connection=False):
"""
调用简单的predict_no_ui接口,但是依然保留了些许界面心跳功能,当对话太长时,会自动采用二分法截断
+ i_say: 当前输入
+ i_say_show_user: 显示到对话界面上的当前输入,例如,输入整个文件时,你绝对不想把文件的内容都糊到对话界面上
+ chatbot: 对话界面句柄
+ top_p, temperature: gpt参数
+ history: gpt参数 对话历史
+ sys_prompt: gpt参数 sys_prompt
+ long_connection: 是否采用更稳定的连接方式(推荐)
"""
import time
- from predict import predict_no_ui
+ from predict import predict_no_ui, predict_no_ui_long_connection
from toolbox import get_conf
TIMEOUT_SECONDS, MAX_RETRY = get_conf('TIMEOUT_SECONDS', 'MAX_RETRY')
# 多线程的时候,需要一个mutable结构在不同线程之间传递信息
@@ -18,18 +38,26 @@ def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temp
def mt(i_say, history):
while True:
try:
- mutable[0] = predict_no_ui(inputs=i_say, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt)
+ if long_connection:
+ mutable[0] = predict_no_ui_long_connection(inputs=i_say, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt)
+ else:
+ mutable[0] = predict_no_ui(inputs=i_say, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt)
break
- except ConnectionAbortedError as e:
+ except ConnectionAbortedError as token_exceeded_error:
+ # 尝试计算比例,尽可能多地保留文本
+ p_ratio = get_reduce_token_percent(str(token_exceeded_error))
if len(history) > 0:
- history = [his[len(his)//2:] for his in history if his is not None]
+ history = [his[ int(len(his) *p_ratio): ] for his in history if his is not None]
mutable[1] = 'Warning! History conversation is too long, cut into half. '
else:
- i_say = i_say[:len(i_say)//2]
+ i_say = i_say[: int(len(i_say) *p_ratio) ]
mutable[1] = 'Warning! Input file is too long, cut into half. '
except TimeoutError as e:
mutable[0] = '[Local Message] Failed with timeout.'
raise TimeoutError
+ except Exception as e:
+ mutable[0] = f'[Local Message] Failed with {str(e)}.'
+ raise RuntimeError(f'[Local Message] Failed with {str(e)}.')
# 创建新线程发出http请求
thread_name = threading.Thread(target=mt, args=(i_say, history)); thread_name.start()
# 原来的线程则负责持续更新UI,实现一个超时倒计时,并等待新线程的任务完成
@@ -56,6 +84,7 @@ def write_results_to_file(history, file_name=None):
with open(f'./gpt_log/{file_name}', 'w', encoding = 'utf8') as f:
f.write('# chatGPT 分析报告\n')
for i, content in enumerate(history):
+ if type(content) != str: content = str(content)
if i%2==0: f.write('## ')
f.write(content)
f.write('\n\n')
@@ -269,7 +298,7 @@ def get_conf(*args):
# 正确的 API_KEY 是 "sk-" + 48 位大小写字母数字的组合
API_MATCH = re.match(r"sk-[a-zA-Z0-9]{48}$", r)
if API_MATCH:
- print("您的 API_KEY 是: ", r, "\nAPI_KEY 导入成功")
+ print(f"您的 API_KEY 是: {r[:15]}*** \nAPI_KEY 导入成功")
else:
assert False, "正确的 API_KEY 是 'sk-' + '48 位大小写字母数字' 的组合,请在config文件中修改API密钥, 添加海外代理之后再运行。" + \
"(如果您刚更新过代码,请确保旧版config_private文件中没有遗留任何新增键值)"
From 44a605e766b9e67551bee637fd50ab7f2a304f7e Mon Sep 17 00:00:00 2001
From: Your Name
Date: Fri, 31 Mar 2023 23:18:45 +0800
Subject: [PATCH 28/35] =?UTF-8?q?=E5=AF=B9word=E5=92=8Cpdf=E8=BF=9B?=
=?UTF-8?q?=E8=A1=8C=E7=AE=80=E6=98=93=E7=9A=84=E6=94=AF=E6=8C=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index cf01cc1..0c3874b 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ https://github.com/polarwinkel/mdtex2html
> **Note**
>
-> 1.请注意只有“红颜色”标识的函数插件(按钮)才支持读取文件。目前暂不能完善地支持pdf格式文献的翻译解读,尚不支持word格式文件的读取。
+> 1.请注意只有“红颜色”标识的函数插件(按钮)才支持读取文件。目前暂不能完善地支持pdf/word格式文献的翻译解读,相关函数插件正在测试中。
>
> 2.本项目中每个文件的功能都在自译解[`project_self_analysis.md`](https://github.com/binary-husky/chatgpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A)详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。常见问题汇总在[`wiki`](https://github.com/binary-husky/chatgpt_academic/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98)当中。
>
From b5b0f6a3ce49b3e143ae49222cff9d489b211383 Mon Sep 17 00:00:00 2001
From: Junru Shen
Date: Fri, 31 Mar 2023 23:38:49 +0800
Subject: [PATCH 29/35] make grammar correction prompt more clear
---
functional.py | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/functional.py b/functional.py
index b1f7ae9..56d98fe 100644
--- a/functional.py
+++ b/functional.py
@@ -21,12 +21,20 @@ def get_functionals():
"Suffix": r"",
},
"查找语法错误": {
- "Prefix": r"Below is a paragraph from an academic paper. " +
- r"Can you help me ensure that the grammar and the spelling is correct? " +
- r"Do not try to polish the text, if no mistake is found, tell me that this paragraph is good." +
- r"If you find grammar or spelling mistakes, please list mistakes you find in a two-column markdown table, " +
+ "Prefix": r"Can you help me ensure that the grammar and the spelling is correct? " +
+ r"Do not try to polish the text, if no mistake is found, tell me that this paragraph is good." +
+ r"If you find grammar or spelling mistakes, please list mistakes you find in a two-column markdown table, " +
r"put the original text the first column, " +
- r"put the corrected text in the second column and highlight the key words you fixed." + "\n\n",
+ r"put the corrected text in the second column and highlight the key words you fixed.""\n"
+ r"Example:""\n"
+ r"Paragraph: How is you? Do you knows what is it?""\n"
+ r"| Original sentence | Corrected sentence |""\n"
+ r"| :--- | :--- |""\n"
+ r"| How **is** you? | How **are** you? |""\n"
+ r"| Do you **knows** what **is** **it**? | Do you **know** what **it** **is** ? |""\n"
+ r"Below is a paragraph from an academic paper. "
+ r"You need to report all grammar and spelling mistakes as the example before."
+ + "\n\n",
"Suffix": r"",
"PreProcess": clear_line_break, # 预处理:清除换行符
},
From 7b8de7884f3e1e69e15505c593ac951460d2b596 Mon Sep 17 00:00:00 2001
From: Junru Shen
Date: Fri, 31 Mar 2023 23:40:21 +0800
Subject: [PATCH 30/35] add markdown table border line to make text boundary
more clear
---
main.py | 14 +++++++++++++-
toolbox.py | 7 ++++---
2 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/main.py b/main.py
index 3033e39..bdb4584 100644
--- a/main.py
+++ b/main.py
@@ -37,8 +37,20 @@ gr.Chatbot.postprocess = format_io
from theme import adjust_theme
set_theme = adjust_theme()
+CSS = """
+.markdown-body table {
+ border: 1px solid #ddd;
+ border-collapse: collapse;
+}
+
+.markdown-body th, .markdown-body td {
+ border: 1px solid #ddd;
+ padding: 5px;
+}
+"""
+
cancel_handles = []
-with gr.Blocks(theme=set_theme, analytics_enabled=False) as demo:
+with gr.Blocks(theme=set_theme, analytics_enabled=False, css=CSS) as demo:
gr.HTML(title_html)
with gr.Row():
with gr.Column(scale=2):
diff --git a/toolbox.py b/toolbox.py
index b78a513..a7f4f9e 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -158,11 +158,12 @@ def markdown_convertion(txt):
"""
将Markdown格式的文本转换为HTML格式。如果包含数学公式,则先将公式转换为HTML格式。
"""
+ pre = ''
+ suf = '
'
if ('$' in txt) and ('```' not in txt):
- return markdown.markdown(txt,extensions=['fenced_code','tables']) + '
' + \
- markdown.markdown(convert_math(txt, splitParagraphs=False),extensions=['fenced_code','tables'])
+ return pre + markdown.markdown(txt,extensions=['fenced_code','tables']) + '
' + markdown.markdown(convert_math(txt, splitParagraphs=False),extensions=['fenced_code','tables']) + suf
else:
- return markdown.markdown(txt,extensions=['fenced_code','tables'])
+ return pre + markdown.markdown(txt,extensions=['fenced_code','tables']) + suf
def format_io(self, y):
From 9ad21838fe684da2472fc4d67bc12a077b768751 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Fri, 31 Mar 2023 23:51:17 +0800
Subject: [PATCH 31/35] =?UTF-8?q?=E6=9B=B4=E6=B8=85=E6=9C=97=E7=9A=84UI?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
main.py | 46 ++++++++++++++++++++++++++++++----------------
1 file changed, 30 insertions(+), 16 deletions(-)
diff --git a/main.py b/main.py
index 3033e39..2368bba 100644
--- a/main.py
+++ b/main.py
@@ -57,24 +57,38 @@ with gr.Blocks(theme=set_theme, analytics_enabled=False) as demo:
with gr.Row():
from check_proxy import check_proxy
statusDisplay = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行。当前模型: {LLM_MODEL} \n {check_proxy(proxies)}")
- with gr.Row():
- for k in functional:
- variant = functional[k]["Color"] if "Color" in functional[k] else "secondary"
- functional[k]["Button"] = gr.Button(k, variant=variant)
- with gr.Row():
- gr.Markdown("注意:以下“红颜色”标识的函数插件需从input区读取路径作为参数.")
- with gr.Row():
- for k in crazy_functional:
- variant = crazy_functional[k]["Color"] if "Color" in crazy_functional[k] else "secondary"
- crazy_functional[k]["Button"] = gr.Button(k, variant=variant)
- with gr.Row():
- gr.Markdown("上传本地文件,供上面的函数插件调用.")
- with gr.Row():
- file_upload = gr.Files(label='任何文件, 但推荐上传压缩文件(zip, tar)', file_count="multiple")
- system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt).style(container=True)
- with gr.Accordion("arguments", open=False):
+ with gr.Accordion("基础功能区", open=True):
+ with gr.Row():
+ for k in functional:
+ variant = functional[k]["Color"] if "Color" in functional[k] else "secondary"
+ functional[k]["Button"] = gr.Button(k, variant=variant)
+ with gr.Accordion("函数插件区", open=True):
+ with gr.Row():
+ gr.Markdown("注意:以下“红颜色”标识的函数插件需从input区读取路径作为参数.")
+ with gr.Row():
+ for k in crazy_functional:
+ variant = crazy_functional[k]["Color"] if "Color" in crazy_functional[k] else "secondary"
+ crazy_functional[k]["Button"] = gr.Button(k, variant=variant)
+ with gr.Row():
+ with gr.Accordion("展开“文件上传区”。上传本地文件供“红颜色”的函数插件调用。", open=False):
+ file_upload = gr.Files(label='任何文件, 但推荐上传压缩文件(zip, tar)', file_count="multiple")
+ with gr.Accordion("展开SysPrompt & GPT参数 & 交互界面布局", open=False):
+ system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt)
top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
+ checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "文件上传区"], value=["USA", "Japan", "Pakistan"],
+ label="显示功能区")
+
+
+
+ def what_is_this(a):
+ return a
+
+ checkboxes.select(what_is_this, [checkboxes], [checkboxes])
+
+
+
+
predict_args = dict(fn=predict, inputs=[txt, top_p, temperature, chatbot, history, system_prompt], outputs=[chatbot, history, statusDisplay], show_progress=True)
empty_txt_args = dict(fn=lambda: "", inputs=[], outputs=[txt]) # 用于在提交后清空输入栏
From 833d136fb9454d11caaa8030d855648a3312a7fa Mon Sep 17 00:00:00 2001
From: Your Name
Date: Sat, 1 Apr 2023 00:21:27 +0800
Subject: [PATCH 32/35] =?UTF-8?q?=E9=9A=90=E8=97=8F=E3=80=81=E6=98=BE?=
=?UTF-8?q?=E7=A4=BA=E5=8A=9F=E8=83=BD=E5=8C=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
main.py | 23 +++++++++++------------
1 file changed, 11 insertions(+), 12 deletions(-)
diff --git a/main.py b/main.py
index 2368bba..10bbddc 100644
--- a/main.py
+++ b/main.py
@@ -57,12 +57,12 @@ with gr.Blocks(theme=set_theme, analytics_enabled=False) as demo:
with gr.Row():
from check_proxy import check_proxy
statusDisplay = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行。当前模型: {LLM_MODEL} \n {check_proxy(proxies)}")
- with gr.Accordion("基础功能区", open=True):
+ with gr.Accordion("基础功能区", open=True) as area_basic_fn:
with gr.Row():
for k in functional:
variant = functional[k]["Color"] if "Color" in functional[k] else "secondary"
functional[k]["Button"] = gr.Button(k, variant=variant)
- with gr.Accordion("函数插件区", open=True):
+ with gr.Accordion("函数插件区", open=True) as area_crazy_fn:
with gr.Row():
gr.Markdown("注意:以下“红颜色”标识的函数插件需从input区读取路径作为参数.")
with gr.Row():
@@ -76,19 +76,18 @@ with gr.Blocks(theme=set_theme, analytics_enabled=False) as demo:
system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt)
top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
- checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "文件上传区"], value=["USA", "Japan", "Pakistan"],
- label="显示功能区")
-
-
+ checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区"],
+ value=["基础功能区", "函数插件区"], label="显示哪些功能区")
def what_is_this(a):
- return a
-
- checkboxes.select(what_is_this, [checkboxes], [checkboxes])
-
-
-
+ ret = {}
+ # if area_basic_fn.visible != ("基础功能区" in a):
+ ret.update({area_basic_fn: gr.update(visible=("基础功能区" in a))})
+ # if area_crazy_fn.visible != ("函数插件区" in a):
+ ret.update({area_crazy_fn: gr.update(visible=("函数插件区" in a))})
+ return ret
+ checkboxes.select(what_is_this, [checkboxes], [area_basic_fn, area_crazy_fn] )
predict_args = dict(fn=predict, inputs=[txt, top_p, temperature, chatbot, history, system_prompt], outputs=[chatbot, history, statusDisplay], show_progress=True)
empty_txt_args = dict(fn=lambda: "", inputs=[], outputs=[txt]) # 用于在提交后清空输入栏
From 5e8eb6253c0a37ac325b289aafa6821cae3bb6ca Mon Sep 17 00:00:00 2001
From: Your Name
Date: Sat, 1 Apr 2023 03:36:05 +0800
Subject: [PATCH 33/35] =?UTF-8?q?=E4=BC=98=E5=8C=96Token=E6=BA=A2=E5=87=BA?=
=?UTF-8?q?=E6=97=B6=E7=9A=84=E5=A4=84=E7=90=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
config.py | 2 +
.../test_project/cpp/longcode/jpgd.cpp | 3276 +++++++++++++++++
.../test_project/cpp/longcode/jpge.cpp | 1049 ++++++
.../test_project/cpp/longcode/prod_cons.h | 433 +++
crazy_functions/高级功能函数模板.py | 2 +-
functional_crazy.py | 13 +-
main.py | 105 +-
predict.py | 12 +-
toolbox.py | 25 +-
9 files changed, 4851 insertions(+), 66 deletions(-)
create mode 100644 crazy_functions/test_project/cpp/longcode/jpgd.cpp
create mode 100644 crazy_functions/test_project/cpp/longcode/jpge.cpp
create mode 100644 crazy_functions/test_project/cpp/longcode/prod_cons.h
diff --git a/config.py b/config.py
index 6bb421e..54e40a5 100644
--- a/config.py
+++ b/config.py
@@ -23,6 +23,8 @@ else:
# [step 3]>> 以下配置可以优化体验,但大部分场合下并不需要修改
+# 对话窗的高度
+CHATBOT_HEIGHT = 1117
# 发送请求到OpenAI后,等待多久判定为超时
TIMEOUT_SECONDS = 25
diff --git a/crazy_functions/test_project/cpp/longcode/jpgd.cpp b/crazy_functions/test_project/cpp/longcode/jpgd.cpp
new file mode 100644
index 0000000..36d06c8
--- /dev/null
+++ b/crazy_functions/test_project/cpp/longcode/jpgd.cpp
@@ -0,0 +1,3276 @@
+// jpgd.cpp - C++ class for JPEG decompression.
+// Public domain, Rich Geldreich
+// Last updated Apr. 16, 2011
+// Alex Evans: Linear memory allocator (taken from jpge.h).
+//
+// Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2.
+//
+// Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling.
+// Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain"
+// http://vision.ai.uiuc.edu/~dugad/research/dct/index.html
+
+#include "jpgd.h"
+#include
+
+#include
+// BEGIN EPIC MOD
+#define JPGD_ASSERT(x) { assert(x); CA_ASSUME(x); } (void)0
+// END EPIC MOD
+
+#ifdef _MSC_VER
+#pragma warning (disable : 4611) // warning C4611: interaction between '_setjmp' and C++ object destruction is non-portable
+#endif
+
+// Set to 1 to enable freq. domain chroma upsampling on images using H2V2 subsampling (0=faster nearest neighbor sampling).
+// This is slower, but results in higher quality on images with highly saturated colors.
+#define JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING 1
+
+#define JPGD_TRUE (1)
+#define JPGD_FALSE (0)
+
+#define JPGD_MAX(a,b) (((a)>(b)) ? (a) : (b))
+#define JPGD_MIN(a,b) (((a)<(b)) ? (a) : (b))
+
+namespace jpgd {
+
+ static inline void *jpgd_malloc(size_t nSize) { return FMemory::Malloc(nSize); }
+ static inline void jpgd_free(void *p) { FMemory::Free(p); }
+
+// BEGIN EPIC MOD
+//@UE3 - use UE3 BGRA encoding instead of assuming RGBA
+ // stolen from IImageWrapper.h
+ enum ERGBFormatJPG
+ {
+ Invalid = -1,
+ RGBA = 0,
+ BGRA = 1,
+ Gray = 2,
+ };
+ static ERGBFormatJPG jpg_format;
+// END EPIC MOD
+
+ // DCT coefficients are stored in this sequence.
+ static int g_ZAG[64] = { 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 };
+
+ enum JPEG_MARKER
+ {
+ M_SOF0 = 0xC0, M_SOF1 = 0xC1, M_SOF2 = 0xC2, M_SOF3 = 0xC3, M_SOF5 = 0xC5, M_SOF6 = 0xC6, M_SOF7 = 0xC7, M_JPG = 0xC8,
+ M_SOF9 = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT = 0xC4, M_DAC = 0xCC,
+ M_RST0 = 0xD0, M_RST1 = 0xD1, M_RST2 = 0xD2, M_RST3 = 0xD3, M_RST4 = 0xD4, M_RST5 = 0xD5, M_RST6 = 0xD6, M_RST7 = 0xD7,
+ M_SOI = 0xD8, M_EOI = 0xD9, M_SOS = 0xDA, M_DQT = 0xDB, M_DNL = 0xDC, M_DRI = 0xDD, M_DHP = 0xDE, M_EXP = 0xDF,
+ M_APP0 = 0xE0, M_APP15 = 0xEF, M_JPG0 = 0xF0, M_JPG13 = 0xFD, M_COM = 0xFE, M_TEM = 0x01, M_ERROR = 0x100, RST0 = 0xD0
+ };
+
+ enum JPEG_SUBSAMPLING { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 };
+
+#define CONST_BITS 13
+#define PASS1_BITS 2
+#define SCALEDONE ((int32)1)
+
+#define FIX_0_298631336 ((int32)2446) /* FIX(0.298631336) */
+#define FIX_0_390180644 ((int32)3196) /* FIX(0.390180644) */
+#define FIX_0_541196100 ((int32)4433) /* FIX(0.541196100) */
+#define FIX_0_765366865 ((int32)6270) /* FIX(0.765366865) */
+#define FIX_0_899976223 ((int32)7373) /* FIX(0.899976223) */
+#define FIX_1_175875602 ((int32)9633) /* FIX(1.175875602) */
+#define FIX_1_501321110 ((int32)12299) /* FIX(1.501321110) */
+#define FIX_1_847759065 ((int32)15137) /* FIX(1.847759065) */
+#define FIX_1_961570560 ((int32)16069) /* FIX(1.961570560) */
+#define FIX_2_053119869 ((int32)16819) /* FIX(2.053119869) */
+#define FIX_2_562915447 ((int32)20995) /* FIX(2.562915447) */
+#define FIX_3_072711026 ((int32)25172) /* FIX(3.072711026) */
+
+#define DESCALE(x,n) (((x) + (SCALEDONE << ((n)-1))) >> (n))
+#define DESCALE_ZEROSHIFT(x,n) (((x) + (128 << (n)) + (SCALEDONE << ((n)-1))) >> (n))
+
+#define MULTIPLY(var, cnst) ((var) * (cnst))
+
+#define CLAMP(i) ((static_cast(i) > 255) ? (((~i) >> 31) & 0xFF) : (i))
+
+ // Compiler creates a fast path 1D IDCT for X non-zero columns
+ template
+ struct Row
+ {
+ static void idct(int* pTemp, const jpgd_block_t* pSrc)
+ {
+ // ACCESS_COL() will be optimized at compile time to either an array access, or 0.
+#define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0)
+
+ const int z2 = ACCESS_COL(2), z3 = ACCESS_COL(6);
+
+ const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+ const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
+ const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
+
+ const int tmp0 = (ACCESS_COL(0) + ACCESS_COL(4)) << CONST_BITS;
+ const int tmp1 = (ACCESS_COL(0) - ACCESS_COL(4)) << CONST_BITS;
+
+ const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
+
+ const int atmp0 = ACCESS_COL(7), atmp1 = ACCESS_COL(5), atmp2 = ACCESS_COL(3), atmp3 = ACCESS_COL(1);
+
+ const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
+ const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602);
+
+ const int az1 = MULTIPLY(bz1, - FIX_0_899976223);
+ const int az2 = MULTIPLY(bz2, - FIX_2_562915447);
+ const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5;
+ const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5;
+
+ const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3;
+ const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4;
+ const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3;
+ const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4;
+
+ pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS-PASS1_BITS);
+ pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS-PASS1_BITS);
+ pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS-PASS1_BITS);
+ pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS-PASS1_BITS);
+ pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS-PASS1_BITS);
+ pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS-PASS1_BITS);
+ pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS-PASS1_BITS);
+ pTemp[4] = DESCALE(tmp13 - btmp0, CONST_BITS-PASS1_BITS);
+ }
+ };
+
+ template <>
+ struct Row<0>
+ {
+ static void idct(int* pTemp, const jpgd_block_t* pSrc)
+ {
+#ifdef _MSC_VER
+ pTemp; pSrc;
+#endif
+ }
+ };
+
+ template <>
+ struct Row<1>
+ {
+ static void idct(int* pTemp, const jpgd_block_t* pSrc)
+ {
+ const int dcval = (pSrc[0] << PASS1_BITS);
+
+ pTemp[0] = dcval;
+ pTemp[1] = dcval;
+ pTemp[2] = dcval;
+ pTemp[3] = dcval;
+ pTemp[4] = dcval;
+ pTemp[5] = dcval;
+ pTemp[6] = dcval;
+ pTemp[7] = dcval;
+ }
+ };
+
+ // Compiler creates a fast path 1D IDCT for X non-zero rows
+ template
+ struct Col
+ {
+ static void idct(uint8* pDst_ptr, const int* pTemp)
+ {
+ // ACCESS_ROW() will be optimized at compile time to either an array access, or 0.
+#define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? pTemp[x * 8] : 0)
+
+ const int z2 = ACCESS_ROW(2);
+ const int z3 = ACCESS_ROW(6);
+
+ const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+ const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
+ const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
+
+ const int tmp0 = (ACCESS_ROW(0) + ACCESS_ROW(4)) << CONST_BITS;
+ const int tmp1 = (ACCESS_ROW(0) - ACCESS_ROW(4)) << CONST_BITS;
+
+ const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
+
+ const int atmp0 = ACCESS_ROW(7), atmp1 = ACCESS_ROW(5), atmp2 = ACCESS_ROW(3), atmp3 = ACCESS_ROW(1);
+
+ const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
+ const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602);
+
+ const int az1 = MULTIPLY(bz1, - FIX_0_899976223);
+ const int az2 = MULTIPLY(bz2, - FIX_2_562915447);
+ const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5;
+ const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5;
+
+ const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3;
+ const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4;
+ const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3;
+ const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4;
+
+ int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS+PASS1_BITS+3);
+ pDst_ptr[8*0] = (uint8)CLAMP(i);
+
+ i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS+PASS1_BITS+3);
+ pDst_ptr[8*7] = (uint8)CLAMP(i);
+
+ i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS+PASS1_BITS+3);
+ pDst_ptr[8*1] = (uint8)CLAMP(i);
+
+ i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS+PASS1_BITS+3);
+ pDst_ptr[8*6] = (uint8)CLAMP(i);
+
+ i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS+PASS1_BITS+3);
+ pDst_ptr[8*2] = (uint8)CLAMP(i);
+
+ i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS+PASS1_BITS+3);
+ pDst_ptr[8*5] = (uint8)CLAMP(i);
+
+ i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS+PASS1_BITS+3);
+ pDst_ptr[8*3] = (uint8)CLAMP(i);
+
+ i = DESCALE_ZEROSHIFT(tmp13 - btmp0, CONST_BITS+PASS1_BITS+3);
+ pDst_ptr[8*4] = (uint8)CLAMP(i);
+ }
+ };
+
+ template <>
+ struct Col<1>
+ {
+ static void idct(uint8* pDst_ptr, const int* pTemp)
+ {
+ int dcval = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS+3);
+ const uint8 dcval_clamped = (uint8)CLAMP(dcval);
+ pDst_ptr[0*8] = dcval_clamped;
+ pDst_ptr[1*8] = dcval_clamped;
+ pDst_ptr[2*8] = dcval_clamped;
+ pDst_ptr[3*8] = dcval_clamped;
+ pDst_ptr[4*8] = dcval_clamped;
+ pDst_ptr[5*8] = dcval_clamped;
+ pDst_ptr[6*8] = dcval_clamped;
+ pDst_ptr[7*8] = dcval_clamped;
+ }
+ };
+
+ static const uint8 s_idct_row_table[] =
+ {
+ 1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0,
+ 4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0,
+ 6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0,
+ 6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0,
+ 8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2,
+ 8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2,
+ 8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4,
+ 8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8,
+ };
+
+ static const uint8 s_idct_col_table[] = { 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 };
+
+ void idct(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr, int block_max_zag)
+ {
+ JPGD_ASSERT(block_max_zag >= 1);
+ JPGD_ASSERT(block_max_zag <= 64);
+
+ if (block_max_zag == 1)
+ {
+ int k = ((pSrc_ptr[0] + 4) >> 3) + 128;
+ k = CLAMP(k);
+ k = k | (k<<8);
+ k = k | (k<<16);
+
+ for (int i = 8; i > 0; i--)
+ {
+ *(int*)&pDst_ptr[0] = k;
+ *(int*)&pDst_ptr[4] = k;
+ pDst_ptr += 8;
+ }
+ return;
+ }
+
+ int temp[64];
+
+ const jpgd_block_t* pSrc = pSrc_ptr;
+ int* pTemp = temp;
+
+ const uint8* pRow_tab = &s_idct_row_table[(block_max_zag - 1) * 8];
+ int i;
+ for (i = 8; i > 0; i--, pRow_tab++)
+ {
+ switch (*pRow_tab)
+ {
+ case 0: Row<0>::idct(pTemp, pSrc); break;
+ case 1: Row<1>::idct(pTemp, pSrc); break;
+ case 2: Row<2>::idct(pTemp, pSrc); break;
+ case 3: Row<3>::idct(pTemp, pSrc); break;
+ case 4: Row<4>::idct(pTemp, pSrc); break;
+ case 5: Row<5>::idct(pTemp, pSrc); break;
+ case 6: Row<6>::idct(pTemp, pSrc); break;
+ case 7: Row<7>::idct(pTemp, pSrc); break;
+ case 8: Row<8>::idct(pTemp, pSrc); break;
+ }
+
+ pSrc += 8;
+ pTemp += 8;
+ }
+
+ pTemp = temp;
+
+ const int nonzero_rows = s_idct_col_table[block_max_zag - 1];
+ for (i = 8; i > 0; i--)
+ {
+ switch (nonzero_rows)
+ {
+ case 1: Col<1>::idct(pDst_ptr, pTemp); break;
+ case 2: Col<2>::idct(pDst_ptr, pTemp); break;
+ case 3: Col<3>::idct(pDst_ptr, pTemp); break;
+ case 4: Col<4>::idct(pDst_ptr, pTemp); break;
+ case 5: Col<5>::idct(pDst_ptr, pTemp); break;
+ case 6: Col<6>::idct(pDst_ptr, pTemp); break;
+ case 7: Col<7>::idct(pDst_ptr, pTemp); break;
+ case 8: Col<8>::idct(pDst_ptr, pTemp); break;
+ }
+
+ pTemp++;
+ pDst_ptr++;
+ }
+ }
+
+ void idct_4x4(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr)
+ {
+ int temp[64];
+ int* pTemp = temp;
+ const jpgd_block_t* pSrc = pSrc_ptr;
+
+ for (int i = 4; i > 0; i--)
+ {
+ Row<4>::idct(pTemp, pSrc);
+ pSrc += 8;
+ pTemp += 8;
+ }
+
+ pTemp = temp;
+ for (int i = 8; i > 0; i--)
+ {
+ Col<4>::idct(pDst_ptr, pTemp);
+ pTemp++;
+ pDst_ptr++;
+ }
+ }
+
+ // Retrieve one character from the input stream.
+ inline uint jpeg_decoder::get_char()
+ {
+ // Any bytes remaining in buffer?
+ if (!m_in_buf_left)
+ {
+ // Try to get more bytes.
+ prep_in_buffer();
+ // Still nothing to get?
+ if (!m_in_buf_left)
+ {
+ // Pad the end of the stream with 0xFF 0xD9 (EOI marker)
+ int t = m_tem_flag;
+ m_tem_flag ^= 1;
+ if (t)
+ return 0xD9;
+ else
+ return 0xFF;
+ }
+ }
+
+ uint c = *m_pIn_buf_ofs++;
+ m_in_buf_left--;
+
+ return c;
+ }
+
+ // Same as previous method, except can indicate if the character is a pad character or not.
+ inline uint jpeg_decoder::get_char(bool *pPadding_flag)
+ {
+ if (!m_in_buf_left)
+ {
+ prep_in_buffer();
+ if (!m_in_buf_left)
+ {
+ *pPadding_flag = true;
+ int t = m_tem_flag;
+ m_tem_flag ^= 1;
+ if (t)
+ return 0xD9;
+ else
+ return 0xFF;
+ }
+ }
+
+ *pPadding_flag = false;
+
+ uint c = *m_pIn_buf_ofs++;
+ m_in_buf_left--;
+
+ return c;
+ }
+
+ // Inserts a previously retrieved character back into the input buffer.
+ inline void jpeg_decoder::stuff_char(uint8 q)
+ {
+ *(--m_pIn_buf_ofs) = q;
+ m_in_buf_left++;
+ }
+
+ // Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered.
+ inline uint8 jpeg_decoder::get_octet()
+ {
+ bool padding_flag;
+ int c = get_char(&padding_flag);
+
+ if (c == 0xFF)
+ {
+ if (padding_flag)
+ return 0xFF;
+
+ c = get_char(&padding_flag);
+ if (padding_flag)
+ {
+ stuff_char(0xFF);
+ return 0xFF;
+ }
+
+ if (c == 0x00)
+ return 0xFF;
+ else
+ {
+ stuff_char(static_cast(c));
+ stuff_char(0xFF);
+ return 0xFF;
+ }
+ }
+
+ return static_cast(c);
+ }
+
+ // Retrieves a variable number of bits from the input stream. Does not recognize markers.
+ inline uint jpeg_decoder::get_bits(int num_bits)
+ {
+ if (!num_bits)
+ return 0;
+
+ uint i = m_bit_buf >> (32 - num_bits);
+
+ if ((m_bits_left -= num_bits) <= 0)
+ {
+ m_bit_buf <<= (num_bits += m_bits_left);
+
+ uint c1 = get_char();
+ uint c2 = get_char();
+ m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2;
+
+ m_bit_buf <<= -m_bits_left;
+
+ m_bits_left += 16;
+
+ JPGD_ASSERT(m_bits_left >= 0);
+ }
+ else
+ m_bit_buf <<= num_bits;
+
+ return i;
+ }
+
+ // Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered.
+ inline uint jpeg_decoder::get_bits_no_markers(int num_bits)
+ {
+ if (!num_bits)
+ return 0;
+
+ uint i = m_bit_buf >> (32 - num_bits);
+
+ if ((m_bits_left -= num_bits) <= 0)
+ {
+ m_bit_buf <<= (num_bits += m_bits_left);
+
+ if ((m_in_buf_left < 2) || (m_pIn_buf_ofs[0] == 0xFF) || (m_pIn_buf_ofs[1] == 0xFF))
+ {
+ uint c1 = get_octet();
+ uint c2 = get_octet();
+ m_bit_buf |= (c1 << 8) | c2;
+ }
+ else
+ {
+ m_bit_buf |= ((uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1];
+ m_in_buf_left -= 2;
+ m_pIn_buf_ofs += 2;
+ }
+
+ m_bit_buf <<= -m_bits_left;
+
+ m_bits_left += 16;
+
+ JPGD_ASSERT(m_bits_left >= 0);
+ }
+ else
+ m_bit_buf <<= num_bits;
+
+ return i;
+ }
+
+ // Decodes a Huffman encoded symbol.
+ inline int jpeg_decoder::huff_decode(huff_tables *pH)
+ {
+ int symbol;
+
+ // Check first 8-bits: do we have a complete symbol?
+ if ((symbol = pH->look_up[m_bit_buf >> 24]) < 0)
+ {
+ // Decode more bits, use a tree traversal to find symbol.
+ int ofs = 23;
+ do
+ {
+ symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))];
+ ofs--;
+ } while (symbol < 0);
+
+ get_bits_no_markers(8 + (23 - ofs));
+ }
+ else
+ get_bits_no_markers(pH->code_size[symbol]);
+
+ return symbol;
+ }
+
+ // Decodes a Huffman encoded symbol and also returns its associated extra bits through extra_bits.
+ inline int jpeg_decoder::huff_decode(huff_tables *pH, int& extra_bits)
+ {
+ int symbol;
+
+ // Check first 8-bits: do we have a complete symbol?
+ if ((symbol = pH->look_up2[m_bit_buf >> 24]) < 0)
+ {
+ // Use a tree traversal to find symbol.
+ int ofs = 23;
+ do
+ {
+ symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))];
+ ofs--;
+ } while (symbol < 0);
+
+ get_bits_no_markers(8 + (23 - ofs));
+
+ extra_bits = get_bits_no_markers(symbol & 0xF);
+ }
+ else
+ {
+ JPGD_ASSERT(((symbol >> 8) & 31) == pH->code_size[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0));
+
+ if (symbol & 0x8000)
+ {
+ get_bits_no_markers((symbol >> 8) & 31);
+ extra_bits = symbol >> 16;
+ }
+ else
+ {
+ int code_size = (symbol >> 8) & 31;
+ int num_extra_bits = symbol & 0xF;
+ int bits = code_size + num_extra_bits;
+ if (bits <= (m_bits_left + 16))
+ extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1);
+ else
+ {
+ get_bits_no_markers(code_size);
+ extra_bits = get_bits_no_markers(num_extra_bits);
+ }
+ }
+
+ symbol &= 0xFF;
+ }
+
+ return symbol;
+ }
+
+ // Tables and macro used to fully decode the DPCM differences.
+ static const int s_extend_test[16] = { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
+ static const int s_extend_offset[16] = { 0, -1, -3, -7, -15, -31, -63, -127, -255, -511, -1023, -2047, -4095, -8191, -16383, -32767 };
+ static const int s_extend_mask[] = { 0, (1<<0), (1<<1), (1<<2), (1<<3), (1<<4), (1<<5), (1<<6), (1<<7), (1<<8), (1<<9), (1<<10), (1<<11), (1<<12), (1<<13), (1<<14), (1<<15), (1<<16) };
+#define HUFF_EXTEND(x,s) ((x) < s_extend_test[s] ? (x) + s_extend_offset[s] : (x))
+
+ // Clamps a value between 0-255.
+ inline uint8 jpeg_decoder::clamp(int i)
+ {
+ if (static_cast(i) > 255)
+ i = (((~i) >> 31) & 0xFF);
+
+ return static_cast(i);
+ }
+
+ namespace DCT_Upsample
+ {
+ // Small 4x4 integer matrix used as scratch storage by the DCT_Upsample routines.
+ struct Matrix44
+ {
+   typedef int Element_Type;
+   enum { NUM_ROWS = 4, NUM_COLS = 4 };
+
+   Element_Type v[NUM_ROWS][NUM_COLS];
+
+   inline int rows() const { return NUM_ROWS; }
+   inline int cols() const { return NUM_COLS; }
+
+   // Element access by (row, column); no bounds checking.
+   inline const Element_Type & at(int r, int c) const { return v[r][c]; }
+   inline Element_Type & at(int r, int c) { return v[r][c]; }
+
+   // Leaves all elements uninitialized.
+   inline Matrix44() { }
+
+   // Element-wise in-place addition.
+   inline Matrix44& operator += (const Matrix44& a)
+   {
+     for (int r = 0; r < NUM_ROWS; r++)
+       for (int c = 0; c < NUM_COLS; c++)
+         at(r, c) += a.at(r, c);
+     return *this;
+   }
+
+   // Element-wise in-place subtraction.
+   inline Matrix44& operator -= (const Matrix44& a)
+   {
+     for (int r = 0; r < NUM_ROWS; r++)
+       for (int c = 0; c < NUM_COLS; c++)
+         at(r, c) -= a.at(r, c);
+     return *this;
+   }
+
+   // Element-wise sum of two matrices.
+   friend inline Matrix44 operator + (const Matrix44& a, const Matrix44& b)
+   {
+     Matrix44 ret;
+     for (int r = 0; r < NUM_ROWS; r++)
+       for (int c = 0; c < NUM_COLS; c++)
+         ret.at(r, c) = a.at(r, c) + b.at(r, c);
+     return ret;
+   }
+
+   // Element-wise difference of two matrices.
+   friend inline Matrix44 operator - (const Matrix44& a, const Matrix44& b)
+   {
+     Matrix44 ret;
+     for (int r = 0; r < NUM_ROWS; r++)
+       for (int c = 0; c < NUM_COLS; c++)
+         ret.at(r, c) = a.at(r, c) - b.at(r, c);
+     return ret;
+   }
+
+   // Writes a + b into the first 4 entries of the first 4 rows of an 8-wide block.
+   // The result is stored transposed: element (r, c) lands at pDst[c*8 + r].
+   static inline void add_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b)
+   {
+     for (int r = 0; r < 4; r++)
+       for (int c = 0; c < 4; c++)
+         pDst[c*8 + r] = static_cast<jpgd_block_t>(a.at(r, c) + b.at(r, c));
+   }
+
+   // Writes a - b into the first 4 entries of the first 4 rows of an 8-wide block.
+   // The result is stored transposed: element (r, c) lands at pDst[c*8 + r].
+   static inline void sub_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b)
+   {
+     for (int r = 0; r < 4; r++)
+       for (int c = 0; c < 4; c++)
+         pDst[c*8 + r] = static_cast<jpgd_block_t>(a.at(r, c) - b.at(r, c));
+   }
+ };
+
+ // Fixed-point format used for the constant multipliers below: FRACT_BITS fractional bits.
+ const int FRACT_BITS = 10;
+ const int SCALE = 1 << FRACT_BITS;
+
+ typedef int Temp_Type;
+ // D(i): scales a fixed-point value back down, adding half of SCALE before the shift so the result is rounded.
+#define D(i) (((i) + (SCALE >> 1)) >> FRACT_BITS)
+ // F(i): converts a floating-point constant to fixed point (multiplied by SCALE, plus 0.5, then truncated to int).
+#define F(i) ((int)((i) * SCALE + .5f))
+
+ // Any decent C++ compiler will optimize this at compile time to a 0, or an array access.
+ // AT(c, r) reads pSrc[c + r*8], or yields 0 when c >= NUM_COLS or r >= NUM_ROWS.
+ // NUM_COLS, NUM_ROWS and pSrc must be in scope wherever the macro is expanded.
+#define AT(c, r) ((((c)>=NUM_COLS)||((r)>=NUM_ROWS)) ? 0 : pSrc[(c)+(r)*8])
+
+ // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix
+ // The AT() macro uses these template constants to replace reads outside the non-zero region with 0.
+ template<int NUM_ROWS, int NUM_COLS>
+ struct P_Q
+ {
+   // Fills the 4x4 matrices P and Q from the 8x8 coefficient block at pSrc.
+   // All arithmetic is integer fixed point: constants go through F(), products are rescaled with D().
+   static void calc(Matrix44& P, Matrix44& Q, const jpgd_block_t* pSrc)
+   {
+     // 4x8 = 4x8 times 8x8, matrix 0 is constant
+     const Temp_Type X000 = AT(0, 0);
+     const Temp_Type X001 = AT(0, 1);
+     const Temp_Type X002 = AT(0, 2);
+     const Temp_Type X003 = AT(0, 3);
+     const Temp_Type X004 = AT(0, 4);
+     const Temp_Type X005 = AT(0, 5);
+     const Temp_Type X006 = AT(0, 6);
+     const Temp_Type X007 = AT(0, 7);
+     const Temp_Type X010 = D(F(0.415735f) * AT(1, 0) + F(0.791065f) * AT(3, 0) + F(-0.352443f) * AT(5, 0) + F(0.277785f) * AT(7, 0));
+     const Temp_Type X011 = D(F(0.415735f) * AT(1, 1) + F(0.791065f) * AT(3, 1) + F(-0.352443f) * AT(5, 1) + F(0.277785f) * AT(7, 1));
+     const Temp_Type X012 = D(F(0.415735f) * AT(1, 2) + F(0.791065f) * AT(3, 2) + F(-0.352443f) * AT(5, 2) + F(0.277785f) * AT(7, 2));
+     const Temp_Type X013 = D(F(0.415735f) * AT(1, 3) + F(0.791065f) * AT(3, 3) + F(-0.352443f) * AT(5, 3) + F(0.277785f) * AT(7, 3));
+     const Temp_Type X014 = D(F(0.415735f) * AT(1, 4) + F(0.791065f) * AT(3, 4) + F(-0.352443f) * AT(5, 4) + F(0.277785f) * AT(7, 4));
+     const Temp_Type X015 = D(F(0.415735f) * AT(1, 5) + F(0.791065f) * AT(3, 5) + F(-0.352443f) * AT(5, 5) + F(0.277785f) * AT(7, 5));
+     const Temp_Type X016 = D(F(0.415735f) * AT(1, 6) + F(0.791065f) * AT(3, 6) + F(-0.352443f) * AT(5, 6) + F(0.277785f) * AT(7, 6));
+     const Temp_Type X017 = D(F(0.415735f) * AT(1, 7) + F(0.791065f) * AT(3, 7) + F(-0.352443f) * AT(5, 7) + F(0.277785f) * AT(7, 7));
+     const Temp_Type X020 = AT(4, 0);
+     const Temp_Type X021 = AT(4, 1);
+     const Temp_Type X022 = AT(4, 2);
+     const Temp_Type X023 = AT(4, 3);
+     const Temp_Type X024 = AT(4, 4);
+     const Temp_Type X025 = AT(4, 5);
+     const Temp_Type X026 = AT(4, 6);
+     const Temp_Type X027 = AT(4, 7);
+     const Temp_Type X030 = D(F(0.022887f) * AT(1, 0) + F(-0.097545f) * AT(3, 0) + F(0.490393f) * AT(5, 0) + F(0.865723f) * AT(7, 0));
+     const Temp_Type X031 = D(F(0.022887f) * AT(1, 1) + F(-0.097545f) * AT(3, 1) + F(0.490393f) * AT(5, 1) + F(0.865723f) * AT(7, 1));
+     const Temp_Type X032 = D(F(0.022887f) * AT(1, 2) + F(-0.097545f) * AT(3, 2) + F(0.490393f) * AT(5, 2) + F(0.865723f) * AT(7, 2));
+     const Temp_Type X033 = D(F(0.022887f) * AT(1, 3) + F(-0.097545f) * AT(3, 3) + F(0.490393f) * AT(5, 3) + F(0.865723f) * AT(7, 3));
+     const Temp_Type X034 = D(F(0.022887f) * AT(1, 4) + F(-0.097545f) * AT(3, 4) + F(0.490393f) * AT(5, 4) + F(0.865723f) * AT(7, 4));
+     const Temp_Type X035 = D(F(0.022887f) * AT(1, 5) + F(-0.097545f) * AT(3, 5) + F(0.490393f) * AT(5, 5) + F(0.865723f) * AT(7, 5));
+     const Temp_Type X036 = D(F(0.022887f) * AT(1, 6) + F(-0.097545f) * AT(3, 6) + F(0.490393f) * AT(5, 6) + F(0.865723f) * AT(7, 6));
+     const Temp_Type X037 = D(F(0.022887f) * AT(1, 7) + F(-0.097545f) * AT(3, 7) + F(0.490393f) * AT(5, 7) + F(0.865723f) * AT(7, 7));
+
+     // 4x4 = 4x8 times 8x4, matrix 1 is constant
+     P.at(0, 0) = X000;
+     P.at(0, 1) = D(X001 * F(0.415735f) + X003 * F(0.791065f) + X005 * F(-0.352443f) + X007 * F(0.277785f));
+     P.at(0, 2) = X004;
+     P.at(0, 3) = D(X001 * F(0.022887f) + X003 * F(-0.097545f) + X005 * F(0.490393f) + X007 * F(0.865723f));
+     P.at(1, 0) = X010;
+     P.at(1, 1) = D(X011 * F(0.415735f) + X013 * F(0.791065f) + X015 * F(-0.352443f) + X017 * F(0.277785f));
+     P.at(1, 2) = X014;
+     P.at(1, 3) = D(X011 * F(0.022887f) + X013 * F(-0.097545f) + X015 * F(0.490393f) + X017 * F(0.865723f));
+     P.at(2, 0) = X020;
+     P.at(2, 1) = D(X021 * F(0.415735f) + X023 * F(0.791065f) + X025 * F(-0.352443f) + X027 * F(0.277785f));
+     P.at(2, 2) = X024;
+     P.at(2, 3) = D(X021 * F(0.022887f) + X023 * F(-0.097545f) + X025 * F(0.490393f) + X027 * F(0.865723f));
+     P.at(3, 0) = X030;
+     P.at(3, 1) = D(X031 * F(0.415735f) + X033 * F(0.791065f) + X035 * F(-0.352443f) + X037 * F(0.277785f));
+     P.at(3, 2) = X034;
+     P.at(3, 3) = D(X031 * F(0.022887f) + X033 * F(-0.097545f) + X035 * F(0.490393f) + X037 * F(0.865723f));
+     // 40 muls 24 adds
+
+     // 4x4 = 4x8 times 8x4, matrix 1 is constant
+     Q.at(0, 0) = D(X001 * F(0.906127f) + X003 * F(-0.318190f) + X005 * F(0.212608f) + X007 * F(-0.180240f));
+     Q.at(0, 1) = X002;
+     Q.at(0, 2) = D(X001 * F(-0.074658f) + X003 * F(0.513280f) + X005 * F(0.768178f) + X007 * F(-0.375330f));
+     Q.at(0, 3) = X006;
+     Q.at(1, 0) = D(X011 * F(0.906127f) + X013 * F(-0.318190f) + X015 * F(0.212608f) + X017 * F(-0.180240f));
+     Q.at(1, 1) = X012;
+     Q.at(1, 2) = D(X011 * F(-0.074658f) + X013 * F(0.513280f) + X015 * F(0.768178f) + X017 * F(-0.375330f));
+     Q.at(1, 3) = X016;
+     Q.at(2, 0) = D(X021 * F(0.906127f) + X023 * F(-0.318190f) + X025 * F(0.212608f) + X027 * F(-0.180240f));
+     Q.at(2, 1) = X022;
+     Q.at(2, 2) = D(X021 * F(-0.074658f) + X023 * F(0.513280f) + X025 * F(0.768178f) + X027 * F(-0.375330f));
+     Q.at(2, 3) = X026;
+     Q.at(3, 0) = D(X031 * F(0.906127f) + X033 * F(-0.318190f) + X035 * F(0.212608f) + X037 * F(-0.180240f));
+     Q.at(3, 1) = X032;
+     Q.at(3, 2) = D(X031 * F(-0.074658f) + X033 * F(0.513280f) + X035 * F(0.768178f) + X037 * F(-0.375330f));
+     Q.at(3, 3) = X036;
+     // 40 muls 24 adds
+   }
+ };
+
+ // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix; the AT() macro uses them
+ // to replace reads outside the non-zero region with 0.
+ template<int NUM_ROWS, int NUM_COLS>
+ struct R_S
+ {
+   // Fills the 4x4 matrices R and S from the 8x8 coefficient block at pSrc.
+   // All arithmetic is integer fixed point: constants go through F(), products are rescaled with D().
+   static void calc(Matrix44& R, Matrix44& S, const jpgd_block_t* pSrc)
+   {
+     // 4x8 = 4x8 times 8x8, matrix 0 is constant
+     const Temp_Type X100 = D(F(0.906127f) * AT(1, 0) + F(-0.318190f) * AT(3, 0) + F(0.212608f) * AT(5, 0) + F(-0.180240f) * AT(7, 0));
+     const Temp_Type X101 = D(F(0.906127f) * AT(1, 1) + F(-0.318190f) * AT(3, 1) + F(0.212608f) * AT(5, 1) + F(-0.180240f) * AT(7, 1));
+     const Temp_Type X102 = D(F(0.906127f) * AT(1, 2) + F(-0.318190f) * AT(3, 2) + F(0.212608f) * AT(5, 2) + F(-0.180240f) * AT(7, 2));
+     const Temp_Type X103 = D(F(0.906127f) * AT(1, 3) + F(-0.318190f) * AT(3, 3) + F(0.212608f) * AT(5, 3) + F(-0.180240f) * AT(7, 3));
+     const Temp_Type X104 = D(F(0.906127f) * AT(1, 4) + F(-0.318190f) * AT(3, 4) + F(0.212608f) * AT(5, 4) + F(-0.180240f) * AT(7, 4));
+     const Temp_Type X105 = D(F(0.906127f) * AT(1, 5) + F(-0.318190f) * AT(3, 5) + F(0.212608f) * AT(5, 5) + F(-0.180240f) * AT(7, 5));
+     const Temp_Type X106 = D(F(0.906127f) * AT(1, 6) + F(-0.318190f) * AT(3, 6) + F(0.212608f) * AT(5, 6) + F(-0.180240f) * AT(7, 6));
+     const Temp_Type X107 = D(F(0.906127f) * AT(1, 7) + F(-0.318190f) * AT(3, 7) + F(0.212608f) * AT(5, 7) + F(-0.180240f) * AT(7, 7));
+     const Temp_Type X110 = AT(2, 0);
+     const Temp_Type X111 = AT(2, 1);
+     const Temp_Type X112 = AT(2, 2);
+     const Temp_Type X113 = AT(2, 3);
+     const Temp_Type X114 = AT(2, 4);
+     const Temp_Type X115 = AT(2, 5);
+     const Temp_Type X116 = AT(2, 6);
+     const Temp_Type X117 = AT(2, 7);
+     const Temp_Type X120 = D(F(-0.074658f) * AT(1, 0) + F(0.513280f) * AT(3, 0) + F(0.768178f) * AT(5, 0) + F(-0.375330f) * AT(7, 0));
+     const Temp_Type X121 = D(F(-0.074658f) * AT(1, 1) + F(0.513280f) * AT(3, 1) + F(0.768178f) * AT(5, 1) + F(-0.375330f) * AT(7, 1));
+     const Temp_Type X122 = D(F(-0.074658f) * AT(1, 2) + F(0.513280f) * AT(3, 2) + F(0.768178f) * AT(5, 2) + F(-0.375330f) * AT(7, 2));
+     const Temp_Type X123 = D(F(-0.074658f) * AT(1, 3) + F(0.513280f) * AT(3, 3) + F(0.768178f) * AT(5, 3) + F(-0.375330f) * AT(7, 3));
+     const Temp_Type X124 = D(F(-0.074658f) * AT(1, 4) + F(0.513280f) * AT(3, 4) + F(0.768178f) * AT(5, 4) + F(-0.375330f) * AT(7, 4));
+     const Temp_Type X125 = D(F(-0.074658f) * AT(1, 5) + F(0.513280f) * AT(3, 5) + F(0.768178f) * AT(5, 5) + F(-0.375330f) * AT(7, 5));
+     const Temp_Type X126 = D(F(-0.074658f) * AT(1, 6) + F(0.513280f) * AT(3, 6) + F(0.768178f) * AT(5, 6) + F(-0.375330f) * AT(7, 6));
+     const Temp_Type X127 = D(F(-0.074658f) * AT(1, 7) + F(0.513280f) * AT(3, 7) + F(0.768178f) * AT(5, 7) + F(-0.375330f) * AT(7, 7));
+     const Temp_Type X130 = AT(6, 0);
+     const Temp_Type X131 = AT(6, 1);
+     const Temp_Type X132 = AT(6, 2);
+     const Temp_Type X133 = AT(6, 3);
+     const Temp_Type X134 = AT(6, 4);
+     const Temp_Type X135 = AT(6, 5);
+     const Temp_Type X136 = AT(6, 6);
+     const Temp_Type X137 = AT(6, 7);
+     // 80 muls 48 adds
+
+     // 4x4 = 4x8 times 8x4, matrix 1 is constant
+     R.at(0, 0) = X100;
+     R.at(0, 1) = D(X101 * F(0.415735f) + X103 * F(0.791065f) + X105 * F(-0.352443f) + X107 * F(0.277785f));
+     R.at(0, 2) = X104;
+     R.at(0, 3) = D(X101 * F(0.022887f) + X103 * F(-0.097545f) + X105 * F(0.490393f) + X107 * F(0.865723f));
+     R.at(1, 0) = X110;
+     R.at(1, 1) = D(X111 * F(0.415735f) + X113 * F(0.791065f) + X115 * F(-0.352443f) + X117 * F(0.277785f));
+     R.at(1, 2) = X114;
+     R.at(1, 3) = D(X111 * F(0.022887f) + X113 * F(-0.097545f) + X115 * F(0.490393f) + X117 * F(0.865723f));
+     R.at(2, 0) = X120;
+     R.at(2, 1) = D(X121 * F(0.415735f) + X123 * F(0.791065f) + X125 * F(-0.352443f) + X127 * F(0.277785f));
+     R.at(2, 2) = X124;
+     R.at(2, 3) = D(X121 * F(0.022887f) + X123 * F(-0.097545f) + X125 * F(0.490393f) + X127 * F(0.865723f));
+     R.at(3, 0) = X130;
+     R.at(3, 1) = D(X131 * F(0.415735f) + X133 * F(0.791065f) + X135 * F(-0.352443f) + X137 * F(0.277785f));
+     R.at(3, 2) = X134;
+     R.at(3, 3) = D(X131 * F(0.022887f) + X133 * F(-0.097545f) + X135 * F(0.490393f) + X137 * F(0.865723f));
+     // 40 muls 24 adds
+
+     // 4x4 = 4x8 times 8x4, matrix 1 is constant
+     S.at(0, 0) = D(X101 * F(0.906127f) + X103 * F(-0.318190f) + X105 * F(0.212608f) + X107 * F(-0.180240f));
+     S.at(0, 1) = X102;
+     S.at(0, 2) = D(X101 * F(-0.074658f) + X103 * F(0.513280f) + X105 * F(0.768178f) + X107 * F(-0.375330f));
+     S.at(0, 3) = X106;
+     S.at(1, 0) = D(X111 * F(0.906127f) + X113 * F(-0.318190f) + X115 * F(0.212608f) + X117 * F(-0.180240f));
+     S.at(1, 1) = X112;
+     S.at(1, 2) = D(X111 * F(-0.074658f) + X113 * F(0.513280f) + X115 * F(0.768178f) + X117 * F(-0.375330f));
+     S.at(1, 3) = X116;
+     S.at(2, 0) = D(X121 * F(0.906127f) + X123 * F(-0.318190f) + X125 * F(0.212608f) + X127 * F(-0.180240f));
+     S.at(2, 1) = X122;
+     S.at(2, 2) = D(X121 * F(-0.074658f) + X123 * F(0.513280f) + X125 * F(0.768178f) + X127 * F(-0.375330f));
+     S.at(2, 3) = X126;
+     S.at(3, 0) = D(X131 * F(0.906127f) + X133 * F(-0.318190f) + X135 * F(0.212608f) + X137 * F(-0.180240f));
+     S.at(3, 1) = X132;
+     S.at(3, 2) = D(X131 * F(-0.074658f) + X133 * F(0.513280f) + X135 * F(0.768178f) + X137 * F(-0.375330f));
+     S.at(3, 3) = X136;
+     // 40 muls 24 adds
+   }
+ };
+ } // end namespace DCT_Upsample
+
+ // Detaches the input stream and releases every block on the decoder's
+ // memory-block list, leaving the list empty.
+ void jpeg_decoder::free_all_blocks()
+ {
+   m_pStream = NULL;
+
+   mem_block *pBlock = m_pMem_blocks;
+   while (pBlock)
+   {
+     // Read the link first: the block is gone once jpgd_free() returns.
+     mem_block *pNext = pBlock->m_pNext;
+     jpgd_free(pBlock);
+     pBlock = pNext;
+   }
+
+   m_pMem_blocks = NULL;
+ }
+
+ // This method handles all errors.
+ // It could easily be changed to use C++ exceptions.
+ void jpeg_decoder::stop_decoding(jpgd_status status)
+ {
+ m_error_code = status;
+ free_all_blocks();
+ longjmp(m_jmp_state, status);
+
+ // we shouldn't get here as longjmp shouldn't return, but we put it here to make it explicit
+ // that this function doesn't return, otherwise we get this error:
+ //
+ // error : function declared 'noreturn' should not return
+ exit(1);
+ }
+
+ // Allocates nSize bytes from the decoder's block list.
+ // The request is rounded up to a multiple of 4 bytes (minimum 4). The first existing block
+ // with enough free space is used; otherwise a new block is obtained from jpgd_malloc() and
+ // pushed on the front of the list. Memory is released only by free_all_blocks().
+ //   nSize - number of bytes requested.
+ //   zero  - when true, the returned bytes are cleared.
+ // Calls stop_decoding(JPGD_NOTENOUGHMEM) if a new block cannot be allocated.
+ void *jpeg_decoder::alloc(size_t nSize, bool zero)
+ {
+   nSize = (JPGD_MAX(nSize, 1) + 3) & ~3;
+   char *rv = NULL;
+   for (mem_block *b = m_pMem_blocks; b; b = b->m_pNext)
+   {
+     if ((b->m_used_count + nSize) <= b->m_size)
+     {
+       rv = b->m_data + b->m_used_count;
+       b->m_used_count += nSize;
+       break;
+     }
+   }
+   if (!rv)
+   {
+     // New blocks hold at least 32768 - 256 bytes, or the request rounded up to a multiple of 2048.
+     // The capacity stays in size_t: narrowing it to int would truncate a very large request
+     // and under-allocate the block.
+     const size_t capacity = JPGD_MAX((size_t)(32768 - 256), (nSize + 2047) & ~(size_t)2047);
+     mem_block *b = (mem_block*)jpgd_malloc(sizeof(mem_block) + capacity);
+     if (!b) stop_decoding(JPGD_NOTENOUGHMEM);
+     b->m_pNext = m_pMem_blocks; m_pMem_blocks = b;
+     b->m_used_count = nSize;
+     b->m_size = capacity;
+     rv = b->m_data;
+   }
+   if (zero) memset(rv, 0, nSize);
+   return rv;
+ }
+
+ // Fills n consecutive 16-bit words starting at p with the value c.
+ // Each word is written byte by byte, low byte first.
+ void jpeg_decoder::word_clear(void *p, uint16 c, uint n)
+ {
+   const uint8 lo = c & 0xFF;
+   const uint8 hi = (c >> 8) & 0xFF;
+
+   uint8 *pDst = (uint8*)p;
+   for ( ; n != 0; n--, pDst += 2)
+   {
+     pDst[0] = lo;
+     pDst[1] = hi;
+   }
+ }
+
+ // Refills the input buffer from the stream.
+ // Keeps calling the stream's read() until either the buffer is full or the
+ // stream reports an end of file condition. Does nothing beyond resetting the
+ // buffer position if end of file was already reached.
+ void jpeg_decoder::prep_in_buffer()
+ {
+   m_in_buf_left = 0;
+   m_pIn_buf_ofs = m_in_buf;
+
+   if (m_eof_flag)
+     return;
+
+   for ( ; ; )
+   {
+     const int bytes_read = m_pStream->read(m_in_buf + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag);
+     if (bytes_read == -1)
+       stop_decoding(JPGD_STREAM_READ);
+
+     m_in_buf_left += bytes_read;
+
+     if ((m_in_buf_left >= JPGD_IN_BUF_SIZE) || m_eof_flag)
+       break;
+   }
+
+   m_total_bytes_read += m_in_buf_left;
+
+   // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid).
+   // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.)
+   word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
+ }
+
+ // Reads a define-Huffman-table (DHT) marker segment, which may carry several tables.
+ // For each table: one class/id byte, 16 code-length counts, then the symbol values.
+ // Tables are stored in m_huff_num / m_huff_val; tables whose class bit (0x10) is set
+ // occupy the upper half of the table arrays.
+ // Calls stop_decoding() on a malformed segment.
+ void jpeg_decoder::read_dht_marker()
+ {
+   int i, index, count;
+   uint8 huff_num[17];
+   uint8 huff_val[256];
+
+   uint num_left = get_bits(16);
+
+   if (num_left < 2)
+     stop_decoding(JPGD_BAD_DHT_MARKER);
+
+   num_left -= 2;
+
+   while (num_left)
+   {
+     index = get_bits(8);
+
+     // Capture the table class now: the remapping below folds bit 0x10 into the
+     // slot number, so testing it afterwards would not reflect the class.
+     const bool is_ac_table = (index & 0x10) != 0;
+
+     huff_num[0] = 0;
+
+     count = 0;
+
+     for (i = 1; i <= 16; i++)
+     {
+       huff_num[i] = static_cast<uint8>(get_bits(8));
+       count += huff_num[i];
+     }
+
+     if (count > 255)
+       stop_decoding(JPGD_BAD_DHT_COUNTS);
+
+     for (i = 0; i < count; i++)
+       huff_val[i] = static_cast<uint8>(get_bits(8));
+
+     // All 256 bytes are copied below, so clear the entries that were not read
+     // rather than copying uninitialized stack contents.
+     memset(huff_val + count, 0, (size_t)(256 - count));
+
+     i = 1 + 16 + count;
+
+     if (num_left < (uint)i)
+       stop_decoding(JPGD_BAD_DHT_MARKER);
+
+     num_left -= i;
+
+     // Map (class, id) to a slot: class-0 tables in the lower half, class-1 tables in the upper half.
+     index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1);
+
+     if (index >= JPGD_MAX_HUFF_TABLES)
+       stop_decoding(JPGD_BAD_DHT_INDEX);
+
+     if (!m_huff_num[index])
+       m_huff_num[index] = (uint8 *)alloc(17);
+
+     if (!m_huff_val[index])
+       m_huff_val[index] = (uint8 *)alloc(256);
+
+     m_huff_ac[index] = is_ac_table;
+     memcpy(m_huff_num[index], huff_num, 17);
+     memcpy(m_huff_val[index], huff_val, 256);
+   }
+ }
+
+ // Reads a define-quantization-table (DQT) marker segment, which may carry several tables.
+ // Each table starts with a byte holding the precision (high nibble) and table number
+ // (low nibble), followed by 64 entries of one byte each, or two bytes each when the
+ // precision nibble is non-zero.
+ // Calls stop_decoding() on a bad table number or an inconsistent segment length.
+ void jpeg_decoder::read_dqt_marker()
+ {
+   int n, i, prec;
+   uint num_left;
+   uint temp;
+
+   num_left = get_bits(16);
+
+   if (num_left < 2)
+     stop_decoding(JPGD_BAD_DQT_MARKER);
+
+   num_left -= 2;
+
+   while (num_left)
+   {
+     n = get_bits(8);
+     prec = n >> 4;
+     n &= 0x0F;
+
+     if (n >= JPGD_MAX_QUANT_TABLES)
+       stop_decoding(JPGD_BAD_DQT_TABLE);
+
+     if (!m_quant[n])
+       m_quant[n] = (jpgd_quant_t *)alloc(64 * sizeof(jpgd_quant_t));
+
+     // read quantization entries, in zag order
+     for (i = 0; i < 64; i++)
+     {
+       temp = get_bits(8);
+
+       if (prec)
+         temp = (temp << 8) + get_bits(8);
+
+       m_quant[n][i] = static_cast<jpgd_quant_t>(temp);
+     }
+
+     // Bytes consumed by this table: the precision/id byte plus 64 or 128 entry bytes.
+     i = 64 + 1;
+
+     if (prec)
+       i += 64;
+
+     if (num_left < (uint)i)
+       stop_decoding(JPGD_BAD_DQT_LENGTH);
+
+     num_left -= i;
+   }
+ }
+
+ // Parses a start of frame (SOF) marker segment: sample precision, image
+ // dimensions, component count, then per component its identifier, horizontal
+ // and vertical sampling factors and quantization table number.
+ // Calls stop_decoding() if any field is unsupported or out of range.
+ void jpeg_decoder::read_sof_marker()
+ {
+   const uint num_left = get_bits(16);
+
+   // Only 8-bit sample precision is supported.
+   if (get_bits(8) != 8)
+     stop_decoding(JPGD_BAD_PRECISION);
+
+   m_image_y_size = get_bits(16);
+   if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT))
+     stop_decoding(JPGD_BAD_HEIGHT);
+
+   m_image_x_size = get_bits(16);
+   if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH))
+     stop_decoding(JPGD_BAD_WIDTH);
+
+   m_comps_in_frame = get_bits(8);
+   if (m_comps_in_frame > JPGD_MAX_COMPONENTS)
+     stop_decoding(JPGD_TOO_MANY_COMPONENTS);
+
+   // The declared length must be exactly 8 header bytes plus 3 bytes per component.
+   if (num_left != (uint)(m_comps_in_frame * 3 + 8))
+     stop_decoding(JPGD_BAD_SOF_LENGTH);
+
+   for (int comp = 0; comp < m_comps_in_frame; comp++)
+   {
+     m_comp_ident[comp] = get_bits(8);
+     m_comp_h_samp[comp] = get_bits(4);
+     m_comp_v_samp[comp] = get_bits(4);
+     m_comp_quant[comp] = get_bits(8);
+   }
+ }
+
+ // Skips over a marker segment the decoder does not interpret: reads the
+ // 16-bit length and then discards the remaining (length - 2) bytes.
+ void jpeg_decoder::skip_variable_marker()
+ {
+   uint num_left = get_bits(16);
+
+   // The length field counts its own two bytes, so anything smaller is malformed.
+   if (num_left < 2)
+     stop_decoding(JPGD_BAD_VARIABLE_MARKER);
+
+   for (num_left -= 2; num_left != 0; num_left--)
+     get_bits(8);
+ }
+
+ // Parses a define restart interval (DRI) marker segment.
+ // The segment length must be exactly 4; the following 16 bits become m_restart_interval.
+ void jpeg_decoder::read_dri_marker()
+ {
+   const uint segment_len = get_bits(16);
+   if (segment_len != 4)
+     stop_decoding(JPGD_BAD_DRI_LENGTH);
+
+   m_restart_interval = get_bits(16);
+ }
+
+ // Parses a start of scan (SOS) marker segment.
+ // Reads the number of components in the scan, then for each one its component
+ // identifier and DC/AC Huffman table selectors, followed by the spectral
+ // selection and successive approximation fields. Any remaining bytes of the
+ // segment are discarded.
+ // Calls stop_decoding() if the length is inconsistent, a component identifier
+ // is not part of the frame, or a table selector is out of range.
+ void jpeg_decoder::read_sos_marker()
+ {
+   uint num_left;
+   int i, ci, n, c, cc;
+
+   num_left = get_bits(16);
+
+   n = get_bits(8);
+
+   m_comps_in_scan = n;
+
+   num_left -= 3;
+
+   if ( (num_left != (uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN) )
+     stop_decoding(JPGD_BAD_SOS_LENGTH);
+
+   for (i = 0; i < n; i++)
+   {
+     cc = get_bits(8);
+     c = get_bits(8);
+     num_left -= 2;
+
+     // Find which frame component this scan component refers to.
+     for (ci = 0; ci < m_comps_in_frame; ci++)
+       if (cc == m_comp_ident[ci])
+         break;
+
+     if (ci >= m_comps_in_frame)
+       stop_decoding(JPGD_BAD_SOS_COMP_ID);
+
+     m_comp_list[i] = ci;
+     m_comp_dc_tab[ci] = (c >> 4) & 15;
+     m_comp_ac_tab[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1);
+
+     // The selectors come straight from the stream and are later used to index
+     // m_pHuff_tabs, so reject values outside the table range.
+     // NOTE(review): assumes m_pHuff_tabs holds JPGD_MAX_HUFF_TABLES entries -- confirm in the header.
+     if ((m_comp_dc_tab[ci] >= JPGD_MAX_HUFF_TABLES) || (m_comp_ac_tab[ci] >= JPGD_MAX_HUFF_TABLES))
+       stop_decoding(JPGD_DECODE_ERROR);
+   }
+
+   m_spectral_start = get_bits(8);
+   m_spectral_end = get_bits(8);
+   m_successive_high = get_bits(4);
+   m_successive_low = get_bits(4);
+
+   // Non-progressive scans always cover all 64 coefficients, whatever the stream says.
+   if (!m_progressive_flag)
+   {
+     m_spectral_start = 0;
+     m_spectral_end = 63;
+   }
+
+   num_left -= 3;
+
+   while (num_left) /* read past whatever is num_left */
+   {
+     get_bits(8);
+     num_left--;
+   }
+ }
+
+ // Finds the next marker in the stream and returns its code (the byte that follows 0xFF).
+ // Bytes before the 0xFF prefix are skipped, as are repeated 0xFF bytes;
+ // an 0xFF followed by 0x00 is not treated as a marker and scanning continues.
+ int jpeg_decoder::next_marker()
+ {
+   uint c;
+
+   do
+   {
+     // Skip everything up to and including the next 0xFF byte.
+     do
+     {
+       c = get_bits(8);
+     } while (c != 0xFF);
+
+     // Skip any further 0xFF bytes; the first other byte is the candidate marker code.
+     do
+     {
+       c = get_bits(8);
+     } while (c == 0xFF);
+
+   } while (c == 0);
+
+   return c;
+ }
+
+ // Consumes marker segments until an SOFx, SOI, EOI, or SOS marker is found,
+ // and returns that marker's code. Table and restart-interval segments met on
+ // the way are parsed; segments the decoder does not interpret are skipped.
+ int jpeg_decoder::process_markers()
+ {
+   for ( ; ; )
+   {
+     const int marker = next_marker();
+
+     switch (marker)
+     {
+       // Frame, image and scan delimiters are handed back to the caller.
+       case M_SOF0: case M_SOF1: case M_SOF2: case M_SOF3:
+       case M_SOF5: case M_SOF6: case M_SOF7:
+       case M_SOF9: case M_SOF10: case M_SOF11:
+       case M_SOF13: case M_SOF14: case M_SOF15:
+       case M_SOI:
+       case M_EOI:
+       case M_SOS:
+         return marker;
+
+       case M_DHT:
+         read_dht_marker();
+         break;
+
+       case M_DQT:
+         read_dqt_marker();
+         break;
+
+       case M_DRI:
+         read_dri_marker();
+         break;
+
+       // No arithmitic support - dumb patents!
+       case M_DAC:
+         stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
+         break;
+
+       // Parameterless markers are rejected here, as is M_JPG.
+       case M_JPG:
+       case M_RST0: case M_RST1: case M_RST2: case M_RST3:
+       case M_RST4: case M_RST5: case M_RST6: case M_RST7:
+       case M_TEM:
+         stop_decoding(JPGD_UNEXPECTED_MARKER);
+         break;
+
+       // Must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0
+       // (there is no need to read the JFIF marker).
+       default:
+         skip_variable_marker();
+         break;
+     }
+   }
+ }
+
+ // Finds the start of image (SOI) marker.
+ // This code is rather defensive: if the stream does not begin with SOI, only a
+ // bounded number of further bytes (the 4096 budget below) is searched, to avoid
+ // false positives. Calls stop_decoding(JPGD_NOT_JPEG) if no SOI is found.
+ void jpeg_decoder::locate_soi_marker()
+ {
+   uint lastchar = get_bits(8);
+   uint thischar = get_bits(8);
+
+   /* ok if it's a normal JPEG file without a special header */
+   if ((lastchar == 0xFF) && (thischar == M_SOI))
+     return;
+
+   uint bytesleft = 4096; //512;
+   bool found_soi = false;
+
+   while (!found_soi)
+   {
+     bytesleft--;
+     if (bytesleft == 0)
+       stop_decoding(JPGD_NOT_JPEG);
+
+     lastchar = thischar;
+     thischar = get_bits(8);
+
+     if (lastchar != 0xFF)
+       continue;
+
+     if (thischar == M_SOI)
+       found_soi = true;
+     else if (thischar == M_EOI) // get_bits will keep returning M_EOI if we read past the end
+       stop_decoding(JPGD_NOT_JPEG);
+   }
+
+   // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad.
+   thischar = (m_bit_buf >> 24) & 0xFF;
+
+   if (thischar != 0xFF)
+     stop_decoding(JPGD_NOT_JPEG);
+ }
+
+ // Locates the SOI marker, processes the markers that follow and parses the
+ // start of frame (SOF) segment. Only SOF0 (baseline DCT), SOF1 (extended
+ // sequential DCT) and SOF2 are accepted; SOF2 also sets the progressive flag.
+ void jpeg_decoder::locate_sof_marker()
+ {
+   locate_soi_marker();
+
+   const int marker = process_markers();
+
+   if (marker == M_SOF9) /* Arithmitic coding */
+   {
+     stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
+   }
+   else if ((marker != M_SOF0) && (marker != M_SOF1) && (marker != M_SOF2))
+   {
+     stop_decoding(JPGD_UNSUPPORTED_MARKER);
+   }
+   else
+   {
+     if (marker == M_SOF2)
+       m_progressive_flag = JPGD_TRUE;
+
+     read_sof_marker();
+   }
+ }
+
+ // Advances to the next start of scan (SOS) marker and parses it.
+ // Returns JPGD_FALSE if EOI is reached first and JPGD_TRUE once an SOS segment
+ // has been read; any other marker stops decoding with JPGD_UNEXPECTED_MARKER.
+ int jpeg_decoder::locate_sos_marker()
+ {
+   const int marker = process_markers();
+
+   if (marker == M_EOI)
+     return JPGD_FALSE;
+
+   if (marker != M_SOS)
+     stop_decoding(JPGD_UNEXPECTED_MARKER);
+
+   read_sos_marker();
+
+   return JPGD_TRUE;
+ }
+
+ // Resets every decoder member to its default/uninitialized state, attaches
+ // pStream as the input stream, fills the input buffer and primes the bit buffer.
+ //   pStream - stream that later read() calls pull compressed data from.
+ void jpeg_decoder::init(jpeg_decoder_stream *pStream)
+ {
+   m_pMem_blocks = NULL;
+   m_error_code = JPGD_SUCCESS;
+   m_ready_flag = false;
+   m_image_x_size = m_image_y_size = 0;
+   m_pStream = pStream;
+   m_progressive_flag = JPGD_FALSE;
+
+   memset(m_huff_ac, 0, sizeof(m_huff_ac));
+   memset(m_huff_num, 0, sizeof(m_huff_num));
+   memset(m_huff_val, 0, sizeof(m_huff_val));
+   memset(m_quant, 0, sizeof(m_quant));
+
+   m_scan_type = 0;
+   m_comps_in_frame = 0;
+
+   memset(m_comp_h_samp, 0, sizeof(m_comp_h_samp));
+   memset(m_comp_v_samp, 0, sizeof(m_comp_v_samp));
+   memset(m_comp_quant, 0, sizeof(m_comp_quant));
+   memset(m_comp_ident, 0, sizeof(m_comp_ident));
+   memset(m_comp_h_blocks, 0, sizeof(m_comp_h_blocks));
+   memset(m_comp_v_blocks, 0, sizeof(m_comp_v_blocks));
+
+   m_comps_in_scan = 0;
+   memset(m_comp_list, 0, sizeof(m_comp_list));
+   memset(m_comp_dc_tab, 0, sizeof(m_comp_dc_tab));
+   memset(m_comp_ac_tab, 0, sizeof(m_comp_ac_tab));
+
+   m_spectral_start = 0;
+   m_spectral_end = 0;
+   m_successive_low = 0;
+   m_successive_high = 0;
+   m_max_mcu_x_size = 0;
+   m_max_mcu_y_size = 0;
+   m_blocks_per_mcu = 0;
+   m_max_blocks_per_row = 0;
+   m_mcus_per_row = 0;
+   m_mcus_per_col = 0;
+   m_expanded_blocks_per_component = 0;
+   m_expanded_blocks_per_mcu = 0;
+   m_expanded_blocks_per_row = 0;
+   m_freq_domain_chroma_upsample = false;
+
+   memset(m_mcu_org, 0, sizeof(m_mcu_org));
+
+   m_total_lines_left = 0;
+   m_mcu_lines_left = 0;
+   m_real_dest_bytes_per_scan_line = 0;
+   m_dest_bytes_per_scan_line = 0;
+   m_dest_bytes_per_pixel = 0;
+
+   memset(m_pHuff_tabs, 0, sizeof(m_pHuff_tabs));
+
+   memset(m_dc_coeffs, 0, sizeof(m_dc_coeffs));
+   memset(m_ac_coeffs, 0, sizeof(m_ac_coeffs));
+   memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
+
+   m_eob_run = 0;
+
+   m_pIn_buf_ofs = m_in_buf;
+   m_in_buf_left = 0;
+   m_eof_flag = false;
+   m_tem_flag = 0;
+
+   memset(m_in_buf_pad_start, 0, sizeof(m_in_buf_pad_start));
+   memset(m_in_buf, 0, sizeof(m_in_buf));
+   memset(m_in_buf_pad_end, 0, sizeof(m_in_buf_pad_end));
+
+   m_restart_interval = 0;
+   m_restarts_left = 0;
+   m_next_restart_num = 0;
+
+   m_max_mcus_per_row = 0;
+   m_max_blocks_per_mcu = 0;
+   m_max_mcus_per_col = 0;
+
+   memset(m_last_dc_val, 0, sizeof(m_last_dc_val));
+   m_pMCU_coefficients = NULL;
+   m_pSample_buf = NULL;
+
+   m_total_bytes_read = 0;
+
+   m_pScan_line_0 = NULL;
+   m_pScan_line_1 = NULL;
+
+   // Ready the input buffer.
+   prep_in_buffer();
+
+   // Prime the bit buffer.
+   m_bits_left = 16;
+   m_bit_buf = 0;
+
+   get_bits(16);
+   get_bits(16);
+
+   // Until a block has been decoded, treat every block as using all 64 coefficients.
+   for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++)
+     m_mcu_block_max_zag[i] = 64;
+ }
+
+ // Fixed-point helpers for the colour conversion tables: SCALEBITS fractional bits,
+ // ONE_HALF is 0.5 in that format, FIX(x) converts a floating-point constant to it.
+#define SCALEBITS 16
+#define ONE_HALF ((int) 1 << (SCALEBITS-1))
+#define FIX(x) ((int) ((x) * (1L<<SCALEBITS) + 0.5f))
+
+ // Create a few tables that allow us to quickly convert YCbCr to RGB.
+ // Each table is indexed by a chroma sample 0..255; k is that sample re-centred around zero.
+ // m_crr and m_cbb hold rounded integer contributions. m_crg and m_cbg are left at
+ // SCALEBITS precision, with the rounding term folded into m_cbg.
+ // NOTE(review): presumably the consumer adds m_crg + m_cbg and shifts once -- confirm at the use site.
+ void jpeg_decoder::create_look_ups()
+ {
+   for (int i = 0; i <= 255; i++)
+   {
+     int k = i - 128;
+     m_crr[i] = ( FIX(1.40200f) * k + ONE_HALF) >> SCALEBITS;
+     m_cbb[i] = ( FIX(1.77200f) * k + ONE_HALF) >> SCALEBITS;
+     m_crg[i] = (-FIX(0.71414f)) * k;
+     m_cbg[i] = (-FIX(0.34414f)) * k + ONE_HALF;
+   }
+ }
+
+ // Pushes the bytes currently held in the bit buffer back onto the input
+ // stream (they were pulled in during initial marker scanning), then re-primes
+ // the bit buffer with get_bits_no_markers().
+ void jpeg_decoder::fix_in_buffer()
+ {
+   // In case any 0xFF's were pulled into the buffer during marker scanning.
+   JPGD_ASSERT((m_bits_left & 7) == 0);
+
+   // The two low bytes are returned only when the buffer actually holds them.
+   if (m_bits_left >= 8)
+   {
+     if (m_bits_left == 16)
+       stuff_char( (uint8)(m_bit_buf & 0xFF));
+
+     stuff_char( (uint8)((m_bit_buf >> 8) & 0xFF));
+   }
+
+   // The two high bytes are always returned.
+   stuff_char((uint8)((m_bit_buf >> 16) & 0xFF));
+   stuff_char((uint8)((m_bit_buf >> 24) & 0xFF));
+
+   m_bits_left = 16;
+   get_bits_no_markers(16);
+   get_bits_no_markers(16);
+ }
+
+ // Runs the inverse DCT over every block of the current MCU.
+ // Coefficients are read from m_pMCU_coefficients (64 per block) and the
+ // samples are written to the slot for mcu_row inside m_pSample_buf.
+ void jpeg_decoder::transform_mcu(int mcu_row)
+ {
+   jpgd_block_t* const pCoeffs = m_pMCU_coefficients;
+   uint8* const pSamples = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;
+
+   for (int blk = 0; blk < m_blocks_per_mcu; blk++)
+     idct(pCoeffs + blk * 64, pSamples + blk * 64, m_mcu_block_max_zag[blk]);
+ }
+
+ // Lookup table indexed by (m_mcu_block_max_zag value - 1), i.e. 0..63.
+ // Each entry packs two small counts as A*16 + B; transform_mcu_expand() switches on it
+ // to select the P_Q<A, B> / R_S<A, B> specialisation (entries range from 1*16+1 to 8*16+8).
+ static const uint8 s_max_rc[64] =
+ {
+ 17, 18, 34, 50, 50, 51, 52, 52, 52, 68, 84, 84, 84, 84, 85, 86, 86, 86, 86, 86,
+ 102, 118, 118, 118, 118, 118, 118, 119, 120, 120, 120, 120, 120, 120, 120, 136,
+ 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136,
+ 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136
+ };
+
+ void jpeg_decoder::transform_mcu_expand(int mcu_row)
+ {
+ jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
+ uint8* pDst_ptr = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64;
+
+ // Y IDCT
+ int mcu_block;
+ for (mcu_block = 0; mcu_block < m_expanded_blocks_per_component; mcu_block++)
+ {
+ idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]);
+ pSrc_ptr += 64;
+ pDst_ptr += 64;
+ }
+
+ // Chroma IDCT, with upsampling
+ jpgd_block_t temp_block[64];
+
+ for (int i = 0; i < 2; i++)
+ {
+ DCT_Upsample::Matrix44 P, Q, R, S;
+
+ JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] >= 1);
+ JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] <= 64);
+
+ switch (s_max_rc[m_mcu_block_max_zag[mcu_block++] - 1])
+ {
+ case 1*16+1:
+ DCT_Upsample::P_Q<1, 1>::calc(P, Q, pSrc_ptr);
+ DCT_Upsample::R_S<1, 1>::calc(R, S, pSrc_ptr);
+ break;
+ case 1*16+2:
+ DCT_Upsample::P_Q<1, 2>::calc(P, Q, pSrc_ptr);
+ DCT_Upsample::R_S<1, 2>::calc(R, S, pSrc_ptr);
+ break;
+ case 2*16+2:
+ DCT_Upsample::P_Q<2, 2>::calc(P, Q, pSrc_ptr);
+ DCT_Upsample::R_S<2, 2>::calc(R, S, pSrc_ptr);
+ break;
+ case 3*16+2:
+ DCT_Upsample::P_Q<3, 2>::calc(P, Q, pSrc_ptr);
+ DCT_Upsample::R_S<3, 2>::calc(R, S, pSrc_ptr);
+ break;
+ case 3*16+3:
+ DCT_Upsample::P_Q<3, 3>::calc(P, Q, pSrc_ptr);
+ DCT_Upsample::R_S<3, 3>::calc(R, S, pSrc_ptr);
+ break;
+ case 3*16+4:
+ DCT_Upsample::P_Q<3, 4>::calc(P, Q, pSrc_ptr);
+ DCT_Upsample::R_S<3, 4>::calc(R, S, pSrc_ptr);
+ break;
+ case 4*16+4:
+ DCT_Upsample::P_Q<4, 4>::calc(P, Q, pSrc_ptr);
+ DCT_Upsample::R_S<4, 4>::calc(R, S, pSrc_ptr);
+ break;
+ case 5*16+4:
+ DCT_Upsample::P_Q<5, 4>::calc(P, Q, pSrc_ptr);
+ DCT_Upsample::R_S<5, 4>::calc(R, S, pSrc_ptr);
+ break;
+ case 5*16+5:
+ DCT_Upsample::P_Q<5, 5>::calc(P, Q, pSrc_ptr);
+ DCT_Upsample::R_S<5, 5>::calc(R, S, pSrc_ptr);
+ break;
+ case 5*16+6:
+ DCT_Upsample::P_Q<5, 6>::calc(P, Q, pSrc_ptr);
+ DCT_Upsample::R_S<5, 6>::calc(R, S, pSrc_ptr);
+ break;
+ case 6*16+6:
+ DCT_Upsample::P_Q<6, 6>::calc(P, Q, pSrc_ptr);
+ DCT_Upsample::R_S<6, 6>::calc(R, S, pSrc_ptr);
+ break;
+ case 7*16+6:
+ DCT_Upsample::P_Q<7, 6>::calc(P, Q, pSrc_ptr);
+ DCT_Upsample::R_S<7, 6>::calc(R, S, pSrc_ptr);
+ break;
+ case 7*16+7:
+ DCT_Upsample::P_Q<7, 7>::calc(P, Q, pSrc_ptr);
+ DCT_Upsample::R_S<7, 7>::calc(R, S, pSrc_ptr);
+ break;
+ case 7*16+8:
+ DCT_Upsample::P_Q<7, 8>::calc(P, Q, pSrc_ptr);
+ DCT_Upsample::R_S<7, 8>::calc(R, S, pSrc_ptr);
+ break;
+ case 8*16+8:
+ DCT_Upsample::P_Q<8, 8>::calc(P, Q, pSrc_ptr);
+ DCT_Upsample::R_S<8, 8>::calc(R, S, pSrc_ptr);
+ break;
+ default:
+ JPGD_ASSERT(false);
+ }
+
+ DCT_Upsample::Matrix44 a(P + Q); P -= Q;
+ DCT_Upsample::Matrix44& b = P;
+ DCT_Upsample::Matrix44 c(R + S); R -= S;
+ DCT_Upsample::Matrix44& d = R;
+
+ DCT_Upsample::Matrix44::add_and_store(temp_block, a, c);
+ idct_4x4(temp_block, pDst_ptr);
+ pDst_ptr += 64;
+
+ DCT_Upsample::Matrix44::sub_and_store(temp_block, a, c);
+ idct_4x4(temp_block, pDst_ptr);
+ pDst_ptr += 64;
+
+ DCT_Upsample::Matrix44::add_and_store(temp_block, b, d);
+ idct_4x4(temp_block, pDst_ptr);
+ pDst_ptr += 64;
+
+ DCT_Upsample::Matrix44::sub_and_store(temp_block, b, d);
+ idct_4x4(temp_block, pDst_ptr);
+ pDst_ptr += 64;
+
+ pSrc_ptr += 64;
+ }
+ }
+
+ // Loads and dequantizes the next row of (already decoded) coefficients.
+ // Progressive images only.
+ // For every block of every MCU in the row, the DC and AC coefficients are copied
+ // from the coefficient buffers into m_pMCU_coefficients, the highest non-zero
+ // zig-zag position is recorded in m_mcu_block_max_zag, the non-zero coefficients
+ // are multiplied by the component's quantization table, and the MCU is transformed.
+ void jpeg_decoder::load_next_row()
+ {
+   int i;
+   jpgd_block_t *p;
+   jpgd_quant_t *q;
+   int mcu_row, mcu_block;
+   int component_num, component_id;
+   int block_x_mcu[JPGD_MAX_COMPONENTS];
+
+   memset(block_x_mcu, 0, JPGD_MAX_COMPONENTS * sizeof(int));
+
+   for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
+   {
+     int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
+
+     for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
+     {
+       component_id = m_mcu_org[mcu_block];
+       q = m_quant[m_comp_quant[component_id]];
+
+       p = m_pMCU_coefficients + 64 * mcu_block;
+
+       // The DC term comes from the DC buffer, the other 63 terms from the AC buffer.
+       const int block_x = block_x_mcu[component_id] + block_x_mcu_ofs;
+       const int block_y = m_block_y_mcu[component_id] + block_y_mcu_ofs;
+       jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs[component_id], block_x, block_y);
+       jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs[component_id], block_x, block_y);
+       p[0] = pDC[0];
+       memcpy(&p[1], &pAC[1], 63 * sizeof(jpgd_block_t));
+
+       // Find the last non-zero coefficient in zig-zag order (i stops at 0 if none is found).
+       for (i = 63; i > 0; i--)
+         if (p[g_ZAG[i]])
+           break;
+
+       m_mcu_block_max_zag[mcu_block] = i + 1;
+
+       // Dequantize from that position down to the DC term, skipping zeros.
+       for ( ; i >= 0; i--)
+         if (p[g_ZAG[i]])
+           p[g_ZAG[i]] = static_cast<jpgd_block_t>(p[g_ZAG[i]] * q[i]);
+
+       if (m_comps_in_scan == 1)
+         block_x_mcu[component_id]++;
+       else
+       {
+         // Walk the component's blocks within the MCU row-major; once all
+         // of them are done, advance to the component's next MCU column.
+         if (++block_x_mcu_ofs == m_comp_h_samp[component_id])
+         {
+           block_x_mcu_ofs = 0;
+
+           if (++block_y_mcu_ofs == m_comp_v_samp[component_id])
+           {
+             block_y_mcu_ofs = 0;
+
+             block_x_mcu[component_id] += m_comp_h_samp[component_id];
+           }
+         }
+       }
+     }
+
+     if (m_freq_domain_chroma_upsample)
+       transform_mcu_expand(mcu_row);
+     else
+       transform_mcu(mcu_row);
+   }
+
+   // Advance each scanned component's block row for the next call.
+   if (m_comps_in_scan == 1)
+     m_block_y_mcu[m_comp_list[0]]++;
+   else
+   {
+     for (component_num = 0; component_num < m_comps_in_scan; component_num++)
+     {
+       component_id = m_comp_list[component_num];
+
+       m_block_y_mcu[component_id] += m_comp_v_samp[component_id];
+     }
+   }
+ }
+
+ // Handles a restart interval boundary: scans ahead for the expected RSTn
+ // marker, resets the DC predictors and the EOB run, reloads the restart
+ // counter and re-primes the bit buffer.
+ // Calls stop_decoding(JPGD_BAD_RESTART_MARKER) if the marker is missing or wrong.
+ void jpeg_decoder::process_restart()
+ {
+   int c = 0;
+
+   // Align to a byte boundary
+   // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers!
+   //get_bits_no_markers(m_bits_left & 7);
+
+   // Let's scan a little bit to find the marker, but not _too_ far.
+   // 1536 is a "fudge factor" that determines how much to scan.
+   int budget = 1536;
+
+   // Phase 1: skip bytes until an 0xFF is seen.
+   while ((budget > 0) && (get_char() != 0xFF))
+     budget--;
+
+   if (budget == 0)
+     stop_decoding(JPGD_BAD_RESTART_MARKER);
+
+   // Phase 2: skip any further 0xFF bytes; the first other byte is the marker code.
+   while (budget > 0)
+   {
+     c = get_char();
+     if (c != 0xFF)
+       break;
+     budget--;
+   }
+
+   if (budget == 0)
+     stop_decoding(JPGD_BAD_RESTART_MARKER);
+
+   // Is it the expected marker? If not, something bad happened.
+   if (c != (m_next_restart_num + M_RST0))
+     stop_decoding(JPGD_BAD_RESTART_MARKER);
+
+   // Reset each component's DC prediction values.
+   memset(&m_last_dc_val, 0, m_comps_in_frame * sizeof(uint));
+
+   m_eob_run = 0;
+
+   m_restarts_left = m_restart_interval;
+
+   // Restart markers cycle through RST0..RST7.
+   m_next_restart_num = (m_next_restart_num + 1) & 7;
+
+   // Get the bit buffer going again...
+   m_bits_left = 16;
+   get_bits_no_markers(16);
+   get_bits_no_markers(16);
+ }
+
+ static inline int dequantize_ac(int c, int q) { c *= q; return c; } // returns coefficient c scaled by quantizer value q
+
+ // Decodes and dequantizes the next row of coefficients (one MCU at a time), then runs the inverse transform on each MCU.
+ void jpeg_decoder::decode_next_row()
+ {
+ int row_block = 0;
+
+ for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
+ {
+ if ((m_restart_interval) && (m_restarts_left == 0))
+ process_restart();
+
+ jpgd_block_t* p = m_pMCU_coefficients;
+ for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64)
+ {
+ int component_id = m_mcu_org[mcu_block];
+ jpgd_quant_t* q = m_quant[m_comp_quant[component_id]];
+
+ int r, s;
+ s = huff_decode(m_pHuff_tabs[m_comp_dc_tab[component_id]], r);
+ s = HUFF_EXTEND(r, s);
+
+ m_last_dc_val[component_id] = (s += m_last_dc_val[component_id]); // DC is coded as a difference from the previous block
+
+ p[0] = static_cast<jpgd_block_t>(s * q[0]);
+
+ int prev_num_set = m_mcu_block_max_zag[mcu_block]; // zigzag positions below this may hold stale values from the previous MCU
+
+ huff_tables *pH = m_pHuff_tabs[m_comp_ac_tab[component_id]];
+
+ int k;
+ for (k = 1; k < 64; k++)
+ {
+ int extra_bits;
+ s = huff_decode(pH, extra_bits);
+
+ r = s >> 4; // high nibble: run of zero coefficients
+ s &= 15; // low nibble: size of the coefficient (0 means no value follows)
+
+ if (s)
+ {
+ if (r)
+ {
+ if ((k + r) > 63)
+ stop_decoding(JPGD_DECODE_ERROR);
+
+ if (k < prev_num_set)
+ {
+ int n = JPGD_MIN(r, prev_num_set - k); // only clear entries that were written last time
+ int kt = k;
+ while (n--)
+ p[g_ZAG[kt++]] = 0;
+ }
+
+ k += r;
+ }
+
+ s = HUFF_EXTEND(extra_bits, s);
+
+ JPGD_ASSERT(k < 64);
+
+ p[g_ZAG[k]] = static_cast<jpgd_block_t>(dequantize_ac(s, q[k])); //s * q[k];
+ }
+ else
+ {
+ if (r == 15) // run of 16 zero coefficients
+ {
+ if ((k + 16) > 64)
+ stop_decoding(JPGD_DECODE_ERROR);
+
+ if (k < prev_num_set)
+ {
+ int n = JPGD_MIN(16, prev_num_set - k);
+ int kt = k;
+ while (n--)
+ {
+ JPGD_ASSERT(kt <= 63);
+ p[g_ZAG[kt++]] = 0;
+ }
+ }
+
+ k += 16 - 1; // - 1 because the loop counter is k
+ // BEGIN EPIC MOD
+ JPGD_ASSERT(k < 64 && p[g_ZAG[k]] == 0);
+ // END EPIC MOD
+ }
+ else
+ break; // end of block: remaining coefficients are zero
+ }
+ }
+
+ if (k < prev_num_set) // clear whatever the previous MCU left beyond the last decoded position
+ {
+ int kt = k;
+ while (kt < prev_num_set)
+ p[g_ZAG[kt++]] = 0;
+ }
+
+ m_mcu_block_max_zag[mcu_block] = k;
+
+ row_block++;
+ }
+
+ if (m_freq_domain_chroma_upsample)
+ transform_mcu_expand(mcu_row);
+ else
+ transform_mcu(mcu_row);
+
+ m_restarts_left--;
+ }
+ }
+
+ // YCbCr H1V1 (1x1:1:1, 3 blocks per MCU) to 4-byte pixels in m_pScan_line_0 (byte order selected by jpg_format).
+ void jpeg_decoder::H1V1Convert()
+ {
+ int row = m_max_mcu_y_size - m_mcu_lines_left; // line index within the current MCU row
+ uint8 *d = m_pScan_line_0;
+ uint8 *s = m_pSample_buf + row * 8;
+
+ for (int i = m_max_mcus_per_row; i > 0; i--)
+ {
+ for (int j = 0; j < 8; j++)
+ {
+ int y = s[j];
+ int cb = s[64+j]; // Cb block follows the Y block
+ int cr = s[128+j]; // Cr block follows the Cb block
+
+ if (jpg_format == ERGBFormatJPG::BGRA)
+ {
+ d[0] = clamp(y + m_cbb[cb]);
+ d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
+ d[2] = clamp(y + m_crr[cr]);
+ d[3] = 255;
+ }
+ else
+ {
+ d[0] = clamp(y + m_crr[cr]);
+ d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
+ d[2] = clamp(y + m_cbb[cb]);
+ d[3] = 255;
+ }
+ d += 4;
+ }
+
+ s += 64*3; // advance to the next MCU (3 blocks of 64 samples)
+ }
+ }
+
+ // YCbCr H2V1 (2x1:1:1, 4 blocks per MCU) to 4-byte pixels; each chroma sample is shared by two horizontally adjacent pixels.
+ void jpeg_decoder::H2V1Convert()
+ {
+ int row = m_max_mcu_y_size - m_mcu_lines_left; // line index within the current MCU row
+ uint8 *d0 = m_pScan_line_0;
+ uint8 *y = m_pSample_buf + row * 8;
+ uint8 *c = m_pSample_buf + 2*64 + row * 8; // chroma starts after the two Y blocks
+
+ for (int i = m_max_mcus_per_row; i > 0; i--)
+ {
+ for (int l = 0; l < 2; l++) // the two Y blocks of this MCU
+ {
+ for (int j = 0; j < 4; j++)
+ {
+ int cb = c[0];
+ int cr = c[64];
+
+ int rc = m_crr[cr];
+ int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
+ int bc = m_cbb[cb];
+
+ int yy = y[j<<1];
+ if (jpg_format == ERGBFormatJPG::BGRA)
+ {
+ d0[0] = clamp(yy+bc);
+ d0[1] = clamp(yy+gc);
+ d0[2] = clamp(yy+rc);
+ d0[3] = 255;
+ yy = y[(j<<1)+1];
+ d0[4] = clamp(yy+bc);
+ d0[5] = clamp(yy+gc);
+ d0[6] = clamp(yy+rc);
+ d0[7] = 255;
+ }
+ else
+ {
+ d0[0] = clamp(yy+rc);
+ d0[1] = clamp(yy+gc);
+ d0[2] = clamp(yy+bc);
+ d0[3] = 255;
+ yy = y[(j<<1)+1];
+ d0[4] = clamp(yy+rc);
+ d0[5] = clamp(yy+gc);
+ d0[6] = clamp(yy+bc);
+ d0[7] = 255;
+ }
+
+ d0 += 8;
+
+ c++;
+ }
+ y += 64;
+ }
+
+ y += 64*4 - 64*2; // skip the chroma blocks to reach the next MCU's Y blocks
+ c += 64*4 - 8; // c advanced by 8 above; move it to the same row of the next MCU
+ }
+ }
+
+ // YCbCr H1V2 (1x2:1:1, 4 blocks per MCU) to 4-byte pixels; fills two scan lines (d0 and d1) per call.
+ void jpeg_decoder::H1V2Convert()
+ {
+ int row = m_max_mcu_y_size - m_mcu_lines_left; // line index within the current MCU row
+ uint8 *d0 = m_pScan_line_0;
+ uint8 *d1 = m_pScan_line_1;
+ uint8 *y;
+ uint8 *c;
+
+ if (row < 8)
+ y = m_pSample_buf + row * 8;
+ else
+ y = m_pSample_buf + 64*1 + (row & 7) * 8; // rows 8..15 come from the second Y block
+
+ c = m_pSample_buf + 64*2 + (row >> 1) * 8; // one chroma row serves two luma rows
+
+ for (int i = m_max_mcus_per_row; i > 0; i--)
+ {
+ for (int j = 0; j < 8; j++)
+ {
+ int cb = c[0+j];
+ int cr = c[64+j];
+
+ int rc = m_crr[cr];
+ int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
+ int bc = m_cbb[cb];
+
+ int yy = y[j];
+ if (jpg_format == ERGBFormatJPG::BGRA)
+ {
+ d0[0] = clamp(yy+bc);
+ d0[1] = clamp(yy+gc);
+ d0[2] = clamp(yy+rc);
+ d0[3] = 255;
+ yy = y[8+j];
+ d1[0] = clamp(yy+bc);
+ d1[1] = clamp(yy+gc);
+ d1[2] = clamp(yy+rc);
+ d1[3] = 255;
+ }
+ else
+ {
+ d0[0] = clamp(yy+rc);
+ d0[1] = clamp(yy+gc);
+ d0[2] = clamp(yy+bc);
+ d0[3] = 255;
+ yy = y[8+j];
+ d1[0] = clamp(yy+rc);
+ d1[1] = clamp(yy+gc);
+ d1[2] = clamp(yy+bc);
+ d1[3] = 255;
+ }
+
+ d0 += 4;
+ d1 += 4;
+ }
+
+ y += 64*4; // next MCU (4 blocks of 64 samples)
+ c += 64*4;
+ }
+ }
+
+ // YCbCr H2V2 (2x2:1:1, 6 blocks per MCU) to 4-byte pixels; fills two scan lines, each chroma sample covering a 2x2 pixel group.
+ void jpeg_decoder::H2V2Convert()
+ {
+ int row = m_max_mcu_y_size - m_mcu_lines_left; // line index within the current MCU row
+ uint8 *d0 = m_pScan_line_0;
+ uint8 *d1 = m_pScan_line_1;
+ uint8 *y;
+ uint8 *c;
+
+ if (row < 8)
+ y = m_pSample_buf + row * 8;
+ else
+ y = m_pSample_buf + 64*2 + (row & 7) * 8; // rows 8..15 come from the second pair of Y blocks
+
+ c = m_pSample_buf + 64*4 + (row >> 1) * 8; // chroma starts after the four Y blocks
+
+ for (int i = m_max_mcus_per_row; i > 0; i--)
+ {
+ for (int l = 0; l < 2; l++) // left and right Y block of the pair
+ {
+ for (int j = 0; j < 8; j += 2)
+ {
+ int cb = c[0];
+ int cr = c[64];
+
+ int rc = m_crr[cr];
+ int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
+ int bc = m_cbb[cb];
+
+ int yy = y[j];
+ if (jpg_format == ERGBFormatJPG::BGRA)
+ {
+ d0[0] = clamp(yy+bc);
+ d0[1] = clamp(yy+gc);
+ d0[2] = clamp(yy+rc);
+ d0[3] = 255;
+ yy = y[j+1];
+ d0[4] = clamp(yy+bc);
+ d0[5] = clamp(yy+gc);
+ d0[6] = clamp(yy+rc);
+ d0[7] = 255;
+ yy = y[j+8];
+ d1[0] = clamp(yy+bc);
+ d1[1] = clamp(yy+gc);
+ d1[2] = clamp(yy+rc);
+ d1[3] = 255;
+ yy = y[j+8+1];
+ d1[4] = clamp(yy+bc);
+ d1[5] = clamp(yy+gc);
+ d1[6] = clamp(yy+rc);
+ d1[7] = 255;
+ }
+ else
+ {
+ d0[0] = clamp(yy+rc);
+ d0[1] = clamp(yy+gc);
+ d0[2] = clamp(yy+bc);
+ d0[3] = 255;
+ yy = y[j+1];
+ d0[4] = clamp(yy+rc);
+ d0[5] = clamp(yy+gc);
+ d0[6] = clamp(yy+bc);
+ d0[7] = 255;
+ yy = y[j+8];
+ d1[0] = clamp(yy+rc);
+ d1[1] = clamp(yy+gc);
+ d1[2] = clamp(yy+bc);
+ d1[3] = 255;
+ yy = y[j+8+1];
+ d1[4] = clamp(yy+rc);
+ d1[5] = clamp(yy+gc);
+ d1[6] = clamp(yy+bc);
+ d1[7] = 255;
+ }
+
+ d0 += 8;
+ d1 += 8;
+
+ c++;
+ }
+ y += 64;
+ }
+
+ y += 64*6 - 64*2; // y advanced by two blocks above; move to the next MCU
+ c += 64*6 - 8; // c advanced by 8 above; move to the same row of the next MCU
+ }
+ }
+
+ // Y (1 block per MCU) to 8-bit grayscale: copies one 8-sample row per MCU into m_pScan_line_0.
+ void jpeg_decoder::gray_convert()
+ {
+ int row = m_max_mcu_y_size - m_mcu_lines_left; // line index within the current MCU row
+ uint8 *d = m_pScan_line_0;
+ uint8 *s = m_pSample_buf + row * 8;
+
+ for (int i = m_max_mcus_per_row; i > 0; i--)
+ {
+ // Copy 8 bytes with memcpy: reading uint8 storage through uint* breaks strict aliasing and may be misaligned.
+ memcpy(d, s, 8);
+
+ s += 64;
+ d += 8;
+ }
+ }
+
+ void jpeg_decoder::expanded_convert() // converts one line when every component has m_expanded_blocks_per_component blocks per MCU
+ {
+ int row = m_max_mcu_y_size - m_mcu_lines_left; // line index within the current MCU row
+
+ uint8* Py = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp[0] + (row & 7) * 8;
+
+ uint8* d = m_pScan_line_0;
+
+ for (int i = m_max_mcus_per_row; i > 0; i--)
+ {
+ for (int k = 0; k < m_max_mcu_x_size; k += 8)
+ {
+ const int Y_ofs = k * 8; // k/8 blocks of 64 samples
+ const int Cb_ofs = Y_ofs + 64 * m_expanded_blocks_per_component;
+ const int Cr_ofs = Y_ofs + 64 * m_expanded_blocks_per_component * 2;
+ for (int j = 0; j < 8; j++)
+ {
+ int y = Py[Y_ofs + j];
+ int cb = Py[Cb_ofs + j];
+ int cr = Py[Cr_ofs + j];
+
+ if (jpg_format == ERGBFormatJPG::BGRA)
+ {
+ d[0] = clamp(y + m_cbb[cb]);
+ d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
+ d[2] = clamp(y + m_crr[cr]);
+ d[3] = 255;
+ }
+ else
+ {
+ d[0] = clamp(y + m_crr[cr]);
+ d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
+ d[2] = clamp(y + m_cbb[cb]);
+ d[3] = 255;
+ }
+
+ d += 4;
+ }
+ }
+
+ Py += 64 * m_expanded_blocks_per_mcu; // next MCU
+ }
+ }
+
+ // Find end of image (EOI) marker, so we can return to the user the exact size of the input stream.
+ void jpeg_decoder::find_eoi()
+ {
+ if (!m_progressive_flag) // marker search is only done for non-progressive images
+ {
+ // Attempt to read the EOI marker.
+ //get_bits_no_markers(m_bits_left & 7);
+
+ // Prime the bit buffer
+ m_bits_left = 16;
+ get_bits(16);
+ get_bits(16);
+
+ // The next marker _should_ be EOI
+ process_markers();
+ }
+
+ m_total_bytes_read -= m_in_buf_left; // discount bytes still left in the input buffer
+ }
+
+ int jpeg_decoder::decode(const void** pScan_line, uint* pScan_line_len) // yields one scan line per call; returns JPGD_SUCCESS, JPGD_DONE or JPGD_FAILED
+ {
+ if ((m_error_code) || (!m_ready_flag))
+ return JPGD_FAILED;
+
+ if (m_total_lines_left == 0)
+ return JPGD_DONE;
+
+ if (m_mcu_lines_left == 0) // current MCU row exhausted: decode the next one
+ {
+ if (setjmp(m_jmp_state)) // non-zero when setjmp returns via a longjmp to m_jmp_state; reported as failure
+ return JPGD_FAILED;
+
+ if (m_progressive_flag)
+ load_next_row();
+ else
+ decode_next_row();
+
+ // Find the EOI marker if that was the last row.
+ if (m_total_lines_left <= m_max_mcu_y_size)
+ find_eoi();
+
+ m_mcu_lines_left = m_max_mcu_y_size;
+ }
+
+ if (m_freq_domain_chroma_upsample)
+ {
+ expanded_convert();
+ *pScan_line = m_pScan_line_0;
+ }
+ else
+ {
+ switch (m_scan_type)
+ {
+ case JPGD_YH2V2:
+ {
+ if ((m_mcu_lines_left & 1) == 0) // converter fills two lines at once; the odd call returns the second
+ {
+ H2V2Convert();
+ *pScan_line = m_pScan_line_0;
+ }
+ else
+ *pScan_line = m_pScan_line_1;
+
+ break;
+ }
+ case JPGD_YH2V1:
+ {
+ H2V1Convert();
+ *pScan_line = m_pScan_line_0;
+ break;
+ }
+ case JPGD_YH1V2:
+ {
+ if ((m_mcu_lines_left & 1) == 0) // same two-lines-per-conversion scheme as H2V2
+ {
+ H1V2Convert();
+ *pScan_line = m_pScan_line_0;
+ }
+ else
+ *pScan_line = m_pScan_line_1;
+
+ break;
+ }
+ case JPGD_YH1V1:
+ {
+ H1V1Convert();
+ *pScan_line = m_pScan_line_0;
+ break;
+ }
+ case JPGD_GRAYSCALE:
+ {
+ gray_convert();
+ *pScan_line = m_pScan_line_0;
+
+ break;
+ }
+ }
+ }
+
+ *pScan_line_len = m_real_dest_bytes_per_scan_line;
+
+ m_mcu_lines_left--;
+ m_total_lines_left--;
+
+ return JPGD_SUCCESS;
+ }
+
+ // Creates the tables needed for efficient Huffman decoding: 8-bit lookup tables plus a tree for codes longer than 8 bits.
+ void jpeg_decoder::make_huff_table(int index, huff_tables *pH)
+ {
+ int p, i, l, si;
+ uint8 huffsize[257];
+ uint huffcode[257];
+ uint code;
+ uint subtree;
+ int code_size;
+ int lastp;
+ int nextfreeentry;
+ int currententry;
+
+ pH->ac_table = m_huff_ac[index] != 0;
+
+ p = 0;
+
+ for (l = 1; l <= 16; l++) // list the code length of every symbol, shortest first
+ {
+ for (i = 1; i <= m_huff_num[index][l]; i++)
+ huffsize[p++] = static_cast<uint8>(l);
+ }
+
+ huffsize[p] = 0; // terminator
+
+ lastp = p;
+
+ code = 0;
+ si = huffsize[0];
+ p = 0;
+
+ while (huffsize[p]) // assign consecutive code values within each length
+ {
+ while (huffsize[p] == si)
+ {
+ huffcode[p++] = code;
+ code++;
+ }
+
+ code <<= 1;
+ si++;
+ }
+
+ memset(pH->look_up, 0, sizeof(pH->look_up));
+ memset(pH->look_up2, 0, sizeof(pH->look_up2));
+ memset(pH->tree, 0, sizeof(pH->tree));
+ memset(pH->code_size, 0, sizeof(pH->code_size));
+
+ nextfreeentry = -1; // tree nodes are referenced by negative values
+
+ p = 0;
+
+ while (p < lastp)
+ {
+ i = m_huff_val[index][p];
+ code = huffcode[p];
+ code_size = huffsize[p];
+
+ pH->code_size[i] = static_cast<uint8>(code_size);
+
+ if (code_size <= 8)
+ {
+ code <<= (8 - code_size);
+
+ for (l = 1 << (8 - code_size); l > 0; l--) // fill every 8-bit pattern that starts with this code
+ {
+ JPGD_ASSERT(i < 256);
+
+ pH->look_up[code] = i;
+
+ bool has_extrabits = false;
+ int extra_bits = 0;
+ int num_extra_bits = i & 15;
+
+ int bits_to_fetch = code_size;
+ if (num_extra_bits)
+ {
+ int total_codesize = code_size + num_extra_bits;
+ if (total_codesize <= 8) // code and its extra bits both fit in the 8-bit lookup
+ {
+ has_extrabits = true;
+ extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize));
+ JPGD_ASSERT(extra_bits <= 0x7FFF);
+ bits_to_fetch += num_extra_bits;
+ }
+ }
+
+ if (!has_extrabits)
+ pH->look_up2[code] = i | (bits_to_fetch << 8);
+ else
+ pH->look_up2[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8);
+
+ code++;
+ }
+ }
+ else
+ {
+ subtree = (code >> (code_size - 8)) & 0xFF; // first 8 bits of the code select the subtree
+
+ currententry = pH->look_up[subtree];
+
+ if (currententry == 0)
+ {
+ pH->look_up[subtree] = currententry = nextfreeentry;
+ pH->look_up2[subtree] = currententry = nextfreeentry;
+
+ nextfreeentry -= 2;
+ }
+
+ code <<= (16 - (code_size - 8));
+
+ for (l = code_size; l > 9; l--) // walk or extend the tree one bit at a time
+ {
+ if ((code & 0x8000) == 0)
+ currententry--;
+
+ if (pH->tree[-currententry - 1] == 0)
+ {
+ pH->tree[-currententry - 1] = nextfreeentry;
+
+ currententry = nextfreeentry;
+
+ nextfreeentry -= 2;
+ }
+ else
+ currententry = pH->tree[-currententry - 1];
+
+ code <<= 1;
+ }
+
+ if ((code & 0x8000) == 0)
+ currententry--;
+
+ pH->tree[-currententry - 1] = i; // leaf: store the symbol
+ }
+
+ p++;
+ }
+ }
+
+ // Verifies the quantization tables needed for this scan are available; stops decoding with JPGD_UNDEFINED_QUANT_TABLE otherwise.
+ void jpeg_decoder::check_quant_tables()
+ {
+ for (int i = 0; i < m_comps_in_scan; i++)
+ if (m_quant[m_comp_quant[m_comp_list[i]]] == NULL)
+ stop_decoding(JPGD_UNDEFINED_QUANT_TABLE);
+ }
+
+ // Verifies that all the Huffman tables needed for this scan are available, then (re)builds the decoding tables.
+ void jpeg_decoder::check_huff_tables()
+ {
+ for (int i = 0; i < m_comps_in_scan; i++)
+ {
+ if ((m_spectral_start == 0) && (m_huff_num[m_comp_dc_tab[m_comp_list[i]]] == NULL)) // DC table checked only when the scan starts at coefficient 0
+ stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
+
+ if ((m_spectral_end > 0) && (m_huff_num[m_comp_ac_tab[m_comp_list[i]]] == NULL)) // AC table checked only when the scan covers AC coefficients
+ stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
+ }
+
+ for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++)
+ if (m_huff_num[i])
+ {
+ if (!m_pHuff_tabs[i]) // allocate once, reuse on later scans
+ m_pHuff_tabs[i] = (huff_tables *)alloc(sizeof(huff_tables));
+
+ make_huff_table(i, m_pHuff_tabs[i]);
+ }
+ }
+
+ // Determines the component order inside each MCU.
+ // Also calcs how many MCU's are on each row, etc.
+ void jpeg_decoder::calc_mcu_block_order()
+ {
+ int component_num, component_id;
+ int max_h_samp = 0, max_v_samp = 0;
+
+ for (component_id = 0; component_id < m_comps_in_frame; component_id++) // find the largest sampling factors in the frame
+ {
+ if (m_comp_h_samp[component_id] > max_h_samp)
+ max_h_samp = m_comp_h_samp[component_id];
+
+ if (m_comp_v_samp[component_id] > max_v_samp)
+ max_v_samp = m_comp_v_samp[component_id];
+ }
+
+ for (component_id = 0; component_id < m_comps_in_frame; component_id++) // per-component size in 8x8 blocks, rounded up
+ {
+ m_comp_h_blocks[component_id] = ((((m_image_x_size * m_comp_h_samp[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8;
+ m_comp_v_blocks[component_id] = ((((m_image_y_size * m_comp_v_samp[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8;
+ }
+
+ if (m_comps_in_scan == 1) // single-component scan: one block per MCU
+ {
+ m_mcus_per_row = m_comp_h_blocks[m_comp_list[0]];
+ m_mcus_per_col = m_comp_v_blocks[m_comp_list[0]];
+ }
+ else
+ {
+ m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp;
+ m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp;
+ }
+
+ if (m_comps_in_scan == 1)
+ {
+ m_mcu_org[0] = m_comp_list[0];
+
+ m_blocks_per_mcu = 1;
+ }
+ else
+ {
+ m_blocks_per_mcu = 0;
+
+ for (component_num = 0; component_num < m_comps_in_scan; component_num++)
+ {
+ int num_blocks;
+
+ component_id = m_comp_list[component_num];
+
+ num_blocks = m_comp_h_samp[component_id] * m_comp_v_samp[component_id];
+
+ while (num_blocks--) // each component contributes h_samp * v_samp consecutive blocks
+ m_mcu_org[m_blocks_per_mcu++] = component_id;
+ }
+ }
+ }
+
+ // Starts a new scan. Returns JPGD_FALSE when no SOS marker is located, JPGD_TRUE once the scan state is set up.
+ int jpeg_decoder::init_scan()
+ {
+ if (!locate_sos_marker())
+ return JPGD_FALSE;
+
+ calc_mcu_block_order();
+
+ check_huff_tables();
+
+ check_quant_tables();
+
+ memset(m_last_dc_val, 0, m_comps_in_frame * sizeof(uint)); // DC prediction starts from zero in every scan
+
+ m_eob_run = 0;
+
+ if (m_restart_interval)
+ {
+ m_restarts_left = m_restart_interval;
+ m_next_restart_num = 0;
+ }
+
+ fix_in_buffer();
+
+ return JPGD_TRUE;
+ }
+
+ // Starts a frame. Determines if the number of components or sampling factors
+ // are supported, then sizes and allocates the scan line, coefficient and sample buffers.
+ void jpeg_decoder::init_frame()
+ {
+ int i;
+
+ if (m_comps_in_frame == 1)
+ {
+ if ((m_comp_h_samp[0] != 1) || (m_comp_v_samp[0] != 1))
+ stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
+
+ m_scan_type = JPGD_GRAYSCALE;
+ m_max_blocks_per_mcu = 1;
+ m_max_mcu_x_size = 8;
+ m_max_mcu_y_size = 8;
+ }
+ else if (m_comps_in_frame == 3)
+ {
+ if ( ((m_comp_h_samp[1] != 1) || (m_comp_v_samp[1] != 1)) || // components 1 and 2 must both use 1x1 sampling
+ ((m_comp_h_samp[2] != 1) || (m_comp_v_samp[2] != 1)) )
+ stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
+
+ if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1))
+ {
+ m_scan_type = JPGD_YH1V1;
+
+ m_max_blocks_per_mcu = 3;
+ m_max_mcu_x_size = 8;
+ m_max_mcu_y_size = 8;
+ }
+ else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1))
+ {
+ m_scan_type = JPGD_YH2V1;
+ m_max_blocks_per_mcu = 4;
+ m_max_mcu_x_size = 16;
+ m_max_mcu_y_size = 8;
+ }
+ else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 2))
+ {
+ m_scan_type = JPGD_YH1V2;
+ m_max_blocks_per_mcu = 4;
+ m_max_mcu_x_size = 8;
+ m_max_mcu_y_size = 16;
+ }
+ else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2))
+ {
+ m_scan_type = JPGD_YH2V2;
+ m_max_blocks_per_mcu = 6;
+ m_max_mcu_x_size = 16;
+ m_max_mcu_y_size = 16;
+ }
+ else
+ stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
+ }
+ else
+ stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
+
+ m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size;
+ m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size;
+
+ // These values are for the *destination* pixels: after conversion.
+ if (m_scan_type == JPGD_GRAYSCALE)
+ m_dest_bytes_per_pixel = 1;
+ else
+ m_dest_bytes_per_pixel = 4;
+
+ m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel; // width rounded up to a multiple of 16 pixels
+
+ m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);
+
+ // Initialize two scan line buffers.
+ m_pScan_line_0 = (uint8 *)alloc(m_dest_bytes_per_scan_line, true);
+ if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2)) // only these scan types produce a second line per conversion
+ m_pScan_line_1 = (uint8 *)alloc(m_dest_bytes_per_scan_line, true);
+
+ m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;
+
+ // Should never happen
+ if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW)
+ stop_decoding(JPGD_ASSERTION_ERROR);
+
+ // Allocate the coefficient buffer, enough for one MCU
+ m_pMCU_coefficients = (jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * sizeof(jpgd_block_t));
+
+ for (i = 0; i < m_max_blocks_per_mcu; i++)
+ m_mcu_block_max_zag[i] = 64; // start at 64 so the first decode clears every coefficient
+
+ m_expanded_blocks_per_component = m_comp_h_samp[0] * m_comp_v_samp[0];
+ m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame;
+ m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu;
+ // Freq. domain chroma upsampling is only supported for H2V2 subsampling factor.
+// BEGIN EPIC MOD
+#if JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING
+ m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4*3);
+#else
+ m_freq_domain_chroma_upsample = 0;
+#endif
+// END EPIC MOD
+
+ if (m_freq_domain_chroma_upsample)
+ m_pSample_buf = (uint8 *)alloc(m_expanded_blocks_per_row * 64);
+ else
+ m_pSample_buf = (uint8 *)alloc(m_max_blocks_per_row * 64);
+
+ m_total_lines_left = m_image_y_size;
+
+ m_mcu_lines_left = 0; // forces decode() to decode a row on its first call
+
+ create_look_ups();
+ }
+
+ // The coeff_buf series of methods originally stored the coefficients
+ // into a "virtual" file which was located in EMS, XMS, or a disk file. A cache
+ // was used to make this process more efficient. Now, we can store the entire
+ // thing in RAM. Allocates a block_num_x by block_num_y grid of blocks, each block_len_x * block_len_y coefficients.
+ jpeg_decoder::coeff_buf* jpeg_decoder::coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y)
+ {
+ coeff_buf* cb = (coeff_buf*)alloc(sizeof(coeff_buf));
+
+ cb->block_num_x = block_num_x;
+ cb->block_num_y = block_num_y;
+ cb->block_len_x = block_len_x;
+ cb->block_len_y = block_len_y;
+ cb->block_size = (block_len_x * block_len_y) * sizeof(jpgd_block_t); // size of one block in bytes
+ cb->pData = (uint8 *)alloc(cb->block_size * block_num_x * block_num_y, true);
+ return cb;
+ }
+
+ inline jpgd_block_t *jpeg_decoder::coeff_buf_getp(coeff_buf *cb, int block_x, int block_y) // pointer to block (block_x, block_y); blocks are stored row by row
+ {
+ JPGD_ASSERT((block_x < cb->block_num_x) && (block_y < cb->block_num_y));
+ return (jpgd_block_t *)(cb->pData + block_x * cb->block_size + block_y * (cb->block_size * cb->block_num_x));
+ }
+
+ // The following methods decode the various types of blocks encountered
+ // in progressively encoded images. This one decodes a DC coefficient in a first (non-refinement) DC scan.
+ void jpeg_decoder::decode_block_dc_first(jpeg_decoder *pD, int component_id, int block_x, int block_y)
+ {
+ int s, r;
+ jpgd_block_t *p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
+
+ if ((s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_dc_tab[component_id]])) != 0) // s is the number of extra bits that follow
+ {
+ r = pD->get_bits_no_markers(s);
+ s = HUFF_EXTEND(r, s);
+ }
+
+ pD->m_last_dc_val[component_id] = (s += pD->m_last_dc_val[component_id]); // DC is coded as a difference from the previous block
+
+ p[0] = static_cast<jpgd_block_t>(s << pD->m_successive_low);
+ }
+
+ void jpeg_decoder::decode_block_dc_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y) // DC refinement scan: one bit per block
+ {
+ if (pD->get_bits_no_markers(1))
+ {
+ jpgd_block_t *p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
+
+ p[0] |= (1 << pD->m_successive_low); // set bit m_successive_low of the stored DC value
+ }
+ }
+
+ void jpeg_decoder::decode_block_ac_first(jpeg_decoder *pD, int component_id, int block_x, int block_y) // first (non-refinement) AC scan of one block
+ {
+ int k, s, r;
+
+ if (pD->m_eob_run) // inside an end-of-band run: this block has no coefficients in this scan
+ {
+ pD->m_eob_run--;
+ return;
+ }
+
+ jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
+
+ for (k = pD->m_spectral_start; k <= pD->m_spectral_end; k++)
+ {
+ s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]);
+
+ r = s >> 4; // high nibble: zero run length (or EOB run exponent when s == 0)
+ s &= 15; // low nibble: number of extra bits holding the coefficient
+
+ if (s)
+ {
+ if ((k += r) > 63)
+ pD->stop_decoding(JPGD_DECODE_ERROR);
+
+ r = pD->get_bits_no_markers(s);
+ s = HUFF_EXTEND(r, s);
+
+ p[g_ZAG[k]] = static_cast<jpgd_block_t>(s << pD->m_successive_low);
+ }
+ else
+ {
+ if (r == 15) // skip 16 zero coefficients (15 here, 1 from the loop increment)
+ {
+ if ((k += 15) > 63)
+ pD->stop_decoding(JPGD_DECODE_ERROR);
+ }
+ else
+ {
+ pD->m_eob_run = 1 << r;
+
+ if (r)
+ pD->m_eob_run += pD->get_bits_no_markers(r);
+
+ pD->m_eob_run--; // the current block counts as the first of the run
+
+ break;
+ }
+ }
+ }
+ }
+
+ void jpeg_decoder::decode_block_ac_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y) // AC refinement scan of one block
+ {
+ int s, k, r;
+ int p1 = 1 << pD->m_successive_low; // +1 at the bit position being refined
+ int m1 = static_cast<int>(~0u << pD->m_successive_low); // -1 at that bit position; shifted as unsigned because left-shifting a negative int is undefined
+ jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
+
+ k = pD->m_spectral_start;
+
+ if (pD->m_eob_run == 0)
+ {
+ for ( ; k <= pD->m_spectral_end; k++)
+ {
+ s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]);
+
+ r = s >> 4;
+ s &= 15;
+
+ if (s)
+ {
+ if (s != 1) // only a size of 1 is accepted in a refinement scan
+ pD->stop_decoding(JPGD_DECODE_ERROR);
+
+ if (pD->get_bits_no_markers(1)) // sign bit of the newly non-zero coefficient
+ s = p1;
+ else
+ s = m1;
+ }
+ else
+ {
+ if (r != 15)
+ {
+ pD->m_eob_run = 1 << r;
+
+ if (r)
+ pD->m_eob_run += pD->get_bits_no_markers(r);
+
+ break;
+ }
+ }
+
+ do // advance past r still-zero coefficients, refining non-zero ones on the way
+ {
+ // BEGIN EPIC MOD
+ JPGD_ASSERT(k < 64);
+ // END EPIC MOD
+
+ jpgd_block_t *this_coef = p + g_ZAG[k];
+
+ if (*this_coef != 0)
+ {
+ if (pD->get_bits_no_markers(1)) // correction bit for an already non-zero coefficient
+ {
+ if ((*this_coef & p1) == 0)
+ {
+ if (*this_coef >= 0)
+ *this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
+ else
+ *this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
+ }
+ }
+ }
+ else
+ {
+ if (--r < 0)
+ break;
+ }
+
+ k++;
+
+ } while (k <= pD->m_spectral_end);
+
+ if ((s) && (k < 64))
+ {
+ p[g_ZAG[k]] = static_cast<jpgd_block_t>(s);
+ }
+ }
+ }
+
+ if (pD->m_eob_run > 0) // within an EOB run only correction bits for non-zero coefficients remain
+ {
+ for ( ; k <= pD->m_spectral_end; k++)
+ {
+ // BEGIN EPIC MOD
+ JPGD_ASSERT(k < 64);
+ // END EPIC MOD
+
+ jpgd_block_t *this_coef = p + g_ZAG[k];
+
+ if (*this_coef != 0)
+ {
+ if (pD->get_bits_no_markers(1))
+ {
+ if ((*this_coef & p1) == 0)
+ {
+ if (*this_coef >= 0)
+ *this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
+ else
+ *this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
+ }
+ }
+ }
+ }
+
+ pD->m_eob_run--;
+ }
+ }
+
+ // Decode a scan in a progressively encoded image, calling decode_block_func once for every block in the scan.
+ void jpeg_decoder::decode_scan(pDecode_block_func decode_block_func)
+ {
+ int mcu_row, mcu_col, mcu_block;
+ int block_x_mcu[JPGD_MAX_COMPONENTS], m_block_y_mcu[JPGD_MAX_COMPONENTS]; // NOTE(review): m_block_y_mcu is a local here despite the m_ prefix
+
+ memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
+
+ for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++)
+ {
+ int component_num, component_id;
+
+ memset(block_x_mcu, 0, sizeof(block_x_mcu));
+
+ for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
+ {
+ int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
+
+ if ((m_restart_interval) && (m_restarts_left == 0))
+ process_restart();
+
+ for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
+ {
+ component_id = m_mcu_org[mcu_block];
+
+ decode_block_func(this, component_id, block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
+
+ if (m_comps_in_scan == 1)
+ block_x_mcu[component_id]++;
+ else
+ {
+ if (++block_x_mcu_ofs == m_comp_h_samp[component_id]) // finished one row of this component's blocks in the MCU
+ {
+ block_x_mcu_ofs = 0;
+
+ if (++block_y_mcu_ofs == m_comp_v_samp[component_id]) // finished all of this component's blocks in the MCU
+ {
+ block_y_mcu_ofs = 0;
+ block_x_mcu[component_id] += m_comp_h_samp[component_id];
+ }
+ }
+ }
+ }
+
+ m_restarts_left--;
+ }
+
+ if (m_comps_in_scan == 1)
+ m_block_y_mcu[m_comp_list[0]]++;
+ else
+ {
+ for (component_num = 0; component_num < m_comps_in_scan; component_num++)
+ {
+ component_id = m_comp_list[component_num];
+ m_block_y_mcu[component_id] += m_comp_v_samp[component_id];
+ }
+ }
+ }
+ }
+
+ // Decode a progressively encoded image: every scan is decoded into the coefficient buffers before any scan line is produced.
+ void jpeg_decoder::init_progressive()
+ {
+ int i;
+
+ if (m_comps_in_frame == 4)
+ stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
+
+ // Allocate the coefficient buffers.
+ for (i = 0; i < m_comps_in_frame; i++)
+ {
+ m_dc_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 1, 1);
+ m_ac_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 8, 8);
+ }
+
+ for ( ; ; ) // one iteration per scan, until init_scan() finds no further scan
+ {
+ int dc_only_scan, refinement_scan;
+ pDecode_block_func decode_block_func;
+
+ if (!init_scan())
+ break;
+
+ dc_only_scan = (m_spectral_start == 0);
+ refinement_scan = (m_successive_high != 0);
+
+ if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63))
+ stop_decoding(JPGD_BAD_SOS_SPECTRAL);
+
+ if (dc_only_scan)
+ {
+ if (m_spectral_end) // a DC scan must not extend into AC coefficients
+ stop_decoding(JPGD_BAD_SOS_SPECTRAL);
+ }
+ else if (m_comps_in_scan != 1) /* AC scans can only contain one component */
+ stop_decoding(JPGD_BAD_SOS_SPECTRAL);
+
+ if ((refinement_scan) && (m_successive_low != m_successive_high - 1)) // each refinement scan must add exactly one bit
+ stop_decoding(JPGD_BAD_SOS_SUCCESSIVE);
+
+ if (dc_only_scan)
+ {
+ if (refinement_scan)
+ decode_block_func = decode_block_dc_refine;
+ else
+ decode_block_func = decode_block_dc_first;
+ }
+ else
+ {
+ if (refinement_scan)
+ decode_block_func = decode_block_ac_refine;
+ else
+ decode_block_func = decode_block_ac_first;
+ }
+
+ decode_scan(decode_block_func);
+
+ m_bits_left = 16;
+ get_bits(16);
+ get_bits(16);
+ }
+
+ m_comps_in_scan = m_comps_in_frame; // from here on treat the image as one scan containing every component
+
+ for (i = 0; i < m_comps_in_frame; i++)
+ m_comp_list[i] = i;
+
+ calc_mcu_block_order();
+ }
+
+ void jpeg_decoder::init_sequential() // non-progressive setup: starts the first scan
+ {
+ if (!init_scan())
+ stop_decoding(JPGD_UNEXPECTED_MARKER);
+ }
+
+ void jpeg_decoder::decode_start() // sets up the frame, then the progressive or sequential decoding path
+ {
+ init_frame();
+
+ if (m_progressive_flag)
+ init_progressive();
+ else
+ init_sequential();
+ }
+
+ void jpeg_decoder::decode_init(jpeg_decoder_stream *pStream) // initializes decoder state for pStream and locates the SOF marker
+ {
+ init(pStream);
+ locate_sof_marker();
+ }
+
+ jpeg_decoder::jpeg_decoder(jpeg_decoder_stream *pStream) // callers check get_error_code() afterwards, as decompress_jpeg_image_from_stream() does
+ {
+ if (setjmp(m_jmp_state)) // non-zero when setjmp returns via a longjmp to m_jmp_state: abandon construction
+ return;
+ decode_init(pStream);
+ }
+
+ int jpeg_decoder::begin_decoding() // prepares for decode(); returns JPGD_SUCCESS or JPGD_FAILED
+ {
+ if (m_ready_flag) // already started: nothing to do
+ return JPGD_SUCCESS;
+
+ if (m_error_code)
+ return JPGD_FAILED;
+
+ if (setjmp(m_jmp_state)) // non-zero when setjmp returns via a longjmp to m_jmp_state
+ return JPGD_FAILED;
+
+ decode_start();
+
+ m_ready_flag = true;
+
+ return JPGD_SUCCESS;
+ }
+
+ jpeg_decoder::~jpeg_decoder() // the destructor's only action is free_all_blocks()
+ {
+ free_all_blocks();
+ }
+
+ jpeg_decoder_file_stream::jpeg_decoder_file_stream() // starts with no file open and both status flags cleared
+ {
+ m_pFile = NULL;
+ m_eof_flag = false;
+ m_error_flag = false;
+ }
+
+ void jpeg_decoder_file_stream::close() // closes the file if one is open and clears the status flags; safe to call repeatedly
+ {
+ if (m_pFile)
+ {
+ fclose(m_pFile);
+ m_pFile = NULL;
+ }
+
+ m_eof_flag = false;
+ m_error_flag = false;
+ }
+
+ jpeg_decoder_file_stream::~jpeg_decoder_file_stream() // closes any file that is still open
+ {
+ close();
+ }
+
+ bool jpeg_decoder_file_stream::open(const char *Pfilename) // opens Pfilename for binary reading; returns false when it cannot be opened
+ {
+ close(); // drop any previously opened file first
+
+ m_eof_flag = false;
+ m_error_flag = false;
+
+#if defined(_MSC_VER)
+ m_pFile = NULL;
+ fopen_s(&m_pFile, Pfilename, "rb");
+#else
+ m_pFile = fopen(Pfilename, "rb");
+#endif
+ return m_pFile != NULL;
+ }
+
+ int jpeg_decoder_file_stream::read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag) // returns bytes read, or -1 on error / no open file
+ {
+ if (!m_pFile)
+ return -1;
+
+ if (m_eof_flag) // end of file was already reported by an earlier call
+ {
+ *pEOF_flag = true;
+ return 0;
+ }
+
+ if (m_error_flag)
+ return -1;
+
+ int bytes_read = static_cast<int>(fread(pBuf, 1, max_bytes_to_read, m_pFile));
+ if (bytes_read < max_bytes_to_read) // short read: either an I/O error or end of file
+ {
+ if (ferror(m_pFile))
+ {
+ m_error_flag = true;
+ return -1;
+ }
+
+ m_eof_flag = true;
+ *pEOF_flag = true;
+ }
+
+ return bytes_read;
+ }
+
+ bool jpeg_decoder_mem_stream::open(const uint8 *pSrc_data, uint size) // points the stream at a caller-owned buffer (the pointer is stored, not copied); always returns true
+ {
+ close();
+ m_pSrc_data = pSrc_data;
+ m_ofs = 0;
+ m_size = size;
+ return true;
+ }
+
+ int jpeg_decoder_mem_stream::read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag) // returns bytes copied, or -1 when no buffer is attached
+ {
+ *pEOF_flag = false;
+
+ if (!m_pSrc_data)
+ return -1;
+
+ uint bytes_remaining = m_size - m_ofs;
+ if ((uint)max_bytes_to_read > bytes_remaining) // clamp the request to what is left and report end of data
+ {
+ max_bytes_to_read = bytes_remaining;
+ *pEOF_flag = true;
+ }
+
+ memcpy(pBuf, m_pSrc_data + m_ofs, max_bytes_to_read);
+ m_ofs += max_bytes_to_read;
+
+ return max_bytes_to_read;
+ }
+
+ unsigned char *decompress_jpeg_image_from_stream(jpeg_decoder_stream *pStream, int *width, int *height, int *actual_comps, int req_comps) // decodes a whole image into a jpgd_malloc'ed buffer with req_comps (1, 3 or 4) bytes per pixel; NULL on failure
+ {
+ if (!actual_comps)
+ return NULL;
+ *actual_comps = 0;
+
+ if ((!pStream) || (!width) || (!height) || (!req_comps))
+ return NULL;
+
+ if ((req_comps != 1) && (req_comps != 3) && (req_comps != 4))
+ return NULL;
+
+ jpeg_decoder decoder(pStream);
+ if (decoder.get_error_code() != JPGD_SUCCESS)
+ return NULL;
+
+ const int image_width = decoder.get_width(), image_height = decoder.get_height();
+ *width = image_width;
+ *height = image_height;
+ *actual_comps = decoder.get_num_components();
+
+ if (decoder.begin_decoding() != JPGD_SUCCESS)
+ return NULL;
+
+ const int dst_bpl = image_width * req_comps; // destination bytes per line
+
+ uint8 *pImage_data = (uint8*)jpgd_malloc(dst_bpl * image_height);
+ if (!pImage_data)
+ return NULL;
+
+ for (int y = 0; y < image_height; y++)
+ {
+ const uint8* pScan_line = 0;
+ uint scan_line_len;
+ if (decoder.decode((const void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS)
+ {
+ jpgd_free(pImage_data);
+ return NULL;
+ }
+
+ uint8 *pDst = pImage_data + y * dst_bpl;
+
+ if (((req_comps == 4) && (decoder.get_num_components() == 3)) || // decoder output already has the requested layout
+ ((req_comps == 1) && (decoder.get_num_components() == 1)))
+ {
+ memcpy(pDst, pScan_line, dst_bpl);
+ }
+ else if (decoder.get_num_components() == 1) // grayscale source: replicate luma into each color channel
+ {
+ if (req_comps == 3)
+ {
+ for (int x = 0; x < image_width; x++)
+ {
+ uint8 luma = pScan_line[x];
+ pDst[0] = luma;
+ pDst[1] = luma;
+ pDst[2] = luma;
+ pDst += 3;
+ }
+ }
+ else
+ {
+ for (int x = 0; x < image_width; x++)
+ {
+ uint8 luma = pScan_line[x];
+ pDst[0] = luma;
+ pDst[1] = luma;
+ pDst[2] = luma;
+ pDst[3] = 255;
+ pDst += 4;
+ }
+ }
+ }
+ else if (decoder.get_num_components() == 3) // color source: scan lines hold 4 bytes per pixel
+ {
+ if (req_comps == 1)
+ {
+ const int YR = 19595, YG = 38470, YB = 7471; // 16.16 fixed-point luma weights; they sum to 65536
+ for (int x = 0; x < image_width; x++)
+ {
+ int r = pScan_line[x*4+0];
+ int g = pScan_line[x*4+1];
+ int b = pScan_line[x*4+2];
+ *pDst++ = static_cast<uint8>((r * YR + g * YG + b * YB + 32768) >> 16);
+ }
+ }
+ else
+ {
+ for (int x = 0; x < image_width; x++) // drop the fourth byte of every pixel
+ {
+ pDst[0] = pScan_line[x*4+0];
+ pDst[1] = pScan_line[x*4+1];
+ pDst[2] = pScan_line[x*4+2];
+ pDst += 3;
+ }
+ }
+ }
+ }
+
+ return pImage_data;
+ }
+
+// BEGIN EPIC MOD
+ unsigned char *decompress_jpeg_image_from_memory(const unsigned char *pSrc_data, int src_data_size, int *width, int *height, int *actual_comps, int req_comps, int format)
+ {
+ jpg_format = (ERGBFormatJPG)format; // stored in jpg_format, which the color converters read
+// END EPIC MOD
+ jpgd::jpeg_decoder_mem_stream mem_stream(pSrc_data, src_data_size);
+ return decompress_jpeg_image_from_stream(&mem_stream, width, height, actual_comps, req_comps);
+ }
+
+ unsigned char *decompress_jpeg_image_from_file(const char *pSrc_filename, int *width, int *height, int *actual_comps, int req_comps) // NULL when the file cannot be opened or decoding fails
+ {
+ jpgd::jpeg_decoder_file_stream file_stream;
+ if (!file_stream.open(pSrc_filename))
+ return NULL;
+ return decompress_jpeg_image_from_stream(&file_stream, width, height, actual_comps, req_comps);
+ }
+
+} // namespace jpgd
diff --git a/crazy_functions/test_project/cpp/longcode/jpge.cpp b/crazy_functions/test_project/cpp/longcode/jpge.cpp
new file mode 100644
index 0000000..2e26b71
--- /dev/null
+++ b/crazy_functions/test_project/cpp/longcode/jpge.cpp
@@ -0,0 +1,1049 @@
+// jpge.cpp - C++ class for JPEG compression.
+// Public domain, Rich Geldreich
+// v1.01, Dec. 18, 2010 - Initial release
+// v1.02, Apr. 6, 2011 - Removed 2x2 ordered dither in H2V1 chroma subsampling method load_block_16_8_8(). (The rounding factor was 2, when it should have been 1. Either way, it wasn't helping.)
+// v1.03, Apr. 16, 2011 - Added support for optimized Huffman code tables, optimized dynamic memory allocation down to only 1 alloc.
+// Also from Alex Evans: Added RGBA support, linear memory allocator (no longer needed in v1.03).
+// v1.04, May. 19, 2012: Forgot to set m_pFile ptr to NULL in cfile_stream::close(). Thanks to Owen Kaluza for reporting this bug.
+// Code tweaks to fix VS2008 static code analysis warnings (all looked harmless).
+// Code review revealed method load_block_16_8_8() (used for the non-default H2V1 sampling mode to downsample chroma) somehow didn't get the rounding factor fix from v1.02.
+
+#include "jpge.h"
+
+#include <stdlib.h>
+#include <string.h>
+#if PLATFORM_WINDOWS
+#include <malloc.h>
+#endif
+
+#define JPGE_MAX(a,b) (((a)>(b))?(a):(b))
+#define JPGE_MIN(a,b) (((a)<(b))?(a):(b))
+
+namespace jpge {
+
+// Allocation hooks: every heap request made by the encoder goes through FMemory.
+static inline void *jpge_malloc(size_t nSize)
+{
+  return FMemory::Malloc(nSize);
+}
+
+static inline void jpge_free(void *p)
+{
+  FMemory::Free(p);
+}
+
+// Various JPEG enums and tables.
+// Marker codes; emit_marker() writes each one after a 0xFF prefix byte.
+enum { M_SOF0 = 0xC0, M_DHT = 0xC4, M_SOI = 0xD8, M_EOI = 0xD9, M_SOS = 0xDA, M_DQT = 0xDB, M_APP0 = 0xE0 };
+// Table lengths passed to optimize_huffman_table(), plus scratch-array bounds for Huffman construction.
+enum { DC_LUM_CODES = 12, AC_LUM_CODES = 256, DC_CHROMA_CODES = 12, AC_CHROMA_CODES = 256, MAX_HUFF_SYMBOLS = 257, MAX_HUFF_CODESIZE = 32 };
+
+// Coefficient scan order: load_quantized_coefficients() reads m_sample_array[s_zag[i]] for output index i.
+static uint8 s_zag[64] = { 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 };
+// Base quantization tables, scaled by the quality setting in compute_quant_table().
+// NOTE(review): presumably the example luminance/chrominance tables from the JPEG specification - confirm.
+static int16 s_std_lum_quant[64] = { 16,11,12,14,12,10,16,14,13,14,18,17,16,19,24,40,26,24,22,22,24,49,35,37,29,40,58,51,61,60,57,51,56,55,64,72,92,78,64,68,87,69,55,56,80,109,81,87,95,98,103,104,103,62,77,113,121,112,100,120,92,101,103,99 };
+static int16 s_std_croma_quant[64] = { 17,18,18,24,21,24,47,26,26,47,99,66,56,66,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99 };
+// Default Huffman tables, copied into m_huff_bits / m_huff_val by jpg_open() when two-pass mode is off.
+// Each *_bits array is indexed by code length 1..16 (entry 0 unused); each *_val array lists the symbols.
+static uint8 s_dc_lum_bits[17] = { 0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0 };
+static uint8 s_dc_lum_val[DC_LUM_CODES] = { 0,1,2,3,4,5,6,7,8,9,10,11 };
+static uint8 s_ac_lum_bits[17] = { 0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d };
+static uint8 s_ac_lum_val[AC_LUM_CODES] =
+{
+ 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08,0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,
+ 0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28,0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,
+ 0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89,
+ 0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,
+ 0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,
+ 0xf9,0xfa
+};
+static uint8 s_dc_chroma_bits[17] = { 0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0 };
+static uint8 s_dc_chroma_val[DC_CHROMA_CODES] = { 0,1,2,3,4,5,6,7,8,9,10,11 };
+static uint8 s_ac_chroma_bits[17] = { 0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77 };
+static uint8 s_ac_chroma_val[AC_CHROMA_CODES] =
+{
+ 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,
+ 0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26,0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,
+ 0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87,
+ 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,
+ 0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,
+ 0xf9,0xfa
+};
+
+// Low-level helper functions.
+// Zero-fills `obj` in place with memset(); callers in this file use it on plain arrays
+// of integers and on integer member tables.
+template <class T> inline void clear_obj(T &obj) { memset(&obj, 0, sizeof(obj)); }
+
+// 16.16 fixed-point RGB -> YCbCr weights; every use below adds 32768 and shifts right by 16.
+const int YR = 19595, YG = 38470, YB = 7471, CB_R = -11059, CB_G = -21709, CB_B = 32768, CR_R = 32768, CR_G = -27439, CR_B = -5329;
+// Saturates i to [0, 255]. The unsigned comparison is a single test that catches both
+// negative values (which wrap to large unsigned numbers) and values above 255.
+static inline uint8 clamp(int i) { if (static_cast<uint>(i) > 255U) { if (i < 0) i = 0; else if (i > 255) i = 255; } return static_cast<uint8>(i); }
+
+// Converts num_pixels packed 3-byte RGB pixels at pSrc into packed 3-byte Y/Cb/Cr at pDst.
+// Cb and Cr are offset by 128 and clamped to [0, 255].
+static void RGB_to_YCC(uint8* pDst, const uint8 *pSrc, int num_pixels)
+{
+  for ( ; num_pixels; pDst += 3, pSrc += 3, num_pixels--)
+  {
+    const int r = pSrc[0], g = pSrc[1], b = pSrc[2];
+    pDst[0] = static_cast<uint8>((r * YR + g * YG + b * YB + 32768) >> 16);
+    pDst[1] = clamp(128 + ((r * CB_R + g * CB_G + b * CB_B + 32768) >> 16));
+    pDst[2] = clamp(128 + ((r * CR_R + g * CR_G + b * CR_B + 32768) >> 16));
+  }
+}
+
+// Converts num_pixels packed 3-byte RGB pixels at pSrc into one luma byte each at pDst.
+static void RGB_to_Y(uint8* pDst, const uint8 *pSrc, int num_pixels)
+{
+  for ( ; num_pixels; pDst++, pSrc += 3, num_pixels--)
+    pDst[0] = static_cast<uint8>((pSrc[0] * YR + pSrc[1] * YG + pSrc[2] * YB + 32768) >> 16);
+}
+
+// Converts num_pixels packed 4-byte pixels at pSrc into packed 3-byte Y/Cb/Cr at pDst.
+// Only the first three bytes of each source pixel are read; the fourth is skipped.
+static void RGBA_to_YCC(uint8* pDst, const uint8 *pSrc, int num_pixels)
+{
+  for ( ; num_pixels; pDst += 3, pSrc += 4, num_pixels--)
+  {
+    const int r = pSrc[0], g = pSrc[1], b = pSrc[2];
+    pDst[0] = static_cast<uint8>((r * YR + g * YG + b * YB + 32768) >> 16);
+    pDst[1] = clamp(128 + ((r * CB_R + g * CB_G + b * CB_B + 32768) >> 16));
+    pDst[2] = clamp(128 + ((r * CR_R + g * CR_G + b * CR_B + 32768) >> 16));
+  }
+}
+
+// Converts num_pixels packed 4-byte pixels at pSrc into one luma byte each at pDst.
+// Only the first three bytes of each source pixel are read; the fourth is skipped.
+static void RGBA_to_Y(uint8* pDst, const uint8 *pSrc, int num_pixels)
+{
+  for ( ; num_pixels; pDst++, pSrc += 4, num_pixels--)
+    pDst[0] = static_cast<uint8>((pSrc[0] * YR + pSrc[1] * YG + pSrc[2] * YB + 32768) >> 16);
+}
+
+// Expands num_pixels single-byte luma samples into 3-byte Y/Cb/Cr triples,
+// writing 128 for both chroma bytes.
+static void Y_to_YCC(uint8* pDst, const uint8* pSrc, int num_pixels)
+{
+  while (num_pixels)
+  {
+    pDst[0] = *pSrc;
+    pDst[1] = 128;
+    pDst[2] = 128;
+    pDst += 3;
+    pSrc++;
+    num_pixels--;
+  }
+}
+
+// Forward DCT - DCT derived from jfdctint.
+// CONST_BITS: fractional bits carried by the integer multiplier constants below.
+// ROW_BITS:   extra fractional bits kept in the row-pass output and removed by the column pass.
+#define CONST_BITS 13
+#define ROW_BITS 2
+// Rounding right shift by n bits.
+#define DCT_DESCALE(x, n) (((x) + (((int32)1) << ((n) - 1))) >> (n))
+// Multiply with the variable operand narrowed to 16 bits and the constant widened to 32 bits.
+#define DCT_MUL(var, c) (static_cast<int16>(var) * static_cast<int32>(c))
+// One 8-point 1-D transform performed in place on s0..s7. Declares locals (t*, u*, z5),
+// so it can be expanded at most once per scope.
+#define DCT1D(s0, s1, s2, s3, s4, s5, s6, s7) \
+ int32 t0 = s0 + s7, t7 = s0 - s7, t1 = s1 + s6, t6 = s1 - s6, t2 = s2 + s5, t5 = s2 - s5, t3 = s3 + s4, t4 = s3 - s4; \
+ int32 t10 = t0 + t3, t13 = t0 - t3, t11 = t1 + t2, t12 = t1 - t2; \
+ int32 u1 = DCT_MUL(t12 + t13, 4433); \
+ s2 = u1 + DCT_MUL(t13, 6270); \
+ s6 = u1 + DCT_MUL(t12, -15137); \
+ u1 = t4 + t7; \
+ int32 u2 = t5 + t6, u3 = t4 + t6, u4 = t5 + t7; \
+ int32 z5 = DCT_MUL(u3 + u4, 9633); \
+ t4 = DCT_MUL(t4, 2446); t5 = DCT_MUL(t5, 16819); \
+ t6 = DCT_MUL(t6, 25172); t7 = DCT_MUL(t7, 12299); \
+ u1 = DCT_MUL(u1, -7373); u2 = DCT_MUL(u2, -20995); \
+ u3 = DCT_MUL(u3, -16069); u4 = DCT_MUL(u4, -3196); \
+ u3 += z5; u4 += z5; \
+ s0 = t10 + t11; s1 = t7 + u1 + u4; s3 = t6 + u2 + u3; s4 = t10 - t11; s5 = t5 + u2 + u4; s7 = t4 + u1 + u3;
+
+// In-place 2-D forward DCT of the 64 values at p (8 rows of 8).
+// Pass 1 transforms each row, pass 2 each column, both via the DCT1D macro.
+static void DCT2D(int32 *p)
+{
+ int32 c, *q = p;
+ // Row pass: outputs 0 and 4 are scaled up by ROW_BITS; the others carry CONST_BITS
+ // fractional bits from DCT_MUL and are descaled to the same ROW_BITS precision.
+ for (c = 7; c >= 0; c--, q += 8)
+ {
+ int32 s0 = q[0], s1 = q[1], s2 = q[2], s3 = q[3], s4 = q[4], s5 = q[5], s6 = q[6], s7 = q[7];
+ DCT1D(s0, s1, s2, s3, s4, s5, s6, s7);
+ q[0] = s0 << ROW_BITS; q[1] = DCT_DESCALE(s1, CONST_BITS-ROW_BITS); q[2] = DCT_DESCALE(s2, CONST_BITS-ROW_BITS); q[3] = DCT_DESCALE(s3, CONST_BITS-ROW_BITS);
+ q[4] = s4 << ROW_BITS; q[5] = DCT_DESCALE(s5, CONST_BITS-ROW_BITS); q[6] = DCT_DESCALE(s6, CONST_BITS-ROW_BITS); q[7] = DCT_DESCALE(s7, CONST_BITS-ROW_BITS);
+ }
+ // Column pass: elements are 8 apart. The final descale removes ROW_BITS plus 3 more bits
+ // (and CONST_BITS for the outputs produced through DCT_MUL).
+ for (q = p, c = 7; c >= 0; c--, q++)
+ {
+ int32 s0 = q[0*8], s1 = q[1*8], s2 = q[2*8], s3 = q[3*8], s4 = q[4*8], s5 = q[5*8], s6 = q[6*8], s7 = q[7*8];
+ DCT1D(s0, s1, s2, s3, s4, s5, s6, s7);
+ q[0*8] = DCT_DESCALE(s0, ROW_BITS+3); q[1*8] = DCT_DESCALE(s1, CONST_BITS+ROW_BITS+3); q[2*8] = DCT_DESCALE(s2, CONST_BITS+ROW_BITS+3); q[3*8] = DCT_DESCALE(s3, CONST_BITS+ROW_BITS+3);
+ q[4*8] = DCT_DESCALE(s4, ROW_BITS+3); q[5*8] = DCT_DESCALE(s5, CONST_BITS+ROW_BITS+3); q[6*8] = DCT_DESCALE(s6, CONST_BITS+ROW_BITS+3); q[7*8] = DCT_DESCALE(s7, CONST_BITS+ROW_BITS+3);
+ }
+}
+
+// One Huffman symbol record. m_key starts as the symbol's frequency (the radix-sort key) and is
+// overwritten with its code length by calculate_minimum_redundancy(); m_sym_index is the symbol
+// number plus one (index 0 is the dummy symbol added by optimize_huffman_table()).
+struct sym_freq { uint m_key, m_sym_index; };
+
+// Radix sorts sym_freq[] array by 32-bit key m_key. Returns ptr to sorted values.
+// Sorts ascending, one byte of the key per pass, ping-ponging between pSyms0 and pSyms1;
+// the returned pointer is whichever of the two buffers holds the final order.
+static inline sym_freq* radix_sort_syms(uint num_syms, sym_freq* pSyms0, sym_freq* pSyms1)
+{
+ const uint cMaxPasses = 4;
+ // One 256-bin histogram per key byte, all filled in a single scan.
+ uint32 hist[256 * cMaxPasses]; clear_obj(hist);
+ for (uint i = 0; i < num_syms; i++) { uint freq = pSyms0[i].m_key; hist[freq & 0xFF]++; hist[256 + ((freq >> 8) & 0xFF)]++; hist[256*2 + ((freq >> 16) & 0xFF)]++; hist[256*3 + ((freq >> 24) & 0xFF)]++; }
+ sym_freq* pCur_syms = pSyms0, *pNew_syms = pSyms1;
+ // Drop high-order passes in which every key has a zero byte (bin 0 holds all symbols).
+ uint total_passes = cMaxPasses; while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) total_passes--;
+ for (uint pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8)
+ {
+ const uint32* pHist = &hist[pass << 8];
+ // Prefix sums turn bin counts into the starting output offset of each bin.
+ uint offsets[256], cur_ofs = 0;
+ for (uint i = 0; i < 256; i++) { offsets[i] = cur_ofs; cur_ofs += pHist[i]; }
+ // Scatter in input order, so elements with equal bytes keep their relative order.
+ for (uint i = 0; i < num_syms; i++)
+ pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i];
+ sym_freq* t = pCur_syms; pCur_syms = pNew_syms; pNew_syms = t;
+ }
+ return pCur_syms;
+}
+
+// calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996.
+// In-place Huffman code-length computation. On entry A[0..n-1].m_key holds symbol
+// frequencies sorted ascending; on return each m_key holds that symbol's code length.
+// n == 0 is a no-op and n == 1 yields a single 1-bit code.
+static void calculate_minimum_redundancy(sym_freq *A, int n)
+{
+  int root, leaf, next, avbl, used, dpth;
+  if (n==0) return; else if (n==1) { A[0].m_key = 1; return; }
+  // Phase 1: build the tree. Each internal node `next` takes the two smallest remaining
+  // weights, drawn either from unused leaves (leaf) or from earlier internal nodes (root).
+  // A consumed internal node has its key overwritten with the index of its parent.
+  A[0].m_key += A[1].m_key; root = 0; leaf = 2;
+  for (next=1; next < n-1; next++)
+  {
+    if (leaf>=n || A[root].m_key<A[leaf].m_key) { A[next].m_key = A[root].m_key; A[root++].m_key = next; } else A[next].m_key = A[leaf++].m_key;
+    if (leaf>=n || (root<next && A[root].m_key<A[leaf].m_key)) { A[next].m_key += A[root].m_key; A[root++].m_key = next; } else A[next].m_key += A[leaf++].m_key;
+  }
+  // Phase 2: turn parent indices into internal-node depths, walking from the root (depth 0) down.
+  A[n-2].m_key = 0; for (next=n-3; next>=0; next--) A[next].m_key = A[A[next].m_key].m_key+1;
+  // Phase 3: convert internal-node depths into leaf depths, filling A from the back.
+  avbl = 1; used = dpth = 0; root = n-2; next = n-1;
+  while (avbl>0)
+  {
+    while (root>=0 && (int)A[root].m_key==dpth) { used++; root--; }
+    while (avbl>used) { A[next--].m_key = dpth; avbl--; }
+    avbl = 2*used; dpth++; used = 0;
+  }
+}
+
+// Limits canonical Huffman code table's max code size to max_code_size.
+// pNum_codes[i] is the number of codes of length i (indices 1..MAX_HUFF_CODESIZE) and is
+// adjusted in place. Does nothing when code_list_len <= 1.
+static void huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size)
+{
+ if (code_list_len <= 1) return;
+
+ // Fold every over-long code into the longest permitted length.
+ for (int i = max_code_size + 1; i <= MAX_HUFF_CODESIZE; i++) pNum_codes[max_code_size] += pNum_codes[i];
+
+ // Weighted code count, in units of 2^-max_code_size; it equals 2^max_code_size
+ // exactly when the set of code lengths is complete.
+ uint32 total = 0;
+ for (int i = max_code_size; i > 0; i--)
+ total += (((uint32)pNum_codes[i]) << (max_code_size - i));
+
+ // Each iteration removes one unit: drop a longest code, then split the longest
+ // shorter code into two codes that are one bit longer.
+ while (total != (1UL << max_code_size))
+ {
+ pNum_codes[max_code_size]--;
+ for (int i = max_code_size - 1; i > 0; i--)
+ {
+ if (pNum_codes[i]) { pNum_codes[i]--; pNum_codes[i + 1] += 2; break; }
+ }
+ total--;
+ }
+}
+
+// Generates an optimized Huffman table.
+// Reads the first table_len symbol counts in m_huff_count[table_num] (gathered during pass one)
+// and writes the result to m_huff_bits[table_num] (codes per length) and
+// m_huff_val[table_num] (symbols ordered from shortest to longest code).
+void jpeg_encoder::optimize_huffman_table(int table_num, int table_len)
+{
+  sym_freq syms0[MAX_HUFF_SYMBOLS], syms1[MAX_HUFF_SYMBOLS];
+  syms0[0].m_key = 1; syms0[0].m_sym_index = 0; // dummy symbol, assures that no valid code contains all 1's
+  int num_used_syms = 1;
+  const uint32 *pSym_count = &m_huff_count[table_num][0];
+  // Collect only the symbols that occurred; m_sym_index is shifted by one past the dummy.
+  for (int i = 0; i < table_len; i++)
+    if (pSym_count[i]) { syms0[num_used_syms].m_key = pSym_count[i]; syms0[num_used_syms++].m_sym_index = i + 1; }
+  sym_freq* pSyms = radix_sort_syms(num_used_syms, syms0, syms1);
+  calculate_minimum_redundancy(pSyms, num_used_syms);
+
+  // Count the # of symbols of each code size.
+  int num_codes[1 + MAX_HUFF_CODESIZE]; clear_obj(num_codes);
+  for (int i = 0; i < num_used_syms; i++)
+    num_codes[pSyms[i].m_key]++;
+
+  const uint JPGE_CODE_SIZE_LIMIT = 16; // the maximum possible size of a JPEG Huffman code (valid range is [9,16] - 9 vs. 8 because of the dummy symbol)
+  huffman_enforce_max_code_size(num_codes, num_used_syms, JPGE_CODE_SIZE_LIMIT);
+
+  // Compute m_huff_bits array, which contains the # of symbols per code size.
+  clear_obj(m_huff_bits[table_num]);
+  for (int i = 1; i <= (int)JPGE_CODE_SIZE_LIMIT; i++)
+    m_huff_bits[table_num][i] = static_cast<uint8>(num_codes[i]);
+
+  // Remove the dummy symbol added above, which must be in largest bucket.
+  for (int i = JPGE_CODE_SIZE_LIMIT; i >= 1; i--)
+  {
+    if (m_huff_bits[table_num][i]) { m_huff_bits[table_num][i]--; break; }
+  }
+
+  // Compute the m_huff_val array, which contains the symbol indices sorted by code size (smallest to largest).
+  // pSyms is in ascending frequency order, so walking it backwards lists the most frequent first;
+  // index 0 is deliberately excluded from the walk.
+  for (int i = num_used_syms - 1; i >= 1; i--)
+    m_huff_val[table_num][num_used_syms - 1 - i] = static_cast<uint8>(pSyms[i].m_sym_index - 1);
+}
+
+// JPEG marker generation.
+// Writes one byte to the output stream. Once any write has failed nothing more is
+// written and the failure flag stays set.
+void jpeg_encoder::emit_byte(uint8 i)
+{
+  if (m_all_stream_writes_succeeded)
+  {
+    m_all_stream_writes_succeeded = m_pStream->put_obj(i);
+  }
+}
+
+// Writes the low 16 bits of i, most significant byte first.
+void jpeg_encoder::emit_word(uint i)
+{
+  const uint8 hi = uint8(i >> 8);
+  const uint8 lo = uint8(i & 0xFF);
+  emit_byte(hi);
+  emit_byte(lo);
+}
+
+// Writes a two-byte marker: the 0xFF prefix followed by the marker code.
+void jpeg_encoder::emit_marker(int marker)
+{
+  const uint8 prefix = uint8(0xFF);
+  emit_byte(prefix);
+  emit_byte(uint8(marker));
+}
+
+// Emit JFIF marker
+// Writes the APP0 segment: marker, 16-bit length, then a fixed 14-byte payload.
+void jpeg_encoder::emit_jfif_app0()
+{
+  static const uint8 s_payload[14] =
+  {
+    0x4A, 0x46, 0x49, 0x46, 0, /* Identifier: ASCII "JFIF" plus terminator */
+    1,                         /* Major version */
+    1,                         /* Minor version */
+    0,                         /* Density unit */
+    0, 1,                      /* X density = 1 */
+    0, 1,                      /* Y density = 1 */
+    0,                         /* No thumbnail image */
+    0
+  };
+  emit_marker(M_APP0);
+  emit_word(2 + 4 + 1 + 2 + 1 + 2 + 2 + 1 + 1); /* segment length, including these two bytes */
+  for (int k = 0; k < 14; k++)
+    emit_byte(s_payload[k]);
+}
+
+// Emit quantization tables
+// Writes one DQT segment per table: two tables for a 3-component image, otherwise one.
+// Each segment is the marker, a length of 67, the table index, then 64 one-byte entries.
+void jpeg_encoder::emit_dqt()
+{
+  for (int i = 0; i < ((m_num_components == 3) ? 2 : 1); i++)
+  {
+    emit_marker(M_DQT);
+    emit_word(64 + 1 + 2);
+    emit_byte(static_cast<uint8>(i));
+    for (int j = 0; j < 64; j++)
+      emit_byte(static_cast<uint8>(m_quantization_tables[i][j]));
+  }
+}
+
+// Emit start of frame marker
+// Writes the SOF0 segment: precision, image height and width as 16-bit words,
+// then an (id, sampling factors, quantization table) triple per component.
+void jpeg_encoder::emit_sof()
+{
+  emit_marker(M_SOF0); /* baseline */
+  emit_word(3 * m_num_components + 2 + 5 + 1);
+  emit_byte(8); /* precision */
+  emit_word(m_image_y);
+  emit_word(m_image_x);
+  emit_byte(m_num_components);
+  for (int i = 0; i < m_num_components; i++)
+  {
+    emit_byte(static_cast<uint8>(i + 1)); /* component ID */
+    emit_byte((m_comp_h_samp[i] << 4) + m_comp_v_samp[i]); /* h and v sampling */
+    emit_byte(i > 0); /* quant. table num */
+  }
+}
+
+// Emit Huffman table.
+// bits[1..16] holds the number of codes of each length and val[] the symbols in code order.
+// index selects the table slot; ac_flag sets bit 4 of the table byte.
+void jpeg_encoder::emit_dht(uint8 *bits, uint8 *val, int index, bool ac_flag)
+{
+  emit_marker(M_DHT);
+
+  // The number of symbols is the sum of the per-length counts.
+  int length = 0;
+  for (int i = 1; i <= 16; i++)
+    length += bits[i];
+
+  emit_word(length + 2 + 1 + 16);
+  emit_byte(static_cast<uint8>(index + (ac_flag << 4)));
+
+  for (int i = 1; i <= 16; i++)
+    emit_byte(bits[i]);
+
+  for (int i = 0; i < length; i++)
+    emit_byte(val[i]);
+}
+
+// Emit all Huffman tables.
+// Table slots 0 and 2 are the DC/AC pair with index 0; slots 1 and 3 (index 1) are
+// written only for 3-component images.
+void jpeg_encoder::emit_dhts()
+{
+  const int num_table_sets = (m_num_components == 3) ? 2 : 1;
+  for (int t = 0; t < num_table_sets; t++)
+  {
+    emit_dht(m_huff_bits[0 + t], m_huff_val[0 + t], t, false);
+    emit_dht(m_huff_bits[2 + t], m_huff_val[2 + t], t, true);
+  }
+}
+
+// emit start of scan
+// Writes the SOS segment: component count, a (component id, table selector) pair per
+// component, then the three trailing scan bytes 0, 63, 0.
+void jpeg_encoder::emit_sos()
+{
+  emit_marker(M_SOS);
+  emit_word(2 * m_num_components + 2 + 1 + 3);
+  emit_byte(m_num_components);
+  for (int i = 0; i < m_num_components; i++)
+  {
+    emit_byte(static_cast<uint8>(i + 1));
+    // High nibble and low nibble both select table 0 for the first component, table 1 for the rest.
+    if (i == 0)
+      emit_byte((0 << 4) + 0);
+    else
+      emit_byte((1 << 4) + 1);
+  }
+  emit_byte(0); /* spectral selection */
+  emit_byte(63);
+  emit_byte(0);
+}
+
+// Emit all markers at beginning of image file.
+// Order written: SOI, JFIF APP0, quantization tables, frame header, Huffman tables, scan header.
+void jpeg_encoder::emit_markers()
+{
+ emit_marker(M_SOI);
+ emit_jfif_app0();
+ emit_dqt();
+ emit_sof();
+ emit_dhts();
+ emit_sos();
+}
+
+// Compute the actual canonical Huffman codes/code sizes given the JPEG huff bits and val arrays.
+// codes[] and code_sizes[] are 256-entry outputs indexed by symbol value; entries for
+// symbols that do not appear in val[] are left at 0.
+void jpeg_encoder::compute_huffman_table(uint *codes, uint8 *code_sizes, uint8 *bits, uint8 *val)
+{
+ int i, l, last_p, si;
+ uint8 huff_size[257];
+ uint huff_code[257];
+ uint code;
+
+ // Expand the per-length counts into one code length per symbol, in table order.
+ int p = 0;
+ for (l = 1; l <= 16; l++)
+ for (i = 1; i <= bits[l]; i++)
+ huff_size[p++] = (char)l;
+
+ huff_size[p] = 0; last_p = p; // write sentinel
+
+ code = 0; si = huff_size[0]; p = 0;
+
+ // Assign consecutive code values within each length; moving to the next length
+ // shifts the running code left by one bit.
+ while (huff_size[p])
+ {
+ while (huff_size[p] == si)
+ huff_code[p++] = code++;
+ code <<= 1;
+ si++;
+ }
+
+ // Scatter the codes into the symbol-indexed output tables.
+ memset(codes, 0, sizeof(codes[0])*256);
+ memset(code_sizes, 0, sizeof(code_sizes[0])*256);
+ for (p = 0; p < last_p; p++)
+ {
+ codes[val[p]] = huff_code[p];
+ code_sizes[val[p]] = huff_size[p];
+ }
+}
+
+// Quantization table generation.
+// Scales the 64-entry base table pSrc by a percentage derived from m_params.m_quality
+// and stores the result, limited to [1, 255], in pDst.
+void jpeg_encoder::compute_quant_table(int32 *pDst, int16 *pSrc)
+{
+  // Below quality 50 the factor grows as 5000/quality; from 50 upwards it falls linearly.
+  const int32 q = (m_params.m_quality < 50) ? (5000 / m_params.m_quality) : (200 - m_params.m_quality * 2);
+  for (int i = 0; i < 64; i++)
+  {
+    int32 scaled = pSrc[i];
+    scaled = (scaled * q + 50L) / 100L;
+    pDst[i] = JPGE_MIN(JPGE_MAX(scaled, 1), 255);
+  }
+}
+
+// Higher-level methods.
+// Resets per-pass encoder state: bit buffer, DC predictors, MCU row cursor; marks pass 1.
+void jpeg_encoder::first_pass_init()
+{
+  m_pass_num = 1;
+  m_mcu_y_ofs = 0;
+  for (int k = 0; k < 3; k++)
+    m_last_dc_val[k] = 0;
+  m_bits_in = 0;
+  m_bit_buffer = 0;
+}
+
+// Prepares the coding pass: builds code tables from m_huff_bits / m_huff_val, resets the
+// per-pass state, writes the file header markers and marks pass 2. Always returns true.
+bool jpeg_encoder::second_pass_init()
+{
+  // Table set 0 (slots 0 and 2) is always built; set 1 (slots 1 and 3) only with chroma.
+  const int num_table_sets = (m_num_components > 1) ? 2 : 1;
+  for (int t = 0; t < num_table_sets; t++)
+  {
+    compute_huffman_table(&m_huff_codes[0+t][0], &m_huff_code_sizes[0+t][0], m_huff_bits[0+t], m_huff_val[0+t]);
+    compute_huffman_table(&m_huff_codes[2+t][0], &m_huff_code_sizes[2+t][0], m_huff_bits[2+t], m_huff_val[2+t]);
+  }
+  first_pass_init();
+  emit_markers();
+  m_pass_num = 2;
+  return true;
+}
+
+// Configures the encoder for a p_x_res by p_y_res image with src_channels bytes per source
+// pixel: selects sampling factors and MCU size from m_params.m_subsampling, allocates the MCU
+// line buffer, builds the quantization tables and starts the first (or only) pass.
+// Returns false if the line buffer cannot be allocated or an output write has failed.
+bool jpeg_encoder::jpg_open(int p_x_res, int p_y_res, int src_channels)
+{
+  m_num_components = 3;
+  switch (m_params.m_subsampling)
+  {
+    case Y_ONLY:
+    {
+      m_num_components = 1;
+      m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1;
+      m_mcu_x = 8; m_mcu_y = 8;
+      break;
+    }
+    case H1V1:
+    {
+      m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1;
+      m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
+      m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
+      m_mcu_x = 8; m_mcu_y = 8;
+      break;
+    }
+    case H2V1:
+    {
+      m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 1;
+      m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
+      m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
+      m_mcu_x = 16; m_mcu_y = 8;
+      break;
+    }
+    case H2V2:
+    {
+      m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 2;
+      m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
+      m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
+      m_mcu_x = 16; m_mcu_y = 16;
+      break;
+    }
+  }
+
+  m_image_x = p_x_res; m_image_y = p_y_res;
+  m_image_bpp = src_channels;
+  m_image_bpl = m_image_x * src_channels;
+  // Round both dimensions up to a whole number of MCUs (m_mcu_x / m_mcu_y are powers of two).
+  m_image_x_mcu = (m_image_x + m_mcu_x - 1) & (~(m_mcu_x - 1));
+  m_image_y_mcu = (m_image_y + m_mcu_y - 1) & (~(m_mcu_y - 1));
+  m_image_bpl_xlt = m_image_x * m_num_components;
+  m_image_bpl_mcu = m_image_x_mcu * m_num_components;
+  m_mcus_per_row = m_image_x_mcu / m_mcu_x;
+
+  // A single allocation holds m_mcu_y converted scanlines; m_mcu_lines[i] points at line i.
+  if ((m_mcu_lines[0] = static_cast<uint8*>(jpge_malloc(m_image_bpl_mcu * m_mcu_y))) == NULL) return false;
+  for (int i = 1; i < m_mcu_y; i++)
+    m_mcu_lines[i] = m_mcu_lines[i-1] + m_image_bpl_mcu;
+
+  compute_quant_table(m_quantization_tables[0], s_std_lum_quant);
+  compute_quant_table(m_quantization_tables[1], m_params.m_no_chroma_discrim_flag ? s_std_lum_quant : s_std_croma_quant);
+
+  m_out_buf_left = JPGE_OUT_BUF_SIZE;
+  m_pOut_buf = m_out_buf;
+
+  if (m_params.m_two_pass_flag)
+  {
+    // Two-pass mode: pass one only gathers symbol statistics.
+    clear_obj(m_huff_count);
+    first_pass_init();
+  }
+  else
+  {
+    // Single-pass mode: install the default Huffman tables and go straight to coding.
+    memcpy(m_huff_bits[0+0], s_dc_lum_bits, 17); memcpy(m_huff_val [0+0], s_dc_lum_val, DC_LUM_CODES);
+    memcpy(m_huff_bits[2+0], s_ac_lum_bits, 17); memcpy(m_huff_val [2+0], s_ac_lum_val, AC_LUM_CODES);
+    memcpy(m_huff_bits[0+1], s_dc_chroma_bits, 17); memcpy(m_huff_val [0+1], s_dc_chroma_val, DC_CHROMA_CODES);
+    memcpy(m_huff_bits[2+1], s_ac_chroma_bits, 17); memcpy(m_huff_val [2+1], s_ac_chroma_val, AC_CHROMA_CODES);
+    if (!second_pass_init()) return false; // in effect, skip over the first pass
+  }
+  return m_all_stream_writes_succeeded;
+}
+
+// Loads 8x8 block number x of a single-component MCU row into m_sample_array,
+// subtracting 128 from every sample.
+void jpeg_encoder::load_block_8_8_grey(int x)
+{
+  const int first_col = x << 3;
+  sample_array_t *pOut = m_sample_array;
+  for (int row = 0; row < 8; row++)
+  {
+    const uint8 *pIn = m_mcu_lines[row] + first_col;
+    for (int k = 0; k < 8; k++)
+      *pOut++ = pIn[k] - 128;
+  }
+}
+
+// Loads one 8x8 block of component c from the 3-byte-per-pixel MCU lines into m_sample_array,
+// subtracting 128 from every sample. x and y are block coordinates inside the MCU row.
+void jpeg_encoder::load_block_8_8(int x, int y, int c)
+{
+  const int byte_ofs = (x * (8 * 3)) + c;
+  const int first_row = y << 3;
+  sample_array_t *pOut = m_sample_array;
+  for (int row = 0; row < 8; row++)
+  {
+    const uint8 *pIn = m_mcu_lines[first_row + row] + byte_ofs;
+    for (int k = 0; k < 8; k++)
+      *pOut++ = pIn[k * 3] - 128;
+  }
+}
+
+// Loads component c of 16x16 MCU number x as an 8x8 block: each output sample is the
+// average of a 2x2 pixel group, minus 128. The rounding bias alternates between 0 and 2
+// across columns, and the pattern is swapped on every output row.
+void jpeg_encoder::load_block_16_8(int x, int c)
+{
+  const int byte_ofs = (x * (16 * 3)) + c;
+  sample_array_t *pOut = m_sample_array;
+  int bias_even = 0, bias_odd = 2;
+  for (int row = 0; row < 16; row += 2)
+  {
+    const uint8 *pTop = m_mcu_lines[row + 0] + byte_ofs;
+    const uint8 *pBot = m_mcu_lines[row + 1] + byte_ofs;
+    for (int k = 0; k < 8; k++)
+    {
+      const int left = (2 * k) * 3, right = (2 * k + 1) * 3;
+      const int bias = (k & 1) ? bias_odd : bias_even;
+      *pOut++ = ((pTop[left] + pTop[right] + pBot[left] + pBot[right] + bias) >> 2) - 128;
+    }
+    const int swapped = bias_even; bias_even = bias_odd; bias_odd = swapped;
+  }
+}
+
+// Loads component c of 16x8 MCU number x as an 8x8 block: each output sample is the
+// truncated average of two horizontally adjacent pixels, minus 128.
+void jpeg_encoder::load_block_16_8_8(int x, int c)
+{
+  const int byte_ofs = (x * (16 * 3)) + c;
+  sample_array_t *pOut = m_sample_array;
+  for (int row = 0; row < 8; row++)
+  {
+    const uint8 *pIn = m_mcu_lines[row + 0] + byte_ofs;
+    for (int k = 0; k < 8; k++)
+    {
+      const int left = (2 * k) * 3, right = (2 * k + 1) * 3;
+      *pOut++ = ((pIn[left] + pIn[right]) >> 1) - 128;
+    }
+  }
+}
+
+// Quantizes the 64 values in m_sample_array into m_coefficient_array, reading them in
+// s_zag order. Component 0 uses quantization table 0; every other component uses table 1.
+void jpeg_encoder::load_quantized_coefficients(int component_num)
+{
+  int32 *q = m_quantization_tables[component_num > 0];
+  int16 *pDst = m_coefficient_array;
+  for (int i = 0; i < 64; i++)
+  {
+    sample_array_t j = m_sample_array[s_zag[i]];
+    // Round to nearest by adding half the quantizer to the magnitude; magnitudes that
+    // stay below one quantizer step become 0 without performing the division.
+    if (j < 0)
+    {
+      if ((j = -j + (*q >> 1)) < *q)
+        *pDst++ = 0;
+      else
+        *pDst++ = static_cast<int16>(-(j / *q));
+    }
+    else
+    {
+      if ((j = j + (*q >> 1)) < *q)
+        *pDst++ = 0;
+      else
+        *pDst++ = static_cast<int16>((j / *q));
+    }
+    q++;
+  }
+}
+
+// Writes any buffered output bytes to the stream and resets the buffer to empty.
+// Nothing is written once an earlier stream write has failed.
+void jpeg_encoder::flush_output_buffer()
+{
+  const bool has_pending = (m_out_buf_left != JPGE_OUT_BUF_SIZE);
+  if (has_pending && m_all_stream_writes_succeeded)
+  {
+    m_all_stream_writes_succeeded = m_pStream->put_buf(m_out_buf, JPGE_OUT_BUF_SIZE - m_out_buf_left);
+  }
+  m_out_buf_left = JPGE_OUT_BUF_SIZE;
+  m_pOut_buf = m_out_buf;
+}
+
+// Appends the low `len` bits of `bits` to the output bit stream.
+// m_bit_buffer is used as a 24-bit window filled from the top; m_bits_in counts the
+// bits currently held in it.
+void jpeg_encoder::put_bits(uint bits, uint len)
+{
+ // m_bits_in is advanced first, so the new bits land directly below those already buffered.
+ m_bit_buffer |= ((uint32)bits << (24 - (m_bits_in += len)));
+ while (m_bits_in >= 8)
+ {
+ uint8 c;
+ // Stores one byte in the output buffer and flushes the buffer when it becomes full.
+ #define JPGE_PUT_BYTE(c) { *m_pOut_buf++ = (c); if (--m_out_buf_left == 0) flush_output_buffer(); }
+ JPGE_PUT_BYTE(c = (uint8)((m_bit_buffer >> 16) & 0xFF));
+ // Byte stuffing: a 0xFF data byte is followed by a 0 byte.
+ if (c == 0xFF) JPGE_PUT_BYTE(0);
+ m_bit_buffer <<= 8;
+ m_bits_in -= 8;
+ }
+}
+
+// Pass one: writes nothing, only counts how often each Huffman symbol would be used for the
+// block in m_coefficient_array. Counts go to m_huff_count slots 0/2 (DC/AC) for
+// component 0 and slots 1/3 for the other components.
+void jpeg_encoder::code_coefficients_pass_one(int component_num)
+{
+ if (component_num >= 3) return; // just to shut up static analysis
+ int i, run_len, nbits, temp1;
+ int16 *src = m_coefficient_array;
+ uint32 *dc_count = component_num ? m_huff_count[0 + 1] : m_huff_count[0 + 0], *ac_count = component_num ? m_huff_count[2 + 1] : m_huff_count[2 + 0];
+
+ // DC is coded as the difference from the previous block's DC of the same component.
+ temp1 = src[0] - m_last_dc_val[component_num];
+ m_last_dc_val[component_num] = src[0];
+ if (temp1 < 0) temp1 = -temp1;
+
+ // nbits = number of bits needed for the magnitude (0 when the difference is 0).
+ nbits = 0;
+ while (temp1)
+ {
+ nbits++; temp1 >>= 1;
+ }
+
+ dc_count[nbits]++;
+ // AC: each non-zero coefficient is counted as symbol (zero run << 4) + magnitude bits.
+ for (run_len = 0, i = 1; i < 64; i++)
+ {
+ if ((temp1 = m_coefficient_array[i]) == 0)
+ run_len++;
+ else
+ {
+ // Runs of 16 or more zeros are broken up with symbol 0xF0, one per 16 zeros.
+ while (run_len >= 16)
+ {
+ ac_count[0xF0]++;
+ run_len -= 16;
+ }
+ if (temp1 < 0) temp1 = -temp1;
+ nbits = 1;
+ while (temp1 >>= 1) nbits++;
+ ac_count[(run_len << 4) + nbits]++;
+ run_len = 0;
+ }
+ }
+ // Trailing zeros are represented by a single symbol 0.
+ if (run_len) ac_count[0]++;
+}
+
+// Pass two: Huffman-codes the block in m_coefficient_array and writes it with put_bits().
+// Component 0 uses table slots 0/2 (DC/AC); every other component uses slots 1/3.
+void jpeg_encoder::code_coefficients_pass_two(int component_num)
+{
+ int i, j, run_len, nbits, temp1, temp2;
+ int16 *pSrc = m_coefficient_array;
+ uint *codes[2];
+ uint8 *code_sizes[2];
+
+ // Index 0 of codes/code_sizes is the DC table, index 1 the AC table.
+ if (component_num == 0)
+ {
+ codes[0] = m_huff_codes[0 + 0]; codes[1] = m_huff_codes[2 + 0];
+ code_sizes[0] = m_huff_code_sizes[0 + 0]; code_sizes[1] = m_huff_code_sizes[2 + 0];
+ }
+ else
+ {
+ codes[0] = m_huff_codes[0 + 1]; codes[1] = m_huff_codes[2 + 1];
+ code_sizes[0] = m_huff_code_sizes[0 + 1]; code_sizes[1] = m_huff_code_sizes[2 + 1];
+ }
+
+ // DC is coded as the difference from the previous block's DC of the same component.
+ temp1 = temp2 = pSrc[0] - m_last_dc_val[component_num];
+ m_last_dc_val[component_num] = pSrc[0];
+
+ // temp1 becomes the magnitude; for a negative value temp2 is decremented so that its
+ // low nbits bits are the value to append after the Huffman code.
+ if (temp1 < 0)
+ {
+ temp1 = -temp1; temp2--;
+ }
+
+ nbits = 0;
+ while (temp1)
+ {
+ nbits++; temp1 >>= 1;
+ }
+
+ put_bits(codes[0][nbits], code_sizes[0][nbits]);
+ if (nbits) put_bits(temp2 & ((1 << nbits) - 1), nbits);
+
+ // AC: each non-zero coefficient is written as symbol (zero run << 4) + magnitude bits,
+ // followed by the magnitude bits themselves.
+ for (run_len = 0, i = 1; i < 64; i++)
+ {
+ if ((temp1 = m_coefficient_array[i]) == 0)
+ run_len++;
+ else
+ {
+ // Runs of 16 or more zeros are broken up with symbol 0xF0, one per 16 zeros.
+ while (run_len >= 16)
+ {
+ put_bits(codes[1][0xF0], code_sizes[1][0xF0]);
+ run_len -= 16;
+ }
+ if ((temp2 = temp1) < 0)
+ {
+ temp1 = -temp1;
+ temp2--;
+ }
+ nbits = 1;
+ while (temp1 >>= 1)
+ nbits++;
+ j = (run_len << 4) + nbits;
+ put_bits(codes[1][j], code_sizes[1][j]);
+ put_bits(temp2 & ((1 << nbits) - 1), nbits);
+ run_len = 0;
+ }
+ }
+ // Trailing zeros are written as a single symbol 0.
+ if (run_len)
+ put_bits(codes[1][0], code_sizes[1][0]);
+}
+
+// Transforms and quantizes the block currently in m_sample_array, then either gathers
+// statistics (pass 1) or writes the coded block (any other pass).
+void jpeg_encoder::code_block(int component_num)
+{
+  DCT2D(m_sample_array);
+  load_quantized_coefficients(component_num);
+  if (m_pass_num != 1)
+  {
+    code_coefficients_pass_two(component_num);
+    return;
+  }
+  code_coefficients_pass_one(component_num);
+}
+
+// Codes every MCU of the row currently held in m_mcu_lines. The branch is chosen from the
+// component count and the sampling factors of component 0.
+void jpeg_encoder::process_mcu_row()
+{
+ // Single component: one 8x8 block per MCU.
+ if (m_num_components == 1)
+ {
+ for (int i = 0; i < m_mcus_per_row; i++)
+ {
+ load_block_8_8_grey(i); code_block(0);
+ }
+ }
+ // 1x1 sampling: one 8x8 block for each of the three components.
+ else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1))
+ {
+ for (int i = 0; i < m_mcus_per_row; i++)
+ {
+ load_block_8_8(i, 0, 0); code_block(0); load_block_8_8(i, 0, 1); code_block(1); load_block_8_8(i, 0, 2); code_block(2);
+ }
+ }
+ // 2x1 sampling: two side-by-side blocks of component 0, then one horizontally averaged
+ // block for each of components 1 and 2.
+ else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1))
+ {
+ for (int i = 0; i < m_mcus_per_row; i++)
+ {
+ load_block_8_8(i * 2 + 0, 0, 0); code_block(0); load_block_8_8(i * 2 + 1, 0, 0); code_block(0);
+ load_block_16_8_8(i, 1); code_block(1); load_block_16_8_8(i, 2); code_block(2);
+ }
+ }
+ // 2x2 sampling: four blocks of component 0 (top pair, then bottom pair), then one
+ // 2x2-averaged block for each of components 1 and 2.
+ else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2))
+ {
+ for (int i = 0; i < m_mcus_per_row; i++)
+ {
+ load_block_8_8(i * 2 + 0, 0, 0); code_block(0); load_block_8_8(i * 2 + 1, 0, 0); code_block(0);
+ load_block_8_8(i * 2 + 0, 1, 0); code_block(0); load_block_8_8(i * 2 + 1, 1, 0); code_block(0);
+ load_block_16_8(i, 1); code_block(1); load_block_16_8(i, 2); code_block(2);
+ }
+ }
+}
+
+// Ends the statistics pass: builds optimized Huffman tables from the gathered counts
+// (table set 1 only when there is more than one component), then starts the coding pass.
+bool jpeg_encoder::terminate_pass_one()
+{
+  optimize_huffman_table(0+0, DC_LUM_CODES);
+  optimize_huffman_table(2+0, AC_LUM_CODES);
+
+  const bool has_chroma = (m_num_components > 1);
+  if (has_chroma)
+  {
+    optimize_huffman_table(0+1, DC_CHROMA_CODES);
+    optimize_huffman_table(2+1, AC_CHROMA_CODES);
+  }
+
+  return second_pass_init();
+}
+
+// Ends the coding pass: pads the bit stream, flushes the output buffer and writes the
+// end-of-image marker. Always returns true.
+bool jpeg_encoder::terminate_pass_two()
+{
+ // Seven 1-bits are enough to push any partially filled byte out of the bit buffer.
+ put_bits(0x7F, 7);
+ flush_output_buffer();
+ emit_marker(M_EOI);
+ m_pass_num++; // purposely bump up m_pass_num, for debugging
+ return true;
+}
+
+// Finishes the current pass. A partially filled MCU row is completed by repeating the last
+// scanline received and is then coded, after which the pass-specific terminator runs.
+bool jpeg_encoder::process_end_of_image()
+{
+  if (m_mcu_y_ofs)
+  {
+    if (m_mcu_y_ofs < 16) // check here just to shut up static analysis
+    {
+      const uint8 *pLast_line = m_mcu_lines[m_mcu_y_ofs - 1];
+      for (int row = m_mcu_y_ofs; row < m_mcu_y; row++)
+        memcpy(m_mcu_lines[row], pLast_line, m_image_bpl_mcu);
+    }
+
+    process_mcu_row();
+  }
+
+  return (m_pass_num == 1) ? terminate_pass_one() : terminate_pass_two();
+}
+
+// Converts one source scanline into the encoder's internal format, stores it as the next
+// line of the current MCU row, and codes the row once m_mcu_y lines have been collected.
+void jpeg_encoder::load_mcu(const void *pSrc)
+{
+  const uint8* Psrc = reinterpret_cast<const uint8*>(pSrc);
+
+  uint8* pDst = m_mcu_lines[m_mcu_y_ofs]; // OK to write up to m_image_bpl_xlt bytes to pDst
+
+  // Pick the conversion from the source bytes-per-pixel (4, 3, or otherwise 1)
+  // and the number of output components.
+  if (m_num_components == 1)
+  {
+    if (m_image_bpp == 4)
+      RGBA_to_Y(pDst, Psrc, m_image_x);
+    else if (m_image_bpp == 3)
+      RGB_to_Y(pDst, Psrc, m_image_x);
+    else
+      memcpy(pDst, Psrc, m_image_x);
+  }
+  else
+  {
+    if (m_image_bpp == 4)
+      RGBA_to_YCC(pDst, Psrc, m_image_x);
+    else if (m_image_bpp == 3)
+      RGB_to_YCC(pDst, Psrc, m_image_x);
+    else
+      Y_to_YCC(pDst, Psrc, m_image_x);
+  }
+
+  // Possibly duplicate pixels at end of scanline if not a multiple of 8 or 16
+  if (m_num_components == 1)
+    memset(m_mcu_lines[m_mcu_y_ofs] + m_image_bpl_xlt, pDst[m_image_bpl_xlt - 1], m_image_x_mcu - m_image_x);
+  else
+  {
+    // Repeat the last converted pixel across the padding columns.
+    const uint8 y = pDst[m_image_bpl_xlt - 3 + 0], cb = pDst[m_image_bpl_xlt - 3 + 1], cr = pDst[m_image_bpl_xlt - 3 + 2];
+    uint8 *q = m_mcu_lines[m_mcu_y_ofs] + m_image_bpl_xlt;
+    for (int i = m_image_x; i < m_image_x_mcu; i++)
+    {
+      *q++ = y; *q++ = cb; *q++ = cr;
+    }
+  }
+
+  if (++m_mcu_y_ofs == m_mcu_y)
+  {
+    process_mcu_row();
+    m_mcu_y_ofs = 0;
+  }
+}
+
+// Puts the encoder into its idle state: no line buffer, pass number 0, no write failure
+// recorded. Does not free anything.
+void jpeg_encoder::clear()
+{
+  m_all_stream_writes_succeeded = true;
+  m_pass_num = 0;
+  m_mcu_lines[0] = NULL;
+}
+
+// Constructs an idle encoder; init() must succeed before scanlines are accepted
+// (process_scanline() rejects pass number 0).
+jpeg_encoder::jpeg_encoder()
+{
+ clear();
+}
+
+// Releases the line buffer through deinit().
+jpeg_encoder::~jpeg_encoder()
+{
+ deinit();
+}
+
+// Initializes the encoder for a new image, releasing any previous state first.
+// Returns false when pStream is NULL, a dimension is outside [1, 65535], src_channels is not
+// 1, 3 or 4, comp_params fails check_valid(), or jpg_open() fails.
+bool jpeg_encoder::init(output_stream *pStream, int64_t width, int64_t height, int64_t src_channels, const params &comp_params)
+{
+  deinit();
+  // The frame header written by emit_sof() stores each dimension with emit_word(), i.e. in
+  // 16 bits, and jpg_open() takes plain ints. Larger values would be truncated silently and
+  // produce a corrupt file, so they are rejected here.
+  const int64_t cMaxDimension = 65535;
+  if ((!pStream) || (width < 1) || (height < 1) || (width > cMaxDimension) || (height > cMaxDimension)) return false;
+  if ((src_channels != 1) && (src_channels != 3) && (src_channels != 4)) return false;
+  if (!comp_params.check_valid()) return false;
+  m_pStream = pStream;
+  m_params = comp_params;
+  return jpg_open(static_cast<int>(width), static_cast<int>(height), static_cast<int>(src_channels));
+}
+
+// Frees the MCU line buffer (a single allocation addressed through m_mcu_lines[0])
+// and returns the encoder to its idle state.
+void jpeg_encoder::deinit()
+{
+ jpge_free(m_mcu_lines[0]);
+ clear();
+}
+
+// Feeds one scanline to the encoder, or ends the current pass when pScanline is NULL.
+// Returns false outside passes 1 and 2, after any stream write failure, or when
+// end-of-image processing fails.
+bool jpeg_encoder::process_scanline(const void* pScanline)
+{
+  const bool pass_active = (m_pass_num >= 1) && (m_pass_num <= 2);
+  if (!pass_active)
+    return false;
+
+  // After a failed write, no further input is consumed.
+  if (!m_all_stream_writes_succeeded)
+    return false;
+
+  if (pScanline)
+    load_mcu(pScanline);
+  else if (!process_end_of_image())
+    return false;
+
+  return m_all_stream_writes_succeeded;
+}
+
+// Higher level wrappers/examples (optional).
+#include <stdio.h>
+
+// output_stream implementation that writes to a file through the C stdio API.
+// m_bStatus is true only while a file is open and no operation has failed.
+class cfile_stream : public output_stream
+{
+  // Non-copyable: declared but not defined.
+  cfile_stream(const cfile_stream &);
+  cfile_stream &operator= (const cfile_stream &);
+
+  FILE* m_pFile;
+  bool m_bStatus;
+
+public:
+  cfile_stream() : m_pFile(NULL), m_bStatus(false) { }
+
+  virtual ~cfile_stream()
+  {
+    close();
+  }
+
+  // Opens pFilename for binary writing, closing any file that is already open.
+  // Returns true on success.
+  bool open(const char *pFilename)
+  {
+    close();
+#if defined(_MSC_VER)
+    // Normalize a failed fopen_s() to a NULL handle so both platforms share the status
+    // update below. Returning early here would leave m_bStatus at a stale `true` from a
+    // previous successful open, and put_buf() would then write through a NULL FILE*.
+    if (fopen_s(&m_pFile, pFilename, "wb") != 0)
+    {
+      m_pFile = NULL;
+    }
+#else
+    m_pFile = fopen(pFilename, "wb");
+#endif
+    m_bStatus = (m_pFile != NULL);
+    return m_bStatus;
+  }
+
+  // Closes the file if one is open. Returns the stream status, which becomes false
+  // when fclose() reports an error.
+  bool close()
+  {
+    if (m_pFile)
+    {
+      if (fclose(m_pFile) == EOF)
+      {
+        m_bStatus = false;
+      }
+      m_pFile = NULL;
+    }
+    return m_bStatus;
+  }
+
+  // Writes len bytes from pBuf as a single item. Once the status is false no further
+  // write is attempted.
+  virtual bool put_buf(const void* pBuf, int64_t len)
+  {
+    m_bStatus = m_bStatus && (fwrite(pBuf, len, 1, m_pFile) == 1);
+    return m_bStatus;
+  }
+
+  // Current file position as reported by ftell(), or 0 when no file is open.
+  uint get_size() const
+  {
+    return m_pFile ? ftell(m_pFile) : 0;
+  }
+};
+
+// Writes JPEG image to file.
+// pImage_data holds `height` rows of width * num_channels bytes each. Returns false if the
+// file cannot be opened, the encoder rejects the parameters, or any scanline or close fails.
+bool compress_image_to_jpeg_file(const char *pFilename, int64_t width, int64_t height, int64_t num_channels, const uint8 *pImage_data, const params &comp_params)
+{
+  cfile_stream out_file;
+  if (!out_file.open(pFilename))
+    return false;
+
+  jpge::jpeg_encoder encoder;
+  if (!encoder.init(&out_file, width, height, num_channels, comp_params))
+    return false;
+
+  // The whole image is fed once per encoder pass; a NULL scanline ends each pass.
+  for (uint pass = 0; pass < encoder.get_total_passes(); pass++)
+  {
+    for (int64_t row = 0; row < height; row++)
+    {
+      // row, width, and num_channels are all 64bit
+      const uint8* pRow = pImage_data + row * width * num_channels;
+      const bool row_ok = encoder.process_scanline(pRow);
+      if (!row_ok)
+        return false;
+    }
+    const bool pass_ok = encoder.process_scanline(NULL);
+    if (!pass_ok)
+      return false;
+  }
+
+  encoder.deinit();
+
+  return out_file.close();
+}
+
+// output_stream implementation that writes into a caller-supplied, fixed-size memory buffer.
+class memory_stream : public output_stream
+{
+  // Non-copyable: declared but not defined.
+  memory_stream(const memory_stream &);
+  memory_stream &operator= (const memory_stream &);
+
+  uint8 *m_pBuf;
+  uint64_t m_buf_size, m_buf_ofs; // capacity in bytes, and number of bytes written so far
+
+public:
+  memory_stream(void *pBuf, uint64_t buf_size) : m_pBuf(static_cast<uint8*>(pBuf)), m_buf_size(buf_size), m_buf_ofs(0) { }
+
+  virtual ~memory_stream() { }
+
+  // Appends len bytes from pBuf. Returns false, writing nothing, when fewer than len bytes
+  // remain; a negative len becomes a very large unsigned value and is rejected by the same test.
+  virtual bool put_buf(const void* pBuf, int64_t len)
+  {
+    uint64_t buf_remaining = m_buf_size - m_buf_ofs;
+    if ((uint64_t)len > buf_remaining)
+      return false;
+    memcpy(m_pBuf + m_buf_ofs, pBuf, len);
+    m_buf_ofs += len;
+    return true;
+  }
+
+  // Number of bytes written so far.
+  uint64_t get_size() const
+  {
+    return m_buf_ofs;
+  }
+};
+
+// Compresses an image into the caller-supplied buffer pDstBuf.
+// On entry buf_size is the capacity of pDstBuf in bytes. On success it receives the number of
+// bytes written; on any failure after argument validation it is left at 0.
+// pImage_data holds `height` rows of width * num_channels bytes each.
+bool compress_image_to_jpeg_file_in_memory(void *pDstBuf, int64_t &buf_size, int64_t width, int64_t height, int64_t num_channels, const uint8 *pImage_data, const params &comp_params)
+{
+  // buf_size is signed: a negative value would otherwise be converted to a huge unsigned
+  // capacity by memory_stream and allow writes far beyond the real buffer.
+  if ((!pDstBuf) || (buf_size <= 0))
+    return false;
+
+  memory_stream dst_stream(pDstBuf, buf_size);
+
+  buf_size = 0;
+
+  jpge::jpeg_encoder dst_image;
+  if (!dst_image.init(&dst_stream, width, height, num_channels, comp_params))
+    return false;
+
+  // The whole image is fed once per encoder pass; a NULL scanline ends each pass.
+  for (uint pass_index = 0; pass_index < dst_image.get_total_passes(); pass_index++)
+  {
+    for (int64_t i = 0; i < height; i++)
+    {
+      const uint8* pScanline = pImage_data + i * width * num_channels;
+      if (!dst_image.process_scanline(pScanline))
+        return false;
+    }
+    if (!dst_image.process_scanline(NULL))
+      return false;
+  }
+
+  dst_image.deinit();
+
+  buf_size = dst_stream.get_size();
+  return true;
+}
+
+} // namespace jpge
\ No newline at end of file
diff --git a/crazy_functions/test_project/cpp/longcode/prod_cons.h b/crazy_functions/test_project/cpp/longcode/prod_cons.h
new file mode 100644
index 0000000..28d99bd
--- /dev/null
+++ b/crazy_functions/test_project/cpp/longcode/prod_cons.h
@@ -0,0 +1,433 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+#include "libipc/def.h"
+
+#include "libipc/platform/detail.h"
+#include "libipc/circ/elem_def.h"
+#include "libipc/utility/log.h"
+#include "libipc/utility/utility.h"
+
+namespace ipc {
+
+////////////////////////////////////////////////////////////////
+/// producer-consumer implementation
+////////////////////////////////////////////////////////////////
+
+template
+struct prod_cons_impl;
+
+template <>
+struct prod_cons_impl> {
+
+ template
+ struct elem_t {
+ std::aligned_storage_t data_ {};
+ };
+
+ alignas(cache_line_size) std::atomic rd_; // read index
+ alignas(cache_line_size) std::atomic wt_; // write index
+
+ constexpr circ::u2_t cursor() const noexcept {
+ return 0;
+ }
+
+ template
+ bool push(W* /*wrapper*/, F&& f, E* elems) {
+ auto cur_wt = circ::index_of(wt_.load(std::memory_order_relaxed));
+ if (cur_wt == circ::index_of(rd_.load(std::memory_order_acquire) - 1)) {
+ return false; // full
+ }
+ std::forward(f)(&(elems[cur_wt].data_));
+ wt_.fetch_add(1, std::memory_order_release);
+ return true;
+ }
+
+ /**
+ * In single-single-unicast, 'force_push' means 'no reader' or 'the only reader is dead'.
+ * So we can just disconnect all receiver connections and return false.
+ */
+ template
+ bool force_push(W* wrapper, F&&, E*) {
+ wrapper->elems()->disconnect_receiver(~static_cast(0u));
+ return false;
+ }
+
+ template
+ bool pop(W* /*wrapper*/, circ::u2_t& /*cur*/, F&& f, R&& out, E* elems) {
+ auto cur_rd = circ::index_of(rd_.load(std::memory_order_relaxed));
+ if (cur_rd == circ::index_of(wt_.load(std::memory_order_acquire))) {
+ return false; // empty
+ }
+ std::forward(f)(&(elems[cur_rd].data_));
+ std::forward(out)(true);
+ rd_.fetch_add(1, std::memory_order_release);
+ return true;
+ }
+};
+
+template <>
+struct prod_cons_impl>
+ : prod_cons_impl> {
+
+ template
+ bool force_push(W* wrapper, F&&, E*) {
+ wrapper->elems()->disconnect_receiver(1);
+ return false;
+ }
+
+ template class E, std::size_t DS, std::size_t AS>
+ bool pop(W* /*wrapper*/, circ::u2_t& /*cur*/, F&& f, R&& out, E* elems) {
+ byte_t buff[DS];
+ for (unsigned k = 0;;) {
+ auto cur_rd = rd_.load(std::memory_order_relaxed);
+ if (circ::index_of(cur_rd) ==
+ circ::index_of(wt_.load(std::memory_order_acquire))) {
+ return false; // empty
+ }
+ std::memcpy(buff, &(elems[circ::index_of(cur_rd)].data_), sizeof(buff));
+ if (rd_.compare_exchange_weak(cur_rd, cur_rd + 1, std::memory_order_release)) {
+ std::forward(f)(buff);
+ std::forward(out)(true);
+ return true;
+ }
+ ipc::yield(k);
+ }
+ }
+};
+
+template <>
+struct prod_cons_impl>
+ : prod_cons_impl> {
+
+ using flag_t = std::uint64_t;
+
+ template
+ struct elem_t {
+ std::aligned_storage_t data_ {};
+ std::atomic f_ct_ { 0 }; // commit flag
+ };
+
+ alignas(cache_line_size) std::atomic ct_; // commit index
+
+ template
+ bool push(W* /*wrapper*/, F&& f, E* elems) {
+ circ::u2_t cur_ct, nxt_ct;
+ for (unsigned k = 0;;) {
+ cur_ct = ct_.load(std::memory_order_relaxed);
+ if (circ::index_of(nxt_ct = cur_ct + 1) ==
+ circ::index_of(rd_.load(std::memory_order_acquire))) {
+ return false; // full
+ }
+ if (ct_.compare_exchange_weak(cur_ct, nxt_ct, std::memory_order_acq_rel)) {
+ break;
+ }
+ ipc::yield(k);
+ }
+ auto* el = elems + circ::index_of(cur_ct);
+ std::forward(f)(&(el->data_));
+ // set flag & try update wt
+ el->f_ct_.store(~static_cast(cur_ct), std::memory_order_release);
+ while (1) {
+ auto cac_ct = el->f_ct_.load(std::memory_order_acquire);
+ if (cur_ct != wt_.load(std::memory_order_relaxed)) {
+ return true;
+ }
+ if ((~cac_ct) != cur_ct) {
+ return true;
+ }
+ if (!el->f_ct_.compare_exchange_strong(cac_ct, 0, std::memory_order_relaxed)) {
+ return true;
+ }
+ wt_.store(nxt_ct, std::memory_order_release);
+ cur_ct = nxt_ct;
+ nxt_ct = cur_ct + 1;
+ el = elems + circ::index_of(cur_ct);
+ }
+ return true;
+ }
+
+ template
+ bool force_push(W* wrapper, F&&, E*) {
+ wrapper->elems()->disconnect_receiver(1);
+ return false;
+ }
+
+ template class E, std::size_t DS, std::size_t AS>
+ bool pop(W* /*wrapper*/, circ::u2_t& /*cur*/, F&& f, R&& out, E* elems) {
+ byte_t buff[DS];
+ for (unsigned k = 0;;) {
+ auto cur_rd = rd_.load(std::memory_order_relaxed);
+ auto cur_wt = wt_.load(std::memory_order_acquire);
+ auto id_rd = circ::index_of(cur_rd);
+ auto id_wt = circ::index_of(cur_wt);
+ if (id_rd == id_wt) {
+ auto* el = elems + id_wt;
+ auto cac_ct = el->f_ct_.load(std::memory_order_acquire);
+ if ((~cac_ct) != cur_wt) {
+ return false; // empty
+ }
+ if (el->f_ct_.compare_exchange_weak(cac_ct, 0, std::memory_order_relaxed)) {
+ wt_.store(cur_wt + 1, std::memory_order_release);
+ }
+ k = 0;
+ }
+ else {
+ std::memcpy(buff, &(elems[circ::index_of(cur_rd)].data_), sizeof(buff));
+ if (rd_.compare_exchange_weak(cur_rd, cur_rd + 1, std::memory_order_release)) {
+ std::forward(f)(buff);
+ std::forward(out)(true);
+ return true;
+ }
+ ipc::yield(k);
+ }
+ }
+ }
+};
+
+template <>
+struct prod_cons_impl> {
+
+ using rc_t = std::uint64_t;
+
+ enum : rc_t {
+ ep_mask = 0x00000000ffffffffull,
+ ep_incr = 0x0000000100000000ull
+ };
+
+ template
+ struct elem_t {
+ std::aligned_storage_t data_ {};
+ std::atomic rc_ { 0 }; // read-counter
+ };
+
+ alignas(cache_line_size) std::atomic wt_; // write index
+ alignas(cache_line_size) rc_t epoch_ { 0 }; // only one writer
+
+ circ::u2_t cursor() const noexcept {
+ return wt_.load(std::memory_order_acquire);
+ }
+
+ template
+ bool push(W* wrapper, F&& f, E* elems) {
+ E* el;
+ for (unsigned k = 0;;) {
+ circ::cc_t cc = wrapper->elems()->connections(std::memory_order_relaxed);
+ if (cc == 0) return false; // no reader
+ el = elems + circ::index_of(wt_.load(std::memory_order_relaxed));
+ // check all consumers have finished reading this element
+ auto cur_rc = el->rc_.load(std::memory_order_acquire);
+ circ::cc_t rem_cc = cur_rc & ep_mask;
+ if ((cc & rem_cc) && ((cur_rc & ~ep_mask) == epoch_)) {
+ return false; // has not finished yet
+ }
+ // consider rem_cc to be 0 here
+ if (el->rc_.compare_exchange_weak(
+ cur_rc, epoch_ | static_cast(cc), std::memory_order_release)) {
+ break;
+ }
+ ipc::yield(k);
+ }
+ std::forward(f)(&(el->data_));
+ wt_.fetch_add(1, std::memory_order_release);
+ return true;
+ }
+
+ template
+ bool force_push(W* wrapper, F&& f, E* elems) {
+ E* el;
+ epoch_ += ep_incr;
+ for (unsigned k = 0;;) {
+ circ::cc_t cc = wrapper->elems()->connections(std::memory_order_relaxed);
+ if (cc == 0) return false; // no reader
+ el = elems + circ::index_of(wt_.load(std::memory_order_relaxed));
+ // check all consumers have finished reading this element
+ auto cur_rc = el->rc_.load(std::memory_order_acquire);
+ circ::cc_t rem_cc = cur_rc & ep_mask;
+ if (cc & rem_cc) {
+ ipc::log("force_push: k = %u, cc = %u, rem_cc = %u\n", k, cc, rem_cc);
+ cc = wrapper->elems()->disconnect_receiver(rem_cc); // disconnect all invalid readers
+ if (cc == 0) return false; // no reader
+ }
+ // just compare & exchange
+ if (el->rc_.compare_exchange_weak(
+ cur_rc, epoch_ | static_cast(cc), std::memory_order_release)) {
+ break;
+ }
+ ipc::yield(k);
+ }
+ std::forward(f)(&(el->data_));
+ wt_.fetch_add(1, std::memory_order_release);
+ return true;
+ }
+
+ template
+ bool pop(W* wrapper, circ::u2_t& cur, F&& f, R&& out, E* elems) {
+ if (cur == cursor()) return false; // acquire
+ auto* el = elems + circ::index_of(cur++);
+ std::forward(f)(&(el->data_));
+ for (unsigned k = 0;;) {
+ auto cur_rc = el->rc_.load(std::memory_order_acquire);
+ if ((cur_rc & ep_mask) == 0) {
+ std::forward(out)(true);
+ return true;
+ }
+ auto nxt_rc = cur_rc & ~static_cast(wrapper->connected_id());
+ if (el->rc_.compare_exchange_weak(cur_rc, nxt_rc, std::memory_order_release)) {
+ std::forward(out)((nxt_rc & ep_mask) == 0);
+ return true;
+ }
+ ipc::yield(k);
+ }
+ }
+};
+
+template <>
+struct prod_cons_impl> {
+
+ using rc_t = std::uint64_t;
+ using flag_t = std::uint64_t;
+
+ enum : rc_t {
+ rc_mask = 0x00000000ffffffffull,
+ ep_mask = 0x00ffffffffffffffull,
+ ep_incr = 0x0100000000000000ull,
+ ic_mask = 0xff000000ffffffffull,
+ ic_incr = 0x0000000100000000ull
+ };
+
+ template
+ struct elem_t {
+ std::aligned_storage_t data_ {};
+ std::atomic rc_ { 0 }; // read-counter
+ std::atomic f_ct_ { 0 }; // commit flag
+ };
+
+ alignas(cache_line_size) std::atomic ct_; // commit index
+ alignas(cache_line_size) std::atomic epoch_ { 0 };
+
+ circ::u2_t cursor() const noexcept {
+ return ct_.load(std::memory_order_acquire);
+ }
+
+ constexpr static rc_t inc_rc(rc_t rc) noexcept {
+ return (rc & ic_mask) | ((rc + ic_incr) & ~ic_mask);
+ }
+
+ constexpr static rc_t inc_mask(rc_t rc) noexcept {
+ return inc_rc(rc) & ~rc_mask;
+ }
+
+ template
+ bool push(W* wrapper, F&& f, E* elems) {
+ E* el;
+ circ::u2_t cur_ct;
+ rc_t epoch = epoch_.load(std::memory_order_acquire);
+ for (unsigned k = 0;;) {
+ circ::cc_t cc = wrapper->elems()->connections(std::memory_order_relaxed);
+ if (cc == 0) return false; // no reader
+ el = elems + circ::index_of(cur_ct = ct_.load(std::memory_order_relaxed));
+ // check all consumers have finished reading this element
+ auto cur_rc = el->rc_.load(std::memory_order_relaxed);
+ circ::cc_t rem_cc = cur_rc & rc_mask;
+ if ((cc & rem_cc) && ((cur_rc & ~ep_mask) == epoch)) {
+ return false; // has not finished yet
+ }
+ else if (!rem_cc) {
+ auto cur_fl = el->f_ct_.load(std::memory_order_acquire);
+ if ((cur_fl != cur_ct) && cur_fl) {
+ return false; // full
+ }
+ }
+ // consider rem_cc to be 0 here
+ if (el->rc_.compare_exchange_weak(
+ cur_rc, inc_mask(epoch | (cur_rc & ep_mask)) | static_cast(cc), std::memory_order_relaxed) &&
+ epoch_.compare_exchange_weak(epoch, epoch, std::memory_order_acq_rel)) {
+ break;
+ }
+ ipc::yield(k);
+ }
+ // only one thread/process would touch here at one time
+ ct_.store(cur_ct + 1, std::memory_order_release);
+ std::forward(f)(&(el->data_));
+ // set the commit flag (this policy has no separate write index to update)
+ el->f_ct_.store(~static_cast(cur_ct), std::memory_order_release);
+ return true;
+ }
+
+ template
+ bool force_push(W* wrapper, F&& f, E* elems) {
+ E* el;
+ circ::u2_t cur_ct;
+ rc_t epoch = epoch_.fetch_add(ep_incr, std::memory_order_release) + ep_incr;
+ for (unsigned k = 0;;) {
+ circ::cc_t cc = wrapper->elems()->connections(std::memory_order_relaxed);
+ if (cc == 0) return false; // no reader
+ el = elems + circ::index_of(cur_ct = ct_.load(std::memory_order_relaxed));
+ // check all consumers have finished reading this element
+ auto cur_rc = el->rc_.load(std::memory_order_acquire);
+ circ::cc_t rem_cc = cur_rc & rc_mask;
+ if (cc & rem_cc) {
+ ipc::log("force_push: k = %u, cc = %u, rem_cc = %u\n", k, cc, rem_cc);
+ cc = wrapper->elems()->disconnect_receiver(rem_cc); // disconnect all invalid readers
+ if (cc == 0) return false; // no reader
+ }
+ // just compare & exchange
+ if (el->rc_.compare_exchange_weak(
+ cur_rc, inc_mask(epoch | (cur_rc & ep_mask)) | static_cast(cc), std::memory_order_relaxed)) {
+ if (epoch == epoch_.load(std::memory_order_acquire)) {
+ break;
+ }
+ else if (push(wrapper, std::forward(f), elems)) {
+ return true;
+ }
+ epoch = epoch_.fetch_add(ep_incr, std::memory_order_release) + ep_incr;
+ }
+ ipc::yield(k);
+ }
+ // only one thread/process would touch here at one time
+ ct_.store(cur_ct + 1, std::memory_order_release);
+ std::forward(f)(&(el->data_));
+ // set the commit flag (this policy has no separate write index to update)
+ el->f_ct_.store(~static_cast(cur_ct), std::memory_order_release);
+ return true;
+ }
+
+ template
+ bool pop(W* wrapper, circ::u2_t& cur, F&& f, R&& out, E(& elems)[N]) {
+ auto* el = elems + circ::index_of(cur);
+ auto cur_fl = el->f_ct_.load(std::memory_order_acquire);
+ if (cur_fl != ~static_cast(cur)) {
+ return false; // empty
+ }
+ ++cur;
+ std::forward(f)(&(el->data_));
+ for (unsigned k = 0;;) {
+ auto cur_rc = el->rc_.load(std::memory_order_acquire);
+ if ((cur_rc & rc_mask) == 0) {
+ std::forward(out)(true);
+ el->f_ct_.store(cur + N - 1, std::memory_order_release);
+ return true;
+ }
+ auto nxt_rc = inc_rc(cur_rc) & ~static_cast(wrapper->connected_id());
+ bool last_one = false;
+ if ((last_one = (nxt_rc & rc_mask) == 0)) {
+ el->f_ct_.store(cur + N - 1, std::memory_order_release);
+ }
+ if (el->rc_.compare_exchange_weak(cur_rc, nxt_rc, std::memory_order_release)) {
+ std::forward(out)(last_one);
+ return true;
+ }
+ ipc::yield(k);
+ }
+ }
+};
+
+} // namespace ipc
diff --git a/crazy_functions/高级功能函数模板.py b/crazy_functions/高级功能函数模板.py
index ec14e74..b5c84b9 100644
--- a/crazy_functions/高级功能函数模板.py
+++ b/crazy_functions/高级功能函数模板.py
@@ -11,7 +11,7 @@ def 高阶功能模板函数(txt, top_p, temperature, chatbot, history, systemPr
for i in range(5):
currentMonth = (datetime.date.today() + datetime.timedelta(days=i)).month
currentDay = (datetime.date.today() + datetime.timedelta(days=i)).day
- i_say = f'历史中哪些事件发生在{currentMonth}月{currentDay}日?列举两条并发送相关图片。发送图片时,请使用Markdown,将Unsplash API中的PUT_YOUR_QUERY_HERE替换成描述改事件的三个最重要的单词。'
+ i_say = f'历史中哪些事件发生在{currentMonth}月{currentDay}日?列举两条并发送相关图片。发送图片时,请使用Markdown,将Unsplash API中的PUT_YOUR_QUERY_HERE替换成描述该事件的一个最重要的单词。'
chatbot.append((i_say, "[Local Message] waiting gpt response."))
yield chatbot, history, '正常' # 由于请求gpt需要一段时间,我们先及时地做一次状态显示
diff --git a/functional_crazy.py b/functional_crazy.py
index af4c83e..2f91a32 100644
--- a/functional_crazy.py
+++ b/functional_crazy.py
@@ -19,10 +19,10 @@ def get_crazy_functionals():
function_plugins = {
"请解析并解构此项目本身": {
- # HotReload 的意思是热更新,修改函数插件后,不需要重启程序,代码直接生效
+ "AsButton": False, # 加入下拉菜单中
"Function": 解析项目本身
},
- "解析整个py项目": {
+ "解析整个Py项目": {
"Color": "stop", # 按钮颜色
"Function": 解析一个Python项目
},
@@ -32,9 +32,10 @@ def get_crazy_functionals():
},
"解析整个C++项目": {
"Color": "stop", # 按钮颜色
+ "AsButton": False, # 加入下拉菜单中
"Function": 解析一个C项目
},
- "读tex论文写摘要": {
+ "读Tex论文写摘要": {
"Color": "stop", # 按钮颜色
"Function": 读文章写摘要
},
@@ -52,7 +53,7 @@ def get_crazy_functionals():
},
}
- # VisibleLevel=1 经过测试,但功能未达到理想状态
+ # VisibleLevel=1 经过测试,但功能上距离达到完美状态还差一点点
if UserVisibleLevel >= 1:
from crazy_functions.批量总结PDF文档 import 批量总结PDF文档
from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer
@@ -60,11 +61,11 @@ def get_crazy_functionals():
function_plugins.update({
"[仅供开发调试] 批量总结PDF文档": {
"Color": "stop",
- # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
- "Function": HotReload(批量总结PDF文档)
+ "Function": HotReload(批量总结PDF文档) # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
},
"[仅供开发调试] 批量总结PDF文档pdfminer": {
"Color": "stop",
+ "AsButton": False, # 加入下拉菜单中
"Function": HotReload(批量总结PDF文档pdfminer)
},
"[仅供开发调试] 批量总结Word文档": {
diff --git a/main.py b/main.py
index 10bbddc..cdcfaa4 100644
--- a/main.py
+++ b/main.py
@@ -4,9 +4,8 @@ from predict import predict
from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf
# 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
-proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION = \
- get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION')
-
+proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT = \
+ get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT')
# 如果WEB_PORT是-1, 则随机选取WEB端口
PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT
@@ -17,18 +16,18 @@ title_html = """ChatGPT 学术优化
"""
# 问询记录, python 版本建议3.9+(越新越好)
import logging
-os.makedirs('gpt_log', exist_ok=True)
-try:logging.basicConfig(filename='gpt_log/chat_secrets.log', level=logging.INFO, encoding='utf-8')
-except:logging.basicConfig(filename='gpt_log/chat_secrets.log', level=logging.INFO)
-print('所有问询记录将自动保存在本地目录./gpt_log/chat_secrets.log, 请注意自我隐私保护哦!')
+os.makedirs("gpt_log", exist_ok=True)
+try:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO, encoding="utf-8")
+except:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO)
+print("所有问询记录将自动保存在本地目录./gpt_log/chat_secrets.log, 请注意自我隐私保护哦!")
# 一些普通功能模块
from functional import get_functionals
functional = get_functionals()
-# 对一些丧心病狂的实验性功能模块进行测试
+# 高级函数插件
from functional_crazy import get_crazy_functionals
-crazy_functional = get_crazy_functionals()
+crazy_fns = get_crazy_functionals()
# 处理markdown文本格式的转变
gr.Chatbot.postprocess = format_io
@@ -40,11 +39,10 @@ set_theme = adjust_theme()
cancel_handles = []
with gr.Blocks(theme=set_theme, analytics_enabled=False) as demo:
gr.HTML(title_html)
- with gr.Row():
+ with gr.Row().style(equal_height=True):
with gr.Column(scale=2):
chatbot = gr.Chatbot()
- chatbot.style(height=1150)
- chatbot.style()
+ chatbot.style(height=CHATBOT_HEIGHT)
history = gr.State([])
with gr.Column(scale=1):
with gr.Row():
@@ -66,49 +64,70 @@ with gr.Blocks(theme=set_theme, analytics_enabled=False) as demo:
with gr.Row():
gr.Markdown("注意:以下“红颜色”标识的函数插件需从input区读取路径作为参数.")
with gr.Row():
- for k in crazy_functional:
- variant = crazy_functional[k]["Color"] if "Color" in crazy_functional[k] else "secondary"
- crazy_functional[k]["Button"] = gr.Button(k, variant=variant)
+ for k in crazy_fns:
+ if not crazy_fns[k].get("AsButton", True): continue
+ variant = crazy_fns[k]["Color"] if "Color" in crazy_fns[k] else "secondary"
+ crazy_fns[k]["Button"] = gr.Button(k, variant=variant)
with gr.Row():
- with gr.Accordion("展开“文件上传区”。上传本地文件供“红颜色”的函数插件调用。", open=False):
- file_upload = gr.Files(label='任何文件, 但推荐上传压缩文件(zip, tar)', file_count="multiple")
+ with gr.Accordion("更多函数插件", open=True):
+ dropdown_fn_list = [k for k in crazy_fns.keys() if not crazy_fns[k].get("AsButton", True)]
+ with gr.Column(scale=1):
+ dropdown = gr.Dropdown(dropdown_fn_list, value=r"打开插件列表", label="").style(container=False)
+ with gr.Column(scale=1):
+ switchy_bt = gr.Button(r"请先从插件列表中选择", variant="secondary")
+ with gr.Row():
+ with gr.Accordion("点击展开“文件上传区”。上传本地文件可供红色函数插件调用。", open=False) as area_file_up:
+ file_upload = gr.Files(label="任何文件, 但推荐上传压缩文件(zip, tar)", file_count="multiple")
with gr.Accordion("展开SysPrompt & GPT参数 & 交互界面布局", open=False):
system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt)
top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
- checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区"],
- value=["基础功能区", "函数插件区"], label="显示哪些功能区")
+ checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区")
- def what_is_this(a):
+ # 功能区显示开关与功能区的互动
+ def fn_area_visibility(a):
ret = {}
- # if area_basic_fn.visible != ("基础功能区" in a):
- ret.update({area_basic_fn: gr.update(visible=("基础功能区" in a))})
- # if area_crazy_fn.visible != ("函数插件区" in a):
- ret.update({area_crazy_fn: gr.update(visible=("函数插件区" in a))})
+ ret.update({area_basic_fn: gr.update(visible=("基础功能区" in a))})
+ ret.update({area_crazy_fn: gr.update(visible=("函数插件区" in a))})
return ret
-
- checkboxes.select(what_is_this, [checkboxes], [area_basic_fn, area_crazy_fn] )
-
- predict_args = dict(fn=predict, inputs=[txt, top_p, temperature, chatbot, history, system_prompt], outputs=[chatbot, history, statusDisplay], show_progress=True)
+ checkboxes.select(fn_area_visibility, [checkboxes], [area_basic_fn, area_crazy_fn] )
+ # 整理反复出现的控件句柄组合
+ input_combo = [txt, top_p, temperature, chatbot, history, system_prompt]
+ output_combo = [chatbot, history, statusDisplay]
+ predict_args = dict(fn=predict, inputs=input_combo, outputs=output_combo, show_progress=True)
empty_txt_args = dict(fn=lambda: "", inputs=[], outputs=[txt]) # 用于在提交后清空输入栏
-
- cancel_handles.append(txt.submit(**predict_args))
- # txt.submit(**empty_txt_args) 在提交后清空输入栏
- cancel_handles.append(submitBtn.click(**predict_args))
- # submitBtn.click(**empty_txt_args) 在提交后清空输入栏
- resetBtn.click(lambda: ([], [], "已重置"), None, [chatbot, history, statusDisplay])
+ # 提交按钮、重置按钮
+ cancel_handles.append(txt.submit(**predict_args)) #; txt.submit(**empty_txt_args) 在提交后清空输入栏
+ cancel_handles.append(submitBtn.click(**predict_args)) #; submitBtn.click(**empty_txt_args) 在提交后清空输入栏
+ resetBtn.click(lambda: ([], [], "已重置"), None, output_combo)
+ # 基础功能区的回调函数注册
for k in functional:
- click_handle = functional[k]["Button"].click(predict,
- [txt, top_p, temperature, chatbot, history, system_prompt, gr.State(True), gr.State(k)], [chatbot, history, statusDisplay], show_progress=True)
+ click_handle = functional[k]["Button"].click(predict, [*input_combo, gr.State(True), gr.State(k)], output_combo, show_progress=True)
cancel_handles.append(click_handle)
+ # 文件上传区,接收文件后与chatbot的互动
file_upload.upload(on_file_uploaded, [file_upload, chatbot, txt], [chatbot, txt])
- for k in crazy_functional:
- click_handle = crazy_functional[k]["Button"].click(crazy_functional[k]["Function"],
- [txt, top_p, temperature, chatbot, history, system_prompt, gr.State(PORT)], [chatbot, history, statusDisplay]
- )
- try: click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot])
- except: pass
+ # 函数插件-固定按钮区
+ for k in crazy_fns:
+ if not crazy_fns[k].get("AsButton", True): continue
+ click_handle = crazy_fns[k]["Button"].click(crazy_fns[k]["Function"], [*input_combo, gr.State(PORT)], output_combo)
+ click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot])
cancel_handles.append(click_handle)
+ # 函数插件-下拉菜单与随变按钮的互动
+ def on_dropdown_changed(k):
+ variant = crazy_fns[k]["Color"] if "Color" in crazy_fns[k] else "secondary"
+ return {switchy_bt: gr.update(value=k, variant=variant)}
+ dropdown.select(on_dropdown_changed, [dropdown], [switchy_bt] )
+ # 随变按钮的回调函数注册
+    def route(k, *args, **kwargs):  # forward a click on the switchable button to the plugin named by its current label `k`
+        if k in [r"打开插件列表", r"请先从插件列表中选择"]: return  # placeholder labels (dropdown default / initial button text): no plugin chosen yet
+        yield from crazy_fns[k]["Function"](*args, **kwargs)
+ click_handle = switchy_bt.click(route,[switchy_bt, *input_combo, gr.State(PORT)], output_combo)
+ click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot])
+ def expand_file_area(file_upload, area_file_up):
+ if len(file_upload)>0: return {area_file_up: gr.update(open=True)}
+ click_handle.then(expand_file_area, [file_upload, area_file_up], [area_file_up])
+ cancel_handles.append(click_handle)
+ # 终止按钮的回调函数注册
stopBtn.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles)
# gradio的inbrowser触发不太稳定,回滚代码到原始的浏览器打开函数
@@ -117,7 +136,7 @@ def auto_opentab_delay():
print(f"如果浏览器没有自动打开,请复制并转到以下URL: http://localhost:{PORT}")
def open():
time.sleep(2)
- webbrowser.open_new_tab(f'http://localhost:{PORT}')
+ webbrowser.open_new_tab(f"http://localhost:{PORT}")
threading.Thread(target=open, name="open-browser", daemon=True).start()
auto_opentab_delay()
diff --git a/predict.py b/predict.py
index 84036bc..31a5861 100644
--- a/predict.py
+++ b/predict.py
@@ -96,13 +96,19 @@ def predict_no_ui_long_connection(inputs, top_p, temperature, history=[], sys_pr
except StopIteration: break
if len(chunk)==0: continue
if not chunk.startswith('data:'):
- chunk = get_full_error(chunk.encode('utf8'), stream_response)
- raise ConnectionAbortedError("OpenAI拒绝了请求:" + chunk.decode())
- delta = json.loads(chunk.lstrip('data:'))['choices'][0]["delta"]
+ error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode()
+ if "reduce the length" in error_msg:
+ raise ConnectionAbortedError("OpenAI拒绝了请求:" + error_msg)
+ else:
+ raise RuntimeError("OpenAI拒绝了请求:" + error_msg)
+ json_data = json.loads(chunk.lstrip('data:'))['choices'][0]
+ delta = json_data["delta"]
if len(delta) == 0: break
if "role" in delta: continue
if "content" in delta: result += delta["content"]; print(delta["content"], end='')
else: raise RuntimeError("意外Json结构:"+delta)
+ if json_data['finish_reason'] == 'length':
+ raise ConnectionAbortedError("正常结束,但显示Token不足。")
return result
diff --git a/toolbox.py b/toolbox.py
index b78a513..bf88760 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -2,21 +2,21 @@ import markdown, mdtex2html, threading, importlib, traceback, importlib, inspect
from show_math import convert as convert_math
from functools import wraps
-def get_reduce_token_percent(e):
+def get_reduce_token_percent(text):
try:
# text = "maximum context length is 4097 tokens. However, your messages resulted in 4870 tokens"
pattern = r"(\d+)\s+tokens\b"
match = re.findall(pattern, text)
- eps = 50 # 稍微留一点余地, 确保下次别再超过token
- max_limit = float(match[0]) - eps
+ EXCEED_ALLO = 500 # 稍微留一点余地,否则在回复时会因余量太少出问题
+ max_limit = float(match[0]) - EXCEED_ALLO
current_tokens = float(match[1])
ratio = max_limit/current_tokens
assert ratio > 0 and ratio < 1
- return ratio
+ return ratio, str(int(current_tokens-max_limit))
except:
- return 0.5
+ return 0.5, '不详'
-def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[], sys_prompt='', long_connection=False):
+def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[], sys_prompt='', long_connection=True):
"""
调用简单的predict_no_ui接口,但是依然保留了些许界面心跳功能,当对话太长时,会自动采用二分法截断
i_say: 当前输入
@@ -45,19 +45,18 @@ def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temp
break
except ConnectionAbortedError as token_exceeded_error:
# 尝试计算比例,尽可能多地保留文本
- p_ratio = get_reduce_token_percent(str(token_exceeded_error))
+ p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
if len(history) > 0:
history = [his[ int(len(his) *p_ratio): ] for his in history if his is not None]
- mutable[1] = 'Warning! History conversation is too long, cut into half. '
else:
i_say = i_say[: int(len(i_say) *p_ratio) ]
- mutable[1] = 'Warning! Input file is too long, cut into half. '
+ mutable[1] = f'警告,文本过长将进行截断,Token溢出数:{n_exceed},截断比例:{(1-p_ratio):.0%}。'
except TimeoutError as e:
- mutable[0] = '[Local Message] Failed with timeout.'
+ mutable[0] = '[Local Message] 请求超时。'
raise TimeoutError
except Exception as e:
- mutable[0] = f'[Local Message] Failed with {str(e)}.'
- raise RuntimeError(f'[Local Message] Failed with {str(e)}.')
+ mutable[0] = f'[Local Message] 异常:{str(e)}.'
+ raise RuntimeError(f'[Local Message] 异常:{str(e)}.')
# 创建新线程发出http请求
thread_name = threading.Thread(target=mt, args=(i_say, history)); thread_name.start()
# 原来的线程则负责持续更新UI,实现一个超时倒计时,并等待新线程的任务完成
@@ -286,7 +285,7 @@ def on_report_generated(files, chatbot):
report_files = find_recent_files('gpt_log')
if len(report_files) == 0: return report_files, chatbot
# files.extend(report_files)
- chatbot.append(['汇总报告如何远程获取?', '汇总报告已经添加到右侧文件上传区,请查收。'])
+ chatbot.append(['汇总报告如何远程获取?', '汇总报告已经添加到右侧“文件上传区”(可能处于折叠状态),请查收。'])
return report_files, chatbot
def get_conf(*args):
From b0dfef48e9b81afb389e88ef26d5d901e1c1c33c Mon Sep 17 00:00:00 2001
From: Your Name
Date: Sat, 1 Apr 2023 03:39:43 +0800
Subject: [PATCH 34/35] =?UTF-8?q?=E5=B0=86css=E6=A0=B7=E5=BC=8F=E7=A7=BB?=
=?UTF-8?q?=E5=8A=A8=E5=88=B0theme=E6=96=87=E4=BB=B6=EF=BC=8C=E5=87=8F?=
=?UTF-8?q?=E5=B0=91main.py=E7=9A=84=E4=BB=A3=E7=A0=81=E8=A1=8C=E6=95=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
main.py | 16 ++--------------
theme.py | 12 ++++++++++++
2 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/main.py b/main.py
index 23f848b..08cf2fb 100644
--- a/main.py
+++ b/main.py
@@ -33,23 +33,11 @@ crazy_fns = get_crazy_functionals()
gr.Chatbot.postprocess = format_io
# 做一些外观色彩上的调整
-from theme import adjust_theme
+from theme import adjust_theme, advanced_css
set_theme = adjust_theme()
-CSS = """
-.markdown-body table {
- border: 1px solid #ddd;
- border-collapse: collapse;
-}
-
-.markdown-body th, .markdown-body td {
- border: 1px solid #ddd;
- padding: 5px;
-}
-"""
-
cancel_handles = []
-with gr.Blocks(theme=set_theme, analytics_enabled=False, css=CSS) as demo:
+with gr.Blocks(theme=set_theme, analytics_enabled=False, css=advanced_css) as demo:
gr.HTML(title_html)
with gr.Row().style(equal_height=True):
with gr.Column(scale=2):
diff --git a/theme.py b/theme.py
index d7544ed..14f16fb 100644
--- a/theme.py
+++ b/theme.py
@@ -80,3 +80,15 @@ def adjust_theme():
except:
set_theme = None; print('gradio版本较旧, 不能自定义字体和颜色')
return set_theme
+
+advanced_css = """
+.markdown-body table {
+ border: 1px solid #ddd;
+ border-collapse: collapse;
+}
+
+.markdown-body th, .markdown-body td {
+ border: 1px solid #ddd;
+ padding: 5px;
+}
+"""
\ No newline at end of file
From 2c963cc368a32306910a5d1f56cb10b6e8ec8a13 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Sat, 1 Apr 2023 04:11:31 +0800
Subject: [PATCH 35/35] =?UTF-8?q?=E4=BA=A4=E4=BA=92=E4=BC=98=E5=8C=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
config.py | 4 +---
functional_crazy.py | 4 ++--
main.py | 8 ++++----
toolbox.py | 35 +++++++++++++++++++++++------------
4 files changed, 30 insertions(+), 21 deletions(-)
diff --git a/config.py b/config.py
index 54e40a5..f4e1bc8 100644
--- a/config.py
+++ b/config.py
@@ -16,15 +16,13 @@ if USE_PROXY:
"http": "socks5h://localhost:11284",
"https": "socks5h://localhost:11284",
}
- print('网络代理状态:运行。')
else:
proxies = None
- print('网络代理状态:未配置。无代理状态下很可能无法访问。')
# [step 3]>> 以下配置可以优化体验,但大部分场合下并不需要修改
# 对话窗的高度
-CHATBOT_HEIGHT = 1117
+CHATBOT_HEIGHT = 1116
# 发送请求到OpenAI后,等待多久判定为超时
TIMEOUT_SECONDS = 25
diff --git a/functional_crazy.py b/functional_crazy.py
index 2f91a32..456bdcb 100644
--- a/functional_crazy.py
+++ b/functional_crazy.py
@@ -18,7 +18,7 @@ def get_crazy_functionals():
from crazy_functions.代码重写为全英文_多线程 import 全项目切换英文
function_plugins = {
- "请解析并解构此项目本身": {
+ "请解析并解构此项目本身(源码自译解)": {
"AsButton": False, # 加入下拉菜单中
"Function": 解析项目本身
},
@@ -30,7 +30,7 @@ def get_crazy_functionals():
"Color": "stop", # 按钮颜色
"Function": 解析一个C项目的头文件
},
- "解析整个C++项目": {
+ "解析整个C++项目(.cpp/.h)": {
"Color": "stop", # 按钮颜色
"AsButton": False, # 加入下拉菜单中
"Function": 解析一个C项目
diff --git a/main.py b/main.py
index 08cf2fb..6ad9c28 100644
--- a/main.py
+++ b/main.py
@@ -54,7 +54,7 @@ with gr.Blocks(theme=set_theme, analytics_enabled=False, css=advanced_css) as de
stopBtn = gr.Button("停止", variant="secondary"); stopBtn.style(size="sm")
with gr.Row():
from check_proxy import check_proxy
- statusDisplay = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行。当前模型: {LLM_MODEL} \n {check_proxy(proxies)}")
+ status = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行。当前模型: {LLM_MODEL} \n {check_proxy(proxies)}")
with gr.Accordion("基础功能区", open=True) as area_basic_fn:
with gr.Row():
for k in functional:
@@ -93,8 +93,8 @@ with gr.Blocks(theme=set_theme, analytics_enabled=False, css=advanced_css) as de
checkboxes.select(fn_area_visibility, [checkboxes], [area_basic_fn, area_crazy_fn] )
# 整理反复出现的控件句柄组合
input_combo = [txt, top_p, temperature, chatbot, history, system_prompt]
- output_combo = [chatbot, history, statusDisplay]
- predict_args = dict(fn=predict, inputs=input_combo, outputs=output_combo, show_progress=True)
+ output_combo = [chatbot, history, status]
+ predict_args = dict(fn=predict, inputs=input_combo, outputs=output_combo)
empty_txt_args = dict(fn=lambda: "", inputs=[], outputs=[txt]) # 用于在提交后清空输入栏
# 提交按钮、重置按钮
cancel_handles.append(txt.submit(**predict_args)) #; txt.submit(**empty_txt_args) 在提交后清空输入栏
@@ -102,7 +102,7 @@ with gr.Blocks(theme=set_theme, analytics_enabled=False, css=advanced_css) as de
resetBtn.click(lambda: ([], [], "已重置"), None, output_combo)
# 基础功能区的回调函数注册
for k in functional:
- click_handle = functional[k]["Button"].click(predict, [*input_combo, gr.State(True), gr.State(k)], output_combo, show_progress=True)
+ click_handle = functional[k]["Button"].click(predict, [*input_combo, gr.State(True), gr.State(k)], output_combo)
cancel_handles.append(click_handle)
# 文件上传区,接收文件后与chatbot的互动
file_upload.upload(on_file_uploaded, [file_upload, chatbot, txt], [chatbot, txt])
diff --git a/toolbox.py b/toolbox.py
index 6c48cbb..363acb6 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -1,6 +1,6 @@
import markdown, mdtex2html, threading, importlib, traceback, importlib, inspect, re
from show_math import convert as convert_math
-from functools import wraps
+from functools import wraps, lru_cache
def get_reduce_token_percent(text):
try:
@@ -289,22 +289,33 @@ def on_report_generated(files, chatbot):
chatbot.append(['汇总报告如何远程获取?', '汇总报告已经添加到右侧“文件上传区”(可能处于折叠状态),请查收。'])
return report_files, chatbot
+@lru_cache(maxsize=128)
+def read_single_conf_with_lru_cache(arg):
+    """Return option `arg` from config_private, falling back to config; cached, so the checks and status prints run once per key."""
+    try: r = getattr(importlib.import_module('config_private'), arg)
+    except Exception: r = getattr(importlib.import_module('config'), arg)  # no usable private override for this key
+    # When reading API_KEY, check whether the user forgot to edit the config
+    if arg=='API_KEY':
+        # A valid API_KEY is "sk-" followed by 48 ASCII letters/digits; a non-string value is reported the same way
+        if isinstance(r, str) and re.match(r"sk-[a-zA-Z0-9]{48}$", r):
+            print(f"[API_KEY] 您的 API_KEY 是: {r[:15]}*** API_KEY 导入成功")
+        else:  # raise explicitly: a plain `assert` is stripped under `python -O` and would let a bad key through
+            raise AssertionError("正确的 API_KEY 是 'sk-' + '48 位大小写字母数字' 的组合,请在config文件中修改API密钥, 添加海外代理之后再运行。" +
+                "(如果您刚更新过代码,请确保旧版config_private文件中没有遗留任何新增键值)")
+    if arg=='proxies':
+        if r is None:
+            print('[PROXY] 网络代理状态:未配置。无代理状态下很可能无法访问。建议:检查USE_PROXY选项是否修改。')
+        else:
+            print('[PROXY] 网络代理状态:已配置。配置信息如下:', r)
+            if not isinstance(r, dict): raise AssertionError('proxies格式错误,请注意proxies选项的格式,不要遗漏括号。')
+    return r
+
def get_conf(*args):
# 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
res = []
for arg in args:
- try: r = getattr(importlib.import_module('config_private'), arg)
- except: r = getattr(importlib.import_module('config'), arg)
+ r = read_single_conf_with_lru_cache(arg)
res.append(r)
- # 在读取API_KEY时,检查一下是不是忘了改config
- if arg=='API_KEY':
- # 正确的 API_KEY 是 "sk-" + 48 位大小写字母数字的组合
- API_MATCH = re.match(r"sk-[a-zA-Z0-9]{48}$", r)
- if API_MATCH:
- print(f"您的 API_KEY 是: {r[:15]}*** \nAPI_KEY 导入成功")
- else:
- assert False, "正确的 API_KEY 是 'sk-' + '48 位大小写字母数字' 的组合,请在config文件中修改API密钥, 添加海外代理之后再运行。" + \
- "(如果您刚更新过代码,请确保旧版config_private文件中没有遗留任何新增键值)"
return res
def clear_line_break(txt):