concat pdf after translation

2023-07-20 12:46:48 +08:00 · 2023-07-20 12:46:48 +08:00 · 7368580cd6
commit 7368580cd6
parent 73b22f85be
2 changed files with 45 additions and 1 deletions
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@ -773,9 +773,18 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
        if modified_pdf_success:
            yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history)    # 刷新Gradio前端界面
            result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path
+            origin_pdf = pj(work_folder_original, f'{main_file_original}.pdf') # get pdf path
            if os.path.exists(pj(work_folder, '..', 'translation')):
                shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
            promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot)  # promote file to web UI
+            # 将两个PDF拼接
+            if original_pdf_success: 
+                try:
+                    concat_pdf = pj(work_folder_modified, f'comparison.pdf')
+                    merge_pdfs(origin_pdf, result_pdf, concat_pdf)
+                    promote_file_to_downloadzone(concat_pdf, rename_file=None, chatbot=chatbot)  # promote file to web UI
+                except:
+                    pass
            return True # 成功啦
        else:
            if n_fix>=max_try: break
@ -794,4 +803,38 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
    return False # 失败啦


-
+def merge_pdfs(pdf1_path, pdf2_path, output_path):
+    """
+    Build a side-by-side comparison PDF from two PDF files.
+
+    Output page i shows page i of `pdf1_path` on the left (placed at the
+    origin) and page i of `pdf2_path` on the right (shifted horizontally by
+    the width of the left page). Each output sheet is twice as wide as the
+    wider of the two pages and as tall as the taller one. When one document
+    has fewer pages than the other, blank placeholder pages fill the gap.
+
+    Args:
+        pdf1_path: path of the PDF shown on the left half.
+        pdf2_path: path of the PDF shown on the right half.
+        output_path: path the merged PDF is written to (overwritten if present).
+
+    NOTE(review): the camelCase names used below (PdfFileReader, getPage,
+    mergeTranslatedPage, ...) are the legacy PyPDF2 API; confirm that the
+    installed pypdf2 release still provides them.
+    """
+    import PyPDF2
+
+    def _page_or_blank(reader, page_count, index, fallback_reader):
+        # Return the requested page, or a blank placeholder when the document is too short.
+        if index < page_count:
+            return reader.getPage(index)
+        # Size the placeholder after the document it stands in for; only an
+        # empty document (no page to take a size from) borrows the other one's size.
+        size_source = reader if page_count else fallback_reader
+        return PyPDF2.PageObject.createBlankPage(size_source)
+
+    with open(pdf1_path, 'rb') as pdf1_file, open(pdf2_path, 'rb') as pdf2_file:
+        pdf1_reader = PyPDF2.PdfFileReader(pdf1_file)
+        pdf2_reader = PyPDF2.PdfFileReader(pdf2_file)
+        output_writer = PyPDF2.PdfFileWriter()
+        pdf1_pages = pdf1_reader.numPages
+        pdf2_pages = pdf2_reader.numPages
+        # The longer document decides how many comparison sheets are produced
+        for page_num in range(max(pdf1_pages, pdf2_pages)):
+            page1 = _page_or_blank(pdf1_reader, pdf1_pages, page_num, pdf2_reader)
+            page2 = _page_or_blank(pdf2_reader, pdf2_pages, page_num, pdf1_reader)
+            left_width = page1.mediaBox.getWidth()
+            # Create a new empty sheet wide enough to hold both pages next to each other
+            new_page = PyPDF2.PageObject.createBlankPage(
+                width=2 * max(left_width, page2.mediaBox.getWidth()),
+                height=max(page1.mediaBox.getHeight(), page2.mediaBox.getHeight())
+            )
+            new_page.mergeTranslatedPage(page1, 0, 0)
+            # The right-hand page starts where the left-hand page ends
+            new_page.mergeTranslatedPage(page2, left_width, 0)
+            output_writer.addPage(new_page)
+        # Write while both inputs are still open; presumably the readers fetch
+        # page data from these file handles on demand.
+        with open(output_path, 'wb') as output_file:
+            output_writer.write(output_file)
--- a/requirements.txt
+++ b/requirements.txt
@ -18,3 +18,4 @@ openai
 numpy
 arxiv
 rich
+pypdf2