Skip to content

为 openai-translator 代码添加中文注释 #60

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions langchain/openai-translator/ai_translator/book/book.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from .page import Page
from .page import Page # 导入 Page 类,用于添加页面

class Book:
    """Container for the pages that belong to one PDF file."""

    def __init__(self, pdf_file_path):
        """Create a book with no pages.

        Args:
            pdf_file_path: Path of the PDF file; stored unchanged on the
                instance.
        """
        self.pdf_file_path = pdf_file_path
        # Pages are kept in the order in which they are added.
        self.pages = []

    def add_page(self, page: Page):
        """Append ``page`` to the end of the page list."""
        self.pages.append(page)
116 changes: 58 additions & 58 deletions langchain/openai-translator/ai_translator/book/content.py
Original file line number Diff line number Diff line change
@@ -1,85 +1,85 @@
import pandas as pd
import pandas as pd # 导入 pandas 库,用于处理表格数据

from enum import Enum, auto
from PIL import Image as PILImage
from utils import LOG
from io import StringIO
from enum import Enum, auto # 导入枚举类和自动编号功能,用于定义 ContentType 枚举类
from PIL import Image as PILImage # 导入 PIL 库中的 Image 类并重命名为 PILImage,用于处理图像数据
from utils import LOG # 导入 utils 模块中的 LOG 对象,用于输出调试信息
from io import StringIO # 导入 StringIO 类,用于将字符串转换为文件对象

class ContentType(Enum):
    """Kinds of content that a page element can carry."""

    TEXT = auto()   # plain text
    TABLE = auto()  # tabular data
    IMAGE = auto()  # image data

class Content:
    """One piece of page content together with its translation state."""

    def __init__(self, content_type, original, translation=None):
        """Store the content and mark it as not yet translated.

        Args:
            content_type: Kind of content (compared against ``ContentType``
                members when a translation is validated).
            original: The original, untranslated content.
            translation: Optional translated content; stored without
                validation.
        """
        self.content_type = content_type
        self.original = original
        self.translation = translation
        # ``status`` always starts as False, even when a translation is given.
        self.status = False

    def set_translation(self, translation, status):
        """Validate and store a translation together with its status.

        Args:
            translation: Translated content; its type must match
                ``self.content_type`` (see ``check_translation_type``).
            status: Value stored in ``self.status``.

        Raises:
            ValueError: If the translation type does not match the content
                type.
        """
        if not self.check_translation_type(translation):
            raise ValueError(f"Invalid translation type. Expected {self.content_type}, but got {type(translation)}")
        self.translation = translation
        self.status = status

    def check_translation_type(self, translation):
        """Return True when ``translation`` has the type the content expects.

        TEXT expects ``str``, TABLE expects ``list`` and IMAGE expects a
        ``PILImage.Image``; any other combination yields False.
        """
        if self.content_type == ContentType.TEXT and isinstance(translation, str):
            return True
        elif self.content_type == ContentType.TABLE and isinstance(translation, list):
            return True
        elif self.content_type == ContentType.IMAGE and isinstance(translation, PILImage.Image):
            return True
        return False

    def __str__(self):
        """Return the original content as a string."""
        # __str__ must return a str; converting explicitly avoids a TypeError
        # when the original content is not itself a string.
        return str(self.original)


class TableContent(Content):
    """Table content whose original (and translated) form is a DataFrame."""

    def __init__(self, data, translation=None):
        """Convert the extracted table data to a DataFrame and store it.

        Args:
            data: Extracted table data; it is indexed as ``data[0]`` below,
                so presumably a sequence of rows -- confirm against callers.
            translation: Optional pre-existing translation, forwarded to
                ``Content.__init__`` unchanged.

        Raises:
            ValueError: If the DataFrame shape differs from the shape of
                ``data``.
        """
        df = pd.DataFrame(data)

        # Verify that the number of rows and columns in the extracted data
        # and in the DataFrame object match.
        if len(data) != len(df) or len(data[0]) != len(df.columns):
            raise ValueError("The number of rows and columns in the extracted table data and DataFrame object do not match.")

        # Forward ``translation`` so that a caller-supplied value is not
        # silently discarded.
        super().__init__(ContentType.TABLE, df, translation)

    @staticmethod
    def _split_bracketed_rows(text):
        """Return the cells of every ``[...]`` group found in ``text``.

        Each group is cut at its own closing bracket, so every row keeps all
        of its characters no matter what separates the groups.
        """
        return [chunk.split(']', 1)[0].split(', ') for chunk in text.split('[')[1:]]

    def set_translation(self, translation, status):
        """Parse a bracketed table string and store it as a DataFrame.

        The first ``[...]`` group supplies the column names and every
        following group supplies one data row. Any failure is logged and
        leaves ``translation`` as None and ``status`` as False.

        Args:
            translation: Translated table text such as
                ``"[a, b] [1, 2] [3, 4]"``.
            status: Value stored in ``self.status`` on success.
        """
        try:
            if not isinstance(translation, str):
                raise ValueError(f"Invalid translation type. Expected str, but got {type(translation)}")

            LOG.debug(f"[translation]\n{translation}")
            rows = self._split_bracketed_rows(translation)
            if not rows:
                raise ValueError("No bracketed rows found in the translated table text.")
            # First group: column names; remaining groups: data rows.
            header, *data_rows = rows
            translated_df = pd.DataFrame(data_rows, columns=header)
            LOG.debug(f"[translated_df]\n{translated_df}")
            self.translation = translated_df
            self.status = status
        except Exception as e:
            # Deliberate best-effort: a bad translation must not abort the
            # caller, so the failure is recorded on the instance instead.
            LOG.error(f"An error occurred during table translation: {e}")
            self.translation = None
            self.status = False

    def __str__(self):
        """Return the original table as text without header or index."""
        return self.original.to_string(header=False, index=False)

    def iter_items(self, translated=False):
        """Yield ``(row_idx, col_idx, item)`` for every cell.

        Args:
            translated: Iterate over the translated table when True,
                otherwise over the original one.

        ``row_idx`` is the DataFrame index label of the row and ``col_idx``
        is the position of the cell within that row.
        """
        target_df = self.translation if translated else self.original
        for row_idx, row in target_df.iterrows():
            for col_idx, item in enumerate(row):
                yield (row_idx, col_idx, item)

    def update_item(self, row_idx, col_idx, new_value, translated=False):
        """Overwrite one cell of the original or the translated table.

        Args:
            row_idx: Index label of the row, as yielded by ``iter_items``.
            col_idx: Position of the column, as yielded by ``iter_items``.
            new_value: Value to store in the cell.
            translated: Update the translated table when True, otherwise the
                original one.
        """
        target_df = self.translation if translated else self.original
        # ``.at`` is label-based while ``col_idx`` is positional, so map the
        # position to its column label first; the translated frame uses the
        # parsed header strings as column labels.
        target_df.at[row_idx, target_df.columns[col_idx]] = new_value

    def get_original_as_str(self):
        """Return the original table as text without header or index."""
        return self.original.to_string(header=False, index=False)
5 changes: 4 additions & 1 deletion langchain/openai-translator/ai_translator/book/page.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
# 导入 Content 类,用于创建页面内容
from .content import Content

class Page:
    """Ordered collection of the content items of one page."""

    def __init__(self):
        """Create a page with no content."""
        # Content items are kept in the order in which they are added.
        self.contents = []

    def add_content(self, content: Content):
        """Append ``content`` to the end of the page's content list."""
        self.contents.append(content)
35 changes: 22 additions & 13 deletions langchain/openai-translator/ai_translator/flask_server.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,37 @@
import sys
import os
import sys # 导入sys模块,用于添加系统路径
import os # 导入os模块,用于获取文件路径

sys.path.append(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.dirname(os.path.abspath(__file__))) # 将当前文件所在目录添加到系统路径中,以便导入其他模块

from flask import Flask, request, send_file, jsonify
from translator import PDFTranslator, TranslationConfig
from utils import ArgumentParser, LOG
from flask import Flask, request, send_file, jsonify # 导入Flask框架中的Flask、request、send_file和jsonify模块,用于创建Flask应用实例、接收请求、发送文件和返回JSON数据
from translator import PDFTranslator, TranslationConfig # 导入自定义的PDFTranslator和TranslationConfig类,用于翻译PDF文件
from utils import ArgumentParser, LOG # 导入自定义的ArgumentParser和LOG函数,用于解析命令行参数和打印日志

# Flask application instance that the route decorators below attach to.
app = Flask(__name__)

# Directory prefix under which uploaded files are saved before translation.
TEMP_FILE_DIR = "flask_temps/"

# 使用 Flask 装饰器语法,将该函数绑定到 /translation 路径上,并指定请求方法为 POST。
@app.route('/translation', methods=['POST'])
def translation():
try:
# 获取上传的文件
input_file = request.files['input_file']
# 获取源语言和目标语言
source_language = request.form.get('source_language', 'English')
target_language = request.form.get('target_language', 'Chinese')

# 打印上传文件的信息
LOG.debug(f"[input_file]\n{input_file}")
LOG.debug(f"[input_file.filename]\n{input_file.filename}")

# 判断上传的文件是否存在
if input_file and input_file.filename:
# # 创建临时文件
# 创建临时文件
input_file_path = TEMP_FILE_DIR+input_file.filename
LOG.debug(f"[input_file_path]\n{input_file_path}")

# 保存上传的文件到临时文件夹
input_file.save(input_file_path)

# 调用翻译函数
Expand All @@ -44,22 +50,25 @@ def translation():
# 返回翻译后的文件
return send_file(output_file_path, as_attachment=True)
except Exception as e:
# 返回错误信息
response = {
'status': 'error',
'message': str(e)
}
return jsonify(response), 400


def initialize_translator():
    """Create the module-level ``Translator`` from command-line arguments.

    Parses the command line, exports the OpenAI API key to the environment
    when one was supplied, initializes the translation configuration and
    binds a ``PDFTranslator`` to the global name ``Translator``.
    """
    # Parse command-line arguments.
    argument_parser = ArgumentParser()
    args = argument_parser.parse_arguments()

    # Export the OpenAI API key only when one was given: os.environ rejects
    # non-string values such as None, and a key that is already present in
    # the environment should not be replaced by an empty argument.
    if args.api_key:
        os.environ["OPENAI_API_KEY"] = args.api_key

    # Initialize the configuration.
    config = TranslationConfig()
    config.initialize(args)

    # Publish the translator as a module-level global for the request handler.
    global Translator
    Translator = PDFTranslator(config.model_name)

Expand Down
100 changes: 62 additions & 38 deletions langchain/openai-translator/ai_translator/gradio_server.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,78 @@
import sys
import os
import gradio as gr
import sys # 系统库,用于添加系统路径
import os # 操作系统库,用于处理文件路径
import gradio as gr # Gradio库,用于构建用户界面

sys.path.append(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.dirname(os.path.abspath(__file__))) # 将当前文件所在目录添加到系统路径中,以便导入其他模块

from utils import ArgumentParser, LOG
from translator import PDFTranslator, TranslationConfig
# 导入 utils 模块的 ArgumentParser和LOG类,用于解析命令行参数和打印日志
from utils import ArgumentParser, LOG

# 导入 translator 模块的 PDFTranslator和TranslationConfig类,用于翻译PDF文件
from translator import PDFTranslator, TranslationConfig

def translation(input_file, source_language, target_language):
    """Translate an uploaded PDF and return the translated file's path.

    Args:
        input_file: Uploaded file object; only its ``name`` attribute is
            read, and it is passed on as the input file path.
        source_language: Language to translate from.
        target_language: Language to translate into.

    Returns:
        The value returned by ``Translator.translate_pdf``, which is used as
        the path of the translated file.
    """
    # Log the task parameters for debugging.
    LOG.debug(f"[翻译任务]\n源文件: {input_file.name}\n源语言: {source_language}\n目标语言: {target_language}")

    # Delegate the actual work to the module-level translator.
    output_file_path = Translator.translate_pdf(
        input_file.name, source_language=source_language, target_language=target_language)

    return output_file_path

def launch_gradio():
    """Build the Gradio interface around ``translation`` and launch it."""
    # On submit, the values of ``inputs`` are passed to ``fn`` as arguments
    # and the return value of ``fn`` becomes the value of ``outputs``.
    iface = gr.Interface(
        fn=translation,
        title="OpenAI-Translator v2.0(PDF电子书翻译工具)",
        inputs=[
            gr.File(label="上传PDF文件"),
            gr.Textbox(label="源语言(默认:英文)", placeholder="English", value="English"),
            gr.Textbox(label="目标语言(默认:中文)", placeholder="Chinese", value="Chinese")
        ],
        outputs=[
            gr.File(label="下载翻译文件")
        ],
        allow_flagging="never"
    )

    # Start the service.
    # NOTE(review): share=True and server_name="0.0.0.0" presumably make the
    # app reachable from outside this machine -- confirm that is intended.
    iface.launch(share=True, server_name="0.0.0.0")

def initialize_translator():
    """Create the module-level ``Translator`` from command-line arguments.

    Parses the command line, exports the OpenAI API key to the environment
    when one was supplied, initializes the translation configuration and
    binds a ``PDFTranslator`` to the global name ``Translator``.
    """
    # Parse command-line arguments.
    argument_parser = ArgumentParser()
    args = argument_parser.parse_arguments()

    # Export the OpenAI API key only when one was given: os.environ rejects
    # non-string values such as None, and a key that is already present in
    # the environment should not be replaced by an empty argument.
    if args.api_key:
        os.environ["OPENAI_API_KEY"] = args.api_key

    # Initialize the configuration object.
    config = TranslationConfig()
    config.initialize(args)

    # Publish the translator as a module-level global for ``translation``.
    global Translator
    Translator = PDFTranslator(config.model_name)

# Program entry point.
if __name__ == "__main__":
    # Create the translator first: the Gradio callback reads the global
    # ``Translator`` that this call sets.
    initialize_translator()

    # Start the Gradio service.
    launch_gradio()
Loading