11 changes: 10 additions & 1 deletion .env.template
@@ -51,4 +51,13 @@ DEBUG=false
LOG_LEVEL=INFO

# PDF parsing
MINERU_MODEL_SOURCE=local
MINERU_MODEL_SOURCE=local

# Information enhancement
LLM_MODEL_NAME=gpt-4o
LLM_BASE_URL=http://192.168.120.2:4000
LLM_API_KEY=ae

VLLM_MODEL_NAME=qwen2.5-vl-7b-instruct
VLLM_API_KEY=sk-
VLLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
9 changes: 9 additions & 0 deletions config.py
@@ -33,4 +33,13 @@ class Settings:
    MAX_FILES_PER_REQUEST: int = int(os.getenv("MAX_FILES_PER_REQUEST", "20"))
    TASK_TIMEOUT: int = int(os.getenv("TASK_TIMEOUT", "3600"))  # 1 hour

    # Model configuration
    LLM_MODEL_NAME: str = os.getenv("LLM_MODEL_NAME", "gpt-4o")
    LLM_BASE_URL: str = os.getenv("LLM_BASE_URL", "http://192.168.120.2:4000")
    LLM_API_KEY: str = os.getenv("LLM_API_KEY", "sk-")

    VLLM_MODEL_NAME: str = os.getenv("VLLM_MODEL_NAME", "qwen2.5-vl-7b-instruct")
    VLLM_API_KEY: str = os.getenv("VLLM_API_KEY", "sk-")
    VLLM_BASE_URL: str = os.getenv("VLLM_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1")

settings = Settings()
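
A quick, throwaway check that the new settings are picked up (the values below are placeholders, not real credentials); note the class attributes call os.getenv at import time, so the variables must be set before config is imported:

import os

# Placeholders for illustration only; in practice these come from .env or the shell.
os.environ.setdefault("LLM_MODEL_NAME", "gpt-4o")
os.environ.setdefault("VLLM_MODEL_NAME", "qwen2.5-vl-7b-instruct")

from config import settings

print(settings.LLM_MODEL_NAME, settings.LLM_BASE_URL)
print(settings.VLLM_MODEL_NAME, settings.VLLM_BASE_URL)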
52 changes: 32 additions & 20 deletions enhancers/base_models.py
@@ -1,32 +1,44 @@
from abc import ABC, abstractmethod
from typing import Any

from openai import AsyncOpenAI
from pydantic import BaseModel
from tenacity import retry, stop_after_attempt, wait_exponential

from parsers.base_models import ChunkData

MAX_RETRIES = 3
WAIT_TIME = 4
WAIT_MAX_TIME = 15
MULTIPLIER = 1

class JsonResponseFormat(BaseModel):
    """JSON response format."""
    description: str

class InformationEnhancer(ABC):
    """Base class for information enhancers."""
    def __init__(self, model_name: str, base_url: str, api_key: str):
        self.client = AsyncOpenAI(api_key=api_key, base_url=base_url)
        self.model_name = model_name
        self.system_prompt = "You are a helpful assistant."

    @abstractmethod
    async def enhance(self, information: ChunkData) -> ChunkData:
        """Enhance the information."""
        pass

class TableInformationEnhancer(InformationEnhancer):
    """Table information enhancer."""

    async def enhance(self, information: ChunkData) -> ChunkData:
        """Enhance the information."""
        return information

class FormulasInformationEnhancer(InformationEnhancer):
    """Formula information enhancer."""

    async def enhance(self, information: ChunkData) -> ChunkData:
        """Enhance the information."""
        return information

class ImageInformationEnhancer(InformationEnhancer):
    """Image information enhancer."""

    async def enhance(self, information: ChunkData) -> ChunkData:
        """Enhance the information."""
        return information
    @retry(stop=stop_after_attempt(MAX_RETRIES), wait=wait_exponential(multiplier=MULTIPLIER, min=WAIT_TIME, max=WAIT_MAX_TIME))
    async def get_structured_response(self, user_prompt: list[dict[str, Any]], response_format: type[JsonResponseFormat]) -> JsonResponseFormat | None:
        """Get a structured response from the model."""
        response = await self.client.chat.completions.parse(
            model=self.model_name,
            messages=[
                {"role": "system", "content": self.system_prompt},
                {"role": "user", "content": user_prompt}  # type: ignore
            ],
            response_format=response_format
        )
        if response.choices[0].message.refusal:
            raise ValueError(f"Model refused the request: {response.choices[0].message.refusal}")
        return response.choices[0].message.parsed
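
For orientation, a minimal sketch (not part of this PR) of how a concrete enhancer could use get_structured_response; the ChunkData fields referenced here (content holding an image URL, description) are assumptions for illustration, not taken from parsers/base_models.py:

from enhancers.base_models import InformationEnhancer, JsonResponseFormat
from parsers.base_models import ChunkData

class ImageCaptionEnhancer(InformationEnhancer):
    """Hypothetical enhancer: asks the vision model for a one-sentence description of an image chunk."""

    async def enhance(self, information: ChunkData) -> ChunkData:
        user_prompt = [
            {"type": "text", "text": "Describe this image in one sentence."},
            {"type": "image_url", "image_url": {"url": information.content}},  # assumed field
        ]
        parsed = await self.get_structured_response(user_prompt, JsonResponseFormat)
        if parsed is not None:
            information.description = parsed.description  # assumed field
        return information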
7 changes: 6 additions & 1 deletion enhancers/enhancer_registry.py
@@ -7,6 +7,7 @@
import logging
from collections.abc import Callable

from config import settings
from enhancers.base_models import InformationEnhancer
from parsers.base_models import ChunkType

@@ -67,7 +68,11 @@ def get_enhancer(modality: ChunkType) -> InformationEnhancer | None:

    enhancer_class = ENHANCER_REGISTRY[modality_type]
    try:
        return enhancer_class()
        if modality_type == ChunkType.IMAGE.value.lower():
            return enhancer_class(settings.VLLM_MODEL_NAME, settings.VLLM_BASE_URL, settings.VLLM_API_KEY)
        return enhancer_class(settings.LLM_MODEL_NAME, settings.LLM_BASE_URL, settings.LLM_API_KEY)
    except Exception as e:
        logger.error(f"Failed to create enhancer instance: {enhancer_class.__name__}, error: {e}")
        return None
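
A minimal usage sketch, assuming ChunkType exposes an IMAGE member and the registry maps it to an image enhancer class:

from enhancers.enhancer_registry import get_enhancer
from parsers.base_models import ChunkType

enhancer = get_enhancer(ChunkType.IMAGE)  # backed by the VLLM_* settings
if enhancer is None:
    raise RuntimeError("no enhancer registered for IMAGE chunks")
# result = await enhancer.enhance(chunk)  # chunk: ChunkData produced by a parser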
1 change: 1 addition & 0 deletions pyproject.toml
@@ -18,6 +18,7 @@ dependencies = [
"docling>=2.45.0",
"mineru[core]>=2.1.11",
"beautifulsoup4>=4.13.4",
"tenacity>=9.1.2",
]

[dependency-groups]
11 changes: 11 additions & 0 deletions uv.lock
