3
3
from enum import Enum
4
4
from typing import Any
5
5
6
- from pydantic import BaseModel
6
+ from pydantic import BaseModel , Field
7
7
8
8
logger = logging .getLogger (__name__ )
9
9
@@ -19,9 +19,9 @@ class TableDataItem(BaseModel):
19
19
"""表格数据类"""
20
20
rows : int # 行数
21
21
columns : int # 列数
22
- row_headers : list [Any ] = [] # 行头
23
- column_headers : list [Any ] = [] # 列头
24
- data : list [list [str ]] = [] # 数据
22
+ row_headers : list [Any ] = Field ( default_factory = list ) # 行头
23
+ column_headers : list [Any ] = Field ( default_factory = list ) # 列头
24
+ data : list [list [str ]] = Field ( default_factory = list ) # 数据
25
25
26
26
class ChunkData (BaseModel ):
27
27
"""块数据类"""
@@ -33,10 +33,10 @@ class ChunkData(BaseModel):
33
33
class DocumentData(BaseModel):
    """Result of parsing a document.

    Holds the document title, the extracted chunks grouped by category
    (texts, tables, images, formulas), and the outcome of the parse.
    """

    title: str = ""
    # Each chunk list uses default_factory so the "new empty list per
    # instance" intent is explicit in the field declaration.
    texts: list[ChunkData] = Field(default_factory=list)
    tables: list[ChunkData] = Field(default_factory=list)
    images: list[ChunkData] = Field(default_factory=list)
    formulas: list[ChunkData] = Field(default_factory=list)
    processing_time: float = 0  # NOTE(review): unit is not stated here — confirm
    success: bool  # required: no default is provided
    error_message: str | None = None  # None when no error message was recorded
@@ -45,7 +45,7 @@ class DocumentParser(ABC):
45
45
"""文档解析器基类"""
46
46
47
47
def __init__(self) -> None:
    """Initialise the parser with an empty list of supported formats.

    ``supported_formats`` holds the file-name suffixes this parser accepts;
    it starts empty and is presumably filled in by concrete parsers
    (confirm against the subclasses).
    """
    # A plain list literal is correct here: this assignment runs once per
    # instance, so the list is never shared between parsers. pydantic's
    # ``Field(default_factory=list)`` is only interpreted on BaseModel class
    # attributes; assigned to an instance attribute of this ABC-based class
    # it would store a FieldInfo object instead of a list.
    self.supported_formats: list[str] = []
49
49
50
50
@abstractmethod
51
51
async def parse (self , file_path : str ) -> DocumentData :
@@ -55,4 +55,4 @@ async def parse(self, file_path: str) -> DocumentData:
55
55
@abstractmethod
def can_parse(self, file_path: str) -> bool:
    """Check whether this parser can handle the given file.

    The check is a case-insensitive suffix match of ``file_path`` against
    every entry in ``self.supported_formats``.

    Args:
        file_path: Path or file name to test.

    Returns:
        True if ``file_path`` ends with one of the supported formats,
        False otherwise (including when no formats are registered).
    """
    # Lower-case both sides so an entry such as ".PDF" still matches;
    # str.endswith accepts a tuple, so a single call covers all suffixes.
    suffixes = tuple(fmt.lower() for fmt in self.supported_formats)
    return file_path.lower().endswith(suffixes)
0 commit comments