|
11 | 11 | from pgai.vectorizer.loading import LoadedDocument
|
12 | 12 |
|
13 | 13 | # Thread pool for CPU-intensive parsing operations
|
14 |
| -_PARSING_EXECUTOR = ThreadPoolExecutor(max_workers=4, thread_name_prefix="parsing") |
| 14 | +max_workers = int(os.getenv("PARSING_MAX_WORKERS", 4)) |
| 15 | +_PARSING_EXECUTOR = ThreadPoolExecutor( |
| 16 | + max_workers=max_workers, thread_name_prefix="parsing" |
| 17 | +) |
15 | 18 |
|
16 | 19 |
|
17 | 20 | class ParsingNone(BaseModel):
|
@@ -91,7 +94,7 @@ class ParsingPyMuPDF(BaseDocumentParsing):
|
91 | 94 | @override
|
92 | 95 | async def parse_doc(self, row: dict[str, Any], payload: LoadedDocument) -> str: # noqa: ARG002
|
93 | 96 | # Run blocking parsing operation in thread pool
|
94 |
| - loop = asyncio.get_event_loop() |
| 97 | + loop = asyncio.get_running_loop() |
95 | 98 | return await loop.run_in_executor(
|
96 | 99 | _PARSING_EXECUTOR, self._parse_with_pymupdf, payload
|
97 | 100 | )
|
@@ -122,7 +125,7 @@ class ParsingDocling(BaseDocumentParsing):
|
122 | 125 | @override
|
123 | 126 | async def parse_doc(self, row: dict[str, Any], payload: LoadedDocument) -> str: # noqa: ARG002
|
124 | 127 | # Run blocking parsing operation in thread pool
|
125 |
| - loop = asyncio.get_event_loop() |
| 128 | + loop = asyncio.get_running_loop() |
126 | 129 | return await loop.run_in_executor(
|
127 | 130 | _PARSING_EXECUTOR, self._parse_with_docling, payload
|
128 | 131 | )
|
|
0 commit comments