Skip to content

Commit a997ed6

Browse files
committed
fix: review comments
1 parent 593d033 commit a997ed6

File tree

1 file changed

+6 -3 lines changed

1 file changed

+6
-3
lines changed

projects/pgai/pgai/vectorizer/parsing.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,10 @@
1111
from pgai.vectorizer.loading import LoadedDocument
1212

1313
# Thread pool for CPU-intensive parsing operations
14-
_PARSING_EXECUTOR = ThreadPoolExecutor(max_workers=4, thread_name_prefix="parsing")
14+
max_workers = int(os.getenv("PARSING_MAX_WORKERS", 4))
15+
_PARSING_EXECUTOR = ThreadPoolExecutor(
16+
max_workers=max_workers, thread_name_prefix="parsing"
17+
)
1518

1619

1720
class ParsingNone(BaseModel):
@@ -91,7 +94,7 @@ class ParsingPyMuPDF(BaseDocumentParsing):
9194
@override
9295
async def parse_doc(self, row: dict[str, Any], payload: LoadedDocument) -> str: # noqa: ARG002
9396
# Run blocking parsing operation in thread pool
94-
loop = asyncio.get_event_loop()
97+
loop = asyncio.get_running_loop()
9598
return await loop.run_in_executor(
9699
_PARSING_EXECUTOR, self._parse_with_pymupdf, payload
97100
)
@@ -122,7 +125,7 @@ class ParsingDocling(BaseDocumentParsing):
122125
@override
123126
async def parse_doc(self, row: dict[str, Any], payload: LoadedDocument) -> str: # noqa: ARG002
124127
# Run blocking parsing operation in thread pool
125-
loop = asyncio.get_event_loop()
128+
loop = asyncio.get_running_loop()
126129
return await loop.run_in_executor(
127130
_PARSING_EXECUTOR, self._parse_with_docling, payload
128131
)

0 commit comments

Comments
 (0)