-
Notifications
You must be signed in to change notification settings - Fork 0
fix(infra): fix some dependency hells and add some lazy loading to reduce celery worker RAM usage #3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: eval-pr-5478-target-1758731970657
Are you sure you want to change the base?
fix(infra): fix some dependency hells and add some lazy loading to reduce celery worker RAM usage #3
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,12 +5,12 @@ | |
|
|
||
| from onyx.agents.agent_search.dr.sub_agents.states import SubAgentMainState | ||
| from onyx.agents.agent_search.dr.sub_agents.states import SubAgentUpdate | ||
| from onyx.agents.agent_search.dr.utils import chunks_or_sections_to_search_docs | ||
| from onyx.agents.agent_search.shared_graph_utils.utils import ( | ||
| get_langgraph_node_log_string, | ||
| ) | ||
| from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event | ||
| from onyx.context.search.models import SavedSearchDoc | ||
| from onyx.context.search.models import SearchDoc | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. single-agent-filter: Group imports from the same module into a single statement to reduce duplication and improve readability. DEV MODE: This violation would have been filtered out by screening filters. Failing filters: commentPurpose, functionalImpact, objectivity. Reasoning: Prompt for AI agents[internal] Confidence score: 9/10 [internal] Posted by: General AI Review Agent There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. two-agent-filter: Duplicate imports from the same module on separate lines. Combine into a single import for clarity and style consistency. DEV MODE: This violation would have been filtered out by screening filters. Failing filters: functionalImpact, objectivity. Reasoning: Prompt for AI agents[internal] Confidence score: 9/10 [internal] Posted by: General AI Review Agent There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. single-agent-filter: Duplicate imports from the same module on separate lines. Combine into a single import for clarity and style consistency. DEV MODE: This violation would have been filtered out by screening filters. Failing filters: functionalImpact, objectivity. Reasoning: Prompt for AI agents[internal] Confidence score: 9/10 [internal] Posted by: General AI Review Agent |
||
| from onyx.server.query_and_chat.streaming_models import SectionEnd | ||
| from onyx.utils.logger import setup_logger | ||
|
|
||
|
|
@@ -47,7 +47,7 @@ def is_reducer( | |
| doc_list.append(x) | ||
|
|
||
| # Convert InferenceSections to SavedSearchDocs | ||
| search_docs = chunks_or_sections_to_search_docs(doc_list) | ||
| search_docs = SearchDoc.chunks_or_sections_to_search_docs(doc_list) | ||
| retrieved_saved_search_docs = [ | ||
| SavedSearchDoc.from_search_doc(search_doc, db_doc_id=0) | ||
| for search_doc in search_docs | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,7 +13,7 @@ | |
| ) | ||
| from onyx.context.search.models import InferenceSection | ||
| from onyx.context.search.models import SavedSearchDoc | ||
| from onyx.context.search.utils import chunks_or_sections_to_search_docs | ||
| from onyx.context.search.models import SearchDoc | ||
| from onyx.tools.tool_implementations.web_search.web_search_tool import ( | ||
| WebSearchTool, | ||
| ) | ||
|
|
@@ -266,7 +266,7 @@ def convert_inference_sections_to_search_docs( | |
| is_internet: bool = False, | ||
| ) -> list[SavedSearchDoc]: | ||
| # Convert InferenceSections to SavedSearchDocs | ||
| search_docs = chunks_or_sections_to_search_docs(inference_sections) | ||
| search_docs = SearchDoc.chunks_or_sections_to_search_docs(inference_sections) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. two-agent-filter: Conversion logic from InferenceSection to SavedSearchDoc with default db_doc_id=0 duplicates functionality in Reasoning: Prompt for AI agents[internal] Confidence score: 9/10 [internal] Posted by: Unmapped Agent in getAgentNameFromViolationSource (undefined) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. single-agent-filter: Conversion logic from InferenceSection to SavedSearchDoc with default db_doc_id=0 duplicates functionality in • Libraries consulted: Prompt for AI agents[internal] Confidence score: 9/10 [internal] Posted by: Unmapped Agent in getAgentNameFromViolationSource (undefined) |
||
| for search_doc in search_docs: | ||
| search_doc.is_internet = is_internet | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -86,7 +86,6 @@ | |
| from onyx.file_store.document_batch_storage import get_document_batch_storage | ||
| from onyx.httpx.httpx_pool import HttpxPool | ||
| from onyx.indexing.embedder import DefaultIndexingEmbedder | ||
| from onyx.indexing.indexing_pipeline import run_indexing_pipeline | ||
| from onyx.natural_language_processing.search_nlp_models import EmbeddingModel | ||
| from onyx.natural_language_processing.search_nlp_models import ( | ||
| InformationContentClassificationModel, | ||
|
|
@@ -1268,6 +1267,8 @@ def _docprocessing_task( | |
| tenant_id: str, | ||
| batch_num: int, | ||
| ) -> None: | ||
| from onyx.indexing.indexing_pipeline import run_indexing_pipeline | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. two-agent-filter: Heartbeat timeout is mutated per-loop and persists across attempts, delaying failure detection for others. • Libraries consulted: Prompt for AI agents[internal] Confidence score: 9/10 [internal] Posted by: Functional Bugs Agent There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. single-agent-filter: Heartbeat timeout is mutated per-loop and persists across attempts, delaying failure detection for others. • Libraries consulted: Prompt for AI agents[internal] Confidence score: 9/10 [internal] Posted by: Functional Bugs Agent |
||
|
|
||
| start_time = time.monotonic() | ||
|
|
||
| if tenant_id: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,7 +28,6 @@ | |
| from onyx.connectors.connector_runner import ConnectorRunner | ||
| from onyx.connectors.exceptions import ConnectorValidationError | ||
| from onyx.connectors.exceptions import UnexpectedValidationError | ||
| from onyx.connectors.factory import instantiate_connector | ||
| from onyx.connectors.interfaces import CheckpointedConnector | ||
| from onyx.connectors.models import ConnectorFailure | ||
| from onyx.connectors.models import ConnectorStopSignal | ||
|
|
@@ -66,7 +65,6 @@ | |
| from onyx.httpx.httpx_pool import HttpxPool | ||
| from onyx.indexing.embedder import DefaultIndexingEmbedder | ||
| from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface | ||
| from onyx.indexing.indexing_pipeline import run_indexing_pipeline | ||
| from onyx.natural_language_processing.search_nlp_models import ( | ||
| InformationContentClassificationModel, | ||
| ) | ||
|
|
@@ -100,6 +98,8 @@ def _get_connector_runner( | |
| are the complete list of existing documents of the connector. If the task | ||
| of type LOAD_STATE, the list will be considered complete and otherwise incomplete. | ||
| """ | ||
| from onyx.connectors.factory import instantiate_connector | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Local import placed outside try in _get_connector_runner allows ImportError to escape and leaves index attempt stuck (not marked failed). Prompt for AI agents[internal] Confidence score: 9/10 [internal] Posted by: Functional Bugs Agent There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. two-agent-filter: Early exceptions before try/except in connector_document_extraction cause stuck IN_PROGRESS attempts and MemoryTracer not stopped due to lazy import in _get_connector_runner. Reasoning: Prompt for AI agents[internal] Confidence score: 9/10 [internal] Posted by: Functional Bugs Agent There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. single-agent-filter: Early exceptions before try/except in connector_document_extraction cause stuck IN_PROGRESS attempts and MemoryTracer not stopped due to lazy import in _get_connector_runner. Reasoning: Prompt for AI agents[internal] Confidence score: 9/10 [internal] Posted by: Functional Bugs Agent There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Lazy import placed outside error handling; ImportError would bypass the existing try/except, leading to unhandled failure instead of graceful connector pause/logging. Prompt for AI agents[internal] Confidence score: 8/10 [internal] Posted by: General AI Review Agent There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
Import of instantiate_connector outside try/except causes unhandled setup exceptions, leaving attempts stuck IN_PROGRESS and skipping CCPair pause. Prompt for AI agents[internal] Confidence score: 9/10 [internal] Posted by: Functional Bugs Agent There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Import inside _get_connector_runner occurs outside its try/except, so import-time errors bypass pause logic and, since caller’s try is later, leave attempt IN_PROGRESS. Reasoning: Prompt for AI agents[internal] Confidence score: 9/10 [internal] Posted by: Functional Bugs Agent |
||
|
|
||
| task = attempt.connector_credential_pair.connector.input_type | ||
|
|
||
| try: | ||
|
|
@@ -283,6 +283,8 @@ def _run_indexing( | |
| 2. Embed and index these documents into the chosen datastore (vespa) | ||
| 3. Updates Postgres to record the indexed documents + the outcome of this run | ||
| """ | ||
| from onyx.indexing.indexing_pipeline import run_indexing_pipeline | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Import is outside the function’s exception handling; an ImportError would escape the try/except that manages indexing failures. Reasoning: Prompt for AI agents[internal] Confidence score: 8/10 [internal] Posted by: General AI Review Agent |
||
|
|
||
| start_time = time.monotonic() # just used for logging | ||
|
|
||
| with get_session_with_current_tenant() as db_session_temp: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| from langchain_core.messages import BaseMessage | ||
| from pydantic import BaseModel | ||
|
|
||
| from onyx.llm.models import PreviousMessage | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Module-level import of onyx.llm.models triggers heavy LangChain/LLM dependencies at import time, undermining lazy-loading and increasing memory footprint. Use TYPE_CHECKING and forward references to avoid runtime import, or move this type to a lighter module. Prompt for AI agents[internal] Confidence score: 9/10 [internal] Posted by: General AI Review Agent |
||
|
|
||
|
|
||
| class PromptSnapshot(BaseModel): | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
`PromptSnapshot` includes LangChain `BaseMessage` instances but does not enable pydantic's `arbitrary_types_allowed`, risking validation/runtime errors when constructing the model. (Based on your team's feedback about LangChain using pydantic v1 and the need to allow arbitrary types when nesting LangChain models inside pydantic v2 models, as seen in existing patterns.)
Prompt for AI agents~~~ Address the following comment on backend/onyx/chat/prompt_builder/schemas.py at line 7: `PromptSnapshot` includes LangChain `BaseMessage` instances but does not enable pydantic's `arbitrary_types_allowed`, risking validation/runtime errors when constructing the model. (Based on your team's feedback about LangChain using pydantic v1 and the need to allow arbitrary types when nesting LangChain models inside pydantic v2 models, as seen in existing patterns.) @@ -0,0 +1,10 @@ +from onyx.llm.models import PreviousMessage + + +class PromptSnapshot(BaseModel): + raw_message_history: list[PreviousMessage] + raw_user_query: str ~~~ |
||
| raw_message_history: list[PreviousMessage] | ||
| raw_user_query: str | ||
| built_prompt: list[BaseMessage] | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. two-agent-filter: Pydantic v2 model includes Reasoning: • Libraries consulted: Pydantic v2 arbitrary_types_allowed, LangChain BaseMessage messages, Pydantic, Langchain, Python_langchain Prompt for AI agents[internal] Confidence score: 8/10 [internal] Posted by: General AI Review Agent There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. single-agent-filter: Pydantic v2 model includes Reasoning: • Libraries consulted: Pydantic v2 arbitrary types allowed, Pydantic Prompt for AI agents[internal] Confidence score: 8/10 [internal] Posted by: General AI Review Agent There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pydantic v2 model lacks arbitrary_types_allowed while using non-Pydantic type BaseMessage, causing schema/instantiation errors. Prompt for AI agents[internal] Confidence score: 9/10 [internal] Posted by: Functional Bugs Agent There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing arbitrary_types_allowed for field using external type BaseMessage; PromptSnapshot will fail validation on instantiation. Reasoning: • Libraries consulted: pydantic arbitrary_types_allowed v2, langchain_core BaseMessage, Pydantic, Langchain Prompt for AI agents[internal] Confidence score: 9/10 [internal] Posted by: Functional Bugs Agent There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
Pydantic model lacks arbitrary_types_allowed for list[BaseMessage], causing schema/validation error Prompt for AI agents[internal] Confidence score: 9/10 [internal] Posted by: Functional Bugs Agent |
||
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
| @@ -1,3 +1,4 @@ | ||||||||||
| from collections.abc import Sequence | ||||||||||
| from datetime import datetime | ||||||||||
| from typing import Any | ||||||||||
|
|
||||||||||
|
|
@@ -355,6 +356,44 @@ class SearchDoc(BaseModel): | |||||||||
| secondary_owners: list[str] | None = None | ||||||||||
| is_internet: bool = False | ||||||||||
|
|
||||||||||
| @classmethod | ||||||||||
| def chunks_or_sections_to_search_docs( | ||||||||||
| cls, | ||||||||||
| items: "Sequence[InferenceChunk | InferenceSection] | None", | ||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Invalid type annotation: mixing a string literal with Prompt for AI agents[internal] Confidence score: 10/10 [internal] Posted by: General AI Review Agent
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Stringified type annotation is unnecessary here; use a real annotation so the imported Sequence is actually used and annotations remain introspectable, preventing potential unused-import warnings. Reasoning: Prompt for AI agents[internal] Confidence score: 8/10 [internal] Posted by: General AI Review Agent
Suggested change
|
||||||||||
| ) -> list["SearchDoc"]: | ||||||||||
| """Convert a sequence of InferenceChunk or InferenceSection objects to SearchDoc objects.""" | ||||||||||
| if not items: | ||||||||||
| return [] | ||||||||||
|
|
||||||||||
| search_docs = [ | ||||||||||
| cls( | ||||||||||
| document_id=( | ||||||||||
| chunk := ( | ||||||||||
| item.center_chunk | ||||||||||
| if isinstance(item, InferenceSection) | ||||||||||
| else item | ||||||||||
| ) | ||||||||||
| ).document_id, | ||||||||||
| chunk_ind=chunk.chunk_id, | ||||||||||
| semantic_identifier=chunk.semantic_identifier or "Unknown", | ||||||||||
| link=chunk.source_links[0] if chunk.source_links else None, | ||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Potential KeyError: Reasoning: Prompt for AI agents[internal] Confidence score: 8/10 [internal] Posted by: General AI Review Agent There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
two-agent-filter: Directly accessing chunk.source_links[0] assumes key 0 exists on a dict[int, str]; if source_links is present but lacks key 0, this raises KeyError. Use a safe first-value retrieval or get(0). (Based on your team's feedback about consolidating conversion into SearchDoc to avoid brittle assumptions, this influenced checking for safer link extraction.) • **Libraries consulted**:
Prompt for AI agents~~~ Address the following comment on backend/onyx/context/search/models.py at line 379: two-agent-filter: Directly accessing chunk.source_links[0] assumes key 0 exists on a dict[int, str]; if source_links is present but lacks key 0, this raises KeyError. Use a safe first-value retrieval or get(0). (Based on your team's feedback about consolidating conversion into SearchDoc to avoid brittle assumptions, this influenced checking for safer link extraction.) • **Libraries consulted**: @@ -355,6 +356,44 @@ class SearchDoc(BaseModel): + ).document_id, + chunk_ind=chunk.chunk_id, + semantic_identifier=chunk.semantic_identifier or "Unknown", + link=chunk.source_links[0] if chunk.source_links else None, + blurb=chunk.blurb, + source_type=chunk.source_type, ~~~There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
single-agent-filter: Directly accessing chunk.source_links[0] assumes key 0 exists on a dict[int, str]; if source_links is present but lacks key 0, this raises KeyError. Use a safe first-value retrieval or get(0). (Based on your team's feedback about consolidating conversion into SearchDoc to avoid brittle assumptions, this influenced checking for safer link extraction.) • **Libraries consulted**:
Prompt for AI agents~~~ Address the following comment on backend/onyx/context/search/models.py at line 379: single-agent-filter: Directly accessing chunk.source_links[0] assumes key 0 exists on a dict[int, str]; if source_links is present but lacks key 0, this raises KeyError. Use a safe first-value retrieval or get(0). (Based on your team's feedback about consolidating conversion into SearchDoc to avoid brittle assumptions, this influenced checking for safer link extraction.) • **Libraries consulted**: @@ -355,6 +356,44 @@ class SearchDoc(BaseModel): + ).document_id, + chunk_ind=chunk.chunk_id, + semantic_identifier=chunk.semantic_identifier or "Unknown", + link=chunk.source_links[0] if chunk.source_links else None, + blurb=chunk.blurb, + source_type=chunk.source_type, ~~~There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. two-agent-filter: Directly indexing • Libraries consulted: Prompt for AI agents[internal] Confidence score: 8/10 [internal] Posted by: General AI Review Agent There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. single-agent-filter: Directly indexing • Libraries consulted: Prompt for AI agents[internal] Confidence score: 8/10 [internal] Posted by: General AI Review Agent There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. two-agent-filter: Dict index by fixed key 0 can raise KeyError when source_links lacks key 0. • Libraries consulted: Prompt for AI agents[internal] Confidence score: 9/10 [internal] Posted by: Functional Bugs Agent There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
single-agent-filter: Dict index by fixed key 0 can raise KeyError when source_links lacks key 0. • Libraries consulted: Prompt for AI agents[internal] Confidence score: 9/10 [internal] Posted by: Functional Bugs Agent There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Direct indexing chunk.source_links[0] can raise KeyError when dict lacks key 0, causing runtime failure. Prompt for AI agents[internal] Confidence score: 9/10 [internal] Posted by: Functional Bugs Agent There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. KeyError risk: source_links is dict, code assumes key 0 exists and indexes like a list Prompt for AI agents[internal] Confidence score: 9/10 [internal] Posted by: Functional Bugs Agent There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Indexing source_links by key 0 can raise KeyError; source_links is a dict[int, str], not guaranteed to contain 0. Prompt for AI agents[internal] Confidence score: 9/10 [internal] Posted by: Functional Bugs Agent
Suggested change
|
||||||||||
| blurb=chunk.blurb, | ||||||||||
| source_type=chunk.source_type, | ||||||||||
| boost=chunk.boost, | ||||||||||
| hidden=chunk.hidden, | ||||||||||
| metadata=chunk.metadata, | ||||||||||
| score=chunk.score, | ||||||||||
| match_highlights=chunk.match_highlights, | ||||||||||
| updated_at=chunk.updated_at, | ||||||||||
| primary_owners=chunk.primary_owners, | ||||||||||
| secondary_owners=chunk.secondary_owners, | ||||||||||
| is_internet=False, | ||||||||||
| ) | ||||||||||
| for item in items | ||||||||||
| ] | ||||||||||
|
|
||||||||||
| return search_docs | ||||||||||
|
|
||||||||||
| def model_dump(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]: # type: ignore | ||||||||||
| initial_dict = super().model_dump(*args, **kwargs) # type: ignore | ||||||||||
| initial_dict["updated_at"] = ( | ||||||||||
|
|
||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,14 +15,12 @@ | |
| from typing import Any | ||
| from typing import IO | ||
| from typing import NamedTuple | ||
| from typing import Optional | ||
| from typing import TYPE_CHECKING | ||
| from zipfile import BadZipFile | ||
|
|
||
| import chardet | ||
| import openpyxl | ||
| from markitdown import FileConversionException | ||
| from markitdown import MarkItDown | ||
| from markitdown import StreamInfo | ||
| from markitdown import UnsupportedFormatException | ||
| from PIL import Image | ||
| from pypdf import PdfReader | ||
| from pypdf.errors import PdfStreamError | ||
|
|
@@ -37,6 +35,8 @@ | |
| from onyx.utils.file_types import WORD_PROCESSING_MIME_TYPE | ||
| from onyx.utils.logger import setup_logger | ||
|
|
||
| if TYPE_CHECKING: | ||
| from markitdown import MarkItDown | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Lazy import of Reasoning: Prompt for AI agents[internal] Confidence score: 7/10 [internal] Posted by: General AI Review Agent There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unhandled ImportError in get_markitdown_converter causes docx/pptx processing to raise RuntimeError via extract_file_text instead of graceful fallback. Prompt for AI agents[internal] Confidence score: 9/10 [internal] Posted by: Functional Bugs Agent |
||
| logger = setup_logger() | ||
|
|
||
| # NOTE(rkuo): Unify this with upload_files_for_chat and file_valiation.py | ||
|
|
@@ -85,16 +85,18 @@ | |
| "image/webp", | ||
| ] | ||
|
|
||
| _MARKITDOWN_CONVERTER: MarkItDown | None = None | ||
| _MARKITDOWN_CONVERTER: Optional["MarkItDown"] = None | ||
|
|
||
| KNOWN_OPENPYXL_BUGS = [ | ||
| "Value must be either numerical or a string containing a wildcard", | ||
| "File contains no valid workbook part", | ||
| ] | ||
|
|
||
|
|
||
| def get_markitdown_converter() -> MarkItDown: | ||
| def get_markitdown_converter() -> "MarkItDown": | ||
| global _MARKITDOWN_CONVERTER | ||
| from markitdown import MarkItDown | ||
|
|
||
| if _MARKITDOWN_CONVERTER is None: | ||
| _MARKITDOWN_CONVERTER = MarkItDown(enable_plugins=False) | ||
| return _MARKITDOWN_CONVERTER | ||
|
|
@@ -358,6 +360,12 @@ def docx_to_text_and_images( | |
| The images list returned is empty in this case. | ||
| """ | ||
| md = get_markitdown_converter() | ||
| from markitdown import ( | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Runtime import of Reasoning: Prompt for AI agents[internal] Confidence score: 7/10 [internal] Posted by: General AI Review Agent |
||
| StreamInfo, | ||
| FileConversionException, | ||
| UnsupportedFormatException, | ||
| ) | ||
|
|
||
| try: | ||
| doc = md.convert( | ||
| to_bytesio(file), stream_info=StreamInfo(mimetype=WORD_PROCESSING_MIME_TYPE) | ||
|
|
@@ -394,6 +402,12 @@ def docx_to_text_and_images( | |
|
|
||
| def pptx_to_text(file: IO[Any], file_name: str = "") -> str: | ||
| md = get_markitdown_converter() | ||
| from markitdown import ( | ||
| StreamInfo, | ||
| FileConversionException, | ||
| UnsupportedFormatException, | ||
| ) | ||
|
|
||
| stream_info = StreamInfo( | ||
| mimetype=PRESENTATION_MIME_TYPE, filename=file_name or None, extension=".pptx" | ||
| ) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
two-agent-filter: Group imports from the same module into a single statement to reduce duplication and improve readability.
DEV MODE: This violation would have been filtered out by screening filters. Failing filters: commentPurpose, functionalImpact, objectivity.
Reasoning:
• GPT-5: Stylistic-only import grouping suggestion with no functional or maintainability impact; per criteria, filter out low-importance style issues.
Prompt for AI agents
[internal] Confidence score: 9/10
[internal] Posted by: General AI Review Agent