@@ -5,12 +5,12 @@

from onyx.agents.agent_search.dr.sub_agents.states import SubAgentMainState
from onyx.agents.agent_search.dr.sub_agents.states import SubAgentUpdate
-from onyx.agents.agent_search.dr.utils import chunks_or_sections_to_search_docs
from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.context.search.models import SavedSearchDoc
+from onyx.context.search.models import SearchDoc
from onyx.server.query_and_chat.streaming_models import SectionEnd
from onyx.utils.logger import setup_logger

@@ -47,7 +47,7 @@ def is_reducer(
        doc_list.append(x)

    # Convert InferenceSections to SavedSearchDocs
-    search_docs = chunks_or_sections_to_search_docs(doc_list)
+    search_docs = SearchDoc.chunks_or_sections_to_search_docs(doc_list)
    retrieved_saved_search_docs = [
        SavedSearchDoc.from_search_doc(search_doc, db_doc_id=0)
        for search_doc in search_docs
4 changes: 2 additions & 2 deletions backend/onyx/agents/agent_search/dr/utils.py
@@ -13,7 +13,7 @@
)
from onyx.context.search.models import InferenceSection
from onyx.context.search.models import SavedSearchDoc
-from onyx.context.search.utils import chunks_or_sections_to_search_docs
+from onyx.context.search.models import SearchDoc
from onyx.tools.tool_implementations.web_search.web_search_tool import (
    WebSearchTool,
)
@@ -266,7 +266,7 @@ def convert_inference_sections_to_search_docs(
    is_internet: bool = False,
) -> list[SavedSearchDoc]:
    # Convert InferenceSections to SavedSearchDocs
-    search_docs = chunks_or_sections_to_search_docs(inference_sections)
+    search_docs = SearchDoc.chunks_or_sections_to_search_docs(inference_sections)
    for search_doc in search_docs:
        search_doc.is_internet = is_internet

92 changes: 92 additions & 0 deletions backend/onyx/context/search/models.py
@@ -1,3 +1,4 @@
+from collections.abc import Sequence
from datetime import datetime
from typing import Any

@@ -355,6 +356,97 @@ class SearchDoc(BaseModel):
    secondary_owners: list[str] | None = None
    is_internet: bool = False

    @classmethod
    def chunks_or_sections_to_search_docs(
cubic-dev-ai bot commented on Sep 24, 2025 (backend/onyx/context/search/models.py, line 360):

chunks_or_sections_to_search_docs drops is_relevant and relevance_explanation from InferenceChunk, unlike the specific converters, causing loss of relevance data for downstream consumers.
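
A minimal sketch of the fix the comment points at, mirroring the field handling in from_inference_chunk below (an illustration, not part of this PR's diff):

# Sketch (assumed fix): forward the relevance fields inside
# chunks_or_sections_to_search_docs so downstream consumers keep them.
search_docs = [
    cls(
        document_id=(
            chunk := (
                item.center_chunk
                if isinstance(item, InferenceSection)
                else item
            )
        ).document_id,
        chunk_ind=chunk.chunk_id,
        semantic_identifier=chunk.semantic_identifier or "Unknown",
        link=chunk.source_links[0] if chunk.source_links else None,
        blurb=chunk.blurb,
        source_type=chunk.source_type,
        boost=chunk.boost,
        hidden=chunk.hidden,
        metadata=chunk.metadata,
        score=chunk.score,
        is_relevant=chunk.is_relevant,  # preserved, as in from_inference_chunk
        relevance_explanation=chunk.relevance_explanation,  # preserved
        match_highlights=chunk.match_highlights,
        updated_at=chunk.updated_at,
        primary_owners=chunk.primary_owners,
        secondary_owners=chunk.secondary_owners,
        is_internet=False,
    )
    for item in items
]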

        cls,
        items: "Sequence[InferenceChunk | InferenceSection] | None",
    ) -> list["SearchDoc"]:
        """Convert a sequence of InferenceChunk or InferenceSection objects to SearchDoc objects."""
        if not items:
            return []

        search_docs = [
            cls(
                document_id=(
                    chunk := (
                        item.center_chunk
                        if isinstance(item, InferenceSection)
                        else item
                    )
                ).document_id,
                chunk_ind=chunk.chunk_id,
                semantic_identifier=chunk.semantic_identifier or "Unknown",
                link=chunk.source_links[0] if chunk.source_links else None,
                blurb=chunk.blurb,
                source_type=chunk.source_type,
                boost=chunk.boost,
                hidden=chunk.hidden,
                metadata=chunk.metadata,
                score=chunk.score,
                match_highlights=chunk.match_highlights,
                updated_at=chunk.updated_at,
                primary_owners=chunk.primary_owners,
                secondary_owners=chunk.secondary_owners,
                is_internet=False,
            )
            for item in items
        ]

        return search_docs

    @classmethod
    def from_inference_section(
        cls, inference_section: "InferenceSection"
    ) -> "SearchDoc":
        """Convert an InferenceSection to a SearchDoc using the center chunk's data."""
        chunk = inference_section.center_chunk
        return cls(
            document_id=chunk.document_id,
            chunk_ind=chunk.chunk_id,
            semantic_identifier=chunk.semantic_identifier or "Unknown",
            link=chunk.source_links[0] if chunk.source_links else None,
            blurb=chunk.blurb,
            source_type=chunk.source_type,
            boost=chunk.boost,
            hidden=chunk.hidden,
            metadata=chunk.metadata,
            score=chunk.score,
            is_relevant=chunk.is_relevant,
            relevance_explanation=chunk.relevance_explanation,
            match_highlights=chunk.match_highlights,
            updated_at=chunk.updated_at,
            primary_owners=chunk.primary_owners,
            secondary_owners=chunk.secondary_owners,
            is_internet=False,
        )

    @classmethod
    def from_inference_chunk(cls, inference_chunk: "InferenceChunk") -> "SearchDoc":
        """Convert an InferenceChunk to a SearchDoc."""
        return cls(
            document_id=inference_chunk.document_id,
            chunk_ind=inference_chunk.chunk_id,
            semantic_identifier=inference_chunk.semantic_identifier or "Unknown",
            link=(
                inference_chunk.source_links[0]
                if inference_chunk.source_links
                else None
            ),
            blurb=inference_chunk.blurb,
            source_type=inference_chunk.source_type,
            boost=inference_chunk.boost,
            hidden=inference_chunk.hidden,
            metadata=inference_chunk.metadata,
            score=inference_chunk.score,
            is_relevant=inference_chunk.is_relevant,
            relevance_explanation=inference_chunk.relevance_explanation,
            match_highlights=inference_chunk.match_highlights,
            updated_at=inference_chunk.updated_at,
            primary_owners=inference_chunk.primary_owners,
            secondary_owners=inference_chunk.secondary_owners,
            is_internet=False,
        )

    def model_dump(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]:  # type: ignore
        initial_dict = super().model_dump(*args, **kwargs)  # type: ignore
        initial_dict["updated_at"] = (
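With the converter now a classmethod on the model, the call sites all follow one pattern; a short usage sketch (doc_list stands in for any list of InferenceChunk or InferenceSection objects, as in the reducer above):

from onyx.context.search.models import SavedSearchDoc
from onyx.context.search.models import SearchDoc

# Convert retrieved chunks/sections via the classmethod, then wrap them
# as SavedSearchDocs the way the updated call sites do.
search_docs = SearchDoc.chunks_or_sections_to_search_docs(doc_list)
retrieved_saved_search_docs = [
    SavedSearchDoc.from_search_doc(search_doc, db_doc_id=0)
    for search_doc in search_docs
]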
34 changes: 0 additions & 34 deletions backend/onyx/context/search/utils.py
@@ -118,40 +118,6 @@ def inference_section_from_chunks(
    )


def chunks_or_sections_to_search_docs(
    items: Sequence[InferenceChunk | InferenceSection] | None,
) -> list[SearchDoc]:
    if not items:
        return []

    search_docs = [
        SearchDoc(
            document_id=(
                chunk := (
                    item.center_chunk if isinstance(item, InferenceSection) else item
                )
            ).document_id,
            chunk_ind=chunk.chunk_id,
            semantic_identifier=chunk.semantic_identifier or "Unknown",
            link=chunk.source_links[0] if chunk.source_links else None,
            blurb=chunk.blurb,
            source_type=chunk.source_type,
            boost=chunk.boost,
            hidden=chunk.hidden,
            metadata=chunk.metadata,
            score=chunk.score,
            match_highlights=chunk.match_highlights,
            updated_at=chunk.updated_at,
            primary_owners=chunk.primary_owners,
            secondary_owners=chunk.secondary_owners,
            is_internet=False,
        )
        for item in items
    ]

    return search_docs


def remove_stop_words_and_punctuation(keywords: list[str]) -> list[str]:
    try:
        # Re-tokenize using the NLTK tokenizer for better matching
3 changes: 1 addition & 2 deletions backend/onyx/db/chat.py
@@ -34,7 +34,6 @@
from onyx.context.search.models import RetrievalDocs
from onyx.context.search.models import SavedSearchDoc
from onyx.context.search.models import SearchDoc as ServerSearchDoc
-from onyx.context.search.utils import chunks_or_sections_to_search_docs
from onyx.db.models import AgentSearchMetrics
from onyx.db.models import AgentSubQuery
from onyx.db.models import AgentSubQuestion
@@ -1147,7 +1146,7 @@ def log_agent_sub_question_results(
        db_session.add(sub_query_object)
        db_session.commit()

-        search_docs = chunks_or_sections_to_search_docs(
+        search_docs = ServerSearchDoc.chunks_or_sections_to_search_docs(
            sub_query.retrieved_documents
        )
        for doc in search_docs:
3 changes: 1 addition & 2 deletions backend/onyx/server/query_and_chat/query_backend.py
@@ -14,7 +14,6 @@
from onyx.context.search.preprocessing.access_filters import (
    build_access_filters_for_user,
)
-from onyx.context.search.utils import chunks_or_sections_to_search_docs
from onyx.db.chat import get_chat_messages_by_session
from onyx.db.chat import get_chat_session_by_id
from onyx.db.chat import get_chat_sessions_by_user
@@ -74,7 +73,7 @@ def admin_search(
    )
    matching_chunks = document_index.admin_retrieval(query=query, filters=final_filters)

-    documents = chunks_or_sections_to_search_docs(matching_chunks)
+    documents = SearchDoc.chunks_or_sections_to_search_docs(matching_chunks)

    # Deduplicate documents by id
    deduplicated_documents: list[SearchDoc] = []
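The diff is truncated at the dedup step; a hypothetical sketch of such a loop, assuming document_id is the dedup key:

# Hypothetical continuation (not shown in the diff): keep the first
# SearchDoc seen per document_id.
seen_ids: set[str] = set()
deduplicated_documents: list[SearchDoc] = []
for doc in documents:
    if doc.document_id not in seen_ids:
        seen_ids.add(doc.document_id)
        deduplicated_documents.append(doc)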