From be3becfdc8be396b25798201d0ce28192c283632 Mon Sep 17 00:00:00 2001 From: HamadaSalhab Date: Fri, 8 Nov 2024 04:13:20 +0300 Subject: [PATCH] Create docs with custom embeddings --- agents-api/agents_api/activities/embed_docs.py | 13 +++++++++---- agents-api/agents_api/activities/types.py | 1 + agents-api/agents_api/autogen/Docs.py | 9 +++++---- agents-api/agents_api/autogen/Tools.py | 1 + agents-api/agents_api/models/docs/create_doc.py | 1 + agents-api/agents_api/routers/docs/create_doc.py | 4 ++++ integrations-service/integrations/autogen/Docs.py | 9 +++++---- integrations-service/integrations/autogen/Tools.py | 1 + typespec/docs/models.tsp | 1 - .../@typespec/openapi3/openapi-1.0.0.yaml | 14 +++++++++++++- 10 files changed, 40 insertions(+), 14 deletions(-) diff --git a/agents-api/agents_api/activities/embed_docs.py b/agents-api/agents_api/activities/embed_docs.py index 0dbf7f03b..807a11f0f 100644 --- a/agents-api/agents_api/activities/embed_docs.py +++ b/agents-api/agents_api/activities/embed_docs.py @@ -33,10 +33,15 @@ async def embed_batch(snippets): ] ) - embeddings = reduce( - operator.add, - await asyncio.gather(*[embed_batch(snippets) for snippets in batched_snippets]), - ) + if payload.embeddings: + embeddings = [payload.embeddings] + else: + embeddings = reduce( + operator.add, + await asyncio.gather( + *[embed_batch(snippets) for snippets in batched_snippets] + ), + ) embed_snippets_query( developer_id=payload.developer_id, diff --git a/agents-api/agents_api/activities/types.py b/agents-api/agents_api/activities/types.py index c2af67936..6b1be9253 100644 --- a/agents-api/agents_api/activities/types.py +++ b/agents-api/agents_api/activities/types.py @@ -34,3 +34,4 @@ class EmbedDocsPayload(BaseModel): embed_instruction: str | None title: str | None = None include_title: bool = False # Need to be a separate parameter for the activity + embeddings: list[float] | list[list[float]] | None = None diff --git a/agents-api/agents_api/autogen/Docs.py b/agents-api/agents_api/autogen/Docs.py index 45b57bbcf..983fccc45 100644 --- a/agents-api/agents_api/autogen/Docs.py +++ b/agents-api/agents_api/autogen/Docs.py @@ -42,6 +42,10 @@ class CreateDocRequest(BaseModel): """ Contents of the document """ + embeddings: list[float] | list[list[float]] | None = None + """ + Embeddings for the document + """ embed_instruction: str | None = None """ Instruction for the embedding model. @@ -66,10 +70,7 @@ class Doc(BaseModel): """ Contents of the document """ - embeddings: Annotated[ - list[float] | list[list[float]] | None, - Field(json_schema_extra={"readOnly": True}), - ] = None + embeddings: list[float] | list[list[float]] | None = None """ Embeddings for the document """ diff --git a/agents-api/agents_api/autogen/Tools.py b/agents-api/agents_api/autogen/Tools.py index e4940bfe1..0361d9986 100644 --- a/agents-api/agents_api/autogen/Tools.py +++ b/agents-api/agents_api/autogen/Tools.py @@ -12,6 +12,7 @@ BaseModel, ConfigDict, Field, + RootModel, StrictBool, ) diff --git a/agents-api/agents_api/models/docs/create_doc.py b/agents-api/agents_api/models/docs/create_doc.py index 3b9c8c9f7..bc9648cf3 100644 --- a/agents-api/agents_api/models/docs/create_doc.py +++ b/agents-api/agents_api/models/docs/create_doc.py @@ -68,6 +68,7 @@ def create_doc( doc_data = data.model_dump() doc_data.pop("embed_instruction", None) + doc_data.pop("embeddings", None) content = doc_data.pop("content") doc_data["owner_type"] = owner_type diff --git a/agents-api/agents_api/routers/docs/create_doc.py b/agents-api/agents_api/routers/docs/create_doc.py index 1aaf664c1..12fc524c5 100644 --- a/agents-api/agents_api/routers/docs/create_doc.py +++ b/agents-api/agents_api/routers/docs/create_doc.py @@ -21,6 +21,7 @@ async def run_embed_docs_task( doc_id: UUID, title: str, content: list[str], + embeddings: list[float] | list[list[float]] | None = None, embed_instruction: str | None = None, job_id: UUID, background_tasks: BackgroundTasks, @@ -36,6 +37,7 @@ async def run_embed_docs_task( content=content, title=title, embed_instruction=embed_instruction, + embeddings=embeddings, ) handle = await client.start_workflow( @@ -88,6 +90,7 @@ async def create_user_doc( doc_id=doc.id, title=doc.title, content=doc.content, + embeddings=data.embeddings, embed_instruction=data.embed_instruction, job_id=embed_job_id, background_tasks=background_tasks, @@ -119,6 +122,7 @@ async def create_agent_doc( doc_id=doc.id, title=doc.title, content=doc.content, + embeddings=data.embeddings, embed_instruction=data.embed_instruction, job_id=embed_job_id, background_tasks=background_tasks, diff --git a/integrations-service/integrations/autogen/Docs.py b/integrations-service/integrations/autogen/Docs.py index 45b57bbcf..983fccc45 100644 --- a/integrations-service/integrations/autogen/Docs.py +++ b/integrations-service/integrations/autogen/Docs.py @@ -42,6 +42,10 @@ class CreateDocRequest(BaseModel): """ Contents of the document """ + embeddings: list[float] | list[list[float]] | None = None + """ + Embeddings for the document + """ embed_instruction: str | None = None """ Instruction for the embedding model. @@ -66,10 +70,7 @@ class Doc(BaseModel): """ Contents of the document """ - embeddings: Annotated[ - list[float] | list[list[float]] | None, - Field(json_schema_extra={"readOnly": True}), - ] = None + embeddings: list[float] | list[list[float]] | None = None """ Embeddings for the document """ diff --git a/integrations-service/integrations/autogen/Tools.py b/integrations-service/integrations/autogen/Tools.py index e4940bfe1..0361d9986 100644 --- a/integrations-service/integrations/autogen/Tools.py +++ b/integrations-service/integrations/autogen/Tools.py @@ -12,6 +12,7 @@ BaseModel, ConfigDict, Field, + RootModel, StrictBool, ) diff --git a/typespec/docs/models.tsp b/typespec/docs/models.tsp index 9c5cc9fa1..6ef008fff 100644 --- a/typespec/docs/models.tsp +++ b/typespec/docs/models.tsp @@ -25,7 +25,6 @@ model Doc { content: string | string[]; /** Embeddings for the document */ - @visibility("read") embeddings?: float32[] | float32[][]; } diff --git a/typespec/tsp-output/@typespec/openapi3/openapi-1.0.0.yaml b/typespec/tsp-output/@typespec/openapi3/openapi-1.0.0.yaml index 9bbb879f6..d3aecf113 100644 --- a/typespec/tsp-output/@typespec/openapi3/openapi-1.0.0.yaml +++ b/typespec/tsp-output/@typespec/openapi3/openapi-1.0.0.yaml @@ -2486,6 +2486,19 @@ components: items: type: string description: Contents of the document + embeddings: + anyOf: + - type: array + items: + type: number + format: float + - type: array + items: + type: array + items: + type: number + format: float + description: Embeddings for the document embed_instruction: type: string nullable: true @@ -2536,7 +2549,6 @@ components: type: number format: float description: Embeddings for the document - readOnly: true Docs.DocOwner: type: object required: