Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions hindsight-api/hindsight_api/engine/search/graph_retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ async def _retrieve_with_conn(
entry_points = await conn.fetch(
f"""
SELECT id, text, context, event_date, occurred_start, occurred_end,
- mentioned_at, embedding, fact_type, document_id, chunk_id, tags,
+ mentioned_at, fact_type, document_id, chunk_id, tags,
1 - (embedding <=> $1::vector) AS similarity
FROM {fq_table("memory_units")}
WHERE bank_id = $2
Expand Down Expand Up @@ -216,7 +216,7 @@ async def _retrieve_with_conn(
neighbors = await conn.fetch(
f"""
SELECT mu.id, mu.text, mu.context, mu.occurred_start, mu.occurred_end,
- mu.mentioned_at, mu.embedding, mu.fact_type,
+ mu.mentioned_at, mu.fact_type,
mu.document_id, mu.chunk_id, mu.tags,
ml.weight, ml.link_type, ml.from_unit_id
FROM {fq_table("memory_links")} ml
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ async def _find_semantic_seeds(
rows = await conn.fetch(
f"""
SELECT id, text, context, event_date, occurred_start, occurred_end,
- mentioned_at, embedding, fact_type, document_id, chunk_id, tags,
+ mentioned_at, fact_type, document_id, chunk_id, tags,
1 - (embedding <=> $1::vector) AS similarity
FROM {fq_table("memory_units")}
WHERE bank_id = $2
Expand Down Expand Up @@ -216,7 +216,7 @@ async def retrieve(
-- Only exclude the actual seed observations
SELECT
mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start,
- mu.occurred_end, mu.mentioned_at, mu.embedding,
+ mu.occurred_end, mu.mentioned_at,
mu.fact_type, mu.document_id, mu.chunk_id, mu.tags,
COUNT(DISTINCT cs.source_id)::float AS score
FROM all_connected_sources cs
Expand All @@ -239,7 +239,7 @@ async def retrieve(
f"""
SELECT
mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start,
- mu.occurred_end, mu.mentioned_at, mu.embedding,
+ mu.occurred_end, mu.mentioned_at,
mu.fact_type, mu.document_id, mu.chunk_id, mu.tags,
COUNT(*)::float AS score
FROM {fq_table("unit_entities")} seed_ue
Expand All @@ -264,7 +264,7 @@ async def retrieve(
f"""
SELECT DISTINCT ON (mu.id)
mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start,
- mu.occurred_end, mu.mentioned_at, mu.embedding,
+ mu.occurred_end, mu.mentioned_at,
mu.fact_type, mu.document_id, mu.chunk_id, mu.tags,
ml.weight + 1.0 AS score
FROM {fq_table("memory_links")} ml
Expand All @@ -291,7 +291,7 @@ async def retrieve(
WITH outgoing AS (
-- Links FROM seeds TO other facts
SELECT mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start,
- mu.occurred_end, mu.mentioned_at, mu.embedding,
+ mu.occurred_end, mu.mentioned_at,
mu.fact_type, mu.document_id, mu.chunk_id, mu.tags,
ml.weight
FROM {fq_table("memory_links")} ml
Expand All @@ -305,7 +305,7 @@ async def retrieve(
incoming AS (
-- Links FROM other facts TO seeds (reverse direction)
SELECT mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start,
- mu.occurred_end, mu.mentioned_at, mu.embedding,
+ mu.occurred_end, mu.mentioned_at,
mu.fact_type, mu.document_id, mu.chunk_id, mu.tags,
ml.weight
FROM {fq_table("memory_links")} ml
Expand All @@ -323,12 +323,12 @@ async def retrieve(
)
SELECT DISTINCT ON (id)
id, text, context, event_date, occurred_start,
- occurred_end, mentioned_at, embedding,
+ occurred_end, mentioned_at,
fact_type, document_id, chunk_id, tags,
(MAX(weight) * 0.5) AS score
FROM combined
GROUP BY id, text, context, event_date, occurred_start,
- occurred_end, mentioned_at, embedding,
+ occurred_end, mentioned_at,
fact_type, document_id, chunk_id, tags
ORDER BY id, score DESC
LIMIT $4
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ async def fetch_memory_units_by_ids(
rows = await conn.fetch(
f"""
SELECT id, text, context, event_date, occurred_start, occurred_end,
- mentioned_at, embedding, fact_type, document_id, chunk_id, tags
+ mentioned_at, fact_type, document_id, chunk_id, tags
FROM {fq_table("memory_units")}
WHERE id = ANY($1::uuid[])
AND fact_type = $2
Expand Down
18 changes: 9 additions & 9 deletions hindsight-api/hindsight_api/engine/search/retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ async def retrieve_semantic_bm25_combined(
results = await conn.fetch(
f"""
WITH semantic_ranked AS (
- SELECT id, text, context, event_date, occurred_start, occurred_end, mentioned_at, embedding, fact_type, document_id, chunk_id, tags,
+ SELECT id, text, context, event_date, occurred_start, occurred_end, mentioned_at, fact_type, document_id, chunk_id, tags,
1 - (embedding <=> $1::vector) AS similarity,
NULL::float AS bm25_score,
'semantic' AS source,
Expand All @@ -139,7 +139,7 @@ async def retrieve_semantic_bm25_combined(
AND (1 - (embedding <=> $1::vector)) >= 0.3
{tags_clause}
)
- SELECT id, text, context, event_date, occurred_start, occurred_end, mentioned_at, embedding, fact_type, document_id, chunk_id, tags,
+ SELECT id, text, context, event_date, occurred_start, occurred_end, mentioned_at, fact_type, document_id, chunk_id, tags,
similarity, bm25_score, source
FROM semantic_ranked
WHERE rn <= $4
Expand Down Expand Up @@ -194,7 +194,7 @@ async def retrieve_semantic_bm25_combined(
# Single query template with backend-specific parts injected
query = f"""
WITH semantic_ranked AS (
- SELECT id, text, context, event_date, occurred_start, occurred_end, mentioned_at, embedding, fact_type, document_id, chunk_id, tags,
+ SELECT id, text, context, event_date, occurred_start, occurred_end, mentioned_at, fact_type, document_id, chunk_id, tags,
1 - (embedding <=> $1::vector) AS similarity,
NULL::float AS bm25_score,
'semantic' AS source,
Expand All @@ -207,7 +207,7 @@ async def retrieve_semantic_bm25_combined(
{tags_clause}
),
bm25_ranked AS (
- SELECT id, text, context, event_date, occurred_start, occurred_end, mentioned_at, embedding, fact_type, document_id, chunk_id, tags,
+ SELECT id, text, context, event_date, occurred_start, occurred_end, mentioned_at, fact_type, document_id, chunk_id, tags,
NULL::float AS similarity,
{bm25_score_expr} AS bm25_score,
'bm25' AS source,
Expand All @@ -219,12 +219,12 @@ async def retrieve_semantic_bm25_combined(
{tags_clause}
),
semantic AS (
- SELECT id, text, context, event_date, occurred_start, occurred_end, mentioned_at, embedding, fact_type, document_id, chunk_id, tags,
+ SELECT id, text, context, event_date, occurred_start, occurred_end, mentioned_at, fact_type, document_id, chunk_id, tags,
similarity, bm25_score, source
FROM semantic_ranked WHERE rn <= $4
),
bm25 AS (
- SELECT id, text, context, event_date, occurred_start, occurred_end, mentioned_at, embedding, fact_type, document_id, chunk_id, tags,
+ SELECT id, text, context, event_date, occurred_start, occurred_end, mentioned_at, fact_type, document_id, chunk_id, tags,
similarity, bm25_score, source
FROM bm25_ranked WHERE rn <= $4
)
Expand Down Expand Up @@ -301,7 +301,7 @@ async def retrieve_temporal_combined(
entry_points = await conn.fetch(
f"""
WITH ranked_entries AS (
- SELECT id, text, context, event_date, occurred_start, occurred_end, mentioned_at, embedding, fact_type, document_id, chunk_id, tags,
+ SELECT id, text, context, event_date, occurred_start, occurred_end, mentioned_at, fact_type, document_id, chunk_id, tags,
1 - (embedding <=> $1::vector) AS similarity,
ROW_NUMBER() OVER (PARTITION BY fact_type ORDER BY COALESCE(occurred_start, mentioned_at, occurred_end) DESC, embedding <=> $1::vector) AS rn
FROM {fq_table("memory_units")}
Expand All @@ -321,7 +321,7 @@ async def retrieve_temporal_combined(
AND (1 - (embedding <=> $1::vector)) >= $6
{tags_clause}
)
- SELECT id, text, context, event_date, occurred_start, occurred_end, mentioned_at, embedding, fact_type, document_id, chunk_id, tags, similarity
+ SELECT id, text, context, event_date, occurred_start, occurred_end, mentioned_at, fact_type, document_id, chunk_id, tags, similarity
FROM ranked_entries
WHERE rn <= 10
""",
Expand Down Expand Up @@ -401,7 +401,7 @@ async def retrieve_temporal_combined(

neighbors = await conn.fetch(
f"""
- SELECT mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start, mu.occurred_end, mu.mentioned_at, mu.embedding, mu.fact_type, mu.document_id, mu.chunk_id, mu.tags,
+ SELECT mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start, mu.occurred_end, mu.mentioned_at, mu.fact_type, mu.document_id, mu.chunk_id, mu.tags,
ml.weight, ml.link_type, ml.from_unit_id,
1 - (mu.embedding <=> $1::vector) AS similarity
FROM {fq_table("memory_links")} ml
Expand Down
3 changes: 0 additions & 3 deletions hindsight-api/hindsight_api/engine/search/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ class RetrievalResult:
mentioned_at: datetime | None = None
document_id: str | None = None
chunk_id: str | None = None
- embedding: list[float] | None = None
tags: list[str] | None = None # Visibility scope tags

# Retrieval-specific scores (only one will be set depending on retrieval method)
Expand All @@ -70,7 +69,6 @@ def from_db_row(cls, row: dict[str, Any]) -> "RetrievalResult":
mentioned_at=row.get("mentioned_at"),
document_id=row.get("document_id"),
chunk_id=row.get("chunk_id"),
- embedding=row.get("embedding"),
tags=row.get("tags"),
similarity=row.get("similarity"),
bm25_score=row.get("bm25_score"),
Expand Down Expand Up @@ -154,7 +152,6 @@ def to_dict(self) -> dict[str, Any]:
"mentioned_at": self.retrieval.mentioned_at,
"document_id": self.retrieval.document_id,
"chunk_id": self.retrieval.chunk_id,
- "embedding": self.retrieval.embedding,
"tags": self.retrieval.tags,
"semantic_similarity": self.retrieval.similarity,
"bm25_score": self.retrieval.bm25_score,
Expand Down
Loading