Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ HINDSIGHT_API_LOG_LEVEL=info
# HINDSIGHT_API_DATABASE_URL=postgresql://user:pass@host:5432/db
# HINDSIGHT_API_DATABASE_SCHEMA=public # PostgreSQL schema name (default: public)

# Vector Extension (Optional - uses pgvector by default)
# Options: "pgvector" (default), "vchord", "pgvectorscale" (DiskANN)
# HINDSIGHT_API_VECTOR_EXTENSION=pgvector
# For Azure PostgreSQL with DiskANN, keep the value "pgvectorscale";
# pg_diskann is auto-detected when the vectorscale extension is not installed:
# HINDSIGHT_API_VECTOR_EXTENSION=pgvectorscale

# Embeddings Configuration (Optional - uses local by default)
# Provider: "local" (default) or "tei" (HuggingFace Text Embeddings Inference)
# HINDSIGHT_API_EMBEDDINGS_PROVIDER=local
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,26 @@ def _detect_vector_extension() -> str:

# Validate configured extension is installed
if vector_extension == "pgvectorscale":
# pgvectorscale requires pgvector
# pgvectorscale/DiskANN requires pgvector
pgvector_check = conn.execute(text("SELECT 1 FROM pg_extension WHERE extname = 'vector'")).scalar()
if not pgvector_check:
raise RuntimeError(
"pgvectorscale requires pgvector. Install with: CREATE EXTENSION vector; CREATE EXTENSION vectorscale CASCADE;"
"DiskANN requires pgvector. Install with: CREATE EXTENSION vector; then vectorscale or pg_diskann CASCADE;"
)
# Check for either vectorscale (open source) or pg_diskann (Azure)
vectorscale_check = conn.execute(text("SELECT 1 FROM pg_extension WHERE extname = 'vectorscale'")).scalar()
if not vectorscale_check:
pg_diskann_check = conn.execute(text("SELECT 1 FROM pg_extension WHERE extname = 'pg_diskann'")).scalar()

if vectorscale_check:
return "pgvectorscale"
elif pg_diskann_check:
return "pg_diskann"
else:
raise RuntimeError(
"Configured vector extension 'pgvectorscale' not found. Install it with: CREATE EXTENSION vectorscale CASCADE;"
"Configured vector extension 'pgvectorscale' not found. Install either:\n"
" - pgvectorscale: CREATE EXTENSION vectorscale CASCADE;\n"
" - pg_diskann (Azure): CREATE EXTENSION pg_diskann CASCADE;"
)
return "pgvectorscale"
elif vector_extension == "vchord":
vchord_check = conn.execute(text("SELECT 1 FROM pg_extension WHERE extname = 'vchord'")).scalar()
if not vchord_check:
Expand Down Expand Up @@ -311,6 +319,13 @@ def upgrade() -> None:
USING diskann (embedding vector_cosine_ops)
WITH (num_neighbors = 50)
""")
elif vector_ext == "pg_diskann":
# Use DiskANN index for pg_diskann (Azure)
op.execute("""
CREATE INDEX idx_memory_units_embedding ON memory_units
USING diskann (embedding vector_cosine_ops)
WITH (max_neighbors = 50)
""")
elif vector_ext == "vchord":
# Use vchordrq index for vchord (supports high-dimensional embeddings)
op.execute("""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,18 +39,26 @@ def _detect_vector_extension() -> str:

# Validate configured extension is installed
if vector_extension == "pgvectorscale":
# pgvectorscale requires pgvector
# pgvectorscale/DiskANN requires pgvector
pgvector_check = conn.execute(text("SELECT 1 FROM pg_extension WHERE extname = 'vector'")).scalar()
if not pgvector_check:
raise RuntimeError(
"pgvectorscale requires pgvector. Install with: CREATE EXTENSION vector; CREATE EXTENSION vectorscale CASCADE;"
"DiskANN requires pgvector. Install with: CREATE EXTENSION vector; then vectorscale or pg_diskann CASCADE;"
)
# Check for either vectorscale (open source) or pg_diskann (Azure)
vectorscale_check = conn.execute(text("SELECT 1 FROM pg_extension WHERE extname = 'vectorscale'")).scalar()
if not vectorscale_check:
pg_diskann_check = conn.execute(text("SELECT 1 FROM pg_extension WHERE extname = 'pg_diskann'")).scalar()

if vectorscale_check:
return "pgvectorscale"
elif pg_diskann_check:
return "pg_diskann"
else:
raise RuntimeError(
"Configured vector extension 'pgvectorscale' not found. Install it with: CREATE EXTENSION vectorscale CASCADE;"
"Configured vector extension 'pgvectorscale' not found. Install either:\n"
" - pgvectorscale: CREATE EXTENSION vectorscale CASCADE;\n"
" - pg_diskann (Azure): CREATE EXTENSION pg_diskann CASCADE;"
)
return "pgvectorscale"
elif vector_extension == "vchord":
vchord_check = conn.execute(text("SELECT 1 FROM pg_extension WHERE extname = 'vchord'")).scalar()
if not vchord_check:
Expand Down Expand Up @@ -155,6 +163,12 @@ def upgrade() -> None:
USING diskann (embedding vector_cosine_ops)
WITH (num_neighbors = 50)
""")
elif vector_ext == "pg_diskann":
op.execute(f"""
CREATE INDEX idx_learnings_embedding ON {schema}learnings
USING diskann (embedding vector_cosine_ops)
WITH (max_neighbors = 50)
""")
elif vector_ext == "vchord":
op.execute(f"""
CREATE INDEX idx_learnings_embedding ON {schema}learnings
Expand Down Expand Up @@ -228,6 +242,12 @@ def upgrade() -> None:
USING diskann (embedding vector_cosine_ops)
WITH (num_neighbors = 50)
""")
elif vector_ext == "pg_diskann":
op.execute(f"""
CREATE INDEX idx_pinned_reflections_embedding ON {schema}pinned_reflections
USING diskann (embedding vector_cosine_ops)
WITH (max_neighbors = 50)
""")
elif vector_ext == "vchord":
op.execute(f"""
CREATE INDEX idx_pinned_reflections_embedding ON {schema}pinned_reflections
Expand Down
40 changes: 29 additions & 11 deletions hindsight-api/hindsight_api/migrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,30 +42,38 @@ def _detect_vector_extension(conn, vector_extension: str = "pgvector") -> str:
vector_extension: Configured extension ("pgvector", "vchord", or "pgvectorscale")

Returns:
"pgvector", "vchord", or "pgvectorscale"
"pgvector", "vchord", "pgvectorscale", or "pg_diskann"

Raises:
RuntimeError: If configured extension is not installed
"""
# Verify the configured extension is installed
if vector_extension == "pgvectorscale":
# pgvectorscale requires pgvector to be installed first
# pgvectorscale/DiskANN requires pgvector to be installed first
pgvector_check = conn.execute(text("SELECT 1 FROM pg_extension WHERE extname = 'vector'")).scalar()
if not pgvector_check:
raise RuntimeError(
"pgvectorscale requires pgvector to be installed. "
"Install it with: CREATE EXTENSION vector; CREATE EXTENSION vectorscale CASCADE;"
"DiskANN (pgvectorscale/pg_diskann) requires pgvector to be installed. "
"Install it with: CREATE EXTENSION vector; then CREATE EXTENSION vectorscale CASCADE; (or pg_diskann on Azure)"
)

# Check for vectorscale extension
# Check for either vectorscale (open source) or pg_diskann (Azure)
vectorscale_check = conn.execute(text("SELECT 1 FROM pg_extension WHERE extname = 'vectorscale'")).scalar()
if not vectorscale_check:
pg_diskann_check = conn.execute(text("SELECT 1 FROM pg_extension WHERE extname = 'pg_diskann'")).scalar()

if vectorscale_check:
logger.debug("Using vector extension: pgvectorscale (DiskANN)")
return "pgvectorscale"
elif pg_diskann_check:
logger.debug("Using vector extension: pg_diskann (Azure DiskANN)")
return "pg_diskann" # Return distinct name for parameter handling
else:
raise RuntimeError(
"Configured vector extension 'pgvectorscale' not found. "
"Install it with: CREATE EXTENSION vectorscale CASCADE;"
"Install either:\n"
" - pgvectorscale (open source): CREATE EXTENSION vectorscale CASCADE;\n"
" - pg_diskann (Azure): CREATE EXTENSION pg_diskann CASCADE;"
)
logger.debug("Using configured vector extension: pgvectorscale (DiskANN)")
return "pgvectorscale"
elif vector_extension == "vchord":
vchord_check = conn.execute(text("SELECT 1 FROM pg_extension WHERE extname = 'vchord'")).scalar()
if not vchord_check:
Expand Down Expand Up @@ -609,7 +617,7 @@ def ensure_vector_extension(
]

# Determine target index type
if target_ext == "pgvectorscale":
if target_ext in ("pgvectorscale", "pg_diskann"):
target_index_type = "diskann"
elif target_ext == "vchord":
target_index_type = "vchordrq"
Expand Down Expand Up @@ -713,7 +721,7 @@ def ensure_vector_extension(

# Create new index with appropriate type
if target_ext == "pgvectorscale":
logger.info(f"Creating DiskANN index on {table_name}")
logger.info(f"Creating DiskANN index on {table_name} (pgvectorscale)")
conn.execute(
text(f"""
CREATE INDEX IF NOT EXISTS {index_name}
Expand All @@ -722,6 +730,16 @@ def ensure_vector_extension(
WITH (num_neighbors = 50)
""")
)
elif target_ext == "pg_diskann":
logger.info(f"Creating DiskANN index on {table_name} (pg_diskann/Azure)")
conn.execute(
text(f"""
CREATE INDEX IF NOT EXISTS {index_name}
ON {schema_name}.{table_name}
USING diskann (embedding vector_cosine_ops)
WITH (max_neighbors = 50)
""")
)
elif target_ext == "vchord":
logger.info(f"Creating vchordrq index on {table_name}")
conn.execute(
Expand Down
8 changes: 5 additions & 3 deletions hindsight-docs/docs/developer/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,15 @@ Hindsight supports three PostgreSQL vector extensions:
- Most widely deployed and supported

#### **pgvectorscale** (DiskANN - recommended for scale) ⭐
- Disk-based index using StreamingDiskANN algorithm (by Timescale)
- Disk-based index using StreamingDiskANN algorithm
- **28x lower p95 latency** and **16x higher throughput** vs dedicated vector DBs
- **60-75% cost reduction** at scale (SSDs cheaper than RAM)
- Superior filtering performance with streaming retrieval model
- Optimized for large datasets (10M+ vectors)
- Requires both `pgvector` and `vectorscale` extensions
- **Installation:** `CREATE EXTENSION vector; CREATE EXTENSION vectorscale CASCADE;`
- Works with either the open-source **vectorscale** extension or Azure's **pg_diskann** extension; both require `pgvector` to be installed first
- **Installation:**
  - Open source/self-hosted: `CREATE EXTENSION vector; CREATE EXTENSION vectorscale CASCADE;`
  - Azure PostgreSQL: `CREATE EXTENSION vector; CREATE EXTENSION pg_diskann CASCADE;`

#### **vchord** (vchordrq)
- Alternative high-performance vector index
Expand Down
3 changes: 2 additions & 1 deletion hindsight-docs/docs/developer/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ Hindsight requires PostgreSQL with the **pgvector** extension for vector similar
**For production**, use an external PostgreSQL with pgvector:
- **Supabase** — Managed PostgreSQL with pgvector built-in
- **Neon** — Serverless PostgreSQL with pgvector
- **AWS RDS** / **Cloud SQL** / **Azure** — With pgvector extension enabled
- **Azure Database for PostgreSQL** — With pgvector and pg_diskann (DiskANN) support
- **AWS RDS** / **Cloud SQL** — With pgvector extension enabled
- **Self-hosted** — PostgreSQL 14+ with pgvector installed

### LLM Provider
Expand Down
Loading