6 changes: 2 additions & 4 deletions .github/workflows/ci.yml
@@ -255,16 +255,14 @@ jobs:
NEO4J_URI: ${{ secrets.NEO4J_STAGING_URI }}
NEO4J_USER: neo4j
NEO4J_PASSWORD: ${{ secrets.NEO4J_STAGING_PASSWORD }}
# Anthropic API for Graphiti entity extraction
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
ANTHROPIC_MODEL: claude-sonnet-4-20250514
# Vertex AI for embeddings
EMBEDDING_PROVIDER: vertex-ai
VERTEX_AI_PROJECT: ai-knowledge-base-42
VERTEX_AI_LOCATION: us-central1
# LLM configuration
LLM_PROVIDER: gemini
GEMINI_MODEL_ID: gemini-2.0-flash
GEMINI_INTAKE_MODEL: gemini-2.5-flash
GEMINI_CONVERSATION_MODEL: gemini-2.5-flash
GCP_PROJECT_ID: ai-knowledge-base-42
steps:
- uses: actions/checkout@v4
5 changes: 5 additions & 0 deletions deploy/terraform/cloudrun-jobs.tf
@@ -140,6 +140,11 @@ resource "google_cloud_run_v2_job" "pipeline" {
value = "true"
}

env {
name = "GEMINI_INTAKE_MODEL"
value = "gemini-2.5-flash"
}

env {
name = "GRAPHITI_BULK_ENABLED"
value = "true"
18 changes: 6 additions & 12 deletions deploy/terraform/cloudrun-slack.tf
@@ -39,16 +39,6 @@ resource "google_cloud_run_v2_service" "slack_bot" {
}
}

env {
name = "ANTHROPIC_API_KEY"
value_source {
secret_key_ref {
secret = google_secret_manager_secret.anthropic_api_key.secret_id
version = "latest"
}
}
}

# Graph Database Configuration (Graphiti + Neo4j)
env {
name = "GRAPH_BACKEND"
@@ -82,7 +72,7 @@ resource "google_cloud_run_v2_service" "slack_bot" {
}

env {
name = "GEMINI_MODEL_ID"
name = "GEMINI_CONVERSATION_MODEL"
value = "gemini-2.5-flash"
}

@@ -106,6 +96,11 @@ resource "google_cloud_run_v2_service" "slack_bot" {
value = var.region
}

env {
name = "GOOGLE_GENAI_USE_VERTEXAI"
value = "true"
}

# Health check
startup_probe {
http_get {
@@ -134,7 +129,6 @@ resource "google_cloud_run_v2_service" "slack_bot" {
depends_on = [
google_secret_manager_secret_version.slack_bot_token,
google_secret_manager_secret_version.slack_signing_secret,
google_secret_manager_secret_version.anthropic_api_key,
]
}

12 changes: 0 additions & 12 deletions deploy/terraform/secret-manager.tf
@@ -180,12 +180,6 @@ resource "google_secret_manager_secret_iam_member" "slack_signing_secret_access"
member = "serviceAccount:${google_service_account.slack_bot.email}"
}

resource "google_secret_manager_secret_iam_member" "slack_anthropic_access" {
secret_id = google_secret_manager_secret.anthropic_api_key.secret_id
role = "roles/secretmanager.secretAccessor"
member = "serviceAccount:${google_service_account.slack_bot.email}"
}

# Slack Bot Neo4j password access
resource "google_secret_manager_secret_iam_member" "slack_neo4j_password_access" {
secret_id = google_secret_manager_secret.neo4j_password.secret_id
@@ -213,12 +207,6 @@ resource "google_secret_manager_secret_iam_member" "jobs_confluence_token_access"
member = "serviceAccount:${google_service_account.jobs.email}"
}

resource "google_secret_manager_secret_iam_member" "jobs_anthropic_access" {
secret_id = google_secret_manager_secret.anthropic_api_key.secret_id
role = "roles/secretmanager.secretAccessor"
member = "serviceAccount:${google_service_account.jobs.email}"
}

# Jobs service account Neo4j password access
resource "google_secret_manager_secret_iam_member" "jobs_neo4j_password_access" {
secret_id = google_secret_manager_secret.neo4j_password.secret_id
39 changes: 11 additions & 28 deletions deploy/terraform/staging.tf
@@ -170,16 +170,6 @@ resource "google_cloud_run_v2_service" "slack_bot_staging" {
}
}

env {
name = "ANTHROPIC_API_KEY"
value_source {
secret_key_ref {
secret = google_secret_manager_secret.anthropic_api_key.secret_id
version = "latest"
}
}
}

# Graph Database Configuration (Graphiti + Neo4j)
env {
name = "GRAPH_BACKEND"
@@ -213,7 +203,7 @@ resource "google_cloud_run_v2_service" "slack_bot_staging" {
}

env {
name = "GEMINI_MODEL_ID"
name = "GEMINI_CONVERSATION_MODEL"
value = "gemini-2.5-flash"
}

@@ -237,6 +227,11 @@ resource "google_cloud_run_v2_service" "slack_bot_staging" {
value = var.region
}

env {
name = "GOOGLE_GENAI_USE_VERTEXAI"
value = "true"
}

env {
name = "ENVIRONMENT"
value = "staging"
@@ -326,12 +321,6 @@ resource "google_secret_manager_secret_iam_member" "slack_bot_staging_signing_ac
member = "serviceAccount:${google_service_account.slack_bot_staging.email}"
}

resource "google_secret_manager_secret_iam_member" "slack_bot_staging_anthropic_access" {
secret_id = google_secret_manager_secret.anthropic_api_key.id
role = "roles/secretmanager.secretAccessor"
member = "serviceAccount:${google_service_account.slack_bot_staging.email}"
}

# Reference existing Neo4j password secret (created in secret-manager.tf)
data "google_secret_manager_secret" "neo4j_password_secret" {
secret_id = "neo4j-password"
@@ -466,6 +455,11 @@ resource "google_cloud_run_v2_job" "confluence_sync_staging" {
value = "true"
}

env {
name = "GEMINI_INTAKE_MODEL"
value = "gemini-2.5-flash"
}

# Adaptive bulk indexing (TCP-style congestion control)
env {
name = "GRAPHITI_BULK_ENABLED"
@@ -476,17 +470,6 @@
name = "CHECKPOINT_PERSIST_PATH"
value = "/mnt/pipeline-state/staging-knowledge-base.db"
}

# Keep Anthropic key as fallback (optional)
env {
name = "ANTHROPIC_API_KEY"
value_source {
secret_key_ref {
secret = google_secret_manager_secret.anthropic_api_key.secret_id
version = "latest"
}
}
}
}

volumes {
27 changes: 20 additions & 7 deletions src/knowledge_base/config.py
@@ -33,7 +33,7 @@ class Settings(BaseSettings):
REDIS_URL: str = "redis://redis:6379/0"

# LLM Provider Selection
LLM_PROVIDER: str = "claude" # 'ollama', 'claude', or empty for auto-select
LLM_PROVIDER: str = "gemini" # 'gemini', 'claude', 'vertex-claude', or 'ollama'

# Ollama (local LLM)
OLLAMA_BASE_URL: str = "http://ollama:11434"
@@ -45,10 +45,13 @@
EMBEDDING_MODEL: str = "all-MiniLM-L6-v2" # sentence-transformer model
INDEX_BATCH_SIZE: int = 100

# Anthropic (Claude)
# Gemini model settings (separate models for different use cases)
# Gemini 2.5 Flash supports up to 65K output tokens (required for graphiti-core's 16384)
GEMINI_INTAKE_MODEL: str = "gemini-2.5-flash" # Graphiti entity extraction (intake pipeline)
GEMINI_CONVERSATION_MODEL: str = "gemini-2.5-flash" # Slack bot RAG conversations

# Anthropic (Claude) — used when LLM_PROVIDER=claude
ANTHROPIC_API_KEY: str = ""
# Using Sonnet for Graphiti entity extraction - Haiku doesn't support the max_tokens
# that graphiti-core internally uses (16384). Sonnet is more expensive but works.
ANTHROPIC_MODEL: str = "claude-sonnet-4-20250514"
METADATA_BATCH_SIZE: int = 10

@@ -113,9 +116,9 @@ class Settings(BaseSettings):
VERTEX_AI_LOCATION: str = "us-central1" # Region for Vertex AI
VERTEX_AI_EMBEDDING_MODEL: str = "text-embedding-005" # Embedding model
VERTEX_AI_EMBEDDING_DIMENSION: int = 768 # Embedding dimension
# Gemini 2.5 Flash supports up to 65K output tokens (required for graphiti-core's 16384)
# Gemini 2.0 Flash only supports 8K output which causes errors with graphiti
VERTEX_AI_LLM_MODEL: str = "gemini-2.5-flash" # Gemini model for entity extraction
# DEPRECATED: Use GEMINI_INTAKE_MODEL / GEMINI_CONVERSATION_MODEL instead.
# Kept for backward compat with existing deployments that set this env var.
VERTEX_AI_LLM_MODEL: str = ""
VERTEX_AI_CLAUDE_MODEL: str = "claude-sonnet-4@20250514" # Claude via Vertex AI
VERTEX_AI_BATCH_SIZE: int = 20 # Max texts per embedding batch (keep under 20k token limit)
VERTEX_AI_TIMEOUT: float = 60.0 # API timeout in seconds
@@ -144,6 +147,16 @@ def confluence_space_list(self) -> list[str]:
return []
return [s.strip() for s in self.CONFLUENCE_SPACE_KEYS.split(",") if s.strip()]

@model_validator(mode="after")
def migrate_vertex_ai_llm_model(self) -> "Settings":
"""Backward compat: map deprecated VERTEX_AI_LLM_MODEL to new settings."""
if self.VERTEX_AI_LLM_MODEL:
if not os.environ.get("GEMINI_INTAKE_MODEL"):
self.GEMINI_INTAKE_MODEL = self.VERTEX_AI_LLM_MODEL
if not os.environ.get("GEMINI_CONVERSATION_MODEL"):
self.GEMINI_CONVERSATION_MODEL = self.VERTEX_AI_LLM_MODEL
return self

@model_validator(mode="after")
def check_security_settings(self) -> "Settings":
"""Validate security settings."""
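For context, a minimal sketch of the backward-compat mapping performed by the migrate_vertex_ai_llm_model validator above, written as a standalone function for illustration (the function name is made up; env-var names follow the diff):

import os

def resolve_gemini_models(
    vertex_ai_llm_model: str,
    gemini_intake_model: str = "gemini-2.5-flash",
    gemini_conversation_model: str = "gemini-2.5-flash",
) -> tuple[str, str]:
    # The deprecated VERTEX_AI_LLM_MODEL only wins for settings that were not
    # explicitly provided via the new env vars (illustrative sketch only).
    if vertex_ai_llm_model:
        if not os.environ.get("GEMINI_INTAKE_MODEL"):
            gemini_intake_model = vertex_ai_llm_model
        if not os.environ.get("GEMINI_CONVERSATION_MODEL"):
            gemini_conversation_model = vertex_ai_llm_model
    return gemini_intake_model, gemini_conversation_model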
30 changes: 10 additions & 20 deletions src/knowledge_base/graph/graphiti_client.py
@@ -205,34 +205,24 @@ async def _create_neo4j_client(self) -> "Graphiti":
def _get_llm_client(self):
"""Get the LLM client for Graphiti entity extraction.

Supports multiple LLM providers:
- 'claude'/'anthropic': Uses Anthropic Claude API
- 'gemini': Uses Google Gemini API

Falls back based on available credentials.
Dispatches based on LLM_PROVIDER setting. No silent fallback —
if the configured provider is not available, raises an error.
"""
from graphiti_core.llm_client import LLMConfig

llm_provider = settings.LLM_PROVIDER.lower()

# Try Gemini if explicitly configured or as fallback
if llm_provider == "gemini":
return self._get_gemini_client()

# Try Anthropic
if llm_provider in ("claude", "anthropic", ""):
if settings.ANTHROPIC_API_KEY:
return self._get_anthropic_client()
else:
# Fall back to Gemini if Anthropic key not available
logger.warning(
"ANTHROPIC_API_KEY not set, falling back to Gemini for entity extraction"
if llm_provider in ("claude", "anthropic"):
if not settings.ANTHROPIC_API_KEY:
raise GraphitiClientError(
"LLM_PROVIDER is set to 'claude' but ANTHROPIC_API_KEY is not configured."
)
return self._get_gemini_client()
return self._get_anthropic_client()

raise GraphitiClientError(
f"Unsupported LLM_PROVIDER: {llm_provider}. "
"Use 'claude', 'anthropic', or 'gemini'."
f"Unsupported LLM_PROVIDER for Graphiti: '{llm_provider}'. "
"Use 'gemini' or 'claude'."
)

def _get_anthropic_client(self):
@@ -273,7 +263,7 @@ def _get_gemini_client(self):
# Check for Google API key (direct API access)
google_api_key = os.environ.get("GOOGLE_API_KEY", "")

model = settings.VERTEX_AI_LLM_MODEL or "gemini-2.0-flash"
model = settings.GEMINI_INTAKE_MODEL

# If we have an API key, use consumer Gemini API
if google_api_key:
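A condensed, standalone sketch of the stricter dispatch implemented in _get_llm_client above (hypothetical helper; the real method raises GraphitiClientError and returns provider client objects rather than strings):

def pick_graphiti_llm_provider(llm_provider: str, anthropic_api_key: str) -> str:
    # Strict dispatch: no silent fallback between providers (illustrative sketch).
    provider = llm_provider.lower()
    if provider == "gemini":
        return "gemini"
    if provider in ("claude", "anthropic"):
        if not anthropic_api_key:
            raise ValueError(
                "LLM_PROVIDER is set to 'claude' but ANTHROPIC_API_KEY is not configured."
            )
        return "anthropic"
    raise ValueError(
        f"Unsupported LLM_PROVIDER for Graphiti: '{provider}'. Use 'gemini' or 'claude'."
    )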
59 changes: 20 additions & 39 deletions src/knowledge_base/rag/factory.py
@@ -64,55 +64,36 @@ def get_provider(name: str) -> "BaseLLM":
async def get_llm(provider: str | None = None) -> "BaseLLM":
"""Get an LLM instance (main entry point).

Selection order:
1. Use specified provider if given
2. Use LLM_PROVIDER from config if set
3. Auto-select based on availability (Claude if API key exists, else Ollama)
Uses the configured LLM_PROVIDER. No silent fallback — if the configured
provider is not available, raises an error immediately.

Args:
provider: Specific provider name, or None for automatic selection
provider: Specific provider name, or None to use LLM_PROVIDER setting

Returns:
Configured LLM instance

Raises:
LLMProviderNotConfiguredError: If no provider is available
LLMProviderNotConfiguredError: If no provider is configured or available
"""
# Use specified or configured provider
provider_name = provider or settings.LLM_PROVIDER
if not provider_name:
raise LLMProviderNotConfiguredError(
"LLM_PROVIDER is not configured. "
"Set it to 'gemini', 'claude', 'vertex-claude', or 'ollama'.",
provider="none",
)

llm = get_provider(provider_name)
if not await llm.is_available():
raise LLMProviderNotConfiguredError(
f"LLM provider '{provider_name}' is configured but not available. "
f"Check your credentials and configuration.",
provider=provider_name,
)

if provider_name:
llm = get_provider(provider_name)
if await llm.is_available():
logger.info(f"Using LLM provider: {llm.provider_name}")
return llm
logger.warning(f"Configured provider '{provider_name}' not available")

# Auto-select: try Claude, then Gemini, then Ollama
if settings.ANTHROPIC_API_KEY:
llm = get_provider("claude")
if await llm.is_available():
logger.info("Auto-selected Claude LLM provider")
return llm

# Try Gemini if GCP project is configured
if settings.VERTEX_AI_PROJECT or settings.GCP_PROJECT_ID:
llm = get_provider("gemini")
if await llm.is_available():
logger.info("Auto-selected Gemini LLM provider")
return llm

# Fall back to Ollama
llm = get_provider("ollama")
if await llm.is_available():
logger.info("Auto-selected Ollama LLM provider")
return llm

raise LLMProviderNotConfiguredError(
"No LLM provider is configured or available. "
"Set ANTHROPIC_API_KEY for Claude, configure GCP project for Gemini, or ensure Ollama is running.",
provider="none",
)
logger.info(f"Using LLM provider: {llm.provider_name}")
return llm


# Import BaseLLM here to avoid circular imports
Expand Down
2 changes: 1 addition & 1 deletion src/knowledge_base/rag/providers/gemini.py
@@ -45,7 +45,7 @@ def __init__(
"""
self.project = project or settings.VERTEX_AI_PROJECT or settings.GCP_PROJECT_ID
self.location = location or settings.VERTEX_AI_LOCATION
self.model_name = model or settings.VERTEX_AI_LLM_MODEL
self.model_name = model or settings.GEMINI_CONVERSATION_MODEL
self.max_output_tokens = max_output_tokens
self.temperature = temperature
self._model = None