feat: Universal LiteLLM Backend (Issue #234) #296
Status: Open

whoisjayd wants to merge 30 commits into vitali87:main from whoisjayd:feature/litellm-universal-backend
Commits (30, all by whoisjayd):

0a01ce6 feat: implement universal LiteLLM backend architecture
56df64a test: add LiteLLM provider tests and update integrations
917f6d6 docs: add supported providers documentation
4b2d116 fix: adjust comment formatting for clarity in get_provider function
0aadbbb feat: add support for extra headers (Portkey/Gateway integration)
2a095df docs: add comprehensive LLM configuration guide
1734753 docs: add supported providers list and update readme
5eba165 fix: address review feedback and resolve pre-commit violations
798d5b7 fix: resolve all comments
c74ba58 fix: update variable names for consistency in document analyzer tests…
41d3515 fix: resolve 8 critical LiteLLM migration bugs with thread-safety and…
aa25a78 docs: update .env.example with comprehensive LiteLLM provider example…
bc8d405 docs: reorganize documentation into category-based structure with get…
1c8ee14 chore: relocate build scripts to scripts/ directory and remove unused…
67d52a7 test: add concurrent model creation thread-safety test for LiteLLM pr…
40895cc fix: update model string parsing to use slashes instead of colons for…
81e51f7 refactor: address code review feedback for LiteLLM implementation
a682610 docs: add core system settings to .env.example and enhance concurrent…
5d5e8e6 fix: correct model_id handling in LiteLLM provider and update CI work…
fe0833f fix: remove secrets check from workflow conditional - codecov handles…
e3530c5 test: strip ANSI escape codes in CLI help test for CI compatibility
d447761 test: skip ripgrep tests when rg is not installed
2e393d5 test: skip ripgrep integration tests when rg is not installed
a876ad2 test: add QueryProtocol methods to mock_ingestor for isinstance checks
6091ef5 docs: add comprehensive cmake and ripgrep installation instructions f…
f4e99eb ci: install cmake and ripgrep on all platforms before running tests
66733a0 test: fix CI test failures for Windows and Ubuntu platforms
3ef73e9 fix: security hardening, bug fixes, and code quality
f3f1687 fix: update JS/TS module path extraction and increase CLI test timeout
74d01e0 feat: extract hardcoded constants to constants module for maintainabi…
.env.example (updated contents; diff hunk @@ -1,78 +1,270 @@):
# ============================================================================
# LiteLLM Universal Backend Configuration
# ============================================================================
# This file shows all available configuration options for the LiteLLM provider.
# Copy this file to .env and uncomment the configuration you want to use.
#
# Quick Start: Choose ONE of the example configurations below
# ============================================================================

# ============================================================================
# EXAMPLE 1: Local/Offline (Ollama) - FREE, NO API KEY NEEDED
# ============================================================================
# Perfect for: Privacy, no costs, offline work
# Requirements: Install Ollama from https://ollama.com
#
# ORCHESTRATOR_PROVIDER=ollama
# ORCHESTRATOR_MODEL=llama3.2
# ORCHESTRATOR_ENDPOINT=http://localhost:11434/v1
# CYPHER_PROVIDER=ollama
# CYPHER_MODEL=codellama
# CYPHER_ENDPOINT=http://localhost:11434/v1
# OLLAMA_BASE_URL=http://localhost:11434
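Since the `/v1` endpoint above is Ollama's OpenAI-compatible chat API, the local setup can be smoke-tested with a plain HTTP request. This is an illustrative sketch, not code from the PR; `build_chat_request` is a hypothetical helper, and the live call assumes Ollama is running with `llama3.2` pulled:

```python
import json
import urllib.request

def build_chat_request(endpoint: str, model: str, prompt: str):
    """Build an OpenAI-compatible chat completion request (URL + JSON body)."""
    url = endpoint.rstrip("/") + "/chat/completions"
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
    }
    return url, payload

# Values mirror EXAMPLE 1 above.
url, payload = build_chat_request("http://localhost:11434/v1", "llama3.2", "Say hi")
print(url)  # http://localhost:11434/v1/chat/completions

# Uncomment to actually call a running Ollama server:
# req = urllib.request.Request(
#     url, data=json.dumps(payload).encode(), headers={"Content-Type": "application/json"}
# )
# print(json.load(urllib.request.urlopen(req))["choices"][0]["message"]["content"])
```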

# ============================================================================
# EXAMPLE 2: Google Gemini - FREE TIER AVAILABLE
# ============================================================================
# Perfect for: Testing, large codebases (1M token context)
# Get API key: https://aistudio.google.com/app/apikey
#
# GEMINI_API_KEY=your-gemini-api-key-here
# ORCHESTRATOR_PROVIDER=gemini
# ORCHESTRATOR_MODEL=gemini-1.5-flash
# CYPHER_PROVIDER=gemini
# CYPHER_MODEL=gemini-1.5-flash

# ============================================================================
# EXAMPLE 3: OpenAI - INDUSTRY STANDARD
# ============================================================================
# Perfect for: Production use, best quality
# Get API key: https://platform.openai.com/api-keys
#
# OPENAI_API_KEY=sk-proj-your-openai-key-here
# ORCHESTRATOR_PROVIDER=openai
# ORCHESTRATOR_MODEL=gpt-4o
# CYPHER_PROVIDER=openai
# CYPHER_MODEL=gpt-4o-mini

# ============================================================================
# EXAMPLE 4: Anthropic Claude - BEST FOR CODE ANALYSIS
# ============================================================================
# Perfect for: Code review, long context (200K+ tokens)
# Get API key: https://console.anthropic.com/settings/keys
#
# ANTHROPIC_API_KEY=sk-ant-your-anthropic-key-here
# ORCHESTRATOR_PROVIDER=anthropic
# ORCHESTRATOR_MODEL=claude-3-5-sonnet-latest
# CYPHER_PROVIDER=anthropic
# CYPHER_MODEL=claude-3-5-haiku-latest

# ============================================================================
# EXAMPLE 5: Cost-Optimized Hybrid Setup
# ============================================================================
# Use powerful model for orchestration, cheap/fast model for Cypher queries
# Cost savings: 50-70% vs using premium models for everything
#
# ANTHROPIC_API_KEY=sk-ant-your-key
# ORCHESTRATOR_PROVIDER=anthropic
# ORCHESTRATOR_MODEL=claude-3-5-sonnet-latest
#
# GROQ_API_KEY=gsk-your-groq-key
# CYPHER_PROVIDER=groq
# CYPHER_MODEL=llama3-70b-8192

# ============================================================================
# EXAMPLE 6: Google Vertex AI (Google Cloud)
# ============================================================================
# Perfect for: Enterprise, existing GCP infrastructure
# Requires: GCP project, service account with Vertex AI API access
#
# ORCHESTRATOR_PROVIDER=vertex_ai
# ORCHESTRATOR_MODEL=gemini-1.5-pro
# ORCHESTRATOR_PROJECT_ID=your-gcp-project-id
# ORCHESTRATOR_REGION=us-central1
# ORCHESTRATOR_PROVIDER_TYPE=vertex
# ORCHESTRATOR_SERVICE_ACCOUNT_FILE=/path/to/service-account.json
#
# CYPHER_PROVIDER=vertex_ai
# CYPHER_MODEL=gemini-1.5-flash
# CYPHER_PROJECT_ID=your-gcp-project-id
# CYPHER_REGION=us-central1
# CYPHER_PROVIDER_TYPE=vertex
# CYPHER_SERVICE_ACCOUNT_FILE=/path/to/service-account.json

# ============================================================================
# EXAMPLE 7: Azure OpenAI
# ============================================================================
# Perfect for: Enterprise, compliance requirements, existing Azure infrastructure
# Note: Use your Azure deployment name (not the base model name)
#
# AZURE_API_KEY=your-azure-api-key
# AZURE_API_BASE=https://your-resource.openai.azure.com
# AZURE_API_VERSION=2024-02-15-preview
# ORCHESTRATOR_PROVIDER=azure
# ORCHESTRATOR_MODEL=my-gpt4-deployment

# ============================================================================
# ADVANCED: API Gateway Integration (Portkey, Helicone, etc.)
# ============================================================================
# Route requests through third-party gateways for:
# - Request logging and analytics
# - Caching (reduce costs)
# - Load balancing across providers
# - Usage monitoring and alerts
#
# Portkey Gateway Example:
# ORCHESTRATOR_PROVIDER=openai
# ORCHESTRATOR_MODEL=gpt-4o
# ORCHESTRATOR_API_KEY=sk-your-openai-key
# ORCHESTRATOR_ENDPOINT=https://api.portkey.ai/v1
# ORCHESTRATOR_EXTRA_HEADERS={"x-portkey-api-key":"pk-your-key","x-portkey-provider":"openai"}
#
# Helicone Observability Example:
# OPENAI_API_KEY=sk-your-openai-key
# ORCHESTRATOR_PROVIDER=openai
# ORCHESTRATOR_MODEL=gpt-4o
# ORCHESTRATOR_ENDPOINT=https://oai.hconeai.com/v1
# ORCHESTRATOR_EXTRA_HEADERS={"Helicone-Auth":"Bearer sk-helicone-key"}
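The `*_EXTRA_HEADERS` values above are JSON objects, so a provider wrapper can parse them from the environment and merge them into each request's headers. A minimal sketch of that idea (hypothetical helper name, not the PR's actual implementation):

```python
import json
import os

def load_extra_headers(env_var: str = "ORCHESTRATOR_EXTRA_HEADERS") -> dict:
    """Parse a JSON object of extra headers from the environment; empty dict if unset."""
    raw = os.environ.get(env_var)
    if not raw:
        return {}
    headers = json.loads(raw)
    if not isinstance(headers, dict):
        raise ValueError(f"{env_var} must be a JSON object")
    # Normalize everything to strings so the values are safe to send as headers.
    return {str(k): str(v) for k, v in headers.items()}

# Mirrors the Portkey example above.
os.environ["ORCHESTRATOR_EXTRA_HEADERS"] = (
    '{"x-portkey-api-key":"pk-your-key","x-portkey-provider":"openai"}'
)
merged = {"Authorization": "Bearer sk-your-openai-key", **load_extra_headers()}
print(merged["x-portkey-provider"])  # openai
```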

# ============================================================================
# ADVANCED: Thinking Budget (for reasoning models)
# ============================================================================
# Control the amount of "thinking" for reasoning models like o1, o3, DeepSeek-R1
# Higher values = more thorough reasoning, slower responses, higher cost
#
# ORCHESTRATOR_THINKING_BUDGET=10000
# CYPHER_THINKING_BUDGET=5000

# ============================================================================
# ADDITIONAL PROVIDER API KEYS
# ============================================================================
# Uncomment and set the API key for any provider you want to use

# DeepSeek (code-specialized models)
# DEEPSEEK_API_KEY=sk-your-deepseek-key

# Groq (ultra-fast inference)
# GROQ_API_KEY=gsk-your-groq-key

# Mistral AI
# MISTRAL_API_KEY=your-mistral-key

# Cohere
# COHERE_API_KEY=your-cohere-key

# Together AI
# TOGETHERAI_API_KEY=your-together-key

# Fireworks AI
# FIREWORKS_API_KEY=your-fireworks-key

# Perplexity
# PERPLEXITY_API_KEY=pplx-your-key

# Replicate
# REPLICATE_API_KEY=r8-your-key

# Hugging Face
# HUGGINGFACE_API_KEY=hf-your-key

# ============================================================================
# MODEL NAMING CONVENTION
# ============================================================================
# Always use "/" delimiter: provider/model-name
# Examples:
# ✅ openai/gpt-4o
# ✅ anthropic/claude-3-5-sonnet-latest
# ✅ gemini/gemini-1.5-pro
#
# The ":" delimiter is deprecated and will show warnings:
# ⚠️ openai:gpt-4o (deprecated, use "/" instead)
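A parser for this convention might look like the following sketch. It is an assumed behavior based on the notes above (`/` is canonical, `:` still parses but emits a deprecation warning), not the PR's exact code:

```python
import warnings

def parse_model_string(model: str) -> tuple:
    """Split 'provider/model-name' into (provider, model); accept ':' with a warning."""
    if "/" in model:
        provider, _, name = model.partition("/")
    elif ":" in model:
        warnings.warn(
            f"'{model}': the ':' delimiter is deprecated, use 'provider/model'",
            DeprecationWarning,
        )
        provider, _, name = model.partition(":")
    else:
        raise ValueError(f"Expected 'provider/model', got '{model}'")
    if not provider or not name:
        raise ValueError(f"Expected 'provider/model', got '{model}'")
    return provider, name

print(parse_model_string("openai/gpt-4o"))  # ('openai', 'gpt-4o')
```

Using `partition` rather than `split` keeps any later delimiters inside the model name, so identifiers like `anthropic/claude-3-5-sonnet-latest` survive intact.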

# ============================================================================
# AUTOMATIC FEATURES (no configuration needed)
# ============================================================================
# These features are automatically enabled:
#
# ✅ Timeout: 300 seconds (5 minutes) default
# - Prevents indefinite hangs
# - Can override via --timeout flag
#
# ✅ Retry Policy: 3 retries with 5-second exponential backoff
# - Handles transient network errors
# - Automatic retry on rate limits
#
# ✅ Header Validation: RFC 7230 compliance
# - CRLF injection prevention
# - System header protection (host, content-length, etc.)
# - Size limits (256 chars for name, 8KB for value)
#
# ✅ Thread Safety: Concurrent request isolation
# - Environment variables isolated per request
# - No cross-tenant config leakage (Vertex AI)
# - Safe for multi-threaded applications
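The header-validation rules listed above (CRLF rejection, protected system headers, 256-char/8KB size limits) could be enforced along these lines. This is an illustrative sketch under those stated limits, not the PR's exact implementation, and the protected-header set is an assumption:

```python
PROTECTED_HEADERS = {"host", "content-length", "transfer-encoding", "connection"}
MAX_NAME_LEN = 256          # 256 chars for the header name
MAX_VALUE_LEN = 8 * 1024    # 8KB for the header value

def validate_header(name: str, value: str) -> None:
    """Raise ValueError if a user-supplied header is unsafe to forward."""
    # CRLF injection prevention: newlines would let a caller smuggle extra headers.
    if "\r" in name or "\n" in name or "\r" in value or "\n" in value:
        raise ValueError("CRLF characters are not allowed in headers")
    # System header protection: never let extra headers override transport fields.
    if name.lower() in PROTECTED_HEADERS:
        raise ValueError(f"'{name}' is a protected system header")
    # Size limits.
    if len(name) > MAX_NAME_LEN or len(value) > MAX_VALUE_LEN:
        raise ValueError("Header name or value exceeds size limits")

validate_header("x-portkey-api-key", "pk-your-key")  # passes silently
```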

# ============================================================================
# POPULAR MODEL RECOMMENDATIONS
# ============================================================================
#
# GETTING STARTED:
# gemini/gemini-1.5-flash (free tier, fast, good quality)
# ollama/llama3 (free, local, no internet needed)
#
# BEST QUALITY:
# openai/gpt-4o (industry-leading, production-ready)
# anthropic/claude-3-5-sonnet-latest (excellent for code)
#
# COST OPTIMIZED:
# openai/gpt-4o-mini (60% cheaper than GPT-4o)
# groq/llama3-70b-8192 (ultra-fast, cheap)
#
# LARGE CODEBASES:
# gemini/gemini-1.5-pro (1M token context window)
# anthropic/claude-3-5-sonnet-latest (200K tokens)
#
# CODE SPECIALIZED:
# deepseek/deepseek-coder (optimized for code)
# ollama/codellama (local code model)
#
# ULTRA-FAST:
# groq/llama3-70b-8192 (blazing fast inference)
# groq/mixtral-8x7b-32768 (fast, long context)

# ============================================================================
# TROUBLESHOOTING
# ============================================================================
#
# Problem: "Authentication Error"
# Solution: Check API key is correct, no extra spaces/quotes
#
# Problem: "Provider not found"
# Solution: Use format "provider/model" (e.g., "openai/gpt-4o")
#
# Problem: "Connection Error" (Ollama)
# Solution: Run "ollama serve" and verify model is downloaded
#
# Problem: Rate limits
# Solution: Check provider dashboard, switch to cheaper model, or use multi-model strategy
#
# Problem: Slow responses
# Solution: Switch to faster provider (Groq), smaller model, or Ollama locally

# ============================================================================
# DOCUMENTATION
# ============================================================================
# For more information, see:
# docs/GETTING_STARTED_LLM.md - Quick start guide
# docs/LLM_CONFIGURATION.md - Detailed configuration
# docs/SUPPORTED_PROVIDERS.md - Complete list of 100+ supported models
#
# LiteLLM Documentation: https://docs.litellm.ai/docs/providers

# ============================================================================
# CORE SYSTEM SETTINGS
# ============================================================================
# These are rarely changed but can be configured if needed.
#
# Memgraph settings
# MEMGRAPH_HOST=localhost
# MEMGRAPH_PORT=7687
# MEMGRAPH_HTTP_PORT=7444
# LAB_PORT=3000
# MEMGRAPH_BATCH_SIZE=1000
#
# Repository settings
# TARGET_REPO_PATH=.