Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,34 @@
# CYPHER_MODEL=gemini-2.5-flash
# CYPHER_API_KEY=your-google-api-key

# Example 7: Anthropic Claude (Direct API)
# ORCHESTRATOR_PROVIDER=anthropic
# ORCHESTRATOR_MODEL=claude-sonnet-4-5-20250929
# ORCHESTRATOR_API_KEY=sk-ant-your-key

# CYPHER_PROVIDER=anthropic
# CYPHER_MODEL=claude-haiku-4-5-20251001
# CYPHER_API_KEY=sk-ant-your-key

# Example 8: Anthropic Claude (via Portkey proxy)
# ORCHESTRATOR_PROVIDER=anthropic
# ORCHESTRATOR_MODEL=claude-sonnet-4-5-20250929
# ORCHESTRATOR_ENDPOINT=https://your-portkey-gateway.com
# ORCHESTRATOR_CUSTOM_HEADERS="x-portkey-api-key: pk-xxx\nx-portkey-config: pc-xxx"

# CYPHER_PROVIDER=anthropic
# CYPHER_MODEL=claude-haiku-4-5-20251001
# CYPHER_ENDPOINT=https://your-portkey-gateway.com
# CYPHER_CUSTOM_HEADERS="x-portkey-api-key: pk-xxx\nx-portkey-config: pc-xxx"

# Example 9: Anthropic Claude (uses ~/.claude/settings.json automatically)
# Just set provider and model - will auto-detect from Claude Code settings
# ORCHESTRATOR_PROVIDER=anthropic
# ORCHESTRATOR_MODEL=claude-sonnet-4-5-20250929

# CYPHER_PROVIDER=anthropic
# CYPHER_MODEL=claude-haiku-4-5-20251001

# Thinking budget for reasoning models (optional)
# ORCHESTRATOR_THINKING_BUDGET=10000
# CYPHER_THINKING_BUDGET=5000
Expand Down
53 changes: 51 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ An accurate Retrieval-Augmented Generation (RAG) system that analyzes multi-lang
- **🌳 Tree-sitter Parsing**: Uses Tree-sitter for robust, language-agnostic AST parsing
- **📊 Knowledge Graph Storage**: Uses Memgraph to store codebase structure as an interconnected graph
- **🗣️ Natural Language Querying**: Ask questions about your codebase in plain English
- **🤖 AI-Powered Cypher Generation**: Supports both cloud models (Google Gemini), local models (Ollama), and OpenAI models for natural language to Cypher translation
- **🤖 OpenAI Integration**: Leverage OpenAI models to enhance AI functionalities.
- **🤖 AI-Powered Cypher Generation**: Supports cloud models (Google Gemini, Anthropic Claude), local models (Ollama), and OpenAI models for natural language to Cypher translation
- **🤖 Multiple LLM Providers**: Supports OpenAI, Anthropic Claude, Google Gemini, and Ollama with flexible authentication (direct API keys, Claude Code settings, or proxy services like Portkey)
- **📝 Code Snippet Retrieval**: Retrieves actual source code snippets for found functions/methods
- **✍️ Advanced File Editing**: Surgical code replacement with AST-based function targeting, visual diff previews, and exact code block modifications
- **⚡️ Shell Command Execution**: Can execute terminal commands for tasks like running tests or using CLI tools.
Expand Down Expand Up @@ -195,6 +195,55 @@ CYPHER_MODEL=codellama
CYPHER_ENDPOINT=http://localhost:11434/v1
```

#### Option 5: Anthropic Claude Models

Anthropic Claude is now supported with multiple authentication methods:

**5a. Direct API Key (recommended for development)**:
```bash
# .env file
ORCHESTRATOR_PROVIDER=anthropic
ORCHESTRATOR_MODEL=claude-sonnet-4-5-20250929
ORCHESTRATOR_API_KEY=sk-ant-your-key

CYPHER_PROVIDER=anthropic
CYPHER_MODEL=claude-haiku-4-5-20251001
CYPHER_API_KEY=sk-ant-your-key
```

**5b. Claude Code Settings (automatic)**:
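If you're using Claude Code CLI, the provider will automatically read `ANTHROPIC_BASE_URL` and `ANTHROPIC_CUSTOM_HEADERS` from `~/.claude/settings.json` whenever no API key, endpoint, or custom headers are configured for the role. Just set the provider and model: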
```bash
# .env file
ORCHESTRATOR_PROVIDER=anthropic
ORCHESTRATOR_MODEL=claude-sonnet-4-5-20250929

CYPHER_PROVIDER=anthropic
CYPHER_MODEL=claude-haiku-4-5-20251001
# No API key needed - uses ANTHROPIC_BASE_URL and ANTHROPIC_CUSTOM_HEADERS from settings
```

**5c. Portkey or Other Proxies (for enterprise)**:
For centralized key management systems like Portkey:
```bash
# .env file
ORCHESTRATOR_PROVIDER=anthropic
ORCHESTRATOR_MODEL=claude-sonnet-4-5-20250929
ORCHESTRATOR_ENDPOINT=https://your-portkey-gateway.com
ORCHESTRATOR_CUSTOM_HEADERS="x-portkey-api-key: pk-xxx\nx-portkey-config: pc-xxx"

CYPHER_PROVIDER=anthropic
CYPHER_MODEL=claude-haiku-4-5-20251001
CYPHER_ENDPOINT=https://your-portkey-gateway.com
CYPHER_CUSTOM_HEADERS="x-portkey-api-key: pk-xxx\nx-portkey-config: pc-xxx"
```

**Recommended Anthropic Models**:
- **Orchestrator**: `claude-opus-4-5-20251101` (most capable) or `claude-sonnet-4-5-20250929` (balanced)
- **Cypher**: `claude-sonnet-4-5-20250929` or `claude-haiku-4-5-20251001` (faster, cheaper)

Get your Anthropic API key from [Anthropic Console](https://console.anthropic.com/).

Get your Google API key from [Google AI Studio](https://aistudio.google.com/app/apikey).

**Install and run Ollama**:
Expand Down
10 changes: 10 additions & 0 deletions codebase_rag/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from . import exceptions as ex
from . import logs
from .types_defs import CgrignorePatterns, ModelConfigKwargs
from .utils.claude_settings import parse_custom_headers

load_dotenv()

Expand Down Expand Up @@ -105,6 +106,7 @@ class ModelConfig:
provider_type: str | None = None
thinking_budget: int | None = None
service_account_file: str | None = None
custom_headers: dict[str, str] | None = None

def to_update_kwargs(self) -> ModelConfigKwargs:
result = asdict(self)
Expand Down Expand Up @@ -153,6 +155,7 @@ class AppConfig(BaseSettings):
ORCHESTRATOR_PROVIDER_TYPE: str | None = None
ORCHESTRATOR_THINKING_BUDGET: int | None = None
ORCHESTRATOR_SERVICE_ACCOUNT_FILE: str | None = None
ORCHESTRATOR_CUSTOM_HEADERS: str | None = None

CYPHER_PROVIDER: str = ""
CYPHER_MODEL: str = ""
Expand All @@ -163,6 +166,7 @@ class AppConfig(BaseSettings):
CYPHER_PROVIDER_TYPE: str | None = None
CYPHER_THINKING_BUDGET: int | None = None
CYPHER_SERVICE_ACCOUNT_FILE: str | None = None
CYPHER_CUSTOM_HEADERS: str | None = None

OLLAMA_BASE_URL: str = "http://localhost:11434"

Expand Down Expand Up @@ -260,6 +264,11 @@ def _get_default_config(self, role: str) -> ModelConfig:
model = getattr(self, f"{role_upper}_MODEL", None)

if provider and model:
custom_headers_str = getattr(self, f"{role_upper}_CUSTOM_HEADERS", None)
custom_headers = (
parse_custom_headers(custom_headers_str) if custom_headers_str else None
)

return ModelConfig(
provider=provider.lower(),
model_id=model,
Expand All @@ -272,6 +281,7 @@ def _get_default_config(self, role: str) -> ModelConfig:
service_account_file=getattr(
self, f"{role_upper}_SERVICE_ACCOUNT_FILE", None
),
custom_headers=custom_headers,
)

return ModelConfig(
Expand Down
6 changes: 6 additions & 0 deletions codebase_rag/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@ class FileAction(StrEnum):

ENV_OPENAI_API_KEY = "OPENAI_API_KEY"
ENV_GOOGLE_API_KEY = "GOOGLE_API_KEY"
ENV_ANTHROPIC_API_KEY = "ANTHROPIC_API_KEY"
ENV_ANTHROPIC_BASE_URL = "ANTHROPIC_BASE_URL"
ENV_ANTHROPIC_CUSTOM_HEADERS = "ANTHROPIC_CUSTOM_HEADERS"

HELP_ARG = "help"

Expand All @@ -142,6 +145,9 @@ class GoogleProviderType(StrEnum):

# (H) Provider endpoints
OPENAI_DEFAULT_ENDPOINT = "https://api.openai.com/v1"
# NOTE: unlike the OpenAI endpoint, this must NOT end in "/v1". The Anthropic
# SDK builds request paths such as "/v1/messages" itself, so a base URL that
# already contains "/v1" would resolve to ".../v1/v1/messages".
ANTHROPIC_DEFAULT_ENDPOINT = "https://api.anthropic.com"
OLLAMA_DEFAULT_BASE_URL = "http://localhost:11434"
OLLAMA_DEFAULT_ENDPOINT = f"{OLLAMA_DEFAULT_BASE_URL}/v1"
OLLAMA_HEALTH_PATH = "/api/tags"
GOOGLE_CLOUD_SCOPE = "https://www.googleapis.com/auth/cloud-platform"
V1_PATH = "/v1"
Expand Down
12 changes: 12 additions & 0 deletions codebase_rag/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,18 @@
"OpenAI provider requires api_key. "
"Set ORCHESTRATOR_API_KEY or CYPHER_API_KEY in .env file."
)
ANTHROPIC_NO_AUTH = (
"Anthropic provider requires either api_key or custom_headers (for proxy auth). "
"Set ORCHESTRATOR_API_KEY/CYPHER_API_KEY, or configure ORCHESTRATOR_CUSTOM_HEADERS/CYPHER_CUSTOM_HEADERS, "
"or set up ~/.claude/settings.json with ANTHROPIC_BASE_URL and ANTHROPIC_CUSTOM_HEADERS."
)
ANTHROPIC_CLAUDE_SETTINGS_ERROR = (
"Failed to read Claude Code settings from ~/.claude/settings.json: {error}"
)
ANTHROPIC_MALFORMED_HEADER = (
"Malformed custom header line: '{line}'. "
"Expected format 'Header-Name: value'. Each header must contain a colon separator."
)
OLLAMA_NOT_RUNNING = (
"Ollama server not responding at {endpoint}. "
"Make sure Ollama is running: ollama serve"
Expand Down
5 changes: 5 additions & 0 deletions codebase_rag/logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,3 +621,8 @@
MODEL_SWITCHED = "Model switched to: {model}"
MODEL_SWITCH_FAILED = "Failed to switch model: {error}"
MODEL_CURRENT = "Current model: {model}"

# (H) Custom headers logs
CUSTOM_HEADERS_EMPTY = (
"Empty custom headers string received, treating as no headers set"
)
85 changes: 80 additions & 5 deletions codebase_rag/providers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,30 @@

import httpx
from loguru import logger
from pydantic_ai.models.anthropic import AnthropicModel
from pydantic_ai.models.google import GoogleModel, GoogleModelSettings
from pydantic_ai.models.openai import OpenAIChatModel, OpenAIResponsesModel
from pydantic_ai.providers.anthropic import (
AnthropicProvider as PydanticAnthropicProvider,
)
from pydantic_ai.providers.google import GoogleProvider as PydanticGoogleProvider
from pydantic_ai.providers.openai import OpenAIProvider as PydanticOpenAIProvider

from .. import constants as cs
from .. import exceptions as ex
from .. import logs as ls
from ..config import ModelConfig, settings
from ..utils.claude_settings import get_anthropic_config_from_claude_settings


class ModelProvider(ABC):
def __init__(self, **config: str | int | None) -> None:
def __init__(self, **config: str | int | dict[str, str] | None) -> None:
self.config = config

@abstractmethod
def create_model(
self, model_id: str, **kwargs: str | int | None
) -> GoogleModel | OpenAIResponsesModel | OpenAIChatModel:
) -> GoogleModel | OpenAIResponsesModel | OpenAIChatModel | AnthropicModel:
pass

@abstractmethod
Expand Down Expand Up @@ -72,7 +77,6 @@ def create_model(self, model_id: str, **kwargs: str | int | None) -> GoogleModel
if self.provider_type == cs.GoogleProviderType.VERTEX:
credentials = None
if self.service_account_file:
# (H) Convert service account file to credentials object for pydantic-ai
from google.oauth2 import service_account

credentials = service_account.Credentials.from_service_account_file(
Expand All @@ -85,7 +89,6 @@ def create_model(self, model_id: str, **kwargs: str | int | None) -> GoogleModel
credentials=credentials,
)
else:
# (H) api_key is guaranteed to be set by validate_config for gla type
assert self.api_key is not None
provider = PydanticGoogleProvider(api_key=self.api_key)

Expand Down Expand Up @@ -155,15 +158,86 @@ def create_model(
return OpenAIChatModel(model_id, provider=provider)


class AnthropicProvider(ModelProvider):
    """Model provider that builds Anthropic models via pydantic-ai.

    Credentials may be an API key, a set of custom HTTP headers, or both.
    When the constructor receives no API key, no endpoint and no custom
    headers, it tries to fill the endpoint and headers from Claude Code
    settings instead.
    """

    def __init__(
        self,
        api_key: str | None = None,
        endpoint: str | None = None,
        custom_headers: dict[str, str] | None = None,
        **kwargs: str | int | None,
    ) -> None:
        """Store the supplied credentials and fall back to Claude settings.

        Args:
            api_key: Anthropic API key, if any.
            endpoint: Base URL override, if any.
            custom_headers: Extra HTTP headers; ``None`` is stored as ``{}``.
            **kwargs: Remaining configuration, forwarded to ``ModelProvider``.
        """
        super().__init__(**kwargs)
        self.api_key = api_key
        self.endpoint = endpoint
        self.custom_headers = custom_headers or {}

        # Only consult Claude settings when nothing at all was configured
        # explicitly for this provider.
        explicitly_configured = bool(
            self.api_key or self.custom_headers or self.endpoint
        )
        if not explicitly_configured:
            self._load_from_claude_settings()

    def _load_from_claude_settings(self) -> None:
        """Best-effort import of endpoint and headers from Claude settings.

        Any exception is logged at debug level and otherwise ignored, leaving
        the current attribute values untouched.
        """
        try:
            settings_url, settings_headers = (
                get_anthropic_config_from_claude_settings()
            )

            # Empty values never overwrite what is already stored.
            if settings_url:
                self.endpoint = settings_url

            if settings_headers:
                self.custom_headers = settings_headers
        except Exception as e:
            logger.debug(f"Could not load Anthropic config from Claude settings: {e}")

    @property
    def provider_name(self) -> cs.Provider:
        """Return the provider identifier for Anthropic."""
        return cs.Provider.ANTHROPIC

    def validate_config(self) -> None:
        """Ensure at least one authentication mechanism is present.

        Raises:
            ValueError: If neither an API key nor custom headers are set.
        """
        if self.api_key or self.custom_headers:
            return
        raise ValueError(ex.ANTHROPIC_NO_AUTH)

    def create_model(self, model_id: str, **kwargs: str | int | None) -> AnthropicModel:
        """Validate the configuration and build an ``AnthropicModel``.

        Args:
            model_id: Model name handed to ``AnthropicModel``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            An ``AnthropicModel`` bound to a freshly created provider.

        Raises:
            ValueError: Propagated from ``validate_config``.
        """
        self.validate_config()

        # A dedicated HTTP client is only created when custom headers must be
        # attached to every request; otherwise None is passed through.
        header_client = (
            httpx.AsyncClient(headers=self.custom_headers, timeout=30.0)
            if self.custom_headers
            else None
        )

        anthropic_provider = PydanticAnthropicProvider(
            # A placeholder string stands in for the key when none is set
            # (validate_config guarantees custom headers exist in that case).
            api_key=self.api_key or "proxy-auth-via-headers",
            base_url=self.endpoint or cs.ANTHROPIC_DEFAULT_ENDPOINT,
            http_client=header_client,
        )

        return AnthropicModel(model_name=model_id, provider=anthropic_provider)


# Maps each supported provider key to the ModelProvider subclass implementing
# it; get_provider() checks requested provider names against this mapping.
PROVIDER_REGISTRY: dict[str, type[ModelProvider]] = {
    cs.Provider.GOOGLE: GoogleProvider,
    cs.Provider.OPENAI: OpenAIProvider,
    cs.Provider.OLLAMA: OllamaProvider,
    cs.Provider.ANTHROPIC: AnthropicProvider,
}


def get_provider(
provider_name: str | cs.Provider, **config: str | int | None
provider_name: str | cs.Provider,
**config: str | int | dict[str, str] | None,
) -> ModelProvider:
provider_key = str(provider_name)
if provider_key not in PROVIDER_REGISTRY:
Expand All @@ -186,6 +260,7 @@ def get_provider_from_config(config: ModelConfig) -> ModelProvider:
provider_type=config.provider_type,
thinking_budget=config.thinking_budget,
service_account_file=config.service_account_file,
custom_headers=config.custom_headers,
)


Expand Down
Loading