26 changes: 22 additions & 4 deletions docker-compose.yml
@@ -1,16 +1,34 @@
-version: '3'
+version: '3.8'
 services:
   claude-wrapper:
-    build: .
+    image: ttlequals0/claude-code-openai-wrapper:latest
+    container_name: claude-wrapper
     ports:
       - "8000:8000"
     volumes:
       # Mount Claude CLI credentials
       - ~/.claude:/root/.claude
+      # Optional: Mount a specific workspace directory
+      # Uncomment and modify the line below to use a custom workspace
+      # - ./workspace:/workspace
     environment:
       - PORT=8000
       - MAX_TIMEOUT=600000
+      # Authentication (choose one method):
+      # Option 1: Direct API key (recommended)
+      # - ANTHROPIC_API_KEY=your-api-key
+      # Option 2: Explicit auth method selection
+      # - CLAUDE_AUTH_METHOD=cli  # Options: cli, api_key, bedrock, vertex
+      # Optional: Set Claude's working directory (defaults to isolated temp dir)
+      # Uncomment and modify the line below to set a custom working directory
+      # - CLAUDE_CWD=/workspace
+      # Optional: Enable debug logging
+      # - DEBUG_MODE=true
+      # Optional: Rate limiting configuration
+      # - RATE_LIMIT_ENABLED=true
+      # - RATE_LIMIT_CHAT_PER_MINUTE=10
     restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 10s
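
Review note: the new `healthcheck` gives `docker ps` a real healthy/unhealthy signal instead of just "running". As a minimal sketch of how to confirm it from the host — assuming `/health` simply returns HTTP 200 once the wrapper is up (the same assumption the `curl -f` test makes):

```python
# Sketch: poll the wrapper's /health endpoint with the same cadence as the
# compose healthcheck (3 retries, 10s apart). Stdlib only; no auth assumed.
import time
import urllib.error
import urllib.request


def wait_for_healthy(url="http://localhost:8000/health", retries=3, delay=10.0):
    for _ in range(retries):
        try:
            with urllib.request.urlopen(url, timeout=10) as resp:
                if resp.status == 200:
                    return True
        except (urllib.error.URLError, OSError):
            pass  # container may still be starting
        time.sleep(delay)
    return False


if __name__ == "__main__":
    print("healthy" if wait_for_healthy() else "unhealthy")
```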
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "claude-code-openai-wrapper"
-version = "2.2.0"
+version = "2.3.0"
 description = "OpenAI API-compatible wrapper for Claude Code"
 authors = ["Richard Atkinson <richardatk01@gmail.com>"]
 readme = "README.md"
2 changes: 1 addition & 1 deletion src/__init__.py
@@ -1,3 +1,3 @@
"""Claude Code OpenAI Wrapper - A FastAPI-based OpenAI-compatible API for Claude Code."""

__version__ = "2.2.0"
__version__ = "2.3.0"
3 changes: 2 additions & 1 deletion src/constants.py
@@ -70,7 +70,8 @@ async def chat_endpoint(): ...
 # NOTE: Claude Agent SDK only supports Claude 4+ models, not Claude 3.x
 CLAUDE_MODELS = [
     # Claude 4.5 Family (Latest - Fall 2025) - RECOMMENDED
-    "claude-opus-4-5-20250929",  # Latest Opus 4.5 - Most capable
+    "claude-opus-4-5-20251101",  # Latest Opus 4.5 - Most capable (November 2025)
+    "claude-opus-4-5-20250929",  # Opus 4.5 - September version
     "claude-sonnet-4-5-20250929",  # Recommended - best coding model
     "claude-haiku-4-5-20251001",  # Fast & cheap
     # Claude 4.1
125 changes: 103 additions & 22 deletions src/main.py
Expand Up @@ -52,6 +52,7 @@
     rate_limit_endpoint,
 )
 from src.constants import CLAUDE_MODELS, CLAUDE_TOOLS, DEFAULT_ALLOWED_TOOLS
+from src.model_service import model_service
 
 # Load environment variables
 load_dotenv()
@@ -133,6 +134,9 @@ async def lifespan(app: FastAPI):
"""Verify Claude Code authentication and CLI on startup."""
logger.info("Verifying Claude Code authentication and CLI...")

# Initialize model service (fetch models from API or use fallback)
await model_service.initialize()

# Validate authentication first
auth_valid, auth_info = validate_claude_code_auth()

@@ -197,6 +201,9 @@ async def lifespan(app: FastAPI):
logger.info("Shutting down session manager...")
session_manager.shutdown()

# Shutdown model service
await model_service.shutdown()


# Create FastAPI app
app = FastAPI(
@@ -410,6 +417,16 @@ async def generate_streaming_response(
         system_prompt = sampling_instructions
         logger.debug(f"Added sampling instructions: {sampling_instructions}")
 
+    # Check for JSON mode
+    json_mode = request.response_format and request.response_format.type == "json_object"
+    if json_mode:
+        # Prepend JSON instruction to system prompt
+        if system_prompt:
+            system_prompt = f"{MessageAdapter.JSON_MODE_INSTRUCTION}\n\n{system_prompt}"
+        else:
+            system_prompt = MessageAdapter.JSON_MODE_INSTRUCTION
+        logger.info("JSON mode enabled (streaming) - response will be accumulated and formatted")
+
     # Filter content for unsupported features
     prompt = MessageAdapter.filter_content(prompt)
     if system_prompt:
@@ -443,6 +460,7 @@
     chunks_buffer = []
     role_sent = False  # Track if we've sent the initial role chunk
     content_sent = False  # Track if we've sent any content
+    json_mode_buffer = []  # Buffer for JSON mode - accumulate all content
 
     async for chunk in claude_cli.run_completion(
         prompt=prompt,
@@ -501,40 +519,81 @@
                         filtered_text = MessageAdapter.filter_content(raw_text)
 
                         if filtered_text and not filtered_text.isspace():
-                            # Create streaming chunk
-                            stream_chunk = ChatCompletionStreamResponse(
-                                id=request_id,
-                                model=request.model,
-                                choices=[
-                                    StreamChoice(
-                                        index=0,
-                                        delta={"content": filtered_text},
-                                        finish_reason=None,
-                                    )
-                                ],
-                            )
-
-                            yield f"data: {stream_chunk.model_dump_json()}\n\n"
-                            content_sent = True
+                            if json_mode:
+                                # In JSON mode, buffer content for later processing
+                                json_mode_buffer.append(filtered_text)
+                            else:
+                                # Create streaming chunk
+                                stream_chunk = ChatCompletionStreamResponse(
+                                    id=request_id,
+                                    model=request.model,
+                                    choices=[
+                                        StreamChoice(
+                                            index=0,
+                                            delta={"content": filtered_text},
+                                            finish_reason=None,
+                                        )
+                                    ],
+                                )
+
+                                yield f"data: {stream_chunk.model_dump_json()}\n\n"
+                                content_sent = True
 
                 elif isinstance(content, str):
                     # Filter out tool usage and thinking blocks
                     filtered_content = MessageAdapter.filter_content(content)
 
                     if filtered_content and not filtered_content.isspace():
-                        # Create streaming chunk
-                        stream_chunk = ChatCompletionStreamResponse(
-                            id=request_id,
-                            model=request.model,
-                            choices=[
-                                StreamChoice(
-                                    index=0, delta={"content": filtered_content}, finish_reason=None
-                                )
-                            ],
-                        )
-
-                        yield f"data: {stream_chunk.model_dump_json()}\n\n"
-                        content_sent = True
+                        if json_mode:
+                            # In JSON mode, buffer content for later processing
+                            json_mode_buffer.append(filtered_content)
+                        else:
+                            # Create streaming chunk
+                            stream_chunk = ChatCompletionStreamResponse(
+                                id=request_id,
+                                model=request.model,
+                                choices=[
+                                    StreamChoice(
+                                        index=0, delta={"content": filtered_content}, finish_reason=None
+                                    )
+                                ],
+                            )
+
+                            yield f"data: {stream_chunk.model_dump_json()}\n\n"
+                            content_sent = True
 
+        # Handle JSON mode: emit accumulated content as single JSON-formatted chunk
+        if json_mode and json_mode_buffer:
+            # Send role chunk first if not sent
+            if not role_sent:
+                initial_chunk = ChatCompletionStreamResponse(
+                    id=request_id,
+                    model=request.model,
+                    choices=[
+                        StreamChoice(
+                            index=0, delta={"role": "assistant", "content": ""}, finish_reason=None
+                        )
+                    ],
+                )
+                yield f"data: {initial_chunk.model_dump_json()}\n\n"
+                role_sent = True
+
+            # Combine buffered content and enforce JSON format
+            combined_content = "".join(json_mode_buffer)
+            json_content = MessageAdapter.enforce_json_format(combined_content, strict=True)
+
+            # Emit as single chunk
+            json_chunk = ChatCompletionStreamResponse(
+                id=request_id,
+                model=request.model,
+                choices=[
+                    StreamChoice(
+                        index=0, delta={"content": json_content}, finish_reason=None
+                    )
+                ],
+            )
+            yield f"data: {json_chunk.model_dump_json()}\n\n"
+            content_sent = True
 
         # Handle case where no role was sent (send at least role chunk)
         if not role_sent:
@@ -553,13 +612,16 @@

         # If we sent role but no content, send a minimal response
         if role_sent and not content_sent:
+            fallback_content = (
+                "[]" if json_mode else "I'm unable to provide a response at the moment."
+            )
             fallback_chunk = ChatCompletionStreamResponse(
                 id=request_id,
                 model=request.model,
                 choices=[
                     StreamChoice(
                         index=0,
-                        delta={"content": "I'm unable to provide a response at the moment."},
+                        delta={"content": fallback_content},
                         finish_reason=None,
                     )
                 ],
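
Review note on the streaming path above: JSON mode deliberately trades incrementality for validity — content deltas are buffered in `json_mode_buffer` and the whole document is emitted as one chunk once the Claude stream ends (with `"[]"` as the empty-response fallback). From the client side it still looks like a normal SSE stream, just with a single large content delta. A sketch using the standard `openai` client pointed at the wrapper — the base URL and API key value are assumptions about your deployment:

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="sk-anything")

stream = client.chat.completions.create(
    model="claude-sonnet-4-5-20250929",
    messages=[{"role": "user", "content": "Return three primes as a JSON array."}],
    response_format={"type": "json_object"},
    stream=True,
)

# Expect essentially one content delta carrying the complete JSON document.
print("".join(chunk.choices[0].delta.content or "" for chunk in stream))
```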
@@ -672,6 +734,19 @@ async def chat_completions(
         system_prompt = sampling_instructions
         logger.debug(f"Added sampling instructions: {sampling_instructions}")
 
+    # Check for JSON mode
+    json_mode = (
+        request_body.response_format
+        and request_body.response_format.type == "json_object"
+    )
+    if json_mode:
+        # Prepend JSON instruction to system prompt
+        if system_prompt:
+            system_prompt = f"{MessageAdapter.JSON_MODE_INSTRUCTION}\n\n{system_prompt}"
+        else:
+            system_prompt = MessageAdapter.JSON_MODE_INSTRUCTION
+        logger.info("JSON mode enabled - response will be enforced as valid JSON")
+
     # Filter content
     prompt = MessageAdapter.filter_content(prompt)
     if system_prompt:
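
Review note: same pattern on the non-streaming path. Once `json_mode` is set here, the response content is passed through `enforce_json_format` before being returned (see the next hunk), so a client should be able to parse it directly. Sketch, under the same deployment assumptions as above:

```python
import json

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="sk-anything")

resp = client.chat.completions.create(
    model="claude-sonnet-4-5-20250929",
    messages=[{"role": "user", "content": "Describe yourself as a JSON object."}],
    response_format={"type": "json_object"},
)

data = json.loads(resp.choices[0].message.content)  # intended not to raise in JSON mode
print(data)
```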
@@ -724,6 +799,12 @@
     # Filter out tool usage and thinking blocks
     assistant_content = MessageAdapter.filter_content(raw_assistant_content)
 
+    # Enforce JSON format if JSON mode is enabled
+    if json_mode:
+        assistant_content = MessageAdapter.enforce_json_format(
+            assistant_content, strict=True
+        )
+
     # Add assistant response to session if using session mode
     if actual_session_id:
         assistant_message = Message(role="assistant", content=assistant_content)
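
Review note: the diff calls `MessageAdapter.enforce_json_format(..., strict=True)` without showing its body (it lives outside these hunks). Purely as an illustration of the salvage logic such a helper typically needs — this is a hypothetical sketch, not the PR's actual implementation:

```python
import json
import re


def enforce_json_format_sketch(text: str, strict: bool = True) -> str:
    """Hypothetical stand-in for MessageAdapter.enforce_json_format."""
    # 1. Already valid JSON? Return unchanged.
    try:
        json.loads(text)
        return text
    except (json.JSONDecodeError, ValueError):
        pass
    # 2. Try the first object/array embedded in surrounding prose.
    match = re.search(r"\{.*\}|\[.*\]", text, re.DOTALL)
    if match:
        try:
            json.loads(match.group(0))
            return match.group(0)
        except (json.JSONDecodeError, ValueError):
            pass
    # 3. Strict mode: wrap the raw text so the caller always gets valid JSON.
    return json.dumps({"response": text}) if strict else text
```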
@@ -864,12 +945,12 @@ async def list_models(
     # Check FastAPI API key if configured
     await verify_api_key(request, credentials)
 
-    # Use constants for single source of truth
+    # Use dynamic models from model_service (fetched from API or fallback to constants)
     return {
         "object": "list",
         "data": [
             {"id": model_id, "object": "model", "owned_by": "anthropic"}
-            for model_id in CLAUDE_MODELS
+            for model_id in model_service.get_models()
         ],
     }
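
Review note: `/v1/models` now reflects whatever `model_service` fetched at startup, falling back to the static `CLAUDE_MODELS` list when the fetch fails, so the advertised models can change without a code release. Quick check against a running instance (same base-URL/key assumptions as the sketches above):

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="sk-anything")

for model in client.models.list():
    print(model.id)  # e.g. claude-opus-4-5-20251101, if the startup fetch succeeded
```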
