26 changes: 22 additions & 4 deletions docker-compose.yml
@@ -1,16 +1,34 @@
-version: '3'
+version: '3.8'
 services:
   claude-wrapper:
-    build: .
+    image: ttlequals0/claude-code-openai-wrapper:latest
+    container_name: claude-wrapper
     ports:
       - "8000:8000"
     volumes:
       # Mount Claude CLI credentials
       - ~/.claude:/root/.claude
+      # Optional: Mount a specific workspace directory
+      # Uncomment and modify the line below to use a custom workspace
+      # - ./workspace:/workspace
     environment:
       - PORT=8000
       - MAX_TIMEOUT=600000
+      # Authentication (choose one method):
+      # Option 1: Direct API key (recommended)
+      # - ANTHROPIC_API_KEY=your-api-key
+      # Option 2: Explicit auth method selection
+      # - CLAUDE_AUTH_METHOD=cli  # Options: cli, api_key, bedrock, vertex
+      # Optional: Set Claude's working directory (defaults to isolated temp dir)
+      # Uncomment and modify the line below to set a custom working directory
+      # - CLAUDE_CWD=/workspace
+      # Optional: Enable debug logging
+      # - DEBUG_MODE=true
+      # Optional: Rate limiting configuration
+      # - RATE_LIMIT_ENABLED=true
+      # - RATE_LIMIT_CHAT_PER_MINUTE=10
     restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 10s
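
Review note: the new `healthcheck` gives `docker ps` a real healthy/unhealthy signal instead of just "running". As a minimal sketch of how to confirm it from the host — assuming `/health` simply returns HTTP 200 once the wrapper is up (the same assumption the `curl -f` test makes):

```python
# Sketch: poll the wrapper's /health endpoint with the same cadence as the
# compose healthcheck (3 retries, 10s apart). Stdlib only; no auth assumed.
import time
import urllib.error
import urllib.request


def wait_for_healthy(url="http://localhost:8000/health", retries=3, delay=10.0):
    for _ in range(retries):
        try:
            with urllib.request.urlopen(url, timeout=10) as resp:
                if resp.status == 200:
                    return True
        except (urllib.error.URLError, OSError):
            pass  # container may still be starting
        time.sleep(delay)
    return False


if __name__ == "__main__":
    print("healthy" if wait_for_healthy() else "unhealthy")
```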
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "claude-code-openai-wrapper"
-version = "2.2.0"
+version = "2.3.0"
 description = "OpenAI API-compatible wrapper for Claude Code"
 authors = ["Richard Atkinson <richardatk01@gmail.com>"]
 readme = "README.md"
2 changes: 1 addition & 1 deletion src/__init__.py
@@ -1,3 +1,3 @@
"""Claude Code OpenAI Wrapper - A FastAPI-based OpenAI-compatible API for Claude Code."""

__version__ = "2.2.0"
__version__ = "2.3.0"
3 changes: 2 additions & 1 deletion src/constants.py
@@ -70,7 +70,8 @@ async def chat_endpoint(): ...
 # NOTE: Claude Agent SDK only supports Claude 4+ models, not Claude 3.x
 CLAUDE_MODELS = [
     # Claude 4.5 Family (Latest - Fall 2025) - RECOMMENDED
-    "claude-opus-4-5-20250929",  # Latest Opus 4.5 - Most capable
+    "claude-opus-4-5-20251101",  # Latest Opus 4.5 - Most capable (November 2025)
+    "claude-opus-4-5-20250929",  # Opus 4.5 - September version
     "claude-sonnet-4-5-20250929",  # Recommended - best coding model
     "claude-haiku-4-5-20251001",  # Fast & cheap
     # Claude 4.1
125 changes: 103 additions & 22 deletions src/main.py
Expand Up @@ -52,6 +52,7 @@
     rate_limit_endpoint,
 )
 from src.constants import CLAUDE_MODELS, CLAUDE_TOOLS, DEFAULT_ALLOWED_TOOLS
+from src.model_service import model_service
 
 # Load environment variables
 load_dotenv()
@@ -133,6 +134,9 @@ async def lifespan(app: FastAPI):
"""Verify Claude Code authentication and CLI on startup."""
logger.info("Verifying Claude Code authentication and CLI...")

# Initialize model service (fetch models from API or use fallback)
await model_service.initialize()

# Validate authentication first
auth_valid, auth_info = validate_claude_code_auth()

@@ -197,6 +201,9 @@ async def lifespan(app: FastAPI):
logger.info("Shutting down session manager...")
session_manager.shutdown()

# Shutdown model service
await model_service.shutdown()


# Create FastAPI app
app = FastAPI(
@@ -410,6 +417,16 @@ async def generate_streaming_response(
         system_prompt = sampling_instructions
         logger.debug(f"Added sampling instructions: {sampling_instructions}")
 
+    # Check for JSON mode
+    json_mode = request.response_format and request.response_format.type == "json_object"
+    if json_mode:
+        # Prepend JSON instruction to system prompt
+        if system_prompt:
+            system_prompt = f"{MessageAdapter.JSON_MODE_INSTRUCTION}\n\n{system_prompt}"
+        else:
+            system_prompt = MessageAdapter.JSON_MODE_INSTRUCTION
+        logger.info("JSON mode enabled (streaming) - response will be accumulated and formatted")
+
     # Filter content for unsupported features
     prompt = MessageAdapter.filter_content(prompt)
     if system_prompt:
@@ -443,6 +460,7 @@
     chunks_buffer = []
     role_sent = False  # Track if we've sent the initial role chunk
     content_sent = False  # Track if we've sent any content
+    json_mode_buffer = []  # Buffer for JSON mode - accumulate all content
 
     async for chunk in claude_cli.run_completion(
         prompt=prompt,
@@ -501,40 +519,81 @@
                         filtered_text = MessageAdapter.filter_content(raw_text)
 
                         if filtered_text and not filtered_text.isspace():
-                            # Create streaming chunk
-                            stream_chunk = ChatCompletionStreamResponse(
-                                id=request_id,
-                                model=request.model,
-                                choices=[
-                                    StreamChoice(
-                                        index=0,
-                                        delta={"content": filtered_text},
-                                        finish_reason=None,
-                                    )
-                                ],
-                            )
-
-                            yield f"data: {stream_chunk.model_dump_json()}\n\n"
-                            content_sent = True
+                            if json_mode:
+                                # In JSON mode, buffer content for later processing
+                                json_mode_buffer.append(filtered_text)
+                            else:
+                                # Create streaming chunk
+                                stream_chunk = ChatCompletionStreamResponse(
+                                    id=request_id,
+                                    model=request.model,
+                                    choices=[
+                                        StreamChoice(
+                                            index=0,
+                                            delta={"content": filtered_text},
+                                            finish_reason=None,
+                                        )
+                                    ],
+                                )
+
+                                yield f"data: {stream_chunk.model_dump_json()}\n\n"
+                                content_sent = True
 
                 elif isinstance(content, str):
                     # Filter out tool usage and thinking blocks
                     filtered_content = MessageAdapter.filter_content(content)
 
                     if filtered_content and not filtered_content.isspace():
-                        # Create streaming chunk
-                        stream_chunk = ChatCompletionStreamResponse(
-                            id=request_id,
-                            model=request.model,
-                            choices=[
-                                StreamChoice(
-                                    index=0, delta={"content": filtered_content}, finish_reason=None
-                                )
-                            ],
-                        )
-
-                        yield f"data: {stream_chunk.model_dump_json()}\n\n"
-                        content_sent = True
+                        if json_mode:
+                            # In JSON mode, buffer content for later processing
+                            json_mode_buffer.append(filtered_content)
+                        else:
+                            # Create streaming chunk
+                            stream_chunk = ChatCompletionStreamResponse(
+                                id=request_id,
+                                model=request.model,
+                                choices=[
+                                    StreamChoice(
+                                        index=0, delta={"content": filtered_content}, finish_reason=None
+                                    )
+                                ],
+                            )
+
+                            yield f"data: {stream_chunk.model_dump_json()}\n\n"
+                            content_sent = True
 
+        # Handle JSON mode: emit accumulated content as single JSON-formatted chunk
+        if json_mode and json_mode_buffer:
+            # Send role chunk first if not sent
+            if not role_sent:
+                initial_chunk = ChatCompletionStreamResponse(
+                    id=request_id,
+                    model=request.model,
+                    choices=[
+                        StreamChoice(
+                            index=0, delta={"role": "assistant", "content": ""}, finish_reason=None
+                        )
+                    ],
+                )
+                yield f"data: {initial_chunk.model_dump_json()}\n\n"
+                role_sent = True
+
+            # Combine buffered content and enforce JSON format
+            combined_content = "".join(json_mode_buffer)
+            json_content = MessageAdapter.enforce_json_format(combined_content, strict=True)
+
+            # Emit as single chunk
+            json_chunk = ChatCompletionStreamResponse(
+                id=request_id,
+                model=request.model,
+                choices=[
+                    StreamChoice(
+                        index=0, delta={"content": json_content}, finish_reason=None
+                    )
+                ],
+            )
+            yield f"data: {json_chunk.model_dump_json()}\n\n"
+            content_sent = True
 
         # Handle case where no role was sent (send at least role chunk)
         if not role_sent:
@@ -553,13 +612,16 @@

         # If we sent role but no content, send a minimal response
         if role_sent and not content_sent:
+            fallback_content = (
+                "[]" if json_mode else "I'm unable to provide a response at the moment."
+            )
             fallback_chunk = ChatCompletionStreamResponse(
                 id=request_id,
                 model=request.model,
                 choices=[
                     StreamChoice(
                         index=0,
-                        delta={"content": "I'm unable to provide a response at the moment."},
+                        delta={"content": fallback_content},
                         finish_reason=None,
                     )
                 ],
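
Review note on the streaming path above: JSON mode deliberately trades incrementality for validity — content deltas are buffered in `json_mode_buffer` and the whole document is emitted as one chunk once the Claude stream ends (with `"[]"` as the empty-response fallback). From the client side it still looks like a normal SSE stream, just with a single large content delta. A sketch using the standard `openai` client pointed at the wrapper — the base URL and API key value are assumptions about your deployment:

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="sk-anything")

stream = client.chat.completions.create(
    model="claude-sonnet-4-5-20250929",
    messages=[{"role": "user", "content": "Return three primes as a JSON array."}],
    response_format={"type": "json_object"},
    stream=True,
)

# Expect essentially one content delta carrying the complete JSON document.
print("".join(chunk.choices[0].delta.content or "" for chunk in stream))
```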
@@ -672,6 +734,19 @@ async def chat_completions(
         system_prompt = sampling_instructions
         logger.debug(f"Added sampling instructions: {sampling_instructions}")
 
+    # Check for JSON mode
+    json_mode = (
+        request_body.response_format
+        and request_body.response_format.type == "json_object"
+    )
+    if json_mode:
+        # Prepend JSON instruction to system prompt
+        if system_prompt:
+            system_prompt = f"{MessageAdapter.JSON_MODE_INSTRUCTION}\n\n{system_prompt}"
+        else:
+            system_prompt = MessageAdapter.JSON_MODE_INSTRUCTION
+        logger.info("JSON mode enabled - response will be enforced as valid JSON")
+
     # Filter content
     prompt = MessageAdapter.filter_content(prompt)
     if system_prompt:
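
Review note: same pattern on the non-streaming path. Once `json_mode` is set here, the response content is passed through `enforce_json_format` before being returned (see the next hunk), so a client should be able to parse it directly. Sketch, under the same deployment assumptions as above:

```python
import json

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="sk-anything")

resp = client.chat.completions.create(
    model="claude-sonnet-4-5-20250929",
    messages=[{"role": "user", "content": "Describe yourself as a JSON object."}],
    response_format={"type": "json_object"},
)

data = json.loads(resp.choices[0].message.content)  # intended not to raise in JSON mode
print(data)
```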
@@ -724,6 +799,12 @@
     # Filter out tool usage and thinking blocks
     assistant_content = MessageAdapter.filter_content(raw_assistant_content)
 
+    # Enforce JSON format if JSON mode is enabled
+    if json_mode:
+        assistant_content = MessageAdapter.enforce_json_format(
+            assistant_content, strict=True
+        )
+
     # Add assistant response to session if using session mode
     if actual_session_id:
         assistant_message = Message(role="assistant", content=assistant_content)
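
Review note: the diff calls `MessageAdapter.enforce_json_format(..., strict=True)` without showing its body (it lives outside these hunks). Purely as an illustration of the salvage logic such a helper typically needs — this is a hypothetical sketch, not the PR's actual implementation:

```python
import json
import re


def enforce_json_format_sketch(text: str, strict: bool = True) -> str:
    """Hypothetical stand-in for MessageAdapter.enforce_json_format."""
    # 1. Already valid JSON? Return unchanged.
    try:
        json.loads(text)
        return text
    except (json.JSONDecodeError, ValueError):
        pass
    # 2. Try the first object/array embedded in surrounding prose.
    match = re.search(r"\{.*\}|\[.*\]", text, re.DOTALL)
    if match:
        try:
            json.loads(match.group(0))
            return match.group(0)
        except (json.JSONDecodeError, ValueError):
            pass
    # 3. Strict mode: wrap the raw text so the caller always gets valid JSON.
    return json.dumps({"response": text}) if strict else text
```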
@@ -864,12 +945,12 @@ async def list_models(
     # Check FastAPI API key if configured
     await verify_api_key(request, credentials)
 
-    # Use constants for single source of truth
+    # Use dynamic models from model_service (fetched from API or fallback to constants)
     return {
         "object": "list",
         "data": [
             {"id": model_id, "object": "model", "owned_by": "anthropic"}
-            for model_id in CLAUDE_MODELS
+            for model_id in model_service.get_models()
         ],
     }
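
Review note: `/v1/models` now reflects whatever `model_service` fetched at startup, falling back to the static `CLAUDE_MODELS` list when the fetch fails, so the advertised models can change without a code release. Quick check against a running instance (same base-URL/key assumptions as the sketches above):

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="sk-anything")

for model in client.models.list():
    print(model.id)  # e.g. claude-opus-4-5-20251101, if the startup fetch succeeded
```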
