Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
217 changes: 179 additions & 38 deletions hindsight-api/hindsight_api/engine/providers/claude_code_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,61 +291,202 @@ async def call_with_tools(
tool_choice: str | dict[str, Any] = "auto",
) -> LLMToolCallResult:
"""
Make an LLM API call with tool/function calling support.
Make an LLM API call with tool/function calling support using Claude Agent SDK.

Note: This is a simplified implementation. Full tool support would require
integrating with Claude Agent SDK's tool system.
This implementation uses ClaudeSDKClient (not query()) because custom tools via
SDK MCP servers are only supported with the client. Tools are converted from OpenAI
format to SDK MCP tools, and tool names are formatted as mcp__hindsight_tools__{name}.

Args:
messages: List of message dicts. Can include tool results with role='tool'.
tools: List of tool definitions in OpenAI format.
max_completion_tokens: Maximum tokens in response.
temperature: Sampling temperature.
max_completion_tokens: Maximum tokens in response (not used by Claude Agent SDK).
temperature: Sampling temperature (not used by Claude Agent SDK).
scope: Scope identifier for tracking.
max_retries: Maximum retry attempts.
initial_backoff: Initial backoff time in seconds.
max_backoff: Maximum backoff time in seconds.
tool_choice: How to choose tools - "auto", "none", "required", or specific function.
tool_choice: How to choose tools (not used by Claude Agent SDK).

Returns:
LLMToolCallResult with content and/or tool_calls.
"""
# For now, use regular call without tools
# Full implementation would require mapping OpenAI tool format to Claude Agent SDK tools
logger.warning(
"Claude Code provider does not fully support tool calling yet. Falling back to regular text completion."
from claude_agent_sdk import (
AssistantMessage,
ClaudeAgentOptions,
ClaudeSDKClient,
SdkMcpTool,
TextBlock,
ToolUseBlock,
create_sdk_mcp_server,
)

result = await self.call(
messages=messages,
response_format=None,
max_completion_tokens=max_completion_tokens,
temperature=temperature,
scope=scope,
max_retries=max_retries,
initial_backoff=initial_backoff,
max_backoff=max_backoff,
return_usage=True,
)
start_time = time.time()

if isinstance(result, tuple):
text, usage = result
return LLMToolCallResult(
content=text,
tool_calls=[],
finish_reason="stop",
input_tokens=usage.input_tokens,
output_tokens=usage.output_tokens,
)
else:
# Fallback if return_usage didn't work as expected
return LLMToolCallResult(
content=str(result),
tool_calls=[],
finish_reason="stop",
input_tokens=0,
output_tokens=0,
# Convert OpenAI tool format to Claude Agent SDK SdkMcpTool format
sdk_tools: list[SdkMcpTool] = []
tool_names: list[str] = []

for tool in tools:
func = tool.get("function", {})
tool_name = func.get("name", "")
tool_description = func.get("description", "")
parameters = func.get("parameters", {})

# Create a handler with proper closure to avoid transport issues
def make_handler(name: str):
    """Build the async placeholder handler registered for the tool ``name``.

    The returned coroutine function does not run the tool. It ignores the
    arguments it receives and answers straight away with a single text
    block acknowledging the call.

    Args:
        name: Tool name embedded in the acknowledgement text.

    Returns:
        An async callable taking the tool arguments dict and returning a
        dict with one "content" entry holding one text block.
    """
    acknowledgement = f"[Tool {name} called successfully]"

    async def handler(args: dict[str, Any]) -> dict[str, Any]:
        # Build a fresh payload on every call so callers never share state.
        text_block = {"type": "text", "text": acknowledgement}
        return {"content": [text_block]}

    return handler

sdk_tools.append(
SdkMcpTool(
name=tool_name,
description=tool_description,
input_schema=parameters,
handler=make_handler(tool_name),
)
)
tool_names.append(tool_name)

# Create an MCP server with the tools
mcp_server = create_sdk_mcp_server(
name="hindsight_tools",
version="1.0.0",
tools=sdk_tools if sdk_tools else None,
)

# Build system prompt and user content from messages
system_prompt = ""
user_content = ""

for msg in messages:
role = msg.get("role", "user")
content = msg.get("content", "")

if role == "system":
system_prompt += ("\n\n" + content) if system_prompt else content
elif role == "user":
user_content += ("\n\n" + content) if user_content else content
elif role == "assistant":
# Include previous assistant messages as context
user_content += f"\n\n[Previous assistant response: {content}]"
elif role == "tool":
# Fold each tool result into the user context, labelled with its tool_call_id
tool_call_id = msg.get("tool_call_id", "")
user_content += f"\n\n[Tool result for {tool_call_id}: {content}]"

# Format tool names for SDK MCP servers: mcp__{server_name}__{tool_name}
# This is required by the Claude Agent SDK for MCP server tools
allowed_tool_names = [f"mcp__hindsight_tools__{name}" for name in tool_names]

# Configure SDK options with MCP server
options = ClaudeAgentOptions(
system_prompt=system_prompt if system_prompt else None,
max_turns=1, # Single-turn for API-style interactions
mcp_servers={"hindsight_tools": mcp_server} if sdk_tools else {},
allowed_tools=allowed_tool_names if allowed_tool_names else [],
)

# Call Claude Agent SDK with retry logic
last_exception = None
for attempt in range(max_retries + 1):
try:
full_text = ""
tool_calls: list[LLMToolCall] = []

# Use ClaudeSDKClient for tool calling support
# Note: query() does NOT support custom tools, only ClaudeSDKClient does
async with ClaudeSDKClient(options=options) as client:
# Send the query
await client.query(user_content)

# Receive response
async for message in client.receive_response():
if isinstance(message, AssistantMessage):
for block in message.content:
if isinstance(block, TextBlock):
full_text += block.text
elif isinstance(block, ToolUseBlock):
# SDK returns tool names with MCP prefix (mcp__hindsight_tools__{name})
# Strip the prefix to return original tool name expected by caller
tool_name = block.name
if tool_name.startswith("mcp__hindsight_tools__"):
tool_name = tool_name.replace("mcp__hindsight_tools__", "", 1)

tool_calls.append(
LLMToolCall(
id=block.id,
name=tool_name,
arguments=block.input,
)
)

# Record metrics
duration = time.time() - start_time
metrics = get_metrics_collector()

# Estimate token usage at roughly 4 characters per token (Claude Agent SDK doesn't report exact counts)
estimated_input = sum(len(m.get("content", "")) for m in messages) // 4
estimated_output = len(full_text) // 4

metrics.record_llm_call(
provider=self.provider,
model=self.model,
scope=scope,
duration=duration,
input_tokens=estimated_input,
output_tokens=estimated_output,
success=True,
)

# Log slow calls
if duration > 10.0:
logger.info(
f"slow llm call: scope={scope}, model={self.provider}/{self.model}, time={duration:.3f}s"
)

return LLMToolCallResult(
content=full_text if full_text else None,
tool_calls=tool_calls,
finish_reason="tool_calls" if tool_calls else "stop",
input_tokens=estimated_input,
output_tokens=estimated_output,
)

except Exception as e:
last_exception = e

# Check for authentication errors
error_str = str(e).lower()
if "auth" in error_str or "login" in error_str or "credential" in error_str:
logger.error(f"Claude Code authentication error: {e}")
raise RuntimeError(
f"Claude Code authentication failed: {e}\n\n"
"Run 'claude auth login' to authenticate with Claude Pro/Max."
) from e

if attempt < max_retries:
backoff = min(initial_backoff * (2**attempt), max_backoff)
logger.warning(f"Claude Code tool call error (attempt {attempt + 1}/{max_retries + 1}): {e}")
await asyncio.sleep(backoff)
continue
else:
logger.error(f"Claude Code tool call error after {max_retries + 1} attempts: {e}")
raise

if last_exception:
raise last_exception
raise RuntimeError("Claude Code tool call failed after all retries")

async def cleanup(self) -> None:
"""Clean up resources (no HTTP client to close for Claude Agent SDK)."""
Expand Down
87 changes: 69 additions & 18 deletions hindsight-api/hindsight_api/engine/providers/codex_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,9 @@ async def call(
schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}"
system_instruction += schema_msg

# Any model whose name contains "5.2" (e.g. gpt-5.2-codex) only supports the "detailed" reasoning summary
reasoning_summary = "detailed" if "5.2" in self.model else self.reasoning_summary

# Build Codex request payload
payload = {
"model": self.model,
Expand All @@ -192,7 +195,7 @@ async def call(
"tools": [],
"tool_choice": "auto",
"parallel_tool_calls": True,
"reasoning": {"summary": self.reasoning_summary},
"reasoning": {"summary": reasoning_summary},
"store": False, # Codex uses stateless mode
"stream": True, # SSE streaming
"include": ["reasoning.encrypted_content"],
Expand Down Expand Up @@ -283,13 +286,20 @@ async def call(
"Run 'codex auth login' to re-authenticate."
) from e

# Capture the API's error body (first 500 characters) so the log messages below can include it
error_detail = e.response.text[:500] if hasattr(e.response, "text") else str(e)

if attempt < max_retries:
backoff = min(initial_backoff * (2**attempt), max_backoff)
logger.warning(f"Codex HTTP error {status_code} (attempt {attempt + 1}/{max_retries + 1})")
logger.warning(
f"Codex HTTP error {status_code} (attempt {attempt + 1}/{max_retries + 1}): {error_detail}"
)
await asyncio.sleep(backoff)
continue
else:
logger.error(f"Codex HTTP error after {max_retries + 1} attempts: {e}")
logger.error(
f"Codex HTTP error after {max_retries + 1} attempts: Status {status_code}, Detail: {error_detail}"
)
raise

except httpx.RequestError as e:
Expand Down Expand Up @@ -379,8 +389,22 @@ async def call_with_tools(
"""
Make API call with tool calling support.

Note: This is a basic implementation. Full tool calling support for Codex
may require additional SSE event parsing.
Parses Codex SSE stream to extract tool calls from response.output_item.done events.
Tools are converted from OpenAI format to Codex format (flat structure at top level).

Args:
messages: List of message dicts. Can include tool results with role='tool'.
tools: List of tool definitions in OpenAI format.
max_completion_tokens: Maximum tokens in response.
temperature: Sampling temperature.
scope: Scope identifier for tracking.
max_retries: Maximum retry attempts.
initial_backoff: Initial backoff time in seconds.
max_backoff: Maximum backoff time in seconds.
tool_choice: How to choose tools - "auto", "none", "required", or specific function.

Returns:
LLMToolCallResult with content and/or tool_calls.
"""
start_time = time.time()

Expand Down Expand Up @@ -413,28 +437,30 @@ async def call_with_tools(
)

# Convert tools to Codex format
# Codex expects tools with type and name/description/parameters at top level
codex_tools = []
for tool in tools:
func = tool.get("function", {})
codex_tools.append(
{
"type": "function",
"function": {
"name": func.get("name", ""),
"description": func.get("description", ""),
"parameters": func.get("parameters", {}),
},
"name": func.get("name", ""),
"description": func.get("description", ""),
"parameters": func.get("parameters", {}),
}
)

# Any model whose name contains "5.2" (e.g. gpt-5.2-codex) only supports the "detailed" reasoning summary
reasoning_summary = "detailed" if "5.2" in self.model else self.reasoning_summary

payload = {
"model": self.model,
"instructions": system_instruction,
"input": user_messages,
"tools": codex_tools,
"tool_choice": tool_choice,
"parallel_tool_calls": True,
"reasoning": {"summary": self.reasoning_summary},
"reasoning": {"summary": reasoning_summary},
"store": False,
"stream": True,
"include": ["reasoning.encrypted_content"],
Expand All @@ -451,8 +477,16 @@ async def call_with_tools(

url = f"{self.base_url}/codex/responses"

# Debug logging for troubleshooting
logger.debug(f"Codex tool call request: url={url}, model={payload['model']}, tools={len(codex_tools)}")

try:
response = await self._client.post(url, json=payload, headers=headers, timeout=120.0)

# Log response details on error
if response.status_code != 200:
logger.error(f"Codex API error {response.status_code}: {response.text[:500]}")

response.raise_for_status()

# Parse SSE for tool calls and content
Expand Down Expand Up @@ -512,13 +546,30 @@ async def _parse_sse_tool_stream(self, response: httpx.Response) -> tuple[str |
if event_type == "response.text.delta" and "delta" in data:
content += data["delta"]

# Extract tool calls
elif event_type == "response.function_call_arguments.delta":
# Handle tool call events (implementation depends on actual Codex SSE format)
pass

except json.JSONDecodeError:
pass
# Extract completed tool calls from response.output_item.done
elif event_type == "response.output_item.done":
item = data.get("item", {})
if item.get("type") == "function_call" and item.get("status") == "completed":
tool_name = item.get("name", "")
arguments_str = item.get("arguments", "{}")
call_id = item.get("call_id", "")

try:
arguments = json.loads(arguments_str)
except json.JSONDecodeError:
logger.warning(f"Failed to parse tool arguments: {arguments_str}")
arguments = {}

tool_calls.append(
LLMToolCall(
id=call_id,
name=tool_name,
arguments=arguments,
)
)

except json.JSONDecodeError as e:
logger.warning(f"Failed to parse SSE data: {e}, data_str: {data_str[:200]}")

return content if content else None, tool_calls

Expand Down
Loading
Loading