Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 53 additions & 23 deletions litellm/llms/anthropic/chat/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,14 @@ def __init__(
self.accumulated_json: str = ""
self.chunk_type: Literal["valid_json", "accumulated_json"] = "valid_json"

# Track current content block type to avoid emitting tool calls for non-tool blocks
# See: https://github.com/BerriAI/litellm/issues/17254
self.current_content_block_type: Optional[str] = None

# Accumulate web_search_tool_result blocks for multi-turn reconstruction
# See: https://github.com/BerriAI/litellm/issues/17737
self.web_search_results: List[Dict[str, Any]] = []

def check_empty_tool_call_args(self) -> bool:
"""
Check if the tool call block so far has been an empty string
Expand Down Expand Up @@ -553,18 +561,22 @@ def _content_block_delta_helper(self, chunk: dict) -> Tuple[
if "text" in content_block["delta"]:
text = content_block["delta"]["text"]
elif "partial_json" in content_block["delta"]:
tool_use = cast(
ChatCompletionToolCallChunk,
{
"id": None,
"type": "function",
"function": {
"name": None,
"arguments": content_block["delta"]["partial_json"],
# Only emit tool calls if we're in a tool_use or server_tool_use block
# web_search_tool_result blocks also have input_json_delta but should not be treated as tool calls
# See: https://github.com/BerriAI/litellm/issues/17254
if self.current_content_block_type in ("tool_use", "server_tool_use"):
tool_use = cast(
ChatCompletionToolCallChunk,
{
"id": None,
"type": "function",
"function": {
"name": None,
"arguments": content_block["delta"]["partial_json"],
},
"index": self.tool_index,
},
"index": self.tool_index,
},
)
)
elif "citation" in content_block["delta"]:
provider_specific_fields["citation"] = content_block["delta"]["citation"]
elif (
Expand Down Expand Up @@ -674,6 +686,8 @@ def chunk_parser(self, chunk: dict) -> ModelResponseStream: # noqa: PLR0915

content_block_start = self.get_content_block_start(chunk=chunk)
self.content_blocks = [] # reset content blocks when new block starts
# Track current content block type for filtering deltas
self.current_content_block_type = content_block_start["content_block"]["type"]
if content_block_start["content_block"]["type"] == "text":
text = content_block_start["content_block"]["text"]
elif content_block_start["content_block"]["type"] == "tool_use":
Expand Down Expand Up @@ -714,22 +728,38 @@ def chunk_parser(self, chunk: dict) -> ModelResponseStream: # noqa: PLR0915
content_block_start=content_block_start,
provider_specific_fields=provider_specific_fields,
)
elif (
content_block_start["content_block"]["type"]
== "web_search_tool_result"
):
# Capture web_search_tool_result for multi-turn reconstruction
# The full content comes in content_block_start, not in deltas
# See: https://github.com/BerriAI/litellm/issues/17737
self.web_search_results.append(
content_block_start["content_block"]
)
provider_specific_fields["web_search_results"] = (
self.web_search_results
)
elif type_chunk == "content_block_stop":
ContentBlockStop(**chunk) # type: ignore
# check if tool call content block
is_empty = self.check_empty_tool_call_args()
if is_empty:
tool_use = ChatCompletionToolCallChunk(
id=None, # type: ignore[typeddict-item]
type="function",
function=ChatCompletionToolCallFunctionChunk(
name=None, # type: ignore[typeddict-item]
arguments="{}",
),
index=self.tool_index,
)
# check if tool call content block - only for tool_use and server_tool_use blocks
if self.current_content_block_type in ("tool_use", "server_tool_use"):
is_empty = self.check_empty_tool_call_args()
if is_empty:
tool_use = ChatCompletionToolCallChunk(
id=None, # type: ignore[typeddict-item]
type="function",
function=ChatCompletionToolCallFunctionChunk(
name=None, # type: ignore[typeddict-item]
arguments="{}",
),
index=self.tool_index,
)
# Reset response_format tool tracking when block stops
self.is_response_format_tool = False
# Reset current content block type
self.current_content_block_type = None
elif type_chunk == "tool_result":
# Handle tool_result blocks (for tool search results with tool_reference)
# These are automatically handled by Anthropic API, we just pass them through
Expand Down
30 changes: 30 additions & 0 deletions litellm/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6750,6 +6750,36 @@ def stream_chunk_builder( # noqa: PLR0915
_choice = cast(Choices, response.choices[0])
_choice.message.audio = processor.get_combined_audio_content(audio_chunks)

# Combine provider_specific_fields from streaming chunks (e.g., web_search_results, citations)
# See: https://github.com/BerriAI/litellm/issues/17737
provider_specific_chunks = [
chunk
for chunk in chunks
if len(chunk["choices"]) > 0
and "provider_specific_fields" in chunk["choices"][0]["delta"]
and chunk["choices"][0]["delta"]["provider_specific_fields"] is not None
]

if len(provider_specific_chunks) > 0:
combined_provider_fields: Dict[str, Any] = {}
for chunk in provider_specific_chunks:
fields = chunk["choices"][0]["delta"]["provider_specific_fields"]
if isinstance(fields, dict):
for key, value in fields.items():
if key not in combined_provider_fields:
combined_provider_fields[key] = value
elif isinstance(value, list) and isinstance(
combined_provider_fields[key], list
):
# For lists like web_search_results, take the last (most complete) one
combined_provider_fields[key] = value
else:
combined_provider_fields[key] = value

if combined_provider_fields:
_choice = cast(Choices, response.choices[0])
_choice.message.provider_specific_fields = combined_provider_fields

completion_output = get_content_from_model_response(response)

reasoning_tokens = processor.count_reasoning_tokens(response)
Expand Down
247 changes: 247 additions & 0 deletions tests/test_litellm/llms/anthropic/chat/test_anthropic_chat_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,3 +532,250 @@ def test_multiple_partial_chunks_accumulation():
assert result3 is not None
assert iterator.accumulated_json == ""
assert result3.choices[0].delta.content == "Hello"


def test_web_search_tool_result_no_extra_tool_calls():
    """
    Verify that web_search_tool_result blocks never produce tool call chunks.

    Regression test for https://github.com/BerriAI/litellm/issues/17254, where
    streaming with Anthropic web search appended a trailing {} to tool call
    arguments.

    web_search_tool_result blocks carry input_json_delta events containing {};
    those events were being converted into tool calls by mistake.
    """

    def block_start(index, content_block):
        # Build a fresh content_block_start event for the given block.
        return {
            "type": "content_block_start",
            "index": index,
            "content_block": content_block,
        }

    def json_delta(index, partial_json):
        # Build a fresh input_json_delta event carrying the given payload.
        return {
            "type": "content_block_delta",
            "index": index,
            "delta": {"type": "input_json_delta", "partial_json": partial_json},
        }

    def block_stop(index):
        # Build a fresh content_block_stop event.
        return {"type": "content_block_stop", "index": index}

    def empty_search_result_block():
        # Each call returns a new dict so no two events share state.
        return {
            "type": "web_search_tool_result",
            "tool_use_id": "srvtoolu_01ABC123",
            "content": [],
        }

    iterator = ModelResponseIterator(
        streaming_response=MagicMock(), sync_stream=True, json_mode=False
    )

    # Stream layout:
    #   block 0: server_tool_use (web_search) start -> query delta -> stop
    #   block 1: web_search_tool_result start -> {} delta -> stop
    #   block 2: web_search_tool_result start -> {} delta -> stop
    # Only block 0 is allowed to yield tool call chunks.
    stream_events = [
        block_start(
            0,
            {
                "type": "server_tool_use",
                "id": "srvtoolu_01ABC123",
                "name": "web_search",
            },
        ),
        json_delta(0, '{"query": "test"}'),
        block_stop(0),
        block_start(1, empty_search_result_block()),
        json_delta(1, "{}"),
        block_stop(1),
        block_start(2, empty_search_result_block()),
        json_delta(2, "{}"),
        block_stop(2),
    ]

    emitted = []
    for event in stream_events:
        response = iterator.chunk_parser(event)
        if not response.choices:
            continue
        delta_tool_calls = response.choices[0].delta.tool_calls
        if delta_tool_calls:
            emitted.extend(delta_tool_calls)

    # Exactly two tool call chunks are expected:
    #   - the server_tool_use block start (carries id and name)
    #   - the delta that carries the actual query
    assert len(emitted) == 2, f"Expected 2 tool calls, got {len(emitted)}"

    opening_call, query_call = emitted

    # The block start supplies the identifier and tool name.
    assert opening_call["id"] == "srvtoolu_01ABC123"
    assert opening_call["function"]["name"] == "web_search"

    # The delta supplies the query arguments.
    assert query_call["function"]["arguments"] == '{"query": "test"}'

    # The count check above is what proves the {} deltas from the
    # web_search_tool_result blocks were not emitted as tool calls.


def test_current_content_block_type_tracking():
    """
    Verify that current_content_block_type follows block starts and is cleared on stop.
    """
    iterator = ModelResponseIterator(
        streaming_response=MagicMock(), sync_stream=True, json_mode=False
    )

    # Nothing has been parsed yet, so no block type is recorded.
    assert iterator.current_content_block_type is None

    # Each entry pairs an event with the block type expected right after
    # the parser has consumed it (None means "no block open").
    timeline = [
        (
            {
                "type": "content_block_start",
                "index": 0,
                "content_block": {
                    "type": "server_tool_use",
                    "id": "srvtoolu_01ABC",
                    "name": "web_search",
                },
            },
            "server_tool_use",
        ),
        ({"type": "content_block_stop", "index": 0}, None),
        (
            {
                "type": "content_block_start",
                "index": 1,
                "content_block": {
                    "type": "web_search_tool_result",
                    "tool_use_id": "srvtoolu_01ABC",
                    "content": [],
                },
            },
            "web_search_tool_result",
        ),
        ({"type": "content_block_stop", "index": 1}, None),
    ]

    for event, expected_type in timeline:
        iterator.chunk_parser(event)
        if expected_type is None:
            assert iterator.current_content_block_type is None
        else:
            assert iterator.current_content_block_type == expected_type


def test_web_search_tool_result_captured_in_provider_specific_fields():
    """
    Verify that web_search_tool_result content is surfaced via provider_specific_fields.

    Regression test for https://github.com/BerriAI/litellm/issues/17737, where
    streaming with Anthropic web search did not capture web_search_tool_result
    blocks, causing multi-turn conversations to fail.

    The web_search_tool_result content arrives in one piece inside
    content_block_start rather than in deltas, so that is where it has to be
    captured.
    """
    iterator = ModelResponseIterator(
        streaming_response=MagicMock(), sync_stream=True, json_mode=False
    )

    message_start = {
        "type": "message_start",
        "message": {
            "id": "msg_123",
            "type": "message",
            "role": "assistant",
            "content": [],
            "usage": {"input_tokens": 10, "output_tokens": 1},
        },
    }
    tool_use_start = {
        "type": "content_block_start",
        "index": 0,
        "content_block": {
            "type": "server_tool_use",
            "id": "srvtoolu_01ABC123",
            "name": "web_search",
        },
    }
    query_delta = {
        "type": "content_block_delta",
        "index": 0,
        "delta": {
            "type": "input_json_delta",
            "partial_json": '{"query": "otter facts"}',
        },
    }
    tool_use_stop = {"type": "content_block_stop", "index": 0}
    # This is the event that actually carries the search results.
    search_result_start = {
        "type": "content_block_start",
        "index": 1,
        "content_block": {
            "type": "web_search_tool_result",
            "tool_use_id": "srvtoolu_01ABC123",
            "content": [
                {
                    "type": "web_search_result",
                    "url": "https://example.com/otters",
                    "title": "Fun Otter Facts",
                    "encrypted_content": "abc123encrypted",
                },
                {
                    "type": "web_search_result",
                    "url": "https://example.com/otters2",
                    "title": "More Otter Facts",
                    "encrypted_content": "def456encrypted",
                },
            ],
        },
    }
    search_result_stop = {"type": "content_block_stop", "index": 1}

    stream_events = [
        message_start,
        tool_use_start,
        query_delta,
        tool_use_stop,
        search_result_start,
        search_result_stop,
    ]

    # Keep whichever web_search_results payload was seen last in the stream.
    captured = None
    for event in stream_events:
        response = iterator.chunk_parser(event)
        if not response.choices:
            continue
        fields = response.choices[0].delta.provider_specific_fields
        if fields and "web_search_results" in fields:
            captured = fields["web_search_results"]

    assert captured is not None, "web_search_results should be captured"
    assert len(captured) == 1, "Should have 1 web_search_tool_result block"

    result_block = captured[0]
    assert (
        result_block["type"] == "web_search_tool_result"
    ), "Block type should be web_search_tool_result"
    assert (
        result_block["tool_use_id"] == "srvtoolu_01ABC123"
    ), "tool_use_id should match"
    assert len(result_block["content"]) == 2, "Should have 2 search results"
    assert (
        result_block["content"][0]["title"] == "Fun Otter Facts"
    ), "First result title should match"
Loading