Skip to content

Commit 2e303bf

Browse files
authored
fix(anthropic): capture web_search_tool_result in streaming for multi-turn conversations (#17798)
This fix addresses two issues with Anthropic web search streaming:

1. Fix trailing {} in tool call arguments
   - web_search_tool_result blocks have input_json_delta events that were incorrectly emitted as tool calls
   - Added current_content_block_type tracking to only emit tool calls for tool_use and server_tool_use blocks

2. Capture web_search_tool_result for multi-turn
   - The web_search_tool_result content comes ALL AT ONCE in content_block_start
   - Now captured in provider_specific_fields.web_search_results
   - stream_chunk_builder combines these for final message
   - Allows multi-turn conversations to work with streaming web search
1 parent 13df508 commit 2e303bf

File tree

3 files changed

+330
-23
lines changed

3 files changed

+330
-23
lines changed

litellm/llms/anthropic/chat/handler.py

Lines changed: 53 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,14 @@ def __init__(
504504
self.accumulated_json: str = ""
505505
self.chunk_type: Literal["valid_json", "accumulated_json"] = "valid_json"
506506

507+
# Track current content block type to avoid emitting tool calls for non-tool blocks
508+
# See: https://github.com/BerriAI/litellm/issues/17254
509+
self.current_content_block_type: Optional[str] = None
510+
511+
# Accumulate web_search_tool_result blocks for multi-turn reconstruction
512+
# See: https://github.com/BerriAI/litellm/issues/17737
513+
self.web_search_results: List[Dict[str, Any]] = []
514+
507515
def check_empty_tool_call_args(self) -> bool:
508516
"""
509517
Check if the tool call block so far has been an empty string
@@ -553,18 +561,22 @@ def _content_block_delta_helper(self, chunk: dict) -> Tuple[
553561
if "text" in content_block["delta"]:
554562
text = content_block["delta"]["text"]
555563
elif "partial_json" in content_block["delta"]:
556-
tool_use = cast(
557-
ChatCompletionToolCallChunk,
558-
{
559-
"id": None,
560-
"type": "function",
561-
"function": {
562-
"name": None,
563-
"arguments": content_block["delta"]["partial_json"],
564+
# Only emit tool calls if we're in a tool_use or server_tool_use block
565+
# web_search_tool_result blocks also have input_json_delta but should not be treated as tool calls
566+
# See: https://github.com/BerriAI/litellm/issues/17254
567+
if self.current_content_block_type in ("tool_use", "server_tool_use"):
568+
tool_use = cast(
569+
ChatCompletionToolCallChunk,
570+
{
571+
"id": None,
572+
"type": "function",
573+
"function": {
574+
"name": None,
575+
"arguments": content_block["delta"]["partial_json"],
576+
},
577+
"index": self.tool_index,
564578
},
565-
"index": self.tool_index,
566-
},
567-
)
579+
)
568580
elif "citation" in content_block["delta"]:
569581
provider_specific_fields["citation"] = content_block["delta"]["citation"]
570582
elif (
@@ -674,6 +686,8 @@ def chunk_parser(self, chunk: dict) -> ModelResponseStream: # noqa: PLR0915
674686

675687
content_block_start = self.get_content_block_start(chunk=chunk)
676688
self.content_blocks = [] # reset content blocks when new block starts
689+
# Track current content block type for filtering deltas
690+
self.current_content_block_type = content_block_start["content_block"]["type"]
677691
if content_block_start["content_block"]["type"] == "text":
678692
text = content_block_start["content_block"]["text"]
679693
elif content_block_start["content_block"]["type"] == "tool_use":
@@ -714,22 +728,38 @@ def chunk_parser(self, chunk: dict) -> ModelResponseStream: # noqa: PLR0915
714728
content_block_start=content_block_start,
715729
provider_specific_fields=provider_specific_fields,
716730
)
731+
elif (
732+
content_block_start["content_block"]["type"]
733+
== "web_search_tool_result"
734+
):
735+
# Capture web_search_tool_result for multi-turn reconstruction
736+
# The full content comes in content_block_start, not in deltas
737+
# See: https://github.com/BerriAI/litellm/issues/17737
738+
self.web_search_results.append(
739+
content_block_start["content_block"]
740+
)
741+
provider_specific_fields["web_search_results"] = (
742+
self.web_search_results
743+
)
717744
elif type_chunk == "content_block_stop":
718745
ContentBlockStop(**chunk) # type: ignore
719-
# check if tool call content block
720-
is_empty = self.check_empty_tool_call_args()
721-
if is_empty:
722-
tool_use = ChatCompletionToolCallChunk(
723-
id=None, # type: ignore[typeddict-item]
724-
type="function",
725-
function=ChatCompletionToolCallFunctionChunk(
726-
name=None, # type: ignore[typeddict-item]
727-
arguments="{}",
728-
),
729-
index=self.tool_index,
730-
)
746+
# check if tool call content block - only for tool_use and server_tool_use blocks
747+
if self.current_content_block_type in ("tool_use", "server_tool_use"):
748+
is_empty = self.check_empty_tool_call_args()
749+
if is_empty:
750+
tool_use = ChatCompletionToolCallChunk(
751+
id=None, # type: ignore[typeddict-item]
752+
type="function",
753+
function=ChatCompletionToolCallFunctionChunk(
754+
name=None, # type: ignore[typeddict-item]
755+
arguments="{}",
756+
),
757+
index=self.tool_index,
758+
)
731759
# Reset response_format tool tracking when block stops
732760
self.is_response_format_tool = False
761+
# Reset current content block type
762+
self.current_content_block_type = None
733763
elif type_chunk == "tool_result":
734764
# Handle tool_result blocks (for tool search results with tool_reference)
735765
# These are automatically handled by Anthropic API, we just pass them through

litellm/main.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6750,6 +6750,36 @@ def stream_chunk_builder( # noqa: PLR0915
67506750
_choice = cast(Choices, response.choices[0])
67516751
_choice.message.audio = processor.get_combined_audio_content(audio_chunks)
67526752

6753+
# Combine provider_specific_fields from streaming chunks (e.g., web_search_results, citations)
6754+
# See: https://github.com/BerriAI/litellm/issues/17737
6755+
provider_specific_chunks = [
6756+
chunk
6757+
for chunk in chunks
6758+
if len(chunk["choices"]) > 0
6759+
and "provider_specific_fields" in chunk["choices"][0]["delta"]
6760+
and chunk["choices"][0]["delta"]["provider_specific_fields"] is not None
6761+
]
6762+
6763+
if len(provider_specific_chunks) > 0:
6764+
combined_provider_fields: Dict[str, Any] = {}
6765+
for chunk in provider_specific_chunks:
6766+
fields = chunk["choices"][0]["delta"]["provider_specific_fields"]
6767+
if isinstance(fields, dict):
6768+
for key, value in fields.items():
6769+
if key not in combined_provider_fields:
6770+
combined_provider_fields[key] = value
6771+
elif isinstance(value, list) and isinstance(
6772+
combined_provider_fields[key], list
6773+
):
6774+
# For lists like web_search_results, take the last (most complete) one
6775+
combined_provider_fields[key] = value
6776+
else:
6777+
combined_provider_fields[key] = value
6778+
6779+
if combined_provider_fields:
6780+
_choice = cast(Choices, response.choices[0])
6781+
_choice.message.provider_specific_fields = combined_provider_fields
6782+
67536783
completion_output = get_content_from_model_response(response)
67546784

67556785
reasoning_tokens = processor.count_reasoning_tokens(response)

tests/test_litellm/llms/anthropic/chat/test_anthropic_chat_handler.py

Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,3 +532,250 @@ def test_multiple_partial_chunks_accumulation():
532532
assert result3 is not None
533533
assert iterator.accumulated_json == ""
534534
assert result3.choices[0].delta.content == "Hello"
535+
536+
537+
def test_web_search_tool_result_no_extra_tool_calls():
538+
"""
539+
Test that web_search_tool_result blocks don't emit tool call chunks.
540+
541+
This tests the fix for https://github.com/BerriAI/litellm/issues/17254
542+
where streaming with Anthropic web search was adding trailing {} to tool call arguments.
543+
544+
The issue was that web_search_tool_result blocks have input_json_delta events with {}
545+
that were incorrectly being converted to tool calls.
546+
"""
547+
iterator = ModelResponseIterator(
548+
streaming_response=MagicMock(), sync_stream=True, json_mode=False
549+
)
550+
551+
# Simulate the streaming sequence:
552+
# 1. server_tool_use block starts (web_search)
553+
# 2. input_json_delta with the query
554+
# 3. content_block_stop
555+
# 4. web_search_tool_result block starts
556+
# 5. input_json_delta with {} (this should NOT emit a tool call)
557+
# 6. content_block_stop
558+
559+
chunks = [
560+
# 1. server_tool_use block starts
561+
{
562+
"type": "content_block_start",
563+
"index": 0,
564+
"content_block": {
565+
"type": "server_tool_use",
566+
"id": "srvtoolu_01ABC123",
567+
"name": "web_search",
568+
},
569+
},
570+
# 2. input_json_delta with the query
571+
{
572+
"type": "content_block_delta",
573+
"index": 0,
574+
"delta": {"type": "input_json_delta", "partial_json": '{"query": "test"}'},
575+
},
576+
# 3. content_block_stop for server_tool_use
577+
{"type": "content_block_stop", "index": 0},
578+
# 4. web_search_tool_result block starts
579+
{
580+
"type": "content_block_start",
581+
"index": 1,
582+
"content_block": {
583+
"type": "web_search_tool_result",
584+
"tool_use_id": "srvtoolu_01ABC123",
585+
"content": [],
586+
},
587+
},
588+
# 5. input_json_delta with {} - this should NOT emit a tool call
589+
{
590+
"type": "content_block_delta",
591+
"index": 1,
592+
"delta": {"type": "input_json_delta", "partial_json": "{}"},
593+
},
594+
# 6. content_block_stop for web_search_tool_result
595+
{"type": "content_block_stop", "index": 1},
596+
# 7. Another web_search_tool_result with {} - also should NOT emit
597+
{
598+
"type": "content_block_start",
599+
"index": 2,
600+
"content_block": {
601+
"type": "web_search_tool_result",
602+
"tool_use_id": "srvtoolu_01ABC123",
603+
"content": [],
604+
},
605+
},
606+
{
607+
"type": "content_block_delta",
608+
"index": 2,
609+
"delta": {"type": "input_json_delta", "partial_json": "{}"},
610+
},
611+
{"type": "content_block_stop", "index": 2},
612+
]
613+
614+
tool_calls_emitted = []
615+
for chunk in chunks:
616+
parsed = iterator.chunk_parser(chunk)
617+
if parsed.choices and parsed.choices[0].delta.tool_calls:
618+
for tc in parsed.choices[0].delta.tool_calls:
619+
tool_calls_emitted.append(tc)
620+
621+
# Should have exactly 2 tool calls:
622+
# 1. From content_block_start (server_tool_use) with id and name
623+
# 2. From content_block_delta with the actual query
624+
assert len(tool_calls_emitted) == 2, f"Expected 2 tool calls, got {len(tool_calls_emitted)}"
625+
626+
# First tool call should have the id and name
627+
assert tool_calls_emitted[0]["id"] == "srvtoolu_01ABC123"
628+
assert tool_calls_emitted[0]["function"]["name"] == "web_search"
629+
630+
# Second tool call should have the query arguments
631+
assert tool_calls_emitted[1]["function"]["arguments"] == '{"query": "test"}'
632+
633+
# The {} chunks from web_search_tool_result should NOT have been emitted as tool calls
634+
635+
636+
def test_current_content_block_type_tracking():
637+
"""
638+
Test that current_content_block_type is properly tracked and reset.
639+
"""
640+
iterator = ModelResponseIterator(
641+
streaming_response=MagicMock(), sync_stream=True, json_mode=False
642+
)
643+
644+
# Initially should be None
645+
assert iterator.current_content_block_type is None
646+
647+
# After server_tool_use block start
648+
chunk1 = {
649+
"type": "content_block_start",
650+
"index": 0,
651+
"content_block": {
652+
"type": "server_tool_use",
653+
"id": "srvtoolu_01ABC",
654+
"name": "web_search",
655+
},
656+
}
657+
iterator.chunk_parser(chunk1)
658+
assert iterator.current_content_block_type == "server_tool_use"
659+
660+
# After content_block_stop
661+
chunk2 = {"type": "content_block_stop", "index": 0}
662+
iterator.chunk_parser(chunk2)
663+
assert iterator.current_content_block_type is None
664+
665+
# After web_search_tool_result block start
666+
chunk3 = {
667+
"type": "content_block_start",
668+
"index": 1,
669+
"content_block": {
670+
"type": "web_search_tool_result",
671+
"tool_use_id": "srvtoolu_01ABC",
672+
"content": [],
673+
},
674+
}
675+
iterator.chunk_parser(chunk3)
676+
assert iterator.current_content_block_type == "web_search_tool_result"
677+
678+
# After content_block_stop
679+
chunk4 = {"type": "content_block_stop", "index": 1}
680+
iterator.chunk_parser(chunk4)
681+
assert iterator.current_content_block_type is None
682+
683+
684+
def test_web_search_tool_result_captured_in_provider_specific_fields():
685+
"""
686+
Test that web_search_tool_result content is captured in provider_specific_fields.
687+
688+
This tests the fix for https://github.com/BerriAI/litellm/issues/17737
689+
where streaming with Anthropic web search wasn't capturing web_search_tool_result
690+
blocks, causing multi-turn conversations to fail.
691+
692+
The web_search_tool_result content comes ALL AT ONCE in content_block_start,
693+
not in deltas, so we need to capture it there.
694+
"""
695+
iterator = ModelResponseIterator(
696+
streaming_response=MagicMock(), sync_stream=True, json_mode=False
697+
)
698+
699+
# Simulate the streaming sequence with web_search_tool_result
700+
chunks = [
701+
# 1. message_start
702+
{
703+
"type": "message_start",
704+
"message": {
705+
"id": "msg_123",
706+
"type": "message",
707+
"role": "assistant",
708+
"content": [],
709+
"usage": {"input_tokens": 10, "output_tokens": 1},
710+
},
711+
},
712+
# 2. server_tool_use block starts (web_search)
713+
{
714+
"type": "content_block_start",
715+
"index": 0,
716+
"content_block": {
717+
"type": "server_tool_use",
718+
"id": "srvtoolu_01ABC123",
719+
"name": "web_search",
720+
},
721+
},
722+
# 3. input_json_delta with the query
723+
{
724+
"type": "content_block_delta",
725+
"index": 0,
726+
"delta": {"type": "input_json_delta", "partial_json": '{"query": "otter facts"}'},
727+
},
728+
# 4. content_block_stop for server_tool_use
729+
{"type": "content_block_stop", "index": 0},
730+
# 5. web_search_tool_result block starts - THIS IS WHERE THE RESULTS ARE
731+
{
732+
"type": "content_block_start",
733+
"index": 1,
734+
"content_block": {
735+
"type": "web_search_tool_result",
736+
"tool_use_id": "srvtoolu_01ABC123",
737+
"content": [
738+
{
739+
"type": "web_search_result",
740+
"url": "https://example.com/otters",
741+
"title": "Fun Otter Facts",
742+
"encrypted_content": "abc123encrypted",
743+
},
744+
{
745+
"type": "web_search_result",
746+
"url": "https://example.com/otters2",
747+
"title": "More Otter Facts",
748+
"encrypted_content": "def456encrypted",
749+
},
750+
],
751+
},
752+
},
753+
# 6. content_block_stop for web_search_tool_result
754+
{"type": "content_block_stop", "index": 1},
755+
]
756+
757+
web_search_results = None
758+
for chunk in chunks:
759+
parsed = iterator.chunk_parser(chunk)
760+
if (
761+
parsed.choices
762+
and parsed.choices[0].delta.provider_specific_fields
763+
and "web_search_results" in parsed.choices[0].delta.provider_specific_fields
764+
):
765+
web_search_results = parsed.choices[0].delta.provider_specific_fields[
766+
"web_search_results"
767+
]
768+
769+
# Verify web_search_results was captured
770+
assert web_search_results is not None, "web_search_results should be captured"
771+
assert len(web_search_results) == 1, "Should have 1 web_search_tool_result block"
772+
assert (
773+
web_search_results[0]["type"] == "web_search_tool_result"
774+
), "Block type should be web_search_tool_result"
775+
assert (
776+
web_search_results[0]["tool_use_id"] == "srvtoolu_01ABC123"
777+
), "tool_use_id should match"
778+
assert len(web_search_results[0]["content"]) == 2, "Should have 2 search results"
779+
assert (
780+
web_search_results[0]["content"][0]["title"] == "Fun Otter Facts"
781+
), "First result title should match"

0 commit comments

Comments
 (0)