Skip to content

Commit dd9679b

Browse files
committed
fix(anthropic): preserve web_fetch_tool_result in multi-turn conversations
Fixes BerriAI#18137. Similar to the fix for web_search_tool_result (BerriAI#17746, BerriAI#17798), this PR preserves web_fetch_tool_result blocks in multi-turn conversations. Changes:
- Add handling for web_fetch_tool_result in transformation.py (non-streaming)
- Add capture of web_fetch_tool_result in handler.py (streaming)
- Fix a streaming tool-arguments bug where an empty input {} was prepended to the actual arguments, by emitting an empty string instead of str({})
- Add unit tests for web_fetch_tool_result handling
1 parent 9faee8b commit dd9679b

File tree

3 files changed

+188
-2
lines changed

3 files changed

+188
-2
lines changed

litellm/llms/anthropic/chat/handler.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -692,12 +692,17 @@ def chunk_parser(self, chunk: dict) -> ModelResponseStream: # noqa: PLR0915
692692
text = content_block_start["content_block"]["text"]
693693
elif content_block_start["content_block"]["type"] == "tool_use" or content_block_start["content_block"]["type"] == "server_tool_use":
694694
self.tool_index += 1
695+
# Get initial input - use empty string if input is empty dict or not present
696+
# This prevents str({}) = '{}' from being prepended to actual arguments
697+
# that come in subsequent input_json_delta events
698+
_initial_input = content_block_start["content_block"].get("input", {})
699+
_arguments = "" if _initial_input == {} else str(_initial_input)
695700
tool_use = ChatCompletionToolCallChunk(
696701
id=content_block_start["content_block"]["id"],
697702
type="function",
698703
function=ChatCompletionToolCallFunctionChunk(
699704
name=content_block_start["content_block"]["name"],
700-
arguments=str(content_block_start["content_block"]["input"]),
705+
arguments=_arguments,
701706
),
702707
index=self.tool_index,
703708
)
@@ -729,6 +734,19 @@ def chunk_parser(self, chunk: dict) -> ModelResponseStream: # noqa: PLR0915
729734
provider_specific_fields["web_search_results"] = (
730735
self.web_search_results
731736
)
737+
elif (
738+
content_block_start["content_block"]["type"]
739+
== "web_fetch_tool_result"
740+
):
741+
# Capture web_fetch_tool_result for multi-turn reconstruction
742+
# The full content comes in content_block_start, not in deltas
743+
# Fixes: https://github.com/BerriAI/litellm/issues/18137
744+
self.web_search_results.append(
745+
content_block_start["content_block"]
746+
)
747+
provider_specific_fields["web_search_results"] = (
748+
self.web_search_results
749+
)
732750
elif type_chunk == "content_block_stop":
733751
ContentBlockStop(**chunk) # type: ignore
734752
# check if tool call content block - only for tool_use and server_tool_use blocks

litellm/llms/anthropic/chat/transformation.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1157,6 +1157,12 @@ def extract_response_content(self, completion_response: dict) -> Tuple[
11571157
if web_search_results is None:
11581158
web_search_results = []
11591159
web_search_results.append(content)
1160+
## WEB FETCH TOOL RESULT - preserve web fetch results for multi-turn conversations
1161+
## Fixes: https://github.com/BerriAI/litellm/issues/18137
1162+
elif content["type"] == "web_fetch_tool_result":
1163+
if web_search_results is None:
1164+
web_search_results = []
1165+
web_search_results.append(content)
11601166
elif content.get("thinking", None) is not None:
11611167
if thinking_blocks is None:
11621168
thinking_blocks = []

tests/test_litellm/llms/anthropic/chat/test_anthropic_chat_handler.py

Lines changed: 163 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -473,7 +473,6 @@ def test_partial_json_chunk_accumulation():
473473
streaming_response=MagicMock(), sync_stream=True, json_mode=False
474474
)
475475

476-
# Simulate a complete JSON chunk being split into two parts
477476
partial_chunk_1 = '{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hel'
478477
partial_chunk_2 = 'lo"}}'
479478

@@ -781,6 +780,169 @@ def test_web_search_tool_result_captured_in_provider_specific_fields():
781780
), "First result title should match"
782781

783782

783+
def test_web_fetch_tool_result_captured_in_provider_specific_fields():
784+
"""
785+
Test that web_fetch_tool_result content is captured in provider_specific_fields.
786+
787+
This tests the fix for https://github.com/BerriAI/litellm/issues/18137
788+
where streaming with Anthropic web fetch wasn't capturing web_fetch_tool_result
789+
blocks, causing multi-turn conversations to fail.
790+
791+
The web_fetch_tool_result content comes ALL AT ONCE in content_block_start,
792+
not in deltas, so we need to capture it there.
793+
"""
794+
iterator = ModelResponseIterator(
795+
streaming_response=MagicMock(), sync_stream=True, json_mode=False
796+
)
797+
798+
# Simulate the streaming sequence with web_fetch_tool_result
799+
chunks = [
800+
# 1. message_start
801+
{
802+
"type": "message_start",
803+
"message": {
804+
"id": "msg_123",
805+
"type": "message",
806+
"role": "assistant",
807+
"content": [],
808+
"usage": {"input_tokens": 10, "output_tokens": 1},
809+
},
810+
},
811+
# 2. server_tool_use block starts (web_fetch)
812+
{
813+
"type": "content_block_start",
814+
"index": 0,
815+
"content_block": {
816+
"type": "server_tool_use",
817+
"id": "srvtoolu_01ABC123",
818+
"name": "web_fetch",
819+
},
820+
},
821+
# 3. input_json_delta with the url
822+
{
823+
"type": "content_block_delta",
824+
"index": 0,
825+
"delta": {"type": "input_json_delta", "partial_json": '{"url": "https://example.com"}'},
826+
},
827+
# 4. content_block_stop for server_tool_use
828+
{"type": "content_block_stop", "index": 0},
829+
# 5. web_fetch_tool_result block starts - THIS IS WHERE THE RESULTS ARE
830+
{
831+
"type": "content_block_start",
832+
"index": 1,
833+
"content_block": {
834+
"type": "web_fetch_tool_result",
835+
"tool_use_id": "srvtoolu_01ABC123",
836+
"content": {
837+
"type": "web_fetch_result",
838+
"url": "https://example.com",
839+
"retrieved_at": "2025-12-16T19:28:29.758000+00:00",
840+
"content": {
841+
"type": "document",
842+
"source": {
843+
"type": "text",
844+
"media_type": "text/plain",
845+
"data": "Hello World",
846+
},
847+
"title": "Example Page",
848+
},
849+
},
850+
},
851+
},
852+
# 6. content_block_stop for web_fetch_tool_result
853+
{"type": "content_block_stop", "index": 1},
854+
]
855+
856+
web_search_results = None
857+
for chunk in chunks:
858+
parsed = iterator.chunk_parser(chunk)
859+
if (
860+
parsed.choices
861+
and parsed.choices[0].delta.provider_specific_fields
862+
and "web_search_results" in parsed.choices[0].delta.provider_specific_fields
863+
):
864+
web_search_results = parsed.choices[0].delta.provider_specific_fields[
865+
"web_search_results"
866+
]
867+
868+
# Verify web_fetch_tool_result was captured (stored in web_search_results list)
869+
assert web_search_results is not None, "web_search_results should be captured"
870+
assert len(web_search_results) == 1, "Should have 1 web_fetch_tool_result block"
871+
assert (
872+
web_search_results[0]["type"] == "web_fetch_tool_result"
873+
), "Block type should be web_fetch_tool_result"
874+
assert (
875+
web_search_results[0]["tool_use_id"] == "srvtoolu_01ABC123"
876+
), "tool_use_id should match"
877+
assert (
878+
web_search_results[0]["content"]["url"] == "https://example.com"
879+
), "URL should match"
880+
assert (
881+
web_search_results[0]["content"]["content"]["title"] == "Example Page"
882+
), "Title should match"
883+
884+
885+
def test_web_fetch_tool_result_no_extra_tool_calls():
886+
"""
887+
Test that web_fetch_tool_result blocks don't emit tool call chunks.
888+
889+
This tests the fix for https://github.com/BerriAI/litellm/issues/18137
890+
where streaming with Anthropic web fetch was causing issues with tool call arguments.
891+
892+
The issue was that web_fetch_tool_result blocks have input_json_delta events with {}
893+
that were incorrectly being converted to tool calls.
894+
"""
895+
iterator = ModelResponseIterator(
896+
streaming_response=MagicMock(), sync_stream=True, json_mode=False
897+
)
898+
899+
# to verify it doesn't emit tool calls
900+
chunks = [
901+
# 1. web_fetch_tool_result block starts
902+
{
903+
"type": "content_block_start",
904+
"index": 1,
905+
"content_block": {
906+
"type": "web_fetch_tool_result",
907+
"tool_use_id": "srvtoolu_01ABC123",
908+
"content": {
909+
"type": "web_fetch_result",
910+
"url": "https://example.com",
911+
"retrieved_at": "2025-12-16T19:28:29.758000+00:00",
912+
"content": {
913+
"type": "document",
914+
"source": {
915+
"type": "text",
916+
"media_type": "text/plain",
917+
"data": "Hello World",
918+
},
919+
"title": "Example Page",
920+
},
921+
},
922+
},
923+
},
924+
# 2. input_json_delta with {} - this should NOT emit a tool call
925+
{
926+
"type": "content_block_delta",
927+
"index": 1,
928+
"delta": {"type": "input_json_delta", "partial_json": "{}"},
929+
},
930+
# 3. content_block_stop for web_fetch_tool_result
931+
{"type": "content_block_stop", "index": 1},
932+
]
933+
934+
tool_call_count = 0
935+
for chunk in chunks:
936+
parsed = iterator.chunk_parser(chunk)
937+
if parsed.choices and parsed.choices[0].delta.tool_calls:
938+
tool_call_count += 1
939+
940+
# Should have 0 tool calls - web_fetch_tool_result should not emit tool calls
941+
assert (
942+
tool_call_count == 0
943+
), f"Expected 0 tool calls, got {tool_call_count}. web_fetch_tool_result should not emit tool calls"
944+
945+
784946
def test_container_in_provider_specific_fields_streaming():
785947
"""
786948
Test that container is captured in provider_specific_fields for streaming responses.

0 commit comments

Comments
 (0)