
Commit 3efc9c0

fix(litellm): populate cacheWriteInputTokens from cache_creation_input_token not cache_creation_tokens (#1233)
1 parent efeba7b
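The change in a nutshell: for Anthropic models, LiteLLM reports cache-read token counts nested under `usage.prompt_tokens_details.cached_tokens`, while cache-write counts sit directly on the usage object as `usage.cache_creation_input_tokens`. The old code looked for a `cache_creation_tokens` attribute inside `prompt_tokens_details`, which real responses do not carry, so `cacheWriteInputTokens` was never populated. A minimal sketch of the two lookups against an illustrative usage shape (field names follow this commit; the numbers are invented):

```python
from types import SimpleNamespace

# Illustrative stand-in for the usage object LiteLLM streams back for an
# Anthropic model; only the fields relevant to this fix are included.
usage = SimpleNamespace(
    prompt_tokens=1200,
    completion_tokens=50,
    total_tokens=1250,
    cache_creation_input_tokens=1024,  # cache writes: reported at the top level
    prompt_tokens_details=SimpleNamespace(
        cached_tokens=0,  # cache reads: nested in the details block
    ),
)

# Old lookup: the field does not exist under prompt_tokens_details, so this is
# None and cacheWriteInputTokens was silently dropped.
print(getattr(usage.prompt_tokens_details, "cache_creation_tokens", None))  # None

# New lookup: reads the field the provider actually populates.
print(getattr(usage, "cache_creation_input_tokens", None))  # 1024
```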

File tree

3 files changed, +6 -6 lines changed

src/strands/models/litellm.py
tests/strands/models/test_litellm.py
tests_integ/models/test_model_litellm.py

src/strands/models/litellm.py

Lines changed: 2 additions & 3 deletions

@@ -222,12 +222,11 @@ def format_chunk(self, event: dict[str, Any], **kwargs: Any) -> StreamEvent:
 
                 # Only LiteLLM over Anthropic supports cache write tokens
                 # Waiting until a more general approach is available to set cacheWriteInputTokens
-
                 if tokens_details := getattr(event["data"], "prompt_tokens_details", None):
                     if cached := getattr(tokens_details, "cached_tokens", None):
                         usage_data["cacheReadInputTokens"] = cached
-                    if creation := getattr(tokens_details, "cache_creation_tokens", None):
-                        usage_data["cacheWriteInputTokens"] = creation
+                if creation := getattr(event["data"], "cache_creation_input_tokens", None):
+                    usage_data["cacheWriteInputTokens"] = creation
 
                 return StreamEvent(
                     metadata=MetadataEvent(
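Taken in isolation, the fixed mapping looks like the sketch below (a simplified re-creation, not the SDK's actual `format_chunk`). Note that the walrus checks only add the optional cache keys when the provider reports a non-zero value, so responses from providers without prompt caching still produce the plain input/output/total mapping.

```python
from types import SimpleNamespace
from typing import Any


def build_usage(data: Any) -> dict[str, int]:
    """Simplified re-creation of the fixed token accounting in format_chunk."""
    usage: dict[str, int] = {
        "inputTokens": data.prompt_tokens,
        "outputTokens": data.completion_tokens,
        "totalTokens": data.total_tokens,
    }
    # Cache reads: nested under prompt_tokens_details when present.
    if tokens_details := getattr(data, "prompt_tokens_details", None):
        if cached := getattr(tokens_details, "cached_tokens", None):
            usage["cacheReadInputTokens"] = cached
    # Cache writes: read directly off the usage object (the fix in this commit).
    if creation := getattr(data, "cache_creation_input_tokens", None):
        usage["cacheWriteInputTokens"] = creation
    return usage


# With the illustrative usage object from the sketch above:
data = SimpleNamespace(
    prompt_tokens=1200,
    completion_tokens=50,
    total_tokens=1250,
    cache_creation_input_tokens=1024,
    prompt_tokens_details=SimpleNamespace(cached_tokens=0),
)
print(build_usage(data))
# {'inputTokens': 1200, 'outputTokens': 50, 'totalTokens': 1250, 'cacheWriteInputTokens': 1024}
```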

tests/strands/models/test_litellm.py

Lines changed: 2 additions & 2 deletions

@@ -193,7 +193,7 @@ async def test_stream(litellm_acompletion, api_key, model_id, model, agenerator,
     mock_event_8 = unittest.mock.Mock(choices=[unittest.mock.Mock(finish_reason="tool_calls", delta=mock_delta_8)])
     mock_event_9 = unittest.mock.Mock()
     mock_event_9.usage.prompt_tokens_details.cached_tokens = 10
-    mock_event_9.usage.prompt_tokens_details.cache_creation_tokens = 10
+    mock_event_9.usage.cache_creation_input_tokens = 10
 
     litellm_acompletion.side_effect = unittest.mock.AsyncMock(
         return_value=agenerator(

@@ -255,7 +255,7 @@ async def test_stream(litellm_acompletion, api_key, model_id, model, agenerator,
             "metadata": {
                 "usage": {
                     "cacheReadInputTokens": mock_event_9.usage.prompt_tokens_details.cached_tokens,
-                    "cacheWriteInputTokens": mock_event_9.usage.prompt_tokens_details.cache_creation_tokens,
+                    "cacheWriteInputTokens": mock_event_9.usage.cache_creation_input_tokens,
                     "inputTokens": mock_event_9.usage.prompt_tokens,
                     "outputTokens": mock_event_9.usage.completion_tokens,
                     "totalTokens": mock_event_9.usage.total_tokens,

tests_integ/models/test_model_litellm.py

Lines changed: 2 additions & 1 deletion

@@ -1,4 +1,5 @@
 import unittest.mock
+from uuid import uuid4
 
 import pydantic
 import pytest

@@ -220,7 +221,7 @@ async def test_cache_read_tokens_multi_turn(model):
 
     system_prompt_content: list[SystemContentBlock] = [
         # Caching only works when prompts are large
-        {"text": "You are a helpful assistant. Always be concise." * 200},
+        {"text": f"You are helpful assistant No. {uuid4()} Always be concise." * 200},
         {"cachePoint": {"type": "default"}},
     ]
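The integration-test tweak targets run-to-run flakiness rather than the parsing bug itself. Anthropic-style prompt caching is keyed on the prompt prefix, so a fixed system prompt may already be cached from an earlier run; in that case the first turn reports cache reads instead of the cache write the test wants to observe. Salting the prompt with `uuid4()` makes the prefix unique per run, and the `* 200` repetition keeps it above the provider's minimum cacheable size. Conceptually (names and assertions below are illustrative, not the actual test):

```python
from uuid import uuid4

# Illustrative: a cacheable system prompt that is unique to this test run.
# The dict shapes mirror the SystemContentBlock usage in the diff above.
system_prompt_content = [
    {"text": f"You are helpful assistant No. {uuid4()} Always be concise." * 200},
    {"cachePoint": {"type": "default"}},
]

# Expected accounting over a multi-turn exchange (conceptual, provider-dependent):
#   turn 1 -> cacheWriteInputTokens > 0   # fresh prefix is written to the cache
#   turn 2 -> cacheReadInputTokens  > 0   # the same prefix is read back
```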
