
Commit e2eaaac: "simplify"
1 parent 5c9cffb
5 files changed: +49 -704 lines

sentry_sdk/ai/__init__.py
Lines changed: 0 additions & 10 deletions

@@ -1,17 +1,7 @@
-from .monitoring import record_token_usage  # noqa: F401
 from .utils import (
     set_data_normalized,
     GEN_AI_MESSAGE_ROLE_MAPPING,
     GEN_AI_MESSAGE_ROLE_REVERSE_MAPPING,
     normalize_message_role,
     normalize_message_roles,
 )  # noqa: F401
-
-__all__ = [
-    "record_token_usage",
-    "set_data_normalized",
-    "GEN_AI_MESSAGE_ROLE_MAPPING",
-    "GEN_AI_MESSAGE_ROLE_REVERSE_MAPPING",
-    "normalize_message_role",
-    "normalize_message_roles",
-]
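With the explicit `__all__` and the `record_token_usage` re-export gone, the utils helpers remain importable from the package root (the `from .utils import ...` line is kept, with `# noqa: F401` marking it as an intentional re-export), but `record_token_usage` is not. A minimal sketch of the resulting import surface, assuming `record_token_usage` still lives in `sentry_sdk.ai.monitoring` as the deleted import shows:

# Still importable from the package root: the utils re-export is kept.
from sentry_sdk.ai import set_data_normalized, normalize_message_roles

# No longer re-exported by sentry_sdk/ai/__init__.py after this commit;
# import it from its defining module instead (path taken from the deleted line).
from sentry_sdk.ai.monitoring import record_token_usage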

sentry_sdk/integrations/openai.py
Lines changed: 1 addition & 10 deletions

@@ -131,12 +131,7 @@ def _calculate_token_usage(

     if hasattr(response, "usage"):
         input_tokens = _get_usage(response.usage, ["input_tokens", "prompt_tokens"])
-        if hasattr(response.usage, "prompt_tokens_details"):
-            input_tokens_cached = _get_usage(
-                response.usage.prompt_tokens_details, ["cached_tokens"]
-            )
-        # OpenAI also supports input_tokens_details for compatibility
-        elif hasattr(response.usage, "input_tokens_details"):
+        if hasattr(response.usage, "input_tokens_details"):
             input_tokens_cached = _get_usage(
                 response.usage.input_tokens_details, ["cached_tokens"]
             )
@@ -148,10 +143,6 @@ def _calculate_token_usage(
             output_tokens_reasoning = _get_usage(
                 response.usage.output_tokens_details, ["reasoning_tokens"]
             )
-        elif hasattr(response.usage, "completion_tokens_details"):
-            output_tokens_reasoning = _get_usage(
-                response.usage.completion_tokens_details, ["reasoning_tokens"]
-            )

         total_tokens = _get_usage(response.usage, ["total_tokens"])

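Both hunks call a private `_get_usage` helper with a usage object and a list of candidate attribute names; its body is not part of this diff. A plausible sketch of what such a helper does, assuming it returns the first matching integer attribute and 0 otherwise (the real implementation in sentry_sdk may differ):

def _get_usage(usage, keys):
    # Try each candidate attribute name in order and return the first
    # integer value found, e.g. "input_tokens" before the legacy
    # "prompt_tokens". Fall back to 0 when none of the names match.
    for key in keys:
        value = getattr(usage, key, None)
        if isinstance(value, int):
            return value
    return 0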
tests/integrations/anthropic/test_anthropic.py
Lines changed: 33 additions & 235 deletions

@@ -850,8 +850,10 @@ def test_collect_ai_data_with_input_json_delta():
     output_tokens = 20
     content_blocks = []

-    model, new_input_tokens, new_output_tokens, new_content_blocks = _collect_ai_data(
-        event, model, input_tokens, output_tokens, content_blocks
+    model, new_input_tokens, new_output_tokens, _, _, new_content_blocks = (
+        _collect_ai_data(
+            event, model, input_tokens, output_tokens, 0, 0, content_blocks
+        )
     )

     assert model is None
@@ -881,6 +883,8 @@ def test_set_output_data_with_input_json_delta(sentry_init):
         model="",
         input_tokens=10,
         output_tokens=20,
+        cache_read_input_tokens=0,
+        cache_write_input_tokens=0,
         content_blocks=[{"text": "".join(json_deltas), "type": "text"}],
     )

@@ -1449,118 +1453,44 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_events):


 def test_cache_tokens_nonstreaming(sentry_init, capture_events):
-    """Test that cache read and write tokens are properly tracked for non-streaming responses."""
-    sentry_init(
-        integrations=[AnthropicIntegration(include_prompts=True)],
-        traces_sample_rate=1.0,
-        send_default_pii=True,
-    )
+    """Test cache read/write tokens are tracked for non-streaming responses."""
+    sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0)
     events = capture_events()
     client = Anthropic(api_key="z")

-    # Create a message with cache token usage
-    message_with_cache = Message(
-        id="id",
-        model="claude-3-5-sonnet-20241022",
-        role="assistant",
-        content=[TextBlock(type="text", text="Response using cache")],
-        type="message",
-        usage=Usage(
-            input_tokens=100,
-            output_tokens=50,
-            cache_read_input_tokens=80,  # 80 tokens read from cache
-            cache_write_input_tokens=20,  # 20 tokens written to cache
-        ),
-    )
-
-    client.messages._post = mock.Mock(return_value=message_with_cache)
-
-    messages = [{"role": "user", "content": "Hello"}]
-
-    with start_transaction(name="anthropic"):
-        response = client.messages.create(
-            max_tokens=1024, messages=messages, model="claude-3-5-sonnet-20241022"
+    client.messages._post = mock.Mock(
+        return_value=Message(
+            id="id",
+            model="claude-3-5-sonnet-20241022",
+            role="assistant",
+            content=[TextBlock(type="text", text="Response")],
+            type="message",
+            usage=Usage(
+                input_tokens=100,
+                output_tokens=50,
+                cache_read_input_tokens=80,
+                cache_creation_input_tokens=20,
+            ),
         )
-
-    assert response == message_with_cache
-    usage = response.usage
-
-    assert usage.input_tokens == 100
-    assert usage.output_tokens == 50
-    assert usage.cache_read_input_tokens == 80
-    assert usage.cache_write_input_tokens == 20
-
-    assert len(events) == 1
-    (event,) = events
-
-    assert event["type"] == "transaction"
-    assert len(event["spans"]) == 1
-    (span,) = event["spans"]
-
-    assert span["op"] == OP.GEN_AI_CHAT
-    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 100
-    assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 50
-    assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 150
-    # Check cache-related tokens
-    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80
-    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20
-
-
-def test_cache_tokens_only_reads(sentry_init, capture_events):
-    """Test tracking when only cache reads are present (no writes)."""
-    sentry_init(
-        integrations=[AnthropicIntegration(include_prompts=True)],
-        traces_sample_rate=1.0,
-        send_default_pii=True,
-    )
-    events = capture_events()
-    client = Anthropic(api_key="z")
-
-    # Message with only cache reads, no writes
-    message_cache_read_only = Message(
-        id="id",
-        model="claude-3-5-sonnet-20241022",
-        role="assistant",
-        content=[TextBlock(type="text", text="Response")],
-        type="message",
-        usage=Usage(
-            input_tokens=100,
-            output_tokens=50,
-            cache_read_input_tokens=100,  # All tokens read from cache
-            cache_write_input_tokens=0,  # No new cache writes
-        ),
     )

-    client.messages._post = mock.Mock(return_value=message_cache_read_only)
-
     with start_transaction(name="anthropic"):
         client.messages.create(
             max_tokens=1024,
             messages=[{"role": "user", "content": "Hello"}],
             model="claude-3-5-sonnet-20241022",
         )

-    assert len(events) == 1
-    (event,) = events
-    (span,) = event["spans"]
-
-    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 100
-    # Cache write should not be present when it's 0
-    assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE not in span["data"]
+    (span,) = events[0]["spans"]
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20


 def test_cache_tokens_streaming(sentry_init, capture_events):
-    """Test that cache tokens are tracked correctly for streaming responses."""
-    sentry_init(
-        integrations=[AnthropicIntegration(include_prompts=True)],
-        traces_sample_rate=1.0,
-        send_default_pii=True,
-    )
-    events = capture_events()
+    """Test cache tokens are tracked for streaming responses."""
     client = Anthropic(api_key="z")
-
-    # Create streaming events with cache usage
-    stream_events = [
+    returned_stream = Stream(cast_to=None, response=None, client=client)
+    returned_stream._iterator = [
         MessageStartEvent(
             type="message_start",
             message=Message(
@@ -1573,162 +1503,30 @@ def test_cache_tokens_streaming(sentry_init, capture_events):
                     input_tokens=100,
                     output_tokens=0,
                     cache_read_input_tokens=80,
-                    cache_write_input_tokens=20,
+                    cache_creation_input_tokens=20,
                 ),
             ),
         ),
-        ContentBlockDeltaEvent(
-            type="content_block_delta",
-            index=0,
-            delta=TextDelta(type="text_delta", text="Hello"),
-        ),
         MessageDeltaEvent(
             type="message_delta",
             delta=Delta(stop_reason="end_turn"),
             usage=MessageDeltaUsage(output_tokens=10),
         ),
     ]

-    mock_stream = mock.MagicMock(spec=Stream)
-    mock_stream.__iter__ = mock.Mock(return_value=iter(stream_events))
-    mock_stream._iterator = iter(stream_events)
-
-    client.messages._post = mock.Mock(return_value=mock_stream)
-
-    with start_transaction(name="anthropic"):
-        stream = client.messages.create(
-            max_tokens=1024,
-            messages=[{"role": "user", "content": "Hello"}],
-            model="claude-3-5-sonnet-20241022",
-            stream=True,
-        )
-        # Consume the stream
-        for _ in stream:
-            pass
-
-    assert len(events) == 1
-    (event,) = events
-    (span,) = event["spans"]
-
-    assert span["op"] == OP.GEN_AI_CHAT
-    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 100
-    assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10
-    assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 110
-    # Check streaming cache tokens
-    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80
-    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20
-    assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True
-
-
-@pytest.mark.asyncio
-async def test_cache_tokens_streaming_async(sentry_init, capture_events):
-    """Test that cache tokens are tracked correctly for async streaming responses."""
-    sentry_init(
-        integrations=[AnthropicIntegration(include_prompts=True)],
-        traces_sample_rate=1.0,
-        send_default_pii=True,
-    )
+    sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0)
     events = capture_events()
-    client = AsyncAnthropic(api_key="z")
-
-    async def async_iterator(values):
-        for value in values:
-            yield value
-
-    # Create streaming events with cache usage
-    stream_events = [
-        MessageStartEvent(
-            type="message_start",
-            message=Message(
-                id="id",
-                model="claude-3-5-sonnet-20241022",
-                role="assistant",
-                content=[],
-                type="message",
-                usage=Usage(
-                    input_tokens=100,
-                    output_tokens=0,
-                    cache_read_input_tokens=80,
-                    cache_write_input_tokens=20,
-                ),
-            ),
-        ),
-        ContentBlockDeltaEvent(
-            type="content_block_delta",
-            index=0,
-            delta=TextDelta(type="text_delta", text="Hello"),
-        ),
-        MessageDeltaEvent(
-            type="message_delta",
-            delta=Delta(stop_reason="end_turn"),
-            usage=MessageDeltaUsage(output_tokens=10),
-        ),
-    ]
-
-    mock_stream = mock.MagicMock(spec=AsyncStream)
-    mock_stream.__aiter__ = mock.Mock(return_value=async_iterator(stream_events))
-    mock_stream._iterator = async_iterator(stream_events)
-
-    client.messages._post = mock.Mock(return_value=mock_stream)
+    client.messages._post = mock.Mock(return_value=returned_stream)

     with start_transaction(name="anthropic"):
-        stream = await client.messages.create(
+        for _ in client.messages.create(
             max_tokens=1024,
             messages=[{"role": "user", "content": "Hello"}],
             model="claude-3-5-sonnet-20241022",
             stream=True,
-        )
-        # Consume the stream
-        async for _ in stream:
+        ):
             pass

-    assert len(events) == 1
-    (event,) = events
-    (span,) = event["spans"]
-
-    assert span["op"] == OP.GEN_AI_CHAT
-    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 100
-    assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10
-    # Check async streaming cache tokens
+    (span,) = events[0]["spans"]
     assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80
     assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20
-
-
-def test_no_cache_tokens(sentry_init, capture_events):
-    """Test that requests without cache usage don't have cache fields."""
-    sentry_init(
-        integrations=[AnthropicIntegration(include_prompts=True)],
-        traces_sample_rate=1.0,
-        send_default_pii=True,
-    )
-    events = capture_events()
-    client = Anthropic(api_key="z")
-
-    # Message without any cache usage
-    message_no_cache = Message(
-        id="id",
-        model="claude-3-5-sonnet-20241022",
-        role="assistant",
-        content=[TextBlock(type="text", text="Response")],
-        type="message",
-        usage=Usage(input_tokens=100, output_tokens=50),
-    )
-
-    client.messages._post = mock.Mock(return_value=message_no_cache)
-
-    with start_transaction(name="anthropic"):
-        client.messages.create(
-            max_tokens=1024,
-            messages=[{"role": "user", "content": "Hello"}],
-            model="claude-3-5-sonnet-20241022",
-        )
-
-    assert len(events) == 1
-    (event,) = events
-    (span,) = event["spans"]
-
-    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 100
-    assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 50
-    # Cache fields should not be present
-    assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED not in span["data"]
-    assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE not in span["data"]
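Two details worth noting in these test changes. First, `_collect_ai_data` now threads two extra accumulators through the call, which the first hunk unpacks as cache read and cache write token counts. Second, the mocked `Usage` objects switch from `cache_write_input_tokens` to `cache_creation_input_tokens`, which is the Anthropic SDK's actual field name, while the Sentry span attribute keeps its `...CACHE_WRITE` key. A small sketch of that field-to-span mapping, using only values that appear in the diff:

from anthropic.types import Usage

# Mocked usage, as in test_cache_tokens_nonstreaming above.
usage = Usage(
    input_tokens=100,
    output_tokens=50,
    cache_read_input_tokens=80,  # asserted as GEN_AI_USAGE_INPUT_TOKENS_CACHED
    cache_creation_input_tokens=20,  # asserted as GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE
)
assert usage.cache_read_input_tokens == 80
assert usage.cache_creation_input_tokens == 20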
