Commit 7a2e2b5

changes to litellm

committed (1 parent: e2eaaac)

File tree: 1 file changed (+155, -153 lines)

sentry_sdk/integrations/litellm.py (155 additions, 153 deletions)
@@ -1,3 +1,4 @@
+from functools import wraps
 from typing import TYPE_CHECKING
 
 import sentry_sdk
@@ -11,79 +12,40 @@
 from sentry_sdk.consts import SPANDATA
 from sentry_sdk.integrations import DidNotEnable, Integration
 from sentry_sdk.scope import should_send_default_pii
-from sentry_sdk.utils import event_from_exception
+from sentry_sdk.utils import capture_internal_exceptions, event_from_exception
 
 if TYPE_CHECKING:
     from typing import Any, Dict
     from datetime import datetime
 
 try:
     import litellm  # type: ignore[import-not-found]
-    from litellm import input_callback, success_callback, failure_callback
 except ImportError:
     raise DidNotEnable("LiteLLM not installed")
 
 
-def _get_metadata_dict(kwargs: "Dict[str, Any]") -> "Dict[str, Any]":
-    """Get the metadata dictionary from the kwargs."""
-    litellm_params = kwargs.setdefault("litellm_params", {})
-
-    # we need this weird little dance, as metadata might be set but may be None initially
-    metadata = litellm_params.get("metadata")
-    if metadata is None:
-        metadata = {}
-        litellm_params["metadata"] = metadata
-    return metadata
-
-
-def _input_callback(kwargs: "Dict[str, Any]") -> None:
-    """Handle the start of a request."""
-    integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
-
-    if integration is None:
-        return
-
-    # Get key parameters
-    full_model = kwargs.get("model", "")
+def _get_provider_and_model(full_model: str) -> "tuple[str, str]":
+    """Extract provider and model name from full model string."""
     try:
         model, provider, _, _ = litellm.get_llm_provider(full_model)
+        return provider, model
     except Exception:
-        model = full_model
-        provider = "unknown"
-
-    call_type = kwargs.get("call_type", None)
-    if call_type == "embedding":
-        operation = "embeddings"
-    else:
-        operation = "chat"
-
-    # Start a new span/transaction
-    span = get_start_span_function()(
-        op=(
-            consts.OP.GEN_AI_CHAT
-            if operation == "chat"
-            else consts.OP.GEN_AI_EMBEDDINGS
-        ),
-        name=f"{operation} {model}",
-        origin=LiteLLMIntegration.origin,
-    )
-    span.__enter__()
+        return "unknown", full_model
 
-    # Store span for later
-    _get_metadata_dict(kwargs)["_sentry_span"] = span
-
-    # Set basic data
-    set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, provider)
-    set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation)
 
+def _set_input_data(
+    span: "Any",
+    kwargs: "Dict[str, Any]",
+    operation: str,
+    integration: "LiteLLMIntegration",
+) -> None:
+    """Set input data on the span."""
     # Record input/messages if allowed
     if should_send_default_pii() and integration.include_prompts:
         if operation == "embeddings":
-            # For embeddings, look for the 'input' parameter
            embedding_input = kwargs.get("input")
             if embedding_input:
                 scope = sentry_sdk.get_current_scope()
-                # Normalize to list format
                 input_list = (
                     embedding_input
                     if isinstance(embedding_input, list)
@@ -98,7 +60,6 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
                     unpack=False,
                 )
         else:
-            # For chat, look for the 'messages' parameter
             messages = kwargs.get("messages", [])
             if messages:
                 scope = sentry_sdk.get_current_scope()
@@ -111,7 +72,7 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
                     unpack=False,
                 )
 
-    # Record other parameters
+    # Record standard parameters
    params = {
         "model": SPANDATA.GEN_AI_REQUEST_MODEL,
         "stream": SPANDATA.GEN_AI_RESPONSE_STREAMING,
@@ -126,107 +87,157 @@
         if value is not None:
             set_data_normalized(span, attribute, value)
 
-    # Record LiteLLM-specific parameters
-    litellm_params = {
-        "api_base": kwargs.get("api_base"),
-        "api_version": kwargs.get("api_version"),
-        "custom_llm_provider": kwargs.get("custom_llm_provider"),
-    }
-    for key, value in litellm_params.items():
-        if value is not None:
-            set_data_normalized(span, f"gen_ai.litellm.{key}", value)
-
 
-def _success_callback(
-    kwargs: "Dict[str, Any]",
-    completion_response: "Any",
-    start_time: "datetime",
-    end_time: "datetime",
+def _set_output_data(
+    span: "Any",
+    response: "Any",
+    integration: "LiteLLMIntegration",
 ) -> None:
-    """Handle successful completion."""
+    """Set output data on the span."""
+    # Record model information
+    if hasattr(response, "model"):
+        set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, response.model)
+
+    # Record response content if allowed
+    if should_send_default_pii() and integration.include_prompts:
+        if hasattr(response, "choices"):
+            response_messages = []
+            for choice in response.choices:
+                if hasattr(choice, "message"):
+                    if hasattr(choice.message, "model_dump"):
+                        response_messages.append(choice.message.model_dump())
+                    elif hasattr(choice.message, "dict"):
+                        response_messages.append(choice.message.dict())
+                    else:
+                        msg = {}
+                        if hasattr(choice.message, "role"):
+                            msg["role"] = choice.message.role
+                        if hasattr(choice.message, "content"):
+                            msg["content"] = choice.message.content
+                        if hasattr(choice.message, "tool_calls"):
+                            msg["tool_calls"] = choice.message.tool_calls
+                        response_messages.append(msg)
+
+            if response_messages:
+                set_data_normalized(
+                    span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_messages
+                )
 
-    span = _get_metadata_dict(kwargs).get("_sentry_span")
-    if span is None:
-        return
+    # Record token usage
+    if hasattr(response, "usage"):
+        usage = response.usage
+
+        # Extract cached tokens from prompt_tokens_details (OpenAI format used by LiteLLM)
+        cached_tokens = None
+        prompt_tokens_details = getattr(usage, "prompt_tokens_details", None)
+        if prompt_tokens_details is not None:
+            cached_tokens = getattr(prompt_tokens_details, "cached_tokens", None)
+
+        # Extract cache write tokens (Anthropic only)
+        cache_creation_tokens = getattr(usage, "cache_creation_input_tokens", None)
+
+        record_token_usage(
+            span,
+            input_tokens=getattr(usage, "prompt_tokens", None),
+            input_tokens_cached=cached_tokens,
+            input_tokens_cache_write=cache_creation_tokens,
+            output_tokens=getattr(usage, "completion_tokens", None),
+            total_tokens=getattr(usage, "total_tokens", None),
+        )
 
-    integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
-    if integration is None:
-        return
 
-    try:
-        # Record model information
-        if hasattr(completion_response, "model"):
-            set_data_normalized(
-                span, SPANDATA.GEN_AI_RESPONSE_MODEL, completion_response.model
-            )
+def _wrap_completion(original_func: "Any") -> "Any":
+    """Wrap litellm.completion to add instrumentation."""
 
-        # Record response content if allowed
-        if should_send_default_pii() and integration.include_prompts:
-            if hasattr(completion_response, "choices"):
-                response_messages = []
-                for choice in completion_response.choices:
-                    if hasattr(choice, "message"):
-                        if hasattr(choice.message, "model_dump"):
-                            response_messages.append(choice.message.model_dump())
-                        elif hasattr(choice.message, "dict"):
-                            response_messages.append(choice.message.dict())
-                        else:
-                            # Fallback for basic message objects
-                            msg = {}
-                            if hasattr(choice.message, "role"):
-                                msg["role"] = choice.message.role
-                            if hasattr(choice.message, "content"):
-                                msg["content"] = choice.message.content
-                            if hasattr(choice.message, "tool_calls"):
-                                msg["tool_calls"] = choice.message.tool_calls
-                            response_messages.append(msg)
-
-                if response_messages:
-                    set_data_normalized(
-                        span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_messages
-                    )
+    @wraps(original_func)
+    def wrapper(*args: "Any", **kwargs: "Any") -> "Any":
+        integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
+        if integration is None:
+            return original_func(*args, **kwargs)
 
-        # Record token usage
-        if hasattr(completion_response, "usage"):
-            usage = completion_response.usage
-            record_token_usage(
-                span,
-                input_tokens=getattr(usage, "prompt_tokens", None),
-                input_tokens_cached=getattr(usage, "cache_read_input_tokens", None),
-                input_tokens_cache_write=getattr(
-                    usage, "cache_write_input_tokens", None
-                ),
-                output_tokens=getattr(usage, "completion_tokens", None),
-                total_tokens=getattr(usage, "total_tokens", None),
+        # Get model and provider
+        full_model = kwargs.get("model", args[0] if args else "")
+        provider, model = _get_provider_and_model(full_model)
+
+        # Create span
+        span = get_start_span_function()(
+            op=consts.OP.GEN_AI_CHAT,
+            name=f"chat {model}",
+            origin=LiteLLMIntegration.origin,
+        )
+        span.__enter__()
+
+        # Set basic data
+        set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, provider)
+        set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat")
+
+        with capture_internal_exceptions():
+            _set_input_data(span, kwargs, "chat", integration)
+
+        try:
+            response = original_func(*args, **kwargs)
+            with capture_internal_exceptions():
+                _set_output_data(span, response, integration)
+            return response
+        except Exception as exc:
+            event, hint = event_from_exception(
+                exc,
+                client_options=sentry_sdk.get_client().options,
+                mechanism={"type": "litellm", "handled": False},
             )
+            sentry_sdk.capture_event(event, hint=hint)
+            raise
+        finally:
+            span.__exit__(None, None, None)
 
-    finally:
-        # Always finish the span and clean up
-        span.__exit__(None, None, None)
+    return wrapper
 
 
-def _failure_callback(
-    kwargs: "Dict[str, Any]",
-    exception: Exception,
-    start_time: "datetime",
-    end_time: "datetime",
-) -> None:
-    """Handle request failure."""
-    span = _get_metadata_dict(kwargs).get("_sentry_span")
-    if span is None:
-        return
+def _wrap_acompletion(original_func: "Any") -> "Any":
+    """Wrap litellm.acompletion to add instrumentation."""
 
-    try:
-        # Capture the exception
-        event, hint = event_from_exception(
-            exception,
-            client_options=sentry_sdk.get_client().options,
-            mechanism={"type": "litellm", "handled": False},
+    @wraps(original_func)
+    async def wrapper(*args: "Any", **kwargs: "Any") -> "Any":
+        integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
+        if integration is None:
+            return await original_func(*args, **kwargs)
+
+        # Get model and provider
+        full_model = kwargs.get("model", args[0] if args else "")
+        provider, model = _get_provider_and_model(full_model)
+
+        # Create span
+        span = get_start_span_function()(
+            op=consts.OP.GEN_AI_CHAT,
+            name=f"chat {model}",
+            origin=LiteLLMIntegration.origin,
         )
-        sentry_sdk.capture_event(event, hint=hint)
-    finally:
-        # Always finish the span and clean up
-        span.__exit__(type(exception), exception, None)
+        span.__enter__()
+
+        # Set basic data
+        set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, provider)
+        set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat")
+
+        with capture_internal_exceptions():
+            _set_input_data(span, kwargs, "chat", integration)
+
+        try:
+            response = await original_func(*args, **kwargs)
+            with capture_internal_exceptions():
+                _set_output_data(span, response, integration)
+            return response
+        except Exception as exc:
+            event, hint = event_from_exception(
+                exc,
+                client_options=sentry_sdk.get_client().options,
+                mechanism={"type": "litellm", "handled": False},
            )
+            sentry_sdk.capture_event(event, hint=hint)
+            raise
+        finally:
+            span.__exit__(None, None, None)
+
+    return wrapper
 
 
 class LiteLLMIntegration(Integration):
@@ -282,15 +293,6 @@ def __init__(self: "LiteLLMIntegration", include_prompts: bool = True) -> None:
 
     @staticmethod
     def setup_once() -> None:
-        """Set up LiteLLM callbacks for monitoring."""
-        litellm.input_callback = input_callback or []
-        if _input_callback not in litellm.input_callback:
-            litellm.input_callback.append(_input_callback)
-
-        litellm.success_callback = success_callback or []
-        if _success_callback not in litellm.success_callback:
-            litellm.success_callback.append(_success_callback)
-
-        litellm.failure_callback = failure_callback or []
-        if _failure_callback not in litellm.failure_callback:
-            litellm.failure_callback.append(_failure_callback)
+        """Set up LiteLLM instrumentation by wrapping completion functions."""
+        litellm.completion = _wrap_completion(litellm.completion)
+        litellm.acompletion = _wrap_acompletion(litellm.acompletion)