from typing_extensions import Unpack, override

from ..types.content import ContentBlock, Messages
+from ..types.exceptions import ContextWindowOverflowException
from ..types.streaming import StreamEvent
from ..types.tools import ToolChoice, ToolSpec
from ._validation import validate_config_keys
@@ -56,6 +57,24 @@ def __init__(self, client_args: Optional[dict[str, Any]] = None, **model_config:

        logger.debug("config=<%s> | initializing", self.config)

+    def _handle_context_window_overflow(self, e: Exception) -> None:
+        """Handle context window overflow errors from LiteLLM.
+
+        Args:
+            e: The exception to handle.
+
+        Raises:
+            ContextWindowOverflowException: If the exception is a context window overflow error.
+        """
+        # Prefer litellm-specific typed exception if exposed
+        litellm_exc_type = getattr(litellm, "ContextWindowExceededError", None)
+        if litellm_exc_type and isinstance(e, litellm_exc_type):
+            logger.warning("litellm client raised context window overflow")
+            raise ContextWindowOverflowException(e) from e
+
+        # Not a context-window error, re-raise original
+        raise e
+
    @override
    def update_config(self, **model_config: Unpack[LiteLLMConfig]) -> None:  # type: ignore[override]
        """Update the LiteLLM model configuration with the provided arguments.
@@ -135,7 +154,10 @@ async def stream(
        logger.debug("request=<%s>", request)

        logger.debug("invoking model")
-        response = await litellm.acompletion(**self.client_args, **request)
+        try:
+            response = await litellm.acompletion(**self.client_args, **request)
+        except Exception as e:
+            self._handle_context_window_overflow(e)

        logger.debug("got response from model")
        yield self.format_chunk({"chunk_type": "message_start"})
@@ -205,15 +227,23 @@ async def structured_output(
        Yields:
            Model events with the last being the structured output.
        """
-        if not supports_response_schema(self.get_config()["model_id"]):
+        supports_schema = supports_response_schema(self.get_config()["model_id"])
+
+        # If the provider does not support response schemas, we cannot reliably parse structured output.
+        # In that case we must not call the provider and must raise the documented ValueError.
+        if not supports_schema:
            raise ValueError("Model does not support response_format")

-        response = await litellm.acompletion(
-            **self.client_args,
-            model=self.get_config()["model_id"],
-            messages=self.format_request(prompt, system_prompt=system_prompt)["messages"],
-            response_format=output_model,
-        )
+        # For providers that DO support response schemas, call litellm and map context-window errors.
+        try:
+            response = await litellm.acompletion(
+                **self.client_args,
+                model=self.get_config()["model_id"],
+                messages=self.format_request(prompt, system_prompt=system_prompt)["messages"],
+                response_format=output_model,
+            )
+        except Exception as e:
+            self._handle_context_window_overflow(e)

        if len(response.choices) > 1:
            raise ValueError("Multiple choices found in the response.")
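Taken together, these hunks route every litellm.acompletion failure through _handle_context_window_overflow, so callers see the SDK's ContextWindowOverflowException rather than a provider-specific error, and every other exception is surfaced unchanged. The standalone sketch below illustrates that mapping contract under stated assumptions: FakeLiteLLM and FakeContextWindowExceededError are stand-ins invented here for the real litellm module and its ContextWindowExceededError type, and the local ContextWindowOverflowException class stands in for the one the diff imports from ..types.exceptions.

# Standalone sketch of the overflow-mapping pattern used in this diff.
# FakeLiteLLM / FakeContextWindowExceededError are invented stand-ins, not
# part of litellm or the SDK; they exist only to make the example runnable.

class ContextWindowOverflowException(Exception):
    """Stand-in for the SDK's context window overflow exception."""


class FakeContextWindowExceededError(Exception):
    """Stand-in for litellm.ContextWindowExceededError."""


class FakeLiteLLM:
    ContextWindowExceededError = FakeContextWindowExceededError


litellm = FakeLiteLLM()


def handle_context_window_overflow(e: Exception) -> None:
    # Prefer the provider-specific typed exception if the module exposes it.
    litellm_exc_type = getattr(litellm, "ContextWindowExceededError", None)
    if litellm_exc_type and isinstance(e, litellm_exc_type):
        raise ContextWindowOverflowException(e) from e
    # Anything else is not a context-window problem; surface it unchanged.
    raise e


# An overflow error coming back from the provider is translated...
try:
    handle_context_window_overflow(FakeContextWindowExceededError("too many tokens"))
except ContextWindowOverflowException as mapped:
    print("mapped:", mapped)

# ...while unrelated errors pass through with their original type.
try:
    handle_context_window_overflow(ValueError("bad request"))
except ValueError as passthrough:
    print("passthrough:", passthrough)

The getattr lookup mirrors the hedge in the diff itself: if a given litellm build does not expose ContextWindowExceededError, the helper simply falls through and re-raises the original exception instead of failing on the attribute access.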