@@ -1634,8 +1634,10 @@ def get_response_stream(
16341634 try :
16351635 tool_calls = []
16361636 response_text = ""
1637+ consecutive_errors = 0
1638+ max_consecutive_errors = 3 # Fallback to non-streaming after 3 consecutive errors
16371639
1638- for chunk in litellm .completion (
1640+ stream_iterator = litellm .completion (
16391641 ** self ._build_completion_params (
16401642 messages = messages ,
16411643 tools = formatted_tools ,
@@ -1645,18 +1647,48 @@ def get_response_stream(
16451647 output_pydantic = output_pydantic ,
16461648 ** kwargs
16471649 )
1648- ):
1649- if chunk and chunk .choices and chunk .choices [0 ].delta :
1650- delta = chunk .choices [0 ].delta
1650+ )
1651+
1652+ for chunk in stream_iterator :
1653+ try :
1654+ if chunk and chunk .choices and chunk .choices [0 ].delta :
1655+ delta = chunk .choices [0 ].delta
1656+
1657+ # Process both content and tool calls using existing helper
1658+ response_text , tool_calls = self ._process_stream_delta (
1659+ delta , response_text , tool_calls , formatted_tools
1660+ )
1661+
1662+ # Yield content chunks in real-time as they arrive
1663+ if delta .content :
1664+ yield delta .content
16511665
1652- # Process both content and tool calls using existing helper
1653- response_text , tool_calls = self ._process_stream_delta (
1654- delta , response_text , tool_calls , formatted_tools
1655- )
1666+ # Reset consecutive error counter only after successful chunk processing
1667+ consecutive_errors = 0
1668+
1669+ except Exception as chunk_error :
1670+ consecutive_errors += 1
16561671
1657- # Yield content chunks in real-time as they arrive
1658- if delta .content :
1659- yield delta .content
1672+ # Log the specific error for debugging
1673+ if verbose :
1674+ logging .warning (f"Chunk processing error ({ consecutive_errors } /{ max_consecutive_errors } ): { chunk_error } " )
1675+
1676+ # Check if this error is recoverable using our helper method
1677+ if self ._is_streaming_error_recoverable (chunk_error ):
1678+ if verbose :
1679+ logging .warning ("Recoverable streaming error detected, skipping malformed chunk and continuing" )
1680+
1681+ # Skip this malformed chunk and continue if we haven't hit the limit
1682+ if consecutive_errors < max_consecutive_errors :
1683+ continue
1684+ else :
1685+ # Too many recoverable errors, fallback to non-streaming
1686+ logging .warning (f"Too many consecutive streaming errors ({ consecutive_errors } ), falling back to non-streaming mode" )
1687+ raise Exception (f"Streaming failed with { consecutive_errors } consecutive errors" ) from chunk_error
1688+ else :
1689+ # For non-recoverable errors, re-raise immediately
1690+ logging .error (f"Non-recoverable streaming error: { chunk_error } " )
1691+ raise chunk_error
16601692
16611693 # After streaming completes, handle tool calls if present
16621694 if tool_calls and execute_tool_fn :
@@ -1716,7 +1748,16 @@ def get_response_stream(
17161748 logging .error (f"Follow-up response failed: { e } " )
17171749
17181750 except Exception as e :
1719- logging .error (f"Streaming failed: { e } " )
1751+ error_msg = str (e ).lower ()
1752+
1753+ # Provide more specific error messages based on the error type
1754+ if any (keyword in error_msg for keyword in ['json' , 'expecting property name' , 'parse' , 'decode' ]):
1755+ logging .warning (f"Streaming failed due to JSON parsing errors (likely malformed chunks from provider): { e } " )
1756+ elif 'connection' in error_msg or 'timeout' in error_msg :
1757+ logging .warning (f"Streaming failed due to connection issues: { e } " )
1758+ else :
1759+ logging .error (f"Streaming failed with unexpected error: { e } " )
1760+
17201761 # Fall back to non-streaming if streaming fails
17211762 use_streaming = False
17221763
@@ -1754,6 +1795,23 @@ def _is_gemini_model(self) -> bool:
17541795 if not self .model :
17551796 return False
17561797 return any (prefix in self .model .lower () for prefix in ['gemini' , 'gemini/' , 'google/gemini' ])
1798+
1799+ def _is_streaming_error_recoverable (self , error : Exception ) -> bool :
1800+ """Check if a streaming error is recoverable (e.g., malformed chunk vs connection error)."""
1801+ error_msg = str (error ).lower ()
1802+
1803+ # JSON parsing errors are often recoverable (skip malformed chunk and continue)
1804+ json_error_keywords = ['json' , 'expecting property name' , 'parse' , 'decode' , 'invalid json' ]
1805+ if any (keyword in error_msg for keyword in json_error_keywords ):
1806+ return True
1807+
1808+ # Connection errors might be temporary but are less recoverable in streaming context
1809+ connection_error_keywords = ['connection' , 'timeout' , 'network' , 'http' ]
1810+ if any (keyword in error_msg for keyword in connection_error_keywords ):
1811+ return False
1812+
1813+ # Other errors are generally not recoverable
1814+ return False
17571815
17581816 async def get_response_async (
17591817 self ,
0 commit comments