strands-agents
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 6 additions & 3 deletions
diff --git a/src/strands/agent/agent.py b/src/strands/agent/agent.py
Lines changed: 50 additions & 81 deletions
diff --git a/src/strands/agent/conversation_manager/null_conversation_manager.py b/src/strands/agent/conversation_manager/null_conversation_manager.py
Lines changed: 2 additions & 2 deletions
diff --git a/src/strands/models/anthropic.py b/src/strands/models/anthropic.py
Lines changed: 12 additions & 13 deletions
@@ -70,6 +70,9 @@ litellm = [
 llamaapi = [
     "llama-api-client>=0.1.0,<1.0.0",
 ]
+mistral = [
+    "mistralai>=1.8.2",
+]
 ollama = [
     "ollama>=0.4.8,<1.0.0",
 ]
@@ -92,7 +95,7 @@ a2a = [
 source = "vcs"
 
 [tool.hatch.envs.hatch-static-analysis]
-features = ["anthropic", "litellm", "llamaapi", "ollama", "openai", "otel"]
+features = ["anthropic", "litellm", "llamaapi", "ollama", "openai", "otel", "mistral"]
 dependencies = [
   "mypy>=1.15.0,<2.0.0",
   "ruff>=0.11.6,<0.12.0",
@@ -116,7 +119,7 @@ lint-fix = [
 ]
 
 [tool.hatch.envs.hatch-test]
-features = ["anthropic", "litellm", "llamaapi", "ollama", "openai", "otel"]
+features = ["anthropic", "litellm", "llamaapi", "ollama", "openai", "otel", "mistral"]
 extra-dependencies = [
     "moto>=5.1.0,<6.0.0",
     "pytest>=8.0.0,<9.0.0",
@@ -132,7 +135,7 @@ extra-args = [
 
 [tool.hatch.envs.dev]
 dev-mode = true
-features = ["dev", "docs", "anthropic", "litellm", "llamaapi", "ollama", "otel"]
+features = ["dev", "docs", "anthropic", "litellm", "llamaapi", "ollama", "otel", "mistral"]
 
 [tool.hatch.envs.a2a]
 dev-mode = true
 
@@ -9,21 +9,18 @@
 2. Method-style for direct tool access: `agent.tool.tool_name(param1="value")`
 """
 
-import asyncio
 import json
 import logging
 import os
 import random
 from concurrent.futures import ThreadPoolExecutor
-from threading import Thread
-from typing import Any, AsyncIterator, Callable, Dict, List, Mapping, Optional, Type, TypeVar, Union
-from uuid import uuid4
+from typing import Any, AsyncIterator, Callable, Generator, Mapping, Optional, Type, TypeVar, Union, cast
 
 from opentelemetry import trace
 from pydantic import BaseModel
 
 from ..event_loop.event_loop import event_loop_cycle
-from ..handlers.callback_handler import CompositeCallbackHandler, PrintingCallbackHandler, null_callback_handler
+from ..handlers.callback_handler import PrintingCallbackHandler, null_callback_handler
 from ..handlers.tool_handler import AgentToolHandler
 from ..models.bedrock import BedrockModel
 from ..telemetry.metrics import EventLoopMetrics
@@ -183,7 +180,7 @@ def __init__(
         self,
         model: Union[Model, str, None] = None,
         messages: Optional[Messages] = None,
-        tools: Optional[List[Union[str, Dict[str, str], Any]]] = None,
+        tools: Optional[list[Union[str, dict[str, str], Any]]] = None,
         system_prompt: Optional[str] = None,
         callback_handler: Optional[
             Union[Callable[..., Any], _DefaultCallbackHandlerSentinel]
@@ -255,7 +252,7 @@ def __init__(
         self.conversation_manager = conversation_manager if conversation_manager else SlidingWindowConversationManager()
 
         # Process trace attributes to ensure they're of compatible types
-        self.trace_attributes: Dict[str, AttributeValue] = {}
+        self.trace_attributes: dict[str, AttributeValue] = {}
         if trace_attributes:
             for k, v in trace_attributes.items():
                 if isinstance(v, (str, int, float, bool)) or (
@@ -312,7 +309,7 @@ def tool(self) -> ToolCaller:
         return self.tool_caller
 
     @property
-    def tool_names(self) -> List[str]:
+    def tool_names(self) -> list[str]:
         """Get a list of all registered tool names.
 
         Returns:
@@ -357,19 +354,25 @@ def __call__(self, prompt: str, **kwargs: Any) -> AgentResult:
                 - metrics: Performance metrics from the event loop
                 - state: The final state of the event loop
         """
+        callback_handler = kwargs.get("callback_handler", self.callback_handler)
+
         self._start_agent_trace_span(prompt)
 
         try:
-            # Run the event loop and get the result
-            result = self._run_loop(prompt, kwargs)
+            events = self._run_loop(callback_handler, prompt, kwargs)
+            for event in events:
+                if "callback" in event:
+                    callback_handler(**event["callback"])
+
+            stop_reason, message, metrics, state = event["stop"]
+            result = AgentResult(stop_reason, message, metrics, state)
 
             self._end_agent_trace_span(response=result)
 
             return result
+
         except Exception as e:
             self._end_agent_trace_span(error=e)
-
-            # Re-raise the exception to preserve original behavior
             raise
 
     def structured_output(self, output_model: Type[T], prompt: Optional[str] = None) -> T:
@@ -383,9 +386,9 @@ def structured_output(self, output_model: Type[T], prompt: Optional[str] = None)
         instruct the model to output the structured data.
 
         Args:
-            output_model(Type[BaseModel]): The output model (a JSON schema written as a Pydantic BaseModel)
+            output_model: The output model (a JSON schema written as a Pydantic BaseModel)
                 that the agent will use when responding.
-            prompt(Optional[str]): The prompt to use for the agent.
+            prompt: The prompt to use for the agent.
         """
         messages = self.messages
         if not messages and not prompt:
@@ -396,7 +399,12 @@ def structured_output(self, output_model: Type[T], prompt: Optional[str] = None)
             messages.append({"role": "user", "content": [{"text": prompt}]})
 
         # get the structured output from the model
-        return self.model.structured_output(output_model, messages, self.callback_handler)
+        events = self.model.structured_output(output_model, messages)
+        for event in events:
+            if "callback" in event:
+                self.callback_handler(**cast(dict, event["callback"]))
+
+        return event["output"]
 
     async def stream_async(self, prompt: str, **kwargs: Any) -> AsyncIterator[Any]:
         """Process a natural language prompt and yield events as an async iterator.
@@ -428,94 +436,63 @@ async def stream_async(self, prompt: str, **kwargs: Any) -> AsyncIterator[Any]:
                     yield event["data"]
             ```
         """
-        self._start_agent_trace_span(prompt)
+        callback_handler = kwargs.get("callback_handler", self.callback_handler)
 
-        _stop_event = uuid4()
-
-        queue = asyncio.Queue[Any]()
-        loop = asyncio.get_event_loop()
-
-        def enqueue(an_item: Any) -> None:
-            nonlocal queue
-            nonlocal loop
-            loop.call_soon_threadsafe(queue.put_nowait, an_item)
-
-        def queuing_callback_handler(**handler_kwargs: Any) -> None:
-            enqueue(handler_kwargs.copy())
+        self._start_agent_trace_span(prompt)
 
-        def target_callback() -> None:
-            nonlocal kwargs
+        try:
+            events = self._run_loop(callback_handler, prompt, kwargs)
+            for event in events:
+                if "callback" in event:
+                    callback_handler(**event["callback"])
+                    yield event["callback"]
 
-            try:
-                result = self._run_loop(prompt, kwargs, supplementary_callback_handler=queuing_callback_handler)
-                self._end_agent_trace_span(response=result)
-            except Exception as e:
-                self._end_agent_trace_span(error=e)
-                enqueue(e)
-            finally:
-                enqueue(_stop_event)
+            stop_reason, message, metrics, state = event["stop"]
+            result = AgentResult(stop_reason, message, metrics, state)
 
-        thread = Thread(target=target_callback, daemon=True)
-        thread.start()
+            self._end_agent_trace_span(response=result)
 
-        try:
-            while True:
-                item = await queue.get()
-                if item == _stop_event:
-                    break
-                if isinstance(item, Exception):
-                    raise item
-                yield item
-        finally:
-            thread.join()
+        except Exception as e:
+            self._end_agent_trace_span(error=e)
+            raise
 
     def _run_loop(
-        self, prompt: str, kwargs: Dict[str, Any], supplementary_callback_handler: Optional[Callable[..., Any]] = None
-    ) -> AgentResult:
+        self, callback_handler: Callable[..., Any], prompt: str, kwargs: dict[str, Any]
+    ) -> Generator[dict[str, Any], None, None]:
         """Execute the agent's event loop with the given prompt and parameters."""
         try:
-            # If the call had a callback_handler passed in, then for this event_loop
-            # cycle we call both handlers as the callback_handler
-            invocation_callback_handler = (
-                CompositeCallbackHandler(self.callback_handler, supplementary_callback_handler)
-                if supplementary_callback_handler is not None
-                else self.callback_handler
-            )
-
             # Extract key parameters
-            invocation_callback_handler(init_event_loop=True, **kwargs)
+            yield {"callback": {"init_event_loop": True, **kwargs}}
 
             # Set up the user message with optional knowledge base retrieval
-            message_content: List[ContentBlock] = [{"text": prompt}]
+            message_content: list[ContentBlock] = [{"text": prompt}]
             new_message: Message = {"role": "user", "content": message_content}
             self.messages.append(new_message)
 
             # Execute the event loop cycle with retry logic for context limits
-            return self._execute_event_loop_cycle(invocation_callback_handler, kwargs)
+            yield from self._execute_event_loop_cycle(callback_handler, kwargs)
 
         finally:
             self.conversation_manager.apply_management(self)
 
-    def _execute_event_loop_cycle(self, callback_handler: Callable[..., Any], kwargs: dict[str, Any]) -> AgentResult:
+    def _execute_event_loop_cycle(
+        self, callback_handler: Callable[..., Any], kwargs: dict[str, Any]
+    ) -> Generator[dict[str, Any], None, None]:
         """Execute the event loop cycle with retry logic for context window limits.
 
         This internal method handles the execution of the event loop cycle and implements
         retry logic for handling context window overflow exceptions by reducing the
         conversation context and retrying.
 
-        Args:
-            callback_handler: The callback handler to use for events.
-            kwargs: Additional parameters to pass through event loop.
-
-        Returns:
-            The result of the event loop cycle.
+        Yields:
+            Events of the loop cycle.
         """
         # Add `Agent` to kwargs to keep backwards-compatibility
         kwargs["agent"] = self
 
         try:
             # Execute the main event loop cycle
-            events = event_loop_cycle(
+            yield from event_loop_cycle(
                 model=self.model,
                 system_prompt=self.system_prompt,
                 messages=self.messages,  # will be modified by event_loop_cycle
@@ -527,19 +504,11 @@ def _execute_event_loop_cycle(self, callback_handler: Callable[..., Any], kwargs
                 event_loop_parent_span=self.trace_span,
                 kwargs=kwargs,
             )
-            for event in events:
-                if "callback" in event:
-                    callback_handler(**event["callback"])
-
-            stop_reason, message, metrics, state = event["stop"]
-
-            return AgentResult(stop_reason, message, metrics, state)
 
         except ContextWindowOverflowException as e:
             # Try reducing the context size and retrying
-
             self.conversation_manager.reduce_context(self, e=e)
-            return self._execute_event_loop_cycle(callback_handler, kwargs)
+            yield from self._execute_event_loop_cycle(callback_handler, kwargs)
 
     def _record_tool_execution(
         self,
@@ -625,7 +594,7 @@ def _end_agent_trace_span(
             error: Error to record as a trace attribute.
         """
         if self.trace_span:
-            trace_attributes: Dict[str, Any] = {
+            trace_attributes: dict[str, Any] = {
                 "span": self.trace_span,
             }
 
 
@@ -23,15 +23,15 @@ def apply_management(self, _agent: "Agent") -> None:
         """Does nothing to the conversation history.
 
         Args:
-            agent: The agent whose conversation history will remain unmodified.
+            _agent: The agent whose conversation history will remain unmodified.
         """
         pass
 
     def reduce_context(self, _agent: "Agent", e: Optional[Exception] = None) -> None:
         """Does not reduce context and raises an exception.
 
         Args:
-            agent: The agent whose conversation history will remain unmodified.
+            _agent: The agent whose conversation history will remain unmodified.
             e: The exception that triggered the context reduction, if any.
 
         Raises:
 
@@ -7,14 +7,13 @@
 import json
 import logging
 import mimetypes
-from typing import Any, Callable, Iterable, Optional, Type, TypedDict, TypeVar, cast
+from typing import Any, Generator, Iterable, Optional, Type, TypedDict, TypeVar, Union, cast
 
 import anthropic
 from pydantic import BaseModel
 from typing_extensions import Required, Unpack, override
 
 from ..event_loop.streaming import process_stream
-from ..handlers.callback_handler import PrintingCallbackHandler
 from ..tools import convert_pydantic_to_tool_spec
 from ..types.content import ContentBlock, Messages
 from ..types.exceptions import ContextWindowOverflowException, ModelThrottledException
@@ -378,24 +377,24 @@ def stream(self, request: dict[str, Any]) -> Iterable[dict[str, Any]]:
 
     @override
     def structured_output(
-        self, output_model: Type[T], prompt: Messages, callback_handler: Optional[Callable] = None
-    ) -> T:
+        self, output_model: Type[T], prompt: Messages
+    ) -> Generator[dict[str, Union[T, Any]], None, None]:
         """Get structured output from the model.
 
         Args:
-            output_model(Type[BaseModel]): The output model to use for the agent.
-            prompt(Messages): The prompt messages to use for the agent.
-            callback_handler(Optional[Callable]): Optional callback handler for processing events. Defaults to None.
+            output_model: The output model to use for the agent.
+            prompt: The prompt messages to use for the agent.
+
+        Yields:
+            Model events with the last being the structured output.
         """
-        callback_handler = callback_handler or PrintingCallbackHandler()
         tool_spec = convert_pydantic_to_tool_spec(output_model)
 
         response = self.converse(messages=prompt, tool_specs=[tool_spec])
         for event in process_stream(response, prompt):
-            if "callback" in event:
-                callback_handler(**event["callback"])
-        else:
-            stop_reason, messages, _, _ = event["stop"]
+            yield event
+
+        stop_reason, messages, _, _ = event["stop"]
 
         if stop_reason != "tool_use":
             raise ValueError("No valid tool use or tool use input was found in the Anthropic response.")
@@ -413,4 +412,4 @@ def structured_output(
         if output_response is None:
             raise ValueError("No valid tool use or tool use input was found in the Anthropic response.")
 
-        return output_model(**output_response)
+        yield {"output": output_model(**output_response)}