Commit 93f2eb6

models - ollama - async (#373)
1 parent a06b9b1 commit 93f2eb6

3 files changed: +76 −30 lines

The commit swaps the Ollama model provider's synchronous ollama.Client for ollama.AsyncClient: chat calls are now awaited, streaming responses are consumed with async for, and the unit and integration tests are updated to match.

src/strands/models/ollama.py

Lines changed: 5 additions & 5 deletions

@@ -7,7 +7,7 @@
 import logging
 from typing import Any, AsyncGenerator, Optional, Type, TypeVar, Union, cast
 
-from ollama import Client as OllamaClient
+import ollama
 from pydantic import BaseModel
 from typing_extensions import TypedDict, Unpack, override
 
@@ -74,7 +74,7 @@ def __init__(
 
         ollama_client_args = ollama_client_args if ollama_client_args is not None else {}
 
-        self.client = OllamaClient(host, **ollama_client_args)
+        self.client = ollama.AsyncClient(host, **ollama_client_args)
 
     @override
     def update_config(self, **model_config: Unpack[OllamaConfig]) -> None:  # type: ignore
@@ -296,12 +296,12 @@ async def stream(self, request: dict[str, Any]) -> AsyncGenerator[dict[str, Any]
         """
         tool_requested = False
 
-        response = self.client.chat(**request)
+        response = await self.client.chat(**request)
 
         yield {"chunk_type": "message_start"}
         yield {"chunk_type": "content_start", "data_type": "text"}
 
-        for event in response:
+        async for event in response:
             for tool_call in event.message.tool_calls or []:
                 yield {"chunk_type": "content_start", "data_type": "tool", "data": tool_call}
                 yield {"chunk_type": "content_delta", "data_type": "tool", "data": tool_call}
@@ -330,7 +330,7 @@ async def structured_output(
         formatted_request = self.format_request(messages=prompt)
         formatted_request["format"] = output_model.model_json_schema()
         formatted_request["stream"] = False
-        response = self.client.chat(**formatted_request)
+        response = await self.client.chat(**formatted_request)
 
         try:
             content = response.message.content.strip()
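
For context, the new call shape mirrors the ollama Python package's async API: AsyncClient.chat is a coroutine, and with stream=True it resolves to an async iterator, which is why stream() both awaits the call and then consumes it with async for. A minimal standalone sketch of the pattern, assuming a local server on the default port and an already-pulled model:

import asyncio

import ollama


async def main() -> None:
    client = ollama.AsyncClient("http://localhost:11434")

    # With stream=True, awaiting chat() yields an async iterator of events.
    response = await client.chat(
        model="llama3.3:70b",  # assumes this model has been pulled locally
        messages=[{"role": "user", "content": "Say hello"}],
        stream=True,
    )

    async for event in response:
        print(event.message.content, end="", flush=True)


asyncio.run(main())

The structured_output path is the same awaited call with stream=False and format set to a Pydantic model's JSON schema, so the result is a single response object rather than an iterator.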

tests-integ/test_model_ollama.py

Lines changed: 65 additions & 19 deletions

@@ -2,6 +2,7 @@
 import requests
 from pydantic import BaseModel
 
+import strands
 from strands import Agent
 from strands.models.ollama import OllamaModel
 
@@ -13,35 +14,80 @@ def is_server_available() -> bool:
         return False
 
 
-@pytest.fixture
+@pytest.fixture(scope="module")
 def model():
     return OllamaModel(host="http://localhost:11434", model_id="llama3.3:70b")
 
 
-@pytest.fixture
-def agent(model):
-    return Agent(model=model)
+@pytest.fixture(scope="module")
+def tools():
+    @strands.tool
+    def tool_time() -> str:
+        return "12:00"
 
+    @strands.tool
+    def tool_weather() -> str:
+        return "sunny"
 
-@pytest.mark.skipif(not is_server_available(), reason="Local Ollama endpoint not available at localhost:11434")
-def test_agent(agent):
-    result = agent("Say 'hello world' with no other text")
-    assert isinstance(result.message["content"][0]["text"], str)
+    return [tool_time, tool_weather]
 
 
-@pytest.mark.skipif(not is_server_available(), reason="Local Ollama endpoint not available at localhost:11434")
-def test_structured_output(agent):
-    class Weather(BaseModel):
-        """Extract the time and weather.
+@pytest.fixture(scope="module")
+def agent(model, tools):
+    return Agent(model=model, tools=tools)
 
-        Time format: HH:MM
-        Weather: sunny, cloudy, rainy, etc.
-        """
+
+@pytest.fixture(scope="module")
+def weather():
+    class Weather(BaseModel):
+        """Extracts the time and weather from the user's message with the exact strings."""
 
         time: str
         weather: str
 
-    result = agent.structured_output(Weather, "The time is 12:00 and the weather is sunny")
-    assert isinstance(result, Weather)
-    assert result.time == "12:00"
-    assert result.weather == "sunny"
+    return Weather(time="12:00", weather="sunny")
+
+
+@pytest.mark.skipif(not is_server_available(), reason="Local Ollama endpoint not available at localhost:11434")
+def test_agent_invoke(agent):
+    result = agent("What is the time and weather in New York?")
+    text = result.message["content"][0]["text"].lower()
+
+    assert all(string in text for string in ["12:00", "sunny"])
+
+
+@pytest.mark.skipif(not is_server_available(), reason="Local Ollama endpoint not available at localhost:11434")
+@pytest.mark.asyncio
+async def test_agent_invoke_async(agent):
+    result = await agent.invoke_async("What is the time and weather in New York?")
+    text = result.message["content"][0]["text"].lower()
+
+    assert all(string in text for string in ["12:00", "sunny"])
+
+
+@pytest.mark.skipif(not is_server_available(), reason="Local Ollama endpoint not available at localhost:11434")
+@pytest.mark.asyncio
+async def test_agent_stream_async(agent):
+    stream = agent.stream_async("What is the time and weather in New York?")
+    async for event in stream:
+        _ = event
+
+    result = event["result"]
+    text = result.message["content"][0]["text"].lower()
+
+    assert all(string in text for string in ["12:00", "sunny"])
+
+
+@pytest.mark.skipif(not is_server_available(), reason="Local Ollama endpoint not available at localhost:11434")
+def test_agent_structured_output(agent, weather):
+    tru_weather = agent.structured_output(type(weather), "The time is 12:00 and the weather is sunny")
+    exp_weather = weather
+    assert tru_weather == exp_weather
+
+
+@pytest.mark.skipif(not is_server_available(), reason="Local Ollama endpoint not available at localhost:11434")
+@pytest.mark.asyncio
+async def test_agent_structured_output_async(agent, weather):
+    tru_weather = await agent.structured_output_async(type(weather), "The time is 12:00 and the weather is sunny")
+    exp_weather = weather
+    assert tru_weather == exp_weather
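
The hunk above shows only the tail of is_server_available, the guard behind every skipif marker. A plausible sketch of such a guard, consistent with the requests import (an assumption; the commit does not change this function):

import requests


def is_server_available() -> bool:
    # Probe the local Ollama endpoint; treat any connection failure as "not running".
    try:
        return requests.get("http://localhost:11434").ok
    except requests.exceptions.ConnectionError:
        return False

With the server up (e.g. ollama serve, then ollama pull llama3.3:70b), the module-scoped fixtures build the agent once and reuse it across the sync, async, and streaming test variants.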

tests/strands/models/test_ollama.py

Lines changed: 6 additions & 6 deletions

@@ -11,7 +11,7 @@
 
 @pytest.fixture
 def ollama_client():
-    with unittest.mock.patch.object(strands.models.ollama, "OllamaClient") as mock_client_cls:
+    with unittest.mock.patch.object(strands.models.ollama.ollama, "AsyncClient") as mock_client_cls:
         yield mock_client_cls.return_value
 
 
@@ -416,13 +416,13 @@ def test_format_chunk_other(model):
 
 
 @pytest.mark.asyncio
-async def test_stream(ollama_client, model, alist):
+async def test_stream(ollama_client, model, agenerator, alist):
     mock_event = unittest.mock.Mock()
     mock_event.message.tool_calls = None
     mock_event.message.content = "Hello"
     mock_event.done_reason = "stop"
 
-    ollama_client.chat.return_value = [mock_event]
+    ollama_client.chat = unittest.mock.AsyncMock(return_value=agenerator([mock_event]))
 
     request = {"model": "m1", "messages": [{"role": "user", "content": "Hello"}]}
     response = model.stream(request)
@@ -442,14 +442,14 @@ async def test_stream(ollama_client, model, alist):
 
 
 @pytest.mark.asyncio
-async def test_stream_with_tool_calls(ollama_client, model, alist):
+async def test_stream_with_tool_calls(ollama_client, model, agenerator, alist):
     mock_event = unittest.mock.Mock()
     mock_tool_call = unittest.mock.Mock()
     mock_event.message.tool_calls = [mock_tool_call]
     mock_event.message.content = "I'll calculate that for you"
     mock_event.done_reason = "stop"
 
-    ollama_client.chat.return_value = [mock_event]
+    ollama_client.chat = unittest.mock.AsyncMock(return_value=agenerator([mock_event]))
 
     request = {"model": "m1", "messages": [{"role": "user", "content": "Calculate 2+2"}]}
     response = model.stream(request)
@@ -478,7 +478,7 @@ async def test_structured_output(ollama_client, model, test_output_model_cls, al
     mock_response = unittest.mock.Mock()
     mock_response.message.content = '{"name": "John", "age": 30}'
 
-    ollama_client.chat.return_value = mock_response
+    ollama_client.chat = unittest.mock.AsyncMock(return_value=mock_response)
 
     stream = model.structured_output(test_output_model_cls, messages)
     events = await alist(stream)
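
These tests lean on two shared fixtures, agenerator and alist, that are not defined in this file and presumably live in the suite's conftest. A sketch of what such helpers typically look like (an assumption, not part of this commit):

import pytest


@pytest.fixture
def agenerator():
    # Turn a plain iterable into an async generator, mimicking a streaming reply.
    def factory(items):
        async def gen():
            for item in items:
                yield item

        return gen()

    return factory


@pytest.fixture
def alist():
    # Drain an async iterable into an ordinary list for assertions.
    async def collect(aiterable):
        return [item async for item in aiterable]

    return collect

An AsyncMock is needed because the model now awaits chat(); returning agenerator([mock_event]) reproduces the awaited-then-iterated shape of a real streaming response.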
