ollama · ParthSareen · Sep 24, 2025 · Sep 19, 2025 · Sep 24, 2025 · Sep 24, 2025
diff --git a/examples/README.md b/examples/README.md
@@ -1,80 +1,124 @@
 # Running Examples
 
 Run the examples in this directory with:
+
 ```sh
 # Run example
 python3 examples/<example>.py
+
+# or with uv
+uv run examples/<example>.py
 ```
 
 See [ollama/docs/api.md](https://github.com/ollama/ollama/blob/main/docs/api.md) for full API documentation
 
 ### Chat - Chat with a model
+
 - [chat.py](chat.py)
 - [async-chat.py](async-chat.py)
 - [chat-stream.py](chat-stream.py) - Streamed outputs
 - [chat-with-history.py](chat-with-history.py) - Chat with model and maintain history of the conversation
 
-
 ### Generate - Generate text with a model
+
 - [generate.py](generate.py)
 - [async-generate.py](async-generate.py)
 - [generate-stream.py](generate-stream.py) - Streamed outputs
 - [fill-in-middle.py](fill-in-middle.py) - Given a prefix and suffix, fill in the middle
 
-
 ### Tools/Function Calling - Call a function with a model
+
 - [tools.py](tools.py) - Simple example of Tools/Function Calling
 - [async-tools.py](async-tools.py)
 - [multi-tool.py](multi-tool.py) - Using multiple tools, with thinking enabled
 
- #### gpt-oss
+#### gpt-oss
+
 - [gpt-oss-tools.py](gpt-oss-tools.py)
-- [gpt-oss-tools-stream.py](gpt-oss-tools-stream.py) 
+- [gpt-oss-tools-stream.py](gpt-oss-tools-stream.py)
 - [gpt-oss-tools-browser.py](gpt-oss-tools-browser.py) - Using browser research tools with gpt-oss
 - [gpt-oss-tools-browser-stream.py](gpt-oss-tools-browser-stream.py) - Using browser research tools with gpt-oss, with streaming enabled
 
+### Web search
+
+An API key from Ollama's cloud service is required. You can create one at [ollama.com/settings/keys](https://ollama.com/settings/keys).
+
+```shell
+export OLLAMA_API_KEY="your_api_key_here"
+```
+
+- [web-search.py](web-search.py)
+
+#### MCP server
+
+The MCP server can be used with an MCP client like Cursor, Cline, Codex, Open WebUI, Goose, and more.
+
+```sh
+uv run examples/web-search-mcp.py
+```
+
+Configuration to use with an MCP client:
+
+```json
+{
+  "mcpServers": {
+    "web_search": {
+      "type": "stdio",
+      "command": "uv",
+      "args": ["run", "path/to/ollama-python/examples/web-search-mcp.py"],
+      "env": { "OLLAMA_API_KEY": "your_api_key_here" }
+    }
+  }
+}
+```
+
+- [web-search-mcp.py](web-search-mcp.py)
 
 ### Multimodal with Images - Chat with a multimodal (image chat) model
+
 - [multimodal-chat.py](multimodal-chat.py)
 - [multimodal-generate.py](multimodal-generate.py)
 
-
 ### Structured Outputs - Generate structured outputs with a model
+
 - [structured-outputs.py](structured-outputs.py)
 - [async-structured-outputs.py](async-structured-outputs.py)
 - [structured-outputs-image.py](structured-outputs-image.py)
 
-
 ### Ollama List - List all downloaded models and their properties
-- [list.py](list.py)
 
+- [list.py](list.py)
 
 ### Ollama Show - Display model properties and capabilities
-- [show.py](show.py)
 
+- [show.py](show.py)
 
 ### Ollama ps - Show model status with CPU/GPU usage
-- [ps.py](ps.py)
 
+- [ps.py](ps.py)
 
 ### Ollama Pull - Pull a model from Ollama
+
 Requirement: `pip install tqdm`
-- [pull.py](pull.py) 
 
+- [pull.py](pull.py)
 
 ### Ollama Create - Create a model from a Modelfile
-- [create.py](create.py) 
 
+- [create.py](create.py)
 
 ### Ollama Embed - Generate embeddings with a model
-- [embed.py](embed.py)
 
+- [embed.py](embed.py)
 
 ### Thinking - Enable thinking mode for a model
+
 - [thinking.py](thinking.py)
 
 ### Thinking (generate) - Enable thinking mode for a model
+
 - [thinking-generate.py](thinking-generate.py)
 
 ### Thinking (levels) - Choose the thinking level
+
 - [thinking-levels.py](thinking-levels.py)
diff --git a/examples/web-search-mcp.py b/examples/web-search-mcp.py
@@ -0,0 +1,116 @@
+# /// script
+# requires-python = ">=3.11"
+# dependencies = [
+#   "mcp",
+#   "rich",
+#   "ollama",
+# ]
+# ///
+"""
+MCP stdio server exposing Ollama web_search and web_fetch as tools.
+
+Environment:
+- OLLAMA_API_KEY (required): API key for Ollama's hosted web search; used as the Authorization header.
+"""
+
+from __future__ import annotations
+
+import asyncio
+from typing import Any, Dict
+
+from ollama import Client
+
+try:
+  # Preferred high-level API (if available); the flag set here decides which server variant is defined below
+  from mcp.server.fastmcp import FastMCP  # type: ignore
+
+  _FASTMCP_AVAILABLE = True
+except Exception:  # any error raised while importing FastMCP, not only ImportError, selects the fallback
+  _FASTMCP_AVAILABLE = False
+
+if not _FASTMCP_AVAILABLE:
+  # Fallback to the low-level stdio server API (still imported from the mcp package)
+  from mcp.server import Server  # type: ignore
+  from mcp.server.stdio import stdio_server  # type: ignore
+
+
+client = Client()  # shared by both helpers below; NOTE(review): presumably reads OLLAMA_API_KEY from the environment - confirm
+
+
+def _web_search_impl(query: str, max_results: int = 3) -> Dict[str, Any]:  # helper called by both web_search tool variants
+  res = client.web_search(query=query, max_results=max_results)  # uses the module-level client
+  return res.model_dump()  # return the response's model_dump() dict rather than the response object
+
+
+def _web_fetch_impl(url: str) -> Dict[str, Any]:  # helper called by both web_fetch tool variants
+  res = client.web_fetch(url=url)  # uses the module-level client
+  return res.model_dump()  # return the response's model_dump() dict rather than the response object
+
+
+if _FASTMCP_AVAILABLE:
+  app = FastMCP('ollama-search-fetch')  # same server name as the low-level fallback branch uses
+
+  @app.tool()  # NOTE(review): FastMCP presumably uses the docstring below as the tool description, so edits to it are client-visible - confirm
+  def web_search(query: str, max_results: int = 3) -> Dict[str, Any]:
+    """
+    Perform a web search using Ollama's hosted search API.
+
+    Args:
+      query: The search query to run.
+      max_results: Maximum results to return (default: 3).
+
+    Returns:
+      JSON-serializable dict matching ollama.WebSearchResponse.model_dump()
+    """
+
+    return _web_search_impl(query=query, max_results=max_results)  # direct synchronous call to the shared helper
+
+  @app.tool()  # NOTE(review): same assumption as web_search - the docstring is presumably the tool description
+  def web_fetch(url: str) -> Dict[str, Any]:
+    """
+    Fetch the content of a web page for the provided URL.
+
+    Args:
+      url: The absolute URL to fetch.
+
+    Returns:
+      JSON-serializable dict matching ollama.WebFetchResponse.model_dump()
+    """
+
+    return _web_fetch_impl(url=url)  # direct synchronous call to the shared helper
+
+  if __name__ == '__main__':  # only start the server when executed as a script
+    app.run()  # NOTE(review): no transport is passed; presumably defaults to stdio as the module docstring states - confirm
+
+else:  # NOTE(review): the low-level mcp Server may not provide a tool() decorator (it appears to register list_tools()/call_tool() handlers) - confirm this branch actually runs
+  server = Server('ollama-search-fetch')  # type: ignore[name-defined]
+
+  @server.tool()  # type: ignore[attr-defined]
+  async def web_search(query: str, max_results: int = 3) -> Dict[str, Any]:
+    """
+    Perform a web search using Ollama's hosted search API.
+
+    Args:
+      query: The search query to run.
+      max_results: Maximum results to return (default: 3).
+    """
+
+    return await asyncio.to_thread(_web_search_impl, query, max_results)  # run the synchronous helper in a worker thread so the event loop is not blocked
+
+  @server.tool()  # type: ignore[attr-defined]
+  async def web_fetch(url: str) -> Dict[str, Any]:
+    """
+    Fetch the content of a web page for the provided URL.
+
+    Args:
+      url: The absolute URL to fetch.
+    """
+
+    return await asyncio.to_thread(_web_fetch_impl, url)  # run the synchronous helper in a worker thread so the event loop is not blocked
+
+  async def _main() -> None:  # serve over stdio; NOTE(review): Server.run() may also require initialization options - confirm the call below
+    async with stdio_server() as (read, write):  # type: ignore[name-defined]
+      await server.run(read, write)  # type: ignore[attr-defined]
+
+  if __name__ == '__main__':  # only start the server when executed as a script
+    asyncio.run(_main())
diff --git a/examples/web-search-crawl.py → examples/web-search.py b/examples/web-search-crawl.py → examples/web-search.py