Feature/llm response history #133

Closed
10 changes: 10 additions & 0 deletions examples/mcp_agent_token_usage_tracking/README.md
@@ -0,0 +1,10 @@
# Tracking Token Usage with AugmentedLLMs

This example demonstrates how to monitor token consumption across different LLM providers using the new `response_history` attribute of AugmentedLLMs.

## What you'll learn
- How to access and analyze token usage data
- How to compare token efficiency between OpenAI and Anthropic models on identical queries
- How to leverage the `response_history` attribute for broader analytics

The new `response_history` attribute on the `AugmentedLLM` class enables more than token tracking: because it stores each provider's raw responses, the same approach can be applied to monitor response times, model behavior patterns, and other performance metrics. This example focuses on comparing token consumption.
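
For instance, a minimal sketch of the OpenAI side (assuming an agent with an attached `OpenAIAugmentedLLM`, as in the full `main.py` below):

```python
llm = await finder_agent.attach_llm(OpenAIAugmentedLLM)
await llm.generate_str(message="Summarize https://modelcontextprotocol.io/introduction")

# Each entry in response_history is a raw ChatCompletion, so the
# provider's usage metadata is available per call.
total = sum(r.usage.total_tokens for r in llm.response_history.get() if r.usage)
print(f"Total OpenAI tokens: {total}")
```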
103 changes: 103 additions & 0 deletions examples/mcp_agent_token_usage_tracking/main.py
@@ -0,0 +1,103 @@
import asyncio
import time

from mcp_agent.app import MCPApp
from mcp_agent.config import (
    Settings,
    LoggerSettings,
    MCPSettings,
    MCPServerSettings,
    OpenAISettings,
    AnthropicSettings,
)
from mcp_agent.agents.agent import Agent
from mcp_agent.workflows.llm.augmented_llm_openai import OpenAIAugmentedLLM
from mcp_agent.workflows.llm.augmented_llm_anthropic import AnthropicAugmentedLLM

settings = Settings(
    execution_engine="asyncio",
    logger=LoggerSettings(type="file", level="debug"),
    mcp=MCPSettings(
        servers={
            "fetch": MCPServerSettings(
                command="uvx",
                args=["mcp-server-fetch"],
            )
        }
    ),
    openai=OpenAISettings(
        api_key="sk-my-openai-api-key",
        default_model="gpt-4o-mini",
    ),
    anthropic=AnthropicSettings(
        api_key="sk-my-anthropic-api-key",
    ),
)

# Settings can either be specified programmatically,
# or loaded from mcp_agent.config.yaml/mcp_agent.secrets.yaml
app = MCPApp(name="mcp_basic_agent")  # settings=settings)


async def example_usage():
    async with app.run() as agent_app:
        logger = agent_app.logger

        finder_agent = Agent(
            name="finder",
            instruction="""You are an agent with the ability to fetch URLs. Your job is to identify
            the closest match to a user's request, make the appropriate tool calls,
            and return the URI and CONTENTS of the closest match.""",
            server_names=["fetch"],
        )

        async with finder_agent:
            logger.info("finder: Connected to server, calling list_tools...")
            result = await finder_agent.list_tools()
            logger.info("Tools available:", data=result.model_dump())

            message = "Print the first 2 paragraphs of https://modelcontextprotocol.io/introduction"

            llm = await finder_agent.attach_llm(OpenAIAugmentedLLM)
            result = await llm.generate_str(
                message=message,
            )

            logger.info(f"First 2 paragraphs of Model Context Protocol docs: {result}")

            # Sum token usage across the raw OpenAI responses recorded by the LLM
            openai_total_token_usage = 0
            for index, response in enumerate(llm.response_history.get()):
                logger.info(f"{index}: Token usage: {response.usage.total_tokens}")
                openai_total_token_usage += response.usage.total_tokens

            logger.info(f"OpenAI total token usage: {openai_total_token_usage}")

            # Let's switch the same agent to a different LLM
            llm = await finder_agent.attach_llm(AnthropicAugmentedLLM)

            result = await llm.generate_str(message=message)
            logger.info(f"First 2 paragraphs of Model Context Protocol docs: {result}")

            # Anthropic reports input and output tokens separately
            anthropic_total_token_usage = 0
            for index, response in enumerate(llm.response_history.get()):
                logger.info(
                    f"{index}: Token usage: {response.usage.input_tokens + response.usage.output_tokens}"
                )
                anthropic_total_token_usage += (
                    response.usage.input_tokens + response.usage.output_tokens
                )

            logger.info(f"Anthropic total token usage: {anthropic_total_token_usage}")

            logger.info(
                f"OpenAI uses {openai_total_token_usage} tokens, while Anthropic uses {anthropic_total_token_usage} tokens"
            )


if __name__ == "__main__":
    start = time.time()
    asyncio.run(example_usage())
    end = time.time()
    t = end - start

    print(f"Total run time: {t:.2f}s")
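
A note on the example above: `attach_llm` builds a fresh `AugmentedLLM` each time, so each provider's `response_history` contains only that provider's raw responses. If you instead reuse one instance across several queries, a hypothetical pattern like the following (relying only on the `Memory` protocol's `get()` and `clear()`) isolates per-query usage; the snippet assumes it runs inside the `async with finder_agent:` block:

```python
llm = await finder_agent.attach_llm(OpenAIAugmentedLLM)

await llm.generate_str(message="first query")
first_total = sum(r.usage.total_tokens for r in llm.response_history.get() if r.usage)

llm.response_history.clear()  # start a fresh measurement window

await llm.generate_str(message="second query")
second_total = sum(r.usage.total_tokens for r in llm.response_history.get() if r.usage)
```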
22 changes: 22 additions & 0 deletions examples/mcp_agent_token_usage_tracking/mcp_agent.config.yaml
@@ -0,0 +1,22 @@
$schema: ../../schema/mcp-agent.config.schema.json

execution_engine: asyncio
logger:
  transports: [console, file]
  level: debug
  progress_display: true
  path_settings:
    path_pattern: "logs/mcp-agent-{unique_id}.jsonl"
    unique_id: "timestamp" # Options: "timestamp" or "session_id"
    timestamp_format: "%Y%m%d_%H%M%S"

mcp:
  servers:
    fetch:
      command: "uvx"
      args: ["mcp-server-fetch"]

openai:
  # Secrets (API keys, etc.) are stored in an mcp_agent.secrets.yaml file which can be gitignored
  # default_model: "o3-mini"
  default_model: "gpt-4o-mini"
7 changes: 7 additions & 0 deletions examples/mcp_agent_token_usage_tracking/mcp_agent.secrets.yaml.example
@@ -0,0 +1,7 @@
$schema: ../../schema/mcp-agent.config.schema.json

openai:
  api_key: openai_api_key

anthropic:
  api_key: anthropic_api_key
6 changes: 6 additions & 0 deletions examples/mcp_agent_token_usage_tracking/requirements.txt
@@ -0,0 +1,6 @@
# Core framework dependency
mcp-agent @ file://../../ # Link to the local mcp-agent project root

# Additional dependencies specific to this example
anthropic
openai
41 changes: 25 additions & 16 deletions src/mcp_agent/workflows/llm/augmented_llm.py
@@ -31,6 +31,9 @@
 from mcp_agent.context import Context
 from mcp_agent.logging.logger import Logger
 
+MemoryItemT = TypeVar("MemoryItemT")
+"""A type representing a memory item."""
+
 MessageParamT = TypeVar("MessageParamT")
 """A type representing an input message to an LLM."""
 
@@ -40,12 +43,15 @@
 ModelT = TypeVar("ModelT")
 """A type representing a structured output message from an LLM."""
 
+ResponseT = TypeVar("ResponseT")
+"""A type representing a response from an LLM."""
+
 # TODO: saqadri - SamplingMessage is fairly limiting - consider extending
 MCPMessageParam = SamplingMessage
 MCPMessageResult = CreateMessageResult
 
 
-class Memory(Protocol, Generic[MessageParamT]):
+class Memory(Protocol, Generic[MemoryItemT]):
     """
     Simple memory management for storing past interactions in-memory.
     """
@@ -54,35 +60,35 @@ class Memory(Protocol, Generic[MessageParamT]):
 
     def __init__(self): ...
 
-    def extend(self, messages: List[MessageParamT]) -> None: ...
+    def extend(self, items: List[MemoryItemT]) -> None: ...
 
-    def set(self, messages: List[MessageParamT]) -> None: ...
+    def set(self, items: List[MemoryItemT]) -> None: ...
 
-    def append(self, message: MessageParamT) -> None: ...
+    def append(self, item: MemoryItemT) -> None: ...
 
-    def get(self) -> List[MessageParamT]: ...
+    def get(self) -> List[MemoryItemT]: ...
 
     def clear(self) -> None: ...
 
 
-class SimpleMemory(Memory, Generic[MessageParamT]):
+class SimpleMemory(Memory, Generic[MemoryItemT]):
     """
     Simple memory management for storing past interactions in-memory.
     """
 
     def __init__(self):
-        self.history: List[MessageParamT] = []
+        self.history: List[MemoryItemT] = []
 
-    def extend(self, messages: List[MessageParamT]):
-        self.history.extend(messages)
+    def extend(self, items: List[MemoryItemT]):
+        self.history.extend(items)
 
-    def set(self, messages: List[MessageParamT]):
-        self.history = messages.copy()
+    def set(self, items: List[MemoryItemT]):
+        self.history = items.copy()
 
-    def append(self, message: MessageParamT):
-        self.history.append(message)
+    def append(self, item: MemoryItemT):
+        self.history.append(item)
 
-    def get(self) -> List[MessageParamT]:
+    def get(self) -> List[MemoryItemT]:
         return self.history
 
     def clear(self):
@@ -131,7 +137,7 @@ class RequestParams(CreateMessageRequestParams):
     """
 
 
-class AugmentedLLMProtocol(Protocol, Generic[MessageParamT, MessageT]):
+class AugmentedLLMProtocol(Protocol, Generic[MessageParamT, MessageT, ResponseT]):
     """Protocol defining the interface for augmented LLMs"""
 
     async def generate(
@@ -183,7 +189,9 @@ def from_mcp_tool_result(
         """Convert an MCP tool result to an LLM input type"""
 
 
-class AugmentedLLM(ContextDependent, AugmentedLLMProtocol[MessageParamT, MessageT]):
+class AugmentedLLM(
+    ContextDependent, AugmentedLLMProtocol[MessageParamT, MessageT, ResponseT]
+):
     """
     The basic building block of agentic systems is an LLM enhanced with augmentations
     such as retrieval, tools, and memory provided from a collection of MCP servers.
@@ -223,6 +231,7 @@ def __init__(
             agent.instruction if agent and isinstance(agent.instruction, str) else None
         )
         self.history: Memory[MessageParamT] = SimpleMemory[MessageParamT]()
+        self.response_history: Memory[ResponseT] = SimpleMemory[ResponseT]()
        self.default_request_params = default_request_params
         self.model_preferences = (
             self.default_request_params.modelPreferences
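
The heart of the change: `Memory` and `SimpleMemory` are now generic over `MemoryItemT` rather than `MessageParamT`, so the same container type backs both the existing message `history` and the new `response_history`. A standalone sketch of the two parameterizations (the OpenAI types are used purely for illustration):

```python
from openai.types.chat import ChatCompletion, ChatCompletionMessageParam

from mcp_agent.workflows.llm.augmented_llm import SimpleMemory

# One generic container, parameterized two ways — mirroring how
# AugmentedLLM.__init__ wires up its histories in this diff.
history: SimpleMemory[ChatCompletionMessageParam] = SimpleMemory()
response_history: SimpleMemory[ChatCompletion] = SimpleMemory()

# history holds LLM input messages; response_history holds the raw
# provider responses that generate() appends in each subclass.
for response in response_history.get():
    if response.usage:
        print(response.usage.total_tokens)
```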
4 changes: 3 additions & 1 deletion src/mcp_agent/workflows/llm/augmented_llm_anthropic.py
@@ -62,7 +62,7 @@
 ]
 
 
-class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
+class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message, Message]):
     """
     The basic building block of agentic systems is an LLM enhanced with augmentations
     such as retrieval, tools, and memory provided from a collection of MCP servers.
@@ -170,6 +170,8 @@ async def generate(
                 anthropic.messages.create, **arguments
             )
 
+            self.response_history.extend(executor_result)
+
             response = executor_result[0]
 
             if isinstance(response, BaseException):
5 changes: 4 additions & 1 deletion src/mcp_agent/workflows/llm/augmented_llm_azure.py
@@ -2,6 +2,7 @@
 from typing import Iterable, List, Optional, Type, Union
 from azure.ai.inference import ChatCompletionsClient
 from azure.ai.inference.models import (
+    ChatCompletions,
     ChatResponseMessage,
     UserMessage,
     AssistantMessage,
@@ -57,7 +58,7 @@ class ResponseMessage(ChatResponseMessage):
     content: Optional[str]
 
 
-class AzureAugmentedLLM(AugmentedLLM[MessageParam, ResponseMessage]):
+class AzureAugmentedLLM(AugmentedLLM[MessageParam, ResponseMessage, ChatCompletions]):
     """
     The basic building block of agentic systems is an LLM enhanced with augmentations
     such as retrieval, tools, and memory provided from a collection of MCP servers.
@@ -165,6 +166,8 @@ async def generate(self, message, request_params: RequestParams | None = None):
                 self.azure_client.complete, **arguments
             )
 
+            self.response_history.extend(executor_result)
+
             response = executor_result[0]
 
             if isinstance(response, BaseException):
8 changes: 7 additions & 1 deletion src/mcp_agent/workflows/llm/augmented_llm_bedrock.py
@@ -24,19 +24,23 @@
     from mypy_boto3_bedrock_runtime.type_defs import (
         MessageOutputTypeDef,
         ConverseRequestTypeDef,
+        ConverseResponseTypeDef,
         MessageUnionTypeDef,
         ContentBlockUnionTypeDef,
         ToolConfigurationTypeDef,
     )
 else:
     MessageOutputTypeDef = object
     ConverseRequestTypeDef = object
+    ConverseResponseTypeDef = object
     MessageUnionTypeDef = object
     ContentBlockUnionTypeDef = object
     ToolConfigurationTypeDef = object
 
 
-class BedrockAugmentedLLM(AugmentedLLM[MessageUnionTypeDef, MessageUnionTypeDef]):
+class BedrockAugmentedLLM(
+    AugmentedLLM[MessageUnionTypeDef, MessageUnionTypeDef, ConverseResponseTypeDef]
+):
     """
     The basic building block of agentic systems is an LLM enhanced with augmentations
     such as retrieval, tools, and memory provided from a collection of MCP servers.
@@ -157,6 +161,8 @@ async def generate(self, message, request_params: RequestParams | None = None):
             self.bedrock_client.converse, **arguments
         )
 
+        self.response_history.extend(executor_result)
+
         response = executor_result[0]
 
         if isinstance(response, BaseException):
9 changes: 7 additions & 2 deletions src/mcp_agent/workflows/llm/augmented_llm_openai.py
@@ -4,6 +4,7 @@
 
 from openai import OpenAI
 from openai.types.chat import (
+    ChatCompletion,
     ChatCompletionAssistantMessageParam,
     ChatCompletionContentPartParam,
     ChatCompletionContentPartTextParam,
@@ -38,7 +39,7 @@
 
 
 class OpenAIAugmentedLLM(
-    AugmentedLLM[ChatCompletionMessageParam, ChatCompletionMessage]
+    AugmentedLLM[ChatCompletionMessageParam, ChatCompletionMessage, ChatCompletion]
 ):
     """
     The basic building block of agentic systems is an LLM enhanced with augmentations
@@ -181,6 +182,8 @@ async def generate(self, message, request_params: RequestParams | None = None):
                 openai_client.chat.completions.create, **arguments
             )
 
+            self.response_history.extend(executor_result)
+
             response = executor_result[0]
 
             self.logger.debug(
@@ -376,7 +379,9 @@ async def execute_tool_call(
             return ChatCompletionToolMessageParam(
                 role="tool",
                 tool_call_id=tool_call_id,
-                content="\n".join(str(mcp_content_to_openai_content(c)) for c in result.content),
+                content="\n".join(
+                    str(mcp_content_to_openai_content(c)) for c in result.content
+                ),
             )
 
         return None
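
Since each subclass now stores its provider's native response type (`ChatCompletion` for OpenAI, `Message` for Anthropic, `ChatCompletions` for Azure, `ConverseResponseTypeDef` for Bedrock), usage metadata lives under different field names. A hypothetical normalizer, sketched only for the OpenAI and Anthropic shapes used in the example:

```python
def total_tokens(response) -> int:
    """Hypothetical helper: normalize token counts across providers.

    OpenAI's ChatCompletion exposes usage.total_tokens; Anthropic's
    Message exposes usage.input_tokens and usage.output_tokens.
    """
    usage = getattr(response, "usage", None)
    if usage is None:
        return 0
    if getattr(usage, "total_tokens", None) is not None:
        return usage.total_tokens
    return getattr(usage, "input_tokens", 0) + getattr(usage, "output_tokens", 0)


grand_total = sum(total_tokens(r) for r in llm.response_history.get())
```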