Feature/llm response history #133

Closed
10 changes: 10 additions & 0 deletions examples/mcp_agent_token_usage_tracking/README.md
@@ -0,0 +1,10 @@
# Tracking Token Usage with AugmentedLLMs

This example demonstrates how to monitor token consumption across different LLM providers using the new `response_history` attribute of AugmentedLLMs.

## What you'll learn
- How to access and analyze token usage data
- How to compare token efficiency between OpenAI and Anthropic models on identical queries
- How to leverage the `response_history` attribute for broader analytics

The new `response_history` attribute on the `AugmentedLLM` class enables more than token tracking: because it stores each provider's raw responses, the same approach can be applied to monitor response times, model behavior patterns, and other performance metrics. This example focuses on comparing token consumption.
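
For instance, a minimal sketch of the OpenAI side (assuming an agent with an attached `OpenAIAugmentedLLM`, as in the full `main.py` below):

```python
llm = await finder_agent.attach_llm(OpenAIAugmentedLLM)
await llm.generate_str(message="Summarize https://modelcontextprotocol.io/introduction")

# Each entry in response_history is a raw ChatCompletion, so the
# provider's usage metadata is available per call.
total = sum(r.usage.total_tokens for r in llm.response_history.get() if r.usage)
print(f"Total OpenAI tokens: {total}")
```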
103 changes: 103 additions & 0 deletions examples/mcp_agent_token_usage_tracking/main.py
@@ -0,0 +1,103 @@
import asyncio
import time

from mcp_agent.app import MCPApp
from mcp_agent.config import (
    Settings,
    LoggerSettings,
    MCPSettings,
    MCPServerSettings,
    OpenAISettings,
    AnthropicSettings,
)
from mcp_agent.agents.agent import Agent
from mcp_agent.workflows.llm.augmented_llm_openai import OpenAIAugmentedLLM
from mcp_agent.workflows.llm.augmented_llm_anthropic import AnthropicAugmentedLLM

settings = Settings(
    execution_engine="asyncio",
    logger=LoggerSettings(type="file", level="debug"),
    mcp=MCPSettings(
        servers={
            "fetch": MCPServerSettings(
                command="uvx",
                args=["mcp-server-fetch"],
            )
        }
    ),
    openai=OpenAISettings(
        api_key="sk-my-openai-api-key",
        default_model="gpt-4o-mini",
    ),
    anthropic=AnthropicSettings(
        api_key="sk-my-anthropic-api-key",
    ),
)

# Settings can either be specified programmatically,
# or loaded from mcp_agent.config.yaml/mcp_agent.secrets.yaml
app = MCPApp(name="mcp_basic_agent")  # settings=settings)


async def example_usage():
    async with app.run() as agent_app:
        logger = agent_app.logger

        finder_agent = Agent(
            name="finder",
            instruction="""You are an agent with the ability to fetch URLs. Your job is to identify
            the closest match to a user's request, make the appropriate tool calls,
            and return the URI and CONTENTS of the closest match.""",
            server_names=["fetch"],
        )

        async with finder_agent:
            logger.info("finder: Connected to server, calling list_tools...")
            result = await finder_agent.list_tools()
            logger.info("Tools available:", data=result.model_dump())

            message = "Print the first 2 paragraphs of https://modelcontextprotocol.io/introduction"

            llm = await finder_agent.attach_llm(OpenAIAugmentedLLM)
            result = await llm.generate_str(
                message=message,
            )

            logger.info(f"First 2 paragraphs of Model Context Protocol docs: {result}")

            # Sum token usage across the raw OpenAI responses recorded by the LLM
            openai_total_token_usage = 0
            for index, response in enumerate(llm.response_history.get()):
                logger.info(f"{index}: Token usage: {response.usage.total_tokens}")
                openai_total_token_usage += response.usage.total_tokens

            logger.info(f"OpenAI total token usage: {openai_total_token_usage}")

            # Let's switch the same agent to a different LLM
            llm = await finder_agent.attach_llm(AnthropicAugmentedLLM)

            result = await llm.generate_str(message=message)
            logger.info(f"First 2 paragraphs of Model Context Protocol docs: {result}")

            # Anthropic reports input and output tokens separately
            anthropic_total_token_usage = 0
            for index, response in enumerate(llm.response_history.get()):
                logger.info(
                    f"{index}: Token usage: {response.usage.input_tokens + response.usage.output_tokens}"
                )
                anthropic_total_token_usage += (
                    response.usage.input_tokens + response.usage.output_tokens
                )

            logger.info(f"Anthropic total token usage: {anthropic_total_token_usage}")

            logger.info(
                f"OpenAI uses {openai_total_token_usage} tokens, while Anthropic uses {anthropic_total_token_usage} tokens"
            )


if __name__ == "__main__":
    start = time.time()
    asyncio.run(example_usage())
    end = time.time()
    t = end - start

    print(f"Total run time: {t:.2f}s")
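
A note on the example above: `attach_llm` builds a fresh `AugmentedLLM` each time, so each provider's `response_history` contains only that provider's raw responses. If you instead reuse one instance across several queries, a hypothetical pattern like the following (relying only on the `Memory` protocol's `get()` and `clear()`) isolates per-query usage; the snippet assumes it runs inside the `async with finder_agent:` block:

```python
llm = await finder_agent.attach_llm(OpenAIAugmentedLLM)

await llm.generate_str(message="first query")
first_total = sum(r.usage.total_tokens for r in llm.response_history.get() if r.usage)

llm.response_history.clear()  # start a fresh measurement window

await llm.generate_str(message="second query")
second_total = sum(r.usage.total_tokens for r in llm.response_history.get() if r.usage)
```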
22 changes: 22 additions & 0 deletions examples/mcp_agent_token_usage_tracking/mcp_agent.config.yaml
@@ -0,0 +1,22 @@
$schema: ../../schema/mcp-agent.config.schema.json

execution_engine: asyncio
logger:
  transports: [console, file]
  level: debug
  progress_display: true
  path_settings:
    path_pattern: "logs/mcp-agent-{unique_id}.jsonl"
    unique_id: "timestamp" # Options: "timestamp" or "session_id"
    timestamp_format: "%Y%m%d_%H%M%S"

mcp:
  servers:
    fetch:
      command: "uvx"
      args: ["mcp-server-fetch"]

openai:
  # Secrets (API keys, etc.) are stored in an mcp_agent.secrets.yaml file which can be gitignored
  # default_model: "o3-mini"
  default_model: "gpt-4o-mini"
7 changes: 7 additions & 0 deletions examples/mcp_agent_token_usage_tracking/mcp_agent.secrets.yaml.example
@@ -0,0 +1,7 @@
$schema: ../../schema/mcp-agent.config.schema.json

openai:
  api_key: openai_api_key

anthropic:
  api_key: anthropic_api_key
6 changes: 6 additions & 0 deletions examples/mcp_agent_token_usage_tracking/requirements.txt
@@ -0,0 +1,6 @@
# Core framework dependency
mcp-agent @ file://../../ # Link to the local mcp-agent project root

# Additional dependencies specific to this example
anthropic
openai
41 changes: 25 additions & 16 deletions src/mcp_agent/workflows/llm/augmented_llm.py
@@ -31,6 +31,9 @@
 from mcp_agent.context import Context
 from mcp_agent.logging.logger import Logger
 
+MemoryItemT = TypeVar("MemoryItemT")
+"""A type representing a memory item."""
+
 MessageParamT = TypeVar("MessageParamT")
 """A type representing an input message to an LLM."""
 
@@ -40,12 +43,15 @@
 ModelT = TypeVar("ModelT")
 """A type representing a structured output message from an LLM."""
 
+ResponseT = TypeVar("ResponseT")
+"""A type representing a response from an LLM."""
+
 # TODO: saqadri - SamplingMessage is fairly limiting - consider extending
 MCPMessageParam = SamplingMessage
 MCPMessageResult = CreateMessageResult
 
 
-class Memory(Protocol, Generic[MessageParamT]):
+class Memory(Protocol, Generic[MemoryItemT]):
     """
     Simple memory management for storing past interactions in-memory.
     """
@@ -54,35 +60,35 @@ class Memory(Protocol, Generic[MessageParamT]):
 
     def __init__(self): ...
 
-    def extend(self, messages: List[MessageParamT]) -> None: ...
+    def extend(self, items: List[MemoryItemT]) -> None: ...
 
-    def set(self, messages: List[MessageParamT]) -> None: ...
+    def set(self, items: List[MemoryItemT]) -> None: ...
 
-    def append(self, message: MessageParamT) -> None: ...
+    def append(self, item: MemoryItemT) -> None: ...
 
-    def get(self) -> List[MessageParamT]: ...
+    def get(self) -> List[MemoryItemT]: ...
 
     def clear(self) -> None: ...
 
 
-class SimpleMemory(Memory, Generic[MessageParamT]):
+class SimpleMemory(Memory, Generic[MemoryItemT]):
     """
     Simple memory management for storing past interactions in-memory.
     """
 
     def __init__(self):
-        self.history: List[MessageParamT] = []
+        self.history: List[MemoryItemT] = []
 
-    def extend(self, messages: List[MessageParamT]):
-        self.history.extend(messages)
+    def extend(self, items: List[MemoryItemT]):
+        self.history.extend(items)
 
-    def set(self, messages: List[MessageParamT]):
-        self.history = messages.copy()
+    def set(self, items: List[MemoryItemT]):
+        self.history = items.copy()
 
-    def append(self, message: MessageParamT):
-        self.history.append(message)
+    def append(self, item: MemoryItemT):
+        self.history.append(item)
 
-    def get(self) -> List[MessageParamT]:
+    def get(self) -> List[MemoryItemT]:
         return self.history
 
     def clear(self):
@@ -131,7 +137,7 @@ class RequestParams(CreateMessageRequestParams):
     """
 
 
-class AugmentedLLMProtocol(Protocol, Generic[MessageParamT, MessageT]):
+class AugmentedLLMProtocol(Protocol, Generic[MessageParamT, MessageT, ResponseT]):
     """Protocol defining the interface for augmented LLMs"""
 
     async def generate(
@@ -183,7 +189,9 @@ def from_mcp_tool_result(
         """Convert an MCP tool result to an LLM input type"""
 
 
-class AugmentedLLM(ContextDependent, AugmentedLLMProtocol[MessageParamT, MessageT]):
+class AugmentedLLM(
+    ContextDependent, AugmentedLLMProtocol[MessageParamT, MessageT, ResponseT]
+):
     """
     The basic building block of agentic systems is an LLM enhanced with augmentations
     such as retrieval, tools, and memory provided from a collection of MCP servers.
@@ -223,6 +231,7 @@ def __init__(
             agent.instruction if agent and isinstance(agent.instruction, str) else None
         )
         self.history: Memory[MessageParamT] = SimpleMemory[MessageParamT]()
+        self.response_history: Memory[ResponseT] = SimpleMemory[ResponseT]()
        self.default_request_params = default_request_params
         self.model_preferences = (
             self.default_request_params.modelPreferences
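
The heart of the change: `Memory` and `SimpleMemory` are now generic over `MemoryItemT` rather than `MessageParamT`, so the same container type backs both the existing message `history` and the new `response_history`. A standalone sketch of the two parameterizations (the OpenAI types are used purely for illustration):

```python
from openai.types.chat import ChatCompletion, ChatCompletionMessageParam

from mcp_agent.workflows.llm.augmented_llm import SimpleMemory

# One generic container, parameterized two ways — mirroring how
# AugmentedLLM.__init__ wires up its histories in this diff.
history: SimpleMemory[ChatCompletionMessageParam] = SimpleMemory()
response_history: SimpleMemory[ChatCompletion] = SimpleMemory()

# history holds LLM input messages; response_history holds the raw
# provider responses that generate() appends in each subclass.
for response in response_history.get():
    if response.usage:
        print(response.usage.total_tokens)
```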
4 changes: 3 additions & 1 deletion src/mcp_agent/workflows/llm/augmented_llm_anthropic.py
@@ -62,7 +62,7 @@
 ]
 
 
-class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
+class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message, Message]):
     """
     The basic building block of agentic systems is an LLM enhanced with augmentations
     such as retrieval, tools, and memory provided from a collection of MCP servers.
@@ -170,6 +170,8 @@ async def generate(
                 anthropic.messages.create, **arguments
             )
 
+            self.response_history.extend(executor_result)
+
             response = executor_result[0]
 
             if isinstance(response, BaseException):
5 changes: 4 additions & 1 deletion src/mcp_agent/workflows/llm/augmented_llm_azure.py
@@ -2,6 +2,7 @@
 from typing import Iterable, List, Optional, Type, Union
 from azure.ai.inference import ChatCompletionsClient
 from azure.ai.inference.models import (
+    ChatCompletions,
     ChatResponseMessage,
     UserMessage,
     AssistantMessage,
@@ -57,7 +58,7 @@ class ResponseMessage(ChatResponseMessage):
     content: Optional[str]
 
 
-class AzureAugmentedLLM(AugmentedLLM[MessageParam, ResponseMessage]):
+class AzureAugmentedLLM(AugmentedLLM[MessageParam, ResponseMessage, ChatCompletions]):
     """
     The basic building block of agentic systems is an LLM enhanced with augmentations
     such as retrieval, tools, and memory provided from a collection of MCP servers.
@@ -165,6 +166,8 @@ async def generate(self, message, request_params: RequestParams | None = None):
                 self.azure_client.complete, **arguments
             )
 
+            self.response_history.extend(executor_result)
+
             response = executor_result[0]
 
             if isinstance(response, BaseException):
8 changes: 7 additions & 1 deletion src/mcp_agent/workflows/llm/augmented_llm_bedrock.py
@@ -24,19 +24,23 @@
     from mypy_boto3_bedrock_runtime.type_defs import (
         MessageOutputTypeDef,
         ConverseRequestTypeDef,
+        ConverseResponseTypeDef,
         MessageUnionTypeDef,
         ContentBlockUnionTypeDef,
         ToolConfigurationTypeDef,
     )
 else:
     MessageOutputTypeDef = object
     ConverseRequestTypeDef = object
+    ConverseResponseTypeDef = object
     MessageUnionTypeDef = object
     ContentBlockUnionTypeDef = object
     ToolConfigurationTypeDef = object
 
 
-class BedrockAugmentedLLM(AugmentedLLM[MessageUnionTypeDef, MessageUnionTypeDef]):
+class BedrockAugmentedLLM(
+    AugmentedLLM[MessageUnionTypeDef, MessageUnionTypeDef, ConverseResponseTypeDef]
+):
     """
     The basic building block of agentic systems is an LLM enhanced with augmentations
     such as retrieval, tools, and memory provided from a collection of MCP servers.
@@ -157,6 +161,8 @@ async def generate(self, message, request_params: RequestParams | None = None):
             self.bedrock_client.converse, **arguments
         )
 
+        self.response_history.extend(executor_result)
+
         response = executor_result[0]
 
         if isinstance(response, BaseException):
9 changes: 7 additions & 2 deletions src/mcp_agent/workflows/llm/augmented_llm_openai.py
@@ -4,6 +4,7 @@
 
 from openai import OpenAI
 from openai.types.chat import (
+    ChatCompletion,
     ChatCompletionAssistantMessageParam,
     ChatCompletionContentPartParam,
     ChatCompletionContentPartTextParam,
@@ -38,7 +39,7 @@
 
 
 class OpenAIAugmentedLLM(
-    AugmentedLLM[ChatCompletionMessageParam, ChatCompletionMessage]
+    AugmentedLLM[ChatCompletionMessageParam, ChatCompletionMessage, ChatCompletion]
 ):
     """
     The basic building block of agentic systems is an LLM enhanced with augmentations
@@ -181,6 +182,8 @@ async def generate(self, message, request_params: RequestParams | None = None):
                 openai_client.chat.completions.create, **arguments
             )
 
+            self.response_history.extend(executor_result)
+
             response = executor_result[0]
 
             self.logger.debug(
@@ -376,7 +379,9 @@ async def execute_tool_call(
             return ChatCompletionToolMessageParam(
                 role="tool",
                 tool_call_id=tool_call_id,
-                content="\n".join(str(mcp_content_to_openai_content(c)) for c in result.content),
+                content="\n".join(
+                    str(mcp_content_to_openai_content(c)) for c in result.content
+                ),
             )
 
         return None
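
Since each subclass now stores its provider's native response type (`ChatCompletion` for OpenAI, `Message` for Anthropic, `ChatCompletions` for Azure, `ConverseResponseTypeDef` for Bedrock), usage metadata lives under different field names. A hypothetical normalizer, sketched only for the OpenAI and Anthropic shapes used in the example:

```python
def total_tokens(response) -> int:
    """Hypothetical helper: normalize token counts across providers.

    OpenAI's ChatCompletion exposes usage.total_tokens; Anthropic's
    Message exposes usage.input_tokens and usage.output_tokens.
    """
    usage = getattr(response, "usage", None)
    if usage is None:
        return 0
    if getattr(usage, "total_tokens", None) is not None:
        return usage.total_tokens
    return getattr(usage, "input_tokens", 0) + getattr(usage, "output_tokens", 0)


grand_total = sum(total_tokens(r) for r in llm.response_history.get())
```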