newrelic · umaannamalai · Jun 27, 2025 · Jun 24, 2025 · Jun 27, 2025 · Jun 27, 2025
@@ -2866,7 +2866,11 @@ def _process_module_builtin_defaults():
     _process_module_definition(
         "autogen_ext.tools.mcp._base", "newrelic.hooks.mlmodel_autogen", "instrument_autogen_ext_tools_mcp__base"
     )
-
+    _process_module_definition(
+        "autogen_agentchat.agents._assistant_agent",
+        "newrelic.hooks.mlmodel_autogen",
+        "instrument_autogen_agentchat_agents__assistant_agent",
+    )
     _process_module_definition("mcp.client.session", "newrelic.hooks.adapter_mcp", "instrument_mcp_client_session")
 
     _process_module_definition("structlog._base", "newrelic.hooks.logger_structlog", "instrument_structlog__base")

@@ -12,11 +12,33 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+
+import logging
+import sys
+import uuid
+
 from newrelic.api.function_trace import FunctionTrace
+from newrelic.api.time_trace import get_trace_linking_metadata
 from newrelic.api.transaction import current_transaction
 from newrelic.common.object_names import callable_name
 from newrelic.common.object_wrapper import wrap_function_wrapper
+from newrelic.common.package_version_utils import get_package_version
 from newrelic.common.signature import bind_args
+from newrelic.core.config import global_settings
+
+# Check for the presence of the autogen-core, autogen-agentchat, or autogen-ext package as they should all have the
+# same version and one or multiple could be installed. The lookups are chained with `or`, so the first truthy
+# result is used and later packages are only queried when the earlier ones yield nothing.
+AUTOGEN_VERSION = (
+    get_package_version("autogen-core")
+    or get_package_version("autogen-agentchat")
+    or get_package_version("autogen-ext")
+)
+
+
+# Log template used when building an LLM event fails. The trailing "%s" is a logging placeholder that is only
+# substituted when the logging call supplies a matching argument.
+RECORD_EVENTS_FAILURE_LOG_MESSAGE = "Exception occurred in Autogen instrumentation: Failed to record LLM events. Please report this issue to New Relic Support.\n%s"
+
+
+# Module-level logger named after this module.
+_logger = logging.getLogger(__name__)
 
 
 async def wrap_from_server_params(wrapped, instance, args, kwargs):
@@ -32,6 +54,123 @@ async def wrap_from_server_params(wrapped, instance, args, kwargs):
         return await wrapped(*args, **kwargs)
 
 
+def wrap_on_messages_stream(wrapped, instance, args, kwargs):
+    """Call ``wrapped`` inside an ``Llm`` function trace named ``<callable name>/<agent name>``.
+
+    The call is passed straight through, untraced, when there is no current transaction or when AI monitoring
+    is disabled in the effective settings.
+
+    Args:
+        wrapped: The original callable being instrumented.
+        instance: The object ``wrapped`` is bound to; its ``name`` attribute (default ``"agent"``) is used in
+            the trace name.
+        args: Positional arguments for ``wrapped``.
+        kwargs: Keyword arguments for ``wrapped``.
+
+    Returns:
+        Whatever ``wrapped(*args, **kwargs)`` returns.
+    """
+    transaction = current_transaction()
+    if not transaction:
+        return wrapped(*args, **kwargs)
+
+    # Apply the same gate as wrap__execute_tool_call: no LLM instrumentation when AI monitoring is turned off.
+    settings = transaction.settings or global_settings()
+    if not settings.ai_monitoring.enabled:
+        return wrapped(*args, **kwargs)
+
+    agent_name = getattr(instance, "name", "agent")
+    func_name = callable_name(wrapped)
+    function_trace_name = f"{func_name}/{agent_name}"
+    # NOTE(review): the trace only spans the call to ``wrapped`` itself. If ``wrapped`` returns a lazy (async)
+    # generator, iteration happens after the trace has closed - confirm this is the intended measurement.
+    with FunctionTrace(name=function_trace_name, group="Llm", source=wrapped):
+        return wrapped(*args, **kwargs)
+
+
+def _get_llm_metadata(transaction):
+    """Collect LLM metadata from ``transaction`` for attachment to LLM events.
+
+    Returns a new dict holding every entry of ``transaction._custom_params`` whose key starts with ``"llm."``.
+    When the transaction has a truthy ``_llm_context_attrs`` attribute, its entries are merged in afterwards and
+    therefore take precedence on key collisions.
+    """
+    metadata = {}
+    for attr_name, attr_value in transaction._custom_params.items():
+        if attr_name.startswith("llm."):
+            metadata[attr_name] = attr_value
+
+    # Context attributes are optional; older/other transaction objects may not define them at all.
+    context_attrs = getattr(transaction, "_llm_context_attrs", None)
+    if context_attrs:
+        metadata.update(context_attrs)
+    return metadata
+
+
+def _extract_tool_output(return_val, tool_name):
+    """Return the ``content`` attribute of ``return_val[1]``, or ``None`` when it cannot be read.
+
+    A missing ``content`` attribute yields ``None`` silently. Any exception raised while indexing or reading
+    the attribute is swallowed, a warning naming ``tool_name`` is logged, and ``None`` is returned.
+    """
+    try:
+        return getattr(return_val[1], "content", None)
+    except Exception:
+        _logger.warning("Unable to parse tool output value from %s. Omitting output from LlmTool event.", tool_name)
+    return None
+
+
+def _construct_base_tool_event_dict(bound_args, tool_call_data, tool_id, transaction, settings):
+    """Build the attributes shared by every ``LlmTool`` event for one tool call.
+
+    Args:
+        bound_args: Mapping of the wrapped call's bound arguments; ``agent_name`` is read from it.
+        tool_call_data: Object describing the tool call; its ``arguments``, ``id`` and ``name`` attributes are
+            read, each with a fallback when absent.
+        tool_id: Identifier stored under the event's ``id`` key.
+        transaction: Transaction whose LLM metadata is merged into the event.
+        settings: Settings object; ``ai_monitoring.record_content.enabled`` controls whether ``input`` is added.
+
+    Returns:
+        The event dict, or an empty dict if anything raised while building it (the failure is logged, never
+        propagated).
+    """
+    try:
+        _input = getattr(tool_call_data, "arguments", None)
+        tool_input = str(_input) if _input else None
+        run_id = getattr(tool_call_data, "id", None)
+        tool_name = getattr(tool_call_data, "name", "tool")
+        agent_name = bound_args.get("agent_name")
+        linking_metadata = get_trace_linking_metadata()
+
+        tool_event_dict = {
+            "id": tool_id,
+            "run_id": run_id,
+            "name": tool_name,
+            "span_id": linking_metadata.get("span.id"),
+            "trace_id": linking_metadata.get("trace.id"),
+            "agent_name": agent_name,
+            "vendor": "autogen",
+            "ingest_source": "Python",
+        }
+        # Tool input may contain user content, so it is only attached when content recording is enabled.
+        if settings.ai_monitoring.record_content.enabled:
+            tool_event_dict["input"] = tool_input
+        tool_event_dict.update(_get_llm_metadata(transaction))
+    except Exception as exc:
+        tool_event_dict = {}
+        # The template ends in "%s"; without an argument logging emits the placeholder literally. Pass the
+        # exception to fill it, and keep exc_info so the traceback is still attached to the record.
+        _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, exc, exc_info=True)
+
+    return tool_event_dict
+
+
+async def wrap__execute_tool_call(wrapped, instance, args, kwargs):
+    """Trace a tool call and record an ``LlmTool`` custom event describing it.
+
+    The call is awaited untraced when there is no current transaction or AI monitoring is disabled. Otherwise
+    it runs inside a ``FunctionTrace`` and an ``LlmTool`` event is recorded with the trace duration in
+    milliseconds - with ``error: True`` when the call raised an ``Exception``, or with the tool output (if
+    content recording is enabled) when it succeeded.
+
+    Args:
+        wrapped: The original coroutine function being instrumented.
+        instance: The object ``wrapped`` is bound to (unused here).
+        args: Positional arguments for ``wrapped``.
+        kwargs: Keyword arguments for ``wrapped``.
+
+    Returns:
+        The value produced by awaiting ``wrapped(*args, **kwargs)``.
+
+    Raises:
+        Whatever ``wrapped`` raises; exceptions are re-raised unchanged after the trace is closed.
+    """
+    transaction = current_transaction()
+    if not transaction:
+        return await wrapped(*args, **kwargs)
+
+    settings = transaction.settings or global_settings()
+    if not settings.ai_monitoring.enabled:
+        return await wrapped(*args, **kwargs)
+
+    # Framework metric also used for entity tagging in the UI
+    transaction.add_ml_model_info("Autogen", AUTOGEN_VERSION)
+    transaction._add_agent_attribute("llm", True)
+
+    tool_id = str(uuid.uuid4())
+    bound_args = bind_args(wrapped, args, kwargs)
+    tool_call_data = bound_args.get("tool_call")
+    tool_event_dict = _construct_base_tool_event_dict(bound_args, tool_call_data, tool_id, transaction, settings)
+
+    tool_name = getattr(tool_call_data, "name", "tool")
+
+    func_name = callable_name(wrapped)
+    # The trace is entered/exited by hand (rather than with ``with``) because its duration is read after exit
+    # on both the success and the failure path.
+    ft = FunctionTrace(name=f"{func_name}/{tool_name}", group="Llm/tool/Autogen")
+    ft.__enter__()
+
+    try:
+        return_val = await wrapped(*args, **kwargs)
+    except Exception:
+        ft.notice_error(attributes={"tool_id": tool_id})
+        ft.__exit__(*sys.exc_info())
+        # If we hit an exception, append the error attribute and duration from the exited function trace
+        tool_event_dict.update({"duration": ft.duration * 1000, "error": True})
+        transaction.record_custom_event("LlmTool", tool_event_dict)
+        raise
+    except BaseException:
+        # Non-Exception exits (e.g. asyncio.CancelledError, a BaseException subclass) would otherwise leave
+        # the manually entered trace open. Close it the way a ``with`` block would, then propagate.
+        ft.__exit__(*sys.exc_info())
+        raise
+
+    ft.__exit__(None, None, None)
+
+    tool_event_dict.update({"duration": ft.duration * 1000})
+
+    # If the tool was executed successfully, we can grab the tool output from the result
+    tool_output = _extract_tool_output(return_val, tool_name)
+    if settings.ai_monitoring.record_content.enabled:
+        tool_event_dict.update({"output": tool_output})
+
+    transaction.record_custom_event("LlmTool", tool_event_dict)
+
+    return return_val
+
+
+def instrument_autogen_agentchat_agents__assistant_agent(module):
+    """Install the wrappers on ``module.AssistantAgent`` for whichever target methods it defines.
+
+    Does nothing when ``module`` has no ``AssistantAgent``; each of ``on_messages_stream`` and
+    ``_execute_tool_call`` is wrapped only if present on the class.
+    """
+    if not hasattr(module, "AssistantAgent"):
+        return
+
+    assistant_agent = module.AssistantAgent
+    if hasattr(assistant_agent, "on_messages_stream"):
+        wrap_function_wrapper(module, "AssistantAgent.on_messages_stream", wrap_on_messages_stream)
+    if hasattr(assistant_agent, "_execute_tool_call"):
+        wrap_function_wrapper(module, "AssistantAgent._execute_tool_call", wrap__execute_tool_call)
+
+
 def instrument_autogen_ext_tools_mcp__base(module):
     if hasattr(module, "McpToolAdapter"):
         if hasattr(module.McpToolAdapter, "from_server_params"):

@@ -12,11 +12,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import json
+
+import pytest
+from autogen_agentchat.agents import AssistantAgent
+from autogen_agentchat.base import TaskResult
+from autogen_agentchat.teams import RoundRobinGroupChat
+from autogen_core import ComponentModel, FunctionCall, Image
+from autogen_core.models import CreateResult, RequestUsage
+from autogen_core.models._model_client import ModelFamily
+from autogen_ext.models.replay import ReplayChatCompletionClient
+from pydantic import BaseModel, ValidationError
 from testing_support.fixture.event_loop import event_loop as loop
 from testing_support.fixtures import collector_agent_registration_fixture, collector_available_fixture
 
+from newrelic.common.object_names import callable_name
+
 _default_settings = {
-    "package_reporting.enabled": False,
+    "package_reporting.enabled": False,  # Turn off package reporting for testing as it causes slowdowns.
     "transaction_tracer.explain_threshold": 0.0,
     "transaction_tracer.transaction_threshold": 0.0,
     "transaction_tracer.stack_trace_threshold": 0.0,
@@ -28,3 +41,133 @@
+# Module-level fixture produced by collector_agent_registration_fixture, configured with this suite's
+# application name and the _default_settings overrides defined above.
 collector_agent_registration = collector_agent_registration_fixture(
     app_name="Python Agent Test (mlmodel_autogen)", default_settings=_default_settings
 )
+
+
+@pytest.fixture
+def single_tool_model_client():
+    """Provide a ``ReplayChatCompletionClient`` scripted with a single valid tool call.
+
+    The scripted responses are, in order: one ``add_exclamation`` function call with JSON-encoded arguments,
+    the plain string "Hello", and the plain string "TERMINATE".
+    """
+    tool_call = FunctionCall(id="1", arguments=json.dumps({"message": "Hello"}), name="add_exclamation")
+    tool_call_response = CreateResult(
+        finish_reason="function_calls",
+        content=[tool_call],
+        usage=RequestUsage(prompt_tokens=10, completion_tokens=5),
+        cached=False,
+    )
+    capabilities = {
+        "function_calling": True,
+        "vision": True,
+        "json_output": True,
+        "family": "gpt-4.1-nano",
+        "structured_output": True,
+    }
+    return ReplayChatCompletionClient([tool_call_response, "Hello", "TERMINATE"], model_info=capabilities)
+
+
+@pytest.fixture
+def single_tool_model_client_error():
+    """Provide a ``ReplayChatCompletionClient`` scripted with a single malformed tool call.
+
+    Same response sequence as the valid single-tool client, except the ``add_exclamation`` call carries the
+    integer ``12`` as its arguments instead of a JSON string.
+    """
+    # Set arguments to an invalid type to trigger error in tool
+    bad_tool_call = FunctionCall(id="1", arguments=12, name="add_exclamation")
+    tool_call_response = CreateResult(
+        finish_reason="function_calls",
+        content=[bad_tool_call],
+        usage=RequestUsage(prompt_tokens=10, completion_tokens=5),
+        cached=False,
+    )
+    capabilities = {
+        "function_calling": True,
+        "vision": True,
+        "json_output": True,
+        "family": "gpt-4.1-nano",
+        "structured_output": True,
+    }
+    return ReplayChatCompletionClient([tool_call_response, "Hello", "TERMINATE"], model_info=capabilities)
+
+
+@pytest.fixture
+def multi_tool_model_client():
+    """Provide a ``ReplayChatCompletionClient`` scripted with four consecutive valid tool calls.
+
+    The calls are two ``add_exclamation`` invocations followed by two ``compute_sum`` invocations, each with
+    JSON-encoded arguments and wrapped in its own ``CreateResult``.
+    """
+    # (call id, tool name, tool arguments) for each scripted response, in replay order.
+    scripted_calls = [
+        ("1", "add_exclamation", {"message": "Hello"}),
+        ("2", "add_exclamation", {"message": "Goodbye"}),
+        ("3", "compute_sum", {"a": 5, "b": 3}),
+        ("4", "compute_sum", {"a": 123, "b": 2}),
+    ]
+    responses = [
+        CreateResult(
+            finish_reason="function_calls",
+            content=[FunctionCall(id=call_id, name=tool_name, arguments=json.dumps(tool_args))],
+            usage=RequestUsage(prompt_tokens=10, completion_tokens=5),
+            cached=False,
+        )
+        for call_id, tool_name, tool_args in scripted_calls
+    ]
+    capabilities = {
+        "family": "gpt-4.1-nano",
+        "function_calling": True,
+        "json_output": True,
+        "vision": True,
+        "structured_output": True,
+    }
+    return ReplayChatCompletionClient(chat_completions=responses, model_info=capabilities)
+
+
+@pytest.fixture
+def multi_tool_model_client_error():
+    """Provide a ``ReplayChatCompletionClient`` scripted with four tool calls, the last one malformed.
+
+    The first three calls carry JSON-encoded arguments; the final ``compute_sum`` call carries the integer
+    ``12`` as its arguments instead.
+    """
+    # (call id, tool name, already-encoded arguments) for each scripted response, in replay order.
+    scripted_calls = [
+        ("1", "add_exclamation", json.dumps({"message": "Hello"})),
+        ("2", "add_exclamation", json.dumps({"message": "Goodbye"})),
+        ("3", "compute_sum", json.dumps({"a": 5, "b": 3})),
+        # Set arguments to an invalid type to trigger error in tool
+        ("4", "compute_sum", 12),
+    ]
+    responses = [
+        CreateResult(
+            finish_reason="function_calls",
+            content=[FunctionCall(id=call_id, name=tool_name, arguments=encoded_args)],
+            usage=RequestUsage(prompt_tokens=10, completion_tokens=5),
+            cached=False,
+        )
+        for call_id, tool_name, encoded_args in scripted_calls
+    ]
+    capabilities = {
+        "family": "gpt-4.1-nano",
+        "function_calling": True,
+        "json_output": True,
+        "vision": True,
+        "structured_output": True,
+    }
+    return ReplayChatCompletionClient(chat_completions=responses, model_info=capabilities)