chore(llmobs): dac strip io from gemini #13874

Merged 7 commits on Jul 14, 2025
122 changes: 0 additions & 122 deletions ddtrace/contrib/internal/google_generativeai/_utils.py
@@ -1,14 +1,7 @@
import sys

from google.generativeai.types.generation_types import to_generation_config_dict
import wrapt

from ddtrace.internal.utils import get_argument_value
from ddtrace.llmobs._integrations.utils import get_generation_config_google
from ddtrace.llmobs._integrations.utils import get_system_instructions_from_google_model
from ddtrace.llmobs._integrations.utils import tag_request_content_part_google
from ddtrace.llmobs._integrations.utils import tag_response_part_google


class BaseTracedGenerateContentResponse(wrapt.ObjectProxy):
"""Base wrapper class for GenerateContentResponse objects for tracing streamed responses."""
@@ -31,7 +24,6 @@ def __iter__(self):
self._dd_span.set_exc_info(*sys.exc_info())
raise
finally:
tag_response(self._dd_span, self.__wrapped__, self._dd_integration, self._model_instance)
self._kwargs["instance"] = self._model_instance
self._dd_integration.llmobs_set_tags(
self._dd_span,
@@ -51,7 +43,6 @@ async def __aiter__(self):
self._dd_span.set_exc_info(*sys.exc_info())
raise
finally:
tag_response(self._dd_span, self.__wrapped__, self._dd_integration, self._model_instance)
self._kwargs["instance"] = self._model_instance
self._dd_integration.llmobs_set_tags(
self._dd_span,
@@ -60,116 +51,3 @@
response=self.__wrapped__,
)
self._dd_span.finish()


def _extract_api_key(instance):
"""Extract the API key from the model instance."""
client = getattr(instance, "_client", None)
if getattr(instance, "_async_client", None):
client = getattr(instance._async_client, "_client", None)
if not client:
return None
client_options = getattr(client, "_client_options", None)
if not client_options:
return None
return getattr(client_options, "api_key", None)


def _tag_request_content(span, integration, content, content_idx):
"""Tag the generation span with request contents."""
if isinstance(content, str):
span.set_tag_str("google_generativeai.request.contents.%d.text" % content_idx, integration.trunc(content))
return
if isinstance(content, dict):
role = content.get("role", "")
if role:
span.set_tag_str("google_generativeai.request.contents.%d.role" % content_idx, str(content.get("role", "")))
parts = content.get("parts", [])
for part_idx, part in enumerate(parts):
tag_request_content_part_google("google_generativeai", span, integration, part, part_idx, content_idx)
return
role = getattr(content, "role", "")
if role:
span.set_tag_str("google_generativeai.request.contents.%d.role" % content_idx, str(role))
parts = getattr(content, "parts", [])
if not parts:
span.set_tag_str(
"google_generativeai.request.contents.%d.text" % content_idx,
integration.trunc("[Non-text content object: {}]".format(repr(content))),
)
return
for part_idx, part in enumerate(parts):
tag_request_content_part_google("google_generativeai", span, integration, part, part_idx, content_idx)


def tag_request(span, integration, instance, args, kwargs):
"""Tag the generation span with request details.
Includes capturing generation configuration, system prompt, and messages.
"""
contents = get_argument_value(args, kwargs, 0, "contents")
generation_config = get_generation_config_google(instance, kwargs)
system_instruction = get_system_instructions_from_google_model(instance)
stream = kwargs.get("stream", None)

try:
generation_config_dict = to_generation_config_dict(generation_config)
except TypeError:
generation_config_dict = None
if generation_config_dict is not None:
for k, v in generation_config_dict.items():
span.set_tag_str("google_generativeai.request.generation_config.%s" % k, str(v))

if stream:
span.set_tag("google_generativeai.request.stream", True)

if not integration.is_pc_sampled_span(span):
return

if system_instruction:
for idx, text in enumerate(system_instruction):
span.set_tag_str("google_generativeai.request.system_instruction.%d.text" % idx, integration.trunc(text))

if isinstance(contents, str):
span.set_tag_str("google_generativeai.request.contents.0.text", integration.trunc(contents))
return
elif isinstance(contents, dict):
span.set_tag_str("google_generativeai.request.contents.0.text", integration.trunc(str(contents)))
return
elif not isinstance(contents, list):
return
for content_idx, content in enumerate(contents):
_tag_request_content(span, integration, content, content_idx)


def tag_response(span, generations, integration, instance):
"""Tag the generation span with response details.
Includes capturing generation text, roles, finish reasons, and token counts.
"""
api_key = _extract_api_key(instance)
if api_key:
span.set_tag("google_generativeai.request.api_key", "...{}".format(api_key[-4:]))

generations_dict = generations.to_dict()
for candidate_idx, candidate in enumerate(generations_dict.get("candidates", [])):
finish_reason = candidate.get("finish_reason", None)
if finish_reason:
span.set_tag_str(
"google_generativeai.response.candidates.%d.finish_reason" % candidate_idx, str(finish_reason)
)
candidate_content = candidate.get("content", {})
role = candidate_content.get("role", "")
span.set_tag_str("google_generativeai.response.candidates.%d.content.role" % candidate_idx, str(role))
if not integration.is_pc_sampled_span(span):
continue
parts = candidate_content.get("parts", [])
for part_idx, part in enumerate(parts):
tag_response_part_google("google_generativeai", span, integration, part, part_idx, candidate_idx)

token_counts = generations_dict.get("usage_metadata", None)
if not token_counts:
return
span.set_metric("google_generativeai.response.usage.prompt_tokens", token_counts.get("prompt_token_count", 0))
span.set_metric(
"google_generativeai.response.usage.completion_tokens", token_counts.get("candidates_token_count", 0)
)
span.set_metric("google_generativeai.response.usage.total_tokens", token_counts.get("total_token_count", 0))
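For reference, a minimal sketch of the APM tag surface this file deletion retires. The tag names come from the removed helpers above; the values are illustrative (the masked key and token total mirror the snapshot diff at the bottom of this PR):

# Illustrative only: request/response IO tags that tag_request/tag_response
# used to emit on the APM span. Keys are from the deleted code; values are made up.
removed_apm_tags = {
    "google_generativeai.request.contents.0.text": "What is the argument for ...",
    "google_generativeai.request.generation_config.temperature": "1.0",
    "google_generativeai.request.api_key": "...key>",  # only the last four characters, masked
    "google_generativeai.response.candidates.0.content.parts.0.text": "The argument for LeBron ...",
    "google_generativeai.response.usage.total_tokens": 42,
}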
10 changes: 0 additions & 10 deletions ddtrace/contrib/internal/google_generativeai/patch.py
@@ -7,9 +7,6 @@
from ddtrace import config
from ddtrace.contrib.internal.google_generativeai._utils import TracedAsyncGenerateContentResponse
from ddtrace.contrib.internal.google_generativeai._utils import TracedGenerateContentResponse
from ddtrace.contrib.internal.google_generativeai._utils import _extract_api_key
from ddtrace.contrib.internal.google_generativeai._utils import tag_request
from ddtrace.contrib.internal.google_generativeai._utils import tag_response
from ddtrace.contrib.internal.trace_utils import unwrap
from ddtrace.contrib.internal.trace_utils import with_traced_module
from ddtrace.contrib.internal.trace_utils import wrap
@@ -51,14 +48,9 @@ def traced_generate(genai, pin, func, instance, args, kwargs):
submit_to_llmobs=True,
)
try:
tag_request(span, integration, instance, args, kwargs)
generations = func(*args, **kwargs)
api_key = _extract_api_key(instance)
if api_key:
span.set_tag("google_generativeai.request.api_key", "...{}".format(api_key[-4:]))
if stream:
return TracedGenerateContentResponse(generations, instance, integration, span, args, kwargs)
tag_response(span, generations, integration, instance)
except Exception:
span.set_exc_info(*sys.exc_info())
raise
@@ -84,11 +76,9 @@ async def traced_agenerate(genai, pin, func, instance, args, kwargs):
submit_to_llmobs=True,
)
try:
tag_request(span, integration, instance, args, kwargs)
generations = await func(*args, **kwargs)
if stream:
return TracedAsyncGenerateContentResponse(generations, instance, integration, span, args, kwargs)
tag_response(span, generations, integration, instance)
except Exception:
span.set_exc_info(*sys.exc_info())
raise
24 changes: 22 additions & 2 deletions ddtrace/llmobs/_integrations/gemini.py
@@ -6,15 +6,17 @@

from ddtrace.internal.utils import get_argument_value
from ddtrace.llmobs._constants import INPUT_MESSAGES
from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
from ddtrace.llmobs._constants import METADATA
from ddtrace.llmobs._constants import METRICS
from ddtrace.llmobs._constants import MODEL_NAME
from ddtrace.llmobs._constants import MODEL_PROVIDER
from ddtrace.llmobs._constants import OUTPUT_MESSAGES
from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
from ddtrace.llmobs._constants import SPAN_KIND
from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
from ddtrace.llmobs._integrations.base import BaseLLMIntegration
from ddtrace.llmobs._integrations.utils import extract_message_from_part_google
from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags
from ddtrace.llmobs._integrations.utils import get_system_instructions_from_google_model
from ddtrace.llmobs._integrations.utils import llmobs_get_metadata_google
from ddtrace.llmobs._utils import _get_attr
@@ -59,7 +61,7 @@ def _llmobs_set_tags(
METADATA: metadata,
INPUT_MESSAGES: input_messages,
OUTPUT_MESSAGES: output_messages,
METRICS: get_llmobs_metrics_tags("google_generativeai", span),
METRICS: self._extract_metrics(response),
}
)

@@ -108,3 +110,21 @@ def _extract_output_message(self, generations):
message = extract_message_from_part_google(part, role)
output_messages.append(message)
return output_messages

def _extract_metrics(self, generations):
if not generations:
return {}
generations_dict = generations.to_dict()

token_counts = generations_dict.get("usage_metadata", None)
if not token_counts:
return {}
input_tokens = token_counts.get("prompt_token_count", 0)
output_tokens = token_counts.get("candidates_token_count", 0)
total_tokens = input_tokens + output_tokens

usage = {}
usage[INPUT_TOKENS_METRIC_KEY] = input_tokens
usage[OUTPUT_TOKENS_METRIC_KEY] = output_tokens
usage[TOTAL_TOKENS_METRIC_KEY] = total_tokens
return usage
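To make the new token accounting concrete, a worked example of the arithmetic in _extract_metrics; the usage_metadata values are borrowed from the old snapshot below (12 prompt tokens, 30 candidate tokens), and the payload itself is hypothetical:

# Worked example, not part of the diff: what _extract_metrics computes
# for a hypothetical response whose usage_metadata matches the old snapshot.
token_counts = {"prompt_token_count": 12, "candidates_token_count": 30}

input_tokens = token_counts.get("prompt_token_count", 0)       # 12
output_tokens = token_counts.get("candidates_token_count", 0)  # 30
total_tokens = input_tokens + output_tokens                    # 42

Note that the total is now summed from the input and output counts rather than read from the response's total_token_count field, so it stays consistent even if the SDK omits that field.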
50 changes: 0 additions & 50 deletions ddtrace/llmobs/_integrations/utils.py
@@ -74,56 +74,6 @@ def get_generation_config_google(instance, kwargs):
return generation_config or _get_attr(instance, "_generation_config", {})


def tag_request_content_part_google(tag_prefix, span, integration, part, part_idx, content_idx):
"""Tag the generation span with request content parts."""
text = _get_attr(part, "text", "")
function_call = _get_attr(part, "function_call", None)
function_response = _get_attr(part, "function_response", None)
span.set_tag_str(
"%s.request.contents.%d.parts.%d.text" % (tag_prefix, content_idx, part_idx), integration.trunc(str(text))
)
if function_call:
function_call_dict = type(function_call).to_dict(function_call)
span.set_tag_str(
"%s.request.contents.%d.parts.%d.function_call.name" % (tag_prefix, content_idx, part_idx),
function_call_dict.get("name", ""),
)
span.set_tag_str(
"%s.request.contents.%d.parts.%d.function_call.args" % (tag_prefix, content_idx, part_idx),
integration.trunc(str(function_call_dict.get("args", {}))),
)
if function_response:
function_response_dict = type(function_response).to_dict(function_response)
span.set_tag_str(
"%s.request.contents.%d.parts.%d.function_response.name" % (tag_prefix, content_idx, part_idx),
function_response_dict.get("name", ""),
)
span.set_tag_str(
"%s.request.contents.%d.parts.%d.function_response.response" % (tag_prefix, content_idx, part_idx),
integration.trunc(str(function_response_dict.get("response", {}))),
)


def tag_response_part_google(tag_prefix, span, integration, part, part_idx, candidate_idx):
"""Tag the generation span with response part text and function calls."""
text = _get_attr(part, "text", "")
span.set_tag_str(
"%s.response.candidates.%d.content.parts.%d.text" % (tag_prefix, candidate_idx, part_idx),
integration.trunc(str(text)),
)
function_call = _get_attr(part, "function_call", None)
if not function_call:
return
span.set_tag_str(
"%s.response.candidates.%d.content.parts.%d.function_call.name" % (tag_prefix, candidate_idx, part_idx),
_get_attr(function_call, "name", ""),
)
span.set_tag_str(
"%s.response.candidates.%d.content.parts.%d.function_call.args" % (tag_prefix, candidate_idx, part_idx),
integration.trunc(str(_get_attr(function_call, "args", {}))),
)


def llmobs_get_metadata_google(kwargs, instance):
metadata = {}
model_config = getattr(instance, "_generation_config", {}) or {}
@@ -0,0 +1,3 @@
other:
- |
gemini: Removes the IO data from the APM spans for Gemini LLM requests and responses, which is duplicated in the LLM Observability span.
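In practice the prompt/response text now surfaces only through LLM Observability, so that product must be enabled to see it. A minimal sketch, assuming the documented ddtrace LLMObs setup (the ml_app name is illustrative):

from ddtrace.llmobs import LLMObs

# Enable LLM Observability; with this change, Gemini request/response IO
# is recorded on the LLMObs span instead of being duplicated as APM span tags.
LLMObs.enable(ml_app="example-gemini-app")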
@@ -42,7 +42,6 @@ def test_global_tags(genai, mock_client, mock_tracer):
assert span.get_tag("env") == "staging"
assert span.get_tag("version") == "1234"
assert span.get_tag("google_generativeai.request.model") == "gemini-1.5-flash"
assert span.get_tag("google_generativeai.request.api_key") == "...key>"


SNAPSHOT_IGNORES = []
@@ -10,30 +10,19 @@
"error": 0,
"meta": {
"_dd.p.dm": "-0",
"_dd.p.tid": "66e1da0a00000000",
"google_generativeai.request.api_key": "...key>",
"google_generativeai.request.contents.0.text": "What is the argument for LeBron James being the GOAT?",
"google_generativeai.request.generation_config.max_output_tokens": "35",
"google_generativeai.request.generation_config.stop_sequences": "['x']",
"google_generativeai.request.generation_config.temperature": "1.0",
"_dd.p.tid": "68715e7800000000",
"google_generativeai.request.model": "gemini-1.5-flash",
"google_generativeai.request.provider": "google",
"google_generativeai.response.candidates.0.content.parts.0.text": "The argument for LeBron James being the 'Greatest of All Time' (GOAT) is multifaceted and involves a variety of factors. Here's ...",
"google_generativeai.response.candidates.0.content.role": "model",
"google_generativeai.response.candidates.0.finish_reason": "2",
"language": "python",
"runtime-id": "bd8636b7d4bb4b3abc2d4bc7129ec109"
"runtime-id": "e72fd406a9a04657a973cf959e2935f5"
},
"metrics": {
"_dd.measured": 1,
"_dd.top_level": 1,
"_dd.tracer_kr": 1.0,
"_sampling_priority_v1": 1,
"google_generativeai.response.usage.completion_tokens": 30,
"google_generativeai.response.usage.prompt_tokens": 12,
"google_generativeai.response.usage.total_tokens": 42,
"process_id": 7954
"process_id": 66831
},
"duration": 338000,
"start": 1726077450580144000
"duration": 176000,
"start": 1752260216102575000
}]]