-
Notifications
You must be signed in to change notification settings - Fork 440
feat(llmobs): add datasets and experiments features #13314
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
cbf6808
f7aea75
f247e9c
abe7934
d151bc6
5c5d520
1893c37
a5841a9
9884fc7
a648fe5
91ee6af
df93380
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -59,6 +59,10 @@ | |
from ddtrace.llmobs._constants import SPAN_LINKS | ||
from ddtrace.llmobs._constants import SPAN_START_WHILE_DISABLED_WARNING | ||
from ddtrace.llmobs._constants import TAGS | ||
from ddtrace.llmobs._constants import EXPECTED_OUTPUT | ||
from ddtrace.llmobs._constants import EXPERIMENT_INPUT | ||
from ddtrace.llmobs._constants import EXPERIMENT_OUTPUT | ||
from ddtrace.llmobs._constants import EXPERIMENT_ID_BAGGAGE_KEY | ||
from ddtrace.llmobs._context import LLMObsContextProvider | ||
from ddtrace.llmobs._evaluators.runner import EvaluatorRunner | ||
from ddtrace.llmobs._utils import AnnotationContext | ||
|
@@ -204,6 +208,14 @@ def _llmobs_span_event(cls, span: Span) -> Dict[str, Any]: | |
span._set_ctx_item(ML_APP, ml_app) | ||
parent_id = span._get_ctx_item(PARENT_ID_KEY) or ROOT_PARENT_ID | ||
|
||
# Experiments related | ||
if span._get_ctx_item(EXPECTED_OUTPUT) is not None: | ||
meta["expected_output"] = span._get_ctx_item(EXPECTED_OUTPUT) | ||
if span._get_ctx_item(EXPERIMENT_INPUT) is not None: | ||
meta["input"] = span._get_ctx_item(EXPERIMENT_INPUT) | ||
if span._get_ctx_item(EXPERIMENT_OUTPUT) is not None: | ||
meta["output"] = span._get_ctx_item(EXPERIMENT_OUTPUT) | ||
|
||
llmobs_span_event = { | ||
"trace_id": format_trace_id(span.trace_id), | ||
"span_id": str(span.span_id), | ||
|
@@ -241,6 +253,12 @@ def _llmobs_tags(span: Span, ml_app: str, session_id: Optional[str] = None) -> L | |
"language": "python", | ||
"error": span.error, | ||
} | ||
|
||
# Add experiment_id from baggage if present | ||
experiment_id = span.context.get_baggage_item(EXPERIMENT_ID_BAGGAGE_KEY) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should just look generally in the context for the experiment ID, following the same paradigm we use for the parent ID and the mlobs trace ID. |
||
if experiment_id: | ||
tags["experiment_id"] = experiment_id | ||
|
||
err_type = span.get_tag(ERROR_TYPE) | ||
if err_type: | ||
tags["error_type"] = err_type | ||
|
@@ -775,6 +793,35 @@ def agent( | |
"agent", name=name, session_id=session_id, ml_app=ml_app, _decorator=_decorator | ||
) | ||
|
||
@classmethod
def _experiment(
    cls,
    name: Optional[str] = None,
    session_id: Optional[str] = None,
    ml_app: Optional[str] = None,
    experiment_id: Optional[str] = None,
) -> Span:
    """
    Trace an LLM experiment, only used internally by the experiments SDK.

    Starts a span of kind "experiment" and, when an experiment ID is given, stores it as a
    baggage item on the span's context under ``EXPERIMENT_ID_BAGGAGE_KEY``.

    :param str name: The name of the traced operation. Forwarded as-is to ``_start_span``
                     (``None`` when omitted).
    :param str session_id: The ID of the underlying user session. Required for tracking sessions.
    :param str ml_app: The name of the ML application that the experiment belongs to. If not provided,
                       the default value will be set to the value of `DD_LLMOBS_ML_APP`.
    :param str experiment_id: The ID of the experiment to associate with this span and its children.

    :returns: The Span object representing the traced operation.
    """
    if cls.enabled is False:
        # NOTE(review): a PR reviewer suggested this should be an info log in case the disabled
        # state is expected (user doesn't want to submit spans), and that the message should be
        # inlined rather than read from a constant, since the indirection makes debugging harder.
        log.warning(SPAN_START_WHILE_DISABLED_WARNING)

    # The span is started even when LLMObs is disabled; only the warning above differs.
    span = cls._instance._start_span("experiment", name=name, session_id=session_id, ml_app=ml_app)

    # Set experiment_id in baggage if provided.
    # NOTE(review): a PR reviewer suggested storing the ID in the context rather than the baggage.
    if experiment_id:
        span.context.set_baggage_item(EXPERIMENT_ID_BAGGAGE_KEY, experiment_id)

    return span
|
||
@classmethod | ||
def workflow( | ||
cls, | ||
|
@@ -967,11 +1014,22 @@ def annotate( | |
error = cls._tag_embedding_io(span, input_documents=input_data, output_text=output_data) | ||
elif span_kind == "retrieval": | ||
error = cls._tag_retrieval_io(span, input_text=input_data, output_documents=output_data) | ||
elif span_kind == "experiment": | ||
error = cls._tag_experiment_io(span, input_data=input_data, output_data=output_data) | ||
else: | ||
cls._tag_text_io(span, input_value=input_data, output_value=output_data) | ||
finally: | ||
telemetry.record_llmobs_annotate(span, error) | ||
|
||
@staticmethod
def _tag_expected_output(span, expected_output: dict) -> None:
    """Tags a given LLMObs span with its expected output.

    The value is stored on the span under the ``EXPECTED_OUTPUT`` context item. A ``TypeError``
    raised while storing it is logged as a warning (with traceback) instead of propagating.

    :param span: The span to tag.
    :param dict expected_output: The expected output to attach to the span.
    """
    try:
        span._set_ctx_item(EXPECTED_OUTPUT, expected_output)
    except TypeError:
        log.warning("Failed to validate expected output with error: ", exc_info=True)
|
||
@classmethod | ||
def _tag_llm_io(cls, span, input_messages=None, output_messages=None) -> Optional[str]: | ||
"""Tags input/output messages for LLM-kind spans. | ||
|
@@ -1048,6 +1106,17 @@ def _tag_text_io(cls, span, input_value=None, output_value=None): | |
if output_value is not None: | ||
span._set_ctx_item(OUTPUT_VALUE, safe_json(output_value)) | ||
|
||
@classmethod
def _tag_experiment_io(cls, span, input_data=None, output_data=None):
    """Record the input/output of an experiment-kind span on the span itself.

    Each value that is not ``None`` is stored unchanged under its context item
    (``EXPERIMENT_INPUT`` for ``input_data``, ``EXPERIMENT_OUTPUT`` for ``output_data``).
    Always returns ``None``.
    """
    pending = (
        (EXPERIMENT_INPUT, input_data),
        (EXPERIMENT_OUTPUT, output_data),
    )
    for ctx_key, payload in pending:
        if payload is None:
            # Nothing supplied for this side; leave any existing item untouched.
            continue
        span._set_ctx_item(ctx_key, payload)
    return None
|
||
@staticmethod | ||
def _set_dict_attribute(span: Span, key, value: Dict[str, Any]) -> None: | ||
"""Sets a given LLM Obs span attribute with a dictionary key/values. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,13 @@ | ||
from dataclasses import dataclass | ||
import http.client | ||
import json | ||
from typing import Dict | ||
from typing import List | ||
from typing import Optional | ||
from typing import Tuple | ||
from typing import Union | ||
import urllib.request | ||
from urllib.error import HTTPError | ||
|
||
from ddtrace import config | ||
from ddtrace.ext import SpanTypes | ||
|
@@ -314,3 +317,48 @@ def on_tool_call_output_used(self, tool_id: str, llm_span: Span) -> None: | |
"output", | ||
"input", | ||
) | ||
class HTTPResponse:
    """Read-once wrapper around a response-like object exposing ``read()`` and a status code.

    The body is read from the wrapped object at most once and cached, so ``read()``,
    ``text()`` and ``json()`` can be called repeatedly. After the body has been read the
    wrapped object is closed (when it has a ``close()`` method) so the underlying
    connection is not leaked.
    """

    # NOTE(review): a PR reviewer asked whether this class is different from an existing
    # helper (the question is truncated in the source: "is this different than ...").
    # Confirm before keeping a second response wrapper.

    def __init__(self, resp) -> None:
        """Wrap ``resp``.

        :raises ValueError: if ``resp`` is ``None``.
        """
        if resp is None:
            raise ValueError("Response object cannot be None")
        self._resp = resp
        # Cached body; ``None`` means the wrapped object has not been read yet.
        self._content: Optional[bytes] = None

    @property
    def status_code(self) -> int:
        """Return the status code, looked up as ``status``, then ``code``, then ``getcode()``.

        :raises AttributeError: if the wrapped object exposes none of the three.
        """
        resp = self._resp
        if hasattr(resp, "status"):
            return resp.status
        if hasattr(resp, "code"):
            return resp.code
        if hasattr(resp, "getcode"):
            return resp.getcode()
        raise AttributeError(f"Could not find status code in response object of type {type(resp)}")

    def read(self) -> bytes:
        """Return the response body, reading (and then closing) the wrapped object on first use.

        A ``None`` result from the wrapped ``read()`` is treated as an empty body and cached,
        so the wrapped object is never read twice.
        """
        if self._content is None:
            try:
                content = self._resp.read()
            finally:
                # Release the underlying connection/file whether or not the read succeeded.
                close = getattr(self._resp, "close", None)
                if callable(close):
                    close()
            self._content = b"" if content is None else content
        return self._content

    def text(self) -> str:
        """Return the body decoded as UTF-8."""
        return self.read().decode("utf-8")

    def json(self) -> dict:
        """Return the body parsed as JSON."""
        return json.loads(self.text())
|
||
|
||
def http_request(
    method: str,
    url: str,
    headers: Optional[Dict[str, str]] = None,
    body: Optional[bytes] = None,
    timeout: Optional[float] = None,
) -> HTTPResponse:
    """Make an HTTP request and return an HTTPResponse object.

    HTTP error statuses do not raise: the ``HTTPError`` is wrapped and returned so the
    caller can inspect its status code and body. Other exceptions raised by ``urlopen``
    propagate unchanged.

    :param str method: HTTP method, e.g. ``"GET"`` or ``"POST"``.
    :param str url: URL to request.
    :param headers: Optional request headers.
    :param bytes body: Optional request body.
    :param float timeout: Optional timeout in seconds for blocking operations. When ``None``
                          the call is made without an explicit timeout, as before.
    :returns: An ``HTTPResponse`` wrapping either the successful response or the ``HTTPError``.
    """
    # Pass headers through the constructor so each one goes through ``Request.add_header``
    # and gets urllib's normalized key; updating ``req.headers`` directly skips that, and
    # urllib's own ``has_header("Content-type")`` check can then miss a caller-supplied header.
    req = urllib.request.Request(url, data=body, headers=headers or {}, method=method)
    open_kwargs = {} if timeout is None else {"timeout": timeout}
    try:
        return HTTPResponse(urllib.request.urlopen(req, **open_kwargs))
    except HTTPError as e:
        return HTTPResponse(e)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
""" | ||
LLM Datasets and Experiments. | ||
""" | ||
|
||
from ._dataset import Dataset | ||
from ._experiment import Experiment | ||
from ._decorators import task | ||
from ._decorators import evaluator | ||
from ._decorators import summary_metric | ||
from ._config import init | ||
|
||
|
||
__all__ = ["Dataset", "Experiment", "task", "evaluator", "init", "summary_metric"] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
confused as to why we need new fields for input and output here but maybe it will become obvious from further reading