
Clean up SpanQuery and related APIs #1287


Merged · 4 commits · Mar 28, 2025
6 changes: 3 additions & 3 deletions docs/evals.md
@@ -426,7 +426,7 @@ import logfire
from pydantic_evals import Case, Dataset
from pydantic_evals.evaluators import Evaluator
from pydantic_evals.evaluators.context import EvaluatorContext
-from pydantic_evals.otel.span_tree import SpanQuery, as_predicate
+from pydantic_evals.otel.span_tree import SpanQuery

logfire.configure( # ensure that an OpenTelemetry tracer is configured
send_to_logfire='if-token-present'
@@ -443,7 +443,7 @@ class SpanTracingEvaluator(Evaluator[str, str]):
return {'has_spans': False, 'performance_score': 0.0}

# Find all spans with "processing" in the name
-processing_spans = span_tree.find_all(lambda node: 'processing' in node.name)
+processing_spans = span_tree.find(lambda node: 'processing' in node.name)

# Calculate total processing time
total_processing_time = sum(
@@ -452,7 +452,7 @@

# Check for error spans
error_query: SpanQuery = {'name_contains': 'error'}
-has_errors = span_tree.any(as_predicate(error_query))
+has_errors = span_tree.any(error_query)

# Calculate a performance score (lower is better)
performance_score = 1.0 if total_processing_time < 0.5 else 0.5
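For reference, a minimal sketch of a custom evaluator written against the cleaned-up SpanTree API shown above: `find` takes a plain predicate and `any` accepts a `SpanQuery` dict directly, with no `as_predicate` wrapper. The `NoErrorSpans` class and its queries are illustrative only (not part of this PR), and it assumes an OpenTelemetry tracer is configured as in the docs snippet.

```python
from dataclasses import dataclass

from pydantic_evals.evaluators import Evaluator
from pydantic_evals.evaluators.context import EvaluatorContext
from pydantic_evals.otel.span_tree import SpanQuery


@dataclass
class NoErrorSpans(Evaluator[str, str]):
    """Illustrative: pass when processing spans exist and no span name contains 'error'."""

    def evaluate(self, ctx: EvaluatorContext[str, str]) -> bool:
        span_tree = ctx.span_tree
        # Predicate form, as in the docs example above
        processing_spans = list(span_tree.find(lambda node: 'processing' in node.name))
        # Query-dict form: `any` now takes a SpanQuery directly
        error_query: SpanQuery = {'name_contains': 'error'}
        return bool(processing_spans) and not span_tree.any(error_query)
```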
6 changes: 3 additions & 3 deletions pydantic_evals/pydantic_evals/dataset.py
@@ -823,17 +823,17 @@ async def _run_task(
# otherwise, we don't have a great way to get usage data from arbitrary frameworks.
# Ideally we wouldn't need to hard-code the specific logic here, but I'm not sure a great way to expose it to
# users. Maybe via an argument of type Callable[[SpanTree], dict[str, int | float]] or similar?
-for node in span_tree.flattened():
+for node in span_tree:
if node.attributes.get('gen_ai.operation.name') == 'chat':
task_run.increment_metric('requests', 1)
for k, v in node.attributes.items():
if not isinstance(v, (int, float)):
continue
# TODO: Revisit this choice to strip the prefix..
if k.startswith('gen_ai.usage.details.'):
-task_run.increment_metric(k[21:], v)
+task_run.increment_metric(k.removeprefix('gen_ai.usage.details.'), v)
elif k.startswith('gen_ai.usage.'):
-task_run.increment_metric(k[13:], v)
+task_run.increment_metric(k.removeprefix('gen_ai.usage.'), v)

return EvaluatorContext[InputsT, OutputT, MetadataT](
name=case.name,
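The slicing change in the metric loop is behaviour-preserving; `str.removeprefix` (Python 3.9+) just states explicitly what the hard-coded offset left implicit. A quick stdlib-only illustration, with an example key value:

```python
k = 'gen_ai.usage.details.input_tokens'  # example attribute key

assert k[21:] == 'input_tokens'                                   # old: magic offset
assert k.removeprefix('gen_ai.usage.details.') == 'input_tokens'  # new: self-describing

# removeprefix is also a no-op when the prefix is absent, rather than truncating blindly:
assert 'input_tokens'.removeprefix('gen_ai.usage.details.') == 'input_tokens'
```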
4 changes: 2 additions & 2 deletions pydantic_evals/pydantic_evals/evaluators/__init__.py
@@ -1,4 +1,4 @@
-from .common import Contains, Equals, EqualsExpected, IsInstance, LlmJudge, MaxDuration, Python, SpanQuery
+from .common import Contains, Equals, EqualsExpected, HasMatchingSpan, IsInstance, LlmJudge, MaxDuration, Python
from .context import EvaluatorContext
from .evaluator import EvaluationReason, EvaluationResult, EvaluationScalar, Evaluator, EvaluatorOutput, run_evaluator

@@ -10,7 +10,7 @@
'IsInstance',
'MaxDuration',
'LlmJudge',
-'SpanQuery',
+'HasMatchingSpan',
'Python',
# context
'EvaluatorContext',
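For downstream code, the export rename is a one-line import change (the old name clashed with the `SpanQuery` query type in `pydantic_evals.otel.span_tree`, which `common.py` had to alias as `SpanNodeQuery`). A sketch of the before/after:

```python
# Before this PR (no longer resolves):
# from pydantic_evals.evaluators import SpanQuery

# After this PR:
from pydantic_evals.evaluators import HasMatchingSpan
```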
12 changes: 6 additions & 6 deletions pydantic_evals/pydantic_evals/evaluators/common.py
@@ -6,7 +6,7 @@

from pydantic_ai import models

-from ..otel.span_tree import SpanQuery as SpanNodeQuery, as_predicate
+from ..otel.span_tree import SpanQuery as SpanNodeQuery
from .context import EvaluatorContext
from .evaluator import EvaluationReason, Evaluator, EvaluatorOutput

@@ -17,7 +17,7 @@
'IsInstance',
'MaxDuration',
'LlmJudge',
-'SpanQuery',
+'HasMatchingSpan',
'Python',
)

@@ -177,16 +177,16 @@ async def evaluate(


@dataclass
-class SpanQuery(Evaluator[object, object, object]):
-    """Check if the span tree contains a span with the specified name."""
+class HasMatchingSpan(Evaluator[object, object, object]):
+    """Check if the span tree contains a span that matches the specified query."""

query: SpanNodeQuery

def evaluate(
self,
ctx: EvaluatorContext[object, object, object],
) -> bool:
-return ctx.span_tree.find_first(as_predicate(self.query)) is not None
+return ctx.span_tree.any(self.query)


# TODO: Consider moving this to docs rather than providing it with the library, given the security implications
@@ -211,6 +211,6 @@ def evaluate(self, ctx: EvaluatorContext[object, object, object]) -> EvaluatorOutput:
IsInstance,
MaxDuration,
LlmJudge,
-SpanQuery,
+HasMatchingSpan,
# Python, # not included by default for security reasons
)
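Putting the renamed evaluator to work: a hedged sketch of attaching `HasMatchingSpan` to a dataset, assuming the usual `Case`/`Dataset` constructor arguments from the pydantic_evals docs; the case contents and query are illustrative only.

```python
from pydantic_evals import Case, Dataset
from pydantic_evals.evaluators import HasMatchingSpan

# Record, per case, whether the task run produced at least one span whose name
# contains 'processing' (the query dict is the same SpanQuery shape used above).
dataset = Dataset(
    cases=[Case(name='simple', inputs='hello', expected_output='HELLO')],
    evaluators=[HasMatchingSpan(query={'name_contains': 'processing'})],
)
```

Since `HasMatchingSpan.evaluate` returns a plain bool, the report just records whether the query matched for each case; richer checks (durations, attribute values) still belong in a custom evaluator like the docs example.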