feat(explore) Add equation support to stats queries #93252

Open · wants to merge 2 commits into master

28 changes: 21 additions & 7 deletions src/sentry/api/bases/organization_events.py
@@ -63,9 +63,12 @@ def get_query_columns(columns, rollup):
 
 
 def resolve_axis_column(
-    column: str, index: int = 0, transform_alias_to_input_format: bool = False
+    column: str,
+    index: int = 0,
+    transform_alias_to_input_format: bool = False,
+    use_rpc: bool = False,
 ) -> str:
-    if is_equation(column):
+    if is_equation(column) and not use_rpc:
         return f"equation[{index}]"
 
     # Function columns on input have names like `"p95(duration)"`. By default, we convert them to their aliases like `"p95_duration"`. Here, we want to preserve the original name, so we return the column as-is
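
For context, a minimal stand-alone sketch of the new behavior; the helpers below are simplified stand-ins for Sentry's real `is_equation` and alias handling, invented for illustration:

    # Simplified stand-ins -- not Sentry's actual implementations.
    def is_equation(field: str) -> bool:
        return field.startswith("equation|")

    def resolve_axis_column(column, index=0, transform_alias_to_input_format=False, use_rpc=False):
        if is_equation(column) and not use_rpc:
            return f"equation[{index}]"
        return column  # RPC equations fall through and keep their original name

    assert resolve_axis_column("equation|count() / 2", index=1) == "equation[1]"
    # With use_rpc=True the equation is no longer rewritten to equation[N]:
    assert resolve_axis_column("equation|count() / 2", index=1, use_rpc=True) == "equation|count() / 2"
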
@@ -87,10 +90,14 @@ def has_feature(self, organization: Organization, request: Request) -> bool:
             )
         )
 
-    def get_equation_list(self, organization: Organization, request: Request) -> list[str]:
+    def get_equation_list(
+        self, organization: Organization, request: Request, param_name: str = "field"
+    ) -> list[str]:
         """equations have a prefix so that they can be easily included alongside our existing fields"""
         return [
-            strip_equation(field) for field in request.GET.getlist("field")[:] if is_equation(field)
+            strip_equation(field)
+            for field in request.GET.getlist(param_name)[:]
+            if is_equation(field)
         ]
 
     def get_field_list(
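
As a minimal illustration of the prefix convention (the helper bodies below are assumptions based on how the diff uses them):

    # Hypothetical stand-ins for the sentry.discover.arithmetic helpers.
    EQUATION_PREFIX = "equation|"

    def is_equation(field: str) -> bool:
        return field.startswith(EQUATION_PREFIX)

    def strip_equation(field: str) -> str:
        return field[len(EQUATION_PREFIX):]

    fields = ["count()", "equation|count() / count_unique(user)"]
    assert [strip_equation(f) for f in fields if is_equation(f)] == ["count() / count_unique(user)"]
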
@@ -546,14 +553,15 @@ def get_event_stats_data(
                         zerofill_results=zerofill_results,
                         dataset=dataset,
                         transform_alias_to_input_format=transform_alias_to_input_format,
+                        use_rpc=use_rpc,
                     )
                     if request.query_params.get("useOnDemandMetrics") == "true":
                         results[key]["isMetricsExtractedData"] = self._query_if_extracted_data(
                             results, key, query_columns
                         )
                 else:
                     column = resolve_axis_column(
-                        query_columns[0], 0, transform_alias_to_input_format
+                        query_columns[0], 0, transform_alias_to_input_format, use_rpc
                     )
                     results[key] = serializer.serialize(
                         event_result,
@@ -586,14 +594,17 @@ def get_event_stats_data(
                 zerofill_results=zerofill_results,
                 dataset=dataset,
                 transform_alias_to_input_format=transform_alias_to_input_format,
+                use_rpc=use_rpc,
             )
             if top_events > 0 and isinstance(result, SnubaTSResult):
                 serialized_result = {"": serialized_result}
         else:
             extra_columns = None
             if comparison_delta:
                 extra_columns = ["comparisonCount"]
-            column = resolve_axis_column(query_columns[0], 0, transform_alias_to_input_format)
+            column = resolve_axis_column(
+                query_columns[0], 0, transform_alias_to_input_format, use_rpc
+            )
             serialized_result = serializer.serialize(
                 result,
                 column=column,
@@ -643,6 +654,7 @@ def serialize_multiple_axis(
         zerofill_results: bool = True,
         dataset: Any | None = None,
         transform_alias_to_input_format: bool = False,
+        use_rpc: bool = False,
     ) -> dict[str, Any]:
         # Return with requested yAxis as the key
         result = {}
@@ -658,7 +670,9 @@
         for index, query_column in enumerate(query_columns):
             result[columns[index]] = serializer.serialize(
                 event_result,
-                resolve_axis_column(query_column, equations, transform_alias_to_input_format),
+                resolve_axis_column(
+                    query_column, equations, transform_alias_to_input_format, use_rpc
+                ),
                 order=index,
                 allow_partial_buckets=allow_partial_buckets,
                 zerofill_results=zerofill_results,
1 change: 1 addition & 0 deletions src/sentry/api/endpoints/organization_events_stats.py
@@ -309,6 +309,7 @@ def _get_event_stats(
                     use_aggregate_conditions=True,
                 ),
                 sampling_mode=snuba_params.sampling_mode,
+                equations=self.get_equation_list(organization, request),
             )
             return scoped_dataset.top_events_timeseries(
                 timeseries_columns=query_columns,
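
For illustration only (the request shape is an assumption, not taken from the diff): on this endpoint the equations are read from the default `field` parameter, so a top-events request could carry an equation alongside a plain aggregate:

    # Hypothetical query parameters for the events-stats endpoint.
    params = [
        ("yAxis", "count()"),
        ("field", "equation|count() / count_unique(user)"),  # picked up by get_equation_list
    ]
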
@@ -250,6 +250,7 @@ def get_event_stats(
                     use_aggregate_conditions=True,
                 ),
                 sampling_mode=snuba_params.sampling_mode,
+                equations=self.get_equation_list(organization, request, param_name="groupBy"),
             )
             return dataset.top_events_timeseries(
                 timeseries_columns=query_columns,
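
Here, by contrast, equations arrive via the new `param_name` argument as `groupBy` values; again a hypothetical request sketch:

    # Hypothetical query parameters; equations are mixed into groupBy.
    params = [
        ("yAxis", "count()"),
        ("groupBy", "browser.name"),
        ("groupBy", "equation|count() / count_unique(user)"),
    ]
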
29 changes: 19 additions & 10 deletions src/sentry/snuba/rpc_dataset_common.py
@@ -28,6 +28,7 @@
 )
 
 from sentry.api.event_search import SearchFilter, SearchKey, SearchValue
+from sentry.discover import arithmetic
 from sentry.exceptions import InvalidSearchQuery
 from sentry.search.eap.columns import (
     ResolvedAggregate,
@@ -105,20 +106,22 @@ def categorize_column(
 
 
 def categorize_aggregate(
-    column: ResolvedAggregate | ResolvedConditionalAggregate | ResolvedFormula,
+    column: ResolvedAggregate | ResolvedConditionalAggregate | ResolvedFormula | ResolvedEquation,
 ) -> Expression:
-    if isinstance(column, ResolvedFormula):
+    if isinstance(column, (ResolvedFormula, ResolvedEquation)):
         # TODO: Remove when https://github.com/getsentry/eap-planning/issues/206 is merged, since we can use formulas in both APIs at that point
         return Expression(
             formula=transform_binary_formula_to_expression(column.proto_definition),
             label=column.public_alias,
         )
-    if isinstance(column, ResolvedAggregate):
+    elif isinstance(column, ResolvedAggregate):
         return Expression(aggregation=column.proto_definition, label=column.public_alias)
-    if isinstance(column, ResolvedConditionalAggregate):
+    elif isinstance(column, ResolvedConditionalAggregate):
         return Expression(
             conditional_aggregation=column.proto_definition, label=column.public_alias
         )
+    else:
+        raise Exception(f"Unknown column type {type(column)}")
 
 
 def update_timestamps(
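
The change turns the chain of independent ifs into an exhaustive if/elif/else, so an unhandled column type now fails loudly. A stand-alone sketch of the pattern (placeholder classes, not Sentry's real resolved-column types):

    # Placeholder types, for illustration only.
    class ResolvedAggregate: ...
    class ResolvedConditionalAggregate: ...
    class ResolvedFormula: ...
    class ResolvedEquation: ...

    def categorize(column):
        if isinstance(column, (ResolvedFormula, ResolvedEquation)):
            return "formula"  # equations reuse the formula expression path
        elif isinstance(column, ResolvedAggregate):
            return "aggregation"
        elif isinstance(column, ResolvedConditionalAggregate):
            return "conditional_aggregation"
        else:
            raise Exception(f"Unknown column type {type(column)}")

    assert categorize(ResolvedEquation()) == "formula"
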
@@ -190,13 +193,15 @@ def get_timeseries_query(
     extra_conditions: TraceItemFilter | None = None,
 ) -> tuple[
     TimeSeriesRequest,
-    list[ResolvedFormula | ResolvedAggregate | ResolvedConditionalAggregate],
+    list[ResolvedFormula | ResolvedAggregate | ResolvedConditionalAggregate | ResolvedEquation],
     list[ResolvedAttribute],
 ]:
     timeseries_filter, params = update_timestamps(params, search_resolver)
     meta = search_resolver.resolve_meta(referrer=referrer, sampling_mode=sampling_mode)
     query, _, query_contexts = search_resolver.resolve_query(query_string)
-    (functions, _) = search_resolver.resolve_functions(y_axes)
+    selected_equations, selected_axes = arithmetic.categorize_columns(y_axes)
+    (functions, _) = search_resolver.resolve_functions(selected_axes)
+    equations, _ = search_resolver.resolve_equations(selected_equations)
     groupbys, groupby_contexts = search_resolver.resolve_attributes(groupby)
 
     # Virtual context columns (VCCs) are currently only supported in TraceItemTable.
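
A hypothetical sketch of what arithmetic.categorize_columns does, inferred from its usage here (the real implementation lives in sentry.discover.arithmetic):

    # Assumed behavior: split columns into stripped equations and plain fields.
    def categorize_columns(columns):
        equations, fields = [], []
        for column in columns:
            if column.startswith("equation|"):
                equations.append(column[len("equation|"):])
            else:
                fields.append(column)
        return equations, fields

    assert categorize_columns(["count()", "equation|count() / 2"]) == (["count() / 2"], ["count()"])
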
@@ -224,15 +229,17 @@ def get_timeseries_query(
         TimeSeriesRequest(
             meta=meta,
             filter=query,
-            expressions=[categorize_aggregate(fn) for fn in functions if fn.is_aggregate],
+            expressions=[
+                categorize_aggregate(fn) for fn in (functions + equations) if fn.is_aggregate
+            ],
             group_by=[
                 groupby.proto_definition
                 for groupby in groupbys
                 if isinstance(groupby.proto_definition, AttributeKey)
             ],
             granularity_secs=params.timeseries_granularity_secs,
         ),
-        functions,
+        (functions + equations),
         groupbys,
     )
@@ -492,6 +499,7 @@ def run_top_events_timeseries_query(
     referrer: str,
     config: SearchResolverConfig,
     sampling_mode: SAMPLING_MODES | None,
+    equations: list[str] | None = None,
 ) -> Any:
     """We intentionally duplicate run_timeseries_query code here to reduce the complexity of needing multiple helper
     functions that both would call
@@ -511,17 +519,18 @@ def run_top_events_timeseries_query(
     table_search_resolver = get_resolver(table_query_params, config)
 
     # Make a table query first to get what we need to filter by
+    _, non_equation_axes = arithmetic.categorize_columns(y_axes)
     top_events = run_table_query(
         TableQuery(
             query_string=query_string,
-            selected_columns=raw_groupby + y_axes,
+            selected_columns=raw_groupby + non_equation_axes,
             orderby=orderby,
             offset=0,
             limit=limit,
             referrer=referrer,
             sampling_mode=sampling_mode,
             resolver=table_search_resolver,
-            equations=[],
+            equations=equations,
         )
     )
     if len(top_events["data"]) == 0:
2 changes: 2 additions & 0 deletions src/sentry/snuba/spans_rpc.py
@@ -159,6 +159,7 @@ def run_top_events_timeseries_query(
     referrer: str,
     config: SearchResolverConfig,
     sampling_mode: SAMPLING_MODES | None,
+    equations: list[str] | None = None,
 ) -> Any:
     return rpc_dataset_common.run_top_events_timeseries_query(
         get_resolver=get_resolver,
@@ -171,6 +172,7 @@ def run_top_events_timeseries_query(
         referrer=referrer,
         config=config,
         sampling_mode=sampling_mode,
+        equations=equations,
     )