feat(trace-explorer): Allow optional breakdown on category (#70281)
Sometimes the per-project breakdown is insufficient, so allow enhancing
it with more detail on selected span categories.
Zylphrex authored May 3, 2024
1 parent 922584c commit 288f928
Showing 3 changed files with 346 additions and 44 deletions.
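Taken together, the changes below add two request parameters. A minimal sketch of how they might be supplied (the endpoint path, field value, and example values are assumptions; only the parameter names and defaults come from this commit):

# Hypothetical query parameters for the organization traces endpoint.
params = {
    "field": ["id"],                      # required list field
    "breakdownCategory": ["http", "db"],  # optional: enrich the per-project breakdown
    "minBreakdownDuration": 50,           # assumed milliseconds, matching span.duration
    "maxSpansPerTrace": 10,               # existing knob, default 1
}
# e.g. GET /organizations/{organization_slug}/traces/ with these parameters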
181 changes: 147 additions & 34 deletions src/sentry/api/endpoints/organization_traces.py
@@ -37,6 +37,7 @@ class TraceInterval(TypedDict):
start: int
end: int
kind: Literal["project", "missing", "other"]
opCategory: str | None


class TraceResult(TypedDict):
@@ -55,6 +56,9 @@ class TraceResult(TypedDict):


class OrganizationTracesSerializer(serializers.Serializer):
breakdownCategory = serializers.ListField(
required=False, allow_empty=True, child=serializers.CharField()
)
field = serializers.ListField(required=True, allow_empty=False, child=serializers.CharField())
sort = serializers.ListField(required=False, allow_empty=True, child=serializers.CharField())
metricsQuery = serializers.CharField(required=False)
@@ -63,6 +67,7 @@ class OrganizationTracesSerializer(serializers.Serializer):
required=False, allow_empty=True, child=serializers.CharField(allow_blank=True)
)
suggestedQuery = serializers.CharField(required=False)
minBreakdownDuration = serializers.IntegerField(default=0, min_value=0)
maxSpansPerTrace = serializers.IntegerField(default=1, min_value=1, max_value=100)


@@ -101,6 +106,8 @@ def get(self, request: Request, organization: Organization) -> Response:
sort=serialized.get("sort"),
limit=self.get_per_page(request),
max_spans_per_trace=serialized["maxSpansPerTrace"],
breakdown_categories=serialized.get("breakdownCategory", []),
min_breakdown_duration=serialized["minBreakdownDuration"],
get_all_projects=lambda: self.get_projects(
request,
organization,
@@ -139,6 +146,8 @@ def __init__(
sort: str | None,
limit: int,
max_spans_per_trace: int,
breakdown_categories: list[str],
min_breakdown_duration: int,
get_all_projects: Callable[[], list[Project]],
):
self.params = params
@@ -151,6 +160,8 @@ def __init__(
self.sort = sort
self.limit = limit
self.max_spans_per_trace = max_spans_per_trace
self.breakdown_categories = breakdown_categories
self.min_breakdown_duration = min_breakdown_duration
self.get_all_projects = get_all_projects
self._all_projects: list[Project] | None = None

@@ -202,8 +213,51 @@ def _execute(self):
query.process_results(result) for query, result in zip(all_queries, all_raw_results)
]

- meta = self.process_meta_results(all_results)
- data = self.process_final_results(all_results)
# the order of these results is defined by the order
# of the queries in `get_all_meta_data_queries`

idx = 0

traces_metas_results = all_results[idx]
idx += 1

traces_errors_results = all_results[idx]
idx += 1

traces_occurrences_results = all_results[idx]
idx += 1

traces_breakdown_projects_results = all_results[idx]
idx += 1

if self.breakdown_categories:
traces_breakdown_categories_results = all_results[idx]
idx += 1
else:
traces_breakdown_categories_results = {
"data": [],
"meta": {
"fields": {},
"tips": {},
},
}

user_spans_results = all_results[idx]
idx += 1

suggested_spans_results = all_results[idx] if len(all_results) > idx else None
idx += 1

meta = self.process_meta_results(user_spans_results)
data = self.process_final_results(
traces_metas_results=traces_metas_results,
traces_errors_results=traces_errors_results,
traces_occurrences_results=traces_occurrences_results,
traces_breakdown_projects_results=traces_breakdown_projects_results,
traces_breakdown_categories_results=traces_breakdown_categories_results,
user_spans_results=user_spans_results,
suggested_spans_results=suggested_spans_results,
)

return {"data": data, "meta": meta}

@@ -465,37 +519,47 @@ def get_all_meta_data_queries(
snuba_params: SnubaParams,
trace_ids: list[str],
) -> list[QueryBuilder]:
- traces_breakdowns_query = self.get_traces_breakdowns_query(
traces_metas_query = self.get_traces_metas_query(
params,
snuba_params,
trace_ids,
)

- traces_metas_query = self.get_traces_metas_query(
traces_errors_query = self.get_traces_errors_query(
params,
snuba_params,
trace_ids,
)

- traces_errors_query = self.get_traces_errors_query(
traces_occurrences_query = self.get_traces_occurrences_query(
params,
snuba_params,
trace_ids,
)

- traces_occurrences_query = self.get_traces_occurrences_query(
traces_breakdown_projects_query = self.get_traces_breakdown_projects_query(
params,
snuba_params,
trace_ids,
)

- return [
- traces_breakdowns_query,
queries = [
traces_metas_query,
traces_errors_query,
traces_occurrences_query,
traces_breakdown_projects_query,
]

if self.breakdown_categories:
traces_breakdown_categories_query = self.get_traces_breakdown_categories_query(
params,
snuba_params,
trace_ids,
)
queries.append(traces_breakdown_categories_query)

return queries

def get_all_span_samples_queries(
self,
params: ParamsType,
@@ -523,30 +587,34 @@ def get_all_span_samples_queries(

return span_samples_queries

- def process_final_results(self, results) -> list[TraceResult]:
- # the order of these results is defined by the order
- # of the queries in `get_all_meta_data_queries`
- traces_breakdowns_results = results[0]
- traces_metas_results = results[1]
- traces_errors_results = results[2]
- traces_occurrences_results = results[3]
- user_spans_results = results[4]
- suggested_spans_results = results[5] if len(results) > 5 else None

def process_final_results(
self,
*,
traces_metas_results,
traces_errors_results,
traces_occurrences_results,
traces_breakdown_projects_results,
traces_breakdown_categories_results,
user_spans_results,
suggested_spans_results,
) -> list[TraceResult]:
# mapping of trace id to a tuple of start/finish times
traces_range = {
row["trace"]: (row["first_seen()"], row["last_seen()"])
for row in traces_metas_results["data"]
}

- traces_breakdowns = process_breakdowns(
- traces_breakdowns_results["data"],
- traces_range,
- )
spans = [
*traces_breakdown_projects_results["data"],
*traces_breakdown_categories_results["data"],
]
spans.sort(key=lambda span: (span["precise.start_ts"], span["precise.finish_ts"]))

traces_breakdowns = process_breakdowns(spans, traces_range)

# mapping of trace id to a tuple of project slug + transaction name
traces_names: MutableMapping[str, tuple[str, str]] = {}
for row in traces_breakdowns_results["data"]:
for row in traces_breakdown_projects_results["data"]:
# The underlying column is a Nullable(UInt64) but we write a default of 0 to it.
# So make sure to handle both in case something changes.
if not row["parent_span"] or int(row["parent_span"], 16) == 0:
@@ -594,14 +662,13 @@ def process_meta_results(self, results):
]

def process_meta_results(self, results):
- user_spans_results = results[4]
- fields = user_spans_results["meta"].get("fields", {})
fields = results["meta"].get("fields", {})
return {
**user_spans_results["meta"],
**results["meta"],
"fields": {field: fields[field] for field in self.fields},
}

- def get_traces_breakdowns_query(
def get_traces_breakdown_projects_query(
self,
params: ParamsType,
snuba_params: SnubaParams,
@@ -632,6 +699,46 @@ def get_traces_breakdown_projects_query(
),
)

def get_traces_breakdown_categories_query(
self,
params: ParamsType,
snuba_params: SnubaParams,
trace_ids: list[str],
) -> QueryBuilder:
conditions = []

span_categories_str = ",".join(self.breakdown_categories)
conditions.append(f"span.category:[{span_categories_str}]")

trace_ids_str = ",".join(trace_ids)
conditions.append(f"trace:[{trace_ids_str}]")

if self.min_breakdown_duration > 0:
conditions.append(f"span.duration:>={self.min_breakdown_duration}")

return SpansIndexedQueryBuilder(
Dataset.SpansIndexed,
params,
snuba_params=snuba_params,
query=" ".join(conditions),
selected_columns=[
"trace",
"project",
"transaction",
"span.category",
"precise.start_ts",
"precise.finish_ts",
],
orderby=["precise.start_ts", "precise.finish_ts"],
# limit the number of segments we fetch per trace so a single
# large trace does not result in the rest being blank
limitby=("trace", int(MAX_SNUBA_RESULTS / len(trace_ids))),
limit=MAX_SNUBA_RESULTS,
config=QueryBuilderConfig(
transform_alias_to_input_format=True,
),
)
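Because the conditions are plain search fragments joined with spaces, the builder receives a single query string. A runnable sketch with invented values:

# Invented inputs -- two categories, two shortened trace ids, a 50ms floor.
breakdown_categories = ["http", "db"]
trace_ids = ["aaaa", "bbbb"]
min_breakdown_duration = 50

conditions = [
    f"span.category:[{','.join(breakdown_categories)}]",
    f"trace:[{','.join(trace_ids)}]",
    f"span.duration:>={min_breakdown_duration}",
]
assert " ".join(conditions) == "span.category:[http,db] trace:[aaaa,bbbb] span.duration:>=50"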

def get_traces_metas_query(
self,
params: ParamsType,
@@ -829,6 +936,13 @@ def process_breakdowns(data, traces_range):
breakdowns: Mapping[str, list[TraceInterval]] = defaultdict(list)
stacks: Mapping[str, list[TraceInterval]] = defaultdict(list)

def should_merge(interval_a, interval_b):
return (
interval_a["end"] >= interval_b["start"]
and interval_a["project"] == interval_b["project"]
and interval_a["opCategory"] == interval_b["opCategory"]
)

def breakdown_push(trace, interval):
# Clip the interval so that it is within range of the trace
if trace_range := traces_range.get(trace):
@@ -851,22 +965,19 @@ def breakdown_push(trace, interval):
# A gap in the breakdown was found, fill it with a missing interval
breakdown.append(
{
"kind": "missing",
"project": None,
"opCategory": None,
"start": last_interval["end"],
"end": interval["start"],
"kind": "missing",
}
)

breakdown.append(interval)

def stack_push(trace, interval):
last_interval = stack_peek(trace)
- if (
- last_interval
- and last_interval["project"] == interval["project"]
- and last_interval["end"] >= interval["start"]
- ):
if last_interval and should_merge(last_interval, interval):
# update the end of this interval and it will
# be updated in the breakdown as well
last_interval["end"] = max(interval["end"], last_interval["end"])
@@ -902,6 +1013,7 @@ def stack_clear(trace, until=None):
cur: TraceInterval = {
"kind": "project",
"project": row["project"],
"opCategory": row.get("span.category"),
"start": int(row["precise.start_ts"] * 1000),
"end": int(row["precise.finish_ts"] * 1000),
}
@@ -921,8 +1033,9 @@ def stack_clear(trace, until=None):
# Check to see if there is still a gap before the trace ends and fill it
# with an other interval.

- other = {
other: TraceInterval = {
"project": None,
"opCategory": None,
"start": trace_start,
"end": trace_end,
"kind": "other",
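To make the new merging rule concrete: should_merge now requires matching opCategory as well as project, so a categorized span keeps its own interval instead of being folded into the enclosing project interval. A sketch with invented values (process_breakdowns converts the precise timestamps to integer milliseconds):

# Invented rows: one from the per-project breakdown query, one from the new
# per-category query, sorted by start time as in process_final_results.
rows = [
    {"trace": "t1", "project": "backend",
     "precise.start_ts": 0.000, "precise.finish_ts": 0.100},
    {"trace": "t1", "project": "backend", "span.category": "db",
     "precise.start_ts": 0.020, "precise.finish_ts": 0.060},
]
traces_range = {"t1": (0, 100)}  # first_seen()/last_seen() for the trace

# Expected shape of process_breakdowns(rows, traces_range)["t1"] -- a sketch
# of the intended result, not captured output: the db interval survives as a
# separate entry because its opCategory differs from the project interval's.
# [
#     {"kind": "project", "project": "backend", "opCategory": None, "start": 0, "end": 100},
#     {"kind": "project", "project": "backend", "opCategory": "db", "start": 20, "end": 60},
# ]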
3 changes: 3 additions & 0 deletions src/sentry/testutils/cases.py
@@ -1544,6 +1544,7 @@ def store_indexed_span(
store_only_summary: bool = False,
store_metrics_summary: Mapping[str, Sequence[Mapping[str, Any]]] | None = None,
group: str = "00",
category: str | None = None,
):
if span_id is None:
span_id = self._random_span_id()
@@ -1581,6 +1582,8 @@ def store_indexed_span(
payload["_metrics_summary"] = store_metrics_summary
if parent_span_id:
payload["parent_span_id"] = parent_span_id
if category is not None:
payload["sentry_tags"]["category"] = category

# We want to give the caller the possibility to store only a summary since the database does not deduplicate
# on the span_id which makes the assumptions of a unique span_id in the database invalid.
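A minimal sketch of exercising the new test-helper argument (all other arguments to store_indexed_span are elided, since they are not shown in this diff):

# Hypothetical call inside a test; only `category` is new in this commit.
self.store_indexed_span(
    ...,  # existing required span arguments, unchanged
    category="db",  # stored under payload["sentry_tags"]["category"]
)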