getsentry · gggritso · Mar 22, 2023 · Mar 20, 2023 · Mar 22, 2023 · Mar 22, 2023
diff --git a/src/sentry/utils/performance_issues/base.py b/src/sentry/utils/performance_issues/base.py
@@ -167,19 +167,46 @@ def get_duration_between_spans(first_span: Span, second_span: Span):
 
 
 def get_url_from_span(span: Span) -> str:
+    """
+    Parses the span data and pulls out the URL. Accounts for different SDKs and
+    different versions of SDKs formatting and parsing the URL contents
+    differently.
+    """
+
     data = span.get("data") or {}
-    url = data.get("url") or ""
-    if not url:
-        # If data is missing, fall back to description
-        description = span.get("description") or ""
-        parts = description.split(" ", 1)
-        if len(parts) == 2:
-            url = parts[1]
-
-    if type(url) is dict:
-        url = url.get("pathname") or ""
-
-    return url
+
+    # The most modern version is to provide URL information in the span
+    # data
+    url_data = data.get("url")
+
+    if type(url_data) is dict:
+        # Some transactions mysteriously provide the URL as a dict that looks
+        # like JavaScript's URL object
+        url = url_data.get("pathname") or ""
+        url += url_data.get("search") or ""
+        return url
+
+    if type(url_data) is str:
+        # Usually the URL is a regular string, and so is the query. This
+        # is the standardized format for all SDKs, and is the preferred
+        # format going forward. Otherwise, if `http.query` is absent, `url`
+        # contains the query.
+        url = url_data
+        query_data = data.get("http.query")
+        if type(query_data) is str and len(query_data) > 0:
+            url += f"?{query_data}"
+
+        return url
+
+    # Attempt to parse the full URL from the span description, in case
+    # the previous approaches did not yield a good result
+    description = span.get("description") or ""
+    parts = description.split(" ", 1)
+    if len(parts) == 2:
+        url = parts[1]
+        return url
+
+    return ""
 
 
 def fingerprint_spans(spans: List[Span], unique_only: bool = False):

diff --git a/src/sentry/utils/performance_issues/detectors/n_plus_one_api_calls_detector.py b/src/sentry/utils/performance_issues/detectors/n_plus_one_api_calls_detector.py
@@ -157,29 +157,32 @@ def is_span_eligible(cls, span: Span) -> bool:
         if description.strip()[:3].upper() != "GET":
             return False
 
+        url = get_url_from_span(span)
+
         # GraphQL URLs have complicated queries in them. Until we parse those
         # queries to check for what's duplicated, we can't tell what is being
         # duplicated. Ignore them for now
-        if "graphql" in description:
+        if "graphql" in url:
             return False
 
         # Next.js infixes its data URLs with a build ID. (e.g.,
         # /_next/data/<uuid>/some-endpoint) This causes a fingerprinting
         # explosion, since every deploy would change this ID and create new
         # fingerprints. Since we're not parameterizing URLs yet, we need to
         # exclude them
-        if "_next/data" in description:
+        if "_next/data" in url:
             return False
 
-        url = get_url_from_span(span)
-
         # Next.js error pages cause an N+1 API Call that isn't useful to anyone
         if "__nextjs_original-stack-frame" in url:
             return False
 
         if not url:
             return False
 
+        # Once most users update their SDKs to use the latest standard, we
+        # won't have to do this, since the URLs will be sent in as `span.data`
+        # in a parsed format
         parsed_url = urlparse(str(url))
 
         if parsed_url.netloc in cls.HOST_DENYLIST:

diff --git a/tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py b/tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py
@@ -343,7 +343,7 @@ def test_allows_eligible_spans(span):
         {
             "span_id": "a",
             "op": "http.client",
-            "description": "GET http://service.io/resource",
+            "description": "GET /resource.js",
             "hash": "a",
             "data": {"url": "/resource.js"},
         },
@@ -353,6 +353,16 @@ def test_allows_eligible_spans(span):
             "description": "GET http://service.io/resource?graphql=somequery",
             "hash": "a",
         },
+        {
+            "span_id": "a",
+            "op": "http.client",
+            "description": "GET http://service.io/resource",  # New JS SDK removes query string from description
+            "hash": "a",
+            "data": {
+                "http.query": "graphql=somequery",
+                "url": "http://service.io/resource",
+            },
+        },
         {
             "span_id": "a",
             "op": "http.client",