Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 28 additions & 8 deletions src/sentry/issues/endpoints/organization_group_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,21 +385,41 @@ def get(self, request: Request, organization: Organization) -> Response:
for search_filter in query_kwargs.get("search_filters", [])
if search_filter.key.name == "status" and search_filter.operator in EQUALITY_OPERATORS
]
pre_filter_count = len(context)
if status and (GroupStatus.UNRESOLVED in status[0].value.raw_value):
status_labels = {QUERY_STATUS_LOOKUP[s] for s in status[0].value.raw_value}
context = [r for r in context if "status" not in r or r["status"] in status_labels]
post_filter_count = len(context)

# Sanity check: if we're on the first and last page with no more results,
# the estimated hits from sampling may be too high due to Snuba/Postgres
# data inconsistency. Cap hits to match the actual number of results.
# Adjust X-Hits to account for post-filtering
# If post-filtering removed items, we need to adjust the hit count proportionally
if (
cursor_result.hits is not None
and cursor_result.next.has_results is False
and not request.GET.get("cursor")
and pre_filter_count > 0
and post_filter_count < pre_filter_count
):
actual_count = len(context)
if cursor_result.hits > actual_count:
cursor_result.hits = actual_count
# Calculate the filter ratio and apply it to the total hits
filter_ratio = post_filter_count / pre_filter_count
adjusted_hits = int(cursor_result.hits * filter_ratio)
# Don't increase hits, only decrease if filtering occurred
cursor_result.hits = min(cursor_result.hits, adjusted_hits)

# Sanity check: Cap X-Hits to actual results count when appropriate
# This handles cases where sampling overestimated or post-filtering reduced results
if cursor_result.hits is not None:
# If we're on the first page with no cursor
if not request.GET.get("cursor"):
# And there are no more results after this page
if cursor_result.next.has_results is False:
# Cap hits to actual count (all results fit on one page)
if cursor_result.hits > post_filter_count:
cursor_result.hits = post_filter_count
# Or if we got fewer results than requested even though we asked for
# count_hits. This indicates the search backend didn't have enough
# results to fill the page
elif post_filter_count < query_kwargs.get("limit", 100):
# We likely have all results, so cap to actual count
cursor_result.hits = post_filter_count

response = Response(context)

Expand Down
32 changes: 32 additions & 0 deletions src/sentry/issues/endpoints/project_group_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,41 @@ def get(self, request: Request, project: Project) -> Response:
for search_filter in query_kwargs.get("search_filters", [])
if search_filter.key.name == "status" and search_filter.operator in EQUALITY_OPERATORS
]
pre_filter_count = len(context)
if status and (GroupStatus.UNRESOLVED in status[0].value.raw_value):
status_labels = {QUERY_STATUS_LOOKUP[s] for s in status[0].value.raw_value}
context = [r for r in context if "status" not in r or r["status"] in status_labels]
post_filter_count = len(context)

# Adjust X-Hits to account for post-filtering
# If post-filtering removed items, we need to adjust the hit count proportionally
if (
cursor_result.hits is not None
and pre_filter_count > 0
and post_filter_count < pre_filter_count
):
# Calculate the filter ratio and apply it to the total hits
filter_ratio = post_filter_count / pre_filter_count
adjusted_hits = int(cursor_result.hits * filter_ratio)
# Don't increase hits, only decrease if filtering occurred
cursor_result.hits = min(cursor_result.hits, adjusted_hits)

# Sanity check: Cap X-Hits to actual results count when appropriate
# This handles cases where sampling overestimated or post-filtering reduced results
if cursor_result.hits is not None:
# If we're on the first page with no cursor
if not request.GET.get("cursor"):
# And there are no more results after this page
if cursor_result.next.has_results is False:
# Cap hits to actual count (all results fit on one page)
if cursor_result.hits > post_filter_count:
cursor_result.hits = post_filter_count
# Or if we got fewer results than requested even though we asked for
# count_hits. This indicates the search backend didn't have enough
# results to fill the page
elif post_filter_count < query_kwargs.get("limit", 100):
# We likely have all results, so cap to actual count
cursor_result.hits = post_filter_count

response = Response(context)

Expand Down
69 changes: 69 additions & 0 deletions tests/sentry/issues/endpoints/test_organization_group_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1008,6 +1008,75 @@ def test_hits_capped_when_overestimated(self, mock_calculate_hits: MagicMock) ->
links = self._parse_links(response["Link"])
assert links["next"]["results"] == "false"

@patch("sentry.search.snuba.executors.PostgresSnubaQueryExecutor.calculate_hits")
def test_hits_adjusted_after_post_filtering(self, mock_calculate_hits: MagicMock) -> None:
    """
    Test that X-Hits is capped to the actual result count after post-filtering.

    NOTE(review): despite the test name, the assertion below exercises the
    cap-to-actual-count path, not a proportional adjustment: with a mocked
    estimate of 100 and 10 real results on a complete first page, the
    endpoint must report X-Hits == 10.
    """
    self.login_as(user=self.user)

    # Create 10 unresolved groups, one event per distinct fingerprint.
    for i in range(10):
        self.store_event(
            data={
                "timestamp": before_now(days=i).isoformat(),
                "fingerprint": [f"group-{i}"],
            },
            project_id=self.project.id,
        )

    # Mock calculate_hits to return a deliberately inflated estimate.
    mock_calculate_hits.return_value = 100

    # Make a request with a status filter; the endpoint's status
    # post-filter may remove rows from the returned page.
    response = self.get_success_response(limit=10, query="is:unresolved")

    # All 10 groups are unresolved, so nothing is filtered out.
    assert len(response.data) == 10

    # X-Hits should be capped to 10 because:
    # 1. We're on the first page (no cursor)
    # 2. There are no more results (next.has_results = False)
    # 3. The mocked estimate (100) is greater than actual results (10)
    # This is correct behavior - we cap overestimates when we have all results
    assert response["X-Hits"] == "10"

def test_hits_capped_on_short_first_page(self) -> None:
    """
    X-Hits must equal the true total when the first page is not full.

    With no cursor and fewer rows returned than the requested limit, the
    backend has exhausted its results, so the reported hit count is capped
    to the actual number of groups rather than any sampling estimate.
    """
    self.login_as(user=self.user)

    # Only five groups exist, spread over five distinct days/fingerprints.
    group_total = 5
    for day in range(group_total):
        event_data = {
            "timestamp": before_now(days=day).isoformat(),
            "fingerprint": [f"group-{day}"],
        }
        self.store_event(data=event_data, project_id=self.project.id)

    # Ask for far more items (25) than exist (5).
    response = self.get_success_response(limit=25, query="")

    # Every group fits on the single page.
    assert len(response.data) == group_total

    # The hit count reflects the real total, not an estimate.
    assert response["X-Hits"] == "5"

    # Pagination must report that no further page exists.
    links = self._parse_links(response["Link"])
    assert links["next"]["results"] == "false"

def test_assigned_me_none(self) -> None:
self.login_as(user=self.user)
groups = []
Expand Down
Loading