Skip to content

ref(assemble): Remove old find_missing_chunks method #86588

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/sentry/api/endpoints/debug_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ def batch_assemble(project, files):
checksums_to_check -= checksums_without_chunks

# 4. Find missing chunks and group them per checksum.
all_missing_chunks = find_missing_chunks(project.organization, list(chunks_to_check.keys()))
all_missing_chunks = find_missing_chunks(project.organization.id, set(chunks_to_check.keys()))

missing_chunks_per_checksum: dict[str, set[str]] = {}
for chunk in all_missing_chunks:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ def post(self, request: Request, organization) -> Response:
chunks = data.get("chunks", [])

# We check if all requested chunks have been uploaded.
with sentry_sdk.start_span(op="artifact_bundle.assemble.find_missing_chunks"):
missing_chunks = find_missing_chunks(organization, chunks)
missing_chunks = find_missing_chunks(organization.id, set(chunks))

# In case there are some missing chunks, we will tell the client which chunks we require.
if missing_chunks:
return Response(
Expand Down
44 changes: 5 additions & 39 deletions src/sentry/debug_files/upload.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,21 @@
from collections.abc import Sequence
from datetime import timedelta

import sentry_sdk
from django.utils import timezone

from sentry import features
from sentry.models.files import FileBlob, FileBlobOwner
from sentry.models.organization import Organization
from sentry.models.files import FileBlob


def find_missing_chunks(organization: Organization, chunks: Sequence[str]):
def find_missing_chunks(organization_id: int, chunks: set[str]):
"""Returns a list of chunks which are missing for an org."""
if features.has("organizations:find-missing-chunks-new", organization):
return _find_missing_chunks_new(organization.id, set(chunks))

return _find_missing_chunks_old(organization.id, set(chunks))


def _find_missing_chunks_new(organization_id: int, chunks: set[str]):
with sentry_sdk.start_span(op="find_missing_chunks_new") as span:
with sentry_sdk.start_span(op="find_missing_chunks") as span:
span.set_tag("organization_id", organization_id)
span.set_data("chunks_size", len(chunks))

if not chunks:
return []

with sentry_sdk.start_span(op="find_missing_chunks_new.fetch_owned_file_blobs"):
with sentry_sdk.start_span(op="find_missing_chunks.fetch_owned_file_blobs"):
owned_file_blobs = FileBlob.objects.filter(
checksum__in=chunks, fileblobowner__organization_id=organization_id
).values_list(
Expand All @@ -39,7 +29,7 @@ def _find_missing_chunks_new(organization_id: int, chunks: set[str]):
owned_file_chunks = {checksum for _, checksum, _ in owned_file_blobs}
unowned_file_chunks = chunks - owned_file_chunks

with sentry_sdk.start_span(op="find_missing_chunks_new.fetch_unowned_file_blobs"):
with sentry_sdk.start_span(op="find_missing_chunks.fetch_unowned_file_blobs"):
unowned_file_blobs = FileBlob.objects.filter(
checksum__in=unowned_file_chunks,
).values_list(
Expand All @@ -65,27 +55,3 @@ def _find_missing_chunks_new(organization_id: int, chunks: set[str]):

# We return all the file chunks that are not bound to the supplied organization.
return list(unowned_file_chunks)


def _find_missing_chunks_old(organization_id: int, chunks: set[str]):
with sentry_sdk.start_span(op="find_missing_chunks_old") as span:
span.set_tag("organization_id", organization_id)
span.set_data("chunks_size", len(chunks))

now = timezone.now()
threshold = now - timedelta(hours=12)

with sentry_sdk.start_span(op="find_missing_chunks_old.update_timestamp"):
FileBlob.objects.filter(checksum__in=chunks, timestamp__lte=threshold).update(
timestamp=now
)

# Compute the set of all existing chunks.
with sentry_sdk.start_span(op="find_missing_chunks_old.get_owned_chunks"):
owned = set(
FileBlobOwner.objects.filter(
blob__checksum__in=chunks, organization_id=organization_id
).values_list("blob__checksum", flat=True)
)

return list(chunks - owned)
7 changes: 0 additions & 7 deletions src/sentry/features/temporary.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,13 +488,6 @@ def register_temporary_features(manager: FeatureManager):
manager.add("organizations:ourlogs-ingestion", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
# Enable updated form for 3p publishing flow
manager.add("organizations:streamlined-publishing-flow", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
# Enable new find missing chunks algorithm
manager.add(
"organizations:find-missing-chunks-new",
OrganizationFeature,
FeatureHandlerStrategy.FLAGPOLE,
api_expose=False
)
# Enable per-project selection for Jira integration
manager.add("organizations:jira-per-project-statuses", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
# Enable Relay extracting logs from breadcrumbs for a project.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from sentry.silo.base import SiloMode
from sentry.tasks.assemble import ChunkFileState, assemble_artifacts
from sentry.testutils.cases import APITestCase
from sentry.testutils.helpers import with_feature
from sentry.testutils.outbox import outbox_runner
from sentry.testutils.silo import assume_test_silo_mode
from sentry.utils.security.orgauthtoken_token import generate_token, hash_token
Expand All @@ -29,7 +28,6 @@ def setUp(self):
args=[self.organization.slug],
)

@with_feature("organizations:find-missing-chunks-new")
def test_assemble_json_schema(self):
response = self.client.post(
self.url, data={"lol": "test"}, HTTP_AUTHORIZATION=f"Bearer {self.token.token}"
Expand Down Expand Up @@ -118,7 +116,6 @@ def test_assemble_json_schema(self):
assert response.status_code == 200, response.content
assert response.data["state"] == ChunkFileState.NOT_FOUND

@with_feature("organizations:find-missing-chunks-new")
def test_assemble_with_invalid_projects(self):
bundle_file = self.create_artifact_bundle_zip(
org=self.organization.slug, release=self.release.version
Expand All @@ -144,7 +141,6 @@ def test_assemble_with_invalid_projects(self):
assert response.status_code == 400, response.content
assert response.data["error"] == "One or more projects are invalid"

@with_feature("organizations:find-missing-chunks-new")
def test_assemble_with_valid_project_slugs(self):
# Test with all valid project slugs
valid_project = self.create_project()
Expand All @@ -170,7 +166,6 @@ def test_assemble_with_valid_project_slugs(self):

self.assertEqual(response.status_code, 200)

@with_feature("organizations:find-missing-chunks-new")
def test_assemble_with_valid_project_ids(self):
# Test with all valid project IDs
valid_project = self.create_project()
Expand All @@ -196,7 +191,6 @@ def test_assemble_with_valid_project_ids(self):

self.assertEqual(response.status_code, 200)

@with_feature("organizations:find-missing-chunks-new")
def test_assemble_with_mix_of_slugs_and_ids(self):
# Test with a mix of valid project slugs and IDs
valid_project = self.create_project()
Expand Down Expand Up @@ -227,7 +221,6 @@ def test_assemble_with_mix_of_slugs_and_ids(self):

self.assertEqual(response.status_code, 200)

@with_feature("organizations:find-missing-chunks-new")
@patch("sentry.tasks.assemble.assemble_artifacts")
def test_assemble_without_version_and_dist(self, mock_assemble_artifacts):
bundle_file = self.create_artifact_bundle_zip(
Expand Down Expand Up @@ -264,7 +257,6 @@ def test_assemble_without_version_and_dist(self, mock_assemble_artifacts):
}
)

@with_feature("organizations:find-missing-chunks-new")
@patch("sentry.tasks.assemble.assemble_artifacts")
def test_assemble_with_version_and_no_dist(self, mock_assemble_artifacts):
bundle_file = self.create_artifact_bundle_zip(
Expand Down Expand Up @@ -302,7 +294,6 @@ def test_assemble_with_version_and_no_dist(self, mock_assemble_artifacts):
}
)

@with_feature("organizations:find-missing-chunks-new")
@patch("sentry.tasks.assemble.assemble_artifacts")
def test_assemble_with_version_and_dist(self, mock_assemble_artifacts):
dist = "android"
Expand Down Expand Up @@ -342,7 +333,6 @@ def test_assemble_with_version_and_dist(self, mock_assemble_artifacts):
}
)

@with_feature("organizations:find-missing-chunks-new")
def test_assemble_with_missing_chunks(self):
dist = "android"
bundle_file = self.create_artifact_bundle_zip(
Expand Down Expand Up @@ -387,7 +377,6 @@ def test_assemble_with_missing_chunks(self):
assert response.status_code == 200, response.content
assert response.data["state"] == ChunkFileState.CREATED

@with_feature("organizations:find-missing-chunks-new")
def test_assemble_response(self):
bundle_file = self.create_artifact_bundle_zip(
org=self.organization.slug, release=self.release.version
Expand Down Expand Up @@ -417,7 +406,6 @@ def test_assemble_response(self):
assert response.status_code == 200, response.content
assert response.data["state"] == ChunkFileState.CREATED

@with_feature("organizations:find-missing-chunks-new")
def test_assemble_org_auth_token(self):
org2 = self.create_organization(owner=self.user)

Expand Down
Loading