Skip to content

Commit

Permalink
migration: backfill apitoken hashed values (#71728)
Browse files Browse the repository at this point in the history
Supports getsentry/rfcs#32

We've been hashing tokens as they are used to authenticate
(#65941), but the number of newly hashed tokens has started to
level out. This is a backfill migration to fill in the hashed
values for all of the remaining tokens.

Huge thank you to @markstory @wedamija and @GabeVillalobos for helping
with the migration test! 🙏
  • Loading branch information
mdtro authored Jun 10, 2024
1 parent 28fe514 commit 2a5bbd8
Show file tree
Hide file tree
Showing 3 changed files with 208 additions and 1 deletion.
2 changes: 1 addition & 1 deletion migrations_lockfile.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ feedback: 0004_index_together
hybridcloud: 0016_add_control_cacheversion
nodestore: 0002_nodestore_no_dictfield
replays: 0004_index_together
sentry: 0725_create_sentry_groupsearchview_table
sentry: 0726_apitoken_backfill_hashes
social_auth: 0002_default_auto_field
154 changes: 154 additions & 0 deletions src/sentry/migrations/0726_apitoken_backfill_hashes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
# Generated by Django 5.0.6 on 2024-05-29 21:28

import hashlib
import logging
from enum import IntEnum

from django.db import migrations, router
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
from django.db.migrations.state import StateApps

from sentry.new_migrations.migrations import CheckedMigration
from sentry.utils.query import RangeQuerySetWrapperWithProgressBar

logger = logging.getLogger(__name__)


def backfill_hash_values(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
    """Backfill SHA-256 hashes on ``ApiToken`` rows and queue outbox rows for each change.

    Walks every ``ApiToken`` row. A missing ``hashed_token`` is filled with the
    SHA-256 hex digest of ``token``; a missing ``hashed_refresh_token`` is filled
    the same way when the row has a ``refresh_token``. Each row that was changed
    is saved, and one ``ControlOutbox`` row (category ``API_TOKEN_UPDATE``) is
    created per region found for the token's user.

    Rows whose hashes are already populated are left untouched, so re-running
    the migration does not rewrite them or create additional outbox rows.

    Args:
        apps: Historical app registry used to resolve the models.
        schema_editor: Not used by this function; it is part of the callable
            signature expected by ``migrations.RunPython``.
    """
    ApiToken = apps.get_model("sentry", "ApiToken")
    ControlOutbox = apps.get_model("sentry", "ControlOutbox")
    OrganizationMemberMapping = apps.get_model("sentry", "OrganizationMemberMapping")
    OrganizationMapping = apps.get_model("sentry", "OrganizationMapping")

    try:
        from collections.abc import Container

        from django.conf import settings

        from sentry.services.hybrid_cloud.util import control_silo_function
        from sentry.silo.base import SiloMode
        from sentry.silo.safety import unguarded_write
    except ImportError:
        # NOTE(review): returning here ends the migration without backfilling
        # anything; the failure is only visible in the log. Kept as-is because
        # it looks deliberate -- confirm that is the desired outcome.
        logger.exception("Cannot execute migration. Required symbols could not be imported")
        return

    # copied from src/sentry/models/outbox.py
    class OutboxCategory(IntEnum):
        USER_UPDATE = 0
        UNUSED_TWO = 4
        UNUSUED_THREE = 13
        UNUSED_ONE = 19
        AUTH_IDENTITY_UPDATE = 25
        API_TOKEN_UPDATE = 32

    # copied from src/sentry/models/outbox.py
    _outbox_categories_for_scope: dict[int, set[OutboxCategory]] = {}
    _used_categories: set[OutboxCategory] = set()

    # copied from src/sentry/models/outbox.py
    def scope_categories(enum_value: int, categories: set[OutboxCategory]) -> int:
        """Register ``categories`` under ``enum_value`` and return ``enum_value``.

        Asserts that none of the categories was registered to a scope before.
        """
        _outbox_categories_for_scope[enum_value] = categories
        inter = _used_categories.intersection(categories)
        assert not inter, f"OutboxCategories {inter} were already registered to a different scope"
        _used_categories.update(categories)
        return enum_value

    # copied from src/sentry/models/outbox.py
    class OutboxScope(IntEnum):
        USER_SCOPE = scope_categories(
            1,
            {
                OutboxCategory.USER_UPDATE,
                OutboxCategory.API_TOKEN_UPDATE,
                OutboxCategory.UNUSED_ONE,
                OutboxCategory.UNUSED_TWO,
                OutboxCategory.UNUSUED_THREE,
                OutboxCategory.AUTH_IDENTITY_UPDATE,
            },
        )

    @control_silo_function
    def _find_orgs_for_user(user_id: int) -> set[int]:
        """Return the organization ids that have a member mapping for ``user_id``."""
        return set(
            OrganizationMemberMapping.objects.filter(user_id=user_id).values_list(
                "organization_id", flat=True
            )
        )

    @control_silo_function
    def find_regions_for_orgs(org_ids: Container[int]) -> set[str]:
        """Return the region names mapped to ``org_ids``.

        In monolith mode the single configured monolith region is returned.
        """
        if SiloMode.get_current_mode() == SiloMode.MONOLITH:
            return {settings.SENTRY_MONOLITH_REGION}
        else:
            return set(
                OrganizationMapping.objects.filter(organization_id__in=org_ids).values_list(
                    "region_name", flat=True
                )
            )

    @control_silo_function
    def find_regions_for_user(user_id: int) -> set[str]:
        """Return the region names of every organization ``user_id`` is mapped to."""
        if SiloMode.get_current_mode() == SiloMode.MONOLITH:
            return {settings.SENTRY_MONOLITH_REGION}

        org_ids = _find_orgs_for_user(user_id)
        return find_regions_for_orgs(org_ids)

    for api_token in RangeQuerySetWrapperWithProgressBar(ApiToken.objects.all()):
        changed = False

        if api_token.hashed_token is None:
            api_token.hashed_token = hashlib.sha256(api_token.token.encode()).hexdigest()
            changed = True

        # If there's a refresh token make sure it is hashed as well, but only
        # when the hash is still missing: re-hashing an already populated value
        # would force a needless save and extra outbox rows for that token.
        if api_token.refresh_token and api_token.hashed_refresh_token is None:
            api_token.hashed_refresh_token = hashlib.sha256(
                api_token.refresh_token.encode()
            ).hexdigest()
            changed = True

        # only save if we've actually had to hash values
        if not changed:
            continue

        with unguarded_write(using=router.db_for_write(ApiToken)):
            api_token.save(update_fields=["hashed_token", "hashed_refresh_token"])
            # One outbox row per region found for the token's user.
            for region in find_regions_for_user(api_token.user_id):
                ControlOutbox.objects.create(
                    shard_scope=OutboxScope.USER_SCOPE,
                    shard_identifier=api_token.user_id,
                    category=OutboxCategory.API_TOKEN_UPDATE,
                    region_name=region,
                    object_identifier=api_token.id,
                )


class Migration(CheckedMigration):
    # `is_post_deployment` marks a migration that must not be run automatically in
    # production. Only use it when running the migration after the code has deployed
    # is safe, which rules out most operations that alter a table's schema.
    # Reasonable candidates for post-deployment:
    # - Large data migrations: these are usually run by hand so they can be monitored
    #   and do not hold up a deploy for a long time.
    # - Adding indexes to large tables: this can be slow, so it is preferable to run it
    #   outside of deployments. Adding an index is a schema change, but it is completely
    #   safe to apply after the code has deployed.
    # After deploying, run these manually via:
    # https://develop.sentry.dev/database-migrations/#migration-deployment

    is_post_deployment = True

    dependencies = [("sentry", "0725_create_sentry_groupsearchview_table")]

    operations = [
        migrations.RunPython(
            code=backfill_hash_values,
            # Reversing is a no-op: the backfilled hashes are left in place.
            reverse_code=migrations.RunPython.noop,
            hints={"tables": ["sentry_apitoken"]},
        ),
    ]
53 changes: 53 additions & 0 deletions tests/sentry/migrations/test_0726_apitoken_backfill_hashes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from sentry.models.outbox import ControlOutbox, OutboxCategory, OutboxScope
from sentry.testutils.cases import TestMigrations
from sentry.testutils.helpers import override_options
from sentry.testutils.silo import control_silo_test


@control_silo_test
class TestBackfillApiTokenHashesMigration(TestMigrations):
    """Migrates from 0725 to 0726 and checks that token hashes and outbox rows exist."""

    migrate_from = "0725_create_sentry_groupsearchview_table"
    migrate_to = "0726_apitoken_backfill_hashes"
    connection = "control"

    @override_options({"apitoken.save-hash-on-create": False})
    def setup_initial_state(self):
        """Create one user auth token and one sentry-app install token, both unhashed."""
        owner = self.create_user()
        self.user_auth_token = self.create_user_auth_token(user=owner)

        # The user needs an org so that a membership exists.
        org = self.create_organization(owner=owner)

        sentry_app = self.create_sentry_app(user=owner, organization_id=org.id)
        self.app_install = self.create_sentry_app_installation(
            organization=org, user=owner, slug=sentry_app.slug
        )

        assert self.user_auth_token.hashed_token is None
        # A user auth token carries no refresh token.
        assert self.user_auth_token.refresh_token is None

        install_token = self.app_install.api_token
        assert install_token.hashed_token is None
        assert install_token.hashed_refresh_token is None
        # Sentry-app tokens, on the other hand, do carry a refresh token.
        assert install_token.refresh_token is not None

    def _outbox_for(self, token):
        """Fetch the single API_TOKEN_UPDATE outbox row in the user scope for ``token``."""
        return ControlOutbox.objects.get(
            shard_scope=OutboxScope.USER_SCOPE,
            category=OutboxCategory.API_TOKEN_UPDATE,
            object_identifier=token.id,
            shard_identifier=token.user_id,
        )

    def test_for_hashed_value(self):
        """Both tokens end up hashed and each has a matching outbox row."""
        self.user_auth_token.refresh_from_db()
        assert self.user_auth_token.hashed_token is not None
        assert self._outbox_for(self.user_auth_token)

        self.app_install.refresh_from_db()
        install_token = self.app_install.api_token
        assert install_token.hashed_token is not None
        assert install_token.hashed_refresh_token is not None
        assert self._outbox_for(install_token)

0 comments on commit 2a5bbd8

Please sign in to comment.