Skip to content

feat: hash support for user auth token middleware #65941

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Mar 18, 2024
Merged
80 changes: 60 additions & 20 deletions src/sentry/api/authentication.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import hashlib
from collections.abc import Callable, Iterable
from typing import Any, ClassVar

Expand Down Expand Up @@ -298,6 +299,55 @@ def authenticate(self, request: Request):
class UserAuthTokenAuthentication(StandardAuthentication):
token_name = b"bearer"

def _find_or_update_token_by_hash(self, token_str: str) -> ApiToken | ApiTokenReplica:
    """
    Resolve a plaintext bearer token to its stored token record.

    The SHA-256 hex digest of ``token_str`` is looked up first; if no row
    matches the digest, the lookup is retried against the plaintext
    ``token`` column.

    * REGION silo: an ``ApiTokenReplica`` is returned. This branch only
      reads; it never writes ``hashed_token``.
    * Any other silo mode: an ``ApiToken`` (with ``user`` and
      ``application`` pre-fetched) is returned. When the row was only
      reachable through the plaintext value, its ``hashed_token`` is set
      to the digest and saved before returning.

    Raises ``AuthenticationFailed("Invalid token")`` when neither lookup
    matches.
    """
    digest = hashlib.sha256(token_str.encode()).hexdigest()

    if SiloMode.get_current_mode() == SiloMode.REGION:
        replicas = ApiTokenReplica.objects
        try:
            # Preferred path: the replica already carries the hash.
            return replicas.get(hashed_token=digest)
        except ApiTokenReplica.DoesNotExist:
            try:
                # Fallback: match on the plaintext token value.
                return replicas.get(token=token_str)
            except ApiTokenReplica.DoesNotExist:
                # Unknown under both the hash and the plaintext value.
                raise AuthenticationFailed("Invalid token")

    tokens = ApiToken.objects.select_related("user", "application")
    try:
        # Preferred path: the token already carries the hash.
        return tokens.get(hashed_token=digest)
    except ApiToken.DoesNotExist:
        try:
            # Fallback: match on the plaintext token value.
            plaintext_match = tokens.get(token=token_str)
        except ApiToken.DoesNotExist:
            # Unknown under both the hash and the plaintext value.
            raise AuthenticationFailed("Invalid token")

        # Found via plaintext only: persist the hash on the token row.
        plaintext_match.hashed_token = digest
        plaintext_match.save(update_fields=["hashed_token"])
        return plaintext_match

def accepts_auth(self, auth: list[bytes]) -> bool:
if not super().accepts_auth(auth):
return False
Expand All @@ -320,26 +370,16 @@ def authenticate_token(self, request: Request, token_str: str) -> tuple[Any, Any
application_is_inactive = False

if not token:
if SiloMode.get_current_mode() == SiloMode.REGION:
try:
atr = token = ApiTokenReplica.objects.get(token=token_str)
except ApiTokenReplica.DoesNotExist:
raise AuthenticationFailed("Invalid token")
user = user_service.get_user(user_id=atr.user_id)
application_is_inactive = not atr.application_is_active
else:
try:
at = token = (
ApiToken.objects.filter(token=token_str)
.select_related("user", "application")
.get()
)
except ApiToken.DoesNotExist:
raise AuthenticationFailed("Invalid token")
user = at.user
token = self._find_or_update_token_by_hash(token_str)
if isinstance(token, ApiTokenReplica): # we're running as a REGION silo
user = user_service.get_user(user_id=token.user_id)
application_is_inactive = not token.application_is_active
else: # the token returned is an ApiToken from the CONTROL silo
user = token.user
application_is_inactive = (
at.application is not None and not at.application.is_active
token.application is not None and not token.application.is_active
)

elif isinstance(token, SystemToken):
user = token.user

Expand Down Expand Up @@ -389,9 +429,9 @@ def authenticate_token(self, request: Request, token_str: str) -> tuple[Any, Any
raise AuthenticationFailed("Invalid org token")
else:
try:
token = OrgAuthToken.objects.filter(
token = OrgAuthToken.objects.get(
token_hashed=token_hashed, date_deactivated__isnull=True
).get()
)
except OrgAuthToken.DoesNotExist:
raise AuthenticationFailed("Invalid org token")

Expand Down
6 changes: 6 additions & 0 deletions src/sentry/options/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,12 @@
type=Bool,
flags=FLAG_ALLOW_EMPTY | FLAG_PRIORITIZE_DISK | FLAG_AUTOMATOR_MODIFIABLE,
)
# Boolean option that defaults to True.
# NOTE(review): presumably controls whether an ApiToken's hashed_token is
# stored when the token is created -- confirm against the code that reads it.
register(
"apitoken.save-hash-on-create",
default=True,
type=Bool,
flags=FLAG_ALLOW_EMPTY | FLAG_PRIORITIZE_DISK | FLAG_AUTOMATOR_MODIFIABLE,
)

register(
"api.rate-limit.org-create",
Expand Down
1 change: 1 addition & 0 deletions src/sentry/services/hybrid_cloud/auth/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class RpcApiToken(RpcModel):
application_id: int | None = None
application_is_active: bool = False
token: str = ""
hashed_token: str | None = None
expires_at: datetime.datetime | None = None
allowed_origins: list[str] = Field(default_factory=list)
scope_list: list[str] = Field(default_factory=list)
Expand Down
1 change: 1 addition & 0 deletions src/sentry/services/hybrid_cloud/auth/serial.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def serialize_api_token(at: ApiToken) -> RpcApiToken:
organization_id=at.organization_id,
application_is_active=at.application_id is None or at.application.is_active,
token=at.token,
hashed_token=at.hashed_token,
expires_at=at.expires_at,
allowed_origins=list(at.get_allowed_origins()),
scope_list=at.get_scopes(),
Expand Down
1 change: 1 addition & 0 deletions src/sentry/services/hybrid_cloud/replica/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ def upsert_replicated_api_token(self, *, api_token: RpcApiToken, region_name: st
organization=organization,
application_is_active=api_token.application_is_active,
token=api_token.token,
hashed_token=api_token.hashed_token,
expires_at=api_token.expires_at,
apitoken_id=api_token.id,
scope_list=api_token.scope_list,
Expand Down
69 changes: 69 additions & 0 deletions tests/sentry/api/test_authentication.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import hashlib
import uuid
from datetime import UTC, datetime

Expand Down Expand Up @@ -30,8 +31,11 @@
)
from sentry.silo import SiloMode
from sentry.testutils.cases import TestCase
from sentry.testutils.helpers import override_options
from sentry.testutils.outbox import outbox_runner
from sentry.testutils.pytest.fixtures import django_db_all
from sentry.testutils.silo import assume_test_silo_mode, control_silo_test, no_silo_test
from sentry.types.token import AuthTokenType
from sentry.utils.security.orgauthtoken_token import hash_token


Expand Down Expand Up @@ -202,6 +206,71 @@ def test_no_match(self):
with pytest.raises(AuthenticationFailed):
self.auth.authenticate(request)

@override_options({"apitoken.save-hash-on-create": False})
def test_token_hashed_with_option_off(self):
    """Authenticating with a token that has no stored hash fills the hash in.

    See https://github.com/getsentry/sentry/pull/65941: the
    UserAuthTokenAuthentication middleware hashes tokens as they are used.
    """
    token = ApiToken.objects.create(user=self.user, token_type=AuthTokenType.USER)

    # Nothing has authenticated with this token yet, so no hash is expected.
    assert token.hashed_token is None

    expected_hash = hashlib.sha256(token.token.encode()).hexdigest()

    request = HttpRequest()
    request.META["HTTP_AUTHORIZATION"] = f"Bearer {token.token}"

    # Running the authenticator is what triggers the hashing.
    assert self.auth.authenticate(request) is not None

    # Reload from the database and compare against the locally computed digest.
    token.refresh_from_db()
    assert token.hashed_token == expected_hash


# Checks that a hash filled in on an ApiToken during authentication is also
# present on the matching ApiTokenReplica afterwards.
@no_silo_test
class TestTokenAuthenticationReplication(TestCase):
def setUp(self):
super().setUp()

# authenticator under test
self.auth = UserAuthTokenAuthentication()

@override_options({"apitoken.save-hash-on-create": False})
def test_hash_is_replicated(self):
api_token = ApiToken.objects.create(user=self.user, token_type=AuthTokenType.USER)
# digest the authenticator is expected to store for this token
expected_hash = hashlib.sha256(api_token.token.encode()).hexdigest()

# we haven't authenticated to the API endpoint yet, so this value should be empty
assert api_token.hashed_token is None

# build a request that presents the plaintext token as a Bearer credential
request = HttpRequest()
request.META["HTTP_AUTHORIZATION"] = f"Bearer {api_token.token}"

with assume_test_silo_mode(SiloMode.REGION):
with outbox_runner():
# make sure the token was replicated
api_token_replica = ApiTokenReplica.objects.get(apitoken_id=api_token.id)
assert api_token.token == api_token_replica.token
assert (
api_token_replica.hashed_token is None
) # we don't expect to have a hashed value yet

# trigger the authentication middleware, and thus the hashing backfill
result = self.auth.authenticate(request)
assert result is not None

# check for the expected hash value
api_token.refresh_from_db()
assert api_token.hashed_token == expected_hash

# ApiTokenReplica should also be updated
api_token_replica.refresh_from_db()
assert api_token_replica.hashed_token == expected_hash

# just for good measure
assert api_token.hashed_token == api_token_replica.hashed_token


@django_db_all
@pytest.mark.parametrize("internal", [True, False])
Expand Down