Skip to content

Commit

Permalink
Django caching instrumentation update (#3009)
Browse files Browse the repository at this point in the history
This adds more data to the cache spans and makes adding the cache item size optional.
This implements parts of following spec https://develop.sentry.dev/sdk/performance/modules/cache/

---------

Co-authored-by: Daniel Szoke <7881302+szokeasaurusrex@users.noreply.github.com>
  • Loading branch information
antonpirker and szokeasaurusrex authored May 23, 2024
1 parent ec23396 commit 30f72a3
Show file tree
Hide file tree
Showing 9 changed files with 744 additions and 303 deletions.
21 changes: 20 additions & 1 deletion sentry_sdk/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,24 @@ class SPANDATA:
Example: 58
"""

CACHE_KEY = "cache.key"
"""
The key of the requested data.
Example: template.cache.some_item.867da7e2af8e6b2f3aa7213a4080edb3
"""

NETWORK_PEER_ADDRESS = "network.peer.address"
"""
Peer address of the network connection - IP address or Unix domain socket name.
Example: 10.1.2.80, /tmp/my.sock, localhost
"""

NETWORK_PEER_PORT = "network.peer.port"
"""
Peer port number of the network connection.
Example: 6379
"""

HTTP_QUERY = "http.query"
"""
The Query string present in the URL.
Expand Down Expand Up @@ -349,7 +367,8 @@ class SPANDATA:

class OP:
ANTHROPIC_MESSAGES_CREATE = "ai.messages.create.anthropic"
CACHE_GET_ITEM = "cache.get_item"
CACHE_GET = "cache.get"
CACHE_SET = "cache.set"
COHERE_CHAT_COMPLETIONS_CREATE = "ai.chat_completions.create.cohere"
COHERE_EMBEDDINGS_CREATE = "ai.embeddings.create.cohere"
DB = "db"
Expand Down
14 changes: 13 additions & 1 deletion sentry_sdk/integrations/django/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,16 @@ def is_authenticated(request_user):


class DjangoIntegration(Integration):
"""
Auto instrument a Django application.
:param transaction_style: How to derive transaction names. Either `"function_name"` or `"url"`. Defaults to `"url"`.
:param middleware_spans: Whether to create spans for middleware. Defaults to `True`.
:param signals_spans: Whether to create spans for signals. Defaults to `True`.
:param signals_denylist: A list of signals to ignore when creating spans.
:param cache_spans: Whether to create spans for cache operations. Defaults to `False`.
"""

identifier = "django"

transaction_style = ""
Expand All @@ -128,10 +138,12 @@ def __init__(
)
self.transaction_style = transaction_style
self.middleware_spans = middleware_spans

self.signals_spans = signals_spans
self.cache_spans = cache_spans
self.signals_denylist = signals_denylist or []

self.cache_spans = cache_spans

@staticmethod
def setup_once():
# type: () -> None
Expand Down
135 changes: 107 additions & 28 deletions sentry_sdk/integrations/django/caching.py
Original file line number Diff line number Diff line change
@@ -1,80 +1,151 @@
import functools
from typing import TYPE_CHECKING
from urllib3.util import parse_url as urlparse

from django import VERSION as DJANGO_VERSION
from django.core.cache import CacheHandler

import sentry_sdk
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.utils import ensure_integration_enabled
from sentry_sdk.utils import (
SENSITIVE_DATA_SUBSTITUTE,
capture_internal_exceptions,
ensure_integration_enabled,
)


if TYPE_CHECKING:
from typing import Any
from typing import Callable
from typing import Optional


METHODS_TO_INSTRUMENT = [
"set",
"set_many",
"get",
"get_many",
]


def _get_span_description(method_name, args, kwargs):
# type: (str, Any, Any) -> str
description = "{} ".format(method_name)
def _get_key(args, kwargs):
# type: (list[Any], dict[str, Any]) -> str
key = ""

if args is not None and len(args) >= 1:
description += str(args[0])
key = args[0]
elif kwargs is not None and "key" in kwargs:
description += str(kwargs["key"])
key = kwargs["key"]

if isinstance(key, dict):
# Do not leak sensitive data
# `set_many()` has a dict {"key1": "value1", "key2": "value2"} as first argument.
# Those values could include sensitive data so we replace them with a placeholder
key = {x: SENSITIVE_DATA_SUBSTITUTE for x in key}

return str(key)


return description
def _get_span_description(method_name, args, kwargs):
# type: (str, list[Any], dict[str, Any]) -> str
return _get_key(args, kwargs)


def _patch_cache_method(cache, method_name):
# type: (CacheHandler, str) -> None
def _patch_cache_method(cache, method_name, address, port):
# type: (CacheHandler, str, Optional[str], Optional[int]) -> None
from sentry_sdk.integrations.django import DjangoIntegration

original_method = getattr(cache, method_name)

@ensure_integration_enabled(DjangoIntegration, original_method)
def _instrument_call(cache, method_name, original_method, args, kwargs):
# type: (CacheHandler, str, Callable[..., Any], Any, Any) -> Any
def _instrument_call(
cache, method_name, original_method, args, kwargs, address, port
):
# type: (CacheHandler, str, Callable[..., Any], list[Any], dict[str, Any], Optional[str], Optional[int]) -> Any
is_set_operation = method_name.startswith("set")
is_get_operation = not is_set_operation

op = OP.CACHE_SET if is_set_operation else OP.CACHE_GET
description = _get_span_description(method_name, args, kwargs)

with sentry_sdk.start_span(
op=OP.CACHE_GET_ITEM, description=description
) as span:
with sentry_sdk.start_span(op=op, description=description) as span:
value = original_method(*args, **kwargs)

if value:
span.set_data(SPANDATA.CACHE_HIT, True)

size = len(str(value))
span.set_data(SPANDATA.CACHE_ITEM_SIZE, size)

else:
span.set_data(SPANDATA.CACHE_HIT, False)
with capture_internal_exceptions():
if address is not None:
span.set_data(SPANDATA.NETWORK_PEER_ADDRESS, address)

if port is not None:
span.set_data(SPANDATA.NETWORK_PEER_PORT, port)

key = _get_key(args, kwargs)
if key != "":
span.set_data(SPANDATA.CACHE_KEY, key)

item_size = None
if is_get_operation:
if value:
item_size = len(str(value))
span.set_data(SPANDATA.CACHE_HIT, True)
else:
span.set_data(SPANDATA.CACHE_HIT, False)
else:
try:
# 'set' command
item_size = len(str(args[1]))
except IndexError:
# 'set_many' command
item_size = len(str(args[0]))

if item_size is not None:
span.set_data(SPANDATA.CACHE_ITEM_SIZE, item_size)

return value

@functools.wraps(original_method)
def sentry_method(*args, **kwargs):
# type: (*Any, **Any) -> Any
return _instrument_call(cache, method_name, original_method, args, kwargs)
return _instrument_call(
cache, method_name, original_method, args, kwargs, address, port
)

setattr(cache, method_name, sentry_method)


def _patch_cache(cache):
# type: (CacheHandler) -> None
def _patch_cache(cache, address=None, port=None):
# type: (CacheHandler, Optional[str], Optional[int]) -> None
if not hasattr(cache, "_sentry_patched"):
for method_name in METHODS_TO_INSTRUMENT:
_patch_cache_method(cache, method_name)
_patch_cache_method(cache, method_name, address, port)
cache._sentry_patched = True


def _get_address_port(settings):
# type: (dict[str, Any]) -> tuple[Optional[str], Optional[int]]
location = settings.get("LOCATION")

# TODO: location can also be an array of locations
# see: https://docs.djangoproject.com/en/5.0/topics/cache/#redis
# GitHub issue: https://github.com/getsentry/sentry-python/issues/3062
if not isinstance(location, str):
return None, None

if "://" in location:
parsed_url = urlparse(location)
# remove the username and password from URL to not leak sensitive data.
address = "{}://{}{}".format(
parsed_url.scheme or "",
parsed_url.hostname or "",
parsed_url.path or "",
)
port = parsed_url.port
else:
address = location
port = None

return address, int(port) if port is not None else None


def patch_caching():
# type: () -> None
from sentry_sdk.integrations.django import DjangoIntegration
Expand All @@ -90,7 +161,13 @@ def sentry_get_item(self, alias):

integration = sentry_sdk.get_client().get_integration(DjangoIntegration)
if integration is not None and integration.cache_spans:
_patch_cache(cache)
from django.conf import settings

address, port = _get_address_port(
settings.CACHES[alias or "default"]
)

_patch_cache(cache, address, port)

return cache

Expand All @@ -107,7 +184,9 @@ def sentry_create_connection(self, alias):

integration = sentry_sdk.get_client().get_integration(DjangoIntegration)
if integration is not None and integration.cache_spans:
_patch_cache(cache)
address, port = _get_address_port(self.settings[alias or "default"])

_patch_cache(cache, address, port)

return cache

Expand Down
2 changes: 2 additions & 0 deletions sentry_sdk/integrations/redis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,8 @@ class RedisIntegration(Integration):
def __init__(self, max_data_size=_DEFAULT_MAX_DATA_SIZE):
# type: (int) -> None
self.max_data_size = max_data_size
# TODO: add some prefix that users can set to specify a cache key
# GitHub issue: https://github.com/getsentry/sentry-python/issues/2965

@staticmethod
def setup_once():
Expand Down
2 changes: 1 addition & 1 deletion tests/integrations/aiohttp/test_aiohttp.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ async def hello(request):
async def test_traces_sampler_gets_request_object_in_sampling_context(
sentry_init,
aiohttp_client,
DictionaryContaining, # noqa:N803
DictionaryContaining, # noqa: N803
ObjectDescribedBy, # noqa: N803
):
traces_sampler = mock.Mock()
Expand Down
2 changes: 1 addition & 1 deletion tests/integrations/aws_lambda/test_aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,7 +554,7 @@ def test_handler(event, context):

def test_traces_sampler_gets_correct_values_in_sampling_context(
run_lambda_function,
DictionaryContaining, # noqa:N803
DictionaryContaining, # noqa: N803
ObjectDescribedBy, # noqa: N803
StringContaining, # noqa: N803
):
Expand Down
Loading

0 comments on commit 30f72a3

Please sign in to comment.