Skip to content

chore(sampling_rules): remove support for regex and callable matchers #13987

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 6 additions & 11 deletions ddtrace/_trace/sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ class DatadogSampler:
"""
The DatadogSampler samples traces based on the following (in order of precedence):
- A list of sampling rules, applied in the order they are provided. The first matching rule is used.
- A default sample rate, stored as the final sampling rule (lowest precedence sampling rule).
- A global rate limit, applied only if a rule is matched or if `rate_limit_always_on` is set to `True`.
- Sample rates provided by the agent (priority sampling, maps sample rates to service and env tags).
- By default, spans are sampled at a rate of 1.0 and assigned an `AUTO_KEEP` priority, allowing
Expand Down Expand Up @@ -91,9 +90,13 @@ def __init__(
Constructor for DatadogSampler

:param rules: List of :class:`SamplingRule` rules to apply to the root span of every trace, default no rules
:param default_sample_rate: The default sample rate to apply if no rules matched
:param rate_limit: Global rate limit (traces per second) to apply to all traces regardless of the rules
applied to them (default: ``100``)
:param rate_limit_window: The time window in nanoseconds for the rate limit, default is 1 second
:param rate_limit_always_on: If set to `True`, the rate limit is always applied, even if no sampling rules
are provided.
:param agent_based_samplers: A dictionary of service-based samplers, mapping a key in the format
`service:<service>,env:<env>` to a :class:`RateSampler` instance.
"""
# Set sampling rules
global_sampling_rules = config._trace_sampling_rules
Expand Down Expand Up @@ -153,15 +156,7 @@ def set_sampling_rules(self, rules: str) -> None:
raise KeyError("No sample_rate provided for sampling rule: {}".format(json.dumps(rule)))
continue
try:
sampling_rule = SamplingRule(
sample_rate=float(rule["sample_rate"]),
service=rule.get("service", SamplingRule.NO_RULE),
name=rule.get("name", SamplingRule.NO_RULE),
resource=rule.get("resource", SamplingRule.NO_RULE),
tags=rule.get("tags", SamplingRule.NO_RULE),
provenance=rule.get("provenance", "default"),
)
sampling_rules.append(sampling_rule)
sampling_rules.append(SamplingRule(**rule))
except ValueError as e:
if config._raise:
raise ValueError("Error creating sampling rule {}: {}".format(json.dumps(rule), e))
Expand Down
49 changes: 10 additions & 39 deletions ddtrace/_trace/sampling_rule.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import re
from typing import TYPE_CHECKING # noqa:F401
from typing import Any
from typing import Optional
Expand Down Expand Up @@ -71,9 +70,9 @@ def __init__(
{k: GlobMatcher(str(v)) for k, v in tags.items()} if tags != SamplingRule.NO_RULE else {}
)
self.tags = tags
self.service = self.choose_matcher(service)
self.name = self.choose_matcher(name)
self.resource = self.choose_matcher(resource)
self.service = self._choose_matcher(service)
self.name = self._choose_matcher(name)
self.resource = self._choose_matcher(resource)
self.provenance = provenance

@property
Expand All @@ -93,26 +92,6 @@ def _pattern_matches(self, prop, pattern):
return True
if isinstance(pattern, GlobMatcher):
return pattern.match(str(prop))

# If the pattern is callable (e.g. a function) then call it passing the prop
# The expected return value is a boolean so cast the response in case it isn't
if callable(pattern):
try:
return bool(pattern(prop))
except Exception:
log.warning("%r pattern %r failed with %r", self, pattern, prop, exc_info=True)
# Their function failed to validate, assume it is a False
return False

# The pattern is a regular expression and the prop is a string
if isinstance(pattern, re.Pattern):
try:
return bool(pattern.match(str(prop)))
except (ValueError, TypeError):
# This is to guard us against the casting to a string (shouldn't happen, but still)
log.warning("%r pattern %r failed with %r", self, pattern, prop, exc_info=True)
return False

# Exact match on the values
return prop == pattern

Expand Down Expand Up @@ -204,22 +183,14 @@ def _no_rule_or_self(self, val):
else:
return val

def choose_matcher(self, prop):
# We currently support the ability to pass in a function, a regular expression, or a string
# If a string is passed in we create a GlobMatcher to handle the matching
if callable(prop) or isinstance(prop, re.Pattern):
log.error(
"Using methods or regular expressions for SamplingRule matching is not supported: %s ."
"Please move to passing in a string for Glob matching.",
str(prop),
)
return "None"
# Name and Resource will never be None, but service can be, since we str()
# whatever we pass into the GlobMatcher, we can just use its matching
def _choose_matcher(self, prop):
if prop is SamplingRule.NO_RULE:
return SamplingRule.NO_RULE
elif prop is None:
prop = "None"
else:
return GlobMatcher(prop) if prop != SamplingRule.NO_RULE else SamplingRule.NO_RULE
# Name and resource will never be None, but service can be. Since we str()
# whatever we pass into the GlobMatcher, we can just match against the string "None".
return GlobMatcher("None")
return GlobMatcher(prop)

def __repr__(self):
return "{}(sample_rate={!r}, service={!r}, name={!r}, resource={!r}, tags={!r}, provenance={!r})".format(
Expand Down
Loading