Skip to content

Commit 6a4e926

Browse files
authored
ref(grouping): Cache enhancements in split form (#92000)
Right now, even when we're using split enhancements, the version we cache isn't split, which means that every time we pull them out of the cache, the work to split the rules has to be done all over again. This is obviously not ideal, so this PR changes the way we compute (and parse) the cached string so that it now includes the split rules. Since the rust enhancer can only parse base64 strings including one set of rules, this is done by computing a separate string for each of the three kinds of rules - rules in their original/pre-split form, classifier rules, and contributes rules - and then concatenating them, separated by a character which can't ever appear in base64. Older base64 strings containing only the original rules and no delimiter can still be parsed, because for them, splitting on the delimiter will be a no-op.
1 parent 6dec121 commit 6a4e926

File tree

2 files changed

+34
-5
lines changed

2 files changed

+34
-5
lines changed

src/sentry/grouping/enhancer/__init__.py

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@
4444
]
4545
LATEST_VERSION = VERSIONS[-1]
4646

47+
# A delimiter to insert between rulesets in the base64 represenation of enhancements (by spec,
48+
# base64 strings never contain '#')
49+
BASE64_ENHANCEMENTS_DELIMITER = b"#"
50+
4751
VALID_PROFILING_MATCHER_PREFIXES = (
4852
"stack.abs_path",
4953
"path", # stack.abs_path alias
@@ -807,7 +811,19 @@ def _get_base64_bytes_from_rules(self, rules: list[EnhancementRule]) -> bytes:
807811
@cached_property
808812
def base64_string(self) -> str:
809813
"""A base64 string representation of the enhancements object"""
810-
base64_bytes = self._get_base64_bytes_from_rules(self.rules)
814+
rulesets = [self.rules]
815+
816+
if self.run_split_enhancements:
817+
rulesets.extend([self.classifier_rules, self.contributes_rules])
818+
819+
# Create a base64 bytestring for each set of rules, and join them with a character we know
820+
# can never appear in base64. We do it this way rather than combining all three sets of
821+
# rules into a single bytestring because the rust enhancer only knows how to deal with
822+
# bytestrings encoding data of the form `[version, bases, rules]` (not
823+
# `[version, bases, rules, rules, rules]`).
824+
base64_bytes = BASE64_ENHANCEMENTS_DELIMITER.join(
825+
self._get_base64_bytes_from_rules(ruleset) for ruleset in rulesets
826+
)
811827
base64_str = base64_bytes.decode("ascii")
812828
return base64_str
813829

@@ -845,13 +861,25 @@ def from_base64_string(
845861
with metrics.timer("grouping.enhancements.creation") as metrics_timer_tags:
846862
metrics_timer_tags.update({"source": "base64_string", "referrer": referrer})
847863

848-
bytes_str = (
864+
raw_bytes_str = (
849865
base64_string.encode("ascii", "ignore")
850866
if isinstance(base64_string, str)
851867
else base64_string
852868
)
853869

854-
unsplit_config = cls._get_config_from_base64_bytes(bytes_str)
870+
# Split the string to get encoded data for each set of rules: unsplit rules (i.e., rules
871+
# the way they're stored in project config), classifier rules, and contributes rules.
872+
# Older base64 strings - such as those stored in events created before rule-splitting was
873+
# introduced - will only have one part and thus will end up unchanged. (The delimiter is
874+
# chosen specifically to be a character which can't appear in base64.)
875+
bytes_strs = raw_bytes_str.split(BASE64_ENHANCEMENTS_DELIMITER)
876+
configs = [cls._get_config_from_base64_bytes(bytes_str) for bytes_str in bytes_strs]
877+
878+
unsplit_config = configs[0]
879+
split_configs = None
880+
881+
if len(configs) == 3:
882+
split_configs = (configs[1], configs[2])
855883

856884
version = unsplit_config.version
857885
bases = unsplit_config.bases
@@ -861,6 +889,7 @@ def from_base64_string(
861889
return cls(
862890
rules=unsplit_config.rules,
863891
rust_enhancements=unsplit_config.rust_enhancements,
892+
split_enhancement_configs=split_configs,
864893
version=version,
865894
bases=bases,
866895
)

tests/sentry/grouping/test_enhancer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -700,8 +700,8 @@ def test_loads_split_enhancements_from_base64_string(self, split_rules_spy: Magi
700700
== "<EnhancementRule function:playFetch +group>"
701701
)
702702
assert strategy_config.enhancements.id is None
703-
# Currently we re-split the rules when translating from base64 back to object
704-
assert split_rules_spy.call_count == 2
703+
# Rules didn't have to be split again because they were cached in split form
704+
assert split_rules_spy.call_count == 1
705705

706706
def test_uses_default_enhancements_when_loading_string_with_invalid_version(self):
707707
enhancements = Enhancements.from_rules_text("function:playFetch +app")

0 commit comments

Comments
 (0)