Skip to content

Commit ffb8e9b

Browse files
feat: further optimize code variables regex search (#429)
* feat: initial * fix: ruff
1 parent 7780ca8 commit ffb8e9b

File tree

4 files changed

+92
-9
lines changed

4 files changed

+92
-9
lines changed

CHANGELOG.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1-
# 7.8.3 - 2026-02-09
1+
# 7.8.5 - 2026-02-09
2+
3+
fix: further optimize code variables pattern matching
4+
5+
# 7.8.4 - 2026-02-09
26

37
fix: do not pattern match long values in code variables
48

posthog/exception_utils.py

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
CODE_VARIABLES_TOO_LONG_VALUE = "$$_posthog_value_too_long_$$"
6767

6868
_MAX_VALUE_LENGTH_FOR_PATTERN_MATCH = 5_000
69+
# Characters that carry regex syntax meaning. _extract_plain_substring refuses
# to treat a pattern as a plain literal if the text after its inline-flag group
# contains any of these.
_REGEX_METACHARACTERS = frozenset(r"\.^$*+?{}[]|()")
6970

7071
DEFAULT_TOTAL_VARIABLES_SIZE_LIMIT = 20 * 1024
7172

@@ -931,18 +932,47 @@ def strip_string(value, max_length=None):
931932
)
932933

933934

935+
def _extract_plain_substring(pattern):
    """Return the lowercase literal of a simple ``(?i)literal`` pattern.

    A pattern qualifies when it consists of one leading inline-flag group
    that includes ``i`` followed only by characters that are not regex
    metacharacters. Such a pattern can be checked with a lowercase
    substring test instead of a regex search.

    Args:
        pattern: A candidate regex pattern. Anything that is not a ``str``
            is rejected so the caller can hand it to ``re.compile``.

    Returns:
        The lowercased literal text, or ``None`` when the pattern must be
        handled by the regex engine.
    """
    # bytes, pre-compiled patterns, None, ... cannot be inspected with a str
    # regex; re.match would raise TypeError on them.
    if not isinstance(pattern, str):
        return None

    # Matches inline flag groups like (?i), (?im), (?ims), etc. that include the 'i' flag.
    # Python regex flags: a=ASCII, i=IGNORECASE, L=LOCALE, m=MULTILINE, s=DOTALL, u=UNICODE, x=VERBOSE
    inline_flags = re.match(r"^\(\?[aiLmsux]*i[aiLmsux]*\)", pattern)
    if not inline_flags:
        return None

    # Some flags change how the literal itself is interpreted, so a plain
    # substring test would no longer agree with the regex:
    #   x - whitespace is ignored and '#' starts a comment
    #   a - case folding is restricted to ASCII
    #   L - not valid for str patterns at all (re.compile rejects it)
    # Leave those to the regex engine.
    if any(flag in inline_flags.group() for flag in "aLx"):
        return None

    remainder = pattern[inline_flags.end():]
    if not remainder or any(c in _REGEX_METACHARACTERS for c in remainder):
        return None
    return remainder.lower()
945+
946+
934947
def _compile_patterns(patterns):
    """Split patterns into fast-path substrings and compiled regexes.

    Args:
        patterns: Iterable of regex patterns, or a falsy value.

    Returns:
        ``None`` when there is nothing usable to match against, otherwise a
        ``(substrings, regexes)`` tuple: ``substrings`` holds lowercase
        literals extracted by ``_extract_plain_substring`` and ``regexes``
        holds the compiled form of every other pattern that compiled
        successfully.
    """
    if not patterns:
        return None

    substrings = []
    regexes = []
    for pattern in patterns:
        # Only str patterns can take the substring fast path. Everything else
        # (bytes, already-compiled patterns, bad values) goes through
        # re.compile below, where failures are tolerated.
        simple = (
            _extract_plain_substring(pattern) if isinstance(pattern, str) else None
        )
        if simple is not None:
            substrings.append(simple)
            continue
        try:
            regexes.append(re.compile(pattern))
        except Exception:
            # Best effort: an invalid pattern is skipped rather than raised.
            pass

    if not substrings and not regexes:
        return None
    return (substrings, regexes)
942964

943965

944966
def _pattern_matches(name, patterns):
    """Tell whether ``name`` matches any compiled pattern.

    Args:
        name: The string to test.
        patterns: ``None`` or a ``(substrings, regexes)`` pair, where
            ``substrings`` are lowercase literals and ``regexes`` are
            compiled pattern objects.

    Returns:
        ``True`` if a literal occurs in the lowercased name or a regex
        finds a match in the name, ``False`` otherwise.
    """
    if patterns is None:
        return False

    literals, compiled = patterns

    # Lowercase the name only when there are literals to compare against.
    if literals:
        lowered = name.lower()
        if any(literal in lowered for literal in literals):
            return True

    return any(regex.search(name) for regex in compiled)

posthog/test/test_exception_capture.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -590,3 +590,52 @@ def test_mask_sensitive_data_circular_ref():
590590
result = _mask_sensitive_data(circular_list, compiled_mask)
591591
assert result[0] == "item"
592592
assert result[1] == "<circular ref>"
593+
594+
595+
def test_compile_patterns_fast_path_and_regex_fallback():
    """Check how patterns are split into substrings/regexes and then matched."""
    from posthog.exception_utils import _compile_patterns, _pattern_matches

    def check(compiled, expectations):
        # Each expectation is (variable name, expected match result).
        for candidate, expected in expectations:
            assert _pattern_matches(candidate, compiled) is expected

    # Case-insensitive literal patterns take the substring fast path.
    simple_only = _compile_patterns([r"(?i)password", r"(?i)token", r"(?i)jwt"])
    literal_part, regex_part = simple_only
    assert literal_part == ["password", "token", "jwt"]
    assert regex_part == []
    check(
        simple_only,
        [
            ("my_password_var", True),
            ("MY_TOKEN", True),
            ("safe_variable", False),
        ],
    )

    # Patterns with real regex syntax remain compiled regexes.
    complex_only = _compile_patterns([r"^__.*", r"\d{3,}", r"^sk_live_"])
    literal_part, regex_part = complex_only
    assert literal_part == []
    assert len(regex_part) == 3
    check(
        complex_only,
        [
            ("__dunder", True),
            ("has_999_numbers", True),
            ("sk_live_abc123", True),
            ("normal_var", False),
        ],
    )

    # A mix of both kinds is split accordingly.
    mixed = _compile_patterns(
        [
            r"(?i)secret",  # simple
            r"(?i)api_key",  # simple
            r"^__.*",  # regex
            r"\btoken_\w+",  # regex
        ]
    )
    literal_part, regex_part = mixed
    assert literal_part == ["secret", "api_key"]
    assert len(regex_part) == 2
    check(
        mixed,
        [
            ("my_secret", True),  # substring match
            ("API_KEY_VALUE", True),  # substring match
            ("__private", True),  # regex match
            ("token_abc", True),  # regex match
            ("safe_var", False),  # no match
        ],
    )

posthog/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
VERSION = "7.8.4"
1+
VERSION = "7.8.5"
22

33
if __name__ == "__main__":
44
print(VERSION, end="") # noqa: T201

0 commit comments

Comments
 (0)