Skip to content

Commit a5ffbc6

Browse files
committed
instrumentation: make query scanning for dollar quotes a bit more correct
The tag of a dollar quote follows the same rules as an SQL identifier: SQL identifiers and key words must begin with a letter (a-z, but also letters with diacritical marks and non-Latin letters) or an underscore (_). Subsequent characters in an identifier or key word can be letters, underscores, digits (0-9), or dollar signs ($). Since we are not going to write a compliant SQL parser, at least handle query parameters, which are simple to catch because they have a digit right after the opening $. Refs #1851.
1 parent 58c916b commit a5ffbc6

File tree

2 files changed

+23
-0
lines changed

2 files changed

+23
-0
lines changed

elasticapm/instrumentation/packages/dbapi2.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
"""
3535

3636
import re
37+
import string
3738

3839
import wrapt
3940

@@ -65,6 +66,7 @@ def look_for_table(sql, keyword):
6566
def _scan_for_table_with_tokens(tokens, keyword):
6667
seen_keyword = False
6768
for idx, lexeme in scan(tokens):
69+
print(idx, lexeme)
6870
if seen_keyword:
6971
if lexeme == "(":
7072
return _scan_for_table_with_tokens(tokens[idx:], keyword)
@@ -85,6 +87,7 @@ def scan(tokens):
8587
literal_started = None
8688
prev_was_escape = False
8789
lexeme = []
90+
digits = set(string.digits)
8891

8992
i = 0
9093
while i < len(tokens):
@@ -114,6 +117,11 @@ def scan(tokens):
114117
literal_start_idx = i
115118
literal_started = token
116119
elif token == "$":
120+
# exclude query parameters that have a digit following the dollar
121+
if True and len(tokens) > i + 1 and tokens[i + 1] in digits:
122+
yield i, token
123+
i += 1
124+
continue
117125
# Postgres can use arbitrary characters between two $'s as a
118126
# literal separation token, e.g.: $fish$ literal $fish$
119127
# This part will detect that and skip over the literal.
@@ -125,6 +133,7 @@ def scan(tokens):
125133
pass
126134
else:
127135
quote = tokens[i : closing_dollar_idx + 1]
136+
128137
length = len(quote)
129138
# Opening dollar of the closing quote,
130139
# i.e. the first $ in the second $fish$

tests/instrumentation/dbapi2_tests.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,20 @@ def test_extract_signature_bytes():
122122
assert actual == expected
123123

124124

125+
def test_extract_signature_pathological():
126+
# tune for performance testing
127+
multiplier = 10
128+
values = []
129+
for chunk in range(multiplier):
130+
i = chunk * 3
131+
values.append(f" (${1+i}::varchar, ${2+i}::varchar, ${3+i}::varchar), ")
132+
133+
sql = f"SELECT * FROM (VALUES {''.join(values)})\n"
134+
actual = extract_signature(sql)
135+
expected = "SELECT FROM"
136+
assert actual == expected
137+
138+
125139
@pytest.mark.parametrize(
126140
["sql", "expected"],
127141
[

0 commit comments

Comments
 (0)