Skip to content

Commit b82a7aa

Browse files
committed
instrumentation/dbapi2: make query scanning for dollar quotes a bit more correct
Dollar quotes follow the same rules as SQL identifiers: SQL identifiers and key words must begin with a letter (a-z, but also letters with diacritical marks and non-Latin letters) or an underscore (_). Subsequent characters in an identifier or key word can be letters, underscores, digits (0-9), or dollar signs ($). Since we are not going to write a compliant SQL parser, at least handle query parameters, which are simple to catch because they have a digit right after the opening $. Refs #1851.
1 parent 58c916b commit b82a7aa

File tree

2 files changed

+21
-0
lines changed

2 files changed

+21
-0
lines changed

elasticapm/instrumentation/packages/dbapi2.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
"""
3535

3636
import re
37+
import string
3738

3839
import wrapt
3940

@@ -85,6 +86,7 @@ def scan(tokens):
8586
literal_started = None
8687
prev_was_escape = False
8788
lexeme = []
89+
digits = set(string.digits)
8890

8991
i = 0
9092
while i < len(tokens):
@@ -114,6 +116,11 @@ def scan(tokens):
114116
literal_start_idx = i
115117
literal_started = token
116118
elif token == "$":
119+
# exclude query parameters that have a digit following the dollar
120+
if len(tokens) > i + 1 and tokens[i + 1] in digits:
121+
yield i, token
122+
i += 1
123+
continue
117124
# Postgres can use arbitrary characters between two $'s as a
118125
# literal separation token, e.g.: $fish$ literal $fish$
119126
# This part will detect that and skip over the literal.

tests/instrumentation/dbapi2_tests.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,20 @@ def test_extract_signature_bytes():
122122
assert actual == expected
123123

124124

125+
def test_extract_signature_pathological():
    """Check the signature of a query packed with numbered ``$N`` parameters.

    The generated VALUES list contains ``3 * multiplier`` placeholders, each
    with a digit directly after the dollar sign and a ``::varchar`` cast. The
    extracted signature is expected to be ``"SELECT FROM"``.
    """
    # tune for performance testing
    multiplier = 10
    # One tuple of three consecutive placeholders per chunk; ``i`` is the
    # offset of the chunk (0, 3, 6, ...).
    values = [
        f" (${1+i}::varchar, ${2+i}::varchar, ${3+i}::varchar), "
        for i in range(0, 3 * multiplier, 3)
    ]

    sql = f"SELECT * FROM (VALUES {''.join(values)})\n"
    assert extract_signature(sql) == "SELECT FROM"
137+
138+
125139
@pytest.mark.parametrize(
126140
["sql", "expected"],
127141
[

0 commit comments

Comments
 (0)