Skip to content

Commit 8b02fe0

Browse files
authored
[Python] Improve named backref regexp highlighting (#4362)
This commit prepares named backreferences for correctly handling interpolation. This is primarily required to drive raw f-strings, but it also helps with raw unicode strings, which act as format-strings. It prevents placeholders such as `%s` or string replacements such as `{name}` from breaking syntax highlighting or causing unwanted `invalid.illegal` highlighting.
1 parent 6c54e3c commit 8b02fe0

File tree

2 files changed

+177
-49
lines changed

2 files changed

+177
-49
lines changed

Python/Embeddings/RegExp (for Python).sublime-syntax

Lines changed: 51 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,6 @@ variables:
1313
deactivate_x_mode: (?:\?[imsLua]*-[imsLua]*x[imxsLua]*)
1414
other_modifiers: \?(?:[ixmsLua]*-)?[ixmsLua]+
1515

16-
# capture group identifiers
17-
capture_group_name: (?:[[:alpha:]_]\w*)
18-
capture_group: (?:[1-9][0-9]*|{{capture_group_name}})
19-
2016
# quantifiers
2117
ranged_quantifier: '{{brace_start}}\d+(?:,\d*)?{{brace_end}}'
2218
brace_start: \{
@@ -34,33 +30,64 @@ contexts:
3430

3531
group-start:
3632
- meta_prepend: true
37-
- match: (\?P=)({{capture_group_name}})(?=\))
38-
captures:
39-
1: keyword.other.back-reference.named.regexp
40-
2: variable.other.backref-and-recursion.regexp
41-
- match: (\?P)(<)({{capture_group_name}})(>)
33+
# named capture group back-reference assertion (lookahead)
34+
- match: \?P=
35+
scope: keyword.other.backref-and-recursion.regexp
36+
set: group-assertion-name
37+
# named capture group back-reference match (for: regex package)
38+
- match: \?&
39+
scope: keyword.other.backref-and-recursion.regexp
40+
set: group-capturing-name
41+
# conditional capture group back-reference
42+
- match: \?(\()
4243
captures:
43-
1: keyword.other.backref-and-recursion.regexp
44-
2: punctuation.definition.capture-group-name.begin.regexp
45-
3: entity.name.capture-group.regexp
46-
4: punctuation.definition.capture-group-name.end.regexp
47-
# We can make this more sophisticated to match the | character that separates
48-
# yes-pattern from no-pattern, but it's not really necessary.
49-
- match: (\?)(\()({{capture_group}})(\))
44+
1: punctuation.definition.capture-group-name.begin.regexp
45+
set: group-conditional-name
46+
# named capture group definition
47+
- match: \?P(<)
5048
captures:
51-
1: keyword.other.backref-and-recursion.conditional.regexp
52-
2: punctuation.definition.group.begin.assertion.conditional.regexp
53-
3: variable.other.back-reference.regexp
54-
4: punctuation.definition.group.end.assertion.conditional.regexp
49+
1: punctuation.definition.capture-group-name.begin.regexp
50+
set: group-definition-name
51+
52+
group-assertion-name:
53+
- meta_content_scope: keyword.other.backref-and-recursion.regexp variable.other.capture-group.regexp
54+
- match: \)
55+
scope: punctuation.section.group.end.regexp
56+
pop: 3
57+
58+
group-capturing-name:
59+
- meta_content_scope: keyword.other.backref-and-recursion.regexp variable.other.capture-group.regexp
60+
- match: \)
61+
scope: punctuation.section.group.end.regexp
62+
pop: 3
63+
64+
group-conditional-name:
65+
- meta_scope: keyword.other.backref-and-recursion.regexp
66+
- meta_content_scope: variable.other.capture-group.regexp
67+
- match: \)
68+
scope: punctuation.definition.capture-group-name.end.regexp
69+
pop: 1
70+
71+
group-definition-name:
72+
- meta_scope: keyword.other.backref-and-recursion.regexp
73+
- meta_content_scope: entity.name.capture-group.regexp
74+
- match: \>
75+
scope: punctuation.definition.capture-group-name.end.regexp
76+
pop: 1
5577

5678
backrefs:
5779
- meta_prepend: true
58-
- match: \\g(<)({{capture_group}})(>)
59-
scope: keyword.other.backref-and-recursion.regexp
80+
- match: \\g(<)
6081
captures:
6182
1: punctuation.definition.capture-group-name.begin.regexp
62-
2: variable.other.backref-and-recursion.regexp
63-
3: punctuation.definition.capture-group-name.end.regexp
83+
push: backref-name
84+
85+
backref-name:
86+
- meta_scope: keyword.other.backref-and-recursion.regexp
87+
- meta_content_scope: variable.other.capture-group.regexp
88+
- match: \>
89+
scope: punctuation.definition.capture-group-name.end.regexp
90+
pop: 1
6491

6592
literals:
6693
- meta_prepend: true

Python/tests/syntax_test_python_strings.py

Lines changed: 126 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -120,37 +120,48 @@
120120
# ^ variable.other.backref-and-recursion.regexp
121121
# ^ - keyword
122122

123+
regex = r'(?P<quote>[\'"]).*?(?&quote)' # `?&` is for the regex package
124+
# ^^^^^^^^^ keyword.other.backref-and-recursion.regexp - keyword keyword
125+
# ^ punctuation.definition.capture-group-name.begin.regexp
126+
# ^^^^^ entity.name.capture-group.regexp - invalid
127+
# ^ punctuation.definition.capture-group-name.end.regexp
128+
# ^^^^^^^ keyword.other.backref-and-recursion.regexp
129+
# ^^^^^ variable.other.capture-group.regexp
130+
# ^ - keyword - variable
131+
123132
regex = r'(?P<quote>[\'"]).*?(?P=quote)'
124-
# ^^ keyword.other.backref-and-recursion.regexp
133+
# ^^^^^^^^^ keyword.other.backref-and-recursion.regexp - keyword keyword
125134
# ^ punctuation.definition.capture-group-name.begin.regexp
126135
# ^^^^^ entity.name.capture-group.regexp - invalid
127136
# ^ punctuation.definition.capture-group-name.end.regexp
128-
# ^^^ keyword.other.back-reference.named.regexp
129-
# ^^^^^ variable.other.backref-and-recursion.regexp - invalid
137+
# ^^^^^^^^ keyword.other.backref-and-recursion.regexp - keyword keyword
138+
# ^^^^^ variable.other.capture-group.regexp - invalid
139+
# ^ - keyword - variable
130140

131141
regex = r'(?P<Quote>[\'"]).*?(?P=Quote)'
132-
# ^^ keyword.other.backref-and-recursion.regexp
142+
# ^^^^^^^^^ keyword.other.backref-and-recursion.regexp - keyword keyword
133143
# ^ punctuation.definition.capture-group-name.begin.regexp
134144
# ^^^^^ entity.name.capture-group.regexp - invalid
135145
# ^ punctuation.definition.capture-group-name.end.regexp
136-
# ^^^ keyword.other.back-reference.named.regexp
137-
# ^^^^^ variable.other.backref-and-recursion.regexp - invalid
146+
# ^^^^^^^^ keyword.other.backref-and-recursion.regexp - keyword keyword
147+
# ^^^^^ variable.other.capture-group.regexp - invalid
148+
# ^ - keyword - variable
138149

139150
regex = r'(?P<quote>[\'"]).*?\g<quote>'
140-
# ^^ keyword.other.backref-and-recursion.regexp
151+
# ^^^^^^^^^ keyword.other.backref-and-recursion.regexp - keyword keyword
141152
# ^ punctuation.definition.capture-group-name.begin.regexp
142153
# ^^^^^ entity.name.capture-group.regexp - invalid
143154
# ^ punctuation.definition.capture-group-name.end.regexp
144-
# ^^ keyword.other.backref-and-recursion.regexp
145-
# ^^^^^ variable.other.backref-and-recursion.regexp - invalid
155+
# ^^^^^^^^^ keyword.other.backref-and-recursion.regexp - keyword keyword
156+
# ^^^^^ variable.other.capture-group.regexp - invalid
146157

147158
regex = r'(?P<Quote>[\'"]).*?\g<Quote>'
148-
# ^^ keyword.other.backref-and-recursion.regexp
159+
# ^^^^^^^^^ keyword.other.backref-and-recursion.regexp - keyword keyword
149160
# ^ punctuation.definition.capture-group-name.begin.regexp
150161
# ^^^^^ entity.name.capture-group.regexp - invalid
151162
# ^ punctuation.definition.capture-group-name.end.regexp
152-
# ^^ keyword.other.backref-and-recursion.regexp
153-
# ^^^^^ variable.other.backref-and-recursion.regexp - invalid
163+
# ^^^^^^^^^ keyword.other.backref-and-recursion.regexp - keyword keyword
164+
# ^^^^^ variable.other.capture-group.regexp - invalid
154165

155166
regex = r'''\b ([fobar]*){1}(?:a|b)?'''
156167
# ^^^^^^^^^^^^^^^^^^^^^^^^ meta.mode.extended.regexp
@@ -1263,19 +1274,19 @@
12631274
# ^^ constant.character.escape.regexp constant.character.escape.python
12641275

12651276
match = re.match(r'(?P<test>a)?b(?(test)c|d)', line)
1266-
# ^^^^^^^^^^ meta.group.regexp
1277+
# ^^^^^^^^^^^ meta.group.regexp
12671278
# ^ punctuation.section.group.begin
1268-
# ^^ keyword.other.backref-and-recursion
1279+
# ^^^^^^^^ keyword.other.backref-and-recursion.regexp - keyword keyword
12691280
# ^ punctuation.definition.capture-group-name.begin
12701281
# ^^^^ entity.name.capture-group
12711282
# ^ punctuation.definition.capture-group-name.end
12721283
# ^ punctuation.section.group.end
12731284
# ^ keyword.operator.quantifier
12741285
# ^ punctuation.section.group.begin
1275-
# ^ keyword.other.backref-and-recursion.conditional
1276-
# ^ punctuation.definition.group.begin.assertion.conditional
1277-
# ^^^^ variable.other.back-reference
1278-
# ^ punctuation.definition.group.end.assertion.conditional
1286+
# ^^^^^^^ keyword.other.backref-and-recursion.regexp - keyword keyword
1287+
# ^ punctuation.definition.capture-group-name.begin.regexp
1288+
# ^^^^ variable.other.capture-group.regexp
1289+
# ^ punctuation.definition.capture-group-name.end.regexp
12791290
# ^ keyword.operator.alternation
12801291
# ^ punctuation.section.group.end
12811292
match = re.match(r'(a)?b(?(1)c|d)', line)
@@ -1284,16 +1295,16 @@
12841295
# ^ punctuation.section.group.end
12851296
# ^ keyword.operator.quantifier
12861297
# ^ punctuation.section.group.begin
1287-
# ^ keyword.other.backref-and-recursion.conditional
1288-
# ^ punctuation.definition.group.begin.assertion.conditional
1289-
# ^ variable.other.back-reference - punctuation - keyword
1290-
# ^ punctuation.definition.group.end.assertion.conditional
1298+
# ^^^^ keyword.other.backref-and-recursion.regexp - keyword keyword
1299+
# ^ punctuation.definition.capture-group-name.begin.regexp
1300+
# ^ variable.other.capture-group.regexp
1301+
# ^ punctuation.definition.capture-group-name.end.regexp
12911302
# ^ keyword.operator.alternation
12921303
# ^ punctuation.section.group.end
12931304
match = re.search(r'''(?P<quote>['"]).*?(?P=quote)''', line)
12941305
# ^^^^^^^^^^^^^^^ meta.group.regexp
12951306
# ^ punctuation.section.group.begin
1296-
# ^^ keyword.other.backref-and-recursion
1307+
# ^^^^^^^^^ keyword.other.backref-and-recursion.regexp - keyword keyword
12971308
# ^ punctuation.definition.capture-group-name.begin
12981309
# ^^^^^ entity.name.capture-group
12991310
# ^ punctuation.definition.capture-group-name.end
@@ -1303,8 +1314,8 @@
13031314
# ^ punctuation.section.group.end
13041315
# ^ keyword.other.any - meta.group
13051316
# ^^ keyword.operator.quantifier
1306-
# ^^^ keyword.other.back-reference.named
1307-
# ^^^^^ variable.other.backref-and-recursion - keyword
1317+
# ^^^ keyword.other.backref-and-recursion.regexp - keyword keyword - variable
1318+
# ^^^^^ keyword.other.backref-and-recursion.regexp variable.other.capture-group.regexp - keyword keyword
13081319
match = re.search(r'''(?ix)some text(?-i)''', line)
13091320
# ^ punctuation.definition.modifier.begin
13101321
# ^^ storage.modifier.mode
@@ -1465,6 +1476,96 @@
14651476
# ^^^^^ - keyword.operator
14661477
# ^^ keyword.operator.quantifier.regexp
14671478

1479+
fr"(?P<{name!s}>.*(?&{name})"
1480+
# ^^^ keyword.other.backref-and-recursion.regexp
1481+
# ^ punctuation.definition.capture-group-name.begin.regexp
1482+
# ^^^^^^^^ meta.string.python meta.interpolation.python
1483+
# ^ punctuation.section.interpolation.begin.python
1484+
# ^^^^ meta.generic-name.python
1485+
# ^^ storage.modifier.conversion.python
1486+
# ^ punctuation.section.interpolation.end.python
1487+
# ^ keyword.other.backref-and-recursion.regexp punctuation.definition.capture-group-name.end.regexp
1488+
# ^^ keyword.other.backref-and-recursion.regexp
1489+
# ^^^^^^ meta.string.python meta.interpolation.python
1490+
# ^ punctuation.section.interpolation.begin.python
1491+
# ^^^^ meta.generic-name.python
1492+
# ^ punctuation.section.interpolation.end.python
1493+
# ^ punctuation.section.group.end.regexp
1494+
1495+
fr'(?P<{name!s}>.*(?&{name})'
1496+
# ^^^ keyword.other.backref-and-recursion.regexp
1497+
# ^ punctuation.definition.capture-group-name.begin.regexp
1498+
# ^^^^^^^^ meta.string.python meta.interpolation.python
1499+
# ^ punctuation.section.interpolation.begin.python
1500+
# ^^^^ meta.generic-name.python
1501+
# ^^ storage.modifier.conversion.python
1502+
# ^ punctuation.section.interpolation.end.python
1503+
# ^ keyword.other.backref-and-recursion.regexp punctuation.definition.capture-group-name.end.regexp
1504+
# ^^ keyword.other.backref-and-recursion.regexp
1505+
# ^^^^^^ meta.string.python meta.interpolation.python
1506+
# ^ punctuation.section.interpolation.begin.python
1507+
# ^^^^ meta.generic-name.python
1508+
# ^ punctuation.section.interpolation.end.python
1509+
# ^ punctuation.section.group.end.regexp
1510+
1511+
fr"(?P={name!s})"
1512+
# ^^^ keyword.other.backref-and-recursion.regexp
1513+
# ^^^^^^^^ meta.string.python meta.interpolation.python
1514+
# ^ punctuation.section.interpolation.begin.python
1515+
# ^^^^ meta.generic-name.python
1516+
# ^^ storage.modifier.conversion.python
1517+
# ^ punctuation.section.interpolation.end.python
1518+
# ^ punctuation.section.group.end.regexp
1519+
1520+
fr'(?P={name!s})'
1521+
# ^^^ keyword.other.backref-and-recursion.regexp
1522+
# ^^^^^^^^ meta.string.python meta.interpolation.python
1523+
# ^ punctuation.section.interpolation.begin.python
1524+
# ^^^^ meta.generic-name.python
1525+
# ^^ storage.modifier.conversion.python
1526+
# ^ punctuation.section.interpolation.end.python
1527+
# ^ punctuation.section.group.end.regexp
1528+
1529+
fr"(?({name!s})yes|no)"
1530+
# ^ keyword.other.backref-and-recursion.regexp
1531+
# ^ keyword.other.backref-and-recursion.regexp punctuation.definition.capture-group-name.begin.regexp
1532+
# ^^^^^^^^ meta.string.python meta.interpolation.python
1533+
# ^ punctuation.section.interpolation.begin.python
1534+
# ^^^^ meta.generic-name.python
1535+
# ^^ storage.modifier.conversion.python
1536+
# ^ punctuation.section.interpolation.end.python
1537+
# ^ keyword.other.backref-and-recursion.regexp punctuation.definition.capture-group-name.end.regexp
1538+
1539+
fr'(?({name!s})yes|no)'
1540+
# ^ keyword.other.backref-and-recursion.regexp
1541+
# ^ keyword.other.backref-and-recursion.regexp punctuation.definition.capture-group-name.begin.regexp
1542+
# ^^^^^^^^ meta.string.python meta.interpolation.python
1543+
# ^ punctuation.section.interpolation.begin.python
1544+
# ^^^^ meta.generic-name.python
1545+
# ^^ storage.modifier.conversion.python
1546+
# ^ punctuation.section.interpolation.end.python
1547+
# ^ keyword.other.backref-and-recursion.regexp punctuation.definition.capture-group-name.end.regexp
1548+
1549+
fr"\g<{name!s}>"
1550+
# ^^ keyword.other.backref-and-recursion.regexp
1551+
# ^ punctuation.definition.capture-group-name.begin.regexp
1552+
# ^^^^^^^^ meta.string.python meta.interpolation.python
1553+
# ^ punctuation.section.interpolation.begin.python
1554+
# ^^^^ meta.generic-name.python
1555+
# ^^ storage.modifier.conversion.python
1556+
# ^ punctuation.section.interpolation.end.python
1557+
# ^ keyword.other.backref-and-recursion.regexp punctuation.definition.capture-group-name.end.regexp
1558+
1559+
fr'\g<{name!s}>'
1560+
# ^^ keyword.other.backref-and-recursion.regexp
1561+
# ^ punctuation.definition.capture-group-name.begin.regexp
1562+
# ^^^^^^^^ meta.string.python meta.interpolation.python
1563+
# ^ punctuation.section.interpolation.begin.python
1564+
# ^^^^ meta.generic-name.python
1565+
# ^^ storage.modifier.conversion.python
1566+
# ^ punctuation.section.interpolation.end.python
1567+
# ^ keyword.other.backref-and-recursion.regexp punctuation.definition.capture-group-name.end.regexp
1568+
14681569
# Most of these were inspired by
14691570
# https://github.com/python/cpython/commit/9a4135e939bc223f592045a38e0f927ba170da32
14701571
f'{x=:}'

0 commit comments

Comments
 (0)