Skip to content

Commit

Permalink
Merge pull request #542 from asottile/named_escapes
Browse files Browse the repository at this point in the history
handle named escape sequences in format upgrades
  • Loading branch information
asottile authored Sep 23, 2021
2 parents 748a1f0 + 0f9d15b commit 0981e83
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 30 deletions.
39 changes: 25 additions & 14 deletions pyupgrade/_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,10 @@
from pyupgrade._data import Settings
from pyupgrade._data import Version
from pyupgrade._data import visit
from pyupgrade._string_helpers import curly_escape
from pyupgrade._string_helpers import is_ascii
from pyupgrade._string_helpers import is_codec
from pyupgrade._string_helpers import NAMED_UNICODE_RE
from pyupgrade._token_helpers import CLOSING
from pyupgrade._token_helpers import KEYWORDS
from pyupgrade._token_helpers import OPENING
Expand All @@ -47,21 +49,34 @@


def parse_format(s: str) -> Tuple[DotFormatPart, ...]:
    """Parse a ``str.format`` template, treating ``\\N{...}`` as literal text.

    ``s`` is the *source* text of a string literal, so a named unicode
    escape such as ``\\N{snowman}`` still appears verbatim.  Its braces are
    not a replacement field and must not be handed to the stdlib parser.

    Returns a non-empty tuple of ``(literal, field_name, format_spec,
    conversion)`` parts.  The stdlib parser yields nothing for an empty
    string, which would lose information, so that case is represented as a
    single literal-only part.
    """
    ret: List[DotFormatPart] = []

    def _add_named(escape: str) -> None:
        # only glue onto the previous part when it is pure literal text:
        # a part's literal comes *before* its field, so merging into a part
        # that has a field would reorder the text (and clobbering it, as
        # was done before, drops the field entirely)
        if ret and ret[-1][1:] == (None, None, None):
            ret[-1] = (ret[-1][0] + escape, None, None, None)
        else:
            ret.append((escape, None, None, None))

    def _add_text(text: str) -> None:
        first = True
        for tup in _stdlib_parse_format(text):
            # the first part of a text run continues the literal-only part
            # left behind by a preceding named escape
            if first and ret and ret[-1][1:] == (None, None, None):
                ret[-1] = (ret[-1][0] + tup[0], *tup[1:])
            else:
                ret.append(tup)
            first = False

    pos = 0
    for match in NAMED_UNICODE_RE.finditer(s):
        # slice up to the start of the *group*, not the match: the pattern
        # also consumes any escaped backslashes (``\\\\``) directly before
        # the ``\\N`` and those are ordinary text which must be preserved
        _add_text(s[pos:match.start(1)])
        _add_named(match.group(1))
        pos = match.end()
    _add_text(s[pos:])

    if not ret:
        ret.append((s, None, None, None))

    return tuple(ret)


def unparse_parsed_string(parsed: Sequence[DotFormatPart]) -> str:
def _convert_tup(tup: DotFormatPart) -> str:
ret, field_name, format_spec, conversion = tup
ret = ret.replace('{', '{{')
ret = ret.replace('}', '}}')
ret = curly_escape(ret)
if field_name is not None:
ret += '{' + field_name
if conversion:
Expand Down Expand Up @@ -786,10 +801,6 @@ def _fix_py36_plus(contents_text: str, *, min_version: Version) -> str:
return contents_text
for i, token in reversed_enumerate(tokens):
if token.offset in visitor.fstrings:
# TODO: handle \N escape sequences
if r'\N' in token.src:
continue

paren = i + 3
if tokens_to_src(tokens[i + 1:paren + 1]) != '.format(':
continue
Expand Down
12 changes: 3 additions & 9 deletions pyupgrade/_plugins/percent_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from pyupgrade._data import register
from pyupgrade._data import State
from pyupgrade._data import TokenFunc
from pyupgrade._string_helpers import curly_escape
from pyupgrade._token_helpers import KEYWORDS
from pyupgrade._token_helpers import remove_brace
from pyupgrade._token_helpers import victims
Expand Down Expand Up @@ -120,7 +121,8 @@ def _simplify_conversion_flag(flag: str) -> str:
def _percent_to_format(s: str) -> str:
def _handle_part(part: PercentFormat) -> str:
s, fmt = part
s = s.replace('{', '{{').replace('}', '}}')
s = curly_escape(s)

if fmt is None:
return s
else:
Expand Down Expand Up @@ -155,10 +157,6 @@ def _fix_percent_format_tuple(
*,
node_right: ast.Tuple,
) -> None:
# TODO: handle \N escape sequences
if r'\N' in tokens[i].src:
return

# TODO: this is overly timid
paren = i + 4
if tokens_to_src(tokens[i + 1:paren + 1]) != ' % (':
Expand All @@ -181,10 +179,6 @@ def _fix_percent_format_dict(
*,
node_right: ast.Dict,
) -> None:
# TODO: handle \N escape sequences
if r'\N' in tokens[i].src:
return

seen_keys: Set[str] = set()
keys = {}

Expand Down
13 changes: 13 additions & 0 deletions pyupgrade/_string_helpers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import codecs
import re
import string
import sys

Expand All @@ -8,6 +9,18 @@
def is_ascii(s: str) -> bool:
    """Return whether every character of ``s`` is in ``string.printable``."""
    for char in s:
        if char not in string.printable:
            return False
    return True

# matches a named unicode escape (``\N{...}``) in the source of a string
# literal.  the lookbehind plus the optional run of backslash *pairs* ensure
# the ``\N`` itself is not escaped; only the escape is captured in group 1.
NAMED_UNICODE_RE = re.compile(r'(?<!\\)(?:\\\\)*(\\N\{[^}]+\})')


def curly_escape(s: str) -> str:
    """Double every ``{`` / ``}`` in ``s`` except those of ``\\N{...}``.

    ``s`` is string-literal source text being placed into a ``str.format``
    template: ordinary braces must be escaped by doubling, while the braces
    of a named unicode escape are part of the escape and are left alone.
    """
    def _double(text: str) -> str:
        return text.replace('{', '{{').replace('}', '}}')

    parts = []
    pos = 0
    for match in NAMED_UNICODE_RE.finditer(s):
        # cut at the start of the captured escape rather than the start of
        # the match: the pattern also consumes escaped backslashes directly
        # before the ``\\N`` and ``re.split`` would silently drop them
        parts.append(_double(s[pos:match.start(1)]))
        parts.append(match.group(1))
        pos = match.end()
    parts.append(_double(s[pos:]))
    return ''.join(parts)


def is_codec(encoding: str, name: str) -> bool:
try:
Expand Down
15 changes: 13 additions & 2 deletions tests/features/format_literals_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@ def test_roundtrip_text(s):
assert unparse_parsed_string(parse_format(s)) == s


def test_parse_format_starts_with_named():
    # technically not possible in practice (our string always starts with
    # quotes) but a leading named escape should still parse
    expected = (
        (r'\N{snowman} hi ', '0', '', None),
        (' hello', None, None, None),
    )
    assert parse_format(r'\N{snowman} hi {0} hello') == expected


@pytest.mark.parametrize(
('s', 'expected'),
(
Expand Down Expand Up @@ -49,8 +57,6 @@ def test_intentionally_not_round_trip(s, expected):
"'{' '0}'.format(1)",
# comment looks like placeholder but is not!
'("{0}" # {1}\n"{2}").format(1, 2, 3)',
# TODO: this works by accident (extended escape treated as placeholder)
r'"\N{snowman} {}".format(1)',
# don't touch f-strings (these are wrong but don't make it worse)
'f"{0}".format(a)',
),
Expand Down Expand Up @@ -101,6 +107,11 @@ def test_format_literals_noop(s):
),
# parenthesized string literals
('("{0}").format(1)', '("{}").format(1)'),
pytest.param(
r'"\N{snowman} {0}".format(1)',
r'"\N{snowman} {}".format(1)',
id='named escape sequence',
),
),
)
def test_format_literals(s, expected):
Expand Down
7 changes: 5 additions & 2 deletions tests/features/fstrings_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@
'"{:{}}".format(x, y)',
'"{a[b]}".format(a=a)',
'"{a.a[b]}".format(a=a)',
# TODO: handle \N escape sequences
r'"\N{snowman} {}".format(a)',
# not enough placeholders / placeholders missing
'"{}{}".format(a)', '"{a}{b}".format(a=a)',
# backslashes and quotes cannot nest
Expand Down Expand Up @@ -58,6 +56,11 @@ def test_fix_fstrings_noop(s):
('"{}{{}}{}".format(escaped, y)', 'f"{escaped}{{}}{y}"'),
('"{}{b}{}".format(a, c, b=b)', 'f"{a}{b}{c}"'),
('"{}".format(0x0)', 'f"{0x0}"'),
pytest.param(
r'"\N{snowman} {}".format(a)',
r'f"\N{snowman} {a}"',
id='named escape sequences',
),
# TODO: poor man's f-strings?
# '"{foo}".format(**locals())'
),
Expand Down
12 changes: 9 additions & 3 deletions tests/features/percent_format_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,9 +178,6 @@ def test_simplify_conversion_flag(s, expected):
'"%(and)s" % {"and": 2}',
# invalid string formats
'"%" % {}', '"%(hi)" % {}', '"%2" % {}',
# TODO: handle \N escape sequences
r'"%s \N{snowman}" % (a,)',
r'"%(foo)s \N{snowman}" % {"foo": 1}',
),
)
def test_percent_format_noop(s):
Expand Down Expand Up @@ -223,6 +220,15 @@ def test_percent_format_noop_if_bug_16806():
# dict
('"%(k)s" % {"k": "v"}', '"{k}".format(k="v")'),
('"%(to_list)s" % {"to_list": []}', '"{to_list}".format(to_list=[])'),
# \N escapes
(
r'"%s \N{snowman}" % (a,)',
r'"{} \N{snowman}".format(a)',
),
(
r'"%(foo)s \N{snowman}" % {"foo": 1}',
r'"{foo} \N{snowman}".format(foo=1)',
),
),
)
def test_percent_format(s, expected):
Expand Down

0 comments on commit 0981e83

Please sign in to comment.