Skip to content

gh-108469: Update ast.unparse for unescaped quote support from PEP701 [3.12] #108553

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Sep 5, 2023
60 changes: 34 additions & 26 deletions Lib/ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1225,37 +1225,49 @@ def _write_str_avoiding_backslashes(self, string, *, quote_types=_ALL_QUOTES):

def visit_JoinedStr(self, node):
self.write("f")
if self._avoid_backslashes:
with self.buffered() as buffer:
self._write_fstring_inner(node)
return self._write_str_avoiding_backslashes("".join(buffer))

# If we don't need to avoid backslashes globally (i.e., we only need
# to avoid them inside FormattedValues), it's cosmetically preferred
# to use escaped whitespace. That is, it's preferred to use backslashes
# for cases like: f"{x}\n". To accomplish this, we keep track of what
# in our buffer corresponds to FormattedValues and what corresponds to
# Constant parts of the f-string, and allow escapes accordingly.

fstring_parts = []
for value in node.values:
with self.buffered() as buffer:
self._write_fstring_inner(value)
fstring_parts.append(
("".join(buffer), isinstance(value, Constant))
)
fstring_parts.append(("".join(buffer), isinstance(value, Constant)))

# We decide if we need to write a multi-line `f-string` since it is only
# necessary when we have "\n" inside formatted values.
use_multiline = any(
"\n" in value for value, is_constant in fstring_parts if not is_constant
)

# We then choose the quote type we use. We let `repr` do this work for
# now. This can be easily modified afterwards.
quote = repr(
"".join(value for value, is_constant in fstring_parts if is_constant)
)[0]
quote_type = quote * 3 if use_multiline else quote

new_fstring_parts = []
quote_types = list(_ALL_QUOTES)
for value, is_constant in fstring_parts:
value, quote_types = self._str_literal_helper(
value,
quote_types=quote_types,
escape_special_whitespace=is_constant,
)
if is_constant:
consecutive_quotes = 0
res = []
for c in value:
if c == "\\" or not c.isprintable():
res.append(c.encode("unicode_escape").decode("ascii"))
continue
if c == quote:
if consecutive_quotes == len(quote_type) - 1:
# escape when we see a full `quote_type`
res.append("\\")
consecutive_quotes = 0
else:
consecutive_quotes += 1
else:
consecutive_quotes = 0
res.append(c)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This logic probably belongs somewhere else, something similar to what we had with the existing f-string unparsing implementation:

https://github.com/python/cpython/blob/21229ce1f0c8fb2e580df62972296d629960f30e/Lib/ast.py#L1182C1-L1218C1

value = "".join(res)
new_fstring_parts.append(value)

value = "".join(new_fstring_parts)
quote_type = quote_types[0]
self.write(f"{quote_type}{value}{quote_type}")

def _write_fstring_inner(self, node):
Expand All @@ -1273,16 +1285,12 @@ def _write_fstring_inner(self, node):

def visit_FormattedValue(self, node):
def unparse_inner(inner):
unparser = type(self)(_avoid_backslashes=True)
unparser = type(self)(_avoid_backslashes=False)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we still need this mode, or can we drop avoid_backslashes altogether?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we can drop it.

unparser.set_precedence(_Precedence.TEST.next(), inner)
return unparser.visit(inner)

with self.delimit("{", "}"):
expr = unparse_inner(node.value)
if "\\" in expr:
raise ValueError(
"Unable to avoid backslash in f-string expression part"
)
if expr.startswith("{"):
# Separate pair of opening brackets as "{ {"
self.write(" ")
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -1860,7 +1860,7 @@ def test_random_files(self):

testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))

# TODO: Remove this once we can unparse PEP 701 syntax
# TODO: Remove this once we can untokenize PEP 701 syntax
testfiles.remove(os.path.join(tempdir, "test_fstring.py"))

for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
Expand Down
29 changes: 23 additions & 6 deletions Lib/test/test_unparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,14 @@ def test_fstrings_complicated(self):
self.check_ast_roundtrip('f"""{g(\'\'\'\n\'\'\')}"""')
self.check_ast_roundtrip('''f"a\\r\\nb"''')
self.check_ast_roundtrip('''f"\\u2028{'x'}"''')
self.check_ast_roundtrip("f\"'''{1}\\\"\\\"\\\"\"")
self.check_ast_roundtrip('f\'\\\'\\\'\\\'{1}"""\'')
self.check_ast_roundtrip('f\'\'\'\'\'\\\'\'\'\\\'{x:\n}""""\'\'\'')

def test_fstrings_pep701(self):
self.check_ast_roundtrip('f" something { my_dict["key"] } something else "')
self.check_ast_roundtrip('f"{f"{f"{f"{f"{f"{1+1}"}"}"}"}"}"')
self.check_ast_roundtrip("f'{f'{f'{f'{f'{f'{1+1}'}'}'}'}'}'")

def test_strings(self):
self.check_ast_roundtrip("u'foo'")
Expand Down Expand Up @@ -378,8 +386,15 @@ def test_invalid_fstring_value(self):
)
)

def test_invalid_fstring_backslash(self):
self.check_invalid(ast.FormattedValue(value=ast.Constant(value="\\\\")))
def test_fstring_backslash(self):
# valid since Python 3.12
self.assertEqual(ast.unparse(
ast.FormattedValue(
value=ast.Constant(value="\\\\"),
conversion=-1,
format_spec=None,
)
), "{'\\\\\\\\'}")

def test_invalid_yield_from(self):
self.check_invalid(ast.YieldFrom(value=None))
Expand Down Expand Up @@ -502,11 +517,13 @@ def test_class_bases_and_keywords(self):
self.check_src_roundtrip("class X(*args, **kwargs):\n pass")

def test_fstrings(self):
self.check_src_roundtrip('''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-\'\'\'''')
self.check_src_roundtrip('''f"\\u2028{'x'}"''')
self.check_src_roundtrip("f'-{f'*{f'+{f'.{x}.'}+'}*'}-'")
self.check_src_roundtrip("f'\\u2028{'x'}'")
self.check_src_roundtrip(r"f'{x}\n'")
self.check_src_roundtrip('''f''\'{"""\n"""}\\n''\'''')
self.check_src_roundtrip('''f''\'{f"""{x}\n"""}\\n''\'''')
self.check_src_roundtrip("f'{'\\n'}\\n'")
self.check_src_roundtrip("f'{f'{x}\\n'}\\n'")
self.check_src_roundtrip('f\'\\\'\\\'\\\'{1}"""\'')
self.check_src_roundtrip('f\'\'\'\'\'\\\'\'\'\\\'{x:\n}""""\'\'\'')

def test_docstrings(self):
docstrings = (
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
:func:`ast.unparse` now supports the new :term:`f-string` syntax introduced in
Python 3.12. Note that the :term:`f-string` quotes are reselected for simplicity
under the new syntax. (Patch by Steven Sun.)