Skip to content

bpo-33189: pygettext.py now accepts only literal strings #6364

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 65 additions & 6 deletions Lib/test/test_tools/test_i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os
import sys
import unittest
import textwrap
from textwrap import dedent

from test.support.script_helper import assert_python_ok
from test.test_tools import skip_if_missing, toolsdir
Expand Down Expand Up @@ -109,25 +109,84 @@ def test_POT_Creation_Date(self):
# This will raise if the date format does not exactly match.
datetime.strptime(creationDate, '%Y-%m-%d %H:%M%z')

def test_funcdocstring(self):
# A function docstring written as a plain, raw (r/R) or u-prefixed string
# literal must yield the msgid 'doc'; each spelling runs as its own subTest.
# NOTE(review): extract_docstrings_from_str is defined outside this view;
# presumably it returns the msgids extracted from the given source text.
for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
with self.subTest(doc):
msgids = self.extract_docstrings_from_str(dedent('''\
def foo(bar):
%s
''' % doc))
self.assertIn('doc', msgids)

def test_funcdocstring_bytes(self):
# A bytes literal in a function's docstring position must not be extracted:
# no returned msgid may contain 'doc'.
msgids = self.extract_docstrings_from_str(dedent('''\
def foo(bar):
b"""doc"""
'''))
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

def test_funcdocstring_fstring(self):
# An f-string in a function's docstring position must not be extracted:
# no returned msgid may contain 'doc'.
msgids = self.extract_docstrings_from_str(dedent('''\
def foo(bar):
f"""doc"""
'''))
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

def test_classdocstring(self):
# A class docstring written as a plain, raw (r/R) or u-prefixed string
# literal must yield the msgid 'doc'; each spelling runs as its own subTest.
for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
with self.subTest(doc):
msgids = self.extract_docstrings_from_str(dedent('''\
class C:
%s
''' % doc))
self.assertIn('doc', msgids)

def test_classdocstring_bytes(self):
# A bytes literal in a class's docstring position must not be extracted:
# no returned msgid may contain 'doc'.
msgids = self.extract_docstrings_from_str(dedent('''\
class C:
b"""doc"""
'''))
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

def test_classdocstring_fstring(self):
# An f-string in a class's docstring position must not be extracted:
# no returned msgid may contain 'doc'.
msgids = self.extract_docstrings_from_str(dedent('''\
class C:
f"""doc"""
'''))
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

def test_msgid(self):
# Adjacent plain, raw and u-prefixed literals inside a _() call must be
# joined into the single msgid 'docstring'.
msgids = self.extract_docstrings_from_str(
'''_("""doc""" r'str' u"ing")''')
self.assertIn('docstring', msgids)

def test_msgid_bytes(self):
# A bytes literal passed to _() must not be extracted: no returned msgid
# may contain 'doc'.
msgids = self.extract_docstrings_from_str('_(b"""doc""")')
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

def test_msgid_fstring(self):
# An f-string passed to _() must not be extracted: no returned msgid may
# contain 'doc'.
msgids = self.extract_docstrings_from_str('_(f"""doc""")')
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

def test_funcdocstring_annotated_args(self):
""" Test docstrings for functions with annotated args """
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
msgids = self.extract_docstrings_from_str(dedent('''\
def foo(bar: str):
"""doc"""
'''))
self.assertIn('doc', msgids)

def test_funcdocstring_annotated_return(self):
""" Test docstrings for functions with annotated return type """
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
msgids = self.extract_docstrings_from_str(dedent('''\
def foo(bar) -> str:
"""doc"""
'''))
self.assertIn('doc', msgids)

def test_funcdocstring_defvalue_args(self):
""" Test docstring for functions with default arg values """
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
msgids = self.extract_docstrings_from_str(dedent('''\
def foo(bar=()):
"""doc"""
'''))
Expand All @@ -137,7 +196,7 @@ def test_funcdocstring_multiple_funcs(self):
""" Test docstring extraction for multiple functions combining
annotated args, annotated return types and default arg values
"""
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
msgids = self.extract_docstrings_from_str(dedent('''\
def foo1(bar: tuple=()) -> str:
"""doc1"""

Expand All @@ -155,7 +214,7 @@ def test_classdocstring_early_colon(self):
""" Test docstring extraction for a class with colons occurring within
the parentheses.
"""
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
msgids = self.extract_docstrings_from_str(dedent('''\
class D(L[1:2], F({1: 2}), metaclass=M(lambda x: x)):
"""doc"""
'''))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
:program:`pygettext.py` now recognizes only literal strings as docstrings
and translatable strings, and rejects bytes literals and f-string expressions.
14 changes: 9 additions & 5 deletions Tools/i18n/pygettext.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,10 @@ def escape_nonascii(s, encoding):
return ''.join(escapes[b] for b in s.encode(encoding))


def is_literal_string(s):
    """Return True if the token text *s* is a plain ``str`` literal.

    A plain literal starts with a quote character, optionally preceded by
    exactly one of the prefixes ``r``, ``R``, ``u`` or ``U``.  Anything else
    (bytes literals such as ``b'..'``/``rb'..'``, f-strings such as
    ``f'..'``/``rf'..'``) is rejected.

    Unlike direct indexing, this never raises on an empty string or on a
    lone prefix character; both simply yield False.
    """
    # Tuple membership (rather than ``ch in 'rRuU'``) avoids the substring
    # pitfall where the empty string is "in" every string.
    if s[:1] in ('r', 'R', 'u', 'U'):
        # Strip at most one prefix character; a second prefix letter (as in
        # rb'..' or rf'..') must then fail the quote check below.
        s = s[1:]
    return s[:1] in ('"', "'")


def safe_eval(s):
    """Evaluate the source text *s* and return the resulting object.

    Used to turn the source form of a string literal (quotes, prefixes and
    escapes included) into its value.  Evaluation happens with an empty
    ``__builtins__`` mapping and empty locals, so built-in names are not
    resolvable from the evaluated expression.
    """
    # NOTE(review): this is still eval(); it should only ever be given
    # text that is known to be a string literal token.
    restricted_globals = {'__builtins__': {}}
    empty_locals = {}
    return eval(s, restricted_globals, empty_locals)
Expand Down Expand Up @@ -325,8 +329,8 @@ def __init__(self, options):
def __call__(self, ttype, tstring, stup, etup, line):
# Forwards the token type, the token text and stup[0] to the current state
# handler stored in self.__state (handlers receive that value as lineno).
# etup and line are accepted but not used here.
# dispatch
## import token
## print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
## 'tstring:', tstring
## print('ttype:', token.tok_name[ttype], 'tstring:', tstring,
## file=sys.stderr)
self.__state(ttype, tstring, stup[0])

def __waiting(self, ttype, tstring, lineno):
Expand All @@ -335,7 +339,7 @@ def __waiting(self, ttype, tstring, lineno):
if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
# module docstring?
if self.__freshmodule:
if ttype == tokenize.STRING:
if ttype == tokenize.STRING and is_literal_string(tstring):
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
self.__freshmodule = 0
elif ttype not in (tokenize.COMMENT, tokenize.NL):
Expand All @@ -361,7 +365,7 @@ def __suiteseen(self, ttype, tstring, lineno):

def __suitedocstring(self, ttype, tstring, lineno):
# ignore any intervening noise
if ttype == tokenize.STRING:
if ttype == tokenize.STRING and is_literal_string(tstring):
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
self.__state = self.__waiting
elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
Expand All @@ -386,7 +390,7 @@ def __openseen(self, ttype, tstring, lineno):
if self.__data:
self.__addentry(EMPTYSTRING.join(self.__data))
self.__state = self.__waiting
elif ttype == tokenize.STRING:
elif ttype == tokenize.STRING and is_literal_string(tstring):
self.__data.append(safe_eval(tstring))
elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
token.NEWLINE, tokenize.NL]:
Expand Down