Skip to content

Commit a4fb580

Browse files
bpo-33189: pygettext.py now accepts only literal strings (GH-6364)
as docstrings and translatable strings, and rejects bytes literals and f-string expressions. (cherry picked from commit 6952482) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent 198c0c0 commit a4fb580

File tree

3 files changed

+76
-11
lines changed

3 files changed

+76
-11
lines changed

Lib/test/test_tools/test_i18n.py

Lines changed: 65 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import os
44
import sys
55
import unittest
6-
import textwrap
6+
from textwrap import dedent
77

88
from test.support.script_helper import assert_python_ok
99
from test.test_tools import skip_if_missing, toolsdir
@@ -109,25 +109,84 @@ def test_POT_Creation_Date(self):
109109
# This will raise if the date format does not exactly match.
110110
datetime.strptime(creationDate, '%Y-%m-%d %H:%M%z')
111111

112+
def test_funcdocstring(self):
113+
for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
114+
with self.subTest(doc):
115+
msgids = self.extract_docstrings_from_str(dedent('''\
116+
def foo(bar):
117+
%s
118+
''' % doc))
119+
self.assertIn('doc', msgids)
120+
121+
def test_funcdocstring_bytes(self):
122+
msgids = self.extract_docstrings_from_str(dedent('''\
123+
def foo(bar):
124+
b"""doc"""
125+
'''))
126+
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
127+
128+
def test_funcdocstring_fstring(self):
129+
msgids = self.extract_docstrings_from_str(dedent('''\
130+
def foo(bar):
131+
f"""doc"""
132+
'''))
133+
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
134+
135+
def test_classdocstring(self):
136+
for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
137+
with self.subTest(doc):
138+
msgids = self.extract_docstrings_from_str(dedent('''\
139+
class C:
140+
%s
141+
''' % doc))
142+
self.assertIn('doc', msgids)
143+
144+
def test_classdocstring_bytes(self):
145+
msgids = self.extract_docstrings_from_str(dedent('''\
146+
class C:
147+
b"""doc"""
148+
'''))
149+
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
150+
151+
def test_classdocstring_fstring(self):
152+
msgids = self.extract_docstrings_from_str(dedent('''\
153+
class C:
154+
f"""doc"""
155+
'''))
156+
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
157+
158+
def test_msgid(self):
159+
msgids = self.extract_docstrings_from_str(
160+
'''_("""doc""" r'str' u"ing")''')
161+
self.assertIn('docstring', msgids)
162+
163+
def test_msgid_bytes(self):
164+
msgids = self.extract_docstrings_from_str('_(b"""doc""")')
165+
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
166+
167+
def test_msgid_fstring(self):
168+
msgids = self.extract_docstrings_from_str('_(f"""doc""")')
169+
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
170+
112171
def test_funcdocstring_annotated_args(self):
113172
""" Test docstrings for functions with annotated args """
114-
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
173+
msgids = self.extract_docstrings_from_str(dedent('''\
115174
def foo(bar: str):
116175
"""doc"""
117176
'''))
118177
self.assertIn('doc', msgids)
119178

120179
def test_funcdocstring_annotated_return(self):
121180
""" Test docstrings for functions with annotated return type """
122-
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
181+
msgids = self.extract_docstrings_from_str(dedent('''\
123182
def foo(bar) -> str:
124183
"""doc"""
125184
'''))
126185
self.assertIn('doc', msgids)
127186

128187
def test_funcdocstring_defvalue_args(self):
129188
""" Test docstring for functions with default arg values """
130-
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
189+
msgids = self.extract_docstrings_from_str(dedent('''\
131190
def foo(bar=()):
132191
"""doc"""
133192
'''))
@@ -137,7 +196,7 @@ def test_funcdocstring_multiple_funcs(self):
137196
""" Test docstring extraction for multiple functions combining
138197
annotated args, annotated return types and default arg values
139198
"""
140-
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
199+
msgids = self.extract_docstrings_from_str(dedent('''\
141200
def foo1(bar: tuple=()) -> str:
142201
"""doc1"""
143202
@@ -155,7 +214,7 @@ def test_classdocstring_early_colon(self):
155214
""" Test docstring extraction for a class with colons occurring within
156215
the parentheses.
157216
"""
158-
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
217+
msgids = self.extract_docstrings_from_str(dedent('''\
159218
class D(L[1:2], F({1: 2}), metaclass=M(lambda x: x)):
160219
"""doc"""
161220
'''))
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
:program:`pygettext.py` now recognizes only literal strings as docstrings
2+
and translatable strings, and rejects bytes literals and f-string expressions.

Tools/i18n/pygettext.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,10 @@ def escape_nonascii(s, encoding):
232232
return ''.join(escapes[b] for b in s.encode(encoding))
233233

234234

235+
def is_literal_string(s):
    """Return True if the token text *s* is a plain string literal.

    *s* is the source text of a string token, prefix and quotes included.
    It is accepted when it starts directly with a quote character, or with
    a single ``r``/``R``/``u``/``U`` prefix followed by a quote.  Anything
    else -- bytes literals (``b"..."``, ``rb"..."``), f-strings
    (``f"..."``, ``rf"..."``) -- is rejected.

    Slices are used instead of indexing so that an empty string or a lone
    prefix character yields False rather than raising IndexError.
    """
    quotes = ('"', "'")
    first = s[:1]
    if first in quotes:
        return True
    # Only one-character raw/unicode prefixes denote a plain str literal.
    return first in ('r', 'R', 'u', 'U') and s[1:2] in quotes
237+
238+
235239
def safe_eval(s):
236240
# unwrap quotes, safely
237241
return eval(s, {'__builtins__':{}}, {})
@@ -317,8 +321,8 @@ def __init__(self, options):
317321
def __call__(self, ttype, tstring, stup, etup, line):
318322
# dispatch
319323
## import token
320-
## print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
321-
## 'tstring:', tstring
324+
## print('ttype:', token.tok_name[ttype], 'tstring:', tstring,
325+
## file=sys.stderr)
322326
self.__state(ttype, tstring, stup[0])
323327

324328
def __waiting(self, ttype, tstring, lineno):
@@ -327,7 +331,7 @@ def __waiting(self, ttype, tstring, lineno):
327331
if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
328332
# module docstring?
329333
if self.__freshmodule:
330-
if ttype == tokenize.STRING:
334+
if ttype == tokenize.STRING and is_literal_string(tstring):
331335
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
332336
self.__freshmodule = 0
333337
elif ttype not in (tokenize.COMMENT, tokenize.NL):
@@ -353,7 +357,7 @@ def __suiteseen(self, ttype, tstring, lineno):
353357

354358
def __suitedocstring(self, ttype, tstring, lineno):
355359
# ignore any intervening noise
356-
if ttype == tokenize.STRING:
360+
if ttype == tokenize.STRING and is_literal_string(tstring):
357361
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
358362
self.__state = self.__waiting
359363
elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
@@ -378,7 +382,7 @@ def __openseen(self, ttype, tstring, lineno):
378382
if self.__data:
379383
self.__addentry(EMPTYSTRING.join(self.__data))
380384
self.__state = self.__waiting
381-
elif ttype == tokenize.STRING:
385+
elif ttype == tokenize.STRING and is_literal_string(tstring):
382386
self.__data.append(safe_eval(tstring))
383387
elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
384388
token.NEWLINE, tokenize.NL]:

0 commit comments

Comments
 (0)