python · serhiy-storchaka · Apr 19, 2018 · Apr 3, 2018
diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py
@@ -3,7 +3,7 @@
 import os
 import sys
 import unittest
-import textwrap
+from textwrap import dedent
 
 from test.support.script_helper import assert_python_ok
 from test.test_tools import skip_if_missing, toolsdir
@@ -109,25 +109,84 @@ def test_POT_Creation_Date(self):
             # This will raise if the date format does not exactly match.
             datetime.strptime(creationDate, '%Y-%m-%d %H:%M%z')
 
+    def test_funcdocstring(self):
+        for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
+            with self.subTest(doc):
+                msgids = self.extract_docstrings_from_str(dedent('''\
+                def foo(bar):
+                    %s
+                ''' % doc))
+                self.assertIn('doc', msgids)
+
+    def test_funcdocstring_bytes(self):
+        msgids = self.extract_docstrings_from_str(dedent('''\
+        def foo(bar):
+            b"""doc"""
+        '''))
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+    def test_funcdocstring_fstring(self):
+        msgids = self.extract_docstrings_from_str(dedent('''\
+        def foo(bar):
+            f"""doc"""
+        '''))
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+    def test_classdocstring(self):
+        for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
+            with self.subTest(doc):
+                msgids = self.extract_docstrings_from_str(dedent('''\
+                class C:
+                    %s
+                ''' % doc))
+                self.assertIn('doc', msgids)
+
+    def test_classdocstring_bytes(self):
+        msgids = self.extract_docstrings_from_str(dedent('''\
+        class C:
+            b"""doc"""
+        '''))
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+    def test_classdocstring_fstring(self):
+        msgids = self.extract_docstrings_from_str(dedent('''\
+        class C:
+            f"""doc"""
+        '''))
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+    def test_msgid(self):
+        msgids = self.extract_docstrings_from_str(
+                '''_("""doc""" r'str' u"ing")''')
+        self.assertIn('docstring', msgids)
+
+    def test_msgid_bytes(self):
+        msgids = self.extract_docstrings_from_str('_(b"""doc""")')
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+    def test_msgid_fstring(self):
+        msgids = self.extract_docstrings_from_str('_(f"""doc""")')
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
     def test_funcdocstring_annotated_args(self):
         """ Test docstrings for functions with annotated args """
-        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        msgids = self.extract_docstrings_from_str(dedent('''\
         def foo(bar: str):
             """doc"""
         '''))
         self.assertIn('doc', msgids)
 
     def test_funcdocstring_annotated_return(self):
         """ Test docstrings for functions with annotated return type """
-        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        msgids = self.extract_docstrings_from_str(dedent('''\
         def foo(bar) -> str:
             """doc"""
         '''))
         self.assertIn('doc', msgids)
 
     def test_funcdocstring_defvalue_args(self):
         """ Test docstring for functions with default arg values """
-        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        msgids = self.extract_docstrings_from_str(dedent('''\
         def foo(bar=()):
             """doc"""
         '''))
@@ -137,7 +196,7 @@ def test_funcdocstring_multiple_funcs(self):
         """ Test docstring extraction for multiple functions combining
         annotated args, annotated return types and default arg values
         """
-        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        msgids = self.extract_docstrings_from_str(dedent('''\
         def foo1(bar: tuple=()) -> str:
             """doc1"""
 
@@ -155,7 +214,7 @@ def test_classdocstring_early_colon(self):
         """ Test docstring extraction for a class with colons occuring within
         the parentheses.
         """
-        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        msgids = self.extract_docstrings_from_str(dedent('''\
         class D(L[1:2], F({1: 2}), metaclass=M(lambda x: x)):
             """doc"""
         '''))

diff --git a/Misc/NEWS.d/next/Tools-Demos/2018-04-03-18-10-00.bpo-33189.QrXR00.rst b/Misc/NEWS.d/next/Tools-Demos/2018-04-03-18-10-00.bpo-33189.QrXR00.rst
@@ -0,0 +1,2 @@
+:program:`pygettext.py` now recognizes only literal strings as docstrings
+and translatable strings, and rejects bytes literals and f-string expressions.
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
@@ -232,6 +232,10 @@ def escape_nonascii(s, encoding):
     return ''.join(escapes[b] for b in s.encode(encoding))
 
 
+def is_literal_string(s):
+    return s[0] in '\'"' or (s[0] in 'rRuU' and s[1] in '\'"')
+
+
 def safe_eval(s):
     # unwrap quotes, safely
     return eval(s, {'__builtins__':{}}, {})
@@ -325,8 +329,8 @@ def __init__(self, options):
     def __call__(self, ttype, tstring, stup, etup, line):
         # dispatch
 ##        import token
-##        print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
-##              'tstring:', tstring
+##        print('ttype:', token.tok_name[ttype], 'tstring:', tstring,
+##              file=sys.stderr)
         self.__state(ttype, tstring, stup[0])
 
     def __waiting(self, ttype, tstring, lineno):
@@ -335,7 +339,7 @@ def __waiting(self, ttype, tstring, lineno):
         if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
             # module docstring?
             if self.__freshmodule:
-                if ttype == tokenize.STRING:
+                if ttype == tokenize.STRING and is_literal_string(tstring):
                     self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                     self.__freshmodule = 0
                 elif ttype not in (tokenize.COMMENT, tokenize.NL):
@@ -361,7 +365,7 @@ def __suiteseen(self, ttype, tstring, lineno):
 
     def __suitedocstring(self, ttype, tstring, lineno):
         # ignore any intervening noise
-        if ttype == tokenize.STRING:
+        if ttype == tokenize.STRING and is_literal_string(tstring):
             self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
             self.__state = self.__waiting
         elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
@@ -386,7 +390,7 @@ def __openseen(self, ttype, tstring, lineno):
             if self.__data:
                 self.__addentry(EMPTYSTRING.join(self.__data))
             self.__state = self.__waiting
-        elif ttype == tokenize.STRING:
+        elif ttype == tokenize.STRING and is_literal_string(tstring):
             self.__data.append(safe_eval(tstring))
         elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                            token.NEWLINE, tokenize.NL]:
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		:program:`pygettext.py` now recognizes only literal strings as docstrings
		and translatable strings, and rejects bytes literals and f-string expressions.