bpo-33189: pygettext.py now accepts only literal strings (GH-6364)

pygettext.py now accepts only literal strings as docstrings and
translatable strings, and rejects bytes literals and f-string expressions.

(cherry picked from commit 69524821a8)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>

parent 198c0c0509
commit a4fb580f70

3 changed files with 76 additions and 11 deletions
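
The heart of the change is a small helper added to pygettext.py: a tokenize.STRING
token is treated as a plain literal only if it begins with a quote, or with an
r/R/u/U prefix followed by a quote, so b'...' and f'...' tokens are skipped wherever
docstrings and _() arguments are collected. The standalone sketch below reproduces
the helper exactly as added by this commit; the driver loop is illustrative only
(not part of the commit) and shows how the check classifies the token texts
exercised by the new tests.

    def is_literal_string(s):
        # True for '...', "...", and r/R/u/U-prefixed literals;
        # False for b'...', f'...', and any other prefix.
        return s[0] in '\'"' or (s[0] in 'rRuU' and s[1] in '\'"')

    # Token strings exactly as tokenize.STRING would hand them to pygettext,
    # prefixes and quotes included.
    for tok in ('"""doc"""', "r'''doc'''", 'u"doc"', 'b"""doc"""', 'f"""doc"""'):
        print(tok, '->', is_literal_string(tok))
    # The first three report True; the bytes and f-string tokens report False,
    # so pygettext no longer extracts them.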
Changed file 1 of 3 (pygettext test suite):

@@ -3,7 +3,7 @@
 import os
 import sys
 import unittest
-import textwrap
+from textwrap import dedent
 
 from test.support.script_helper import assert_python_ok
 from test.test_tools import skip_if_missing, toolsdir
@@ -109,9 +109,68 @@ class Test_pygettext(unittest.TestCase):
         # This will raise if the date format does not exactly match.
         datetime.strptime(creationDate, '%Y-%m-%d %H:%M%z')
 
+    def test_funcdocstring(self):
+        for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
+            with self.subTest(doc):
+                msgids = self.extract_docstrings_from_str(dedent('''\
+                def foo(bar):
+                    %s
+                ''' % doc))
+                self.assertIn('doc', msgids)
+
+    def test_funcdocstring_bytes(self):
+        msgids = self.extract_docstrings_from_str(dedent('''\
+        def foo(bar):
+            b"""doc"""
+        '''))
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+    def test_funcdocstring_fstring(self):
+        msgids = self.extract_docstrings_from_str(dedent('''\
+        def foo(bar):
+            f"""doc"""
+        '''))
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+    def test_classdocstring(self):
+        for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
+            with self.subTest(doc):
+                msgids = self.extract_docstrings_from_str(dedent('''\
+                class C:
+                    %s
+                ''' % doc))
+                self.assertIn('doc', msgids)
+
+    def test_classdocstring_bytes(self):
+        msgids = self.extract_docstrings_from_str(dedent('''\
+        class C:
+            b"""doc"""
+        '''))
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+    def test_classdocstring_fstring(self):
+        msgids = self.extract_docstrings_from_str(dedent('''\
+        class C:
+            f"""doc"""
+        '''))
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+    def test_msgid(self):
+        msgids = self.extract_docstrings_from_str(
+            '''_("""doc""" r'str' u"ing")''')
+        self.assertIn('docstring', msgids)
+
+    def test_msgid_bytes(self):
+        msgids = self.extract_docstrings_from_str('_(b"""doc""")')
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+    def test_msgid_fstring(self):
+        msgids = self.extract_docstrings_from_str('_(f"""doc""")')
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
     def test_funcdocstring_annotated_args(self):
         """ Test docstrings for functions with annotated args """
-        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        msgids = self.extract_docstrings_from_str(dedent('''\
         def foo(bar: str):
             """doc"""
         '''))
@@ -119,7 +178,7 @@ class Test_pygettext(unittest.TestCase):
 
     def test_funcdocstring_annotated_return(self):
         """ Test docstrings for functions with annotated return type """
-        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        msgids = self.extract_docstrings_from_str(dedent('''\
         def foo(bar) -> str:
             """doc"""
         '''))
@@ -127,7 +186,7 @@ class Test_pygettext(unittest.TestCase):
 
     def test_funcdocstring_defvalue_args(self):
         """ Test docstring for functions with default arg values """
-        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        msgids = self.extract_docstrings_from_str(dedent('''\
         def foo(bar=()):
             """doc"""
         '''))
@@ -137,7 +196,7 @@ class Test_pygettext(unittest.TestCase):
         """ Test docstring extraction for multiple functions combining
         annotated args, annotated return types and default arg values
         """
-        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        msgids = self.extract_docstrings_from_str(dedent('''\
         def foo1(bar: tuple=()) -> str:
             """doc1"""
 
@@ -155,7 +214,7 @@ class Test_pygettext(unittest.TestCase):
         """ Test docstring extraction for a class with colons occuring within
         the parentheses.
         """
-        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        msgids = self.extract_docstrings_from_str(dedent('''\
         class D(L[1:2], F({1: 2}), metaclass=M(lambda x: x)):
             """doc"""
         '''))
Changed file 2 of 3 (NEWS entry, new file):

@@ -0,0 +1,2 @@
+:program:`pygettext.py` now recognizes only literal strings as docstrings
+and translatable strings, and rejects bytes literals and f-string expressions.
Changed file 3 of 3 (pygettext.py):

@@ -232,6 +232,10 @@ def escape_nonascii(s, encoding):
     return ''.join(escapes[b] for b in s.encode(encoding))
 
 
+def is_literal_string(s):
+    return s[0] in '\'"' or (s[0] in 'rRuU' and s[1] in '\'"')
+
+
 def safe_eval(s):
     # unwrap quotes, safely
     return eval(s, {'__builtins__':{}}, {})
@@ -317,8 +321,8 @@ class TokenEater:
     def __call__(self, ttype, tstring, stup, etup, line):
         # dispatch
 ##        import token
-##        print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
-##              'tstring:', tstring
+##        print('ttype:', token.tok_name[ttype], 'tstring:', tstring,
+##              file=sys.stderr)
         self.__state(ttype, tstring, stup[0])
 
     def __waiting(self, ttype, tstring, lineno):
@@ -327,7 +331,7 @@ class TokenEater:
         if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
             # module docstring?
             if self.__freshmodule:
-                if ttype == tokenize.STRING:
+                if ttype == tokenize.STRING and is_literal_string(tstring):
                     self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                     self.__freshmodule = 0
                 elif ttype not in (tokenize.COMMENT, tokenize.NL):
@@ -353,7 +357,7 @@ class TokenEater:
 
     def __suitedocstring(self, ttype, tstring, lineno):
         # ignore any intervening noise
-        if ttype == tokenize.STRING:
+        if ttype == tokenize.STRING and is_literal_string(tstring):
             self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
             self.__state = self.__waiting
         elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
@@ -378,7 +382,7 @@ class TokenEater:
             if self.__data:
                 self.__addentry(EMPTYSTRING.join(self.__data))
             self.__state = self.__waiting
-        elif ttype == tokenize.STRING:
+        elif ttype == tokenize.STRING and is_literal_string(tstring):
             self.__data.append(safe_eval(tstring))
         elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                            token.NEWLINE, tokenize.NL]:
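
To see the new behavior end to end, one can run pygettext with docstring extraction
enabled over a small module and inspect the generated catalog. This is a usage
sketch, not part of the commit: sample.py and its contents are hypothetical, and it
assumes a CPython checkout where Tools/i18n/pygettext.py accepts the -D/--docstrings
option and writes messages.pot by default.

    # sample.py (hypothetical input)
    def greet(name):
        f"""f-string docstring -- no longer extracted"""
        return _("hello")          # literal _() argument -- extracted

    class Greeter:
        """Plain literal docstring -- extracted when -D is given."""

    # Run the extractor and inspect the result:
    #   python Tools/i18n/pygettext.py -D sample.py
    #   grep msgid messages.pot
    # Aside from the catalog header entry, only "hello" and the Greeter
    # docstring should appear as msgids.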