mirror of
https://github.com/python/cpython.git
synced 2025-10-14 18:59:46 +00:00
gh-130057: Pygettext: Support translator comments (GH-130061)
This commit is contained in:
parent
6669905723
commit
aa845af9bb
5 changed files with 335 additions and 20 deletions
110
Lib/test/test_tools/i18n_data/comments.pot
Normal file
110
Lib/test/test_tools/i18n_data/comments.pot
Normal file
|
@ -0,0 +1,110 @@
|
|||
# SOME DESCRIPTIVE TITLE.
|
||||
# Copyright (C) YEAR ORGANIZATION
|
||||
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
|
||||
#
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: PACKAGE VERSION\n"
|
||||
"POT-Creation-Date: 2000-01-01 00:00+0000\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language-Team: LANGUAGE <LL@li.org>\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=UTF-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Generated-By: pygettext.py 1.5\n"
|
||||
|
||||
|
||||
#: comments.py:4
|
||||
msgid "foo"
|
||||
msgstr ""
|
||||
|
||||
#. i18n: This is a translator comment
|
||||
#: comments.py:7
|
||||
msgid "bar"
|
||||
msgstr ""
|
||||
|
||||
#. i18n: This is a translator comment
|
||||
#. i18n: This is another translator comment
|
||||
#: comments.py:11
|
||||
msgid "baz"
|
||||
msgstr ""
|
||||
|
||||
#. i18n: This is a translator comment
|
||||
#. with multiple
|
||||
#. lines
|
||||
#: comments.py:16
|
||||
msgid "qux"
|
||||
msgstr ""
|
||||
|
||||
#. i18n: This is a translator comment
|
||||
#: comments.py:21
|
||||
msgid "quux"
|
||||
msgstr ""
|
||||
|
||||
#. i18n: This is a translator comment
|
||||
#. with multiple lines
|
||||
#. i18n: This is another translator comment
|
||||
#. with multiple lines
|
||||
#: comments.py:27
|
||||
msgid "corge"
|
||||
msgstr ""
|
||||
|
||||
#: comments.py:31
|
||||
msgid "grault"
|
||||
msgstr ""
|
||||
|
||||
#. i18n: This is another translator comment
|
||||
#: comments.py:36
|
||||
msgid "garply"
|
||||
msgstr ""
|
||||
|
||||
#: comments.py:40
|
||||
msgid "george"
|
||||
msgstr ""
|
||||
|
||||
#. i18n: This is another translator comment
|
||||
#: comments.py:45
|
||||
msgid "waldo"
|
||||
msgstr ""
|
||||
|
||||
#. i18n: This is a translator comment
|
||||
#. i18n: This is also a translator comment
|
||||
#. i18n: This is another translator comment
|
||||
#: comments.py:50
|
||||
msgid "waldo2"
|
||||
msgstr ""
|
||||
|
||||
#. i18n: This is a translator comment
|
||||
#. i18n: This is another translator comment
|
||||
#. i18n: This is yet another translator comment
|
||||
#. i18n: This is a translator comment
|
||||
#. with multiple lines
|
||||
#: comments.py:53 comments.py:56 comments.py:59 comments.py:63
|
||||
msgid "fred"
|
||||
msgstr ""
|
||||
|
||||
#: comments.py:65
|
||||
msgid "plugh"
|
||||
msgstr ""
|
||||
|
||||
#: comments.py:67
|
||||
msgid "foobar"
|
||||
msgstr ""
|
||||
|
||||
#. i18n: This is a translator comment
|
||||
#: comments.py:71
|
||||
msgid "xyzzy"
|
||||
msgstr ""
|
||||
|
||||
#: comments.py:72
|
||||
msgid "thud"
|
||||
msgstr ""
|
||||
|
||||
#. i18n: This is a translator comment
|
||||
#. i18n: This is another translator comment
|
||||
#. i18n: This is yet another translator comment
|
||||
#: comments.py:78
|
||||
msgid "foos"
|
||||
msgstr ""
|
||||
|
78
Lib/test/test_tools/i18n_data/comments.py
Normal file
78
Lib/test/test_tools/i18n_data/comments.py
Normal file
|
@ -0,0 +1,78 @@
|
|||
from gettext import gettext as _
|
||||
|
||||
# Not a translator comment
|
||||
_('foo')
|
||||
|
||||
# i18n: This is a translator comment
|
||||
_('bar')
|
||||
|
||||
# i18n: This is a translator comment
|
||||
# i18n: This is another translator comment
|
||||
_('baz')
|
||||
|
||||
# i18n: This is a translator comment
|
||||
# with multiple
|
||||
# lines
|
||||
_('qux')
|
||||
|
||||
# This comment should not be included because
|
||||
# it does not start with the prefix
|
||||
# i18n: This is a translator comment
|
||||
_('quux')
|
||||
|
||||
# i18n: This is a translator comment
|
||||
# with multiple lines
|
||||
# i18n: This is another translator comment
|
||||
# with multiple lines
|
||||
_('corge')
|
||||
|
||||
# i18n: This comment should be ignored
|
||||
|
||||
_('grault')
|
||||
|
||||
# i18n: This comment should be ignored
|
||||
|
||||
# i18n: This is another translator comment
|
||||
_('garply')
|
||||
|
||||
# i18n: comment should be ignored
|
||||
x = 1
|
||||
_('george')
|
||||
|
||||
# i18n: This comment should be ignored
|
||||
x = 1
|
||||
# i18n: This is another translator comment
|
||||
_('waldo')
|
||||
|
||||
# i18n: This is a translator comment
|
||||
x = 1 # i18n: This is also a translator comment
|
||||
# i18n: This is another translator comment
|
||||
_('waldo2')
|
||||
|
||||
# i18n: This is a translator comment
|
||||
_('fred')
|
||||
|
||||
# i18n: This is another translator comment
|
||||
_('fred')
|
||||
|
||||
# i18n: This is yet another translator comment
|
||||
_('fred')
|
||||
|
||||
# i18n: This is a translator comment
|
||||
# with multiple lines
|
||||
_('fred')
|
||||
|
||||
_('plugh') # i18n: This comment should be ignored
|
||||
|
||||
_('foo' # i18n: This comment should be ignored
|
||||
'bar') # i18n: This comment should be ignored
|
||||
|
||||
# i18n: This is a translator comment
|
||||
_('xyzzy')
|
||||
_('thud')
|
||||
|
||||
|
||||
## i18n: This is a translator comment
|
||||
# # i18n: This is another translator comment
|
||||
### ### i18n: This is yet another translator comment
|
||||
_('foos')
|
|
@ -87,7 +87,8 @@ class Test_pygettext(unittest.TestCase):
|
|||
self.maxDiff = None
|
||||
self.assertEqual(normalize_POT_file(expected), normalize_POT_file(actual))
|
||||
|
||||
def extract_from_str(self, module_content, *, args=(), strict=True, with_stderr=False):
|
||||
def extract_from_str(self, module_content, *, args=(), strict=True,
|
||||
with_stderr=False, raw=False):
|
||||
"""Return all msgids extracted from module_content."""
|
||||
filename = 'test.py'
|
||||
with temp_cwd(None):
|
||||
|
@ -98,10 +99,11 @@ class Test_pygettext(unittest.TestCase):
|
|||
self.assertEqual(res.err, b'')
|
||||
with open('messages.pot', encoding='utf-8') as fp:
|
||||
data = fp.read()
|
||||
msgids = self.get_msgids(data)
|
||||
if not raw:
|
||||
data = self.get_msgids(data)
|
||||
if not with_stderr:
|
||||
return msgids
|
||||
return msgids, res.err
|
||||
return data
|
||||
return data, res.err
|
||||
|
||||
def extract_docstrings_from_str(self, module_content):
|
||||
"""Return all docstrings extracted from module_content."""
|
||||
|
@ -381,7 +383,8 @@ class Test_pygettext(unittest.TestCase):
|
|||
contents = input_file.read_text(encoding='utf-8')
|
||||
with temp_cwd(None):
|
||||
Path(input_file.name).write_text(contents)
|
||||
assert_python_ok('-Xutf8', self.script, '--docstrings', input_file.name)
|
||||
assert_python_ok('-Xutf8', self.script, '--docstrings',
|
||||
'--add-comments=i18n:', input_file.name)
|
||||
output = Path('messages.pot').read_text(encoding='utf-8')
|
||||
|
||||
expected = output_file.read_text(encoding='utf-8')
|
||||
|
@ -437,6 +440,51 @@ class Test_pygettext(unittest.TestCase):
|
|||
"*** test.py:3: Variable positional arguments are not allowed in gettext calls\n"
|
||||
)
|
||||
|
||||
def test_extract_all_comments(self):
    """
    Check that --add-comments (or its short form -c) given
    without a tag extracts the comment preceding a gettext call.
    """
    source = dedent('''\
    # Translator comment
    _("foo")
    ''')
    for option in ('--add-comments', '-c'):
        with self.subTest(arg=option):
            # raw=True returns the POT file text instead of the msgids
            pot = self.extract_from_str(source, args=(option,), raw=True)
            self.assertIn('#. Translator comment', pot)
|
||||
|
||||
def test_comments_with_multiple_tags(self):
    """
    Check that the --add-comments option can be repeated, and that
    only comments starting with one of the given tags are extracted.
    """
    source = dedent('''\
    # foo: comment
    _("foo")

    # bar: comment
    _("bar")

    # baz: comment
    _("baz")
    ''')
    for template in ('--add-comments={}', '-c{}'):
        with self.subTest(arg=template):
            # Pass the option twice, once per tag
            options = tuple(template.format(tag) for tag in ('foo:', 'bar:'))
            pot = self.extract_from_str(source, args=options, raw=True)
            self.assertIn('#. foo: comment', pot)
            self.assertIn('#. bar: comment', pot)
            # 'baz:' was never requested, so its comment must be absent
            self.assertNotIn('#. baz: comment', pot)
|
||||
|
||||
def test_comments_not_extracted_without_tags(self):
    """
    Check that no extracted-comment lines ('#.') are written
    when --add-comments is not passed at all.
    """
    source = dedent('''\
    # Translator comment
    _("foo")
    ''')
    pot = self.extract_from_str(source, raw=True)
    self.assertNotIn('#.', pot)
|
||||
|
||||
|
||||
def update_POT_snapshots():
|
||||
for input_file in DATA_DIR.glob('*.py'):
|
||||
|
@ -444,7 +492,8 @@ def update_POT_snapshots():
|
|||
contents = input_file.read_bytes()
|
||||
with temp_cwd(None):
|
||||
Path(input_file.name).write_bytes(contents)
|
||||
assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings', input_file.name)
|
||||
assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings',
|
||||
'--add-comments=i18n:', input_file.name)
|
||||
output = Path('messages.pot').read_text(encoding='utf-8')
|
||||
|
||||
output = normalize_POT_file(output)
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Add support for translator comments in :program:`pygettext.py`.
|
|
@ -46,6 +46,12 @@ Options:
|
|||
--extract-all
|
||||
Extract all strings.
|
||||
|
||||
-cTAG
|
||||
--add-comments=TAG
|
||||
Extract translator comments. Comments must start with TAG and
|
||||
must precede the gettext call. Multiple -cTAG options are allowed.
|
||||
In that case, any comment matching any of the TAGs will be extracted.
|
||||
|
||||
-d name
|
||||
--default-domain=name
|
||||
Rename the default output file from messages.pot to name.pot.
|
||||
|
@ -141,7 +147,9 @@ import importlib.util
|
|||
import os
|
||||
import sys
|
||||
import time
|
||||
import tokenize
|
||||
from dataclasses import dataclass, field
|
||||
from io import BytesIO
|
||||
from operator import itemgetter
|
||||
|
||||
__version__ = '1.5'
|
||||
|
@ -302,12 +310,30 @@ class Message:
|
|||
msgctxt: str | None
|
||||
locations: set[Location] = field(default_factory=set)
|
||||
is_docstring: bool = False
|
||||
comments: list[str] = field(default_factory=list)
|
||||
|
||||
def add_location(self, filename, lineno, msgid_plural=None, *, is_docstring=False):
|
||||
def add_location(self, filename, lineno, msgid_plural=None, *,
|
||||
is_docstring=False, comments=None):
|
||||
if self.msgid_plural is None:
|
||||
self.msgid_plural = msgid_plural
|
||||
self.locations.add(Location(filename, lineno))
|
||||
self.is_docstring |= is_docstring
|
||||
if comments:
|
||||
self.comments.extend(comments)
|
||||
|
||||
|
||||
def get_source_comments(source):
    """
    Return a dictionary mapping line numbers to
    comments in the source code.

    *source* may be the raw bytes of a Python file or already decoded
    text (str).  Each key is the 1-based number of the line on which a
    comment token starts; each value is the comment text with any
    leading combination of '#', spaces and tabs removed.  Every comment
    token is recorded, whether it stands alone on its line or trails
    code on the same line.
    """
    if isinstance(source, str):
        # tokenize.tokenize() only accepts a bytes-producing readline;
        # decoded text has to go through generate_tokens() instead.
        from io import StringIO
        tokens = tokenize.generate_tokens(StringIO(source).readline)
    else:
        tokens = tokenize.tokenize(BytesIO(source).readline)

    comments = {}
    for token in tokens:
        if token.type == tokenize.COMMENT:
            # Remove any leading combination of '#' and whitespace
            comments[token.start[0]] = token.string.lstrip('# \t')
    return comments
|
||||
|
||||
|
||||
class GettextVisitor(ast.NodeVisitor):
|
||||
|
@ -316,10 +342,18 @@ class GettextVisitor(ast.NodeVisitor):
|
|||
self.options = options
|
||||
self.filename = None
|
||||
self.messages = {}
|
||||
self.comments = {}
|
||||
|
||||
def visit_file(self, source, filename):
|
||||
try:
|
||||
module_tree = ast.parse(source)
|
||||
except SyntaxError:
|
||||
return
|
||||
|
||||
def visit_file(self, node, filename):
|
||||
self.filename = filename
|
||||
self.visit(node)
|
||||
if self.options.comment_tags:
|
||||
self.comments = get_source_comments(source)
|
||||
self.visit(module_tree)
|
||||
|
||||
def visit_Module(self, node):
|
||||
self._extract_docstring(node)
|
||||
|
@ -372,14 +406,51 @@ class GettextVisitor(ast.NodeVisitor):
|
|||
msg_data[arg_type] = arg.value
|
||||
|
||||
lineno = node.lineno
|
||||
self._add_message(lineno, **msg_data)
|
||||
comments = self._extract_comments(node)
|
||||
self._add_message(lineno, **msg_data, comments=comments)
|
||||
|
||||
def _extract_comments(self, node):
|
||||
"""Extract translator comments.
|
||||
|
||||
Translator comments must precede the gettext call and
|
||||
start with one of the comment prefixes defined by
|
||||
--add-comments=TAG. See the tests for examples.
|
||||
"""
|
||||
if not self.options.comment_tags:
|
||||
return []
|
||||
|
||||
comments = []
|
||||
lineno = node.lineno - 1
|
||||
# Collect an unbroken sequence of comments starting from
|
||||
# the line above the gettext call.
|
||||
while lineno >= 1:
|
||||
comment = self.comments.get(lineno)
|
||||
if comment is None:
|
||||
break
|
||||
comments.append(comment)
|
||||
lineno -= 1
|
||||
|
||||
# Find the first translator comment in the sequence and
|
||||
# return all comments starting from that comment.
|
||||
comments = comments[::-1]
|
||||
first_index = next((i for i, comment in enumerate(comments)
|
||||
if self._is_translator_comment(comment)), None)
|
||||
if first_index is None:
|
||||
return []
|
||||
return comments[first_index:]
|
||||
|
||||
def _is_translator_comment(self, comment):
|
||||
return comment.startswith(self.options.comment_tags)
|
||||
|
||||
def _add_message(
|
||||
self, lineno, msgid, msgid_plural=None, msgctxt=None, *,
|
||||
is_docstring=False):
|
||||
is_docstring=False, comments=None):
|
||||
if msgid in self.options.toexclude:
|
||||
return
|
||||
|
||||
if not comments:
|
||||
comments = []
|
||||
|
||||
key = self._key_for(msgid, msgctxt)
|
||||
message = self.messages.get(key)
|
||||
if message:
|
||||
|
@ -388,6 +459,7 @@ class GettextVisitor(ast.NodeVisitor):
|
|||
lineno,
|
||||
msgid_plural,
|
||||
is_docstring=is_docstring,
|
||||
comments=comments,
|
||||
)
|
||||
else:
|
||||
self.messages[key] = Message(
|
||||
|
@ -396,6 +468,7 @@ class GettextVisitor(ast.NodeVisitor):
|
|||
msgctxt=msgctxt,
|
||||
locations={Location(self.filename, lineno)},
|
||||
is_docstring=is_docstring,
|
||||
comments=comments,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
|
@ -435,6 +508,10 @@ def write_pot_file(messages, options, fp):
|
|||
|
||||
for key, locations in sorted_keys:
|
||||
msg = messages[key]
|
||||
|
||||
for comment in msg.comments:
|
||||
print(f'#. {comment}', file=fp)
|
||||
|
||||
if options.writelocations:
|
||||
# location comments are different b/w Solaris and GNU:
|
||||
if options.locationstyle == options.SOLARIS:
|
||||
|
@ -473,9 +550,9 @@ def main():
|
|||
try:
|
||||
opts, args = getopt.getopt(
|
||||
sys.argv[1:],
|
||||
'ad:DEhk:Kno:p:S:Vvw:x:X:',
|
||||
['extract-all', 'default-domain=', 'escape', 'help',
|
||||
'keyword=', 'no-default-keywords',
|
||||
'ac::d:DEhk:Kno:p:S:Vvw:x:X:',
|
||||
['extract-all', 'add-comments=?', 'default-domain=', 'escape',
|
||||
'help', 'keyword=', 'no-default-keywords',
|
||||
'add-location', 'no-location', 'output=', 'output-dir=',
|
||||
'style=', 'verbose', 'version', 'width=', 'exclude-file=',
|
||||
'docstrings', 'no-docstrings',
|
||||
|
@ -501,6 +578,7 @@ def main():
|
|||
excludefilename = ''
|
||||
docstrings = 0
|
||||
nodocstrings = {}
|
||||
comment_tags = set()
|
||||
|
||||
options = Options()
|
||||
locations = {'gnu' : options.GNU,
|
||||
|
@ -513,6 +591,8 @@ def main():
|
|||
usage(0)
|
||||
elif opt in ('-a', '--extract-all'):
|
||||
options.extractall = 1
|
||||
elif opt in ('-c', '--add-comments'):
|
||||
options.comment_tags.add(arg)
|
||||
elif opt in ('-d', '--default-domain'):
|
||||
options.outfile = arg + '.pot'
|
||||
elif opt in ('-E', '--escape'):
|
||||
|
@ -558,6 +638,8 @@ def main():
|
|||
finally:
|
||||
fp.close()
|
||||
|
||||
options.comment_tags = tuple(options.comment_tags)
|
||||
|
||||
# calculate escapes
|
||||
make_escapes(not options.escape)
|
||||
|
||||
|
@ -600,12 +682,7 @@ def main():
|
|||
with open(filename, 'rb') as fp:
|
||||
source = fp.read()
|
||||
|
||||
try:
|
||||
module_tree = ast.parse(source)
|
||||
except SyntaxError:
|
||||
continue
|
||||
|
||||
visitor.visit_file(module_tree, filename)
|
||||
visitor.visit_file(source, filename)
|
||||
|
||||
# write the output
|
||||
if options.outfile == '-':
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue