gh-130057: Pygettext: Support translator comments (GH-130061)

This commit is contained in:
Tomas R. 2025-02-17 11:41:28 +01:00 committed by GitHub
parent 6669905723
commit aa845af9bb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 335 additions and 20 deletions

View file

@ -0,0 +1,110 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"POT-Creation-Date: 2000-01-01 00:00+0000\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: pygettext.py 1.5\n"
#: comments.py:4
msgid "foo"
msgstr ""
#. i18n: This is a translator comment
#: comments.py:7
msgid "bar"
msgstr ""
#. i18n: This is a translator comment
#. i18n: This is another translator comment
#: comments.py:11
msgid "baz"
msgstr ""
#. i18n: This is a translator comment
#. with multiple
#. lines
#: comments.py:16
msgid "qux"
msgstr ""
#. i18n: This is a translator comment
#: comments.py:21
msgid "quux"
msgstr ""
#. i18n: This is a translator comment
#. with multiple lines
#. i18n: This is another translator comment
#. with multiple lines
#: comments.py:27
msgid "corge"
msgstr ""
#: comments.py:31
msgid "grault"
msgstr ""
#. i18n: This is another translator comment
#: comments.py:36
msgid "garply"
msgstr ""
#: comments.py:40
msgid "george"
msgstr ""
#. i18n: This is another translator comment
#: comments.py:45
msgid "waldo"
msgstr ""
#. i18n: This is a translator comment
#. i18n: This is also a translator comment
#. i18n: This is another translator comment
#: comments.py:50
msgid "waldo2"
msgstr ""
#. i18n: This is a translator comment
#. i18n: This is another translator comment
#. i18n: This is yet another translator comment
#. i18n: This is a translator comment
#. with multiple lines
#: comments.py:53 comments.py:56 comments.py:59 comments.py:63
msgid "fred"
msgstr ""
#: comments.py:65
msgid "plugh"
msgstr ""
#: comments.py:67
msgid "foobar"
msgstr ""
#. i18n: This is a translator comment
#: comments.py:71
msgid "xyzzy"
msgstr ""
#: comments.py:72
msgid "thud"
msgstr ""
#. i18n: This is a translator comment
#. i18n: This is another translator comment
#. i18n: This is yet another translator comment
#: comments.py:78
msgid "foos"
msgstr ""

View file

@ -0,0 +1,78 @@
from gettext import gettext as _
# Not a translator comment
_('foo')
# i18n: This is a translator comment
_('bar')
# i18n: This is a translator comment
# i18n: This is another translator comment
_('baz')
# i18n: This is a translator comment
# with multiple
# lines
_('qux')
# This comment should not be included because
# it does not start with the prefix
# i18n: This is a translator comment
_('quux')
# i18n: This is a translator comment
# with multiple lines
# i18n: This is another translator comment
# with multiple lines
_('corge')
# i18n: This comment should be ignored
_('grault')
# i18n: This comment should be ignored
# i18n: This is another translator comment
_('garply')
# i18n: comment should be ignored
x = 1
_('george')
# i18n: This comment should be ignored
x = 1
# i18n: This is another translator comment
_('waldo')
# i18n: This is a translator comment
x = 1 # i18n: This is also a translator comment
# i18n: This is another translator comment
_('waldo2')
# i18n: This is a translator comment
_('fred')
# i18n: This is another translator comment
_('fred')
# i18n: This is yet another translator comment
_('fred')
# i18n: This is a translator comment
# with multiple lines
_('fred')
_('plugh') # i18n: This comment should be ignored
_('foo' # i18n: This comment should be ignored
'bar') # i18n: This comment should be ignored
# i18n: This is a translator comment
_('xyzzy')
_('thud')
## i18n: This is a translator comment
# # i18n: This is another translator comment
### ### i18n: This is yet another translator comment
_('foos')

View file

@ -87,7 +87,8 @@ class Test_pygettext(unittest.TestCase):
self.maxDiff = None self.maxDiff = None
self.assertEqual(normalize_POT_file(expected), normalize_POT_file(actual)) self.assertEqual(normalize_POT_file(expected), normalize_POT_file(actual))
def extract_from_str(self, module_content, *, args=(), strict=True, with_stderr=False): def extract_from_str(self, module_content, *, args=(), strict=True,
with_stderr=False, raw=False):
"""Return all msgids extracted from module_content.""" """Return all msgids extracted from module_content."""
filename = 'test.py' filename = 'test.py'
with temp_cwd(None): with temp_cwd(None):
@ -98,10 +99,11 @@ class Test_pygettext(unittest.TestCase):
self.assertEqual(res.err, b'') self.assertEqual(res.err, b'')
with open('messages.pot', encoding='utf-8') as fp: with open('messages.pot', encoding='utf-8') as fp:
data = fp.read() data = fp.read()
msgids = self.get_msgids(data) if not raw:
data = self.get_msgids(data)
if not with_stderr: if not with_stderr:
return msgids return data
return msgids, res.err return data, res.err
def extract_docstrings_from_str(self, module_content): def extract_docstrings_from_str(self, module_content):
"""Return all docstrings extracted from module_content.""" """Return all docstrings extracted from module_content."""
@ -381,7 +383,8 @@ class Test_pygettext(unittest.TestCase):
contents = input_file.read_text(encoding='utf-8') contents = input_file.read_text(encoding='utf-8')
with temp_cwd(None): with temp_cwd(None):
Path(input_file.name).write_text(contents) Path(input_file.name).write_text(contents)
assert_python_ok('-Xutf8', self.script, '--docstrings', input_file.name) assert_python_ok('-Xutf8', self.script, '--docstrings',
'--add-comments=i18n:', input_file.name)
output = Path('messages.pot').read_text(encoding='utf-8') output = Path('messages.pot').read_text(encoding='utf-8')
expected = output_file.read_text(encoding='utf-8') expected = output_file.read_text(encoding='utf-8')
@ -437,6 +440,51 @@ class Test_pygettext(unittest.TestCase):
"*** test.py:3: Variable positional arguments are not allowed in gettext calls\n" "*** test.py:3: Variable positional arguments are not allowed in gettext calls\n"
) )
def test_extract_all_comments(self):
    """
    Check that --add-comments / -c given without a TAG picks up
    every comment that directly precedes a gettext call.
    """
    # The module under test is the same for both spellings of the
    # option, so build it once outside the loop.
    module_source = dedent('''\
        # Translator comment
        _("foo")
    ''')
    for flag in ('--add-comments', '-c'):
        with self.subTest(arg=flag):
            # raw=True returns the full POT text rather than just the
            # msgids, so the '#.' comment lines can be inspected.
            pot = self.extract_from_str(module_source, args=(flag,),
                                        raw=True)
            self.assertIn('#. Translator comment', pot)
def test_comments_with_multiple_tags(self):
    """
    Check that --add-comments / -c may be repeated and that a comment
    is extracted when it matches any one of the given tags.
    """
    module_source = dedent('''\
        # foo: comment
        _("foo")
        # bar: comment
        _("bar")
        # baz: comment
        _("baz")
    ''')
    for template in ('--add-comments={}', '-c{}'):
        with self.subTest(arg=template):
            # Two tags are requested; 'baz:' is deliberately left out.
            tag_args = (template.format('foo:'), template.format('bar:'))
            pot = self.extract_from_str(module_source, args=tag_args,
                                        raw=True)
            self.assertIn('#. foo: comment', pot)
            self.assertIn('#. bar: comment', pot)
            self.assertNotIn('#. baz: comment', pot)
def test_comments_not_extracted_without_tags(self):
    """
    Check that no translator comments appear in the output when
    --add-comments is not passed at all.
    """
    module_source = dedent('''\
        # Translator comment
        _("foo")
    ''')
    pot = self.extract_from_str(module_source, raw=True)
    # '#.' is the prefix of every extracted-comment line in a POT file,
    # so its absence means nothing was extracted.
    self.assertNotIn('#.', pot)
def update_POT_snapshots(): def update_POT_snapshots():
for input_file in DATA_DIR.glob('*.py'): for input_file in DATA_DIR.glob('*.py'):
@ -444,7 +492,8 @@ def update_POT_snapshots():
contents = input_file.read_bytes() contents = input_file.read_bytes()
with temp_cwd(None): with temp_cwd(None):
Path(input_file.name).write_bytes(contents) Path(input_file.name).write_bytes(contents)
assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings', input_file.name) assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings',
'--add-comments=i18n:', input_file.name)
output = Path('messages.pot').read_text(encoding='utf-8') output = Path('messages.pot').read_text(encoding='utf-8')
output = normalize_POT_file(output) output = normalize_POT_file(output)

View file

@ -0,0 +1 @@
Add support for translator comments in :program:`pygettext.py`.

View file

@ -46,6 +46,12 @@ Options:
--extract-all --extract-all
Extract all strings. Extract all strings.
-cTAG
--add-comments=TAG
Extract translator comments. Comments must start with TAG and
must precede the gettext call. Multiple -cTAG options are allowed.
In that case, any comment matching any of the TAGs will be extracted.
-d name -d name
--default-domain=name --default-domain=name
Rename the default output file from messages.pot to name.pot. Rename the default output file from messages.pot to name.pot.
@ -141,7 +147,9 @@ import importlib.util
import os import os
import sys import sys
import time import time
import tokenize
from dataclasses import dataclass, field from dataclasses import dataclass, field
from io import BytesIO
from operator import itemgetter from operator import itemgetter
__version__ = '1.5' __version__ = '1.5'
@ -302,12 +310,30 @@ class Message:
msgctxt: str | None msgctxt: str | None
locations: set[Location] = field(default_factory=set) locations: set[Location] = field(default_factory=set)
is_docstring: bool = False is_docstring: bool = False
comments: list[str] = field(default_factory=list)
def add_location(self, filename, lineno, msgid_plural=None, *, is_docstring=False): def add_location(self, filename, lineno, msgid_plural=None, *,
is_docstring=False, comments=None):
if self.msgid_plural is None: if self.msgid_plural is None:
self.msgid_plural = msgid_plural self.msgid_plural = msgid_plural
self.locations.add(Location(filename, lineno)) self.locations.add(Location(filename, lineno))
self.is_docstring |= is_docstring self.is_docstring |= is_docstring
if comments:
self.comments.extend(comments)
def get_source_comments(source):
    """Map line numbers to the comments found in *source*.

    *source* is the module's contents as bytes.  Every COMMENT token
    reported by the tokenizer is recorded under the (1-based) number of
    the line it starts on, which includes comments that trail code on
    the same line.  The stored text has every leading '#', space and
    tab character stripped, so '## foo' and '# # foo' both become 'foo'.
    """
    token_stream = tokenize.tokenize(BytesIO(source).readline)
    return {
        tok.start[0]: tok.string.lstrip('# \t')
        for tok in token_stream
        if tok.type == tokenize.COMMENT
    }
class GettextVisitor(ast.NodeVisitor): class GettextVisitor(ast.NodeVisitor):
@ -316,10 +342,18 @@ class GettextVisitor(ast.NodeVisitor):
self.options = options self.options = options
self.filename = None self.filename = None
self.messages = {} self.messages = {}
self.comments = {}
def visit_file(self, source, filename):
try:
module_tree = ast.parse(source)
except SyntaxError:
return
def visit_file(self, node, filename):
self.filename = filename self.filename = filename
self.visit(node) if self.options.comment_tags:
self.comments = get_source_comments(source)
self.visit(module_tree)
def visit_Module(self, node): def visit_Module(self, node):
self._extract_docstring(node) self._extract_docstring(node)
@ -372,14 +406,51 @@ class GettextVisitor(ast.NodeVisitor):
msg_data[arg_type] = arg.value msg_data[arg_type] = arg.value
lineno = node.lineno lineno = node.lineno
self._add_message(lineno, **msg_data) comments = self._extract_comments(node)
self._add_message(lineno, **msg_data, comments=comments)
def _extract_comments(self, node):
    """Return the translator comments attached to the call *node*.

    The comments considered are the unbroken run of comment-bearing
    lines that ends on the line directly above the gettext call.
    Within that run, everything from the first comment that starts
    with one of the --add-comments=TAG prefixes onwards is returned,
    in source order.  An empty list is returned when no tags are
    configured or when no comment in the run carries a tag.
    See the tests for examples.
    """
    if not self.options.comment_tags:
        return []

    # Walk upwards from the line above the call, stopping at the
    # first line that has no comment recorded for it.
    run = []
    line = node.lineno - 1
    while line >= 1 and (text := self.comments.get(line)) is not None:
        run.append(text)
        line -= 1
    # The run was gathered bottom-up; restore source order.
    run.reverse()

    # Comments preceding the first tagged one are not meant for
    # translators, so drop them and keep the rest.
    for start, text in enumerate(run):
        if self._is_translator_comment(text):
            return run[start:]
    return []
def _is_translator_comment(self, comment):
    """Return True if *comment* starts with one of the configured tags.

    *comment* is the comment text with its leading '#' and whitespace
    already removed.  The tags are read from self.options.comment_tags.
    """
    tags = self.options.comment_tags
    # str.startswith() accepts only a str or a tuple of str.  The tags
    # are accumulated with .add() and only turned into a tuple by
    # main(), so normalise any other iterable (set, list, ...) here
    # instead of raising TypeError when main()'s conversion was skipped.
    if not isinstance(tags, (str, tuple)):
        tags = tuple(tags)
    return comment.startswith(tags)
def _add_message( def _add_message(
self, lineno, msgid, msgid_plural=None, msgctxt=None, *, self, lineno, msgid, msgid_plural=None, msgctxt=None, *,
is_docstring=False): is_docstring=False, comments=None):
if msgid in self.options.toexclude: if msgid in self.options.toexclude:
return return
if not comments:
comments = []
key = self._key_for(msgid, msgctxt) key = self._key_for(msgid, msgctxt)
message = self.messages.get(key) message = self.messages.get(key)
if message: if message:
@ -388,6 +459,7 @@ class GettextVisitor(ast.NodeVisitor):
lineno, lineno,
msgid_plural, msgid_plural,
is_docstring=is_docstring, is_docstring=is_docstring,
comments=comments,
) )
else: else:
self.messages[key] = Message( self.messages[key] = Message(
@ -396,6 +468,7 @@ class GettextVisitor(ast.NodeVisitor):
msgctxt=msgctxt, msgctxt=msgctxt,
locations={Location(self.filename, lineno)}, locations={Location(self.filename, lineno)},
is_docstring=is_docstring, is_docstring=is_docstring,
comments=comments,
) )
@staticmethod @staticmethod
@ -435,6 +508,10 @@ def write_pot_file(messages, options, fp):
for key, locations in sorted_keys: for key, locations in sorted_keys:
msg = messages[key] msg = messages[key]
for comment in msg.comments:
print(f'#. {comment}', file=fp)
if options.writelocations: if options.writelocations:
# location comments are different b/w Solaris and GNU: # location comments are different b/w Solaris and GNU:
if options.locationstyle == options.SOLARIS: if options.locationstyle == options.SOLARIS:
@ -473,9 +550,9 @@ def main():
try: try:
opts, args = getopt.getopt( opts, args = getopt.getopt(
sys.argv[1:], sys.argv[1:],
'ad:DEhk:Kno:p:S:Vvw:x:X:', 'ac::d:DEhk:Kno:p:S:Vvw:x:X:',
['extract-all', 'default-domain=', 'escape', 'help', ['extract-all', 'add-comments=?', 'default-domain=', 'escape',
'keyword=', 'no-default-keywords', 'help', 'keyword=', 'no-default-keywords',
'add-location', 'no-location', 'output=', 'output-dir=', 'add-location', 'no-location', 'output=', 'output-dir=',
'style=', 'verbose', 'version', 'width=', 'exclude-file=', 'style=', 'verbose', 'version', 'width=', 'exclude-file=',
'docstrings', 'no-docstrings', 'docstrings', 'no-docstrings',
@ -501,6 +578,7 @@ def main():
excludefilename = '' excludefilename = ''
docstrings = 0 docstrings = 0
nodocstrings = {} nodocstrings = {}
comment_tags = set()
options = Options() options = Options()
locations = {'gnu' : options.GNU, locations = {'gnu' : options.GNU,
@ -513,6 +591,8 @@ def main():
usage(0) usage(0)
elif opt in ('-a', '--extract-all'): elif opt in ('-a', '--extract-all'):
options.extractall = 1 options.extractall = 1
elif opt in ('-c', '--add-comments'):
options.comment_tags.add(arg)
elif opt in ('-d', '--default-domain'): elif opt in ('-d', '--default-domain'):
options.outfile = arg + '.pot' options.outfile = arg + '.pot'
elif opt in ('-E', '--escape'): elif opt in ('-E', '--escape'):
@ -558,6 +638,8 @@ def main():
finally: finally:
fp.close() fp.close()
options.comment_tags = tuple(options.comment_tags)
# calculate escapes # calculate escapes
make_escapes(not options.escape) make_escapes(not options.escape)
@ -600,12 +682,7 @@ def main():
with open(filename, 'rb') as fp: with open(filename, 'rb') as fp:
source = fp.read() source = fp.read()
try: visitor.visit_file(source, filename)
module_tree = ast.parse(source)
except SyntaxError:
continue
visitor.visit_file(module_tree, filename)
# write the output # write the output
if options.outfile == '-': if options.outfile == '-':