gh-130453: pygettext: Allow specifying multiple keywords with the same function name (GH-131380)

This commit is contained in:
Tomas R. 2025-04-10 13:06:40 +02:00 committed by GitHub
parent 619edb802e
commit b6760b7fa5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 218 additions and 44 deletions

View file

@ -0,0 +1,38 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"POT-Creation-Date: 2000-01-01 00:00+0000\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: pygettext.py 1.5\n"
#: multiple_keywords.py:3
msgid "bar"
msgstr ""
#: multiple_keywords.py:5
msgctxt "baz"
msgid "qux"
msgstr ""
#: multiple_keywords.py:9
msgctxt "corge"
msgid "grault"
msgstr ""
#: multiple_keywords.py:11
msgctxt "xyzzy"
msgid "foo"
msgid_plural "foos"
msgstr[0] ""
msgstr[1] ""

View file

@ -0,0 +1,11 @@
from gettext import gettext as foo
foo('bar')
foo('baz', 'qux')
# The 't' specifier is not supported, so the following
# call is extracted as pgettext instead of ngettext.
foo('corge', 'grault', 1)
foo('xyzzy', 'foo', 'foos', 1)

View file

@ -18,7 +18,8 @@ DATA_DIR = Path(__file__).resolve().parent / 'i18n_data'
with imports_under_tool("i18n"):
from pygettext import parse_spec
from pygettext import (parse_spec, process_keywords, DEFAULTKEYWORDS,
unparse_spec)
def normalize_POT_file(pot):
@ -483,20 +484,22 @@ class Test_pygettext(unittest.TestCase):
def test_parse_keyword_spec(self):
valid = (
('foo', ('foo', {0: 'msgid'})),
('foo:1', ('foo', {0: 'msgid'})),
('foo:1,2', ('foo', {0: 'msgid', 1: 'msgid_plural'})),
('foo:1, 2', ('foo', {0: 'msgid', 1: 'msgid_plural'})),
('foo:1,2c', ('foo', {0: 'msgid', 1: 'msgctxt'})),
('foo:2c,1', ('foo', {0: 'msgid', 1: 'msgctxt'})),
('foo:2c ,1', ('foo', {0: 'msgid', 1: 'msgctxt'})),
('foo:1,2,3c', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})),
('foo:1, 2, 3c', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})),
('foo:3c,1,2', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})),
('foo', ('foo', {'msgid': 0})),
('foo:1', ('foo', {'msgid': 0})),
('foo:1,2', ('foo', {'msgid': 0, 'msgid_plural': 1})),
('foo:1, 2', ('foo', {'msgid': 0, 'msgid_plural': 1})),
('foo:1,2c', ('foo', {'msgid': 0, 'msgctxt': 1})),
('foo:2c,1', ('foo', {'msgid': 0, 'msgctxt': 1})),
('foo:2c ,1', ('foo', {'msgid': 0, 'msgctxt': 1})),
('foo:1,2,3c', ('foo', {'msgid': 0, 'msgid_plural': 1, 'msgctxt': 2})),
('foo:1, 2, 3c', ('foo', {'msgid': 0, 'msgid_plural': 1, 'msgctxt': 2})),
('foo:3c,1,2', ('foo', {'msgid': 0, 'msgid_plural': 1, 'msgctxt': 2})),
)
for spec, expected in valid:
with self.subTest(spec=spec):
self.assertEqual(parse_spec(spec), expected)
# test unparse-parse round-trip
self.assertEqual(parse_spec(unparse_spec(*expected)), expected)
invalid = (
('foo:', "Invalid keyword spec 'foo:': missing argument positions"),
@ -516,6 +519,70 @@ class Test_pygettext(unittest.TestCase):
parse_spec(spec)
self.assertEqual(str(cm.exception), message)
def test_process_keywords(self):
    """Check the name -> list-of-specs mapping built by process_keywords().

    Each case pairs the raw keyword strings and the no_default_keywords
    flag with the mapping that process_keywords() is expected to return.
    """
    default_keywords = {name: [spec] for name, spec
                        in DEFAULTKEYWORDS.items()}
    # Keep every input next to its expected result so that the two can
    # never get out of step (a plain zip() over two parallel tuples would
    # silently drop trailing cases if their lengths ever diverged).
    cases = (
        (['foo'], True,
         {'foo': [{'msgid': 0}]}),
        (['_:1,2'], True,
         {'_': [{'msgid': 0, 'msgid_plural': 1}]}),
        (['foo', 'foo:1,2'], True,
         {'foo': [{'msgid': 0}, {'msgid': 0, 'msgid_plural': 1}]}),
        (['foo'], False,
         default_keywords | {'foo': [{'msgid': 0}]}),
        (['_:1,2', '_:1c,2,3', 'pgettext'], False,
         default_keywords | {
             '_': [{'msgid': 0, 'msgid_plural': 1},
                   {'msgctxt': 0, 'msgid': 1, 'msgid_plural': 2},
                   {'msgid': 0}],
             'pgettext': [{'msgid': 0},
                          {'msgctxt': 0, 'msgid': 1}]}),
        # Duplicate entries
        (['foo', 'foo'], True,
         {'foo': [{'msgid': 0}]}),
        (['_'], False,
         default_keywords),
    )
    for keywords, no_default_keywords, expected in cases:
        with self.subTest(keywords=keywords,
                          no_default_keywords=no_default_keywords):
            processed = process_keywords(
                keywords,
                no_default_keywords=no_default_keywords)
            self.assertEqual(processed, expected)
def test_multiple_keywords_same_funcname_errors(self):
    """Check error reporting when one function name has several specs."""
    keyword_args = ('--keyword=_:1', '--keyword=_:2')

    # If at least one keyword spec for a given funcname matches,
    # no error should be printed.
    matching_source = dedent('''\
    _("foo", 42)
    _(42, "bar")
    ''')
    msgids, stderr = self.extract_from_str(
        matching_source, args=keyword_args, with_stderr=True)
    self.assertIn('foo', msgids)
    self.assertIn('bar', msgids)
    self.assertEqual(stderr, b'')

    # If no keyword spec for a given funcname matches,
    # all errors are printed.
    failing_source = dedent('''\
    _(x, 42)
    _(42, y)
    ''')
    msgids, stderr = self.extract_from_str(
        failing_source, args=keyword_args, with_stderr=True,
        strict=False)
    self.assertEqual(msgids, [''])

    expected_stderr = (
        '*** test.py:1: No keywords matched gettext call "_":\n'
        '\tkeyword="_": Expected a string constant for argument 1, got x\n'
        '\tkeyword="_:2": Expected a string constant for argument 2, got 42\n'
        '*** test.py:2: No keywords matched gettext call "_":\n'
        '\tkeyword="_": Expected a string constant for argument 1, got 42\n'
        '\tkeyword="_:2": Expected a string constant for argument 2, got y\n')
    # Normalize line endings on Windows
    stderr_text = stderr.decode('utf-8').replace('\r', '')
    self.assertEqual(stderr_text, expected_stderr)
def extract_from_snapshots():
snapshots = {
@ -526,6 +593,10 @@ def extract_from_snapshots():
'custom_keywords.py': ('--keyword=foo', '--keyword=nfoo:1,2',
'--keyword=pfoo:1c,2',
'--keyword=npfoo:1c,2,3', '--keyword=_:1,2'),
'multiple_keywords.py': ('--keyword=foo:1c,2,3', '--keyword=foo:1c,2',
'--keyword=foo:1,2',
# repeat a keyword to make sure it is extracted only once
'--keyword=foo', '--keyword=foo'),
# == Test character escaping
# Escape ascii and unicode:
'escapes.py': ('--escape', '--add-comments='),

View file

@ -0,0 +1,2 @@
Allow passing multiple ``--keyword`` options with the same function name to
:program:`pygettext`.

View file

@ -282,15 +282,15 @@ def getFilesForName(name):
# Key is the function name, value is a dictionary mapping the type of the argument
# to its 0-based position. The type is one of 'msgid', 'msgid_plural', or 'msgctxt'.
DEFAULTKEYWORDS = {
'_': {0: 'msgid'},
'gettext': {0: 'msgid'},
'ngettext': {0: 'msgid', 1: 'msgid_plural'},
'pgettext': {0: 'msgctxt', 1: 'msgid'},
'npgettext': {0: 'msgctxt', 1: 'msgid', 2: 'msgid_plural'},
'dgettext': {1: 'msgid'},
'dngettext': {1: 'msgid', 2: 'msgid_plural'},
'dpgettext': {1: 'msgctxt', 2: 'msgid'},
'dnpgettext': {1: 'msgctxt', 2: 'msgid', 3: 'msgid_plural'},
'_': {'msgid': 0},
'gettext': {'msgid': 0},
'ngettext': {'msgid': 0, 'msgid_plural': 1},
'pgettext': {'msgctxt': 0, 'msgid': 1},
'npgettext': {'msgctxt': 0, 'msgid': 1, 'msgid_plural': 2},
'dgettext': {'msgid': 1},
'dngettext': {'msgid': 1, 'msgid_plural': 2},
'dpgettext': {'msgctxt': 1, 'msgid': 2},
'dnpgettext': {'msgctxt': 1, 'msgid': 2, 'msgid_plural': 3},
}
@ -327,7 +327,7 @@ def parse_spec(spec):
parts = spec.strip().split(':', 1)
if len(parts) == 1:
name = parts[0]
return name, {0: 'msgid'}
return name, {'msgid': 0}
name, args = parts
if not args:
@ -373,7 +373,41 @@ def parse_spec(spec):
raise ValueError(f'Invalid keyword spec {spec!r}: '
'msgctxt cannot appear without msgid')
return name, {v: k for k, v in result.items()}
return name, result
def unparse_spec(name, spec):
    """Unparse a keyword spec dictionary into a string.

    *spec* maps an argument type to its 0-based position.  The plain
    ``{'msgid': 0}`` spec is rendered as *name* alone.  Any other spec is
    rendered as ``name:`` followed by the 1-based positions in ascending
    order, separated by commas, with a ``c`` suffix on the position of
    the 'msgctxt' argument.
    """
    if spec == {'msgid': 0}:
        return name

    parts = []
    for arg, pos in sorted(spec.items(), key=lambda item: item[1]):
        suffix = 'c' if arg == 'msgctxt' else ''
        parts.append(f'{pos + 1}{suffix}')
    # Join outside the f-string: reusing the enclosing quote character
    # inside a replacement field only parses on Python 3.12 and later.
    positions = ','.join(parts)
    return f'{name}:{positions}'
def process_keywords(keywords, *, no_default_keywords):
    """Build the mapping from function name to its list of keyword specs.

    *keywords* is an iterable of keyword spec strings, each of which is
    parsed with parse_spec(); whatever parse_spec() raises for an invalid
    spec propagates to the caller.  Specs that share a function name are
    collected in a list, in the order they were given.

    Unless *no_default_keywords* is true, every DEFAULTKEYWORDS spec that
    is not already present for its function name is appended after the
    custom specs for that name.
    """
    processed = {}
    for raw_spec in keywords:
        name, spec = parse_spec(raw_spec)
        specs = processed.setdefault(name, [])
        # De-duplicate on the parsed spec rather than on the raw string so
        # that equivalent spellings (e.g. 'foo' and 'foo:1') are stored
        # only once.
        if spec not in specs:
            specs.append(spec)

    if no_default_keywords:
        return processed

    # Custom keywords come first in each list; a default spec is only
    # added when the same spec was not already given explicitly.
    for name, spec in DEFAULTKEYWORDS.items():
        specs = processed.setdefault(name, [])
        if spec not in specs:
            specs.append(spec)
    return processed
@dataclass(frozen=True)
@ -459,32 +493,53 @@ class GettextVisitor(ast.NodeVisitor):
def _extract_message(self, node):
func_name = self._get_func_name(node)
spec = self.options.keywords.get(func_name)
if spec is None:
return
errors = []
specs = self.options.keywords.get(func_name, [])
for spec in specs:
err = self._extract_message_with_spec(node, spec)
if err is None:
return
errors.append(err)
max_index = max(spec)
if not errors:
return
if len(errors) == 1:
print(f'*** {self.filename}:{node.lineno}: {errors[0]}',
file=sys.stderr)
else:
# There are multiple keyword specs for the function name and
# none of them could be extracted. Print a general error
# message and list the errors for each keyword spec.
print(f'*** {self.filename}:{node.lineno}: '
f'No keywords matched gettext call "{func_name}":',
file=sys.stderr)
for spec, err in zip(specs, errors, strict=True):
unparsed = unparse_spec(func_name, spec)
print(f'\tkeyword="{unparsed}": {err}', file=sys.stderr)
def _extract_message_with_spec(self, node, spec):
"""Extract a gettext call with the given spec.
Return None if the gettext call was successfully extracted,
otherwise return an error message.
"""
max_index = max(spec.values())
has_var_positional = any(isinstance(arg, ast.Starred) for
arg in node.args[:max_index+1])
if has_var_positional:
print(f'*** {self.filename}:{node.lineno}: Variable positional '
f'arguments are not allowed in gettext calls', file=sys.stderr)
return
return ('Variable positional arguments are not '
'allowed in gettext calls')
if max_index >= len(node.args):
print(f'*** {self.filename}:{node.lineno}: Expected at least '
f'{max(spec) + 1} positional argument(s) in gettext call, '
f'got {len(node.args)}', file=sys.stderr)
return
return (f'Expected at least {max_index + 1} positional '
f'argument(s) in gettext call, got {len(node.args)}')
msg_data = {}
for position, arg_type in spec.items():
for arg_type, position in spec.items():
arg = node.args[position]
if not self._is_string_const(arg):
print(f'*** {self.filename}:{arg.lineno}: Expected a string '
f'constant for argument {position + 1}, '
f'got {ast.unparse(arg)}', file=sys.stderr)
return
return (f'Expected a string constant for argument '
f'{position + 1}, got {ast.unparse(arg)}')
msg_data[arg_type] = arg.value
lineno = node.lineno
@ -729,15 +784,12 @@ def main():
# calculate all keywords
try:
custom_keywords = dict(parse_spec(spec) for spec in options.keywords)
options.keywords = process_keywords(
options.keywords,
no_default_keywords=no_default_keywords)
except ValueError as e:
print(e, file=sys.stderr)
sys.exit(1)
options.keywords = {}
if not no_default_keywords:
options.keywords |= DEFAULTKEYWORDS
# custom keywords override default keywords
options.keywords |= custom_keywords
# initialize list of strings to exclude
if options.excludefilename: