From a693eaa7104f3d2e42a3a1e0347775a3d2757b05 Mon Sep 17 00:00:00 2001 From: "Tomas R." Date: Sun, 6 Apr 2025 22:15:17 +0200 Subject: [PATCH] gh-132121: Always escape non-printable characters in pygettext (GH-132122) --- Lib/test/test_tools/i18n_data/ascii-escapes.pot | 2 +- .../2025-04-05-14-52-36.gh-issue-132121.QNoDih.rst | 1 + Tools/i18n/pygettext.py | 8 ++++---- 3 files changed, 6 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Tools-Demos/2025-04-05-14-52-36.gh-issue-132121.QNoDih.rst diff --git a/Lib/test/test_tools/i18n_data/ascii-escapes.pot b/Lib/test/test_tools/i18n_data/ascii-escapes.pot index f8e0f53b256..cc5a9f6ba61 100644 --- a/Lib/test/test_tools/i18n_data/ascii-escapes.pot +++ b/Lib/test/test_tools/i18n_data/ascii-escapes.pot @@ -41,7 +41,7 @@ msgstr "" #. some characters in the 128-255 range #: escapes.py:20 -msgid "€   ÿ" +msgid "\302\200 \302\240 ÿ" msgstr "" #. some characters >= 256 encoded as 2, 3 and 4 bytes, respectively diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-04-05-14-52-36.gh-issue-132121.QNoDih.rst b/Misc/NEWS.d/next/Tools-Demos/2025-04-05-14-52-36.gh-issue-132121.QNoDih.rst new file mode 100644 index 00000000000..1235360f9c6 --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2025-04-05-14-52-36.gh-issue-132121.QNoDih.rst @@ -0,0 +1 @@ +Always escape non-printable Unicode characters in :program:`pygettext`. diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index f0ee2ea386f..a4af1d2be82 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -190,12 +190,10 @@ def make_escapes(pass_nonascii): # Allow non-ascii characters to pass through so that e.g. 'msgid # "Höhe"' would not result in 'msgid "H\366he"'. Otherwise we # escape any character outside the 32..126 range. - mod = 128 escape = escape_ascii else: - mod = 256 escape = escape_nonascii - escapes = [r"\%03o" % i for i in range(mod)] + escapes = [r"\%03o" % i for i in range(256)] for i in range(32, 127): escapes[i] = chr(i) escapes[ord('\\')] = r'\\' @@ -206,7 +204,9 @@ def make_escapes(pass_nonascii): def escape_ascii(s, encoding): - return ''.join(escapes[ord(c)] if ord(c) < 128 else c for c in s) + return ''.join(escapes[ord(c)] if ord(c) < 128 else c + if c.isprintable() else escape_nonascii(c, encoding) + for c in s) def escape_nonascii(s, encoding):