diff --git a/Lib/test/test_tools/test_msgfmt.py b/Lib/test/test_tools/test_msgfmt.py index e3e3035c4f4..a6073b8be03 100644 --- a/Lib/test/test_tools/test_msgfmt.py +++ b/Lib/test/test_tools/test_msgfmt.py @@ -39,6 +39,14 @@ class CompilationTest(unittest.TestCase): self.assertDictEqual(actual._catalog, expected._catalog) + def test_po_with_bom(self): + with temp_cwd(): + Path('bom.po').write_bytes(b'\xef\xbb\xbfmsgid "Python"\nmsgstr "Pioton"\n') + + res = assert_python_failure(msgfmt, 'bom.po') + err = res.err.decode('utf-8') + self.assertIn('The file bom.po starts with a UTF-8 BOM', err) + def test_invalid_msgid_plural(self): with temp_cwd(): Path('invalid.po').write_text('''\ diff --git a/Tools/i18n/msgfmt.py b/Tools/i18n/msgfmt.py index 3f731e941ea..f005c4e7b5b 100755 --- a/Tools/i18n/msgfmt.py +++ b/Tools/i18n/msgfmt.py @@ -32,9 +32,11 @@ import getopt import struct import array from email.parser import HeaderParser +import codecs __version__ = "1.2" + MESSAGES = {} @@ -116,6 +118,15 @@ def make(filename, outfile): print(msg, file=sys.stderr) sys.exit(1) + # Guard the index: when `lines` is empty, lines[0] would raise IndexError. + if lines and lines[0].startswith(codecs.BOM_UTF8): + print( + f"The file {infile} starts with a UTF-8 BOM which is not allowed in .po files.\n" + "Please save the file without a BOM and try again.", + file=sys.stderr + ) + sys.exit(1) + section = msgctxt = None fuzzy = 0