[3.13] gh-58124: Avoid CP_UTF8 in UnicodeDecodeError (GH-137415) (#137461)

gh-58124: Avoid CP_UTF8 in UnicodeDecodeError (GH-137415)

Fix name of the Python encoding in Unicode errors of the code page
codec: use "cp65000" and "cp65001" instead of "CP_UTF7" and "CP_UTF8"
which are not valid Python codec names.
(cherry picked from commit ce1b747ff6)

Co-authored-by: Victor Stinner <vstinner@python.org>
This commit is contained in:
Miss Islington (bot) 2025-08-06 14:59:11 +02:00 committed by GitHub
parent 9cfa4dfe05
commit f2d6931656
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 5 additions and 6 deletions

View file

@ -3284,7 +3284,7 @@ class CodePageTest(unittest.TestCase):
codecs.code_page_encode, 932, '\xff')
self.assertRaisesRegex(UnicodeDecodeError, 'cp932',
codecs.code_page_decode, 932, b'\x81\x00', 'strict', True)
self.assertRaisesRegex(UnicodeDecodeError, 'CP_UTF8',
self.assertRaisesRegex(UnicodeDecodeError, 'cp65001',
codecs.code_page_decode, self.CP_UTF8, b'\xff', 'strict', True)
def check_decode(self, cp, tests):

View file

@ -0,0 +1,3 @@
Fix name of the Python encoding in Unicode errors of the code page codec:
use "cp65000" and "cp65001" instead of "CP_UTF7" and "CP_UTF8" which are not
valid Python codec names. Patch by Victor Stinner.

View file

@ -7290,10 +7290,6 @@ code_page_name(UINT code_page, PyObject **obj)
*obj = NULL;
if (code_page == CP_ACP)
return "mbcs";
if (code_page == CP_UTF7)
return "CP_UTF7";
if (code_page == CP_UTF8)
return "CP_UTF8";
*obj = PyBytes_FromFormat("cp%u", code_page);
if (*obj == NULL)

View file

@ -1087,7 +1087,7 @@ get_standard_encoding(const char *encoding, int *bytelength)
}
}
}
else if (strcmp(encoding, "CP_UTF8") == 0) {
else if (strcmp(encoding, "cp65001") == 0) {
*bytelength = 3;
return ENC_UTF8;
}