[3.14] gh-58124: Avoid CP_UTF8 in UnicodeDecodeError (GH-137415) (#137460)

gh-58124: Avoid CP_UTF8 in UnicodeDecodeError (GH-137415)

Fix the name of the Python encoding in Unicode errors of the code page
codec: use "cp65000" and "cp65001" instead of "CP_UTF7" and "CP_UTF8",
which are not valid Python codec names.
(cherry picked from commit ce1b747ff6)

Co-authored-by: Victor Stinner <vstinner@python.org>
This commit is contained in:
Miss Islington (bot) 2025-10-07 19:39:31 +02:00 committed by GitHub
parent e4e8c5a3b0
commit 57c8e3eb8e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 5 additions and 6 deletions

View file

@ -3293,7 +3293,7 @@ class CodePageTest(unittest.TestCase):
codecs.code_page_encode, 932, '\xff') codecs.code_page_encode, 932, '\xff')
self.assertRaisesRegex(UnicodeDecodeError, 'cp932', self.assertRaisesRegex(UnicodeDecodeError, 'cp932',
codecs.code_page_decode, 932, b'\x81\x00', 'strict', True) codecs.code_page_decode, 932, b'\x81\x00', 'strict', True)
self.assertRaisesRegex(UnicodeDecodeError, 'CP_UTF8', self.assertRaisesRegex(UnicodeDecodeError, 'cp65001',
codecs.code_page_decode, self.CP_UTF8, b'\xff', 'strict', True) codecs.code_page_decode, self.CP_UTF8, b'\xff', 'strict', True)
def check_decode(self, cp, tests): def check_decode(self, cp, tests):

View file

@ -0,0 +1,3 @@
Fix the name of the Python encoding in Unicode errors of the code page codec:
use "cp65000" and "cp65001" instead of "CP_UTF7" and "CP_UTF8", which are not
valid Python codec names. Patch by Victor Stinner.

View file

@ -7713,10 +7713,6 @@ code_page_name(UINT code_page, PyObject **obj)
*obj = NULL; *obj = NULL;
if (code_page == CP_ACP) if (code_page == CP_ACP)
return "mbcs"; return "mbcs";
if (code_page == CP_UTF7)
return "CP_UTF7";
if (code_page == CP_UTF8)
return "CP_UTF8";
*obj = PyBytes_FromFormat("cp%u", code_page); *obj = PyBytes_FromFormat("cp%u", code_page);
if (*obj == NULL) if (*obj == NULL)

View file

@ -1204,7 +1204,7 @@ get_standard_encoding_impl(const char *encoding, int *bytelength)
} }
} }
} }
else if (strcmp(encoding, "CP_UTF8") == 0) { else if (strcmp(encoding, "cp65001") == 0) {
*bytelength = 3; *bytelength = 3;
return ENC_UTF8; return ENC_UTF8;
} }