mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
bpo-35883: Py_DecodeLocale() escapes invalid Unicode characters (GH-24843)
Python no longer fails at startup with a fatal error if a command line argument contains an invalid Unicode character. The Py_DecodeLocale() function now escapes byte sequences that would otherwise be decoded as Unicode characters outside the [U+0000; U+10ffff] range. In unicodeobject.c, the hard-coded 0x10ffff literal is replaced by the MAX_UNICODE constant.
This commit is contained in:
parent
6086ae7fd4
commit
9976834f80
4 changed files with 149 additions and 70 deletions
|
@ -94,7 +94,8 @@ NOTE: In the interpreter's initialization phase, some globals are currently
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Maximum code point of Unicode 6.0: 0x10ffff (1,114,111) */
|
||||
// Maximum code point of Unicode 6.0: 0x10ffff (1,114,111).
|
||||
// The value must be the same in fileutils.c.
|
||||
#define MAX_UNICODE 0x10ffff
|
||||
|
||||
#ifdef Py_DEBUG
|
||||
|
@ -1784,8 +1785,8 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end,
|
|||
*maxchar = ch;
|
||||
if (*maxchar > MAX_UNICODE) {
|
||||
PyErr_Format(PyExc_ValueError,
|
||||
"character U+%x is not in range [U+0000; U+10ffff]",
|
||||
ch);
|
||||
"character U+%x is not in range [U+0000; U+%x]",
|
||||
ch, MAX_UNICODE);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
@ -14089,7 +14090,7 @@ _PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
|
|||
{
|
||||
case PyUnicode_1BYTE_KIND: maxchar = 0xff; break;
|
||||
case PyUnicode_2BYTE_KIND: maxchar = 0xffff; break;
|
||||
case PyUnicode_4BYTE_KIND: maxchar = 0x10ffff; break;
|
||||
case PyUnicode_4BYTE_KIND: maxchar = MAX_UNICODE; break;
|
||||
default:
|
||||
Py_UNREACHABLE();
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue