mirror of
https://github.com/python/cpython.git
synced 2025-08-04 08:59:19 +00:00
Issue #25318: Avoid sprintf() in backslashreplace()
Rewrite backslashreplace() to be closer to PyCodec_BackslashReplaceErrors(). Also add unit tests for non-BMP characters.
This commit is contained in:
parent
b13b97d3b8
commit
797485e101
2 changed files with 22 additions and 9 deletions
|
@ -3155,7 +3155,8 @@ class ASCIITest(unittest.TestCase):
|
|||
('[\x80\xff\u20ac]', 'ignore', b'[]'),
|
||||
('[\x80\xff\u20ac]', 'replace', b'[???]'),
|
||||
('[\x80\xff\u20ac]', 'xmlcharrefreplace', b'[&#128;&#255;&#8364;]'),
|
||||
('[\x80\xff\u20ac]', 'backslashreplace', b'[\\x80\\xff\\u20ac]'),
|
||||
('[\x80\xff\u20ac\U000abcde]', 'backslashreplace',
|
||||
b'[\\x80\\xff\\u20ac\\U000abcde]'),
|
||||
('[\udc80\udcff]', 'surrogateescape', b'[\x80\xff]'),
|
||||
):
|
||||
with self.subTest(data=data, error_handler=error_handler,
|
||||
|
@ -3197,7 +3198,8 @@ class Latin1Test(unittest.TestCase):
|
|||
for data, error_handler, expected in (
|
||||
('[\u20ac\udc80]', 'ignore', b'[]'),
|
||||
('[\u20ac\udc80]', 'replace', b'[??]'),
|
||||
('[\u20ac\udc80]', 'backslashreplace', b'[\\u20ac\\udc80]'),
|
||||
('[\u20ac\U000abcde]', 'backslashreplace',
|
||||
b'[\\u20ac\\U000abcde]'),
|
||||
('[\u20ac\udc80]', 'xmlcharrefreplace', b'[&#8364;&#56448;]'),
|
||||
('[\udc80\udcff]', 'surrogateescape', b'[\x80\xff]'),
|
||||
):
|
||||
|
|
|
@ -610,14 +610,25 @@ backslashreplace(_PyBytesWriter *writer, Py_ssize_t prealloc_per_char,
|
|||
/* generate replacement */
|
||||
for (i = collstart; i < collend; ++i) {
|
||||
ch = PyUnicode_READ(kind, data, i);
|
||||
if (ch < 0x100)
|
||||
str += sprintf(str, "\\x%02x", ch);
|
||||
else if (ch < 0x10000)
|
||||
str += sprintf(str, "\\u%04x", ch);
|
||||
else {
|
||||
assert(ch <= MAX_UNICODE);
|
||||
str += sprintf(str, "\\U%08x", ch);
|
||||
*str++ = '\\';
|
||||
if (ch >= 0x00010000) {
|
||||
*str++ = 'U';
|
||||
*str++ = Py_hexdigits[(ch>>28)&0xf];
|
||||
*str++ = Py_hexdigits[(ch>>24)&0xf];
|
||||
*str++ = Py_hexdigits[(ch>>20)&0xf];
|
||||
*str++ = Py_hexdigits[(ch>>16)&0xf];
|
||||
*str++ = Py_hexdigits[(ch>>12)&0xf];
|
||||
*str++ = Py_hexdigits[(ch>>8)&0xf];
|
||||
}
|
||||
else if (ch >= 0x100) {
|
||||
*str++ = 'u';
|
||||
*str++ = Py_hexdigits[(ch>>12)&0xf];
|
||||
*str++ = Py_hexdigits[(ch>>8)&0xf];
|
||||
}
|
||||
else
|
||||
*str++ = 'x';
|
||||
*str++ = Py_hexdigits[(ch>>4)&0xf];
|
||||
*str++ = Py_hexdigits[ch&0xf];
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue