mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
gh-70278: Fix PyUnicode_FromFormat() with precision for %s and %V (GH-120365)
PyUnicode_FromFormat() no longer produces a trailing \ufffd character for a truncated C string when a precision is used with %s or %V. It now truncates the string just before the start of an incomplete trailing multibyte sequence.
This commit is contained in:
parent
22b8a35d6e
commit
6eb23b1311
3 changed files with 59 additions and 4 deletions
|
@@ -2581,6 +2581,7 @@ unicode_fromformat_write_utf8(_PyUnicodeWriter *writer, const char *str,
|
|||
Py_ssize_t width, Py_ssize_t precision, int flags)
|
||||
{
|
||||
/* UTF-8 */
|
||||
Py_ssize_t *pconsumed = NULL;
|
||||
Py_ssize_t length;
|
||||
if (precision == -1) {
|
||||
length = strlen(str);
|
||||
|
@@ -2590,15 +2591,23 @@ unicode_fromformat_write_utf8(_PyUnicodeWriter *writer, const char *str,
|
|||
while (length < precision && str[length]) {
|
||||
length++;
|
||||
}
|
||||
if (length == precision) {
|
||||
/* The input string is not NUL-terminated. If it ends with an
|
||||
* incomplete UTF-8 sequence, truncate the string just before it.
|
||||
* Incomplete sequences in the middle and sequences which cannot
|
||||
* be valid prefixes are still treated as errors and replaced
|
||||
* with \ufffd (U+FFFD REPLACEMENT CHARACTER). */
|
||||
pconsumed = &length;
|
||||
}
|
||||
}
|
||||
|
||||
if (width < 0) {
|
||||
return unicode_decode_utf8_writer(writer, str, length,
|
||||
_Py_ERROR_REPLACE, "replace", NULL);
|
||||
_Py_ERROR_REPLACE, "replace", pconsumed);
|
||||
}
|
||||
|
||||
PyObject *unicode = PyUnicode_DecodeUTF8Stateful(str, length,
|
||||
"replace", NULL);
|
||||
"replace", pconsumed);
|
||||
if (unicode == NULL)
|
||||
return -1;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue