gh-95781: More strict format string checking in PyUnicode_FromFormatV() (GH-95784)

An unrecognized format character in PyUnicode_FromFormat() and
PyUnicode_FromFormatV() now sets a SystemError.
In previous versions, it caused the rest of the format string to be
copied as-is to the result string and any extra arguments to be discarded.
This commit is contained in:
Serhiy Storchaka 2022-08-08 19:21:07 +03:00 committed by GitHub
parent 63140b445e
commit 62f06508e7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 35 additions and 39 deletions

View file

@ -2355,6 +2355,13 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
p = f;
f++;
if (*f == '%') {
if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0)
return NULL;
f++;
return f;
}
zeropad = 0;
if (*f == '0') {
zeropad = 1;
@ -2392,14 +2399,6 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
f++;
}
}
if (*f == '%') {
/* "%.3%s" => f points to "3" */
f--;
}
}
if (*f == '\0') {
/* bogus format "%.123" => go backward, f points to "3" */
f--;
}
/* Handle %ld, %lu, %lld and %llu. */
@ -2423,7 +2422,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
++f;
}
if (f[1] == '\0')
if (f[0] != '\0' && f[1] == '\0')
writer->overallocate = 0;
switch (*f) {
@ -2616,21 +2615,9 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
break;
}
case '%':
if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0)
return NULL;
break;
default:
/* if we stumble upon an unknown formatting code, copy the rest
of the format string to the output string. (we cannot just
skip the code, since there's no way to know what's in the
argument list) */
len = strlen(p);
if (_PyUnicodeWriter_WriteLatin1String(writer, p, len) == -1)
return NULL;
f = p+len;
return f;
PyErr_Format(PyExc_SystemError, "invalid format string: %s", p);
return NULL;
}
f++;