mirror of
https://github.com/python/cpython.git
synced 2025-10-21 22:22:48 +00:00
Issue #10829: Refactor PyUnicode_FromFormat()
* Use the same function to parse the format string in the 3 steps * Fix crashes on invalid format strings
This commit is contained in:
parent
096f1a85f0
commit
968654515f
3 changed files with 103 additions and 68 deletions
|
@ -1455,9 +1455,28 @@ class UnicodeTest(string_tests.CommonTest,
|
||||||
'string, got a non-ASCII byte: 0xe9$',
|
'string, got a non-ASCII byte: 0xe9$',
|
||||||
PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii')
|
PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii')
|
||||||
|
|
||||||
|
# test "%c"
|
||||||
self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0xabcd)), '\uabcd')
|
self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0xabcd)), '\uabcd')
|
||||||
self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0x10ffff)), '\U0010ffff')
|
self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0x10ffff)), '\U0010ffff')
|
||||||
|
|
||||||
|
# test "%"
|
||||||
|
self.assertEqual(PyUnicode_FromFormat(b'%'), '%')
|
||||||
|
self.assertEqual(PyUnicode_FromFormat(b'%%'), '%')
|
||||||
|
self.assertEqual(PyUnicode_FromFormat(b'%%s'), '%s')
|
||||||
|
self.assertEqual(PyUnicode_FromFormat(b'[%%]'), '[%]')
|
||||||
|
self.assertEqual(PyUnicode_FromFormat(b'%%%s', b'abc'), '%abc')
|
||||||
|
|
||||||
|
# test "%i"
|
||||||
|
self.assertEqual(PyUnicode_FromFormat(b'%03i', c_int(10)), '010')
|
||||||
|
self.assertEqual(PyUnicode_FromFormat(b'%0.4i', c_int(10)), '0010')
|
||||||
|
|
||||||
|
# not supported: copy the raw format string. these tests are just here
|
||||||
|
# to check for crashes and should not be considered as specifications
|
||||||
|
self.assertEqual(PyUnicode_FromFormat(b'%1%s', b'abc'), '%s')
|
||||||
|
self.assertEqual(PyUnicode_FromFormat(b'%1abc'), '%1abc')
|
||||||
|
self.assertEqual(PyUnicode_FromFormat(b'%+i', c_int(10)), '%+i')
|
||||||
|
self.assertEqual(PyUnicode_FromFormat(b'%.%s', b'abc'), '%.%s')
|
||||||
|
|
||||||
# other tests
|
# other tests
|
||||||
text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
|
text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
|
||||||
self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'")
|
self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'")
|
||||||
|
|
|
@ -10,6 +10,9 @@ What's New in Python 3.3 Alpha 1?
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #10829: Refactor PyUnicode_FromFormat(), use the same function to parse
|
||||||
|
the format string in the 3 steps, fix crashes on invalid format strings.
|
||||||
|
|
||||||
- Issue #11246: Fix PyUnicode_FromFormat("%V") to decode the byte string from
|
- Issue #11246: Fix PyUnicode_FromFormat("%V") to decode the byte string from
|
||||||
UTF-8 (with replace error handler) instead of ISO-8859-1 (in strict mode).
|
UTF-8 (with replace error handler) instead of ISO-8859-1 (in strict mode).
|
||||||
Patch written by Ray Allen.
|
Patch written by Ray Allen.
|
||||||
|
|
|
@ -714,6 +714,70 @@ makefmt(char *fmt, int longflag, int longlongflag, int size_tflag,
|
||||||
*fmt = '\0';
|
*fmt = '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Helper for PyUnicode_FromFormatV(): parse the optional width, precision
   and size modifier of a single format directive.

   f must point to the '%' that introduces the directive.  The '%' is
   skipped, then the function reads:
     - a decimal width            ("%12s"  => width=12),
     - an optional ".precision"   ("%2.5s" => width=2, precision=5),
     - an optional size modifier: "l", "ll" (only when HAVE_LONG_LONG is
       defined) or "z".  A modifier is only consumed when it is directly
       followed by 'd' or 'u'; otherwise f is left on the modifier itself.

   Every p_* argument may be NULL when the caller is not interested in that
   value; otherwise it always receives a defined value (0 when the element
   is absent from the directive).

   Returns a pointer to the character following the parsed elements,
   normally the conversion character.  For the malformed directives handled
   below the pointer is moved back by one character instead, so it never
   points to the terminating NUL and a caller advancing with "f++" cannot
   step past the end of the string. */

static const char*
parse_format_flags(const char *f,
                   int *p_width, int *p_precision,
                   int *p_longflag, int *p_longlongflag, int *p_size_tflag)
{
    int width, precision, longflag, longlongflag, size_tflag;

    /* parse the width.precision part, e.g. "%2.5s" => width=2, precision=5 */
    f++;
    width = 0;
    while (Py_ISDIGIT((unsigned)*f))
        width = (width*10) + *f++ - '0';
    precision = 0;
    if (*f == '.') {
        f++;
        while (Py_ISDIGIT((unsigned)*f))
            precision = (precision*10) + *f++ - '0';
        if (*f == '%') {
            /* "%.3%s" => f points to "3" */
            f--;
        }
    }
    if (*f == '\0') {
        /* bogus format "%.1" => go backward, f points to "1" */
        f--;
    }
    if (p_width != NULL)
        *p_width = width;
    if (p_precision != NULL)
        *p_precision = precision;

    /* Handle %ld, %lu, %lld and %llu. */
    longflag = 0;
    longlongflag = 0;
    /* size_tflag is written back unconditionally below, so it has to be
       cleared here like the two other flags: it is only set to 1 in the
       'z' branch and would otherwise be returned uninitialized. */
    size_tflag = 0;

    if (*f == 'l') {
        if (f[1] == 'd' || f[1] == 'u') {
            longflag = 1;
            ++f;
        }
#ifdef HAVE_LONG_LONG
        else if (f[1] == 'l' &&
                 (f[2] == 'd' || f[2] == 'u')) {
            longlongflag = 1;
            f += 2;
        }
#endif
    }
    /* handle the size_t flag. */
    else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
        size_tflag = 1;
        ++f;
    }
    if (p_longflag != NULL)
        *p_longflag = longflag;
    if (p_longlongflag != NULL)
        *p_longlongflag = longlongflag;
    if (p_size_tflag != NULL)
        *p_size_tflag = size_tflag;
    return f;
}
|
||||||
|
|
||||||
#define appendstring(string) {for (copy = string;*copy;) *s++ = *copy++;}
|
#define appendstring(string) {for (copy = string;*copy;) *s++ = *copy++;}
|
||||||
|
|
||||||
/* size of fixed-size buffer for formatting single arguments */
|
/* size of fixed-size buffer for formatting single arguments */
|
||||||
|
@ -757,15 +821,9 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
* result in an array) */
|
* result in an array) */
|
||||||
for (f = format; *f; f++) {
|
for (f = format; *f; f++) {
|
||||||
if (*f == '%') {
|
if (*f == '%') {
|
||||||
if (*(f+1)=='%')
|
/* skip width or width.precision (eg. "1.2" of "%1.2f") */
|
||||||
continue;
|
f = parse_format_flags(f, NULL, NULL, NULL, NULL, NULL);
|
||||||
if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A' || *(f+1) == 'V')
|
if (*f == 's' || *f=='S' || *f=='R' || *f=='A' || *f=='V')
|
||||||
++callcount;
|
|
||||||
while (Py_ISDIGIT((unsigned)*f))
|
|
||||||
width = (width*10) + *f++ - '0';
|
|
||||||
while (*++f && *f != '%' && !Py_ISALPHA((unsigned)*f))
|
|
||||||
;
|
|
||||||
if (*f == 's')
|
|
||||||
++callcount;
|
++callcount;
|
||||||
}
|
}
|
||||||
else if (128 <= (unsigned char)*f) {
|
else if (128 <= (unsigned char)*f) {
|
||||||
|
@ -790,33 +848,13 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
for (f = format; *f; f++) {
|
for (f = format; *f; f++) {
|
||||||
if (*f == '%') {
|
if (*f == '%') {
|
||||||
#ifdef HAVE_LONG_LONG
|
#ifdef HAVE_LONG_LONG
|
||||||
int longlongflag = 0;
|
int longlongflag;
|
||||||
#endif
|
#endif
|
||||||
const char* p = f;
|
const char* p;
|
||||||
width = 0;
|
|
||||||
while (Py_ISDIGIT((unsigned)*f))
|
|
||||||
width = (width*10) + *f++ - '0';
|
|
||||||
while (*++f && *f != '%' && !Py_ISALPHA((unsigned)*f))
|
|
||||||
;
|
|
||||||
|
|
||||||
/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
|
p = f;
|
||||||
* they don't affect the amount of space we reserve.
|
f = parse_format_flags(f, &width, NULL,
|
||||||
*/
|
NULL, &longlongflag, NULL);
|
||||||
if (*f == 'l') {
|
|
||||||
if (f[1] == 'd' || f[1] == 'u') {
|
|
||||||
++f;
|
|
||||||
}
|
|
||||||
#ifdef HAVE_LONG_LONG
|
|
||||||
else if (f[1] == 'l' &&
|
|
||||||
(f[2] == 'd' || f[2] == 'u')) {
|
|
||||||
longlongflag = 1;
|
|
||||||
f += 2;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
|
|
||||||
++f;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (*f) {
|
switch (*f) {
|
||||||
case 'c':
|
case 'c':
|
||||||
|
@ -981,40 +1019,15 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
|
|
||||||
for (f = format; *f; f++) {
|
for (f = format; *f; f++) {
|
||||||
if (*f == '%') {
|
if (*f == '%') {
|
||||||
const char* p = f++;
|
const char* p;
|
||||||
int longflag = 0;
|
int longflag;
|
||||||
int longlongflag = 0;
|
int longlongflag;
|
||||||
int size_tflag = 0;
|
int size_tflag;
|
||||||
zeropad = (*f == '0');
|
|
||||||
/* parse the width.precision part */
|
p = f;
|
||||||
width = 0;
|
zeropad = (f[1] == '0');
|
||||||
while (Py_ISDIGIT((unsigned)*f))
|
f = parse_format_flags(f, &width, &precision,
|
||||||
width = (width*10) + *f++ - '0';
|
&longflag, &longlongflag, &size_tflag);
|
||||||
precision = 0;
|
|
||||||
if (*f == '.') {
|
|
||||||
f++;
|
|
||||||
while (Py_ISDIGIT((unsigned)*f))
|
|
||||||
precision = (precision*10) + *f++ - '0';
|
|
||||||
}
|
|
||||||
/* Handle %ld, %lu, %lld and %llu. */
|
|
||||||
if (*f == 'l') {
|
|
||||||
if (f[1] == 'd' || f[1] == 'u') {
|
|
||||||
longflag = 1;
|
|
||||||
++f;
|
|
||||||
}
|
|
||||||
#ifdef HAVE_LONG_LONG
|
|
||||||
else if (f[1] == 'l' &&
|
|
||||||
(f[2] == 'd' || f[2] == 'u')) {
|
|
||||||
longlongflag = 1;
|
|
||||||
f += 2;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
/* handle the size_t flag. */
|
|
||||||
if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
|
|
||||||
size_tflag = 1;
|
|
||||||
++f;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (*f) {
|
switch (*f) {
|
||||||
case 'c':
|
case 'c':
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue