mirror of
https://github.com/python/cpython.git
synced 2025-09-26 10:19:53 +00:00
Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis
and fix by Guido Vranken.
This commit is contained in:
parent
f18bf6fd2d
commit
3f95292be6
3 changed files with 159 additions and 31 deletions
|
@ -1661,7 +1661,10 @@ class UnicodeTest(string_tests.CommonTest,
|
||||||
# Test PyUnicode_FromFormat()
|
# Test PyUnicode_FromFormat()
|
||||||
def test_from_format(self):
|
def test_from_format(self):
|
||||||
support.import_module('ctypes')
|
support.import_module('ctypes')
|
||||||
from ctypes import pythonapi, py_object, c_int
|
from ctypes import (
|
||||||
|
pythonapi, py_object, sizeof,
|
||||||
|
c_int, c_long, c_longlong, c_ssize_t,
|
||||||
|
c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p)
|
||||||
if sys.maxunicode == 65535:
|
if sys.maxunicode == 65535:
|
||||||
name = "PyUnicodeUCS2_FromFormat"
|
name = "PyUnicodeUCS2_FromFormat"
|
||||||
else:
|
else:
|
||||||
|
@ -1675,9 +1678,13 @@ class UnicodeTest(string_tests.CommonTest,
|
||||||
for arg in args)
|
for arg in args)
|
||||||
return _PyUnicode_FromFormat(format, *cargs)
|
return _PyUnicode_FromFormat(format, *cargs)
|
||||||
|
|
||||||
|
def check_format(expected, format, *args):
|
||||||
|
text = PyUnicode_FromFormat(format, *args)
|
||||||
|
self.assertEqual(expected, text)
|
||||||
|
|
||||||
# ascii format, non-ascii argument
|
# ascii format, non-ascii argument
|
||||||
text = PyUnicode_FromFormat(b'ascii\x7f=%U', 'unicode\xe9')
|
check_format('ascii\x7f=unicode\xe9',
|
||||||
self.assertEqual(text, 'ascii\x7f=unicode\xe9')
|
b'ascii\x7f=%U', 'unicode\xe9')
|
||||||
|
|
||||||
# non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV()
|
# non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV()
|
||||||
# raises an error
|
# raises an error
|
||||||
|
@ -1686,25 +1693,131 @@ class UnicodeTest(string_tests.CommonTest,
|
||||||
'string, got a non-ASCII byte: 0xe9$',
|
'string, got a non-ASCII byte: 0xe9$',
|
||||||
PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii')
|
PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii')
|
||||||
|
|
||||||
self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0xabcd)), '\uabcd')
|
# test "%c"
|
||||||
self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0x10ffff)), '\U0010ffff')
|
check_format('\uabcd',
|
||||||
|
b'%c', c_int(0xabcd))
|
||||||
|
check_format('\U0010ffff',
|
||||||
|
b'%c', c_int(0x10ffff))
|
||||||
|
with self.assertRaises(OverflowError):
|
||||||
|
PyUnicode_FromFormat(b'%c', c_int(0x110000))
|
||||||
|
# Issue #18183
|
||||||
|
check_format('\U00010000\U00100000',
|
||||||
|
b'%c%c', c_int(0x10000), c_int(0x100000))
|
||||||
|
|
||||||
# other tests
|
# test "%"
|
||||||
text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
|
check_format('%',
|
||||||
self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'")
|
b'%')
|
||||||
|
check_format('%',
|
||||||
|
b'%%')
|
||||||
|
check_format('%s',
|
||||||
|
b'%%s')
|
||||||
|
check_format('[%]',
|
||||||
|
b'[%%]')
|
||||||
|
check_format('%abc',
|
||||||
|
b'%%%s', b'abc')
|
||||||
|
|
||||||
text = PyUnicode_FromFormat(b'repr=%V', 'abc', b'xyz')
|
# test %S
|
||||||
self.assertEqual(text, 'repr=abc')
|
check_format("repr=\u20acABC",
|
||||||
|
b'repr=%S', '\u20acABC')
|
||||||
|
|
||||||
|
# test %R
|
||||||
|
check_format("repr='\u20acABC'",
|
||||||
|
b'repr=%R', '\u20acABC')
|
||||||
|
|
||||||
|
# test integer formats (%i, %d, %u)
|
||||||
|
check_format('010',
|
||||||
|
b'%03i', c_int(10))
|
||||||
|
check_format('0010',
|
||||||
|
b'%0.4i', c_int(10))
|
||||||
|
check_format('-123',
|
||||||
|
b'%i', c_int(-123))
|
||||||
|
|
||||||
|
check_format('-123',
|
||||||
|
b'%d', c_int(-123))
|
||||||
|
check_format('-123',
|
||||||
|
b'%ld', c_long(-123))
|
||||||
|
check_format('-123',
|
||||||
|
b'%lld', c_longlong(-123))
|
||||||
|
check_format('-123',
|
||||||
|
b'%zd', c_ssize_t(-123))
|
||||||
|
|
||||||
|
check_format('123',
|
||||||
|
b'%u', c_uint(123))
|
||||||
|
check_format('123',
|
||||||
|
b'%lu', c_ulong(123))
|
||||||
|
check_format('123',
|
||||||
|
b'%llu', c_ulonglong(123))
|
||||||
|
check_format('123',
|
||||||
|
b'%zu', c_size_t(123))
|
||||||
|
|
||||||
|
# test long output
|
||||||
|
min_longlong = -(2 ** (8 * sizeof(c_longlong) - 1))
|
||||||
|
max_longlong = -min_longlong - 1
|
||||||
|
check_format(str(min_longlong),
|
||||||
|
b'%lld', c_longlong(min_longlong))
|
||||||
|
check_format(str(max_longlong),
|
||||||
|
b'%lld', c_longlong(max_longlong))
|
||||||
|
max_ulonglong = 2 ** (8 * sizeof(c_ulonglong)) - 1
|
||||||
|
check_format(str(max_ulonglong),
|
||||||
|
b'%llu', c_ulonglong(max_ulonglong))
|
||||||
|
PyUnicode_FromFormat(b'%p', c_void_p(-1))
|
||||||
|
|
||||||
|
# test padding (width and/or precision)
|
||||||
|
check_format('123'.rjust(10, '0'),
|
||||||
|
b'%010i', c_int(123))
|
||||||
|
check_format('123'.rjust(100),
|
||||||
|
b'%100i', c_int(123))
|
||||||
|
check_format('123'.rjust(100, '0'),
|
||||||
|
b'%.100i', c_int(123))
|
||||||
|
check_format('123'.rjust(80, '0').rjust(100),
|
||||||
|
b'%100.80i', c_int(123))
|
||||||
|
|
||||||
|
check_format('123'.rjust(10, '0'),
|
||||||
|
b'%010u', c_uint(123))
|
||||||
|
check_format('123'.rjust(100),
|
||||||
|
b'%100u', c_uint(123))
|
||||||
|
check_format('123'.rjust(100, '0'),
|
||||||
|
b'%.100u', c_uint(123))
|
||||||
|
check_format('123'.rjust(80, '0').rjust(100),
|
||||||
|
b'%100.80u', c_uint(123))
|
||||||
|
|
||||||
|
check_format('123'.rjust(10, '0'),
|
||||||
|
b'%010x', c_int(0x123))
|
||||||
|
check_format('123'.rjust(100),
|
||||||
|
b'%100x', c_int(0x123))
|
||||||
|
check_format('123'.rjust(100, '0'),
|
||||||
|
b'%.100x', c_int(0x123))
|
||||||
|
check_format('123'.rjust(80, '0').rjust(100),
|
||||||
|
b'%100.80x', c_int(0x123))
|
||||||
|
|
||||||
|
# test %A
|
||||||
|
check_format(r"%A:'abc\xe9\uabcd\U0010ffff'",
|
||||||
|
b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
|
||||||
|
|
||||||
|
# test %V
|
||||||
|
check_format('repr=abc',
|
||||||
|
b'repr=%V', 'abc', b'xyz')
|
||||||
|
|
||||||
# Test string decode from parameter of %s using utf-8.
|
# Test string decode from parameter of %s using utf-8.
|
||||||
# b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of
|
# b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of
|
||||||
# '\u4eba\u6c11'
|
# '\u4eba\u6c11'
|
||||||
text = PyUnicode_FromFormat(b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
|
check_format('repr=\u4eba\u6c11',
|
||||||
self.assertEqual(text, 'repr=\u4eba\u6c11')
|
b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
|
||||||
|
|
||||||
#Test replace error handler.
|
#Test replace error handler.
|
||||||
text = PyUnicode_FromFormat(b'repr=%V', None, b'abc\xff')
|
check_format('repr=abc\ufffd',
|
||||||
self.assertEqual(text, 'repr=abc\ufffd')
|
b'repr=%V', None, b'abc\xff')
|
||||||
|
|
||||||
|
# not supported: copy the raw format string. These tests are just here
|
||||||
|
# to check for crashes and should not be considered as specifications
|
||||||
|
check_format('%s',
|
||||||
|
b'%1%s', b'abc')
|
||||||
|
check_format('%1abc',
|
||||||
|
b'%1abc')
|
||||||
|
check_format('%+i',
|
||||||
|
b'%+i', c_int(10))
|
||||||
|
check_format('%s',
|
||||||
|
b'%.%s', b'abc')
|
||||||
|
|
||||||
# Test PyUnicode_AsWideChar()
|
# Test PyUnicode_AsWideChar()
|
||||||
def test_aswidechar(self):
|
def test_aswidechar(self):
|
||||||
|
|
12
Misc/NEWS
12
Misc/NEWS
|
@ -2,6 +2,18 @@
|
||||||
Python News
|
Python News
|
||||||
+++++++++++
|
+++++++++++
|
||||||
|
|
||||||
|
What's New in Python 3.2.7?
|
||||||
|
============================
|
||||||
|
|
||||||
|
*Release date: XXXX-XX-XX*
|
||||||
|
|
||||||
|
Core and Builtins
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
- Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis
|
||||||
|
and fix by Guido Vranken.
|
||||||
|
|
||||||
|
|
||||||
What's New in Python 3.2.6?
|
What's New in Python 3.2.6?
|
||||||
===========================
|
===========================
|
||||||
|
|
||||||
|
|
|
@ -759,15 +759,10 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
* result in an array) */
|
* result in an array) */
|
||||||
for (f = format; *f; f++) {
|
for (f = format; *f; f++) {
|
||||||
if (*f == '%') {
|
if (*f == '%') {
|
||||||
if (*(f+1)=='%')
|
f++;
|
||||||
continue;
|
while (*f && *f != '%' && !Py_ISALPHA((unsigned)*f))
|
||||||
if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A' || *(f+1) == 'V')
|
f++;
|
||||||
++callcount;
|
if (*f == 's' || *f=='S' || *f=='R' || *f=='A' || *f=='V')
|
||||||
while (Py_ISDIGIT((unsigned)*f))
|
|
||||||
width = (width*10) + *f++ - '0';
|
|
||||||
while (*++f && *f != '%' && !Py_ISALPHA((unsigned)*f))
|
|
||||||
;
|
|
||||||
if (*f == 's')
|
|
||||||
++callcount;
|
++callcount;
|
||||||
}
|
}
|
||||||
else if (128 <= (unsigned char)*f) {
|
else if (128 <= (unsigned char)*f) {
|
||||||
|
@ -794,12 +789,16 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
#ifdef HAVE_LONG_LONG
|
#ifdef HAVE_LONG_LONG
|
||||||
int longlongflag = 0;
|
int longlongflag = 0;
|
||||||
#endif
|
#endif
|
||||||
const char* p = f;
|
const char* p = f++;
|
||||||
width = 0;
|
width = 0;
|
||||||
while (Py_ISDIGIT((unsigned)*f))
|
while (Py_ISDIGIT((unsigned)*f))
|
||||||
width = (width*10) + *f++ - '0';
|
width = (width*10) + *f++ - '0';
|
||||||
while (*++f && *f != '%' && !Py_ISALPHA((unsigned)*f))
|
precision = 0;
|
||||||
;
|
if (*f == '.') {
|
||||||
|
f++;
|
||||||
|
while (Py_ISDIGIT((unsigned)*f))
|
||||||
|
precision = (precision*10) + *f++ - '0';
|
||||||
|
}
|
||||||
|
|
||||||
/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
|
/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
|
||||||
* they don't affect the amount of space we reserve.
|
* they don't affect the amount of space we reserve.
|
||||||
|
@ -823,16 +822,18 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
switch (*f) {
|
switch (*f) {
|
||||||
case 'c':
|
case 'c':
|
||||||
{
|
{
|
||||||
#ifndef Py_UNICODE_WIDE
|
|
||||||
int ordinal = va_arg(count, int);
|
int ordinal = va_arg(count, int);
|
||||||
|
if (ordinal < 0 || ordinal > 0x10ffff) {
|
||||||
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
|
"%c arg not in range(0x110000)");
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
#ifndef Py_UNICODE_WIDE
|
||||||
if (ordinal > 0xffff)
|
if (ordinal > 0xffff)
|
||||||
n += 2;
|
n += 2;
|
||||||
else
|
else
|
||||||
n++;
|
|
||||||
#else
|
|
||||||
(void)va_arg(count, int);
|
|
||||||
n++;
|
|
||||||
#endif
|
#endif
|
||||||
|
n++;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case '%':
|
case '%':
|
||||||
|
@ -840,6 +841,8 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
break;
|
break;
|
||||||
case 'd': case 'u': case 'i': case 'x':
|
case 'd': case 'u': case 'i': case 'x':
|
||||||
(void) va_arg(count, int);
|
(void) va_arg(count, int);
|
||||||
|
if (width < precision)
|
||||||
|
width = precision;
|
||||||
#ifdef HAVE_LONG_LONG
|
#ifdef HAVE_LONG_LONG
|
||||||
if (longlongflag) {
|
if (longlongflag) {
|
||||||
if (width < MAX_LONG_LONG_CHARS)
|
if (width < MAX_LONG_LONG_CHARS)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue