Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis

and fix by Guido Vranken.
2025-11-25 21:11:09 +00:00 · 2015-01-27 22:18:46 +02:00 · 2015-01-27 22:18:46 +02:00 · 4dbc305002
commit 4dbc305002
parent 119479f705
3 changed files with 131 additions and 40 deletions
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@ -2016,9 +2016,10 @@ class UnicodeTest(string_tests.CommonTest,
    # Test PyUnicode_FromFormat()
    def test_from_format(self):
        support.import_module('ctypes')
-        from ctypes import (pythonapi, py_object,
+        from ctypes import (
+            pythonapi, py_object, sizeof,
            c_int, c_long, c_longlong, c_ssize_t,
-            c_uint, c_ulong, c_ulonglong, c_size_t)
+            c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p)
        name = "PyUnicode_FromFormat"
        _PyUnicode_FromFormat = getattr(pythonapi, name)
        _PyUnicode_FromFormat.restype = py_object
@ -2029,9 +2030,13 @@ class UnicodeTest(string_tests.CommonTest,
                for arg in args)
            return _PyUnicode_FromFormat(format, *cargs)

+        def check_format(expected, format, *args):
+            text = PyUnicode_FromFormat(format, *args)
+            self.assertEqual(expected, text)
+
        # ascii format, non-ascii argument
-        text = PyUnicode_FromFormat(b'ascii\x7f=%U', 'unicode\xe9')
-        self.assertEqual(text, 'ascii\x7f=unicode\xe9')
+        check_format('ascii\x7f=unicode\xe9',
+                     b'ascii\x7f=%U', 'unicode\xe9')

        # non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV()
        # raises an error
@ -2041,64 +2046,136 @@ class UnicodeTest(string_tests.CommonTest,
            PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii')

        # test "%c"
-        self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0xabcd)), '\uabcd')
-        self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0x10ffff)), '\U0010ffff')
+        check_format('\uabcd',
+                     b'%c', c_int(0xabcd))
+        check_format('\U0010ffff',
+                     b'%c', c_int(0x10ffff))
        with self.assertRaises(OverflowError):
            PyUnicode_FromFormat(b'%c', c_int(0x110000))
        # Issue #18183
-        self.assertEqual(
-            PyUnicode_FromFormat(b'%c%c', c_int(0x10000), c_int(0x100000)),
-            '\U00010000\U00100000')
+        check_format('\U00010000\U00100000',
+                     b'%c%c', c_int(0x10000), c_int(0x100000))

        # test "%"
-        self.assertEqual(PyUnicode_FromFormat(b'%'), '%')
-        self.assertEqual(PyUnicode_FromFormat(b'%%'), '%')
-        self.assertEqual(PyUnicode_FromFormat(b'%%s'), '%s')
-        self.assertEqual(PyUnicode_FromFormat(b'[%%]'), '[%]')
-        self.assertEqual(PyUnicode_FromFormat(b'%%%s', b'abc'), '%abc')
+        check_format('%',
+                     b'%')
+        check_format('%',
+                     b'%%')
+        check_format('%s',
+                     b'%%s')
+        check_format('[%]',
+                     b'[%%]')
+        check_format('%abc',
+                     b'%%%s', b'abc')
+
+        # test %S
+        check_format("repr=\u20acABC",
+                     b'repr=%S', '\u20acABC')
+
+        # test %R
+        check_format("repr='\u20acABC'",
+                     b'repr=%R', '\u20acABC')

        # test integer formats (%i, %d, %u)
-        self.assertEqual(PyUnicode_FromFormat(b'%03i', c_int(10)), '010')
-        self.assertEqual(PyUnicode_FromFormat(b'%0.4i', c_int(10)), '0010')
-        self.assertEqual(PyUnicode_FromFormat(b'%i', c_int(-123)), '-123')
-        self.assertEqual(PyUnicode_FromFormat(b'%li', c_long(-123)), '-123')
-        self.assertEqual(PyUnicode_FromFormat(b'%lli', c_longlong(-123)), '-123')
-        self.assertEqual(PyUnicode_FromFormat(b'%zi', c_ssize_t(-123)), '-123')
+        check_format('010',
+                     b'%03i', c_int(10))
+        check_format('0010',
+                     b'%0.4i', c_int(10))
+        check_format('-123',
+                     b'%i', c_int(-123))
+        check_format('-123',
+                     b'%li', c_long(-123))
+        check_format('-123',
+                     b'%lli', c_longlong(-123))
+        check_format('-123',
+                     b'%zi', c_ssize_t(-123))

-        self.assertEqual(PyUnicode_FromFormat(b'%d', c_int(-123)), '-123')
-        self.assertEqual(PyUnicode_FromFormat(b'%ld', c_long(-123)), '-123')
-        self.assertEqual(PyUnicode_FromFormat(b'%lld', c_longlong(-123)), '-123')
-        self.assertEqual(PyUnicode_FromFormat(b'%zd', c_ssize_t(-123)), '-123')
+        check_format('-123',
+                     b'%d', c_int(-123))
+        check_format('-123',
+                     b'%ld', c_long(-123))
+        check_format('-123',
+                     b'%lld', c_longlong(-123))
+        check_format('-123',
+                     b'%zd', c_ssize_t(-123))

-        self.assertEqual(PyUnicode_FromFormat(b'%u', c_uint(123)), '123')
-        self.assertEqual(PyUnicode_FromFormat(b'%lu', c_ulong(123)), '123')
-        self.assertEqual(PyUnicode_FromFormat(b'%llu', c_ulonglong(123)), '123')
-        self.assertEqual(PyUnicode_FromFormat(b'%zu', c_size_t(123)), '123')
+        check_format('123',
+                     b'%u', c_uint(123))
+        check_format('123',
+                     b'%lu', c_ulong(123))
+        check_format('123',
+                     b'%llu', c_ulonglong(123))
+        check_format('123',
+                     b'%zu', c_size_t(123))
+
+        # test long output
+        min_longlong = -(2 ** (8 * sizeof(c_longlong) - 1))
+        max_longlong = -min_longlong - 1
+        check_format(str(min_longlong),
+                     b'%lld', c_longlong(min_longlong))
+        check_format(str(max_longlong),
+                     b'%lld', c_longlong(max_longlong))
+        max_ulonglong = 2 ** (8 * sizeof(c_ulonglong)) - 1
+        check_format(str(max_ulonglong),
+                     b'%llu', c_ulonglong(max_ulonglong))
+        PyUnicode_FromFormat(b'%p', c_void_p(-1))
+
+        # test padding (width and/or precision)
+        check_format('123'.rjust(10, '0'),
+                     b'%010i', c_int(123))
+        check_format('123'.rjust(100),
+                     b'%100i', c_int(123))
+        check_format('123'.rjust(300, '0'),
+                     b'%.300i', c_int(123))
+        check_format('123'.rjust(80, '0').rjust(100),
+                     b'%100.80i', c_int(123))
+
+        check_format('123'.rjust(10, '0'),
+                     b'%010u', c_uint(123))
+        check_format('123'.rjust(100),
+                     b'%100u', c_uint(123))
+        check_format('123'.rjust(300, '0'),
+                     b'%.300u', c_uint(123))
+        check_format('123'.rjust(80, '0').rjust(100),
+                     b'%100.80u', c_uint(123))
+
+        check_format('123'.rjust(10, '0'),
+                     b'%010x', c_int(0x123))
+        check_format('123'.rjust(100),
+                     b'%100x', c_int(0x123))
+        check_format('123'.rjust(300, '0'),
+                     b'%.300x', c_int(0x123))
+        check_format('123'.rjust(80, '0').rjust(100),
+                     b'%100.80x', c_int(0x123))

        # test %A
-        text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
-        self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'")
+        check_format(r"%A:'abc\xe9\uabcd\U0010ffff'",
+                     b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')

        # test %V
-        text = PyUnicode_FromFormat(b'repr=%V', 'abc', b'xyz')
-        self.assertEqual(text, 'repr=abc')
+        check_format('repr=abc',
+                     b'repr=%V', 'abc', b'xyz')

        # Test string decode from parameter of %s using utf-8.
        # b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of
        # '\u4eba\u6c11'
-        text = PyUnicode_FromFormat(b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
-        self.assertEqual(text, 'repr=\u4eba\u6c11')
+        check_format('repr=\u4eba\u6c11',
+                     b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')

        #Test replace error handler.
-        text = PyUnicode_FromFormat(b'repr=%V', None, b'abc\xff')
-        self.assertEqual(text, 'repr=abc\ufffd')
+        check_format('repr=abc\ufffd',
+                     b'repr=%V', None, b'abc\xff')

        # not supported: copy the raw format string. these tests are just here
        # to check for crashs and should not be considered as specifications
-        self.assertEqual(PyUnicode_FromFormat(b'%1%s', b'abc'), '%s')
-        self.assertEqual(PyUnicode_FromFormat(b'%1abc'), '%1abc')
-        self.assertEqual(PyUnicode_FromFormat(b'%+i', c_int(10)), '%+i')
-        self.assertEqual(PyUnicode_FromFormat(b'%.%s', b'abc'), '%.%s')
+        check_format('%s',
+                     b'%1%s', b'abc')
+        check_format('%1abc',
+                     b'%1abc')
+        check_format('%+i',
+                     b'%+i', c_int(10))
+        check_format('%.%s',
+                     b'%.%s', b'abc')

    # Test PyUnicode_AsWideChar()
    @support.cpython_only
--- a/Misc/NEWS
+++ b/Misc/NEWS
@ -2,6 +2,18 @@
 Python News
 +++++++++++

+What's New in Python 3.3.7?
+============================
+
+*Release date: XXXX-XX-XX*
+
+Core and Builtins
+-----------------
+
+- Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV.  Analysis
+  and fix by Guido Vranken.
+
+
 What's New in Python 3.3.6?
 ===========================

--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -2335,6 +2335,8 @@ parse_format_flags(const char *f,
            f--;
        }
    }
+    if (width < precision)
+        width = precision;
    if (*f == '\0') {
        /* bogus format "%.1" => go backward, f points to "1" */
        f--;