mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
Issue #7649: Fix u'%c' % char for characters in range 0x80..0xFF
=> raise a UnicodeDecodeError. Patch written by Ezio Melotti.
This commit is contained in:
parent
ea370a9edd
commit
f20f9c299e
3 changed files with 26 additions and 2 deletions
|
@ -395,6 +395,19 @@ class UnicodeTest(
|
||||||
self.assertEqual(u'%c' % 0x1234, u'\u1234')
|
self.assertEqual(u'%c' % 0x1234, u'\u1234')
|
||||||
self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))
|
self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))
|
||||||
|
|
||||||
|
for num in range(0x00,0x80):
|
||||||
|
char = chr(num)
|
||||||
|
self.assertEqual(u"%c" % char, char)
|
||||||
|
self.assertEqual(u"%c" % num, char)
|
||||||
|
# Issue 7649
|
||||||
|
for num in range(0x80,0x100):
|
||||||
|
uchar = unichr(num)
|
||||||
|
self.assertEqual(uchar, u"%c" % num) # works only with ints
|
||||||
|
self.assertEqual(uchar, u"%c" % uchar) # and unicode chars
|
||||||
|
# the implicit decoding should fail for non-ascii chars
|
||||||
|
self.assertRaises(UnicodeDecodeError, u"%c".__mod__, chr(num))
|
||||||
|
self.assertRaises(UnicodeDecodeError, u"%s".__mod__, chr(num))
|
||||||
|
|
||||||
# formatting jobs delegated from the string implementation:
|
# formatting jobs delegated from the string implementation:
|
||||||
self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...')
|
self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...')
|
||||||
self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
|
self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
|
||||||
|
|
|
@ -12,6 +12,9 @@ What's New in Python 2.7 alpha 4?
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #7649: Fix u'%c' % char for characters in range 0x80..0xFF; raise a
  UnicodeDecodeError.
|
||||||
|
|
||||||
- Issue #6902: Fix problem with built-in types format incorrectly with
|
- Issue #6902: Fix problem with built-in types format incorrectly with
|
||||||
0 padding.
|
0 padding.
|
||||||
|
|
||||||
|
|
|
@ -8170,6 +8170,7 @@ formatchar(Py_UNICODE *buf,
|
||||||
size_t buflen,
|
size_t buflen,
|
||||||
PyObject *v)
|
PyObject *v)
|
||||||
{
|
{
|
||||||
|
PyObject *s;
|
||||||
/* presume that the buffer is at least 2 characters long */
|
/* presume that the buffer is at least 2 characters long */
|
||||||
if (PyUnicode_Check(v)) {
|
if (PyUnicode_Check(v)) {
|
||||||
if (PyUnicode_GET_SIZE(v) != 1)
|
if (PyUnicode_GET_SIZE(v) != 1)
|
||||||
|
@ -8180,7 +8181,14 @@ formatchar(Py_UNICODE *buf,
|
||||||
else if (PyString_Check(v)) {
|
else if (PyString_Check(v)) {
|
||||||
if (PyString_GET_SIZE(v) != 1)
|
if (PyString_GET_SIZE(v) != 1)
|
||||||
goto onError;
|
goto onError;
|
||||||
buf[0] = (Py_UNICODE)PyString_AS_STRING(v)[0];
|
/* #7649: if the char is a non-ascii (i.e. in range(0x80,0x100)) byte
|
||||||
|
string, "u'%c' % char" should fail with a UnicodeDecodeError */
|
||||||
|
s = PyUnicode_FromStringAndSize(PyString_AS_STRING(v), 1);
|
||||||
|
/* if the char is not decodable return -1 */
|
||||||
|
if (s == NULL)
|
||||||
|
return -1;
|
||||||
|
buf[0] = PyUnicode_AS_UNICODE(s)[0];
|
||||||
|
Py_DECREF(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
else {
|
else {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue