mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
Issue #7649: Fix u'%c' % char for characters in range 0x80..0xFF
=> raise a UnicodeDecodeError. Patch written by Ezio Melotti.
This commit is contained in:
parent
ea370a9edd
commit
f20f9c299e
3 changed files with 26 additions and 2 deletions
|
@ -395,6 +395,19 @@ class UnicodeTest(
|
||||||
self.assertEqual(u'%c' % 0x1234, u'\u1234')
|
self.assertEqual(u'%c' % 0x1234, u'\u1234')
|
||||||
self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))
|
self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))
|
||||||
|
|
||||||
|
for num in range(0x00,0x80):
|
||||||
|
char = chr(num)
|
||||||
|
self.assertEqual(u"%c" % char, char)
|
||||||
|
self.assertEqual(u"%c" % num, char)
|
||||||
|
# Issue 7649
|
||||||
|
for num in range(0x80,0x100):
|
||||||
|
uchar = unichr(num)
|
||||||
|
self.assertEqual(uchar, u"%c" % num) # works only with ints
|
||||||
|
self.assertEqual(uchar, u"%c" % uchar) # and unicode chars
|
||||||
|
# the implicit decoding should fail for non-ascii chars
|
||||||
|
self.assertRaises(UnicodeDecodeError, u"%c".__mod__, chr(num))
|
||||||
|
self.assertRaises(UnicodeDecodeError, u"%s".__mod__, chr(num))
|
||||||
|
|
||||||
# formatting jobs delegated from the string implementation:
|
# formatting jobs delegated from the string implementation:
|
||||||
self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...')
|
self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...')
|
||||||
self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
|
self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
|
||||||
|
|
|
@ -12,6 +12,9 @@ What's New in Python 2.7 alpha 4?
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #7649: Fix u'%c' % char for characters in range 0x80..0xFF; raise a
  UnicodeDecodeError.
|
||||||
|
|
||||||
- Issue #6902: Fix problem with built-in types format incorrectly with
|
- Issue #6902: Fix problem with built-in types format incorrectly with
|
||||||
0 padding.
|
0 padding.
|
||||||
|
|
||||||
|
|
|
@ -8170,6 +8170,7 @@ formatchar(Py_UNICODE *buf,
|
||||||
size_t buflen,
|
size_t buflen,
|
||||||
PyObject *v)
|
PyObject *v)
|
||||||
{
|
{
|
||||||
|
PyObject *s;
|
||||||
/* presume that the buffer is at least 2 characters long */
|
/* presume that the buffer is at least 2 characters long */
|
||||||
if (PyUnicode_Check(v)) {
|
if (PyUnicode_Check(v)) {
|
||||||
if (PyUnicode_GET_SIZE(v) != 1)
|
if (PyUnicode_GET_SIZE(v) != 1)
|
||||||
|
@ -8180,7 +8181,14 @@ formatchar(Py_UNICODE *buf,
|
||||||
else if (PyString_Check(v)) {
|
else if (PyString_Check(v)) {
|
||||||
if (PyString_GET_SIZE(v) != 1)
|
if (PyString_GET_SIZE(v) != 1)
|
||||||
goto onError;
|
goto onError;
|
||||||
buf[0] = (Py_UNICODE)PyString_AS_STRING(v)[0];
|
/* #7649: if the char is a non-ascii (i.e. in range(0x80,0x100)) byte
|
||||||
|
string, "u'%c' % char" should fail with a UnicodeDecodeError */
|
||||||
|
s = PyUnicode_FromStringAndSize(PyString_AS_STRING(v), 1);
|
||||||
|
/* if the char is not decodable return -1 */
|
||||||
|
if (s == NULL)
|
||||||
|
return -1;
|
||||||
|
buf[0] = PyUnicode_AS_UNICODE(s)[0];
|
||||||
|
Py_DECREF(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
else {
|
else {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue