Issue #14850: Now a charmap decoder treats U+FFFE as "undefined mapping"

in any mapping, not only in a unicode string.
This commit is contained in:
Serhiy Storchaka 2013-01-15 15:30:04 +02:00
commit 55e2cb497b
3 changed files with 82 additions and 19 deletions

View file

@ -1737,6 +1737,10 @@ class CharmapTest(unittest.TestCase):
codecs.charmap_decode, b"\x00\x01\x02", "strict", "ab" codecs.charmap_decode, b"\x00\x01\x02", "strict", "ab"
) )
self.assertRaises(UnicodeDecodeError,
codecs.charmap_decode, b"\x00\x01\x02", "strict", "ab\ufffe"
)
self.assertEqual( self.assertEqual(
codecs.charmap_decode(b"\x00\x01\x02", "replace", "ab"), codecs.charmap_decode(b"\x00\x01\x02", "replace", "ab"),
("ab\ufffd", 3) ("ab\ufffd", 3)
@ -1793,6 +1797,17 @@ class CharmapTest(unittest.TestCase):
{0: 'a', 1: 'b'} {0: 'a', 1: 'b'}
) )
self.assertRaises(UnicodeDecodeError,
codecs.charmap_decode, b"\x00\x01\x02", "strict",
{0: 'a', 1: 'b', 2: None}
)
# Issue #14850
self.assertRaises(UnicodeDecodeError,
codecs.charmap_decode, b"\x00\x01\x02", "strict",
{0: 'a', 1: 'b', 2: '\ufffe'}
)
self.assertEqual( self.assertEqual(
codecs.charmap_decode(b"\x00\x01\x02", "replace", codecs.charmap_decode(b"\x00\x01\x02", "replace",
{0: 'a', 1: 'b'}), {0: 'a', 1: 'b'}),
@ -1805,6 +1820,13 @@ class CharmapTest(unittest.TestCase):
("ab\ufffd", 3) ("ab\ufffd", 3)
) )
# Issue #14850
self.assertEqual(
codecs.charmap_decode(b"\x00\x01\x02", "replace",
{0: 'a', 1: 'b', 2: '\ufffe'}),
("ab\ufffd", 3)
)
self.assertEqual( self.assertEqual(
codecs.charmap_decode(b"\x00\x01\x02", "ignore", codecs.charmap_decode(b"\x00\x01\x02", "ignore",
{0: 'a', 1: 'b'}), {0: 'a', 1: 'b'}),
@ -1817,6 +1839,13 @@ class CharmapTest(unittest.TestCase):
("ab", 3) ("ab", 3)
) )
# Issue #14850
self.assertEqual(
codecs.charmap_decode(b"\x00\x01\x02", "ignore",
{0: 'a', 1: 'b', 2: '\ufffe'}),
("ab", 3)
)
allbytes = bytes(range(256)) allbytes = bytes(range(256))
self.assertEqual( self.assertEqual(
codecs.charmap_decode(allbytes, "ignore", {}), codecs.charmap_decode(allbytes, "ignore", {}),
@ -1857,18 +1886,35 @@ class CharmapTest(unittest.TestCase):
{0: a, 1: b}, {0: a, 1: b},
) )
self.assertRaises(UnicodeDecodeError,
codecs.charmap_decode, b"\x00\x01\x02", "strict",
{0: a, 1: b, 2: 0xFFFE},
)
self.assertEqual( self.assertEqual(
codecs.charmap_decode(b"\x00\x01\x02", "replace", codecs.charmap_decode(b"\x00\x01\x02", "replace",
{0: a, 1: b}), {0: a, 1: b}),
("ab\ufffd", 3) ("ab\ufffd", 3)
) )
self.assertEqual(
codecs.charmap_decode(b"\x00\x01\x02", "replace",
{0: a, 1: b, 2: 0xFFFE}),
("ab\ufffd", 3)
)
self.assertEqual( self.assertEqual(
codecs.charmap_decode(b"\x00\x01\x02", "ignore", codecs.charmap_decode(b"\x00\x01\x02", "ignore",
{0: a, 1: b}), {0: a, 1: b}),
("ab", 3) ("ab", 3)
) )
self.assertEqual(
codecs.charmap_decode(b"\x00\x01\x02", "ignore",
{0: a, 1: b, 2: 0xFFFE}),
("ab", 3)
)
class WithStmtTest(unittest.TestCase): class WithStmtTest(unittest.TestCase):
def test_encodedfile(self): def test_encodedfile(self):

View file

@ -10,6 +10,9 @@ What's New in Python 3.4.0 Alpha 1?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #14850: Now a charmap decoder treats U+FFFE as "undefined mapping"
in any mapping, not only in a string.
- Issue #16730: importlib.machinery.FileFinder now no longer raises an - Issue #16730: importlib.machinery.FileFinder now no longer raises an
exception when trying to populate its cache and it finds out the directory is exception when trying to populate its cache and it finds out the directory is
unreadable or has turned into a file. Reported and diagnosed by unreadable or has turned into a file. Reported and diagnosed by

View file

@ -7393,15 +7393,18 @@ Error:
if (PyErr_ExceptionMatches(PyExc_LookupError)) { if (PyErr_ExceptionMatches(PyExc_LookupError)) {
/* No mapping found means: mapping is undefined. */ /* No mapping found means: mapping is undefined. */
PyErr_Clear(); PyErr_Clear();
x = Py_None; goto Undefined;
Py_INCREF(x);
} else } else
goto onError; goto onError;
} }
/* Apply mapping */ /* Apply mapping */
if (x == Py_None)
goto Undefined;
if (PyLong_Check(x)) { if (PyLong_Check(x)) {
long value = PyLong_AS_LONG(x); long value = PyLong_AS_LONG(x);
if (value == 0xFFFE)
goto Undefined;
if (value < 0 || value > MAX_UNICODE) { if (value < 0 || value > MAX_UNICODE) {
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"character mapping must be in range(0x%lx)", "character mapping must be in range(0x%lx)",
@ -7415,26 +7418,24 @@ Error:
PyUnicode_WRITE(writer.kind, writer.data, writer.pos, value); PyUnicode_WRITE(writer.kind, writer.data, writer.pos, value);
writer.pos++; writer.pos++;
} }
else if (x == Py_None) {
/* undefined mapping */
startinpos = s-starts;
endinpos = startinpos+1;
if (unicode_decode_call_errorhandler_writer(
errors, &errorHandler,
"charmap", "character maps to <undefined>",
&starts, &e, &startinpos, &endinpos, &exc, &s,
&writer)) {
Py_DECREF(x);
goto onError;
}
Py_DECREF(x);
continue;
}
else if (PyUnicode_Check(x)) { else if (PyUnicode_Check(x)) {
if (PyUnicode_READY(x) == -1)
goto onError;
if (PyUnicode_GET_LENGTH(x) == 1) {
Py_UCS4 value = PyUnicode_READ_CHAR(x, 0);
if (value == 0xFFFE)
goto Undefined;
if (_PyUnicodeWriter_Prepare(&writer, 1, value) == -1)
goto onError;
PyUnicode_WRITE(writer.kind, writer.data, writer.pos, value);
writer.pos++;
}
else {
writer.overallocate = 1; writer.overallocate = 1;
if (_PyUnicodeWriter_WriteStr(&writer, x) == -1) if (_PyUnicodeWriter_WriteStr(&writer, x) == -1)
goto onError; goto onError;
} }
}
else { else {
/* wrong return value */ /* wrong return value */
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
@ -7444,6 +7445,19 @@ Error:
} }
Py_DECREF(x); Py_DECREF(x);
++s; ++s;
continue;
Undefined:
/* undefined mapping */
Py_XDECREF(x);
startinpos = s-starts;
endinpos = startinpos+1;
if (unicode_decode_call_errorhandler_writer(
errors, &errorHandler,
"charmap", "character maps to <undefined>",
&starts, &e, &startinpos, &endinpos, &exc, &s,
&writer)) {
goto onError;
}
} }
} }
Py_XDECREF(errorHandler); Py_XDECREF(errorHandler);