mirror of
https://github.com/python/cpython.git
synced 2025-07-19 01:05:26 +00:00
Issue #24848: Fixed bugs in UTF-7 decoding of malformed data:
1. Non-ASCII bytes were accepted after a shift sequence.
2. A low surrogate could be emitted in case of an error in the high surrogate.
3. In some circumstances the '\xfd' character was produced instead of the replacement character '\ufffd' (due to a bug in _PyUnicodeWriter).
This commit is contained in:
commit
58c8f2bb6d
4 changed files with 75 additions and 11 deletions
|
@@ -1553,7 +1553,7 @@ class UnicodeTest(string_tests.CommonTest,
|
|||
self.assertEqual(b'+2AHab9ze-'.decode('utf-7'), '\uD801\U000abcde')
|
||||
|
||||
# Issue #2242: crash on some Windows/MSVC versions
|
||||
self.assertEqual(b'+\xc1'.decode('utf-7'), '\xc1')
|
||||
self.assertEqual(b'+\xc1'.decode('utf-7', 'ignore'), '')
|
||||
|
||||
# Direct encoded characters
|
||||
set_d = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?"
|
||||
|
@@ -1995,6 +1995,7 @@ class UnicodeTest(string_tests.CommonTest,
|
|||
self.assertRaises(UnicodeError, str, b'Andr\202 x', 'ascii', 'strict')
|
||||
self.assertEqual(str(b'Andr\202 x', 'ascii', 'ignore'), "Andr x")
|
||||
self.assertEqual(str(b'Andr\202 x', 'ascii', 'replace'), 'Andr\uFFFD x')
|
||||
self.assertEqual(str(b'\202 x', 'ascii', 'replace'), '\uFFFD x')
|
||||
|
||||
# Error handling (unknown character names)
|
||||
self.assertEqual(b"\\N{foo}xx".decode("unicode-escape", "ignore"), "xx")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue