mirror of
https://github.com/python/cpython.git
synced 2025-11-01 02:38:53 +00:00
Issue #13333: The UTF-7 decoder now accepts lone surrogates
(the encoder already accepts them).
This commit is contained in:
commit
78edf7576e
3 changed files with 21 additions and 12 deletions
|
|
@@ -1108,10 +1108,18 @@ class UnicodeTest(string_tests.CommonTest,
|
||||||
for (x, y) in utfTests:
|
for (x, y) in utfTests:
|
||||||
self.assertEqual(x.encode('utf-7'), y)
|
self.assertEqual(x.encode('utf-7'), y)
|
||||||
|
|
||||||
# Unpaired surrogates not supported
|
# Unpaired surrogates are passed through
|
||||||
self.assertRaises(UnicodeError, str, b'+3ADYAA-', 'utf-7')
|
self.assertEqual('\uD801'.encode('utf-7'), b'+2AE-')
|
||||||
|
self.assertEqual('\uD801x'.encode('utf-7'), b'+2AE-x')
|
||||||
|
self.assertEqual('\uDC01'.encode('utf-7'), b'+3AE-')
|
||||||
|
self.assertEqual('\uDC01x'.encode('utf-7'), b'+3AE-x')
|
||||||
|
self.assertEqual(b'+2AE-'.decode('utf-7'), '\uD801')
|
||||||
|
self.assertEqual(b'+2AE-x'.decode('utf-7'), '\uD801x')
|
||||||
|
self.assertEqual(b'+3AE-'.decode('utf-7'), '\uDC01')
|
||||||
|
self.assertEqual(b'+3AE-x'.decode('utf-7'), '\uDC01x')
|
||||||
|
|
||||||
self.assertEqual(str(b'+3ADYAA-', 'utf-7', 'replace'), '\ufffd\ufffd')
|
self.assertEqual('\uD801\U000abcde'.encode('utf-7'), b'+2AHab9ze-')
|
||||||
|
self.assertEqual(b'+2AHab9ze-'.decode('utf-7'), '\uD801\U000abcde')
|
||||||
|
|
||||||
# Issue #2242: crash on some Windows/MSVC versions
|
# Issue #2242: crash on some Windows/MSVC versions
|
||||||
self.assertEqual(b'+\xc1'.decode('utf-7'), '\xc1')
|
self.assertEqual(b'+\xc1'.decode('utf-7'), '\xc1')
|
||||||
|
|
|
||||||
|
|
@@ -10,6 +10,9 @@ What's New in Python 3.3 Alpha 1?
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #13333: The UTF-7 decoder now accepts lone surrogates (the encoder
|
||||||
|
already accepts them).
|
||||||
|
|
||||||
- Issue #13389: Full garbage collection passes now clear the freelists for
|
- Issue #13389: Full garbage collection passes now clear the freelists for
|
||||||
list and dict objects. They already cleared other freelists in the
|
list and dict objects. They already cleared other freelists in the
|
||||||
interpreter.
|
interpreter.
|
||||||
|
|
|
||||||
|
|
@@ -3884,21 +3884,18 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
|
||||||
if (unicode_putchar(&unicode, &outpos, ch2) < 0)
|
if (unicode_putchar(&unicode, &outpos, ch2) < 0)
|
||||||
goto onError;
|
goto onError;
|
||||||
surrogate = 0;
|
surrogate = 0;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
if (unicode_putchar(&unicode, &outpos, surrogate) < 0)
|
||||||
|
goto onError;
|
||||||
surrogate = 0;
|
surrogate = 0;
|
||||||
errmsg = "second surrogate missing";
|
|
||||||
goto utf7Error;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (outCh >= 0xD800 && outCh <= 0xDBFF) {
|
if (outCh >= 0xD800 && outCh <= 0xDBFF) {
|
||||||
/* first surrogate */
|
/* first surrogate */
|
||||||
surrogate = outCh;
|
surrogate = outCh;
|
||||||
}
|
}
|
||||||
else if (outCh >= 0xDC00 && outCh <= 0xDFFF) {
|
|
||||||
errmsg = "unexpected second surrogate";
|
|
||||||
goto utf7Error;
|
|
||||||
}
|
|
||||||
else {
|
else {
|
||||||
if (unicode_putchar(&unicode, &outpos, outCh) < 0)
|
if (unicode_putchar(&unicode, &outpos, outCh) < 0)
|
||||||
goto onError;
|
goto onError;
|
||||||
|
|
@@ -3909,8 +3906,9 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
|
||||||
inShift = 0;
|
inShift = 0;
|
||||||
s++;
|
s++;
|
||||||
if (surrogate) {
|
if (surrogate) {
|
||||||
errmsg = "second surrogate missing at end of shift sequence";
|
if (unicode_putchar(&unicode, &outpos, surrogate) < 0)
|
||||||
goto utf7Error;
|
goto onError;
|
||||||
|
surrogate = 0;
|
||||||
}
|
}
|
||||||
if (base64bits > 0) { /* left-over bits */
|
if (base64bits > 0) { /* left-over bits */
|
||||||
if (base64bits >= 6) {
|
if (base64bits >= 6) {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue