Issue #13333: The UTF-7 decoder now accepts lone surrogates

(the encoder already accepts them).
This commit is contained in:
Antoine Pitrou 2011-11-15 01:49:40 +01:00
parent 16ed86831b
commit 30402549de
3 changed files with 19 additions and 12 deletions

View file

@@ -1628,21 +1628,17 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
*p++ = outCh;
#endif
surrogate = 0;
continue;
}
else {
*p++ = surrogate;
surrogate = 0;
errmsg = "second surrogate missing";
goto utf7Error;
}
}
else if (outCh >= 0xD800 && outCh <= 0xDBFF) {
if (outCh >= 0xD800 && outCh <= 0xDBFF) {
/* first surrogate */
surrogate = outCh;
}
else if (outCh >= 0xDC00 && outCh <= 0xDFFF) {
errmsg = "unexpected second surrogate";
goto utf7Error;
}
else {
*p++ = outCh;
}
@@ -1652,8 +1648,8 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
inShift = 0;
s++;
if (surrogate) {
errmsg = "second surrogate missing at end of shift sequence";
goto utf7Error;
*p++ = surrogate;
surrogate = 0;
}
if (base64bits > 0) { /* left-over bits */
if (base64bits >= 6) {