Issue #16979: Fix error handling bugs in the unicode-escape-decode decoder.

This commit is contained in:
Serhiy Storchaka 2013-01-29 10:28:07 +02:00
commit 24193debd4
4 changed files with 118 additions and 54 deletions

View file

@ -5508,7 +5508,6 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
const char *starts = s;
Py_ssize_t startinpos;
Py_ssize_t endinpos;
int j;
PyObject *v;
const char *end;
char* message;
@ -5630,29 +5629,19 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
message = "truncated \\UXXXXXXXX escape";
hexescape:
chr = 0;
if (s+digits>end) {
endinpos = size;
if (unicode_decode_call_errorhandler(
errors, &errorHandler,
"unicodeescape", "end of string in escape sequence",
&starts, &end, &startinpos, &endinpos, &exc, &s,
&v, &i))
goto onError;
goto nextByte;
}
for (j = 0; j < digits; ++j) {
c = (unsigned char) s[j];
if (!Py_ISXDIGIT(c)) {
endinpos = (s+j+1)-starts;
if (unicode_decode_call_errorhandler(
errors, &errorHandler,
"unicodeescape", message,
&starts, &end, &startinpos, &endinpos, &exc, &s,
&v, &i))
goto onError;
len = PyUnicode_GET_LENGTH(v);
goto nextByte;
if (end - s < digits) {
/* count only hex digits */
for (; s < end; ++s) {
c = (unsigned char)*s;
if (!Py_ISXDIGIT(c))
goto error;
}
goto error;
}
for (; digits--; ++s) {
c = (unsigned char)*s;
if (!Py_ISXDIGIT(c))
goto error;
chr = (chr<<4) & ~0xF;
if (c >= '0' && c <= '9')
chr += c - '0';
@ -5661,24 +5650,16 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
else
chr += 10 + c - 'A';
}
s += j;
if (chr == 0xffffffff && PyErr_Occurred())
/* _decoding_error will have already written into the
target buffer. */
break;
store:
/* when we get here, chr is a 32-bit unicode character */
if (chr <= MAX_UNICODE) {
WRITECHAR(chr);
} else {
endinpos = s-starts;
if (unicode_decode_call_errorhandler(
errors, &errorHandler,
"unicodeescape", "illegal Unicode character",
&starts, &end, &startinpos, &endinpos, &exc, &s,
&v, &i))
goto onError;
}
message = "illegal Unicode character";
if (chr > MAX_UNICODE)
goto error;
WRITECHAR(chr);
break;
/* \N{name} */
@ -5706,26 +5687,13 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
goto store;
}
}
endinpos = s-starts;
if (unicode_decode_call_errorhandler(
errors, &errorHandler,
"unicodeescape", message,
&starts, &end, &startinpos, &endinpos, &exc, &s,
&v, &i))
goto onError;
break;
goto error;
default:
if (s > end) {
message = "\\ at end of string";
s--;
endinpos = s-starts;
if (unicode_decode_call_errorhandler(
errors, &errorHandler,
"unicodeescape", message,
&starts, &end, &startinpos, &endinpos, &exc, &s,
&v, &i))
goto onError;
goto error;
}
else {
WRITECHAR('\\');
@ -5733,8 +5701,18 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
}
break;
}
nextByte:
;
continue;
error:
endinpos = s-starts;
if (unicode_decode_call_errorhandler(
errors, &errorHandler,
"unicodeescape", message,
&starts, &end, &startinpos, &endinpos, &exc, &s,
&v, &i))
goto onError;
len = PyUnicode_GET_LENGTH(v);
continue;
}
#undef WRITECHAR