bpo-36297: remove "unicode_internal" codec (GH-12342)

This commit is contained in:
Inada Naoki 2019-03-18 15:44:11 +09:00 committed by GitHub
parent 6fb544d8bc
commit 6a16b18224
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 40 additions and 529 deletions

View file

@ -6551,108 +6551,6 @@ PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
return result;
}
/* --- Unicode Internal Codec ------------------------------------------- */
/* Decode `size` bytes starting at `s`, treating them as the raw in-memory
   representation of consecutive Py_UNICODE code units, and build a new
   Unicode object from the resulting code points.

   A DeprecationWarning is issued first; if issuing it reports failure the
   function returns NULL without decoding anything.

   s       input bytes; may be unaligned for Py_UNICODE access.
   size    number of input bytes.  Negative values raise via
           PyErr_BadInternalCall(); zero yields the empty string.
   errors  error-handling scheme name, forwarded unchanged to
           unicode_decode_call_errorhandler_writer().

   Returns the object produced by _PyUnicodeWriter_Finish() on success, or
   NULL on failure after releasing the writer and any cached error-handler
   state.

   Two input problems are routed through the error handler:
     - fewer than Py_UNICODE_SIZE bytes remain ("truncated input");
     - on Py_UNICODE_WIDE builds, a unit above 0x10FFFF.
   On builds without Py_UNICODE_WIDE, a high surrogate immediately followed
   by a low surrogate is merged into a single code point. */
PyObject *
_PyUnicode_DecodeUnicodeInternal(const char *s,
                                 Py_ssize_t size,
                                 const char *errors)
{
    const char *input_start = s;
    const char *input_end;
    Py_ssize_t startinpos;
    Py_ssize_t endinpos;
    _PyUnicodeWriter writer;
    PyObject *error_handler = NULL;
    PyObject *exception = NULL;

    if (PyErr_WarnEx(PyExc_DeprecationWarning,
                     "unicode_internal codec has been deprecated",
                     1)) {
        return NULL;
    }
    if (size < 0) {
        PyErr_BadInternalCall();
        return NULL;
    }
    if (size == 0) {
        _Py_RETURN_UNICODE_EMPTY();
    }

    _PyUnicodeWriter_Init(&writer);
    /* NOTE(review): with Py_UNICODE_SIZE of 2 or 4 this guard looks
       unreachable for any Py_ssize_t value; kept exactly as written. */
    if (size / Py_UNICODE_SIZE > PY_SSIZE_T_MAX - 1) {
        PyErr_NoMemory();
        goto onError;
    }
    /* Round up so a trailing partial unit is still counted. */
    writer.min_length = (size + (Py_UNICODE_SIZE - 1)) / Py_UNICODE_SIZE;

    input_end = s + size;
    while (s < input_end) {
        Py_UNICODE unit;
        Py_UCS4 code_point = 0;
        /* Stays NULL while the current unit is acceptable. */
        const char *reason = NULL;

        if (input_end - s < Py_UNICODE_SIZE) {
            endinpos = input_end - input_start;
            reason = "truncated input";
        }
        else {
            /* Assemble the unit byte by byte: the input pointer may not be
               suitably aligned for a direct Py_UNICODE load (see
               test_codeccallbacks). */
            char *unit_bytes = (char *) &unit;
            unit_bytes[0] = s[0];
            unit_bytes[1] = s[1];
#ifdef Py_UNICODE_WIDE
            unit_bytes[2] = s[2];
            unit_bytes[3] = s[3];
#endif
            code_point = unit;
#ifdef Py_UNICODE_WIDE
            /* Raw UCS-4 data is untrusted: reject anything outside the
               Unicode range before it reaches the writer. */
            if (code_point > 0x10ffff) {
                endinpos = s - input_start + Py_UNICODE_SIZE;
                reason = "illegal code point (> 0x10FFFF)";
            }
#endif
        }

        if (reason != NULL) {
            /* `s` has not been advanced yet, so it marks the start of the
               offending span.  The handler receives pointers to the cursor
               and buffer bounds and may update them. */
            startinpos = s - input_start;
            if (unicode_decode_call_errorhandler_writer(
                    errors, &error_handler,
                    "unicode_internal", reason,
                    &input_start, &input_end, &startinpos, &endinpos,
                    &exception, &s,
                    &writer)) {
                goto onError;
            }
            continue;
        }

        s += Py_UNICODE_SIZE;
#ifndef Py_UNICODE_WIDE
        /* Narrow build: try to pair a high surrogate with the next unit. */
        if (Py_UNICODE_IS_HIGH_SURROGATE(code_point)
            && input_end - s >= Py_UNICODE_SIZE) {
            Py_UNICODE next_unit;
            char *next_bytes = (char *) &next_unit;
            next_bytes[0] = s[0];
            next_bytes[1] = s[1];
            if (Py_UNICODE_IS_LOW_SURROGATE(next_unit)) {
                code_point = Py_UNICODE_JOIN_SURROGATES(unit, next_unit);
                s += Py_UNICODE_SIZE;
            }
        }
#endif
        if (_PyUnicodeWriter_WriteCharInline(&writer, code_point) < 0) {
            goto onError;
        }
    }

    Py_XDECREF(error_handler);
    Py_XDECREF(exception);
    return _PyUnicodeWriter_Finish(&writer);

onError:
    _PyUnicodeWriter_Dealloc(&writer);
    Py_XDECREF(error_handler);
    Py_XDECREF(exception);
    return NULL;
}
/* --- Latin-1 Codec ------------------------------------------------------ */
PyObject *