mirror of
https://github.com/python/cpython.git
synced 2025-08-24 18:55:00 +00:00
bpo-36297: remove "unicode_internal" codec (GH-12342)
This commit is contained in:
parent
6fb544d8bc
commit
6a16b18224
12 changed files with 40 additions and 529 deletions
|
@ -6551,108 +6551,6 @@ PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
|
|||
return result;
|
||||
}
|
||||
|
||||
/* --- Unicode Internal Codec ------------------------------------------- */
|
||||
|
||||
PyObject *
|
||||
_PyUnicode_DecodeUnicodeInternal(const char *s,
|
||||
Py_ssize_t size,
|
||||
const char *errors)
|
||||
{
|
||||
const char *starts = s;
|
||||
Py_ssize_t startinpos;
|
||||
Py_ssize_t endinpos;
|
||||
_PyUnicodeWriter writer;
|
||||
const char *end;
|
||||
const char *reason;
|
||||
PyObject *errorHandler = NULL;
|
||||
PyObject *exc = NULL;
|
||||
|
||||
if (PyErr_WarnEx(PyExc_DeprecationWarning,
|
||||
"unicode_internal codec has been deprecated",
|
||||
1))
|
||||
return NULL;
|
||||
|
||||
if (size < 0) {
|
||||
PyErr_BadInternalCall();
|
||||
return NULL;
|
||||
}
|
||||
if (size == 0)
|
||||
_Py_RETURN_UNICODE_EMPTY();
|
||||
|
||||
_PyUnicodeWriter_Init(&writer);
|
||||
if (size / Py_UNICODE_SIZE > PY_SSIZE_T_MAX - 1) {
|
||||
PyErr_NoMemory();
|
||||
goto onError;
|
||||
}
|
||||
writer.min_length = (size + (Py_UNICODE_SIZE - 1)) / Py_UNICODE_SIZE;
|
||||
|
||||
end = s + size;
|
||||
while (s < end) {
|
||||
Py_UNICODE uch;
|
||||
Py_UCS4 ch;
|
||||
if (end - s < Py_UNICODE_SIZE) {
|
||||
endinpos = end-starts;
|
||||
reason = "truncated input";
|
||||
goto error;
|
||||
}
|
||||
/* We copy the raw representation one byte at a time because the
|
||||
pointer may be unaligned (see test_codeccallbacks). */
|
||||
((char *) &uch)[0] = s[0];
|
||||
((char *) &uch)[1] = s[1];
|
||||
#ifdef Py_UNICODE_WIDE
|
||||
((char *) &uch)[2] = s[2];
|
||||
((char *) &uch)[3] = s[3];
|
||||
#endif
|
||||
ch = uch;
|
||||
#ifdef Py_UNICODE_WIDE
|
||||
/* We have to sanity check the raw data, otherwise doom looms for
|
||||
some malformed UCS-4 data. */
|
||||
if (ch > 0x10ffff) {
|
||||
endinpos = s - starts + Py_UNICODE_SIZE;
|
||||
reason = "illegal code point (> 0x10FFFF)";
|
||||
goto error;
|
||||
}
|
||||
#endif
|
||||
s += Py_UNICODE_SIZE;
|
||||
#ifndef Py_UNICODE_WIDE
|
||||
if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && end - s >= Py_UNICODE_SIZE)
|
||||
{
|
||||
Py_UNICODE uch2;
|
||||
((char *) &uch2)[0] = s[0];
|
||||
((char *) &uch2)[1] = s[1];
|
||||
if (Py_UNICODE_IS_LOW_SURROGATE(uch2))
|
||||
{
|
||||
ch = Py_UNICODE_JOIN_SURROGATES(uch, uch2);
|
||||
s += Py_UNICODE_SIZE;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
|
||||
goto onError;
|
||||
continue;
|
||||
|
||||
error:
|
||||
startinpos = s - starts;
|
||||
if (unicode_decode_call_errorhandler_writer(
|
||||
errors, &errorHandler,
|
||||
"unicode_internal", reason,
|
||||
&starts, &end, &startinpos, &endinpos, &exc, &s,
|
||||
&writer))
|
||||
goto onError;
|
||||
}
|
||||
|
||||
Py_XDECREF(errorHandler);
|
||||
Py_XDECREF(exc);
|
||||
return _PyUnicodeWriter_Finish(&writer);
|
||||
|
||||
onError:
|
||||
_PyUnicodeWriter_Dealloc(&writer);
|
||||
Py_XDECREF(errorHandler);
|
||||
Py_XDECREF(exc);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* --- Latin-1 Codec ------------------------------------------------------ */
|
||||
|
||||
PyObject *
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue