mirror of
https://github.com/python/cpython.git
synced 2025-07-16 15:55:18 +00:00
Fix and deprecate the unicode_internal codec
For backward compatibility, the unicode_internal codec now uses Py_UNICODE instead of the real internal representation (PEP 393: Py_UCS1, Py_UCS2 or Py_UCS4).
This commit is contained in:
parent
240c55f721
commit
9f4b1e9c50
4 changed files with 42 additions and 10 deletions
|
@ -1173,6 +1173,8 @@ particular, the following variants typically exist:
|
||||||
| unicode_internal | | Return the internal |
|
| unicode_internal | | Return the internal |
|
||||||
| | | representation of the |
|
| | | representation of the |
|
||||||
| | | operand |
|
| | | operand |
|
||||||
|
| | | |
|
||||||
|
| | | .. deprecated:: 3.3 |
|
||||||
+--------------------+---------+---------------------------+
|
+--------------------+---------+---------------------------+
|
||||||
|
|
||||||
The following codecs provide bytes-to-bytes mappings.
|
The following codecs provide bytes-to-bytes mappings.
|
||||||
|
|
|
@ -250,6 +250,8 @@ versions.
|
||||||
|
|
||||||
(:issue:`12100`)
|
(:issue:`12100`)
|
||||||
|
|
||||||
|
The ``unicode_internal`` codec has been deprecated.
|
||||||
|
|
||||||
crypt
|
crypt
|
||||||
-----
|
-----
|
||||||
|
|
||||||
|
|
|
@ -675,18 +675,30 @@ unicode_internal_encode(PyObject *self,
|
||||||
PyObject *obj;
|
PyObject *obj;
|
||||||
const char *errors = NULL;
|
const char *errors = NULL;
|
||||||
const char *data;
|
const char *data;
|
||||||
Py_ssize_t size;
|
Py_ssize_t len, size;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
|
if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
|
||||||
&obj, &errors))
|
&obj, &errors))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
if (PyUnicode_Check(obj)) {
|
if (PyUnicode_Check(obj)) {
|
||||||
|
Py_UNICODE *u;
|
||||||
|
|
||||||
if (PyUnicode_READY(obj) < 0)
|
if (PyUnicode_READY(obj) < 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
data = PyUnicode_AS_DATA(obj);
|
|
||||||
size = PyUnicode_GET_DATA_SIZE(obj);
|
if (PyErr_WarnEx(PyExc_DeprecationWarning,
|
||||||
return codec_tuple(PyBytes_FromStringAndSize(data, size),
|
"unicode_internal codecs has been deprecated",
|
||||||
|
1))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
u = PyUnicode_AsUnicodeAndSize(obj, &len);
|
||||||
|
if (u == NULL)
|
||||||
|
return NULL;
|
||||||
|
if (len > PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
|
||||||
|
return PyErr_NoMemory();
|
||||||
|
size = len * sizeof(Py_UNICODE);
|
||||||
|
return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
|
||||||
PyUnicode_GET_LENGTH(obj));
|
PyUnicode_GET_LENGTH(obj));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
|
@ -6237,6 +6237,11 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
|
||||||
PyObject *errorHandler = NULL;
|
PyObject *errorHandler = NULL;
|
||||||
PyObject *exc = NULL;
|
PyObject *exc = NULL;
|
||||||
|
|
||||||
|
if (PyErr_WarnEx(PyExc_DeprecationWarning,
|
||||||
|
"unicode_internal codecs has been deprecated",
|
||||||
|
1))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
/* XXX overflow detection missing */
|
/* XXX overflow detection missing */
|
||||||
v = PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE, 127);
|
v = PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE, 127);
|
||||||
if (v == NULL)
|
if (v == NULL)
|
||||||
|
@ -6270,15 +6275,26 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
|
||||||
errors, &errorHandler,
|
errors, &errorHandler,
|
||||||
"unicode_internal", reason,
|
"unicode_internal", reason,
|
||||||
&starts, &end, &startinpos, &endinpos, &exc, &s,
|
&starts, &end, &startinpos, &endinpos, &exc, &s,
|
||||||
&v, &outpos)) {
|
&v, &outpos))
|
||||||
goto onError;
|
goto onError;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
s += Py_UNICODE_SIZE;
|
||||||
|
#ifndef Py_UNICODE_WIDE
|
||||||
|
if (ch >= 0xD800 && ch <= 0xDBFF && s < end)
|
||||||
|
{
|
||||||
|
Py_UCS4 ch2 = *(Py_UNICODE*)s;
|
||||||
|
if (ch2 >= 0xDC00 && ch2 <= 0xDFFF)
|
||||||
|
{
|
||||||
|
ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
|
||||||
|
s += Py_UNICODE_SIZE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
#endif
|
||||||
if (unicode_putchar(&v, &outpos, ch) < 0)
|
|
||||||
goto onError;
|
if (unicode_putchar(&v, &outpos, ch) < 0)
|
||||||
s += Py_UNICODE_SIZE;
|
goto onError;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (PyUnicode_Resize(&v, outpos) < 0)
|
if (PyUnicode_Resize(&v, outpos) < 0)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue