mirror of
https://github.com/python/cpython.git
synced 2025-10-07 07:31:46 +00:00
Close #13072: Restore the pre-PEP 393 code for the array module
The 'u' format of the array module uses the Py_UNICODE type again, for backward compatibility with Python 3.2. The only change from Python 3.2 is that the result of PyUnicode_AsUnicode() is now checked for NULL.
This commit is contained in:
parent
3af2617401
commit
62bb394729
2 changed files with 56 additions and 41 deletions
|
@ -21,7 +21,7 @@ defined:
|
||||||
+-----------+--------------------+-------------------+-----------------------+-------+
|
+-----------+--------------------+-------------------+-----------------------+-------+
|
||||||
| ``'B'`` | unsigned char | int | 1 | |
|
| ``'B'`` | unsigned char | int | 1 | |
|
||||||
+-----------+--------------------+-------------------+-----------------------+-------+
|
+-----------+--------------------+-------------------+-----------------------+-------+
|
||||||
| ``'u'`` | Py_UCS4 | Unicode character | 4 | |
|
| ``'u'`` | Py_UNICODE | Unicode character | 2 | \(1) |
|
||||||
+-----------+--------------------+-------------------+-----------------------+-------+
|
+-----------+--------------------+-------------------+-----------------------+-------+
|
||||||
| ``'h'`` | signed short | int | 2 | |
|
| ``'h'`` | signed short | int | 2 | |
|
||||||
+-----------+--------------------+-------------------+-----------------------+-------+
|
+-----------+--------------------+-------------------+-----------------------+-------+
|
||||||
|
@ -35,9 +35,9 @@ defined:
|
||||||
+-----------+--------------------+-------------------+-----------------------+-------+
|
+-----------+--------------------+-------------------+-----------------------+-------+
|
||||||
| ``'L'`` | unsigned long | int | 4 | |
|
| ``'L'`` | unsigned long | int | 4 | |
|
||||||
+-----------+--------------------+-------------------+-----------------------+-------+
|
+-----------+--------------------+-------------------+-----------------------+-------+
|
||||||
| ``'q'`` | signed long long | int | 8 | \(1) |
|
| ``'q'`` | signed long long | int | 8 | \(2) |
|
||||||
+-----------+--------------------+-------------------+-----------------------+-------+
|
+-----------+--------------------+-------------------+-----------------------+-------+
|
||||||
| ``'Q'`` | unsigned long long | int | 8 | \(1) |
|
| ``'Q'`` | unsigned long long | int | 8 | \(2) |
|
||||||
+-----------+--------------------+-------------------+-----------------------+-------+
|
+-----------+--------------------+-------------------+-----------------------+-------+
|
||||||
| ``'f'`` | float | float | 4 | |
|
| ``'f'`` | float | float | 4 | |
|
||||||
+-----------+--------------------+-------------------+-----------------------+-------+
|
+-----------+--------------------+-------------------+-----------------------+-------+
|
||||||
|
@ -47,6 +47,11 @@ defined:
|
||||||
Notes:
|
Notes:
|
||||||
|
|
||||||
(1)
|
(1)
|
||||||
|
The ``'u'`` type code corresponds to Python's unicode character
|
||||||
|
(:c:type:`Py_UNICODE` which is :c:type:`wchar_t`). Depending on the
|
||||||
|
platform, it can be 16 bits or 32 bits.
|
||||||
|
|
||||||
|
(2)
|
||||||
The ``'q'`` and ``'Q'`` type codes are available only if
|
The ``'q'`` and ``'Q'`` type codes are available only if
|
||||||
the platform C compiler used to build Python supports C :c:type:`long long`,
|
the platform C compiler used to build Python supports C :c:type:`long long`,
|
||||||
or, on Windows, :c:type:`__int64`.
|
or, on Windows, :c:type:`__int64`.
|
||||||
|
|
|
@ -174,25 +174,24 @@ BB_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
|
||||||
static PyObject *
|
static PyObject *
|
||||||
u_getitem(arrayobject *ap, Py_ssize_t i)
|
u_getitem(arrayobject *ap, Py_ssize_t i)
|
||||||
{
|
{
|
||||||
return PyUnicode_FromOrdinal(((Py_UCS4 *) ap->ob_item)[i]);
|
return PyUnicode_FromUnicode(&((Py_UNICODE *) ap->ob_item)[i], 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
|
u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
|
||||||
{
|
{
|
||||||
PyObject *p;
|
Py_UNICODE *p;
|
||||||
|
Py_ssize_t len;
|
||||||
|
|
||||||
if (!PyArg_Parse(v, "U;array item must be unicode character", &p))
|
if (!PyArg_Parse(v, "u#;array item must be unicode character", &p, &len))
|
||||||
return -1;
|
return -1;
|
||||||
if (PyUnicode_READY(p))
|
if (len != 1) {
|
||||||
return -1;
|
|
||||||
if (PyUnicode_GET_LENGTH(p) != 1) {
|
|
||||||
PyErr_SetString(PyExc_TypeError,
|
PyErr_SetString(PyExc_TypeError,
|
||||||
"array item must be unicode character");
|
"array item must be unicode character");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (i >= 0)
|
if (i >= 0)
|
||||||
((Py_UCS4 *)ap->ob_item)[i] = PyUnicode_READ_CHAR(p, 0);
|
((Py_UNICODE *)ap->ob_item)[i] = p[0];
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -444,13 +443,6 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if SIZEOF_INT == 4
|
|
||||||
# define STRUCT_LONG_FORMAT "I"
|
|
||||||
#elif SIZEOF_LONG == 4
|
|
||||||
# define STRUCT_LONG_FORMAT "L"
|
|
||||||
#else
|
|
||||||
# error "Unable to get struct format for Py_UCS4"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Description of types.
|
/* Description of types.
|
||||||
*
|
*
|
||||||
|
@ -460,7 +452,7 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
|
||||||
static struct arraydescr descriptors[] = {
|
static struct arraydescr descriptors[] = {
|
||||||
{'b', 1, b_getitem, b_setitem, "b", 1, 1},
|
{'b', 1, b_getitem, b_setitem, "b", 1, 1},
|
||||||
{'B', 1, BB_getitem, BB_setitem, "B", 1, 0},
|
{'B', 1, BB_getitem, BB_setitem, "B", 1, 0},
|
||||||
{'u', sizeof(Py_UCS4), u_getitem, u_setitem, STRUCT_LONG_FORMAT, 0, 0},
|
{'u', sizeof(Py_UNICODE), u_getitem, u_setitem, "u", 0, 0},
|
||||||
{'h', sizeof(short), h_getitem, h_setitem, "h", 1, 1},
|
{'h', sizeof(short), h_getitem, h_setitem, "h", 1, 1},
|
||||||
{'H', sizeof(short), HH_getitem, HH_setitem, "H", 1, 0},
|
{'H', sizeof(short), HH_getitem, HH_setitem, "H", 1, 0},
|
||||||
{'i', sizeof(int), i_getitem, i_setitem, "i", 1, 1},
|
{'i', sizeof(int), i_getitem, i_setitem, "i", 1, 1},
|
||||||
|
@ -1519,26 +1511,25 @@ This method is deprecated. Use tobytes instead.");
|
||||||
static PyObject *
|
static PyObject *
|
||||||
array_fromunicode(arrayobject *self, PyObject *args)
|
array_fromunicode(arrayobject *self, PyObject *args)
|
||||||
{
|
{
|
||||||
PyObject *ustr;
|
Py_UNICODE *ustr;
|
||||||
Py_ssize_t n;
|
Py_ssize_t n;
|
||||||
|
char typecode;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "U:fromunicode", &ustr))
|
if (!PyArg_ParseTuple(args, "u#:fromunicode", &ustr, &n))
|
||||||
return NULL;
|
return NULL;
|
||||||
if (self->ob_descr->typecode != 'u') {
|
typecode = self->ob_descr->typecode;
|
||||||
|
if ((typecode != 'u')) {
|
||||||
PyErr_SetString(PyExc_ValueError,
|
PyErr_SetString(PyExc_ValueError,
|
||||||
"fromunicode() may only be called on "
|
"fromunicode() may only be called on "
|
||||||
"unicode type arrays");
|
"unicode type arrays");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
if (PyUnicode_READY(ustr))
|
|
||||||
return NULL;
|
|
||||||
n = PyUnicode_GET_LENGTH(ustr);
|
|
||||||
if (n > 0) {
|
if (n > 0) {
|
||||||
Py_ssize_t old_size = Py_SIZE(self);
|
Py_ssize_t old_size = Py_SIZE(self);
|
||||||
if (array_resize(self, old_size + n) == -1)
|
if (array_resize(self, old_size + n) == -1)
|
||||||
return NULL;
|
return NULL;
|
||||||
if (!PyUnicode_AsUCS4(ustr, (Py_UCS4 *)self->ob_item + old_size, n, 0))
|
memcpy(self->ob_item + old_size * sizeof(Py_UNICODE),
|
||||||
return NULL;
|
ustr, n * sizeof(Py_UNICODE));
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_INCREF(Py_None);
|
Py_INCREF(Py_None);
|
||||||
|
@ -1557,14 +1548,14 @@ append Unicode data to an array of some other type.");
|
||||||
static PyObject *
|
static PyObject *
|
||||||
array_tounicode(arrayobject *self, PyObject *unused)
|
array_tounicode(arrayobject *self, PyObject *unused)
|
||||||
{
|
{
|
||||||
if (self->ob_descr->typecode != 'u') {
|
char typecode;
|
||||||
|
typecode = self->ob_descr->typecode;
|
||||||
|
if ((typecode != 'u')) {
|
||||||
PyErr_SetString(PyExc_ValueError,
|
PyErr_SetString(PyExc_ValueError,
|
||||||
"tounicode() may only be called on unicode type arrays");
|
"tounicode() may only be called on unicode type arrays");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
|
return PyUnicode_FromUnicode((Py_UNICODE *) self->ob_item, Py_SIZE(self));
|
||||||
(Py_UCS4 *) self->ob_item,
|
|
||||||
Py_SIZE(self));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PyDoc_STRVAR(tounicode_doc,
|
PyDoc_STRVAR(tounicode_doc,
|
||||||
|
@ -1671,7 +1662,13 @@ typecode_to_mformat_code(char typecode)
|
||||||
return UNSIGNED_INT8;
|
return UNSIGNED_INT8;
|
||||||
|
|
||||||
case 'u':
|
case 'u':
|
||||||
|
if (sizeof(Py_UNICODE) == 2) {
|
||||||
|
return UTF16_LE + is_big_endian;
|
||||||
|
}
|
||||||
|
if (sizeof(Py_UNICODE) == 4) {
|
||||||
return UTF32_LE + is_big_endian;
|
return UTF32_LE + is_big_endian;
|
||||||
|
}
|
||||||
|
return UNKNOWN_FORMAT;
|
||||||
|
|
||||||
case 'f':
|
case 'f':
|
||||||
if (sizeof(float) == 4) {
|
if (sizeof(float) == 4) {
|
||||||
|
@ -2419,8 +2416,14 @@ array_buffer_getbuf(arrayobject *self, Py_buffer *view, int flags)
|
||||||
view->strides = &(view->itemsize);
|
view->strides = &(view->itemsize);
|
||||||
view->format = NULL;
|
view->format = NULL;
|
||||||
view->internal = NULL;
|
view->internal = NULL;
|
||||||
if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT)
|
if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) {
|
||||||
view->format = self->ob_descr->formats;
|
view->format = self->ob_descr->formats;
|
||||||
|
#ifdef Py_UNICODE_WIDE
|
||||||
|
if (self->ob_descr->typecode == 'u') {
|
||||||
|
view->format = "w";
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
finish:
|
finish:
|
||||||
self->ob_exports++;
|
self->ob_exports++;
|
||||||
|
@ -2534,25 +2537,29 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||||
Py_DECREF(v);
|
Py_DECREF(v);
|
||||||
}
|
}
|
||||||
else if (initial != NULL && PyUnicode_Check(initial)) {
|
else if (initial != NULL && PyUnicode_Check(initial)) {
|
||||||
|
Py_UNICODE *ustr;
|
||||||
Py_ssize_t n;
|
Py_ssize_t n;
|
||||||
if (PyUnicode_READY(initial)) {
|
|
||||||
|
ustr = PyUnicode_AsUnicode(initial);
|
||||||
|
if (ustr == NULL) {
|
||||||
|
PyErr_NoMemory();
|
||||||
Py_DECREF(a);
|
Py_DECREF(a);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
n = PyUnicode_GET_LENGTH(initial);
|
|
||||||
|
n = PyUnicode_GET_DATA_SIZE(initial);
|
||||||
if (n > 0) {
|
if (n > 0) {
|
||||||
arrayobject *self = (arrayobject *)a;
|
arrayobject *self = (arrayobject *)a;
|
||||||
Py_UCS4 *item = (Py_UCS4 *)self->ob_item;
|
char *item = self->ob_item;
|
||||||
item = (Py_UCS4 *)PyMem_Realloc(item, n * sizeof(Py_UCS4));
|
item = (char *)PyMem_Realloc(item, n);
|
||||||
if (item == NULL) {
|
if (item == NULL) {
|
||||||
PyErr_NoMemory();
|
PyErr_NoMemory();
|
||||||
Py_DECREF(a);
|
Py_DECREF(a);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
self->ob_item = (char*)item;
|
self->ob_item = item;
|
||||||
Py_SIZE(self) = n;
|
Py_SIZE(self) = n / sizeof(Py_UNICODE);
|
||||||
if (!PyUnicode_AsUCS4(initial, item, n, 0))
|
memcpy(item, ustr, n);
|
||||||
return NULL;
|
|
||||||
self->allocated = Py_SIZE(self);
|
self->allocated = Py_SIZE(self);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2593,7 +2600,7 @@ is a single character. The following type codes are defined:\n\
|
||||||
Type code C Type Minimum size in bytes \n\
|
Type code C Type Minimum size in bytes \n\
|
||||||
'b' signed integer 1 \n\
|
'b' signed integer 1 \n\
|
||||||
'B' unsigned integer 1 \n\
|
'B' unsigned integer 1 \n\
|
||||||
'u' Unicode character 4 \n\
|
'u' Unicode character 2 (see note) \n\
|
||||||
'h' signed integer 2 \n\
|
'h' signed integer 2 \n\
|
||||||
'H' unsigned integer 2 \n\
|
'H' unsigned integer 2 \n\
|
||||||
'i' signed integer 2 \n\
|
'i' signed integer 2 \n\
|
||||||
|
@ -2605,6 +2612,9 @@ is a single character. The following type codes are defined:\n\
|
||||||
'f' floating point 4 \n\
|
'f' floating point 4 \n\
|
||||||
'd' floating point 8 \n\
|
'd' floating point 8 \n\
|
||||||
\n\
|
\n\
|
||||||
|
NOTE: The 'u' typecode corresponds to Python's unicode character. On \n\
|
||||||
|
narrow builds this is 2-bytes on wide builds this is 4-bytes.\n\
|
||||||
|
\n\
|
||||||
NOTE: The 'q' and 'Q' type codes are only available if the platform \n\
|
NOTE: The 'q' and 'Q' type codes are only available if the platform \n\
|
||||||
C compiler used to build Python supports 'long long', or, on Windows, \n\
|
C compiler used to build Python supports 'long long', or, on Windows, \n\
|
||||||
'__int64'.\n\
|
'__int64'.\n\
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue