mirror of
https://github.com/python/cpython.git
synced 2025-10-09 08:31:26 +00:00
gh-80480: array: Add 'w' typecode. (#105242)
This commit is contained in:
parent
5a5ed7a3e6
commit
1237fb6a4b
7 changed files with 159 additions and 59 deletions
|
@ -924,12 +924,12 @@ module::
|
||||||
'Hello, there!'
|
'Hello, there!'
|
||||||
|
|
||||||
>>> import array
|
>>> import array
|
||||||
>>> a = array.array('u', s)
|
>>> a = array.array('w', s)
|
||||||
>>> print(a)
|
>>> print(a)
|
||||||
array('u', 'Hello, world')
|
array('w', 'Hello, world')
|
||||||
>>> a[0] = 'y'
|
>>> a[0] = 'y'
|
||||||
>>> print(a)
|
>>> print(a)
|
||||||
array('u', 'yello, world')
|
array('w', 'yello, world')
|
||||||
>>> a.tounicode()
|
>>> a.tounicode()
|
||||||
'yello, world'
|
'yello, world'
|
||||||
|
|
||||||
|
|
|
@ -24,6 +24,8 @@ defined:
|
||||||
+-----------+--------------------+-------------------+-----------------------+-------+
|
+-----------+--------------------+-------------------+-----------------------+-------+
|
||||||
| ``'u'`` | wchar_t | Unicode character | 2 | \(1) |
|
| ``'u'`` | wchar_t | Unicode character | 2 | \(1) |
|
||||||
+-----------+--------------------+-------------------+-----------------------+-------+
|
+-----------+--------------------+-------------------+-----------------------+-------+
|
||||||
|
| ``'w'`` | Py_UCS4 | Unicode character | 4 | |
|
||||||
|
+-----------+--------------------+-------------------+-----------------------+-------+
|
||||||
| ``'h'`` | signed short | int | 2 | |
|
| ``'h'`` | signed short | int | 2 | |
|
||||||
+-----------+--------------------+-------------------+-----------------------+-------+
|
+-----------+--------------------+-------------------+-----------------------+-------+
|
||||||
| ``'H'`` | unsigned short | int | 2 | |
|
| ``'H'`` | unsigned short | int | 2 | |
|
||||||
|
@ -56,6 +58,7 @@ Notes:
|
||||||
``Py_UNICODE`` is an alias of ``wchar_t`` since Python 3.3.
|
``Py_UNICODE`` is an alias of ``wchar_t`` since Python 3.3.
|
||||||
|
|
||||||
.. deprecated-removed:: 3.3 4.0
|
.. deprecated-removed:: 3.3 4.0
|
||||||
|
Please migrate to the ``'w'`` typecode.
|
||||||
|
|
||||||
|
|
||||||
The actual representation of values is determined by the machine architecture
|
The actual representation of values is determined by the machine architecture
|
||||||
|
@ -174,9 +177,9 @@ The module defines the following type:
|
||||||
|
|
||||||
.. method:: fromunicode(s)
|
.. method:: fromunicode(s)
|
||||||
|
|
||||||
Extends this array with data from the given unicode string. The array must
|
Extends this array with data from the given unicode string.
|
||||||
be a type ``'u'`` array; otherwise a :exc:`ValueError` is raised. Use
|
The array must have type code ``'u'`` or ``'w'``; otherwise a :exc:`ValueError` is raised.
|
||||||
``array.frombytes(unicodestring.encode(enc))`` to append Unicode data to an
|
Use ``array.frombytes(unicodestring.encode(enc))`` to append Unicode data to an
|
||||||
array of some other type.
|
array of some other type.
|
||||||
|
|
||||||
|
|
||||||
|
@ -236,21 +239,22 @@ The module defines the following type:
|
||||||
|
|
||||||
.. method:: tounicode()
|
.. method:: tounicode()
|
||||||
|
|
||||||
Convert the array to a unicode string. The array must be a type ``'u'`` array;
|
Convert the array to a unicode string. The array must have type code ``'u'`` or ``'w'``;
|
||||||
otherwise a :exc:`ValueError` is raised. Use ``array.tobytes().decode(enc)`` to
|
otherwise a :exc:`ValueError` is raised. Use ``array.tobytes().decode(enc)`` to
|
||||||
obtain a unicode string from an array of some other type.
|
obtain a unicode string from an array of some other type.
|
||||||
|
|
||||||
|
|
||||||
When an array object is printed or converted to a string, it is represented as
|
When an array object is printed or converted to a string, it is represented as
|
||||||
``array(typecode, initializer)``. The *initializer* is omitted if the array is
|
``array(typecode, initializer)``. The *initializer* is omitted if the array is
|
||||||
empty, otherwise it is a string if the *typecode* is ``'u'``, otherwise it is a
|
empty, otherwise it is a string if the *typecode* is ``'u'`` or ``'w'``,
|
||||||
list of numbers. The string is guaranteed to be able to be converted back to an
|
otherwise it is a list of numbers.
|
||||||
|
The string is guaranteed to be able to be converted back to an
|
||||||
array with the same type and value using :func:`eval`, so long as the
|
array with the same type and value using :func:`eval`, so long as the
|
||||||
:class:`~array.array` class has been imported using ``from array import array``.
|
:class:`~array.array` class has been imported using ``from array import array``.
|
||||||
Examples::
|
Examples::
|
||||||
|
|
||||||
array('l')
|
array('l')
|
||||||
array('u', 'hello \u2641')
|
array('w', 'hello \u2641')
|
||||||
array('l', [1, 2, 3, 4, 5])
|
array('l', [1, 2, 3, 4, 5])
|
||||||
array('d', [1.0, 2.0, 3.14])
|
array('d', [1.0, 2.0, 3.14])
|
||||||
|
|
||||||
|
|
|
@ -87,6 +87,13 @@ New Modules
|
||||||
Improved Modules
|
Improved Modules
|
||||||
================
|
================
|
||||||
|
|
||||||
|
array
|
||||||
|
-----
|
||||||
|
|
||||||
|
* Add the ``'w'`` type code, which can be used for Unicode strings.
|
||||||
|
It can be used instead of the ``'u'`` type code, which is deprecated.
|
||||||
|
(Contributed by Inada Naoki in :gh:`80480`.)
|
||||||
|
|
||||||
io
|
io
|
||||||
--
|
--
|
||||||
|
|
||||||
|
|
|
@ -27,7 +27,7 @@ class ArraySubclassWithKwargs(array.array):
|
||||||
def __init__(self, typecode, newarg=None):
|
def __init__(self, typecode, newarg=None):
|
||||||
array.array.__init__(self)
|
array.array.__init__(self)
|
||||||
|
|
||||||
typecodes = 'ubBhHiIlLfdqQ'
|
typecodes = 'uwbBhHiIlLfdqQ'
|
||||||
|
|
||||||
class MiscTest(unittest.TestCase):
|
class MiscTest(unittest.TestCase):
|
||||||
|
|
||||||
|
@ -186,11 +186,12 @@ class ArrayReconstructorTest(unittest.TestCase):
|
||||||
)
|
)
|
||||||
for testcase in testcases:
|
for testcase in testcases:
|
||||||
mformat_code, encoding = testcase
|
mformat_code, encoding = testcase
|
||||||
a = array.array('u', teststr)
|
for c in 'uw':
|
||||||
b = array_reconstructor(
|
a = array.array(c, teststr)
|
||||||
array.array, 'u', mformat_code, teststr.encode(encoding))
|
b = array_reconstructor(
|
||||||
self.assertEqual(a, b,
|
array.array, c, mformat_code, teststr.encode(encoding))
|
||||||
msg="{0!r} != {1!r}; testcase={2!r}".format(a, b, testcase))
|
self.assertEqual(a, b,
|
||||||
|
msg="{0!r} != {1!r}; testcase={2!r}".format(a, b, testcase))
|
||||||
|
|
||||||
|
|
||||||
class BaseTest:
|
class BaseTest:
|
||||||
|
@ -234,7 +235,7 @@ class BaseTest:
|
||||||
self.assertEqual(bi[1], len(a))
|
self.assertEqual(bi[1], len(a))
|
||||||
|
|
||||||
def test_byteswap(self):
|
def test_byteswap(self):
|
||||||
if self.typecode == 'u':
|
if self.typecode in ('u', 'w'):
|
||||||
example = '\U00100100'
|
example = '\U00100100'
|
||||||
else:
|
else:
|
||||||
example = self.example
|
example = self.example
|
||||||
|
@ -1079,7 +1080,7 @@ class BaseTest:
|
||||||
self.assertEqual(m.tobytes(), expected)
|
self.assertEqual(m.tobytes(), expected)
|
||||||
self.assertRaises(BufferError, a.frombytes, a.tobytes())
|
self.assertRaises(BufferError, a.frombytes, a.tobytes())
|
||||||
self.assertEqual(m.tobytes(), expected)
|
self.assertEqual(m.tobytes(), expected)
|
||||||
if self.typecode == 'u':
|
if self.typecode in ('u', 'w'):
|
||||||
self.assertRaises(BufferError, a.fromunicode, a.tounicode())
|
self.assertRaises(BufferError, a.fromunicode, a.tounicode())
|
||||||
self.assertEqual(m.tobytes(), expected)
|
self.assertEqual(m.tobytes(), expected)
|
||||||
self.assertRaises(BufferError, operator.imul, a, 2)
|
self.assertRaises(BufferError, operator.imul, a, 2)
|
||||||
|
@ -1135,16 +1136,17 @@ class BaseTest:
|
||||||
support.check_sizeof(self, a, basesize)
|
support.check_sizeof(self, a, basesize)
|
||||||
|
|
||||||
def test_initialize_with_unicode(self):
|
def test_initialize_with_unicode(self):
|
||||||
if self.typecode != 'u':
|
if self.typecode not in ('u', 'w'):
|
||||||
with self.assertRaises(TypeError) as cm:
|
with self.assertRaises(TypeError) as cm:
|
||||||
a = array.array(self.typecode, 'foo')
|
a = array.array(self.typecode, 'foo')
|
||||||
self.assertIn("cannot use a str", str(cm.exception))
|
self.assertIn("cannot use a str", str(cm.exception))
|
||||||
with self.assertRaises(TypeError) as cm:
|
with self.assertRaises(TypeError) as cm:
|
||||||
a = array.array(self.typecode, array.array('u', 'foo'))
|
a = array.array(self.typecode, array.array('w', 'foo'))
|
||||||
self.assertIn("cannot use a unicode array", str(cm.exception))
|
self.assertIn("cannot use a unicode array", str(cm.exception))
|
||||||
else:
|
else:
|
||||||
a = array.array(self.typecode, "foo")
|
a = array.array(self.typecode, "foo")
|
||||||
a = array.array(self.typecode, array.array('u', 'foo'))
|
a = array.array(self.typecode, array.array('u', 'foo'))
|
||||||
|
a = array.array(self.typecode, array.array('w', 'foo'))
|
||||||
|
|
||||||
@support.cpython_only
|
@support.cpython_only
|
||||||
def test_obsolete_write_lock(self):
|
def test_obsolete_write_lock(self):
|
||||||
|
@ -1171,40 +1173,45 @@ class UnicodeTest(StringTest, unittest.TestCase):
|
||||||
smallerexample = '\x01\u263a\x00\ufefe'
|
smallerexample = '\x01\u263a\x00\ufefe'
|
||||||
biggerexample = '\x01\u263a\x01\ufeff'
|
biggerexample = '\x01\u263a\x01\ufeff'
|
||||||
outside = str('\x33')
|
outside = str('\x33')
|
||||||
minitemsize = 2
|
minitemsize = sizeof_wchar
|
||||||
|
|
||||||
def test_unicode(self):
|
def test_unicode(self):
|
||||||
self.assertRaises(TypeError, array.array, 'b', 'foo')
|
self.assertRaises(TypeError, array.array, 'b', 'foo')
|
||||||
|
|
||||||
a = array.array('u', '\xa0\xc2\u1234')
|
a = array.array(self.typecode, '\xa0\xc2\u1234')
|
||||||
a.fromunicode(' ')
|
a.fromunicode(' ')
|
||||||
a.fromunicode('')
|
a.fromunicode('')
|
||||||
a.fromunicode('')
|
a.fromunicode('')
|
||||||
a.fromunicode('\x11abc\xff\u1234')
|
a.fromunicode('\x11abc\xff\u1234')
|
||||||
s = a.tounicode()
|
s = a.tounicode()
|
||||||
self.assertEqual(s, '\xa0\xc2\u1234 \x11abc\xff\u1234')
|
self.assertEqual(s, '\xa0\xc2\u1234 \x11abc\xff\u1234')
|
||||||
self.assertEqual(a.itemsize, sizeof_wchar)
|
self.assertEqual(a.itemsize, self.minitemsize)
|
||||||
|
|
||||||
s = '\x00="\'a\\b\x80\xff\u0000\u0001\u1234'
|
s = '\x00="\'a\\b\x80\xff\u0000\u0001\u1234'
|
||||||
a = array.array('u', s)
|
a = array.array(self.typecode, s)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
repr(a),
|
repr(a),
|
||||||
"array('u', '\\x00=\"\\'a\\\\b\\x80\xff\\x00\\x01\u1234')")
|
f"array('{self.typecode}', '\\x00=\"\\'a\\\\b\\x80\xff\\x00\\x01\u1234')")
|
||||||
|
|
||||||
self.assertRaises(TypeError, a.fromunicode)
|
self.assertRaises(TypeError, a.fromunicode)
|
||||||
|
|
||||||
def test_issue17223(self):
|
def test_issue17223(self):
|
||||||
# this used to crash
|
if self.typecode == 'u' and sizeof_wchar == 2:
|
||||||
if sizeof_wchar == 4:
|
|
||||||
# U+FFFFFFFF is an invalid code point in Unicode 6.0
|
|
||||||
invalid_str = b'\xff\xff\xff\xff'
|
|
||||||
else:
|
|
||||||
# PyUnicode_FromUnicode() cannot fail with 16-bit wchar_t
|
# PyUnicode_FromUnicode() cannot fail with 16-bit wchar_t
|
||||||
self.skipTest("specific to 32-bit wchar_t")
|
self.skipTest("specific to 32-bit wchar_t")
|
||||||
a = array.array('u', invalid_str)
|
|
||||||
|
# this used to crash
|
||||||
|
# U+FFFFFFFF is an invalid code point in Unicode 6.0
|
||||||
|
invalid_str = b'\xff\xff\xff\xff'
|
||||||
|
|
||||||
|
a = array.array(self.typecode, invalid_str)
|
||||||
self.assertRaises(ValueError, a.tounicode)
|
self.assertRaises(ValueError, a.tounicode)
|
||||||
self.assertRaises(ValueError, str, a)
|
self.assertRaises(ValueError, str, a)
|
||||||
|
|
||||||
|
class UCS4Test(UnicodeTest):
|
||||||
|
typecode = 'w'
|
||||||
|
minitemsize = 4
|
||||||
|
|
||||||
class NumberTest(BaseTest):
|
class NumberTest(BaseTest):
|
||||||
|
|
||||||
def test_extslice(self):
|
def test_extslice(self):
|
||||||
|
|
|
@ -955,7 +955,7 @@ class TestArrayWrites(unittest.TestCase):
|
||||||
|
|
||||||
def test_char_write(self):
|
def test_char_write(self):
|
||||||
import array, string
|
import array, string
|
||||||
a = array.array('u', string.ascii_letters)
|
a = array.array('w', string.ascii_letters)
|
||||||
|
|
||||||
with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
|
with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
|
||||||
writer = csv.writer(fileobj, dialect="excel")
|
writer = csv.writer(fileobj, dialect="excel")
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
:mod:`array`: Add the ``'w'`` typecode, which represents ``Py_UCS4``.
|
|
@ -13,6 +13,7 @@
|
||||||
#include "pycore_bytesobject.h" // _PyBytes_Repeat
|
#include "pycore_bytesobject.h" // _PyBytes_Repeat
|
||||||
#include "structmember.h" // PyMemberDef
|
#include "structmember.h" // PyMemberDef
|
||||||
#include <stddef.h> // offsetof()
|
#include <stddef.h> // offsetof()
|
||||||
|
#include <stdbool.h>
|
||||||
|
|
||||||
/*[clinic input]
|
/*[clinic input]
|
||||||
module array
|
module array
|
||||||
|
@ -279,6 +280,31 @@ u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
w_getitem(arrayobject *ap, Py_ssize_t i)
|
||||||
|
{
|
||||||
|
return PyUnicode_FromOrdinal(((Py_UCS4 *) ap->ob_item)[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
w_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
|
||||||
|
{
|
||||||
|
PyObject *u;
|
||||||
|
if (!PyArg_Parse(v, "U;array item must be unicode character", &u)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (PyUnicode_GetLength(u) != 1) {
|
||||||
|
PyErr_SetString(PyExc_TypeError,
|
||||||
|
"array item must be unicode character");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i >= 0) {
|
||||||
|
((Py_UCS4 *)ap->ob_item)[i] = PyUnicode_READ_CHAR(u, 0);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
h_getitem(arrayobject *ap, Py_ssize_t i)
|
h_getitem(arrayobject *ap, Py_ssize_t i)
|
||||||
|
@ -543,6 +569,7 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
|
||||||
DEFINE_COMPAREITEMS(b, signed char)
|
DEFINE_COMPAREITEMS(b, signed char)
|
||||||
DEFINE_COMPAREITEMS(BB, unsigned char)
|
DEFINE_COMPAREITEMS(BB, unsigned char)
|
||||||
DEFINE_COMPAREITEMS(u, wchar_t)
|
DEFINE_COMPAREITEMS(u, wchar_t)
|
||||||
|
DEFINE_COMPAREITEMS(w, Py_UCS4)
|
||||||
DEFINE_COMPAREITEMS(h, short)
|
DEFINE_COMPAREITEMS(h, short)
|
||||||
DEFINE_COMPAREITEMS(HH, unsigned short)
|
DEFINE_COMPAREITEMS(HH, unsigned short)
|
||||||
DEFINE_COMPAREITEMS(i, int)
|
DEFINE_COMPAREITEMS(i, int)
|
||||||
|
@ -561,6 +588,7 @@ static const struct arraydescr descriptors[] = {
|
||||||
{'b', 1, b_getitem, b_setitem, b_compareitems, "b", 1, 1},
|
{'b', 1, b_getitem, b_setitem, b_compareitems, "b", 1, 1},
|
||||||
{'B', 1, BB_getitem, BB_setitem, BB_compareitems, "B", 1, 0},
|
{'B', 1, BB_getitem, BB_setitem, BB_compareitems, "B", 1, 0},
|
||||||
{'u', sizeof(wchar_t), u_getitem, u_setitem, u_compareitems, "u", 0, 0},
|
{'u', sizeof(wchar_t), u_getitem, u_setitem, u_compareitems, "u", 0, 0},
|
||||||
|
{'w', sizeof(Py_UCS4), w_getitem, w_setitem, w_compareitems, "w", 0, 0,},
|
||||||
{'h', sizeof(short), h_getitem, h_setitem, h_compareitems, "h", 1, 1},
|
{'h', sizeof(short), h_getitem, h_setitem, h_compareitems, "h", 1, 1},
|
||||||
{'H', sizeof(short), HH_getitem, HH_setitem, HH_compareitems, "H", 1, 0},
|
{'H', sizeof(short), HH_getitem, HH_setitem, HH_compareitems, "H", 1, 0},
|
||||||
{'i', sizeof(int), i_getitem, i_setitem, i_compareitems, "i", 1, 1},
|
{'i', sizeof(int), i_getitem, i_setitem, i_compareitems, "i", 1, 1},
|
||||||
|
@ -1716,25 +1744,46 @@ static PyObject *
|
||||||
array_array_fromunicode_impl(arrayobject *self, PyObject *ustr)
|
array_array_fromunicode_impl(arrayobject *self, PyObject *ustr)
|
||||||
/*[clinic end generated code: output=24359f5e001a7f2b input=025db1fdade7a4ce]*/
|
/*[clinic end generated code: output=24359f5e001a7f2b input=025db1fdade7a4ce]*/
|
||||||
{
|
{
|
||||||
if (self->ob_descr->typecode != 'u') {
|
int typecode = self->ob_descr->typecode;
|
||||||
|
if (typecode != 'u' && typecode != 'w') {
|
||||||
PyErr_SetString(PyExc_ValueError,
|
PyErr_SetString(PyExc_ValueError,
|
||||||
"fromunicode() may only be called on "
|
"fromunicode() may only be called on "
|
||||||
"unicode type arrays");
|
"unicode type arrays ('u' or 'w')");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_ssize_t ustr_length = PyUnicode_AsWideChar(ustr, NULL, 0);
|
if (typecode == 'u') {
|
||||||
assert(ustr_length > 0);
|
Py_ssize_t ustr_length = PyUnicode_AsWideChar(ustr, NULL, 0);
|
||||||
if (ustr_length > 1) {
|
assert(ustr_length > 0);
|
||||||
ustr_length--; /* trim trailing NUL character */
|
if (ustr_length > 1) {
|
||||||
|
ustr_length--; /* trim trailing NUL character */
|
||||||
|
Py_ssize_t old_size = Py_SIZE(self);
|
||||||
|
if (array_resize(self, old_size + ustr_length) == -1) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// must not fail
|
||||||
|
PyUnicode_AsWideChar(
|
||||||
|
ustr, ((wchar_t *)self->ob_item) + old_size, ustr_length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else { // typecode == 'w'
|
||||||
|
Py_ssize_t ustr_length = PyUnicode_GetLength(ustr);
|
||||||
Py_ssize_t old_size = Py_SIZE(self);
|
Py_ssize_t old_size = Py_SIZE(self);
|
||||||
if (array_resize(self, old_size + ustr_length) == -1) {
|
Py_ssize_t new_size = old_size + ustr_length;
|
||||||
|
|
||||||
|
if (new_size < 0 || (size_t)new_size > PY_SSIZE_T_MAX / sizeof(Py_UCS4)) {
|
||||||
|
return PyErr_NoMemory();
|
||||||
|
}
|
||||||
|
if (array_resize(self, new_size) == -1) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
// must not fail
|
// must not fail
|
||||||
PyUnicode_AsWideChar(
|
Py_UCS4 *u = PyUnicode_AsUCS4(ustr, ((Py_UCS4*)self->ob_item) + old_size,
|
||||||
ustr, ((wchar_t *)self->ob_item) + old_size, ustr_length);
|
ustr_length, 0);
|
||||||
|
assert(u != NULL);
|
||||||
|
(void)u; // Suppress unused_variable warning.
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_RETURN_NONE;
|
Py_RETURN_NONE;
|
||||||
|
@ -1754,12 +1803,20 @@ static PyObject *
|
||||||
array_array_tounicode_impl(arrayobject *self)
|
array_array_tounicode_impl(arrayobject *self)
|
||||||
/*[clinic end generated code: output=08e442378336e1ef input=127242eebe70b66d]*/
|
/*[clinic end generated code: output=08e442378336e1ef input=127242eebe70b66d]*/
|
||||||
{
|
{
|
||||||
if (self->ob_descr->typecode != 'u') {
|
int typecode = self->ob_descr->typecode;
|
||||||
|
if (typecode != 'u' && typecode != 'w') {
|
||||||
PyErr_SetString(PyExc_ValueError,
|
PyErr_SetString(PyExc_ValueError,
|
||||||
"tounicode() may only be called on unicode type arrays");
|
"tounicode() may only be called on unicode type arrays ('u' or 'w')");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
return PyUnicode_FromWideChar((wchar_t *) self->ob_item, Py_SIZE(self));
|
if (typecode == 'u') {
|
||||||
|
return PyUnicode_FromWideChar((wchar_t *) self->ob_item, Py_SIZE(self));
|
||||||
|
}
|
||||||
|
else { // typecode == 'w'
|
||||||
|
int byteorder = 0; // native byteorder
|
||||||
|
return PyUnicode_DecodeUTF32((const char *) self->ob_item, Py_SIZE(self) * 4,
|
||||||
|
NULL, &byteorder);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*[clinic input]
|
/*[clinic input]
|
||||||
|
@ -1838,6 +1895,9 @@ typecode_to_mformat_code(char typecode)
|
||||||
}
|
}
|
||||||
return UNKNOWN_FORMAT;
|
return UNKNOWN_FORMAT;
|
||||||
|
|
||||||
|
case 'w':
|
||||||
|
return UTF32_LE + is_big_endian;
|
||||||
|
|
||||||
case 'f':
|
case 'f':
|
||||||
if (sizeof(float) == 4) {
|
if (sizeof(float) == 4) {
|
||||||
const float y = 16711938.0;
|
const float y = 16711938.0;
|
||||||
|
@ -2314,7 +2374,7 @@ array_repr(arrayobject *a)
|
||||||
return PyUnicode_FromFormat("%s('%c')",
|
return PyUnicode_FromFormat("%s('%c')",
|
||||||
_PyType_Name(Py_TYPE(a)), (int)typecode);
|
_PyType_Name(Py_TYPE(a)), (int)typecode);
|
||||||
}
|
}
|
||||||
if (typecode == 'u') {
|
if (typecode == 'u' || typecode == 'w') {
|
||||||
v = array_array_tounicode_impl(a);
|
v = array_array_tounicode_impl(a);
|
||||||
} else {
|
} else {
|
||||||
v = array_array_tolist_impl(a);
|
v = array_array_tolist_impl(a);
|
||||||
|
@ -2619,17 +2679,21 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (initial && c != 'u') {
|
bool is_unicode = c == 'u' || c == 'w';
|
||||||
|
|
||||||
|
if (initial && !is_unicode) {
|
||||||
if (PyUnicode_Check(initial)) {
|
if (PyUnicode_Check(initial)) {
|
||||||
PyErr_Format(PyExc_TypeError, "cannot use a str to initialize "
|
PyErr_Format(PyExc_TypeError, "cannot use a str to initialize "
|
||||||
"an array with typecode '%c'", c);
|
"an array with typecode '%c'", c);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
else if (array_Check(initial, state) &&
|
else if (array_Check(initial, state)) {
|
||||||
((arrayobject*)initial)->ob_descr->typecode == 'u') {
|
int ic = ((arrayobject*)initial)->ob_descr->typecode;
|
||||||
PyErr_Format(PyExc_TypeError, "cannot use a unicode array to "
|
if (ic == 'u' || ic == 'w') {
|
||||||
"initialize an array with typecode '%c'", c);
|
PyErr_Format(PyExc_TypeError, "cannot use a unicode array to "
|
||||||
return NULL;
|
"initialize an array with typecode '%c'", c);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2637,7 +2701,7 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||||
|| PyByteArray_Check(initial)
|
|| PyByteArray_Check(initial)
|
||||||
|| PyBytes_Check(initial)
|
|| PyBytes_Check(initial)
|
||||||
|| PyTuple_Check(initial)
|
|| PyTuple_Check(initial)
|
||||||
|| ((c=='u') && PyUnicode_Check(initial))
|
|| (is_unicode && PyUnicode_Check(initial))
|
||||||
|| (array_Check(initial, state)
|
|| (array_Check(initial, state)
|
||||||
&& c == ((arrayobject*)initial)->ob_descr->typecode))) {
|
&& c == ((arrayobject*)initial)->ob_descr->typecode))) {
|
||||||
it = PyObject_GetIter(initial);
|
it = PyObject_GetIter(initial);
|
||||||
|
@ -2697,14 +2761,31 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||||
Py_DECREF(v);
|
Py_DECREF(v);
|
||||||
}
|
}
|
||||||
else if (initial != NULL && PyUnicode_Check(initial)) {
|
else if (initial != NULL && PyUnicode_Check(initial)) {
|
||||||
Py_ssize_t n;
|
if (c == 'u') {
|
||||||
wchar_t *ustr = PyUnicode_AsWideCharString(initial, &n);
|
Py_ssize_t n;
|
||||||
if (ustr == NULL) {
|
wchar_t *ustr = PyUnicode_AsWideCharString(initial, &n);
|
||||||
Py_DECREF(a);
|
if (ustr == NULL) {
|
||||||
return NULL;
|
Py_DECREF(a);
|
||||||
}
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (n > 0) {
|
||||||
|
arrayobject *self = (arrayobject *)a;
|
||||||
|
// self->ob_item may be NULL but it is safe.
|
||||||
|
PyMem_Free(self->ob_item);
|
||||||
|
self->ob_item = (char *)ustr;
|
||||||
|
Py_SET_SIZE(self, n);
|
||||||
|
self->allocated = n;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else { // c == 'w'
|
||||||
|
Py_ssize_t n = PyUnicode_GET_LENGTH(initial);
|
||||||
|
Py_UCS4 *ustr = PyUnicode_AsUCS4Copy(initial);
|
||||||
|
if (ustr == NULL) {
|
||||||
|
Py_DECREF(a);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
if (n > 0) {
|
|
||||||
arrayobject *self = (arrayobject *)a;
|
arrayobject *self = (arrayobject *)a;
|
||||||
// self->ob_item may be NULL but it is safe.
|
// self->ob_item may be NULL but it is safe.
|
||||||
PyMem_Free(self->ob_item);
|
PyMem_Free(self->ob_item);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue