mirror of
https://github.com/python/cpython.git
synced 2025-11-13 15:40:05 +00:00
Merge.
This commit is contained in:
commit
734e159b12
2 changed files with 32 additions and 40 deletions
|
|
@ -377,16 +377,6 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
|
||||||
PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \
|
PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \
|
||||||
_PyUnicode_NONCOMPACT_DATA(op))
|
_PyUnicode_NONCOMPACT_DATA(op))
|
||||||
|
|
||||||
#define _PyUnicode_UTF8(op) \
|
|
||||||
(PyUnicode_IS_COMPACT_ASCII(op) ? \
|
|
||||||
((char*)((PyASCIIObject*)(op) + 1)) : \
|
|
||||||
((PyCompactUnicodeObject*)(op))->utf8)
|
|
||||||
|
|
||||||
#define _PyUnicode_UTF8_LENGTH(op) \
|
|
||||||
(PyUnicode_IS_COMPACT_ASCII(op) ? \
|
|
||||||
((PyASCIIObject*)(op))->length : \
|
|
||||||
((PyCompactUnicodeObject*)(op))->utf8_length)
|
|
||||||
|
|
||||||
/* Compute (index * char_size) where char_size is 2 ** (kind - 1).
|
/* Compute (index * char_size) where char_size is 2 ** (kind - 1).
|
||||||
|
|
||||||
The index is a character index, the result is a size in bytes. */
|
The index is a character index, the result is a size in bytes. */
|
||||||
|
|
@ -466,7 +456,7 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
|
||||||
#define PyUnicode_READY(op) \
|
#define PyUnicode_READY(op) \
|
||||||
(assert(PyUnicode_Check(op)), \
|
(assert(PyUnicode_Check(op)), \
|
||||||
(PyUnicode_IS_READY(op) ? \
|
(PyUnicode_IS_READY(op) ? \
|
||||||
0 : _PyUnicode_Ready((PyUnicodeObject *)(op))))
|
0 : _PyUnicode_Ready((PyObject *)(op))))
|
||||||
|
|
||||||
/* Return a maximum character value which is suitable for creating another
|
/* Return a maximum character value which is suitable for creating another
|
||||||
string based on op. This is always an approximation but more efficient
|
string based on op. This is always an approximation but more efficient
|
||||||
|
|
@ -507,14 +497,16 @@ PyAPI_FUNC(PyObject*) PyUnicode_New(
|
||||||
);
|
);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Initializes the canonical string representation from a the deprected
|
/* Initializes the canonical string representation from the deprecated
|
||||||
wstr/Py_UNICODE representation. This function is used to convert
|
wstr/Py_UNICODE representation. This function is used to convert Unicode
|
||||||
unicode objects which were created using the old API to the new flexible
|
objects which were created using the old API to the new flexible format
|
||||||
format introduced with PEP 393. The PyUnicode_READY() macro can be
|
introduced with PEP 393.
|
||||||
more efficient if the string is already ready. */
|
|
||||||
|
Don't call this function directly, use the public PyUnicode_READY() macro
|
||||||
|
instead. */
|
||||||
#ifndef Py_LIMITED_API
|
#ifndef Py_LIMITED_API
|
||||||
PyAPI_FUNC(int) _PyUnicode_Ready(
|
PyAPI_FUNC(int) _PyUnicode_Ready(
|
||||||
PyUnicodeObject *unicode /* Unicode object */
|
PyObject *unicode /* Unicode object */
|
||||||
);
|
);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -105,6 +105,14 @@ extern "C" {
|
||||||
} \
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
#define _PyUnicode_UTF8(op) \
|
||||||
|
(PyUnicode_IS_COMPACT_ASCII(op) ? \
|
||||||
|
((char*)((PyASCIIObject*)(op) + 1)) : \
|
||||||
|
((PyCompactUnicodeObject*)(op))->utf8)
|
||||||
|
#define _PyUnicode_UTF8_LENGTH(op) \
|
||||||
|
(PyUnicode_IS_COMPACT_ASCII(op) ? \
|
||||||
|
((PyASCIIObject*)(op))->length : \
|
||||||
|
((PyCompactUnicodeObject*)(op))->utf8_length)
|
||||||
#define _PyUnicode_WSTR(op) (((PyASCIIObject*)(op))->wstr)
|
#define _PyUnicode_WSTR(op) (((PyASCIIObject*)(op))->wstr)
|
||||||
#define _PyUnicode_WSTR_LENGTH(op) (((PyCompactUnicodeObject*)(op))->wstr_length)
|
#define _PyUnicode_WSTR_LENGTH(op) (((PyCompactUnicodeObject*)(op))->wstr_length)
|
||||||
#define _PyUnicode_LENGTH(op) (((PyASCIIObject *)(op))->length)
|
#define _PyUnicode_LENGTH(op) (((PyASCIIObject *)(op))->length)
|
||||||
|
|
@ -773,8 +781,9 @@ int unicode_ready_calls = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int
|
int
|
||||||
_PyUnicode_Ready(PyUnicodeObject *unicode)
|
_PyUnicode_Ready(PyObject *obj)
|
||||||
{
|
{
|
||||||
|
PyUnicodeObject *unicode = (PyUnicodeObject *)obj;
|
||||||
wchar_t *end;
|
wchar_t *end;
|
||||||
Py_UCS4 maxchar = 0;
|
Py_UCS4 maxchar = 0;
|
||||||
Py_ssize_t num_surrogates;
|
Py_ssize_t num_surrogates;
|
||||||
|
|
@ -782,25 +791,19 @@ _PyUnicode_Ready(PyUnicodeObject *unicode)
|
||||||
Py_ssize_t length_wo_surrogates;
|
Py_ssize_t length_wo_surrogates;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
assert(PyUnicode_Check(unicode));
|
|
||||||
|
|
||||||
if (unicode->data.any != NULL) {
|
|
||||||
assert(PyUnicode_KIND(unicode) != PyUnicode_WCHAR_KIND);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* _PyUnicode_Ready() is only intended for old-style API usage where
|
/* _PyUnicode_Ready() is only intended for old-style API usage where
|
||||||
* strings were created using _PyObject_New() and where no canonical
|
strings were created using _PyObject_New() and where no canonical
|
||||||
* representation (the str field) has been set yet aka strings
|
representation (the str field) has been set yet aka strings
|
||||||
* which are not yet ready.
|
which are not yet ready. */
|
||||||
*/
|
assert(PyUnicode_Check(obj));
|
||||||
|
assert(!PyUnicode_IS_READY(obj));
|
||||||
|
assert(!PyUnicode_IS_COMPACT(obj));
|
||||||
|
assert(_PyUnicode_KIND(obj) == PyUnicode_WCHAR_KIND);
|
||||||
assert(_PyUnicode_WSTR(unicode) != NULL);
|
assert(_PyUnicode_WSTR(unicode) != NULL);
|
||||||
assert(_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND);
|
assert(unicode->data.any == NULL);
|
||||||
assert(!PyUnicode_IS_COMPACT(unicode));
|
|
||||||
assert(!PyUnicode_IS_READY(unicode));
|
|
||||||
/* Actually, it should neither be interned nor be anything else: */
|
|
||||||
assert(_PyUnicode_STATE(unicode).interned == 0);
|
|
||||||
assert(unicode->_base.utf8 == NULL);
|
assert(unicode->_base.utf8 == NULL);
|
||||||
|
/* Actually, it should neither be interned nor be anything else: */
|
||||||
|
assert(_PyUnicode_STATE(unicode).interned == SSTATE_NOT_INTERNED);
|
||||||
|
|
||||||
#ifdef Py_DEBUG
|
#ifdef Py_DEBUG
|
||||||
++unicode_ready_calls;
|
++unicode_ready_calls;
|
||||||
|
|
@ -808,11 +811,8 @@ _PyUnicode_Ready(PyUnicodeObject *unicode)
|
||||||
|
|
||||||
end = _PyUnicode_WSTR(unicode) + _PyUnicode_WSTR_LENGTH(unicode);
|
end = _PyUnicode_WSTR(unicode) + _PyUnicode_WSTR_LENGTH(unicode);
|
||||||
if (find_maxchar_surrogates(_PyUnicode_WSTR(unicode), end,
|
if (find_maxchar_surrogates(_PyUnicode_WSTR(unicode), end,
|
||||||
&maxchar,
|
&maxchar, &num_surrogates) == -1)
|
||||||
&num_surrogates) == -1) {
|
|
||||||
assert(0 && "PyUnicode_FindMaxCharAndNumSurrogatePairs failed");
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
|
||||||
|
|
||||||
if (maxchar < 256) {
|
if (maxchar < 256) {
|
||||||
unicode->data.any = PyObject_MALLOC(_PyUnicode_WSTR_LENGTH(unicode) + 1);
|
unicode->data.any = PyObject_MALLOC(_PyUnicode_WSTR_LENGTH(unicode) + 1);
|
||||||
|
|
@ -1038,8 +1038,8 @@ PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size)
|
||||||
|
|
||||||
/* If not empty and not single character, copy the Unicode data
|
/* If not empty and not single character, copy the Unicode data
|
||||||
into the new object */
|
into the new object */
|
||||||
if (find_maxchar_surrogates(u, u + size, &maxchar,
|
if (find_maxchar_surrogates(u, u + size,
|
||||||
&num_surrogates) == -1)
|
&maxchar, &num_surrogates) == -1)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
unicode = (PyUnicodeObject *) PyUnicode_New(size - num_surrogates,
|
unicode = (PyUnicodeObject *) PyUnicode_New(size - num_surrogates,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue