mirror of
https://github.com/python/cpython.git
synced 2025-08-27 04:05:34 +00:00
This commit is contained in:
parent
24cddab4a2
commit
fa6c48e4b3
2 changed files with 126 additions and 60 deletions
|
@ -111,20 +111,42 @@ NOTE: In the interpreter's initialization phase, some globals are currently
|
|||
# define _PyUnicode_CHECK(op) PyUnicode_Check(op)
|
||||
#endif
|
||||
|
||||
#define _PyUnicode_UTF8(op) \
|
||||
(_PyCompactUnicodeObject_CAST(op)->utf8)
|
||||
#define PyUnicode_UTF8(op) \
|
||||
(assert(_PyUnicode_CHECK(op)), \
|
||||
PyUnicode_IS_COMPACT_ASCII(op) ? \
|
||||
((char*)(_PyASCIIObject_CAST(op) + 1)) : \
|
||||
_PyUnicode_UTF8(op))
|
||||
#define _PyUnicode_UTF8_LENGTH(op) \
|
||||
(_PyCompactUnicodeObject_CAST(op)->utf8_length)
|
||||
#define PyUnicode_UTF8_LENGTH(op) \
|
||||
(assert(_PyUnicode_CHECK(op)), \
|
||||
PyUnicode_IS_COMPACT_ASCII(op) ? \
|
||||
_PyASCIIObject_CAST(op)->length : \
|
||||
_PyUnicode_UTF8_LENGTH(op))
|
||||
static inline char* _PyUnicode_UTF8(PyObject *op)
|
||||
{
|
||||
return FT_ATOMIC_LOAD_PTR_ACQUIRE(_PyCompactUnicodeObject_CAST(op)->utf8);
|
||||
}
|
||||
|
||||
static inline char* PyUnicode_UTF8(PyObject *op)
|
||||
{
|
||||
assert(_PyUnicode_CHECK(op));
|
||||
if (PyUnicode_IS_COMPACT_ASCII(op)) {
|
||||
return ((char*)(_PyASCIIObject_CAST(op) + 1));
|
||||
}
|
||||
else {
|
||||
return _PyUnicode_UTF8(op);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void PyUnicode_SET_UTF8(PyObject *op, char *utf8)
|
||||
{
|
||||
FT_ATOMIC_STORE_PTR_RELEASE(_PyCompactUnicodeObject_CAST(op)->utf8, utf8);
|
||||
}
|
||||
|
||||
static inline Py_ssize_t PyUnicode_UTF8_LENGTH(PyObject *op)
|
||||
{
|
||||
assert(_PyUnicode_CHECK(op));
|
||||
if (PyUnicode_IS_COMPACT_ASCII(op)) {
|
||||
return _PyASCIIObject_CAST(op)->length;
|
||||
}
|
||||
else {
|
||||
return _PyCompactUnicodeObject_CAST(op)->utf8_length;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void PyUnicode_SET_UTF8_LENGTH(PyObject *op, Py_ssize_t length)
|
||||
{
|
||||
_PyCompactUnicodeObject_CAST(op)->utf8_length = length;
|
||||
}
|
||||
|
||||
#define _PyUnicode_LENGTH(op) \
|
||||
(_PyASCIIObject_CAST(op)->length)
|
||||
|
@ -132,26 +154,37 @@ NOTE: In the interpreter's initialization phase, some globals are currently
|
|||
(_PyASCIIObject_CAST(op)->state)
|
||||
#define _PyUnicode_HASH(op) \
|
||||
(_PyASCIIObject_CAST(op)->hash)
|
||||
#define _PyUnicode_KIND(op) \
|
||||
(assert(_PyUnicode_CHECK(op)), \
|
||||
_PyASCIIObject_CAST(op)->state.kind)
|
||||
#define _PyUnicode_GET_LENGTH(op) \
|
||||
(assert(_PyUnicode_CHECK(op)), \
|
||||
_PyASCIIObject_CAST(op)->length)
|
||||
|
||||
static inline Py_hash_t PyUnicode_HASH(PyObject *op)
|
||||
{
|
||||
assert(_PyUnicode_CHECK(op));
|
||||
return FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash);
|
||||
}
|
||||
|
||||
static inline void PyUnicode_SET_HASH(PyObject *op, Py_hash_t hash)
|
||||
{
|
||||
FT_ATOMIC_STORE_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash, hash);
|
||||
}
|
||||
|
||||
#define _PyUnicode_DATA_ANY(op) \
|
||||
(_PyUnicodeObject_CAST(op)->data.any)
|
||||
|
||||
#define _PyUnicode_SHARE_UTF8(op) \
|
||||
(assert(_PyUnicode_CHECK(op)), \
|
||||
assert(!PyUnicode_IS_COMPACT_ASCII(op)), \
|
||||
(_PyUnicode_UTF8(op) == PyUnicode_DATA(op)))
|
||||
static inline int _PyUnicode_SHARE_UTF8(PyObject *op)
|
||||
{
|
||||
assert(_PyUnicode_CHECK(op));
|
||||
assert(!PyUnicode_IS_COMPACT_ASCII(op));
|
||||
return (_PyUnicode_UTF8(op) == PyUnicode_DATA(op));
|
||||
}
|
||||
|
||||
/* true if the Unicode object has an allocated UTF-8 memory block
|
||||
(not shared with other data) */
|
||||
#define _PyUnicode_HAS_UTF8_MEMORY(op) \
|
||||
((!PyUnicode_IS_COMPACT_ASCII(op) \
|
||||
&& _PyUnicode_UTF8(op) \
|
||||
&& _PyUnicode_UTF8(op) != PyUnicode_DATA(op)))
|
||||
static inline int _PyUnicode_HAS_UTF8_MEMORY(PyObject *op)
|
||||
{
|
||||
return (!PyUnicode_IS_COMPACT_ASCII(op)
|
||||
&& _PyUnicode_UTF8(op) != NULL
|
||||
&& _PyUnicode_UTF8(op) != PyUnicode_DATA(op));
|
||||
}
|
||||
|
||||
|
||||
/* Generic helper macro to convert characters of different types.
|
||||
from_type and to_type have to be valid type names, begin and end
|
||||
|
@ -650,7 +683,7 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
|
|||
|| kind == PyUnicode_2BYTE_KIND
|
||||
|| kind == PyUnicode_4BYTE_KIND);
|
||||
CHECK(ascii->state.ascii == 0);
|
||||
CHECK(compact->utf8 != data);
|
||||
CHECK(_PyUnicode_UTF8(op) != data);
|
||||
}
|
||||
else {
|
||||
PyUnicodeObject *unicode = _PyUnicodeObject_CAST(op);
|
||||
|
@ -662,16 +695,17 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
|
|||
CHECK(ascii->state.compact == 0);
|
||||
CHECK(data != NULL);
|
||||
if (ascii->state.ascii) {
|
||||
CHECK(compact->utf8 == data);
|
||||
CHECK(_PyUnicode_UTF8(op) == data);
|
||||
CHECK(compact->utf8_length == ascii->length);
|
||||
}
|
||||
else {
|
||||
CHECK(compact->utf8 != data);
|
||||
CHECK(_PyUnicode_UTF8(op) != data);
|
||||
}
|
||||
}
|
||||
|
||||
if (compact->utf8 == NULL)
|
||||
#ifndef Py_GIL_DISABLED
|
||||
if (_PyUnicode_UTF8(op) == NULL)
|
||||
CHECK(compact->utf8_length == 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* check that the best kind is used: O(n) operation */
|
||||
|
@ -1115,8 +1149,8 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
|
|||
|
||||
if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) {
|
||||
PyMem_Free(_PyUnicode_UTF8(unicode));
|
||||
_PyUnicode_UTF8(unicode) = NULL;
|
||||
_PyUnicode_UTF8_LENGTH(unicode) = 0;
|
||||
PyUnicode_SET_UTF8_LENGTH(unicode, 0);
|
||||
PyUnicode_SET_UTF8(unicode, NULL);
|
||||
}
|
||||
#ifdef Py_TRACE_REFS
|
||||
_Py_ForgetReference(unicode);
|
||||
|
@ -1169,8 +1203,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
|
|||
if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY(unicode))
|
||||
{
|
||||
PyMem_Free(_PyUnicode_UTF8(unicode));
|
||||
_PyUnicode_UTF8(unicode) = NULL;
|
||||
_PyUnicode_UTF8_LENGTH(unicode) = 0;
|
||||
PyUnicode_SET_UTF8_LENGTH(unicode, 0);
|
||||
PyUnicode_SET_UTF8(unicode, NULL);
|
||||
}
|
||||
|
||||
data = (PyObject *)PyObject_Realloc(data, new_size);
|
||||
|
@ -1180,8 +1214,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
|
|||
}
|
||||
_PyUnicode_DATA_ANY(unicode) = data;
|
||||
if (share_utf8) {
|
||||
_PyUnicode_UTF8(unicode) = data;
|
||||
_PyUnicode_UTF8_LENGTH(unicode) = length;
|
||||
PyUnicode_SET_UTF8_LENGTH(unicode, length);
|
||||
PyUnicode_SET_UTF8(unicode, data);
|
||||
}
|
||||
_PyUnicode_LENGTH(unicode) = length;
|
||||
PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0);
|
||||
|
@ -1411,12 +1445,12 @@ unicode_convert_wchar_to_ucs4(const wchar_t *begin, const wchar_t *end,
|
|||
|
||||
assert(unicode != NULL);
|
||||
assert(_PyUnicode_CHECK(unicode));
|
||||
assert(_PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND);
|
||||
assert(PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND);
|
||||
ucs4_out = PyUnicode_4BYTE_DATA(unicode);
|
||||
|
||||
for (iter = begin; iter < end; ) {
|
||||
assert(ucs4_out < (PyUnicode_4BYTE_DATA(unicode) +
|
||||
_PyUnicode_GET_LENGTH(unicode)));
|
||||
PyUnicode_GET_LENGTH(unicode)));
|
||||
if (Py_UNICODE_IS_HIGH_SURROGATE(iter[0])
|
||||
&& (iter+1) < end
|
||||
&& Py_UNICODE_IS_LOW_SURROGATE(iter[1]))
|
||||
|
@ -1430,7 +1464,7 @@ unicode_convert_wchar_to_ucs4(const wchar_t *begin, const wchar_t *end,
|
|||
}
|
||||
}
|
||||
assert(ucs4_out == (PyUnicode_4BYTE_DATA(unicode) +
|
||||
_PyUnicode_GET_LENGTH(unicode)));
|
||||
PyUnicode_GET_LENGTH(unicode)));
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -1801,7 +1835,7 @@ unicode_modifiable(PyObject *unicode)
|
|||
assert(_PyUnicode_CHECK(unicode));
|
||||
if (Py_REFCNT(unicode) != 1)
|
||||
return 0;
|
||||
if (FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(unicode)) != -1)
|
||||
if (PyUnicode_HASH(unicode) != -1)
|
||||
return 0;
|
||||
if (PyUnicode_CHECK_INTERNED(unicode))
|
||||
return 0;
|
||||
|
@ -4052,6 +4086,21 @@ PyUnicode_FSDecoder(PyObject* arg, void* addr)
|
|||
|
||||
static int unicode_fill_utf8(PyObject *unicode);
|
||||
|
||||
|
||||
static int
|
||||
unicode_ensure_utf8(PyObject *unicode)
|
||||
{
|
||||
int err = 0;
|
||||
if (PyUnicode_UTF8(unicode) == NULL) {
|
||||
Py_BEGIN_CRITICAL_SECTION(unicode);
|
||||
if (PyUnicode_UTF8(unicode) == NULL) {
|
||||
err = unicode_fill_utf8(unicode);
|
||||
}
|
||||
Py_END_CRITICAL_SECTION();
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
const char *
|
||||
PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
|
||||
{
|
||||
|
@ -4063,13 +4112,11 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if (PyUnicode_UTF8(unicode) == NULL) {
|
||||
if (unicode_fill_utf8(unicode) == -1) {
|
||||
if (psize) {
|
||||
*psize = -1;
|
||||
}
|
||||
return NULL;
|
||||
if (unicode_ensure_utf8(unicode) == -1) {
|
||||
if (psize) {
|
||||
*psize = -1;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (psize) {
|
||||
|
@ -5401,6 +5448,7 @@ unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
|
|||
static int
|
||||
unicode_fill_utf8(PyObject *unicode)
|
||||
{
|
||||
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(unicode);
|
||||
/* the string cannot be ASCII, or PyUnicode_UTF8() would be set */
|
||||
assert(!PyUnicode_IS_ASCII(unicode));
|
||||
|
||||
|
@ -5442,10 +5490,10 @@ unicode_fill_utf8(PyObject *unicode)
|
|||
PyErr_NoMemory();
|
||||
return -1;
|
||||
}
|
||||
_PyUnicode_UTF8(unicode) = cache;
|
||||
_PyUnicode_UTF8_LENGTH(unicode) = len;
|
||||
memcpy(cache, start, len);
|
||||
cache[len] = '\0';
|
||||
PyUnicode_SET_UTF8_LENGTH(unicode, len);
|
||||
PyUnicode_SET_UTF8(unicode, cache);
|
||||
_PyBytesWriter_Dealloc(&writer);
|
||||
return 0;
|
||||
}
|
||||
|
@ -10996,9 +11044,9 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
|
|||
return 0;
|
||||
}
|
||||
|
||||
Py_hash_t right_hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(right_uni));
|
||||
Py_hash_t right_hash = PyUnicode_HASH(right_uni);
|
||||
assert(right_hash != -1);
|
||||
Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(left));
|
||||
Py_hash_t hash = PyUnicode_HASH(left);
|
||||
if (hash != -1 && hash != right_hash) {
|
||||
return 0;
|
||||
}
|
||||
|
@ -11484,14 +11532,14 @@ unicode_hash(PyObject *self)
|
|||
#ifdef Py_DEBUG
|
||||
assert(_Py_HashSecret_Initialized);
|
||||
#endif
|
||||
Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(self));
|
||||
Py_hash_t hash = PyUnicode_HASH(self);
|
||||
if (hash != -1) {
|
||||
return hash;
|
||||
}
|
||||
x = _Py_HashBytes(PyUnicode_DATA(self),
|
||||
PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self));
|
||||
|
||||
FT_ATOMIC_STORE_SSIZE_RELAXED(_PyUnicode_HASH(self), x);
|
||||
PyUnicode_SET_HASH(self, x);
|
||||
return x;
|
||||
}
|
||||
|
||||
|
@ -14888,8 +14936,8 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
|
|||
_PyUnicode_STATE(self).compact = 0;
|
||||
_PyUnicode_STATE(self).ascii = _PyUnicode_STATE(unicode).ascii;
|
||||
_PyUnicode_STATE(self).statically_allocated = 0;
|
||||
_PyUnicode_UTF8_LENGTH(self) = 0;
|
||||
_PyUnicode_UTF8(self) = NULL;
|
||||
PyUnicode_SET_UTF8_LENGTH(self, 0);
|
||||
PyUnicode_SET_UTF8(self, NULL);
|
||||
_PyUnicode_DATA_ANY(self) = NULL;
|
||||
|
||||
share_utf8 = 0;
|
||||
|
@ -14919,8 +14967,8 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
|
|||
|
||||
_PyUnicode_DATA_ANY(self) = data;
|
||||
if (share_utf8) {
|
||||
_PyUnicode_UTF8_LENGTH(self) = length;
|
||||
_PyUnicode_UTF8(self) = data;
|
||||
PyUnicode_SET_UTF8_LENGTH(self, length);
|
||||
PyUnicode_SET_UTF8(self, data);
|
||||
}
|
||||
|
||||
memcpy(data, PyUnicode_DATA(unicode), kind * (length + 1));
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue