Merge.

2025-11-13 15:40:05 +00:00 · 2011-09-29 19:51:46 +02:00 · 2011-09-29 19:51:46 +02:00 · 734e159b12
commit 734e159b12
parent 9624a764ff d8f6510acc
2 changed files with 32 additions and 40 deletions
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@ -377,16 +377,6 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
     PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) :   \
     _PyUnicode_NONCOMPACT_DATA(op))
 #define _PyUnicode_UTF8(op)                     \
    (PyUnicode_IS_COMPACT_ASCII(op) ?           \
     ((char*)((PyASCIIObject*)(op) + 1)) :      \
     ((PyCompactUnicodeObject*)(op))->utf8)
 #define _PyUnicode_UTF8_LENGTH(op)                      \
    (PyUnicode_IS_COMPACT_ASCII(op) ?                   \
     ((PyASCIIObject*)(op))->length :                   \
     ((PyCompactUnicodeObject*)(op))->utf8_length)
 /* Compute (index * char_size) where char_size is 2 ** (kind - 1).
   The index is a character index, the result is a size in bytes. */
@ -466,7 +456,7 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
 #define PyUnicode_READY(op)                        \
    (assert(PyUnicode_Check(op)),                       \
     (PyUnicode_IS_READY(op) ?                          \
-      0 : _PyUnicode_Ready((PyUnicodeObject *)(op))))
+      0 : _PyUnicode_Ready((PyObject *)(op))))
 /* Return a maximum character value which is suitable for creating another
   string based on op.  This is always an approximation but more efficient
@ -507,14 +497,16 @@ PyAPI_FUNC(PyObject*) PyUnicode_New(
    );
 #endif
-/* Initializes the canonical string representation from a the deprected
+/* Initializes the canonical string representation from the deprecated
-   wstr/Py_UNICODE representation.  This function is used to convert
+   wstr/Py_UNICODE representation. This function is used to convert Unicode
-   unicode objects which were created using the old API to the new flexible
+   objects which were created using the old API to the new flexible format
-   format introduced with PEP 393.  The PyUnicode_READY() macro can be
+   introduced with PEP 393.
-   more efficient if the string is already ready. */
+
   Don't call this function directly, use the public PyUnicode_READY() macro
   instead. */
 #ifndef Py_LIMITED_API
 PyAPI_FUNC(int) _PyUnicode_Ready(
-    PyUnicodeObject *unicode    /* Unicode object */
+    PyObject *unicode           /* Unicode object */
    );
 #endif
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -105,6 +105,14 @@ extern "C" {
        } \
    } while (0)
 #define _PyUnicode_UTF8(op)                     \
    (PyUnicode_IS_COMPACT_ASCII(op) ?           \
     ((char*)((PyASCIIObject*)(op) + 1)) :      \
     ((PyCompactUnicodeObject*)(op))->utf8)
 #define _PyUnicode_UTF8_LENGTH(op)                      \
    (PyUnicode_IS_COMPACT_ASCII(op) ?                   \
     ((PyASCIIObject*)(op))->length :                   \
     ((PyCompactUnicodeObject*)(op))->utf8_length)
 #define _PyUnicode_WSTR(op) (((PyASCIIObject*)(op))->wstr)
 #define _PyUnicode_WSTR_LENGTH(op) (((PyCompactUnicodeObject*)(op))->wstr_length)
 #define _PyUnicode_LENGTH(op) (((PyASCIIObject *)(op))->length)
@ -773,8 +781,9 @@ int unicode_ready_calls = 0;
 #endif
 int
-_PyUnicode_Ready(PyUnicodeObject *unicode)
+_PyUnicode_Ready(PyObject *obj)
 {
    PyUnicodeObject *unicode = (PyUnicodeObject *)obj;
    wchar_t *end;
    Py_UCS4 maxchar = 0;
    Py_ssize_t num_surrogates;
@ -782,25 +791,19 @@ _PyUnicode_Ready(PyUnicodeObject *unicode)
    Py_ssize_t length_wo_surrogates;
 #endif
    assert(PyUnicode_Check(unicode));
    if (unicode->data.any != NULL) {
        assert(PyUnicode_KIND(unicode) != PyUnicode_WCHAR_KIND);
        return 0;
    }
    /* _PyUnicode_Ready() is only intended for old-style API usage where
-     * strings were created using _PyObject_New() and where no canonical
+       strings were created using _PyObject_New() and where no canonical
-     * representation (the str field) has been set yet aka strings
+       representation (the str field) has been set yet aka strings
-     * which are not yet ready.
+       which are not yet ready. */
-     */
+    assert(PyUnicode_Check(obj));
    assert(!PyUnicode_IS_READY(obj));
    assert(!PyUnicode_IS_COMPACT(obj));
    assert(_PyUnicode_KIND(obj) == PyUnicode_WCHAR_KIND);
    assert(_PyUnicode_WSTR(unicode) != NULL);
-    assert(_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND);
+    assert(unicode->data.any == NULL);
    assert(!PyUnicode_IS_COMPACT(unicode));
    assert(!PyUnicode_IS_READY(unicode));
    /* Actually, it should neither be interned nor be anything else: */
    assert(_PyUnicode_STATE(unicode).interned == 0);
    assert(unicode->_base.utf8 == NULL);
    /* Actually, it should neither be interned nor be anything else: */
    assert(_PyUnicode_STATE(unicode).interned == SSTATE_NOT_INTERNED);
 #ifdef Py_DEBUG
    ++unicode_ready_calls;
@ -808,11 +811,8 @@ _PyUnicode_Ready(PyUnicodeObject *unicode)
    end = _PyUnicode_WSTR(unicode) + _PyUnicode_WSTR_LENGTH(unicode);
    if (find_maxchar_surrogates(_PyUnicode_WSTR(unicode), end,
-                                                  &maxchar,
+                                &maxchar, &num_surrogates) == -1)
                                                  &num_surrogates) == -1) {
        assert(0 && "PyUnicode_FindMaxCharAndNumSurrogatePairs failed");
        return -1;
    }
    if (maxchar < 256) {
        unicode->data.any = PyObject_MALLOC(_PyUnicode_WSTR_LENGTH(unicode) + 1);
@ -1038,8 +1038,8 @@ PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size)
    /* If not empty and not single character, copy the Unicode data
       into the new object */
-    if (find_maxchar_surrogates(u, u + size, &maxchar,
+    if (find_maxchar_surrogates(u, u + size,
-                                                  &num_surrogates) == -1)
+                                &maxchar, &num_surrogates) == -1)
        return NULL;
    unicode = (PyUnicodeObject *) PyUnicode_New(size - num_surrogates,