Improve string forms and PyUnicode_Resize() documentation

Also remove the FIXME for resize_copy(): as discussed with Martin, copying the string on resize when the string is not resizable is just fine.
2025-09-26 18:29:57 +00:00 · 2011-10-03 23:19:21 +02:00 · 2011-10-03 23:19:21 +02:00 · 8cfcbed4e3
commit 8cfcbed4e3
parent 77bb47b312
2 changed files with 24 additions and 15 deletions
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@ -206,7 +206,7 @@ extern "C" {
   immediately follow the structure. utf8_length and wstr_length can be found
   in the length field; the utf8 pointer is equal to the data pointer. */
 typedef struct {
-    /* Unicode strings can be in 4 states:
+    /* There are 4 forms of Unicode strings:
       - compact ascii:
@ -227,7 +227,7 @@ typedef struct {
         * ascii = 0
         * utf8 != data
-       - string created by the legacy API (not ready):
+       - legacy string, not ready:
         * structure = PyUnicodeObject
         * kind = PyUnicode_WCHAR_KIND
@ -239,7 +239,7 @@ typedef struct {
         * interned = SSTATE_NOT_INTERNED
         * ascii = 0
-       - string created by the legacy API, ready:
+       - legacy string, ready:
         * structure = PyUnicodeObject structure
         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
@ -249,10 +249,16 @@ typedef struct {
         * data.any is not NULL
         * utf8 = data if ascii is 1
-       String created by the legacy API becomes ready when calling
+       Compact strings use only one memory block (structure + characters),
-       PyUnicode_READY().
+       whereas legacy strings use one block for the structure and one block
       for characters.
-       See also _PyUnicode_CheckConsistency(). */
+       Legacy strings are created by PyUnicode_FromUnicode() and
       PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
       when PyUnicode_READY() is called.
       See also _PyUnicode_CheckConsistency().
    */
    PyObject_HEAD
    Py_ssize_t length;          /* Number of code points in the string */
    Py_hash_t hash;             /* Hash value; -1 if not set */
@ -721,19 +727,22 @@ PyAPI_FUNC(int) PyUnicode_WriteChar(
 PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
 #endif
-/* Resize an already allocated Unicode object to the new size length.
+/* Resize a Unicode object allocated by the legacy API (e.g.
   PyUnicode_FromUnicode). Unicode objects allocated by the new API (e.g.
   PyUnicode_New) cannot be resized by this function.
   The length is a number of Py_UNICODE characters (and not the number of code
   points).
   *unicode is modified to point to the new (resized) object and 0
   returned on success.
-   This API may only be called by the function which also called the
+   If the refcount on the object is 1, the function resizes the string in
-   Unicode constructor. The refcount on the object must be 1. Otherwise,
+   place, which is usually faster than allocating a new string (and copying
-   an error is returned.
+   characters).
   Error handling is implemented as follows: an exception is set, -1
-   is returned and *unicode left untouched.
+   is returned and *unicode left untouched. */
-*/
 PyAPI_FUNC(int) PyUnicode_Resize(
    PyObject **unicode,         /* Pointer to the Unicode object */
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -536,7 +536,8 @@ resize_copy(PyObject *unicode, Py_ssize_t length)
            return NULL;
        }
        return copy;
-    } else {
+    }
+    else {
        PyUnicodeObject *w;
        assert(_PyUnicode_WSTR(unicode) != NULL);
        assert(_PyUnicode_DATA_ANY(unicode) == NULL);
@ -1294,7 +1295,6 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length)
    if (old_length == length)
        return 0;
-    /* FIXME: really create a new object? */
    if (!unicode_resizable(unicode)) {
        PyObject *copy = resize_copy(unicode, length);
        if (copy == NULL)