Change the PyUnicode_KIND values to 1, 2, 4 (the size of one character in bytes). Drop the PyUnicode_KIND_SIZE and PyUnicode_CHARACTER_SIZE macros.

2025-07-15 23:35:23 +00:00 · 2011-10-07 20:55:35 +02:00 · 2011-10-07 20:55:35 +02:00 · c47adb04b3
commit c47adb04b3
parent dd07732af5
8 changed files with 84 additions and 123 deletions
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -470,12 +470,12 @@ Py_LOCAL_INLINE(char *) findchar(void *s, int kind,
    if (direction == 1) {
        for(i = 0; i < size; i++)
            if (PyUnicode_READ(kind, s, i) == ch)
-                return (char*)s + PyUnicode_KIND_SIZE(kind, i);
+                return (char*)s + kind * i;
    }
    else {
        for(i = size-1; i >= 0; i--)
            if (PyUnicode_READ(kind, s, i) == ch)
-                return (char*)s + PyUnicode_KIND_SIZE(kind, i);
+                return (char*)s + kind * i;
    }
    return NULL;
 }
@@ -489,7 +489,7 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
    int share_wstr;

    assert(PyUnicode_IS_READY(unicode));
-    char_size = PyUnicode_CHARACTER_SIZE(unicode);
+    char_size = PyUnicode_KIND(unicode);
    if (PyUnicode_IS_COMPACT_ASCII(unicode))
        struct_size = sizeof(PyASCIIObject);
    else
@@ -540,7 +540,7 @@ resize_inplace(PyUnicodeObject *unicode, Py_ssize_t length)

        data = _PyUnicode_DATA_ANY(unicode);
        assert(data != NULL);
-        char_size = PyUnicode_CHARACTER_SIZE(unicode);
+        char_size = PyUnicode_KIND(unicode);
        share_wstr = _PyUnicode_SHARE_WSTR(unicode);
        share_utf8 = _PyUnicode_SHARE_UTF8(unicode);
        if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY(unicode))
@@ -1005,11 +1005,9 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
    }

    if (fast) {
-        Py_MEMCPY((char*)to_data
-                      + PyUnicode_KIND_SIZE(to_kind, to_start),
-                  (char*)from_data
-                      + PyUnicode_KIND_SIZE(from_kind, from_start),
-                  PyUnicode_KIND_SIZE(to_kind, how_many));
+        Py_MEMCPY((char*)to_data + to_kind * to_start,
+                  (char*)from_data + from_kind * from_start,
+                  to_kind * how_many);
    }
    else if (from_kind == PyUnicode_1BYTE_KIND
             && to_kind == PyUnicode_2BYTE_KIND)
@@ -8760,7 +8758,7 @@ PyUnicode_FindChar(PyObject *str, Py_UCS4 ch,
        end = PyUnicode_GET_LENGTH(str);
    kind = PyUnicode_KIND(str);
    result = findchar(PyUnicode_1BYTE_DATA(str)
-                      + PyUnicode_KIND_SIZE(kind, start),
+                      + kind*start,
                      kind,
                      end-start, ch, direction);
    if (!result)
@@ -8813,10 +8811,10 @@ tailmatch(PyUnicodeObject *self,
        /* If both are of the same kind, memcmp is sufficient */
        if (kind_self == kind_sub) {
            return ! memcmp((char *)data_self +
-                                (offset * PyUnicode_CHARACTER_SIZE(substring)),
+                                (offset * PyUnicode_KIND(substring)),
                            data_sub,
                            PyUnicode_GET_LENGTH(substring) *
-                                PyUnicode_CHARACTER_SIZE(substring));
+                                PyUnicode_KIND(substring));
        }
        /* otherwise we have to compare each character by first accesing it */
        else {
@@ -8881,7 +8879,7 @@ fixup(PyObject *self,
        return NULL;

    Py_MEMCPY(PyUnicode_1BYTE_DATA(u), PyUnicode_1BYTE_DATA(self),
-              PyUnicode_GET_LENGTH(u) * PyUnicode_CHARACTER_SIZE(u));
+              PyUnicode_GET_LENGTH(u) * PyUnicode_KIND(u));

    /* fix functions return the new maximum character in a string,
       if the kind of the resulting unicode object does not change,
@@ -9262,8 +9260,8 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
            if (use_memcpy) {
                Py_MEMCPY(res_data,
                          sep_data,
-                          PyUnicode_KIND_SIZE(kind, seplen));
-                res_data += PyUnicode_KIND_SIZE(kind, seplen);
+                          kind * seplen);
+                res_data += kind * seplen;
            }
            else {
                copy_characters(res, res_offset, sep, 0, seplen);
@@ -9275,8 +9273,8 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
            if (use_memcpy) {
                Py_MEMCPY(res_data,
                          PyUnicode_DATA(item),
-                          PyUnicode_KIND_SIZE(kind, itemlen));
-                res_data += PyUnicode_KIND_SIZE(kind, itemlen);
+                          kind * itemlen);
+                res_data += kind * itemlen;
            }
            else {
                copy_characters(res, res_offset, item, 0, itemlen);
@@ -9286,7 +9284,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
    }
    if (use_memcpy)
        assert(res_data == PyUnicode_1BYTE_DATA(res)
-                           + PyUnicode_KIND_SIZE(kind, PyUnicode_GET_LENGTH(res)));
+                           + kind * PyUnicode_GET_LENGTH(res));
    else
        assert(res_offset == PyUnicode_GET_LENGTH(res));

@@ -9735,22 +9733,22 @@ replace(PyObject *self, PyObject *str1,
                goto error;
            res = PyUnicode_DATA(rstr);

-            memcpy(res, sbuf, PyUnicode_KIND_SIZE(rkind, slen));
+            memcpy(res, sbuf, rkind * slen);
            /* change everything in-place, starting with this one */
-            memcpy(res + PyUnicode_KIND_SIZE(rkind, i),
+            memcpy(res + rkind * i,
                   buf2,
-                   PyUnicode_KIND_SIZE(rkind, len2));
+                   rkind * len2);
            i += len1;

            while ( --maxcount > 0) {
                i = anylib_find(rkind, self,
-                                sbuf+PyUnicode_KIND_SIZE(rkind, i), slen-i,
+                                sbuf+rkind*i, slen-i,
                                str1, buf1, len1, i);
                if (i == -1)
                    break;
-                memcpy(res + PyUnicode_KIND_SIZE(rkind, i),
+                memcpy(res + rkind * i,
                       buf2,
-                       PyUnicode_KIND_SIZE(rkind, len2));
+                       rkind * len2);
                i += len1;
            }

@@ -9816,49 +9814,49 @@ replace(PyObject *self, PyObject *str1,
            while (n-- > 0) {
                /* look for next match */
                j = anylib_find(rkind, self,
-                                sbuf + PyUnicode_KIND_SIZE(rkind, i), slen-i,
+                                sbuf + rkind * i, slen-i,
                                str1, buf1, len1, i);
                if (j == -1)
                    break;
                else if (j > i) {
                    /* copy unchanged part [i:j] */
-                    memcpy(res + PyUnicode_KIND_SIZE(rkind, ires),
-                           sbuf + PyUnicode_KIND_SIZE(rkind, i),
-                           PyUnicode_KIND_SIZE(rkind, j-i));
+                    memcpy(res + rkind * ires,
+                           sbuf + rkind * i,
+                           rkind * (j-i));
                    ires += j - i;
                }
                /* copy substitution string */
                if (len2 > 0) {
-                    memcpy(res + PyUnicode_KIND_SIZE(rkind, ires),
+                    memcpy(res + rkind * ires,
                           buf2,
-                           PyUnicode_KIND_SIZE(rkind, len2));
+                           rkind * len2);
                    ires += len2;
                }
                i = j + len1;
            }
            if (i < slen)
                /* copy tail [i:] */
-                memcpy(res + PyUnicode_KIND_SIZE(rkind, ires),
-                       sbuf + PyUnicode_KIND_SIZE(rkind, i),
-                       PyUnicode_KIND_SIZE(rkind, slen-i));
+                memcpy(res + rkind * ires,
+                       sbuf + rkind * i,
+                       rkind * (slen-i));
        } else {
            /* interleave */
            while (n > 0) {
-                memcpy(res + PyUnicode_KIND_SIZE(rkind, ires),
+                memcpy(res + rkind * ires,
                       buf2,
-                       PyUnicode_KIND_SIZE(rkind, len2));
+                       rkind * len2);
                ires += len2;
                if (--n <= 0)
                    break;
-                memcpy(res + PyUnicode_KIND_SIZE(rkind, ires),
-                       sbuf + PyUnicode_KIND_SIZE(rkind, i),
-                       PyUnicode_KIND_SIZE(rkind, 1));
+                memcpy(res + rkind * ires,
+                       sbuf + rkind * i,
+                       rkind);
                ires++;
                i++;
            }
-            memcpy(res + PyUnicode_KIND_SIZE(rkind, ires),
-                   sbuf + PyUnicode_KIND_SIZE(rkind, i),
-                   PyUnicode_KIND_SIZE(rkind, slen-i));
+            memcpy(res + rkind * ires,
+                   sbuf + rkind * i,
+                   rkind * (slen-i));
        }
        u = rstr;
        unicode_adjust_maxchar(&u);
@@ -11341,7 +11339,7 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
        kind = PyUnicode_KIND(self);
        data = PyUnicode_1BYTE_DATA(self);
        return PyUnicode_FromKindAndData(kind,
-                                         data + PyUnicode_KIND_SIZE(kind, start),
+                                         data + kind * start,
                                         length);
    }
 }
@@ -11497,7 +11495,7 @@ unicode_repeat(PyUnicodeObject *str, Py_ssize_t len)
    else {
        /* number of characters copied this far */
        Py_ssize_t done = PyUnicode_GET_LENGTH(str);
-        const Py_ssize_t char_size = PyUnicode_CHARACTER_SIZE(str);
+        const Py_ssize_t char_size = PyUnicode_KIND(str);
        char *to = (char *) PyUnicode_DATA(u);
        Py_MEMCPY(to, PyUnicode_DATA(str),
                  PyUnicode_GET_LENGTH(str) * char_size);
@@ -12488,14 +12486,14 @@ unicode__sizeof__(PyUnicodeObject *v)
        size = sizeof(PyASCIIObject) + PyUnicode_GET_LENGTH(v) + 1;
    else if (PyUnicode_IS_COMPACT(v))
        size = sizeof(PyCompactUnicodeObject) +
-            (PyUnicode_GET_LENGTH(v) + 1) * PyUnicode_CHARACTER_SIZE(v);
+            (PyUnicode_GET_LENGTH(v) + 1) * PyUnicode_KIND(v);
    else {
        /* If it is a two-block object, account for base object, and
           for character block if present. */
        size = sizeof(PyUnicodeObject);
        if (_PyUnicode_DATA_ANY(v))
            size += (PyUnicode_GET_LENGTH(v) + 1) *
-                PyUnicode_CHARACTER_SIZE(v);
+                PyUnicode_KIND(v);
    }
    /* If the wstr pointer is present, account for it unless it is shared
       with the data pointer. Check if the data is not shared. */
@@ -13246,7 +13244,7 @@ PyUnicode_Format(PyObject *format, PyObject *args)
            else {
                const char *p = (const char *) pbuf;
                assert(pbuf != NULL);
-                p = p + PyUnicode_KIND_SIZE(kind, pindex);
+                p += kind * pindex;
                v = PyUnicode_FromKindAndData(kind, p, len);
            }
            if (v == NULL)
@@ -13399,7 +13397,7 @@ unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
    }

    Py_MEMCPY(data, PyUnicode_DATA(unicode),
-              PyUnicode_KIND_SIZE(kind, length + 1));
+              kind * (length + 1));
    Py_DECREF(unicode);
    assert(_PyUnicode_CheckConsistency(self, 1));
 #ifdef Py_DEBUG