Rewrite PyUnicode_EncodeDecimal() to use the new Unicode API

Add tests for PyUnicode_EncodeDecimal() and PyUnicode_TransformDecimalToASCII().
2025-10-15 03:10:29 +00:00 · 2011-11-21 22:52:58 +01:00 · 2011-11-21 22:52:58 +01:00 · 42bf77537e
commit 42bf77537e
parent 6dd381eb62
3 changed files with 132 additions and 46 deletions
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -8829,7 +8829,6 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
                        char *output,
                        const char *errors)
 {
-    Py_UNICODE *p, *end;
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;
    PyObject *unicode;
@@ -8838,47 +8837,50 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
    /* the following variable is used for caching string comparisons
     * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
    int known_errorHandler = -1;
+    Py_ssize_t i, j;
+    enum PyUnicode_Kind kind;
+    void *data;

    if (output == NULL) {
        PyErr_BadArgument();
        return -1;
    }

-    p = s;
-    end = s + length;
-    while (p < end) {
-        register Py_UNICODE ch = *p;
+    unicode = PyUnicode_FromUnicode(s, length);
+    if (unicode == NULL)
+        return -1;
+
+    if (PyUnicode_READY(unicode) < 0)
+        goto onError;
+    kind = PyUnicode_KIND(unicode);
+    data = PyUnicode_DATA(unicode);
+
+    for (i=0; i < length; i++) {
+        Py_UCS4 ch = PyUnicode_READ(kind, data, i);
        int decimal;
-        PyObject *repunicode;
-        Py_ssize_t repsize;
-        Py_ssize_t newpos;
-        Py_UNICODE *uni2;
-        Py_UNICODE *collstart;
-        Py_UNICODE *collend;
+        Py_ssize_t startpos, endpos;

        if (Py_UNICODE_ISSPACE(ch)) {
            *output++ = ' ';
-            ++p;
            continue;
        }
        decimal = Py_UNICODE_TODECIMAL(ch);
        if (decimal >= 0) {
            *output++ = '0' + decimal;
-            ++p;
            continue;
        }
        if (0 < ch && ch < 256) {
            *output++ = (char)ch;
-            ++p;
            continue;
        }
        /* All other characters are considered unencodable */
-        collstart = p;
-        collend = p+1;
-        while (collend < end) {
-            if ((0 < *collend && *collend < 256) ||
-                !Py_UNICODE_ISSPACE(*collend) ||
-                Py_UNICODE_TODECIMAL(*collend))
+        startpos = i;
+        endpos = i+1;
+        for (; endpos < length; endpos++) {
+            ch = PyUnicode_READ(kind, data, endpos);
+            if ((0 < ch && ch < 256) ||
+                !Py_UNICODE_ISSPACE(ch) ||
+                Py_UNICODE_TODECIMAL(ch))
                break;
        }
        /* cache callback name lookup
@@ -8897,33 +8899,33 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
        }
        switch (known_errorHandler) {
        case 1: /* strict */
-            unicode = PyUnicode_FromUnicode(s, length);
-            if (unicode == NULL)
-                goto onError;
-            raise_encode_exception(&exc, encoding, unicode, collstart-s, collend-s, reason);
-            Py_DECREF(unicode);
+            raise_encode_exception(&exc, encoding, unicode, startpos, endpos, reason);
            goto onError;
        case 2: /* replace */
-            for (p = collstart; p < collend; ++p)
+            for (j=startpos; j < endpos; j++)
                *output++ = '?';
            /* fall through */
        case 3: /* ignore */
-            p = collend;
+            i = endpos;
            break;
        case 4: /* xmlcharrefreplace */
-            /* generate replacement (temporarily (mis)uses p) */
-            for (p = collstart; p < collend; ++p)
-                output += sprintf(output, "&#%d;", (int)*p);
-            p = collend;
+            /* generate replacement */
+            for (j=startpos; j < endpos; j++) {
+                ch = PyUnicode_READ(kind, data, i);
+                output += sprintf(output, "&#%d;", (int)ch);
+                i++;
+            }
            break;
        default:
-            unicode = PyUnicode_FromUnicode(s, length);
-            if (unicode == NULL)
-                goto onError;
+        {
+            PyObject *repunicode;
+            Py_ssize_t repsize, newpos, k;
+            enum PyUnicode_Kind repkind;
+            void *repdata;
+
            repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
                                                          encoding, reason, unicode, &exc,
-                                                          collstart-s, collend-s, &newpos);
-            Py_DECREF(unicode);
+                                                          startpos, endpos, &newpos);
            if (repunicode == NULL)
                goto onError;
            if (!PyUnicode_Check(repunicode)) {
@@ -8932,10 +8934,17 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
                Py_DECREF(repunicode);
                goto onError;
            }
+            if (PyUnicode_READY(repunicode) < 0) {
+                Py_DECREF(repunicode);
+                goto onError;
+            }
+            repkind = PyUnicode_KIND(repunicode);
+            repdata = PyUnicode_DATA(repunicode);
+
            /* generate replacement  */
            repsize = PyUnicode_GET_SIZE(repunicode);
-            for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2) {
-                Py_UNICODE ch = *uni2;
+            for (k=0; k<repsize; k++) {
+                ch = PyUnicode_READ(repkind, repdata, k);
                if (Py_UNICODE_ISSPACE(ch))
                    *output++ = ' ';
                else {
@@ -8946,29 +8955,29 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
                        *output++ = (char)ch;
                    else {
                        Py_DECREF(repunicode);
-                        unicode = PyUnicode_FromUnicode(s, length);
-                        if (unicode == NULL)
-                            goto onError;
                        raise_encode_exception(&exc, encoding,
-                                               unicode, collstart-s, collend-s, reason);
-                        Py_DECREF(unicode);
+                                               unicode, startpos, endpos,
+                                               reason);
                        goto onError;
                    }
                }
            }
-            p = s + newpos;
+            i = newpos;
            Py_DECREF(repunicode);
        }
+        }
    }
    /* 0-terminate the output string */
    *output++ = '\0';
    Py_XDECREF(exc);
    Py_XDECREF(errorHandler);
+    Py_DECREF(unicode);
    return 0;

  onError:
    Py_XDECREF(exc);
    Py_XDECREF(errorHandler);
+    Py_DECREF(unicode);
    return -1;
 }