Issue #17615: Comparing two Unicode strings now uses wmemcmp() when possible

wmemcmp() is about twice as fast as a naive element-by-element loop (342 usec vs 744 usec) on Fedora 18/x86_64 with GCC 4.7.2.
2025-09-26 10:19:53 +00:00 · 2013-04-08 22:43:44 +02:00 · 2013-04-08 22:43:44 +02:00 · cd777eaf53
commit cd777eaf53
parent 9fc5981ea2
5 changed files with 30 additions and 5 deletions
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -10304,8 +10304,19 @@ unicode_compare(PyObject *str1, PyObject *str2)
            COMPARE(Py_UCS2, Py_UCS1);
            break;
        case PyUnicode_2BYTE_KIND:
+        {
+#if defined(HAVE_WMEMCMP) && SIZEOF_WCHAR_T == 2
+            int cmp = wmemcmp((wchar_t *)data1, (wchar_t *)data2, len);
+            /* normalize result of wmemcmp() into the range [-1; 1] */
+            if (cmp < 0)
+                return -1;
+            if (cmp > 0)
+                return 1;
+#else
            COMPARE(Py_UCS2, Py_UCS2);
+#endif
            break;
+        }
        case PyUnicode_4BYTE_KIND:
            COMPARE(Py_UCS2, Py_UCS4);
            break;
@@ -10324,8 +10335,19 @@ unicode_compare(PyObject *str1, PyObject *str2)
            COMPARE(Py_UCS4, Py_UCS2);
            break;
        case PyUnicode_4BYTE_KIND:
+        {
+#if defined(HAVE_WMEMCMP) && SIZEOF_WCHAR_T == 4
+            int cmp = wmemcmp((wchar_t *)data1, (wchar_t *)data2, len);
+            /* normalize result of wmemcmp() into the range [-1; 1] */
+            if (cmp < 0)
+                return -1;
+            if (cmp > 0)
+                return 1;
+#else
            COMPARE(Py_UCS4, Py_UCS4);
+#endif
            break;
+        }
        default:
            assert(0);
        }