Removing UTF-16 aware Unicode comparison code. This kind of compare

function (together with other locale aware ones) should go into a new collation support module. See python-dev for a discussion of this removal. Note: This patch should also be applied to the 1.6 branch.
2025-09-26 18:29:57 +00:00 · 2000-08-08 08:04:29 +00:00 · 2000-08-08 08:04:29 +00:00 · e5034378cc
commit e5034378cc
parent 5660639f9f
3 changed files with 83 additions and 48 deletions
--- a/Lib/test/output/test_unicode
+++ b/Lib/test/output/test_unicode
@ -1,6 +1,5 @@
 test_unicode
 Testing Unicode comparisons... done.
 Testing UTF-16 code point order comparisons... done.
 Testing Unicode contains method... done.
 Testing Unicode formatting strings... done.
 Testing builtin codecs... done.
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@ -168,56 +168,59 @@ assert 'abc' < u'abcd'
 assert u'abc' < u'abcd'
 print 'done.'
-print 'Testing UTF-16 code point order comparisons...',
+if 0:
-#No surrogates, no fixup required.
+    # Move these tests to a Unicode collation module test...
 assert u'\u0061' < u'\u20ac'
 # Non surrogate below surrogate value, no fixup required
 assert u'\u0061' < u'\ud800\udc02'
-# Non surrogate above surrogate value, fixup required
+    print 'Testing UTF-16 code point order comparisons...',
-def test_lecmp(s, s2):
+    #No surrogates, no fixup required.
-  assert s <  s2 , "comparison failed on %s < %s" % (s, s2)
+    assert u'\u0061' < u'\u20ac'
-  
+    # Non surrogate below surrogate value, no fixup required
-def test_fixup(s):
+    assert u'\u0061' < u'\ud800\udc02'
  s2 = u'\ud800\udc01'
  test_lecmp(s, s2)
  s2 = u'\ud900\udc01'
  test_lecmp(s, s2)
  s2 = u'\uda00\udc01'
  test_lecmp(s, s2)
  s2 = u'\udb00\udc01'
  test_lecmp(s, s2)
  s2 = u'\ud800\udd01'
  test_lecmp(s, s2)
  s2 = u'\ud900\udd01'
  test_lecmp(s, s2)
  s2 = u'\uda00\udd01'
  test_lecmp(s, s2)
  s2 = u'\udb00\udd01'
  test_lecmp(s, s2)
  s2 = u'\ud800\ude01'
  test_lecmp(s, s2)
  s2 = u'\ud900\ude01'
  test_lecmp(s, s2)
  s2 = u'\uda00\ude01'
  test_lecmp(s, s2)
  s2 = u'\udb00\ude01'
  test_lecmp(s, s2)
  s2 = u'\ud800\udfff'
  test_lecmp(s, s2)
  s2 = u'\ud900\udfff'
  test_lecmp(s, s2)
  s2 = u'\uda00\udfff'
  test_lecmp(s, s2)
  s2 = u'\udb00\udfff'
  test_lecmp(s, s2)
-test_fixup(u'\ue000')
+    # Non surrogate above surrogate value, fixup required
-test_fixup(u'\uff61')
+    def test_lecmp(s, s2):
      assert s <  s2 , "comparison failed on %s < %s" % (s, s2)
-# Surrogates on both sides, no fixup required
+    def test_fixup(s):
-assert u'\ud800\udc02' < u'\ud84d\udc56'
+      s2 = u'\ud800\udc01'
-print 'done.'
+      test_lecmp(s, s2)
      s2 = u'\ud900\udc01'
      test_lecmp(s, s2)
      s2 = u'\uda00\udc01'
      test_lecmp(s, s2)
      s2 = u'\udb00\udc01'
      test_lecmp(s, s2)
      s2 = u'\ud800\udd01'
      test_lecmp(s, s2)
      s2 = u'\ud900\udd01'
      test_lecmp(s, s2)
      s2 = u'\uda00\udd01'
      test_lecmp(s, s2)
      s2 = u'\udb00\udd01'
      test_lecmp(s, s2)
      s2 = u'\ud800\ude01'
      test_lecmp(s, s2)
      s2 = u'\ud900\ude01'
      test_lecmp(s, s2)
      s2 = u'\uda00\ude01'
      test_lecmp(s, s2)
      s2 = u'\udb00\ude01'
      test_lecmp(s, s2)
      s2 = u'\ud800\udfff'
      test_lecmp(s, s2)
      s2 = u'\ud900\udfff'
      test_lecmp(s, s2)
      s2 = u'\uda00\udfff'
      test_lecmp(s, s2)
      s2 = u'\udb00\udfff'
      test_lecmp(s, s2)
    test_fixup(u'\ue000')
    test_fixup(u'\uff61')
    # Surrogates on both sides, no fixup required
    assert u'\ud800\udc02' < u'\ud84d\udc56'
    print 'done.'
 test('ljust', u'abc',  u'abc       ', 10)
 test('rjust', u'abc',  u'       abc', 10)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -3169,6 +3169,12 @@ unicode_center(PyUnicodeObject *self, PyObject *args)
    return (PyObject*) pad(self, left, marg - left, ' ');
 }
 #if 0
 /* This code should go into some future Unicode collation support
   module. The basic comparison should compare ordinals on a naive
   basis (this is what Java does and thus JPython too). */
 /* speedy UTF-16 code point order comparison */
 /* gleaned from: */
 /* http://www-4.ibm.com/software/developer/library/utf16.html?dwzone=unicode */
@ -3213,6 +3219,33 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
    return (len1 < len2) ? -1 : (len1 != len2);
 }
 #else
 /* Naive ordinal comparison of two Unicode objects.
 
    Walks both buffers in lockstep and compares the raw Py_UNICODE
    values position by position; no surrogate fixup is applied.  If the
    shared prefix is identical, the shorter string orders first.
 
    Returns -1 if str1 sorts before str2, 1 if it sorts after, and 0 if
    both have the same length and contents. */
 static int
 unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
 {
     Py_UNICODE *left = str1->str;
     Py_UNICODE *right = str2->str;
     int left_len = str1->length;
     int right_len = str2->length;
     int common = (left_len < right_len) ? left_len : right_len;
     int i;
 
     /* Only the overlapping prefix can be compared element-wise. */
     for (i = 0; i < common; i++) {
         long delta = (long)left[i] - (long)right[i];
 
         if (delta != 0)
             return (delta < 0) ? -1 : 1;
     }
 
     /* Prefixes match: the ordering is decided by length alone. */
     if (left_len == right_len)
         return 0;
     return (left_len < right_len) ? -1 : 1;
 }
 #endif
 int PyUnicode_Compare(PyObject *left,
 		      PyObject *right)
 {