mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
Removing UTF-16 aware Unicode comparison code. This kind of compare
function (together with other locale-aware ones) should go into a new collation support module. See python-dev for a discussion of this removal. Note: This patch should also be applied to the 1.6 branch.
This commit is contained in:
parent
5660639f9f
commit
e5034378cc
3 changed files with 83 additions and 48 deletions
|
@ -1,6 +1,5 @@
|
||||||
test_unicode
|
test_unicode
|
||||||
Testing Unicode comparisons... done.
|
Testing Unicode comparisons... done.
|
||||||
Testing UTF-16 code point order comparisons... done.
|
|
||||||
Testing Unicode contains method... done.
|
Testing Unicode contains method... done.
|
||||||
Testing Unicode formatting strings... done.
|
Testing Unicode formatting strings... done.
|
||||||
Testing builtin codecs... done.
|
Testing builtin codecs... done.
|
||||||
|
|
|
@ -168,56 +168,59 @@ assert 'abc' < u'abcd'
|
||||||
assert u'abc' < u'abcd'
|
assert u'abc' < u'abcd'
|
||||||
print 'done.'
|
print 'done.'
|
||||||
|
|
||||||
print 'Testing UTF-16 code point order comparisons...',
|
if 0:
|
||||||
#No surrogates, no fixup required.
|
# Move these tests to a Unicode collation module test...
|
||||||
assert u'\u0061' < u'\u20ac'
|
|
||||||
# Non surrogate below surrogate value, no fixup required
|
|
||||||
assert u'\u0061' < u'\ud800\udc02'
|
|
||||||
|
|
||||||
# Non surrogate above surrogate value, fixup required
|
print 'Testing UTF-16 code point order comparisons...',
|
||||||
def test_lecmp(s, s2):
|
#No surrogates, no fixup required.
|
||||||
assert s < s2 , "comparison failed on %s < %s" % (s, s2)
|
assert u'\u0061' < u'\u20ac'
|
||||||
|
# Non surrogate below surrogate value, no fixup required
|
||||||
def test_fixup(s):
|
assert u'\u0061' < u'\ud800\udc02'
|
||||||
s2 = u'\ud800\udc01'
|
|
||||||
test_lecmp(s, s2)
|
|
||||||
s2 = u'\ud900\udc01'
|
|
||||||
test_lecmp(s, s2)
|
|
||||||
s2 = u'\uda00\udc01'
|
|
||||||
test_lecmp(s, s2)
|
|
||||||
s2 = u'\udb00\udc01'
|
|
||||||
test_lecmp(s, s2)
|
|
||||||
s2 = u'\ud800\udd01'
|
|
||||||
test_lecmp(s, s2)
|
|
||||||
s2 = u'\ud900\udd01'
|
|
||||||
test_lecmp(s, s2)
|
|
||||||
s2 = u'\uda00\udd01'
|
|
||||||
test_lecmp(s, s2)
|
|
||||||
s2 = u'\udb00\udd01'
|
|
||||||
test_lecmp(s, s2)
|
|
||||||
s2 = u'\ud800\ude01'
|
|
||||||
test_lecmp(s, s2)
|
|
||||||
s2 = u'\ud900\ude01'
|
|
||||||
test_lecmp(s, s2)
|
|
||||||
s2 = u'\uda00\ude01'
|
|
||||||
test_lecmp(s, s2)
|
|
||||||
s2 = u'\udb00\ude01'
|
|
||||||
test_lecmp(s, s2)
|
|
||||||
s2 = u'\ud800\udfff'
|
|
||||||
test_lecmp(s, s2)
|
|
||||||
s2 = u'\ud900\udfff'
|
|
||||||
test_lecmp(s, s2)
|
|
||||||
s2 = u'\uda00\udfff'
|
|
||||||
test_lecmp(s, s2)
|
|
||||||
s2 = u'\udb00\udfff'
|
|
||||||
test_lecmp(s, s2)
|
|
||||||
|
|
||||||
test_fixup(u'\ue000')
|
# Non surrogate above surrogate value, fixup required
|
||||||
test_fixup(u'\uff61')
|
def test_lecmp(s, s2):
|
||||||
|
assert s < s2 , "comparison failed on %s < %s" % (s, s2)
|
||||||
|
|
||||||
# Surrogates on both sides, no fixup required
|
def test_fixup(s):
|
||||||
assert u'\ud800\udc02' < u'\ud84d\udc56'
|
s2 = u'\ud800\udc01'
|
||||||
print 'done.'
|
test_lecmp(s, s2)
|
||||||
|
s2 = u'\ud900\udc01'
|
||||||
|
test_lecmp(s, s2)
|
||||||
|
s2 = u'\uda00\udc01'
|
||||||
|
test_lecmp(s, s2)
|
||||||
|
s2 = u'\udb00\udc01'
|
||||||
|
test_lecmp(s, s2)
|
||||||
|
s2 = u'\ud800\udd01'
|
||||||
|
test_lecmp(s, s2)
|
||||||
|
s2 = u'\ud900\udd01'
|
||||||
|
test_lecmp(s, s2)
|
||||||
|
s2 = u'\uda00\udd01'
|
||||||
|
test_lecmp(s, s2)
|
||||||
|
s2 = u'\udb00\udd01'
|
||||||
|
test_lecmp(s, s2)
|
||||||
|
s2 = u'\ud800\ude01'
|
||||||
|
test_lecmp(s, s2)
|
||||||
|
s2 = u'\ud900\ude01'
|
||||||
|
test_lecmp(s, s2)
|
||||||
|
s2 = u'\uda00\ude01'
|
||||||
|
test_lecmp(s, s2)
|
||||||
|
s2 = u'\udb00\ude01'
|
||||||
|
test_lecmp(s, s2)
|
||||||
|
s2 = u'\ud800\udfff'
|
||||||
|
test_lecmp(s, s2)
|
||||||
|
s2 = u'\ud900\udfff'
|
||||||
|
test_lecmp(s, s2)
|
||||||
|
s2 = u'\uda00\udfff'
|
||||||
|
test_lecmp(s, s2)
|
||||||
|
s2 = u'\udb00\udfff'
|
||||||
|
test_lecmp(s, s2)
|
||||||
|
|
||||||
|
test_fixup(u'\ue000')
|
||||||
|
test_fixup(u'\uff61')
|
||||||
|
|
||||||
|
# Surrogates on both sides, no fixup required
|
||||||
|
assert u'\ud800\udc02' < u'\ud84d\udc56'
|
||||||
|
print 'done.'
|
||||||
|
|
||||||
test('ljust', u'abc', u'abc ', 10)
|
test('ljust', u'abc', u'abc ', 10)
|
||||||
test('rjust', u'abc', u' abc', 10)
|
test('rjust', u'abc', u' abc', 10)
|
||||||
|
|
|
@ -3169,6 +3169,12 @@ unicode_center(PyUnicodeObject *self, PyObject *args)
|
||||||
return (PyObject*) pad(self, left, marg - left, ' ');
|
return (PyObject*) pad(self, left, marg - left, ' ');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
|
||||||
|
/* This code should go into some future Unicode collation support
|
||||||
|
module. The basic comparison should compare ordinals on a naive
|
||||||
|
basis (this is what Java does and thus JPython too). */
|
||||||
|
|
||||||
/* speedy UTF-16 code point order comparison */
|
/* speedy UTF-16 code point order comparison */
|
||||||
/* gleaned from: */
|
/* gleaned from: */
|
||||||
/* http://www-4.ibm.com/software/developer/library/utf16.html?dwzone=unicode */
|
/* http://www-4.ibm.com/software/developer/library/utf16.html?dwzone=unicode */
|
||||||
|
@ -3213,6 +3219,33 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
|
||||||
return (len1 < len2) ? -1 : (len1 != len2);
|
return (len1 < len2) ? -1 : (len1 != len2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
static int
|
||||||
|
unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
|
||||||
|
{
|
||||||
|
register int len1, len2;
|
||||||
|
|
||||||
|
Py_UNICODE *s1 = str1->str;
|
||||||
|
Py_UNICODE *s2 = str2->str;
|
||||||
|
|
||||||
|
len1 = str1->length;
|
||||||
|
len2 = str2->length;
|
||||||
|
|
||||||
|
while (len1 > 0 && len2 > 0) {
|
||||||
|
register long diff;
|
||||||
|
|
||||||
|
diff = (long)*s1++ - (long)*s2++;
|
||||||
|
if (diff)
|
||||||
|
return (diff < 0) ? -1 : (diff != 0);
|
||||||
|
len1--; len2--;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (len1 < len2) ? -1 : (len1 != len2);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
int PyUnicode_Compare(PyObject *left,
|
int PyUnicode_Compare(PyObject *left,
|
||||||
PyObject *right)
|
PyObject *right)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue