mirror of
https://github.com/python/cpython.git
synced 2025-08-31 14:07:50 +00:00
delta encoding of upper/lower/title makes a glorious return (#12736)
This commit is contained in:
parent
da05f454e3
commit
ad9c569825
3 changed files with 1448 additions and 3544 deletions
|
@@ -27,9 +27,13 @@
|
||||||
#define EXTENDED_CASE_MASK 0x4000
|
#define EXTENDED_CASE_MASK 0x4000
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
const Py_UCS4 upper;
|
/*
|
||||||
const Py_UCS4 lower;
|
These are either deltas to the character or offsets in
|
||||||
const Py_UCS4 title;
|
_PyUnicode_ExtendedCase.
|
||||||
|
*/
|
||||||
|
const int upper;
|
||||||
|
const int lower;
|
||||||
|
const int title;
|
||||||
const unsigned char decimal;
|
const unsigned char decimal;
|
||||||
const unsigned char digit;
|
const unsigned char digit;
|
||||||
const unsigned short flags;
|
const unsigned short flags;
|
||||||
|
@@ -60,7 +64,7 @@ Py_UCS4 _PyUnicode_ToTitlecase(register Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
||||||
|
|
||||||
return ctype->title ? ctype->title : ch;
|
return ch + ctype->title;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Returns 1 for Unicode characters having the category 'Lt', 0
|
/* Returns 1 for Unicode characters having the category 'Lt', 0
|
||||||
|
@@ -186,7 +190,7 @@ Py_UCS4 _PyUnicode_ToUppercase(Py_UCS4 ch)
|
||||||
|
|
||||||
if (ctype->flags & EXTENDED_CASE_MASK)
|
if (ctype->flags & EXTENDED_CASE_MASK)
|
||||||
return _PyUnicode_ExtendedCase[ctype->upper & 0xFFFF];
|
return _PyUnicode_ExtendedCase[ctype->upper & 0xFFFF];
|
||||||
return ctype->upper ? ctype->upper : ch;
|
return ch + ctype->upper;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Returns the lowercase Unicode characters corresponding to ch or just
|
/* Returns the lowercase Unicode characters corresponding to ch or just
|
||||||
|
@@ -198,7 +202,7 @@ Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch)
|
||||||
|
|
||||||
if (ctype->flags & EXTENDED_CASE_MASK)
|
if (ctype->flags & EXTENDED_CASE_MASK)
|
||||||
return _PyUnicode_ExtendedCase[ctype->lower & 0xFFFF];
|
return _PyUnicode_ExtendedCase[ctype->lower & 0xFFFF];
|
||||||
return ctype->lower ? ctype->lower : ch;
|
return ch + ctype->lower;
|
||||||
}
|
}
|
||||||
|
|
||||||
int _PyUnicode_ToLowerFull(Py_UCS4 ch, Py_UCS4 *res)
|
int _PyUnicode_ToLowerFull(Py_UCS4 ch, Py_UCS4 *res)
|
||||||
|
@@ -213,7 +217,7 @@ int _PyUnicode_ToLowerFull(Py_UCS4 ch, Py_UCS4 *res)
|
||||||
res[i] = _PyUnicode_ExtendedCase[index + i];
|
res[i] = _PyUnicode_ExtendedCase[index + i];
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
res[0] = ctype->lower ? ctype->lower : ch;
|
res[0] = ch + ctype->lower;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -229,7 +233,7 @@ int _PyUnicode_ToTitleFull(Py_UCS4 ch, Py_UCS4 *res)
|
||||||
res[i] = _PyUnicode_ExtendedCase[index + i];
|
res[i] = _PyUnicode_ExtendedCase[index + i];
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
res[0] = ctype->title ? ctype->title : ch;
|
res[0] = ch + ctype->title;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -245,7 +249,7 @@ int _PyUnicode_ToUpperFull(Py_UCS4 ch, Py_UCS4 *res)
|
||||||
res[i] = _PyUnicode_ExtendedCase[index + i];
|
res[i] = _PyUnicode_ExtendedCase[index + i];
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
res[0] = ctype->upper ? ctype->upper : ch;
|
res[0] = ch + ctype->upper;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@@ -443,6 +443,13 @@ def makeunicodetype(unicode, trace):
|
||||||
if sc is None:
|
if sc is None:
|
||||||
if upper == lower == title:
|
if upper == lower == title:
|
||||||
upper = lower = title = 0
|
upper = lower = title = 0
|
||||||
|
else:
|
||||||
|
upper = upper - char
|
||||||
|
lower = lower - char
|
||||||
|
title = title - char
|
||||||
|
assert (abs(upper) <= 2147483647 and
|
||||||
|
abs(lower) <= 2147483647 and
|
||||||
|
abs(title) <= 2147483647)
|
||||||
else:
|
else:
|
||||||
# This happens either when some character maps to more than one
|
# This happens either when some character maps to more than one
|
||||||
# character in uppercase, lowercase, or titlecase or the
|
# character in uppercase, lowercase, or titlecase or the
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue