add str.casefold() (closes #13752)

This commit is contained in:
Benjamin Peterson 2012-01-14 13:23:30 -05:00
parent 94d5a7174a
commit d5890c8db5
8 changed files with 493 additions and 137 deletions

View file

@ -185,7 +185,7 @@ Py_UCS4 _PyUnicode_ToUppercase(Py_UCS4 ch)
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
if (ctype->flags & EXTENDED_CASE_MASK)
return _PyUnicode_ExtendedCase[ctype->upper & 0xFFFFFF];
return _PyUnicode_ExtendedCase[ctype->upper & 0xFFFF];
return ctype->upper ? ctype->upper : ch;
}
@ -197,7 +197,7 @@ Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch)
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
if (ctype->flags & EXTENDED_CASE_MASK)
return _PyUnicode_ExtendedCase[ctype->lower & 0xFFFFFF];
return _PyUnicode_ExtendedCase[ctype->lower & 0xFFFF];
return ctype->lower ? ctype->lower : ch;
}
@ -206,7 +206,7 @@ int _PyUnicode_ToLowerFull(Py_UCS4 ch, Py_UCS4 *res)
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
if (ctype->flags & EXTENDED_CASE_MASK) {
int index = ctype->lower & 0xFFFFFF;
int index = ctype->lower & 0xFFFF;
int n = ctype->lower >> 24;
int i;
for (i = 0; i < n; i++)
@ -222,7 +222,7 @@ int _PyUnicode_ToTitleFull(Py_UCS4 ch, Py_UCS4 *res)
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
if (ctype->flags & EXTENDED_CASE_MASK) {
int index = ctype->title & 0xFFFFFF;
int index = ctype->title & 0xFFFF;
int n = ctype->title >> 24;
int i;
for (i = 0; i < n; i++)
@ -238,7 +238,7 @@ int _PyUnicode_ToUpperFull(Py_UCS4 ch, Py_UCS4 *res)
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
if (ctype->flags & EXTENDED_CASE_MASK) {
int index = ctype->upper & 0xFFFFFF;
int index = ctype->upper & 0xFFFF;
int n = ctype->upper >> 24;
int i;
for (i = 0; i < n; i++)
@ -249,6 +249,21 @@ int _PyUnicode_ToUpperFull(Py_UCS4 ch, Py_UCS4 *res)
return 1;
}
int _PyUnicode_ToFoldedFull(Py_UCS4 ch, Py_UCS4 *res)
{
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
if (ctype->flags & EXTENDED_CASE_MASK && (ctype->lower >> 20) & 7) {
int index = (ctype->lower & 0xFFFF) + (ctype->lower >> 24);
int n = (ctype->lower >> 20) & 7;
int i;
for (i = 0; i < n; i++)
res[i] = _PyUnicode_ExtendedCase[index + i];
return n;
}
return _PyUnicode_ToLowerFull(ch, res);
}
int _PyUnicode_IsCased(Py_UCS4 ch)
{
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);

View file

@ -9576,6 +9576,24 @@ do_lower(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar
return do_upper_or_lower(kind, data, length, res, maxchar, 1);
}
static Py_ssize_t
do_casefold(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar)
{
Py_ssize_t i, k = 0;
for (i = 0; i < length; i++) {
Py_UCS4 c = PyUnicode_READ(kind, data, i);
Py_UCS4 mapped[3];
int j, n_res = _PyUnicode_ToFoldedFull(c, mapped);
for (j = 0; j < n_res; j++) {
if (mapped[j] > *maxchar)
*maxchar = mapped[j];
res[k++] = mapped[j];
}
}
return k;
}
static Py_ssize_t
do_title(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar)
{
@ -10501,6 +10519,22 @@ unicode_capitalize(PyObject *self)
return case_operation(self, do_capitalize);
}
PyDoc_STRVAR(casefold__doc__,
"S.casefold() -> str\n\
\n\
Return a version of S suitable for caseless comparisons.");
static PyObject *
unicode_casefold(PyObject *self)
{
if (PyUnicode_READY(self) == -1)
return NULL;
if (PyUnicode_IS_ASCII(self))
return ascii_upper_or_lower(self, 1);
return case_operation(self, do_casefold);
}
/* Argument converter. Coerces to a single unicode character */
static int
@ -12998,6 +13032,7 @@ static PyMethodDef unicode_methods[] = {
{"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__},
{"join", (PyCFunction) unicode_join, METH_O, join__doc__},
{"capitalize", (PyCFunction) unicode_capitalize, METH_NOARGS, capitalize__doc__},
{"casefold", (PyCFunction) unicode_casefold, METH_NOARGS, casefold__doc__},
{"title", (PyCFunction) unicode_title, METH_NOARGS, title__doc__},
{"center", (PyCFunction) unicode_center, METH_VARARGS, center__doc__},
{"count", (PyCFunction) unicode_count, METH_VARARGS, count__doc__},

File diff suppressed because it is too large Load diff