mirror of
https://github.com/python/cpython.git
synced 2025-08-24 18:55:00 +00:00
gh-117431: Adapt str.find and friends to Argument Clinic (#117468)
This change gives a significant speedup, as the METH_FASTCALL calling convention is now used.

The following methods are adapted:
- str.count
- str.find
- str.index
- str.rfind
- str.rindex
This commit is contained in:
parent
345194de8c
commit
7ecd55d604
4 changed files with 444 additions and 222 deletions
|
@ -9194,75 +9194,6 @@ _PyUnicode_InsertThousandsGrouping(
|
|||
return count;
|
||||
}
|
||||
|
||||
static Py_ssize_t
|
||||
unicode_count_impl(PyObject *str,
|
||||
PyObject *substr,
|
||||
Py_ssize_t start,
|
||||
Py_ssize_t end)
|
||||
{
|
||||
assert(PyUnicode_Check(str));
|
||||
assert(PyUnicode_Check(substr));
|
||||
|
||||
Py_ssize_t result;
|
||||
int kind1, kind2;
|
||||
const void *buf1 = NULL, *buf2 = NULL;
|
||||
Py_ssize_t len1, len2;
|
||||
|
||||
kind1 = PyUnicode_KIND(str);
|
||||
kind2 = PyUnicode_KIND(substr);
|
||||
if (kind1 < kind2)
|
||||
return 0;
|
||||
|
||||
len1 = PyUnicode_GET_LENGTH(str);
|
||||
len2 = PyUnicode_GET_LENGTH(substr);
|
||||
ADJUST_INDICES(start, end, len1);
|
||||
if (end - start < len2)
|
||||
return 0;
|
||||
|
||||
buf1 = PyUnicode_DATA(str);
|
||||
buf2 = PyUnicode_DATA(substr);
|
||||
if (kind2 != kind1) {
|
||||
buf2 = unicode_askind(kind2, buf2, len2, kind1);
|
||||
if (!buf2)
|
||||
goto onError;
|
||||
}
|
||||
|
||||
// We don't reuse `anylib_count` here because of the explicit casts.
|
||||
switch (kind1) {
|
||||
case PyUnicode_1BYTE_KIND:
|
||||
result = ucs1lib_count(
|
||||
((const Py_UCS1*)buf1) + start, end - start,
|
||||
buf2, len2, PY_SSIZE_T_MAX
|
||||
);
|
||||
break;
|
||||
case PyUnicode_2BYTE_KIND:
|
||||
result = ucs2lib_count(
|
||||
((const Py_UCS2*)buf1) + start, end - start,
|
||||
buf2, len2, PY_SSIZE_T_MAX
|
||||
);
|
||||
break;
|
||||
case PyUnicode_4BYTE_KIND:
|
||||
result = ucs4lib_count(
|
||||
((const Py_UCS4*)buf1) + start, end - start,
|
||||
buf2, len2, PY_SSIZE_T_MAX
|
||||
);
|
||||
break;
|
||||
default:
|
||||
Py_UNREACHABLE();
|
||||
}
|
||||
|
||||
assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr)));
|
||||
if (kind2 != kind1)
|
||||
PyMem_Free((void *)buf2);
|
||||
|
||||
return result;
|
||||
onError:
|
||||
assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr)));
|
||||
if (kind2 != kind1)
|
||||
PyMem_Free((void *)buf2);
|
||||
return -1;
|
||||
}
|
||||
|
||||
Py_ssize_t
|
||||
PyUnicode_Count(PyObject *str,
|
||||
PyObject *substr,
|
||||
|
@ -11131,47 +11062,87 @@ PyUnicode_AppendAndDel(PyObject **pleft, PyObject *right)
|
|||
Py_XDECREF(right);
|
||||
}
|
||||
|
||||
/*
|
||||
Wraps asciilib_parse_args_finds() and additionally ensures that the
|
||||
first argument is a unicode object.
|
||||
*/
|
||||
/*[clinic input]
|
||||
@text_signature "($self, sub[, start[, end]], /)"
|
||||
str.count as unicode_count -> Py_ssize_t
|
||||
|
||||
static inline int
|
||||
parse_args_finds_unicode(const char * function_name, PyObject *args,
|
||||
PyObject **substring,
|
||||
Py_ssize_t *start, Py_ssize_t *end)
|
||||
self as str: self
|
||||
sub as substr: unicode
|
||||
start: slice_index(accept={int, NoneType}, c_default='0') = None
|
||||
end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
|
||||
/
|
||||
|
||||
Return the number of non-overlapping occurrences of substring sub in string S[start:end].
|
||||
|
||||
Optional arguments start and end are interpreted as in slice notation.
|
||||
[clinic start generated code]*/
|
||||
|
||||
static Py_ssize_t
|
||||
unicode_count_impl(PyObject *str, PyObject *substr, Py_ssize_t start,
|
||||
Py_ssize_t end)
|
||||
/*[clinic end generated code: output=8fcc3aef0b18edbf input=6f168ffd94be8785]*/
|
||||
{
|
||||
if (asciilib_parse_args_finds(function_name, args, substring, start, end)) {
|
||||
if (ensure_unicode(*substring) < 0)
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
assert(PyUnicode_Check(str));
|
||||
assert(PyUnicode_Check(substr));
|
||||
|
||||
PyDoc_STRVAR(count__doc__,
|
||||
"S.count(sub[, start[, end]]) -> int\n\
|
||||
\n\
|
||||
Return the number of non-overlapping occurrences of substring sub in\n\
|
||||
string S[start:end]. Optional arguments start and end are\n\
|
||||
interpreted as in slice notation.");
|
||||
|
||||
static PyObject *
|
||||
unicode_count(PyObject *self, PyObject *args)
|
||||
{
|
||||
PyObject *substring = NULL; /* initialize to fix a compiler warning */
|
||||
Py_ssize_t start = 0;
|
||||
Py_ssize_t end = PY_SSIZE_T_MAX;
|
||||
Py_ssize_t result;
|
||||
int kind1, kind2;
|
||||
const void *buf1 = NULL, *buf2 = NULL;
|
||||
Py_ssize_t len1, len2;
|
||||
|
||||
if (!parse_args_finds_unicode("count", args, &substring, &start, &end))
|
||||
return NULL;
|
||||
kind1 = PyUnicode_KIND(str);
|
||||
kind2 = PyUnicode_KIND(substr);
|
||||
if (kind1 < kind2)
|
||||
return 0;
|
||||
|
||||
result = unicode_count_impl(self, substring, start, end);
|
||||
if (result == -1)
|
||||
return NULL;
|
||||
len1 = PyUnicode_GET_LENGTH(str);
|
||||
len2 = PyUnicode_GET_LENGTH(substr);
|
||||
ADJUST_INDICES(start, end, len1);
|
||||
if (end - start < len2)
|
||||
return 0;
|
||||
|
||||
return PyLong_FromSsize_t(result);
|
||||
buf1 = PyUnicode_DATA(str);
|
||||
buf2 = PyUnicode_DATA(substr);
|
||||
if (kind2 != kind1) {
|
||||
buf2 = unicode_askind(kind2, buf2, len2, kind1);
|
||||
if (!buf2)
|
||||
goto onError;
|
||||
}
|
||||
|
||||
// We don't reuse `anylib_count` here because of the explicit casts.
|
||||
switch (kind1) {
|
||||
case PyUnicode_1BYTE_KIND:
|
||||
result = ucs1lib_count(
|
||||
((const Py_UCS1*)buf1) + start, end - start,
|
||||
buf2, len2, PY_SSIZE_T_MAX
|
||||
);
|
||||
break;
|
||||
case PyUnicode_2BYTE_KIND:
|
||||
result = ucs2lib_count(
|
||||
((const Py_UCS2*)buf1) + start, end - start,
|
||||
buf2, len2, PY_SSIZE_T_MAX
|
||||
);
|
||||
break;
|
||||
case PyUnicode_4BYTE_KIND:
|
||||
result = ucs4lib_count(
|
||||
((const Py_UCS4*)buf1) + start, end - start,
|
||||
buf2, len2, PY_SSIZE_T_MAX
|
||||
);
|
||||
break;
|
||||
default:
|
||||
Py_UNREACHABLE();
|
||||
}
|
||||
|
||||
assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr)));
|
||||
if (kind2 != kind1)
|
||||
PyMem_Free((void *)buf2);
|
||||
|
||||
return result;
|
||||
onError:
|
||||
assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr)));
|
||||
if (kind2 != kind1)
|
||||
PyMem_Free((void *)buf2);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
|
@ -11282,33 +11253,25 @@ unicode_expandtabs_impl(PyObject *self, int tabsize)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(find__doc__,
|
||||
"S.find(sub[, start[, end]]) -> int\n\
|
||||
\n\
|
||||
Return the lowest index in S where substring sub is found,\n\
|
||||
such that sub is contained within S[start:end]. Optional\n\
|
||||
arguments start and end are interpreted as in slice notation.\n\
|
||||
\n\
|
||||
Return -1 on failure.");
|
||||
/*[clinic input]
|
||||
str.find as unicode_find = str.count
|
||||
|
||||
static PyObject *
|
||||
unicode_find(PyObject *self, PyObject *args)
|
||||
Return the lowest index in S where substring sub is found, such that sub is contained within S[start:end].
|
||||
|
||||
Optional arguments start and end are interpreted as in slice notation.
|
||||
Return -1 on failure.
|
||||
[clinic start generated code]*/
|
||||
|
||||
static Py_ssize_t
|
||||
unicode_find_impl(PyObject *str, PyObject *substr, Py_ssize_t start,
|
||||
Py_ssize_t end)
|
||||
/*[clinic end generated code: output=51dbe6255712e278 input=4a89d2d68ef57256]*/
|
||||
{
|
||||
/* initialize variables to prevent gcc warning */
|
||||
PyObject *substring = NULL;
|
||||
Py_ssize_t start = 0;
|
||||
Py_ssize_t end = 0;
|
||||
Py_ssize_t result;
|
||||
|
||||
if (!parse_args_finds_unicode("find", args, &substring, &start, &end))
|
||||
return NULL;
|
||||
|
||||
result = any_find_slice(self, substring, start, end, 1);
|
||||
|
||||
if (result == -2)
|
||||
return NULL;
|
||||
|
||||
return PyLong_FromSsize_t(result);
|
||||
Py_ssize_t result = any_find_slice(str, substr, start, end, 1);
|
||||
if (result < 0) {
|
||||
return -1;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
|
@ -11351,38 +11314,28 @@ unicode_hash(PyObject *self)
|
|||
return x;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(index__doc__,
|
||||
"S.index(sub[, start[, end]]) -> int\n\
|
||||
\n\
|
||||
Return the lowest index in S where substring sub is found,\n\
|
||||
such that sub is contained within S[start:end]. Optional\n\
|
||||
arguments start and end are interpreted as in slice notation.\n\
|
||||
\n\
|
||||
Raises ValueError when the substring is not found.");
|
||||
/*[clinic input]
|
||||
str.index as unicode_index = str.count
|
||||
|
||||
static PyObject *
|
||||
unicode_index(PyObject *self, PyObject *args)
|
||||
Return the lowest index in S where substring sub is found, such that sub is contained within S[start:end].
|
||||
|
||||
Optional arguments start and end are interpreted as in slice notation.
|
||||
Raises ValueError when the substring is not found.
|
||||
[clinic start generated code]*/
|
||||
|
||||
static Py_ssize_t
|
||||
unicode_index_impl(PyObject *str, PyObject *substr, Py_ssize_t start,
|
||||
Py_ssize_t end)
|
||||
/*[clinic end generated code: output=77558288837cdf40 input=d986aeac0be14a1c]*/
|
||||
{
|
||||
/* initialize variables to prevent gcc warning */
|
||||
Py_ssize_t result;
|
||||
PyObject *substring = NULL;
|
||||
Py_ssize_t start = 0;
|
||||
Py_ssize_t end = 0;
|
||||
|
||||
if (!parse_args_finds_unicode("index", args, &substring, &start, &end))
|
||||
return NULL;
|
||||
|
||||
result = any_find_slice(self, substring, start, end, 1);
|
||||
|
||||
if (result == -2)
|
||||
return NULL;
|
||||
|
||||
if (result < 0) {
|
||||
Py_ssize_t result = any_find_slice(str, substr, start, end, 1);
|
||||
if (result == -1) {
|
||||
PyErr_SetString(PyExc_ValueError, "substring not found");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return PyLong_FromSsize_t(result);
|
||||
else if (result < 0) {
|
||||
return -1;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
|
@ -12462,67 +12415,49 @@ unicode_repr(PyObject *unicode)
|
|||
return repr;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(rfind__doc__,
|
||||
"S.rfind(sub[, start[, end]]) -> int\n\
|
||||
\n\
|
||||
Return the highest index in S where substring sub is found,\n\
|
||||
such that sub is contained within S[start:end]. Optional\n\
|
||||
arguments start and end are interpreted as in slice notation.\n\
|
||||
\n\
|
||||
Return -1 on failure.");
|
||||
/*[clinic input]
|
||||
str.rfind as unicode_rfind = str.count
|
||||
|
||||
static PyObject *
|
||||
unicode_rfind(PyObject *self, PyObject *args)
|
||||
Return the highest index in S where substring sub is found, such that sub is contained within S[start:end].
|
||||
|
||||
Optional arguments start and end are interpreted as in slice notation.
|
||||
Return -1 on failure.
|
||||
[clinic start generated code]*/
|
||||
|
||||
static Py_ssize_t
|
||||
unicode_rfind_impl(PyObject *str, PyObject *substr, Py_ssize_t start,
|
||||
Py_ssize_t end)
|
||||
/*[clinic end generated code: output=880b29f01dd014c8 input=898361fb71f59294]*/
|
||||
{
|
||||
/* initialize variables to prevent gcc warning */
|
||||
PyObject *substring = NULL;
|
||||
Py_ssize_t start = 0;
|
||||
Py_ssize_t end = 0;
|
||||
Py_ssize_t result;
|
||||
|
||||
if (!parse_args_finds_unicode("rfind", args, &substring, &start, &end))
|
||||
return NULL;
|
||||
|
||||
result = any_find_slice(self, substring, start, end, -1);
|
||||
|
||||
if (result == -2)
|
||||
return NULL;
|
||||
|
||||
return PyLong_FromSsize_t(result);
|
||||
Py_ssize_t result = any_find_slice(str, substr, start, end, -1);
|
||||
if (result < 0) {
|
||||
return -1;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(rindex__doc__,
|
||||
"S.rindex(sub[, start[, end]]) -> int\n\
|
||||
\n\
|
||||
Return the highest index in S where substring sub is found,\n\
|
||||
such that sub is contained within S[start:end]. Optional\n\
|
||||
arguments start and end are interpreted as in slice notation.\n\
|
||||
\n\
|
||||
Raises ValueError when the substring is not found.");
|
||||
/*[clinic input]
|
||||
str.rindex as unicode_rindex = str.count
|
||||
|
||||
static PyObject *
|
||||
unicode_rindex(PyObject *self, PyObject *args)
|
||||
Return the highest index in S where substring sub is found, such that sub is contained within S[start:end].
|
||||
|
||||
Optional arguments start and end are interpreted as in slice notation.
|
||||
Raises ValueError when the substring is not found.
|
||||
[clinic start generated code]*/
|
||||
|
||||
static Py_ssize_t
|
||||
unicode_rindex_impl(PyObject *str, PyObject *substr, Py_ssize_t start,
|
||||
Py_ssize_t end)
|
||||
/*[clinic end generated code: output=5f3aef124c867fe1 input=35943dead6c1ea9d]*/
|
||||
{
|
||||
/* initialize variables to prevent gcc warning */
|
||||
PyObject *substring = NULL;
|
||||
Py_ssize_t start = 0;
|
||||
Py_ssize_t end = 0;
|
||||
Py_ssize_t result;
|
||||
|
||||
if (!parse_args_finds_unicode("rindex", args, &substring, &start, &end))
|
||||
return NULL;
|
||||
|
||||
result = any_find_slice(self, substring, start, end, -1);
|
||||
|
||||
if (result == -2)
|
||||
return NULL;
|
||||
|
||||
if (result < 0) {
|
||||
Py_ssize_t result = any_find_slice(str, substr, start, end, -1);
|
||||
if (result == -1) {
|
||||
PyErr_SetString(PyExc_ValueError, "substring not found");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return PyLong_FromSsize_t(result);
|
||||
else if (result < 0) {
|
||||
return -1;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
|
@ -13562,16 +13497,16 @@ static PyMethodDef unicode_methods[] = {
|
|||
UNICODE_CASEFOLD_METHODDEF
|
||||
UNICODE_TITLE_METHODDEF
|
||||
UNICODE_CENTER_METHODDEF
|
||||
{"count", (PyCFunction) unicode_count, METH_VARARGS, count__doc__},
|
||||
UNICODE_COUNT_METHODDEF
|
||||
UNICODE_EXPANDTABS_METHODDEF
|
||||
{"find", (PyCFunction) unicode_find, METH_VARARGS, find__doc__},
|
||||
UNICODE_FIND_METHODDEF
|
||||
UNICODE_PARTITION_METHODDEF
|
||||
{"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__},
|
||||
UNICODE_INDEX_METHODDEF
|
||||
UNICODE_LJUST_METHODDEF
|
||||
UNICODE_LOWER_METHODDEF
|
||||
UNICODE_LSTRIP_METHODDEF
|
||||
{"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__},
|
||||
{"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__},
|
||||
UNICODE_RFIND_METHODDEF
|
||||
UNICODE_RINDEX_METHODDEF
|
||||
UNICODE_RJUST_METHODDEF
|
||||
UNICODE_RSTRIP_METHODDEF
|
||||
UNICODE_RPARTITION_METHODDEF
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue