Add rsplit method for str and unicode builtin types.

SF feature request #801847. Original patch is written by Sean Reifschneider.
2025-07-12 05:45:15 +00:00 · 2003-12-15 18:49:53 +00:00 · 2003-12-15 18:49:53 +00:00 · 3ae811b57d
commit 3ae811b57d
parent dce391cb39
7 changed files with 402 additions and 1 deletions
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -4053,7 +4053,7 @@ PyUnicodeObject *pad(PyUnicodeObject *self,
 }

 #define SPLIT_APPEND(data, left, right)					\
-	str = PyUnicode_FromUnicode(data + left, right - left);		\
+	str = PyUnicode_FromUnicode((data) + (left), (right) - (left));	\
 	if (!str)							\
 	    goto onError;						\
 	if (PyList_Append(list, str)) {					\
@ -4063,6 +4063,17 @@ PyUnicodeObject *pad(PyUnicodeObject *self,
        else								\
            Py_DECREF(str);

+#define SPLIT_INSERT(data, left, right)					\
+	str = PyUnicode_FromUnicode((data) + (left), (right) - (left));	\
+	if (!str)							\
+	    goto onError;						\
+	if (PyList_Insert(list, 0, str)) {				\
+	    Py_DECREF(str);						\
+	    goto onError;						\
+	}								\
+        else								\
+            Py_DECREF(str);
+
 static
 PyObject *split_whitespace(PyUnicodeObject *self,
 			   PyObject *list,
@ -4214,7 +4225,106 @@ PyObject *split_substring(PyUnicodeObject *self,
    return NULL;
 }

+static
+PyObject *rsplit_whitespace(PyUnicodeObject *self,
+			    PyObject *list,
+			    int maxcount)
+{
+    register int i;
+    register int j;
+    int len = self->length;
+    PyObject *str;
+
+    for (i = j = len - 1; i >= 0; ) {
+	/* find a token */
+	while (i >= 0 && Py_UNICODE_ISSPACE(self->str[i]))
+	    i--;
+	j = i;
+	while (i >= 0 && !Py_UNICODE_ISSPACE(self->str[i]))
+	    i--;
+	if (j > i) {
+	    if (maxcount-- <= 0)
+		break;
+	    SPLIT_INSERT(self->str, i + 1, j + 1);
+	    while (i >= 0 && Py_UNICODE_ISSPACE(self->str[i]))
+		i--;
+	    j = i;
+	}
+    }
+    if (j >= 0) {
+	SPLIT_INSERT(self->str, 0, j + 1);
+    }
+    return list;
+
+ onError:
+    Py_DECREF(list);
+    return NULL;
+}
+
+static 
+PyObject *rsplit_char(PyUnicodeObject *self,
+		      PyObject *list,
+		      Py_UNICODE ch,
+		      int maxcount)
+{
+    register int i;
+    register int j;
+    int len = self->length;
+    PyObject *str;
+
+    for (i = j = len - 1; i >= 0; ) {
+	if (self->str[i] == ch) {
+	    if (maxcount-- <= 0)
+		break;
+	    SPLIT_INSERT(self->str, i + 1, j + 1);
+	    j = i = i - 1;
+	} else
+	    i--;
+    }
+    if (j >= 0) {
+	SPLIT_INSERT(self->str, 0, j + 1);
+    }
+    return list;
+
+ onError:
+    Py_DECREF(list);
+    return NULL;
+}
+
+static 
+PyObject *rsplit_substring(PyUnicodeObject *self,
+			   PyObject *list,
+			   PyUnicodeObject *substring,
+			   int maxcount)
+{
+    register int i;
+    register int j;
+    int len = self->length;
+    int sublen = substring->length;
+    PyObject *str;
+
+    for (i = len - sublen, j = len; i >= 0; ) {
+	if (Py_UNICODE_MATCH(self, i, substring)) {
+	    if (maxcount-- <= 0)
+		break;
+	    SPLIT_INSERT(self->str, i + sublen, j);
+	    j = i;
+	    i -= sublen;
+	} else
+	    i--;
+    }
+    if (j >= 0) {
+	SPLIT_INSERT(self->str, 0, j);
+    }
+    return list;
+
+ onError:
+    Py_DECREF(list);
+    return NULL;
+}
+
 #undef SPLIT_APPEND
+#undef SPLIT_INSERT

 static
 PyObject *split(PyUnicodeObject *self,
@ -4245,6 +4355,35 @@ PyObject *split(PyUnicodeObject *self,
 	return split_substring(self,list,substring,maxcount);
 }

+static
+PyObject *rsplit(PyUnicodeObject *self,
+		 PyUnicodeObject *substring,
+		 int maxcount)
+{
+    PyObject *list;
+
+    if (maxcount < 0)
+        maxcount = INT_MAX;
+
+    list = PyList_New(0);
+    if (!list)
+        return NULL;
+
+    if (substring == NULL)
+	return rsplit_whitespace(self,list,maxcount);
+
+    else if (substring->length == 1)
+	return rsplit_char(self,list,substring->str[0],maxcount);
+
+    else if (substring->length == 0) {
+	Py_DECREF(list);
+	PyErr_SetString(PyExc_ValueError, "empty separator");
+	return NULL;
+    }
+    else
+	return rsplit_substring(self,list,substring,maxcount);
+}
+
 static
 PyObject *replace(PyUnicodeObject *self,
 		  PyUnicodeObject *str1,
@ -5675,6 +5814,56 @@ unicode_split(PyUnicodeObject *self, PyObject *args)
 	return PyUnicode_Split((PyObject *)self, substring, maxcount);
 }

+PyObject *PyUnicode_RSplit(PyObject *s,
+			   PyObject *sep,
+			   int maxsplit)
+{
+    PyObject *result;
+    
+    s = PyUnicode_FromObject(s);
+    if (s == NULL)
+	return NULL;
+    if (sep != NULL) {
+	sep = PyUnicode_FromObject(sep);
+	if (sep == NULL) {
+	    Py_DECREF(s);
+	    return NULL;
+	}
+    }
+
+    result = rsplit((PyUnicodeObject *)s, (PyUnicodeObject *)sep, maxsplit);
+
+    Py_DECREF(s);
+    Py_XDECREF(sep);
+    return result;
+}
+
+PyDoc_STRVAR(rsplit__doc__,
+"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
+\n\
+Return a list of the words in S, using sep as the\n\
+delimiter string, starting at the end of the string and\n\
+working to the front.  If maxsplit is given, at most maxsplit\n\
+splits are done. If sep is not specified, any whitespace string\n\
+is a separator.");
+
+static PyObject*
+unicode_rsplit(PyUnicodeObject *self, PyObject *args)
+{
+    PyObject *substring = Py_None;
+    int maxcount = -1;
+
+    if (!PyArg_ParseTuple(args, "|Oi:rsplit", &substring, &maxcount))
+        return NULL;
+
+    if (substring == Py_None)
+	return rsplit(self, NULL, maxcount);
+    else if (PyUnicode_Check(substring))
+	return rsplit(self, (PyUnicodeObject *)substring, maxcount);
+    else
+	return PyUnicode_RSplit((PyObject *)self, substring, maxcount);
+}
+
 PyDoc_STRVAR(splitlines__doc__,
 "S.splitlines([keepends]]) -> list of strings\n\
 \n\
@ -5870,6 +6059,7 @@ static PyMethodDef unicode_methods[] = {
    {"encode", (PyCFunction) unicode_encode, METH_VARARGS, encode__doc__},
    {"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__},
    {"split", (PyCFunction) unicode_split, METH_VARARGS, split__doc__},
+    {"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__},
    {"join", (PyCFunction) unicode_join, METH_O, join__doc__},
    {"capitalize", (PyCFunction) unicode_capitalize, METH_NOARGS, capitalize__doc__},
    {"title", (PyCFunction) unicode_title, METH_NOARGS, title__doc__},