Apply patch diff.txt from SF feature request

http://www.python.org/sf/444708

This adds the optional argument for str.strip
to unicode.strip too and makes it possible
to call str.strip with a unicode argument
and unicode.strip with a str argument.
This commit is contained in:
Walter Dörwald 2002-04-22 17:42:37 +00:00
parent a7cc43b9e8
commit de02bcb265
6 changed files with 243 additions and 80 deletions

View file

@ -235,17 +235,28 @@ The functions defined in this module are:
\function{joinfields()} was only used with two arguments.) \function{joinfields()} was only used with two arguments.)
\end{funcdesc} \end{funcdesc}
\begin{funcdesc}{lstrip}{s} \begin{funcdesc}{lstrip}{s\optional{, chars}}
Return a copy of \var{s} but without leading whitespace characters. Return a copy of the string with leading characters removed. If
\var{chars} is omitted or \code{None}, whitespace characters are
removed. If given and not \code{None}, \var{chars} must be a string;
the characters in the string will be stripped from the beginning of
\var{s}.
\end{funcdesc} \end{funcdesc}
\begin{funcdesc}{rstrip}{s} \begin{funcdesc}{rstrip}{s\optional{, chars}}
Return a copy of \var{s} but without trailing whitespace Return a copy of the string with trailing characters removed. If
characters. \var{chars} is omitted or \code{None}, whitespace characters are
removed. If given and not \code{None}, \var{chars} must be a string;
the characters in the string will be stripped from the end of
\var{s}.
\end{funcdesc} \end{funcdesc}
\begin{funcdesc}{strip}{s} \begin{funcdesc}{strip}{s\optional{, chars}}
Return a copy of \var{s} without leading or trailing whitespace. Return a copy of the string with leading and trailing characters
removed. If \var{chars} is omitted or \code{None}, whitespace
characters are removed. If given and not \code{None}, \var{chars}
must be a string; the characters in the string will be stripped from
both ends of \var{s}.
\end{funcdesc} \end{funcdesc}
\begin{funcdesc}{swapcase}{s} \begin{funcdesc}{swapcase}{s}

View file

@ -1040,6 +1040,13 @@ extern DL_IMPORT(int) PyUnicode_Contains(
PyObject *element /* Element string */ PyObject *element /* Element string */
); );
/* Externally visible for str.strip(unicode) */
/* Strips the characters contained in sepobj from one or both ends of
   self and returns the resulting object. */
extern DL_IMPORT(PyObject *) _PyUnicode_XStrip(
PyUnicodeObject *self, /* Unicode object to strip */
int striptype, /* 0 = left, 1 = right, 2 = both ends */
PyObject *sepobj /* Characters to remove; read as a unicode object
                    without a type check, so callers are presumably
                    expected to pass unicode -- confirm */
);
/* === Characters Type APIs =============================================== */ /* === Characters Type APIs =============================================== */
/* These should not be used directly. Use the Py_UNICODE_IS* and /* These should not be used directly. Use the Py_UNICODE_IS* and

View file

@ -169,12 +169,18 @@ def run_method_tests(test):
test('rstrip', ' hello ', ' hello', None) test('rstrip', ' hello ', ' hello', None)
test('strip', 'hello', 'hello', None) test('strip', 'hello', 'hello', None)
# strip/lstrip/rstrip with real arg # strip/lstrip/rstrip with str arg
test('strip', 'xyzzyhelloxyzzy', 'hello', 'xyz') test('strip', 'xyzzyhelloxyzzy', 'hello', 'xyz')
test('lstrip', 'xyzzyhelloxyzzy', 'helloxyzzy', 'xyz') test('lstrip', 'xyzzyhelloxyzzy', 'helloxyzzy', 'xyz')
test('rstrip', 'xyzzyhelloxyzzy', 'xyzzyhello', 'xyz') test('rstrip', 'xyzzyhelloxyzzy', 'xyzzyhello', 'xyz')
test('strip', 'hello', 'hello', 'xyz') test('strip', 'hello', 'hello', 'xyz')
# strip/lstrip/rstrip with unicode arg
test('strip', 'xyzzyhelloxyzzy', u'hello', u'xyz')
test('lstrip', 'xyzzyhelloxyzzy', u'helloxyzzy', u'xyz')
test('rstrip', 'xyzzyhelloxyzzy', u'xyzzyhello', u'xyz')
test('strip', 'hello', u'hello', u'xyz')
test('swapcase', 'HeLLo cOmpUteRs', 'hEllO CoMPuTErS') test('swapcase', 'HeLLo cOmpUteRs', 'hEllO CoMPuTErS')
test('translate', 'xyzabcdef', 'xyzxyz', transtable, 'def') test('translate', 'xyzabcdef', 'xyzxyz', transtable, 'def')

View file

@ -169,6 +169,24 @@ test('lstrip', u' hello ', u'hello ')
test('rstrip', u' hello ', u' hello') test('rstrip', u' hello ', u' hello')
test('strip', u'hello', u'hello') test('strip', u'hello', u'hello')
# strip/lstrip/rstrip with None arg
test('strip', u' hello ', u'hello', None)
test('lstrip', u' hello ', u'hello ', None)
test('rstrip', u' hello ', u' hello', None)
test('strip', u'hello', u'hello', None)
# strip/lstrip/rstrip with unicode arg
test('strip', u'xyzzyhelloxyzzy', u'hello', u'xyz')
test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', u'xyz')
test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', u'xyz')
test('strip', u'hello', u'hello', u'xyz')
# strip/lstrip/rstrip with str arg
test('strip', u'xyzzyhelloxyzzy', u'hello', 'xyz')
test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', 'xyz')
test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', 'xyz')
test('strip', u'hello', u'hello', 'xyz')
test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS') test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
if 0: if 0:

View file

@ -1005,7 +1005,9 @@ static PyBufferProcs string_as_buffer = {
#define BOTHSTRIP 2 #define BOTHSTRIP 2
/* Arrays indexed by above */ /* Arrays indexed by above */
static const char *stripname[] = {"lstrip", "rstrip", "strip"}; static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
#define STRIPNAME(i) (stripformat[i]+3)
static PyObject * static PyObject *
@ -1449,15 +1451,26 @@ do_argstrip(PyStringObject *self, int striptype, PyObject *args)
{ {
PyObject *sep = NULL; PyObject *sep = NULL;
if (!PyArg_ParseTuple(args, "|O:[lr]strip", &sep)) if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
return NULL; return NULL;
if (sep != NULL && sep != Py_None) { if (sep != NULL && sep != Py_None) {
/* XXX What about Unicode? */ if (PyString_Check(sep))
if (!PyString_Check(sep)) { return do_xstrip(self, striptype, sep);
else if (PyUnicode_Check(sep)) {
PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
PyObject *res;
if (uniself==NULL)
return NULL;
res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
striptype, sep);
Py_DECREF(uniself);
return res;
}
else {
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"%s arg must be None or string", "%s arg must be None, str or unicode",
stripname[striptype]); STRIPNAME(striptype));
return NULL; return NULL;
} }
return do_xstrip(self, striptype, sep); return do_xstrip(self, striptype, sep);
@ -1468,11 +1481,12 @@ do_argstrip(PyStringObject *self, int striptype, PyObject *args)
static char strip__doc__[] = static char strip__doc__[] =
"S.strip([sep]) -> string\n\ "S.strip([sep]) -> string or unicode\n\
\n\ \n\
Return a copy of the string S with leading and trailing\n\ Return a copy of the string S with leading and trailing\n\
whitespace removed.\n\ whitespace removed.\n\
If sep is given and not None, remove characters in sep instead."; If sep is given and not None, remove characters in sep instead.\n\
If sep is unicode, S will be converted to unicode before stripping";
static PyObject * static PyObject *
string_strip(PyStringObject *self, PyObject *args) string_strip(PyStringObject *self, PyObject *args)
@ -1485,10 +1499,11 @@ string_strip(PyStringObject *self, PyObject *args)
static char lstrip__doc__[] = static char lstrip__doc__[] =
"S.lstrip([sep]) -> string\n\ "S.lstrip([sep]) -> string or unicode\n\
\n\ \n\
Return a copy of the string S with leading whitespace removed.\n\ Return a copy of the string S with leading whitespace removed.\n\
If sep is given and not None, remove characters in sep instead."; If sep is given and not None, remove characters in sep instead.\n\
If sep is unicode, S will be converted to unicode before stripping";
static PyObject * static PyObject *
string_lstrip(PyStringObject *self, PyObject *args) string_lstrip(PyStringObject *self, PyObject *args)
@ -1501,10 +1516,11 @@ string_lstrip(PyStringObject *self, PyObject *args)
static char rstrip__doc__[] = static char rstrip__doc__[] =
"S.rstrip([sep]) -> string\n\ "S.rstrip([sep]) -> string or unicode\n\
\n\ \n\
Return a copy of the string S with trailing whitespace removed.\n\ Return a copy of the string S with trailing whitespace removed.\n\
If sep is given and not None, remove characters in sep instead."; If sep is given and not None, remove characters in sep instead.\n\
If sep is unicode, S will be converted to unicode before stripping";
static PyObject * static PyObject *
string_rstrip(PyStringObject *self, PyObject *args) string_rstrip(PyStringObject *self, PyObject *args)

View file

@ -3503,35 +3503,6 @@ PyObject *split(PyUnicodeObject *self,
return split_substring(self,list,substring,maxcount); return split_substring(self,list,substring,maxcount);
} }
static
PyObject *strip(PyUnicodeObject *self,
                int left,
                int right)
{
    /* Scan inwards from whichever ends were requested, skipping
       whitespace, then return the remaining [lo, hi) slice. */
    Py_UNICODE *buf = self->str;
    int lo = 0;
    int hi = self->length;

    if (left) {
        for (; lo < hi; lo++) {
            if (!Py_UNICODE_ISSPACE(buf[lo]))
                break;
        }
    }

    if (right) {
        for (; hi > lo; hi--) {
            if (!Py_UNICODE_ISSPACE(buf[hi-1]))
                break;
        }
    }

    if (lo != 0 || hi != self->length || !PyUnicode_CheckExact(self)) {
        return (PyObject*) PyUnicode_FromUnicode(
            self->str + lo,
            hi - lo
            );
    }

    /* nothing was removed and self is an exact unicode object:
       hand back the original string */
    Py_INCREF(self);
    return (PyObject*) self;
}
static static
PyObject *replace(PyUnicodeObject *self, PyObject *replace(PyUnicodeObject *self,
PyUnicodeObject *str1, PyUnicodeObject *str1,
@ -4464,17 +4435,173 @@ unicode_lower(PyUnicodeObject *self)
return fixup(self, fixlower); return fixup(self, fixlower);
} }
static char lstrip__doc__[] = #define LEFTSTRIP 0
"S.lstrip() -> unicode\n\ #define RIGHTSTRIP 1
\n\ #define BOTHSTRIP 2
Return a copy of the string S with leading whitespace removed.";
/* Arrays indexed by above */
/* Argument-parsing format strings, one per strip type; the text after
   the ':' is the method name. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
/* Bare method name for strip type i: skips the three-character "|O:"
   prefix of the corresponding format string. */
#define STRIPNAME(i) (stripformat[i]+3)
/* Return a pointer to the first occurrence of c among the first n
   elements of s, or NULL if c does not occur there. */
static const Py_UNICODE *
unicode_memchr(const Py_UNICODE *s, Py_UNICODE c, size_t n)
{
    /* The index has the same type as the bound: an int index would be
       compared against a size_t (signed/unsigned mismatch) and would
       overflow for n > INT_MAX. */
    size_t i;

    for (i = 0; i < n; ++i) {
        if (s[i] == c)
            return s + i;
    }
    return NULL;
}
/* externally visible for str.strip(unicode) */
/* Remove from self every leading and/or trailing character (as selected
   by striptype) that occurs in sepobj.  If nothing is removed and self
   is an exact unicode object, self is returned with its reference count
   incremented; otherwise the result comes from PyUnicode_FromUnicode(). */
PyObject *
_PyUnicode_XStrip(PyUnicodeObject *self, int striptype, PyObject *sepobj)
{
    Py_UNICODE *text = PyUnicode_AS_UNICODE(self);
    int textlen = PyUnicode_GET_SIZE(self);
    Py_UNICODE *chars = PyUnicode_AS_UNICODE(sepobj);
    int nchars = PyUnicode_GET_SIZE(sepobj);
    int start = 0;          /* index of first character kept */
    int stop = textlen;     /* one past the last character kept */

    if (striptype != RIGHTSTRIP) {
        for (; start < textlen; start++) {
            if (unicode_memchr(chars, text[start], nchars) == NULL)
                break;
        }
    }

    if (striptype != LEFTSTRIP) {
        /* never move stop below start, so an all-strippable string
           yields an empty slice */
        while (stop > start &&
               unicode_memchr(chars, text[stop-1], nchars) != NULL)
            stop--;
    }

    if (start != 0 || stop != textlen || !PyUnicode_CheckExact(self))
        return PyUnicode_FromUnicode(text+start, stop-start);

    Py_INCREF(self);
    return (PyObject*)self;
}
static PyObject * static PyObject *
unicode_lstrip(PyUnicodeObject *self) do_strip(PyUnicodeObject *self, int striptype)
{ {
return strip(self, 1, 0); Py_UNICODE *s = PyUnicode_AS_UNICODE(self);
int len = PyUnicode_GET_SIZE(self), i, j;
i = 0;
if (striptype != RIGHTSTRIP) {
while (i < len && Py_UNICODE_ISSPACE(s[i])) {
i++;
}
}
j = len;
if (striptype != LEFTSTRIP) {
do {
j--;
} while (j >= i && Py_UNICODE_ISSPACE(s[j]));
j++;
}
if (i == 0 && j == len && PyUnicode_CheckExact(self)) {
Py_INCREF(self);
return (PyObject*)self;
}
else
return PyUnicode_FromUnicode(s+i, j-i);
} }
/* Parse the optional argument of strip/lstrip/rstrip and dispatch:
   no argument or None strips whitespace, a unicode argument is used
   directly, a str argument is first converted with
   PyUnicode_FromObject(); any other type raises TypeError. */
static PyObject *
do_argstrip(PyUnicodeObject *self, int striptype, PyObject *args)
{
    PyObject *sep = NULL;
    PyObject *converted;
    PyObject *result;

    if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
        return NULL;

    if (sep == NULL || sep == Py_None)
        return do_strip(self, striptype);

    if (PyUnicode_Check(sep))
        return _PyUnicode_XStrip(self, striptype, sep);

    if (!PyString_Check(sep)) {
        PyErr_Format(PyExc_TypeError,
                     "%s arg must be None, unicode or str",
                     STRIPNAME(striptype));
        return NULL;
    }

    /* str argument: strip with a temporary unicode copy, released
       once the result has been computed */
    converted = PyUnicode_FromObject(sep);
    if (converted == NULL)
        return NULL;
    result = _PyUnicode_XStrip(self, striptype, converted);
    Py_DECREF(converted);
    return result;
}
static char strip__doc__[] =
"S.strip([sep]) -> unicode\n\
\n\
Return a copy of the string S with leading and trailing\n\
whitespace removed.\n\
If sep is given and not None, remove characters in sep instead.\n\
If sep is a str, it will be converted to unicode before stripping";
/* unicode.strip([sep]): strip both ends. */
static PyObject *
unicode_strip(PyUnicodeObject *self, PyObject *args)
{
    /* Only go through argument parsing when something was passed. */
    if (PyTuple_GET_SIZE(args) != 0)
        return do_argstrip(self, BOTHSTRIP, args);

    return do_strip(self, BOTHSTRIP); /* Common case */
}
static char lstrip__doc__[] =
"S.lstrip([sep]) -> unicode\n\
\n\
Return a copy of the string S with leading whitespace removed.\n\
If sep is given and not None, remove characters in sep instead.\n\
If sep is a str, it will be converted to unicode before stripping";
/* unicode.lstrip([sep]): strip the left end only. */
static PyObject *
unicode_lstrip(PyUnicodeObject *self, PyObject *args)
{
    /* Only go through argument parsing when something was passed. */
    if (PyTuple_GET_SIZE(args) != 0)
        return do_argstrip(self, LEFTSTRIP, args);

    return do_strip(self, LEFTSTRIP); /* Common case */
}
static char rstrip__doc__[] =
"S.rstrip([sep]) -> unicode\n\
\n\
Return a copy of the string S with trailing whitespace removed.\n\
If sep is given and not None, remove characters in sep instead.\n\
If sep is a str, it will be converted to unicode before stripping";
/* unicode.rstrip([sep]): strip the right end only. */
static PyObject *
unicode_rstrip(PyUnicodeObject *self, PyObject *args)
{
    /* Only go through argument parsing when something was passed. */
    if (PyTuple_GET_SIZE(args) != 0)
        return do_argstrip(self, RIGHTSTRIP, args);

    return do_strip(self, RIGHTSTRIP); /* Common case */
}
static PyObject* static PyObject*
unicode_repeat(PyUnicodeObject *str, int len) unicode_repeat(PyUnicodeObject *str, int len)
{ {
@ -4677,17 +4804,6 @@ unicode_rjust(PyUnicodeObject *self, PyObject *args)
return (PyObject*) pad(self, width - self->length, 0, ' '); return (PyObject*) pad(self, width - self->length, 0, ' ');
} }
static char rstrip__doc__[] =
"S.rstrip() -> unicode\n\
\n\
Return a copy of the string S with trailing whitespace removed.";
/* unicode.rstrip(): strip whitespace from the right end only. */
static PyObject *
unicode_rstrip(PyUnicodeObject *self)
{
    const int strip_left = 0;
    const int strip_right = 1;

    return strip(self, strip_left, strip_right);
}
static PyObject* static PyObject*
unicode_slice(PyUnicodeObject *self, int start, int end) unicode_slice(PyUnicodeObject *self, int start, int end)
{ {
@ -4783,17 +4899,6 @@ PyObject *unicode_str(PyUnicodeObject *self)
return PyUnicode_AsEncodedString((PyObject *)self, NULL, NULL); return PyUnicode_AsEncodedString((PyObject *)self, NULL, NULL);
} }
static char strip__doc__[] =
"S.strip() -> unicode\n\
\n\
Return a copy of S with leading and trailing whitespace removed.";
/* unicode.strip(): strip whitespace from both ends. */
static PyObject *
unicode_strip(PyUnicodeObject *self)
{
    const int strip_left = 1;
    const int strip_right = 1;

    return strip(self, strip_left, strip_right);
}
static char swapcase__doc__[] = static char swapcase__doc__[] =
"S.swapcase() -> unicode\n\ "S.swapcase() -> unicode\n\
\n\ \n\
@ -4966,14 +5071,14 @@ static PyMethodDef unicode_methods[] = {
{"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__}, {"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__},
{"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__}, {"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__},
{"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__}, {"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__},
{"lstrip", (PyCFunction) unicode_lstrip, METH_NOARGS, lstrip__doc__}, {"lstrip", (PyCFunction) unicode_lstrip, METH_VARARGS, lstrip__doc__},
/* {"maketrans", (PyCFunction) unicode_maketrans, METH_VARARGS, maketrans__doc__}, */ /* {"maketrans", (PyCFunction) unicode_maketrans, METH_VARARGS, maketrans__doc__}, */
{"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__}, {"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__},
{"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__}, {"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__},
{"rjust", (PyCFunction) unicode_rjust, METH_VARARGS, rjust__doc__}, {"rjust", (PyCFunction) unicode_rjust, METH_VARARGS, rjust__doc__},
{"rstrip", (PyCFunction) unicode_rstrip, METH_NOARGS, rstrip__doc__}, {"rstrip", (PyCFunction) unicode_rstrip, METH_VARARGS, rstrip__doc__},
{"splitlines", (PyCFunction) unicode_splitlines, METH_VARARGS, splitlines__doc__}, {"splitlines", (PyCFunction) unicode_splitlines, METH_VARARGS, splitlines__doc__},
{"strip", (PyCFunction) unicode_strip, METH_NOARGS, strip__doc__}, {"strip", (PyCFunction) unicode_strip, METH_VARARGS, strip__doc__},
{"swapcase", (PyCFunction) unicode_swapcase, METH_NOARGS, swapcase__doc__}, {"swapcase", (PyCFunction) unicode_swapcase, METH_NOARGS, swapcase__doc__},
{"translate", (PyCFunction) unicode_translate, METH_O, translate__doc__}, {"translate", (PyCFunction) unicode_translate, METH_O, translate__doc__},
{"upper", (PyCFunction) unicode_upper, METH_NOARGS, upper__doc__}, {"upper", (PyCFunction) unicode_upper, METH_NOARGS, upper__doc__},