Committing patch #591250, which provides "str1 in str2" when str1 is a string longer than 1 character.
This commit is contained in:
Barry Warsaw 2002-08-06 16:58:21 +00:00
parent b57089cdf8
commit 817918cc3c
8 changed files with 140 additions and 99 deletions

View file

@ -432,15 +432,15 @@ This table lists the sequence operations sorted in ascending priority
and \var{j} are integers: and \var{j} are integers:
\begin{tableiii}{c|l|c}{code}{Operation}{Result}{Notes} \begin{tableiii}{c|l|c}{code}{Operation}{Result}{Notes}
\lineiii{\var{x} in \var{s}}{\code{1} if an item of \var{s} is equal to \var{x}, else \code{0}}{} \lineiii{\var{x} in \var{s}}{\code{1} if an item of \var{s} is equal to \var{x}, else \code{0}}{(1)}
\lineiii{\var{x} not in \var{s}}{\code{0} if an item of \var{s} is \lineiii{\var{x} not in \var{s}}{\code{0} if an item of \var{s} is
equal to \var{x}, else \code{1}}{} equal to \var{x}, else \code{1}}{(1)}
\hline \hline
\lineiii{\var{s} + \var{t}}{the concatenation of \var{s} and \var{t}}{} \lineiii{\var{s} + \var{t}}{the concatenation of \var{s} and \var{t}}{}
\lineiii{\var{s} * \var{n}\textrm{,} \var{n} * \var{s}}{\var{n} shallow copies of \var{s} concatenated}{(1)} \lineiii{\var{s} * \var{n}\textrm{,} \var{n} * \var{s}}{\var{n} shallow copies of \var{s} concatenated}{(2)}
\hline \hline
\lineiii{\var{s}[\var{i}]}{\var{i}'th item of \var{s}, origin 0}{(2)} \lineiii{\var{s}[\var{i}]}{\var{i}'th item of \var{s}, origin 0}{(3)}
\lineiii{\var{s}[\var{i}:\var{j}]}{slice of \var{s} from \var{i} to \var{j}}{(2), (3)} \lineiii{\var{s}[\var{i}:\var{j}]}{slice of \var{s} from \var{i} to \var{j}}{(3), (4)}
\hline \hline
\lineiii{len(\var{s})}{length of \var{s}}{} \lineiii{len(\var{s})}{length of \var{s}}{}
\lineiii{min(\var{s})}{smallest item of \var{s}}{} \lineiii{min(\var{s})}{smallest item of \var{s}}{}
@ -461,7 +461,12 @@ equal to \var{x}, else \code{1}}{}
Notes: Notes:
\begin{description} \begin{description}
\item[(1)] Values of \var{n} less than \code{0} are treated as \item[(1)] When \var{s} is a string or Unicode string object the
\code{in} and \code{not in} operations act like a substring test. In
Python versions before 2.3, \var{x} had to be a string of length 1.
In Python 2.3 and beyond, \var{x} may be a string of any length.
\item[(2)] Values of \var{n} less than \code{0} are treated as
\code{0} (which yields an empty sequence of the same type as \code{0} (which yields an empty sequence of the same type as
\var{s}). Note also that the copies are shallow; nested structures \var{s}). Note also that the copies are shallow; nested structures
are not copied. This often haunts new Python programmers; consider: are not copied. This often haunts new Python programmers; consider:
@ -489,12 +494,12 @@ Notes:
[[3], [5], [7]] [[3], [5], [7]]
\end{verbatim} \end{verbatim}
\item[(2)] If \var{i} or \var{j} is negative, the index is relative to \item[(3)] If \var{i} or \var{j} is negative, the index is relative to
the end of the string: \code{len(\var{s}) + \var{i}} or the end of the string: \code{len(\var{s}) + \var{i}} or
\code{len(\var{s}) + \var{j}} is substituted. But note that \code{-0} is \code{len(\var{s}) + \var{j}} is substituted. But note that \code{-0} is
still \code{0}. still \code{0}.
\item[(3)] The slice of \var{s} from \var{i} to \var{j} is defined as \item[(4)] The slice of \var{s} from \var{i} to \var{j} is defined as
the sequence of items with index \var{k} such that \code{\var{i} <= the sequence of items with index \var{k} such that \code{\var{i} <=
\var{k} < \var{j}}. If \var{i} or \var{j} is greater than \var{k} < \var{j}}. If \var{i} or \var{j} is greater than
\code{len(\var{s})}, use \code{len(\var{s})}. If \var{i} is omitted, \code{len(\var{s})}, use \code{len(\var{s})}. If \var{i} is omitted,

View file

@ -1,7 +1,7 @@
"""Common tests shared by test_string and test_userstring""" """Common tests shared by test_string and test_userstring"""
import string import string
from test.test_support import verify, verbose, TestFailed, have_unicode from test.test_support import verify, vereq, verbose, TestFailed, have_unicode
transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377' transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
@ -295,3 +295,23 @@ def run_method_tests(test):
data = 'x\x9c\xcbH\xcd\xc9\xc9W(\xcf/\xcaI\x01\x00\x1a\x0b\x04]' data = 'x\x9c\xcbH\xcd\xc9\xc9W(\xcf/\xcaI\x01\x00\x1a\x0b\x04]'
verify('hello world'.encode('zlib') == data) verify('hello world'.encode('zlib') == data)
verify(data.decode('zlib') == 'hello world') verify(data.decode('zlib') == 'hello world')
def test_exception(lhs, rhs, msg):
try:
lhs in rhs
except TypeError:
pass
else:
raise TestFailed, msg
def run_contains_tests(test):
    """Verify substring semantics of ``in`` for 8-bit strings.

    ``test`` is not used by this function; every check goes through
    vereq() directly, so the first mismatch raises from there.
    """
    # Each entry is (needle, haystack, expected value of needle in haystack).
    cases = [
        ('', '', True),
        ('', 'abc', True),
        ('\0', 'abc', False),
        ('\0', '\0abc', True),
        ('\0', 'abc\0', True),
        ('a', '\0abc', True),
        ('asdf', 'asdf', True),
        ('asdf', 'asd', False),
        ('asdf', '', False),
    ]
    for needle, haystack, expected in cases:
        vereq(needle in haystack, expected)

View file

@ -45,17 +45,8 @@ except TypeError:
check('c' in 'abc', "'c' not in 'abc'") check('c' in 'abc', "'c' not in 'abc'")
check('d' not in 'abc', "'d' in 'abc'") check('d' not in 'abc', "'d' in 'abc'")
try: check('' in '', "'' not in ''")
'' in 'abc' check('' in 'abc', "'' not in 'abc'")
check(0, "'' in 'abc' did not raise error")
except TypeError:
pass
try:
'ab' in 'abc'
check(0, "'ab' in 'abc' did not raise error")
except TypeError:
pass
try: try:
None in 'abc' None in 'abc'
@ -71,17 +62,12 @@ if have_unicode:
check('c' in unicode('abc'), "'c' not in u'abc'") check('c' in unicode('abc'), "'c' not in u'abc'")
check('d' not in unicode('abc'), "'d' in u'abc'") check('d' not in unicode('abc'), "'d' in u'abc'")
try: check('' in unicode(''), "'' not in u''")
'' in unicode('abc') check(unicode('') in '', "u'' not in ''")
check(0, "'' in u'abc' did not raise error") check(unicode('') in unicode(''), "u'' not in u''")
except TypeError: check('' in unicode('abc'), "'' not in u'abc'")
pass check(unicode('') in 'abc', "u'' not in 'abc'")
check(unicode('') in unicode('abc'), "u'' not in u'abc'")
try:
'ab' in unicode('abc')
check(0, "'ab' in u'abc' did not raise error")
except TypeError:
pass
try: try:
None in unicode('abc') None in unicode('abc')
@ -94,35 +80,11 @@ if have_unicode:
check(unicode('c') in unicode('abc'), "u'c' not in u'abc'") check(unicode('c') in unicode('abc'), "u'c' not in u'abc'")
check(unicode('d') not in unicode('abc'), "u'd' in u'abc'") check(unicode('d') not in unicode('abc'), "u'd' in u'abc'")
try:
unicode('') in unicode('abc')
check(0, "u'' in u'abc' did not raise error")
except TypeError:
pass
try:
unicode('ab') in unicode('abc')
check(0, "u'ab' in u'abc' did not raise error")
except TypeError:
pass
# Test Unicode char in string # Test Unicode char in string
check(unicode('c') in 'abc', "u'c' not in 'abc'") check(unicode('c') in 'abc', "u'c' not in 'abc'")
check(unicode('d') not in 'abc', "u'd' in 'abc'") check(unicode('d') not in 'abc', "u'd' in 'abc'")
try:
unicode('') in 'abc'
check(0, "u'' in 'abc' did not raise error")
except TypeError:
pass
try:
unicode('ab') in 'abc'
check(0, "u'ab' in 'abc' did not raise error")
except TypeError:
pass
# A collection of tests on builtin sequence types # A collection of tests on builtin sequence types
a = range(10) a = range(10)
for i in a: for i in a:

View file

@ -51,6 +51,7 @@ def test(name, input, output, *args):
string_tests.run_module_tests(test) string_tests.run_module_tests(test)
string_tests.run_method_tests(test) string_tests.run_method_tests(test)
string_tests.run_contains_tests(test)
string.whitespace string.whitespace
string.lowercase string.lowercase

View file

@ -6,7 +6,7 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#" """#"
from test.test_support import verify, verbose, TestFailed from test.test_support import verify, vereq, verbose, TestFailed
import sys, string import sys, string
if not sys.platform.startswith('java'): if not sys.platform.startswith('java'):
@ -396,23 +396,23 @@ test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c
# Contains: # Contains:
print 'Testing Unicode contains method...', print 'Testing Unicode contains method...',
verify(('a' in u'abdb') == 1) vereq(('a' in u'abdb'), True)
verify(('a' in u'bdab') == 1) vereq(('a' in u'bdab'), True)
verify(('a' in u'bdaba') == 1) vereq(('a' in u'bdaba'), True)
verify(('a' in u'bdba') == 1) vereq(('a' in u'bdba'), True)
verify(('a' in u'bdba') == 1) vereq(('a' in u'bdba'), True)
verify((u'a' in u'bdba') == 1) vereq((u'a' in u'bdba'), True)
verify((u'a' in u'bdb') == 0) vereq((u'a' in u'bdb'), False)
verify((u'a' in 'bdb') == 0) vereq((u'a' in 'bdb'), False)
verify((u'a' in 'bdba') == 1) vereq((u'a' in 'bdba'), True)
verify((u'a' in ('a',1,None)) == 1) vereq((u'a' in ('a',1,None)), True)
verify((u'a' in (1,None,'a')) == 1) vereq((u'a' in (1,None,'a')), True)
verify((u'a' in (1,None,u'a')) == 1) vereq((u'a' in (1,None,u'a')), True)
verify(('a' in ('a',1,None)) == 1) vereq(('a' in ('a',1,None)), True)
verify(('a' in (1,None,'a')) == 1) vereq(('a' in (1,None,'a')), True)
verify(('a' in (1,None,u'a')) == 1) vereq(('a' in (1,None,u'a')), True)
verify(('a' in ('x',1,u'y')) == 0) vereq(('a' in ('x',1,u'y')), False)
verify(('a' in ('x',1,None)) == 0) vereq(('a' in ('x',1,None)), False)
print 'done.' print 'done.'
# Formatting: # Formatting:
@ -758,3 +758,42 @@ print u'abc\n',
print u'def\n' print u'def\n'
print u'def\n' print u'def\n'
print 'done.' print 'done.'
def test_exception(lhs, rhs, msg):
try:
lhs in rhs
except TypeError:
pass
else:
raise TestFailed, msg
def run_contains_tests():
    """Verify substring semantics of ``in`` across str/unicode operands.

    Every (needle, haystack) pair is checked in three pairings, in this
    order: unicode-in-str, str-in-unicode and unicode-in-unicode.  All
    three must yield the same expected result.
    """
    # Each entry is (needle, haystack, expected value of needle in haystack).
    # The literals are pure ASCII, so unicode() gives the u'' equivalents.
    cases = [
        ('', '', True),
        ('', 'abc', True),
        ('\0', 'abc', False),
        ('\0', '\0abc', True),
        ('\0', 'abc\0', True),
        ('a', '\0abc', True),
        ('asdf', 'asdf', True),
        ('asdf', 'asd', False),
        ('asdf', '', False),
    ]
    for needle, haystack, expected in cases:
        uneedle = unicode(needle)
        uhaystack = unicode(haystack)
        vereq(uneedle in haystack, expected)
        vereq(needle in uhaystack, expected)
        vereq(uneedle in uhaystack, expected)
run_contains_tests()

View file

@ -41,3 +41,4 @@ def test(methodname, input, output, *args):
print (methodname, input, output, args, res[0], res[1], res[2]) print (methodname, input, output, args, res[0], res[1], res[2])
string_tests.run_method_tests(test) string_tests.run_method_tests(test)
string_tests.run_contains_tests(test)

View file

@ -803,24 +803,31 @@ string_slice(register PyStringObject *a, register int i, register int j)
/* Containment test for 8-bit strings: implements `el in a`.
 *
 *   a   the string object that is searched
 *   el  the candidate substring
 *
 * Returns 1 if el occurs somewhere in a (an empty el always matches),
 * 0 if it does not, and -1 with TypeError set when el is not a string.
 * When Unicode support is compiled in, a Unicode el is handed to
 * PyUnicode_Contains() and its result is returned unchanged.
 */
static int
string_contains(PyObject *a, PyObject *el)
{
    const char *lhs, *rhs, *end;
    int size, len;

#ifdef Py_USING_UNICODE
    if (PyUnicode_Check(el))
        return PyUnicode_Contains(a, el);
#endif
    if (!PyString_Check(el)) {
        PyErr_SetString(PyExc_TypeError,
            "'in <string>' requires string as left operand");
        return -1;
    }
    size = PyString_Size(el);
    len = PyString_Size(a);
    rhs = PyString_AS_STRING(el);
    lhs = PyString_AS_STRING(a);

    /* A needle longer than the haystack can never match.  Return
       before computing `end`, which would otherwise point before the
       start of the buffer (undefined pointer arithmetic). */
    if (size > len)
        return 0;

    /* optimize for a single character */
    if (size == 1)
        return memchr(lhs, *rhs, len) != NULL;

    /* `end` is the last position where `size` bytes still fit. */
    end = lhs + (len - size);
    while (lhs <= end) {
        if (memcmp(lhs++, rhs, size) == 0)
            return 1;
    }
    return 0;
}

View file

@ -3732,15 +3732,14 @@ int PyUnicode_Contains(PyObject *container,
PyObject *element) PyObject *element)
{ {
PyUnicodeObject *u = NULL, *v = NULL; PyUnicodeObject *u = NULL, *v = NULL;
int result; int result, size;
register const Py_UNICODE *p, *e; register const Py_UNICODE *lhs, *end, *rhs;
register Py_UNICODE ch;
/* Coerce the two arguments */ /* Coerce the two arguments */
v = (PyUnicodeObject *)PyUnicode_FromObject(element); v = (PyUnicodeObject *)PyUnicode_FromObject(element);
if (v == NULL) { if (v == NULL) {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"'in <string>' requires character as left operand"); "'in <string>' requires string as left operand");
goto onError; goto onError;
} }
u = (PyUnicodeObject *)PyUnicode_FromObject(container); u = (PyUnicodeObject *)PyUnicode_FromObject(container);
@ -3749,20 +3748,27 @@ int PyUnicode_Contains(PyObject *container,
goto onError; goto onError;
} }
/* Check v in u */ size = PyUnicode_GET_SIZE(v);
if (PyUnicode_GET_SIZE(v) != 1) { rhs = PyUnicode_AS_UNICODE(v);
PyErr_SetString(PyExc_TypeError, lhs = PyUnicode_AS_UNICODE(u);
"'in <string>' requires character as left operand");
goto onError;
}
ch = *PyUnicode_AS_UNICODE(v);
p = PyUnicode_AS_UNICODE(u);
e = p + PyUnicode_GET_SIZE(u);
result = 0; result = 0;
while (p < e) { if (size == 1) {
if (*p++ == ch) { end = lhs + PyUnicode_GET_SIZE(u);
result = 1; while (lhs < end) {
break; if (*lhs++ == *rhs) {
result = 1;
break;
}
}
}
else {
end = lhs + (PyUnicode_GET_SIZE(u) - size);
while (lhs <= end) {
if (memcmp(lhs++, rhs, size) == 0) {
result = 1;
break;
}
} }
} }