mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
Added findall() to RegexObject -- return a list of all matches in a
string. Added groupdict() to MatchObject -- return the named groups as a dict. Added a default argument to groups() to specify what to return for unmatched groups; groupdict() accepts the same argument.
This commit is contained in:
parent
80884075f0
commit
be0b62cab4
1 changed file with 61 additions and 16 deletions
77
Lib/re.py
77
Lib/re.py
|
@ -57,6 +57,11 @@ def split(pattern, string, maxsplit=0):
|
||||||
pattern = _cachecompile(pattern)
|
pattern = _cachecompile(pattern)
|
||||||
return pattern.split(string, maxsplit)
|
return pattern.split(string, maxsplit)
|
||||||
|
|
||||||
|
def findall(pattern, string):
    """Return the result of calling the pattern's findall() on string.

    pattern may be a pattern string, in which case it is first turned
    into a pattern object with _cachecompile(), or any object that
    already provides a findall() method (it is then used as-is).
    """
    if type(pattern) != type(''):
        # Not a plain string: treat it as an already compiled pattern.
        return pattern.findall(string)
    return _cachecompile(pattern).findall(string)
|
||||||
|
|
||||||
def escape(pattern):
|
def escape(pattern):
|
||||||
"Escape all non-alphanumeric characters in pattern."
|
"Escape all non-alphanumeric characters in pattern."
|
||||||
result = []
|
result = []
|
||||||
|
@ -80,6 +85,7 @@ def compile(pattern, flags=0):
|
||||||
#
|
#
|
||||||
|
|
||||||
class RegexObject:
|
class RegexObject:
|
||||||
|
|
||||||
def __init__(self, pattern, flags, code, groupindex):
|
def __init__(self, pattern, flags, code, groupindex):
|
||||||
self.code = code
|
self.code = code
|
||||||
self.flags = flags
|
self.flags = flags
|
||||||
|
@ -171,7 +177,7 @@ class RegexObject:
|
||||||
return (string.join(results, ''), n)
|
return (string.join(results, ''), n)
|
||||||
|
|
||||||
def split(self, source, maxsplit=0):
|
def split(self, source, maxsplit=0):
|
||||||
"""Split the \var{source} string by the occurrences of the pattern,
|
"""Split the source string by the occurrences of the pattern,
|
||||||
returning a list containing the resulting substrings."""
|
returning a list containing the resulting substrings."""
|
||||||
|
|
||||||
if maxsplit < 0:
|
if maxsplit < 0:
|
||||||
|
@ -198,13 +204,38 @@ class RegexObject:
|
||||||
results.append(source[lastmatch:i])
|
results.append(source[lastmatch:i])
|
||||||
g = m.groups()
|
g = m.groups()
|
||||||
if g:
|
if g:
|
||||||
if type(g)==type( "" ): g = [g]
|
|
||||||
results[len(results):] = list(g)
|
results[len(results):] = list(g)
|
||||||
pos = lastmatch = j
|
pos = lastmatch = j
|
||||||
n = n + 1
|
n = n + 1
|
||||||
results.append(source[lastmatch:])
|
results.append(source[lastmatch:])
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
def findall(self, string):
    """Return a list of all non-overlapping matches in the string.

    If one or more groups are present in the pattern, return a
    list of groups; this will be a list of tuples if the pattern
    has more than one group.

    Empty matches are included in the result, each one exactly once.

    """
    pos = 0
    n = len(string)
    result = []
    # pos == n is still searched so that an empty match at the very
    # end of the string is reported.
    while pos <= n:
        m = self.search(string, pos)
        if not m:
            break
        gr = m.groups()
        if not gr:
            # No groups in the pattern: report the whole match.
            gr = m.group()
        elif len(gr) == 1:
            # Exactly one group: report its text rather than a 1-tuple.
            gr = gr[0]
        result.append(gr)
        start, end = m.span()
        if end > start:
            pos = end
        else:
            # Empty match: resume one character past it.  Advancing
            # relative to the old search position instead would leave
            # pos on an empty match found further along the string,
            # and the next search would report that same match again.
            pos = end + 1
    return result
|
||||||
|
|
||||||
# The following 3 functions were contributed by Mike Fletcher, and
|
# The following 3 functions were contributed by Mike Fletcher, and
|
||||||
# allow pickling and unpickling of RegexObject instances.
|
# allow pickling and unpickling of RegexObject instances.
|
||||||
def __getinitargs__(self):
|
def __getinitargs__(self):
|
||||||
|
@ -221,6 +252,7 @@ class RegexObject:
|
||||||
self.code = apply(pcre_compile, statetuple)
|
self.code = apply(pcre_compile, statetuple)
|
||||||
|
|
||||||
class MatchObject:
|
class MatchObject:
|
||||||
|
|
||||||
def __init__(self, re, string, pos, endpos, regs):
|
def __init__(self, re, string, pos, endpos, regs):
|
||||||
self.re = re
|
self.re = re
|
||||||
self.string = string
|
self.string = string
|
||||||
|
@ -234,7 +266,7 @@ class MatchObject:
|
||||||
try:
|
try:
|
||||||
g = self.re.groupindex[g]
|
g = self.re.groupindex[g]
|
||||||
except (KeyError, TypeError):
|
except (KeyError, TypeError):
|
||||||
raise IndexError, ('group "' + g + '" is undefined')
|
raise IndexError, 'group %s is undefined' % `g`
|
||||||
return self.regs[g][0]
|
return self.regs[g][0]
|
||||||
|
|
||||||
def end(self, g = 0):
|
def end(self, g = 0):
|
||||||
|
@ -243,31 +275,31 @@ class MatchObject:
|
||||||
try:
|
try:
|
||||||
g = self.re.groupindex[g]
|
g = self.re.groupindex[g]
|
||||||
except (KeyError, TypeError):
|
except (KeyError, TypeError):
|
||||||
raise IndexError, ('group "' + g + '" is undefined')
|
raise IndexError, 'group %s is undefined' % `g`
|
||||||
return self.regs[g][1]
|
return self.regs[g][1]
|
||||||
|
|
||||||
def span(self, g = 0):
|
def span(self, g = 0):
|
||||||
"""Return a tuple containing the start,end of the substring
|
"Return (start, end) of the substring matched by group g"
|
||||||
matched by group g"""
|
|
||||||
if type(g) == type(''):
|
if type(g) == type(''):
|
||||||
try:
|
try:
|
||||||
g = self.re.groupindex[g]
|
g = self.re.groupindex[g]
|
||||||
except (KeyError, TypeError):
|
except (KeyError, TypeError):
|
||||||
raise IndexError, ('group "' + g + '" is undefined')
|
raise IndexError, 'group %s is undefined' % `g`
|
||||||
return self.regs[g]
|
return self.regs[g]
|
||||||
|
|
||||||
def groups(self):
|
def groups(self, default=None):
|
||||||
"Return a tuple containing all subgroups of the match object"
|
"Return a tuple containing all subgroups of the match object"
|
||||||
result = []
|
result = []
|
||||||
for g in range(1, self.re._num_regs):
|
for g in range(1, self.re._num_regs):
|
||||||
if (self.regs[g][0] == -1) or (self.regs[g][1] == -1):
|
a, b = self.regs[g]
|
||||||
result.append(None)
|
if a == -1 or b == -1:
|
||||||
|
result.append(default)
|
||||||
else:
|
else:
|
||||||
result.append(self.string[self.regs[g][0]:self.regs[g][1]])
|
result.append(self.string[a:b])
|
||||||
return tuple(result)
|
return tuple(result)
|
||||||
|
|
||||||
def group(self, *groups):
|
def group(self, *groups):
|
||||||
"Return one or more groups of the match."
|
"Return one or more groups of the match"
|
||||||
if len(groups) == 0:
|
if len(groups) == 0:
|
||||||
groups = (0,)
|
groups = (0,)
|
||||||
result = []
|
result = []
|
||||||
|
@ -276,15 +308,28 @@ class MatchObject:
|
||||||
try:
|
try:
|
||||||
g = self.re.groupindex[g]
|
g = self.re.groupindex[g]
|
||||||
except (KeyError, TypeError):
|
except (KeyError, TypeError):
|
||||||
raise IndexError, ('group "' + g + '" is undefined')
|
raise IndexError, 'group %s is undefined' % `g`
|
||||||
if len(self.regs)<=g: raise IndexError, ('group "' + str(g) + '" is undefined')
|
if g >= len(self.regs):
|
||||||
elif (self.regs[g][0] == -1) or (self.regs[g][1] == -1):
|
raise IndexError, 'group %s is undefined' % `g`
|
||||||
|
a, b = self.regs[g]
|
||||||
|
if a == -1 or b == -1:
|
||||||
result.append(None)
|
result.append(None)
|
||||||
else:
|
else:
|
||||||
result.append(self.string[self.regs[g][0]:self.regs[g][1]])
|
result.append(self.string[a:b])
|
||||||
if len(result) > 1:
|
if len(result) > 1:
|
||||||
return tuple(result)
|
return tuple(result)
|
||||||
elif len(result) == 1:
|
elif len(result) == 1:
|
||||||
return result[0]
|
return result[0]
|
||||||
else:
|
else:
|
||||||
return ()
|
return ()
|
||||||
|
|
||||||
|
def groupdict(self, default=None):
    """Return a dictionary containing all named subgroups of the match.

    The keys are the group names found in self.re.groupindex.  Each
    value is the substring of self.string matched by that group, or
    default when either end of the group's register is -1 (the group
    did not take part in the match).
    """
    # Named 'named' rather than 'dict' so the builtin is not shadowed.
    named = {}
    for name, index in self.re.groupindex.items():
        a, b = self.regs[index]
        if a == -1 or b == -1:
            named[name] = default
        else:
            named[name] = self.string[a:b]
    return named
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue