mirror of
https://github.com/python/cpython.git
synced 2025-10-10 00:43:41 +00:00
SRE fixes for 2.1 alpha:
-- added some more docstrings
-- fixed typo in scanner class (#125531)
-- the multiline flag (?m) shouldn't affect the \Z operator (#127259)
-- fixed non-greedy backtracking bug (#123769, #127259)
-- added sre.DEBUG flag (currently dumps the parsed pattern structure)
-- fixed a couple of glitches in groupdict (the #126587 memory leak had already been fixed by AMK)
This commit is contained in:
parent
77b20f099e
commit
770617b23e
7 changed files with 104 additions and 66 deletions
51
Lib/sre.py
51
Lib/sre.py
|
@ -3,7 +3,7 @@
|
||||||
#
|
#
|
||||||
# re-compatible interface for the sre matching engine
|
# re-compatible interface for the sre matching engine
|
||||||
#
|
#
|
||||||
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
|
# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
|
||||||
#
|
#
|
||||||
# This version of the SRE library can be redistributed under CNRI's
|
# This version of the SRE library can be redistributed under CNRI's
|
||||||
# Python 1.6 license. For any other use, please contact Secret Labs
|
# Python 1.6 license. For any other use, please contact Secret Labs
|
||||||
|
@ -14,23 +14,22 @@
|
||||||
# other compatibility work.
|
# other compatibility work.
|
||||||
#
|
#
|
||||||
|
|
||||||
# FIXME: change all FIXME's to XXX ;-)
|
|
||||||
|
|
||||||
import sre_compile
|
import sre_compile
|
||||||
import sre_parse
|
import sre_parse
|
||||||
|
|
||||||
import string
|
import string
|
||||||
|
|
||||||
# flags
|
# flags
|
||||||
I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE
|
I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case
|
||||||
L = LOCALE = sre_compile.SRE_FLAG_LOCALE
|
L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale
|
||||||
M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE
|
U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale
|
||||||
S = DOTALL = sre_compile.SRE_FLAG_DOTALL
|
M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline
|
||||||
X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE
|
S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline
|
||||||
|
X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments
|
||||||
|
|
||||||
# sre extensions (may or may not be in 1.6/2.0 final)
|
# sre extensions (experimental, don't rely on these)
|
||||||
T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE
|
T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE # disable backtracking
|
||||||
U = UNICODE = sre_compile.SRE_FLAG_UNICODE
|
DEBUG = sre_compile.SRE_FLAG_DEBUG # dump pattern after compilation
|
||||||
|
|
||||||
# sre exception
|
# sre exception
|
||||||
error = sre_compile.error
|
error = sre_compile.error
|
||||||
|
@ -38,36 +37,60 @@ error = sre_compile.error
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
# public interface
|
# public interface
|
||||||
|
|
||||||
# FIXME: add docstrings
|
|
||||||
|
|
||||||
def match(pattern, string, flags=0):
|
def match(pattern, string, flags=0):
|
||||||
|
"""Try to apply the pattern at the start of the string, returning
|
||||||
|
a match object, or None if no match was found."""
|
||||||
return _compile(pattern, flags).match(string)
|
return _compile(pattern, flags).match(string)
|
||||||
|
|
||||||
def search(pattern, string, flags=0):
|
def search(pattern, string, flags=0):
|
||||||
|
"""Scan through string looking for a match to the pattern, returning
|
||||||
|
a match object, or None if no match was found."""
|
||||||
return _compile(pattern, flags).search(string)
|
return _compile(pattern, flags).search(string)
|
||||||
|
|
||||||
def sub(pattern, repl, string, count=0):
|
def sub(pattern, repl, string, count=0):
|
||||||
|
"""Return the string obtained by replacing the leftmost
|
||||||
|
non-overlapping occurrences of the pattern in string by the
|
||||||
|
replacement repl"""
|
||||||
return _compile(pattern, 0).sub(repl, string, count)
|
return _compile(pattern, 0).sub(repl, string, count)
|
||||||
|
|
||||||
def subn(pattern, repl, string, count=0):
|
def subn(pattern, repl, string, count=0):
|
||||||
|
"""Return a 2-tuple containing (new_string, number).
|
||||||
|
new_string is the string obtained by replacing the leftmost
|
||||||
|
non-overlapping occurrences of the pattern in the source
|
||||||
|
string by the replacement repl. number is the number of
|
||||||
|
substitutions that were made."""
|
||||||
return _compile(pattern, 0).subn(repl, string, count)
|
return _compile(pattern, 0).subn(repl, string, count)
|
||||||
|
|
||||||
def split(pattern, string, maxsplit=0):
|
def split(pattern, string, maxsplit=0):
|
||||||
|
"""Split the source string by the occurrences of the pattern,
|
||||||
|
returning a list containing the resulting substrings."""
|
||||||
return _compile(pattern, 0).split(string, maxsplit)
|
return _compile(pattern, 0).split(string, maxsplit)
|
||||||
|
|
||||||
def findall(pattern, string, maxsplit=0):
|
def findall(pattern, string, maxsplit=0):
|
||||||
|
"""Return a list of all non-overlapping matches in the string.
|
||||||
|
|
||||||
|
If one or more groups are present in the pattern, return a
|
||||||
|
list of groups; this will be a list of tuples if the pattern
|
||||||
|
has more than one group.
|
||||||
|
|
||||||
|
Empty matches are included in the result."""
|
||||||
return _compile(pattern, 0).findall(string, maxsplit)
|
return _compile(pattern, 0).findall(string, maxsplit)
|
||||||
|
|
||||||
def compile(pattern, flags=0):
|
def compile(pattern, flags=0):
|
||||||
|
"Compile a regular expression pattern, returning a pattern object."
|
||||||
return _compile(pattern, flags)
|
return _compile(pattern, flags)
|
||||||
|
|
||||||
def purge():
|
def purge():
|
||||||
|
"Clear the regular expression cache"
|
||||||
_cache.clear()
|
_cache.clear()
|
||||||
|
|
||||||
def template(pattern, flags=0):
|
def template(pattern, flags=0):
|
||||||
|
"Compile a template pattern, returning a pattern object"
|
||||||
|
|
||||||
return _compile(pattern, flags|T)
|
return _compile(pattern, flags|T)
|
||||||
|
|
||||||
def escape(pattern):
|
def escape(pattern):
|
||||||
|
"Escape all non-alphanumeric characters in pattern."
|
||||||
s = list(pattern)
|
s = list(pattern)
|
||||||
for i in range(len(pattern)):
|
for i in range(len(pattern)):
|
||||||
c = pattern[i]
|
c = pattern[i]
|
||||||
|
@ -204,7 +227,7 @@ class Scanner:
|
||||||
break
|
break
|
||||||
action = self.lexicon[m.lastindex][1]
|
action = self.lexicon[m.lastindex][1]
|
||||||
if callable(action):
|
if callable(action):
|
||||||
self.match = match
|
self.match = m
|
||||||
action = action(self, m.group())
|
action = action(self, m.group())
|
||||||
if action is not None:
|
if action is not None:
|
||||||
append(action)
|
append(action)
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
#
|
#
|
||||||
# convert template to internal format
|
# convert template to internal format
|
||||||
#
|
#
|
||||||
# Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
|
# Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
|
||||||
#
|
#
|
||||||
# See the sre.py file for information on usage and redistribution.
|
# See the sre.py file for information on usage and redistribution.
|
||||||
#
|
#
|
||||||
|
@ -176,7 +176,7 @@ def _optimize_charset(charset, fixup):
|
||||||
for i in range(fixup(av[0]), fixup(av[1])+1):
|
for i in range(fixup(av[0]), fixup(av[1])+1):
|
||||||
charmap[i] = 1
|
charmap[i] = 1
|
||||||
elif op is CATEGORY:
|
elif op is CATEGORY:
|
||||||
# FIXME: could append to charmap tail
|
# XXX: could append to charmap tail
|
||||||
return charset # cannot compress
|
return charset # cannot compress
|
||||||
except IndexError:
|
except IndexError:
|
||||||
# character set contains unicode characters
|
# character set contains unicode characters
|
||||||
|
@ -364,7 +364,7 @@ def compile(p, flags=0):
|
||||||
|
|
||||||
# print code
|
# print code
|
||||||
|
|
||||||
# FIXME: <fl> get rid of this limitation!
|
# XXX: <fl> get rid of this limitation!
|
||||||
assert p.pattern.groups <= 100,\
|
assert p.pattern.groups <= 100,\
|
||||||
"sorry, but this version only supports 100 named groups"
|
"sorry, but this version only supports 100 named groups"
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
# various symbols used by the regular expression engine.
|
# various symbols used by the regular expression engine.
|
||||||
# run this script to update the _sre include files!
|
# run this script to update the _sre include files!
|
||||||
#
|
#
|
||||||
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
|
# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
|
||||||
#
|
#
|
||||||
# See the sre.py file for information on usage and redistribution.
|
# See the sre.py file for information on usage and redistribution.
|
||||||
#
|
#
|
||||||
|
@ -54,10 +54,12 @@ SUBPATTERN = "subpattern"
|
||||||
# positions
|
# positions
|
||||||
AT_BEGINNING = "at_beginning"
|
AT_BEGINNING = "at_beginning"
|
||||||
AT_BEGINNING_LINE = "at_beginning_line"
|
AT_BEGINNING_LINE = "at_beginning_line"
|
||||||
|
AT_BEGINNING_STRING = "at_beginning_string"
|
||||||
AT_BOUNDARY = "at_boundary"
|
AT_BOUNDARY = "at_boundary"
|
||||||
AT_NON_BOUNDARY = "at_non_boundary"
|
AT_NON_BOUNDARY = "at_non_boundary"
|
||||||
AT_END = "at_end"
|
AT_END = "at_end"
|
||||||
AT_END_LINE = "at_end_line"
|
AT_END_LINE = "at_end_line"
|
||||||
|
AT_END_STRING = "at_end_string"
|
||||||
|
|
||||||
# categories
|
# categories
|
||||||
CATEGORY_DIGIT = "category_digit"
|
CATEGORY_DIGIT = "category_digit"
|
||||||
|
@ -109,8 +111,8 @@ OPCODES = [
|
||||||
]
|
]
|
||||||
|
|
||||||
ATCODES = [
|
ATCODES = [
|
||||||
AT_BEGINNING, AT_BEGINNING_LINE, AT_BOUNDARY,
|
AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
|
||||||
AT_NON_BOUNDARY, AT_END, AT_END_LINE
|
AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING
|
||||||
]
|
]
|
||||||
|
|
||||||
CHCODES = [
|
CHCODES = [
|
||||||
|
@ -178,6 +180,7 @@ SRE_FLAG_MULTILINE = 8 # treat target as multiline string
|
||||||
SRE_FLAG_DOTALL = 16 # treat target as a single string
|
SRE_FLAG_DOTALL = 16 # treat target as a single string
|
||||||
SRE_FLAG_UNICODE = 32 # use unicode locale
|
SRE_FLAG_UNICODE = 32 # use unicode locale
|
||||||
SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
|
SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
|
||||||
|
SRE_FLAG_DEBUG = 128 # debugging
|
||||||
|
|
||||||
# flags for INFO primitive
|
# flags for INFO primitive
|
||||||
SRE_INFO_PREFIX = 1 # has prefix
|
SRE_INFO_PREFIX = 1 # has prefix
|
||||||
|
@ -201,7 +204,7 @@ if __name__ == "__main__":
|
||||||
* NOTE: This file is generated by sre_constants.py. If you need
|
* NOTE: This file is generated by sre_constants.py. If you need
|
||||||
* to change anything in here, edit sre_constants.py and run it.
|
* to change anything in here, edit sre_constants.py and run it.
|
||||||
*
|
*
|
||||||
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
|
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
|
||||||
*
|
*
|
||||||
* See the _sre.c file for information on usage and redistribution.
|
* See the _sre.c file for information on usage and redistribution.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
#
|
#
|
||||||
# convert re-style regular expression to sre pattern
|
# convert re-style regular expression to sre pattern
|
||||||
#
|
#
|
||||||
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
|
# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
|
||||||
#
|
#
|
||||||
# See the sre.py file for information on usage and redistribution.
|
# See the sre.py file for information on usage and redistribution.
|
||||||
#
|
#
|
||||||
|
@ -34,7 +34,7 @@ ESCAPES = {
|
||||||
}
|
}
|
||||||
|
|
||||||
CATEGORIES = {
|
CATEGORIES = {
|
||||||
r"\A": (AT, AT_BEGINNING), # start of string
|
r"\A": (AT, AT_BEGINNING_STRING), # start of string
|
||||||
r"\b": (AT, AT_BOUNDARY),
|
r"\b": (AT, AT_BOUNDARY),
|
||||||
r"\B": (AT, AT_NON_BOUNDARY),
|
r"\B": (AT, AT_NON_BOUNDARY),
|
||||||
r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
|
r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
|
||||||
|
@ -43,7 +43,7 @@ CATEGORIES = {
|
||||||
r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
|
r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
|
||||||
r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
|
r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
|
||||||
r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
|
r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
|
||||||
r"\Z": (AT, AT_END), # end of string
|
r"\Z": (AT, AT_END_STRING), # end of string
|
||||||
}
|
}
|
||||||
|
|
||||||
FLAGS = {
|
FLAGS = {
|
||||||
|
@ -421,13 +421,13 @@ def _parse(source, state):
|
||||||
code1 = code1[1][0]
|
code1 = code1[1][0]
|
||||||
set.append(code1)
|
set.append(code1)
|
||||||
|
|
||||||
# FIXME: <fl> move set optimization to compiler!
|
# XXX: <fl> should move set optimization to compiler!
|
||||||
if len(set)==1 and set[0][0] is LITERAL:
|
if len(set)==1 and set[0][0] is LITERAL:
|
||||||
subpattern.append(set[0]) # optimization
|
subpattern.append(set[0]) # optimization
|
||||||
elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
|
elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
|
||||||
subpattern.append((NOT_LITERAL, set[1][1])) # optimization
|
subpattern.append((NOT_LITERAL, set[1][1])) # optimization
|
||||||
else:
|
else:
|
||||||
# FIXME: <fl> add charmap optimization
|
# XXX: <fl> should add charmap optimization here
|
||||||
subpattern.append((IN, set))
|
subpattern.append((IN, set))
|
||||||
|
|
||||||
elif this and this[0] in REPEAT_CHARS:
|
elif this and this[0] in REPEAT_CHARS:
|
||||||
|
@ -457,7 +457,7 @@ def _parse(source, state):
|
||||||
min = int(lo)
|
min = int(lo)
|
||||||
if hi:
|
if hi:
|
||||||
max = int(hi)
|
max = int(hi)
|
||||||
# FIXME: <fl> check that hi >= lo!
|
# XXX: <fl> check that hi >= lo ???
|
||||||
else:
|
else:
|
||||||
raise error, "not supported"
|
raise error, "not supported"
|
||||||
# figure out which item to repeat
|
# figure out which item to repeat
|
||||||
|
@ -601,7 +601,8 @@ def parse(str, flags=0, pattern=None):
|
||||||
elif tail:
|
elif tail:
|
||||||
raise error, "bogus characters at end of regular expression"
|
raise error, "bogus characters at end of regular expression"
|
||||||
|
|
||||||
# p.dump()
|
if flags & SRE_FLAG_DEBUG:
|
||||||
|
p.dump()
|
||||||
|
|
||||||
if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE:
|
if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE:
|
||||||
# the VERBOSE flag was switched on inside the pattern. to be
|
# the VERBOSE flag was switched on inside the pattern. to be
|
||||||
|
@ -672,8 +673,7 @@ def parse_template(source, pattern):
|
||||||
return p
|
return p
|
||||||
|
|
||||||
def expand_template(template, match):
|
def expand_template(template, match):
|
||||||
# FIXME: <fl> this is sooooo slow. drop in the slicelist
|
# XXX: <fl> this is sooooo slow. drop in the slicelist code instead
|
||||||
# code instead
|
|
||||||
p = []
|
p = []
|
||||||
a = p.append
|
a = p.append
|
||||||
sep = match.string[:0]
|
sep = match.string[:0]
|
||||||
|
|
|
@ -47,12 +47,12 @@ if verbose:
|
||||||
print 'Running tests on character literals'
|
print 'Running tests on character literals'
|
||||||
|
|
||||||
for i in [0, 8, 16, 32, 64, 127, 128, 255]:
|
for i in [0, 8, 16, 32, 64, 127, 128, 255]:
|
||||||
test(r"""sre.match(r"\%03o" % i, chr(i)) is not None""", 1)
|
test(r"""sre.match(r"\%03o" % i, chr(i)) != None""", 1)
|
||||||
test(r"""sre.match(r"\%03o0" % i, chr(i)+"0") is not None""", 1)
|
test(r"""sre.match(r"\%03o0" % i, chr(i)+"0") != None""", 1)
|
||||||
test(r"""sre.match(r"\%03o8" % i, chr(i)+"8") is not None""", 1)
|
test(r"""sre.match(r"\%03o8" % i, chr(i)+"8") != None""", 1)
|
||||||
test(r"""sre.match(r"\x%02x" % i, chr(i)) is not None""", 1)
|
test(r"""sre.match(r"\x%02x" % i, chr(i)) != None""", 1)
|
||||||
test(r"""sre.match(r"\x%02x0" % i, chr(i)+"0") is not None""", 1)
|
test(r"""sre.match(r"\x%02x0" % i, chr(i)+"0") != None""", 1)
|
||||||
test(r"""sre.match(r"\x%02xz" % i, chr(i)+"z") is not None""", 1)
|
test(r"""sre.match(r"\x%02xz" % i, chr(i)+"z") != None""", 1)
|
||||||
test(r"""sre.match("\911", "")""", None, sre.error)
|
test(r"""sre.match("\911", "")""", None, sre.error)
|
||||||
|
|
||||||
#
|
#
|
||||||
|
@ -197,11 +197,11 @@ if verbose:
|
||||||
p = ""
|
p = ""
|
||||||
for i in range(0, 256):
|
for i in range(0, 256):
|
||||||
p = p + chr(i)
|
p = p + chr(i)
|
||||||
test(r"""sre.match(sre.escape(chr(i)), chr(i)) is not None""", 1)
|
test(r"""sre.match(sre.escape(chr(i)), chr(i)) != None""", 1)
|
||||||
test(r"""sre.match(sre.escape(chr(i)), chr(i)).span()""", (0,1))
|
test(r"""sre.match(sre.escape(chr(i)), chr(i)).span()""", (0,1))
|
||||||
|
|
||||||
pat = sre.compile(sre.escape(p))
|
pat = sre.compile(sre.escape(p))
|
||||||
test(r"""pat.match(p) is not None""", 1)
|
test(r"""pat.match(p) != None""", 1)
|
||||||
test(r"""pat.match(p).span()""", (0,256))
|
test(r"""pat.match(p).span()""", (0,256))
|
||||||
|
|
||||||
if verbose:
|
if verbose:
|
||||||
|
|
|
@ -22,8 +22,10 @@
|
||||||
* 2000-09-21 fl don't use the buffer interface for unicode strings
|
* 2000-09-21 fl don't use the buffer interface for unicode strings
|
||||||
* 2000-10-03 fl fixed assert_not primitive; support keyword arguments
|
* 2000-10-03 fl fixed assert_not primitive; support keyword arguments
|
||||||
* 2000-10-24 fl really fixed assert_not; reset groups in findall
|
* 2000-10-24 fl really fixed assert_not; reset groups in findall
|
||||||
|
* 2000-12-21 fl fixed memory leak in groupdict
|
||||||
|
* 2001-01-02 fl properly reset pointer after failed assertion in MIN_UNTIL
|
||||||
*
|
*
|
||||||
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
|
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
|
||||||
*
|
*
|
||||||
* This version of the SRE library can be redistributed under CNRI's
|
* This version of the SRE library can be redistributed under CNRI's
|
||||||
* Python 1.6 license. For any other use, please contact Secret Labs
|
* Python 1.6 license. For any other use, please contact Secret Labs
|
||||||
|
@ -355,6 +357,7 @@ SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
|
||||||
switch (at) {
|
switch (at) {
|
||||||
|
|
||||||
case SRE_AT_BEGINNING:
|
case SRE_AT_BEGINNING:
|
||||||
|
case SRE_AT_BEGINNING_STRING:
|
||||||
return ((void*) ptr == state->beginning);
|
return ((void*) ptr == state->beginning);
|
||||||
|
|
||||||
case SRE_AT_BEGINNING_LINE:
|
case SRE_AT_BEGINNING_LINE:
|
||||||
|
@ -370,6 +373,9 @@ SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
|
||||||
return ((void*) ptr == state->end ||
|
return ((void*) ptr == state->end ||
|
||||||
SRE_IS_LINEBREAK((int) ptr[0]));
|
SRE_IS_LINEBREAK((int) ptr[0]));
|
||||||
|
|
||||||
|
case SRE_AT_END_STRING:
|
||||||
|
return ((void*) ptr == state->end);
|
||||||
|
|
||||||
case SRE_AT_BOUNDARY:
|
case SRE_AT_BOUNDARY:
|
||||||
if (state->beginning == state->end)
|
if (state->beginning == state->end)
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -826,7 +832,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
||||||
/* this operator only works if the repeated item is
|
/* this operator only works if the repeated item is
|
||||||
exactly one character wide, and we're not already
|
exactly one character wide, and we're not already
|
||||||
collecting backtracking points. for other cases,
|
collecting backtracking points. for other cases,
|
||||||
use the MAX_REPEAT operator instead */
|
use the MAX_REPEAT operator */
|
||||||
|
|
||||||
/* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
|
/* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
|
||||||
|
|
||||||
|
@ -900,7 +906,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
||||||
|
|
||||||
case SRE_OP_REPEAT:
|
case SRE_OP_REPEAT:
|
||||||
/* create repeat context. all the hard work is done
|
/* create repeat context. all the hard work is done
|
||||||
by the UNTIL operator */
|
by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
|
||||||
/* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
|
/* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
|
||||||
TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
|
TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
|
||||||
pattern[1], pattern[2]));
|
pattern[1], pattern[2]));
|
||||||
|
@ -974,6 +980,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
||||||
if (i)
|
if (i)
|
||||||
return i;
|
return i;
|
||||||
state->repeat = rp;
|
state->repeat = rp;
|
||||||
|
state->ptr = ptr;
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
case SRE_OP_MIN_UNTIL:
|
case SRE_OP_MIN_UNTIL:
|
||||||
|
@ -986,7 +993,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
||||||
|
|
||||||
count = rp->count + 1;
|
count = rp->count + 1;
|
||||||
|
|
||||||
TRACE(("|%p|%p|MIN_UNTIL %d\n", pattern, ptr, count));
|
TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern, ptr, count,
|
||||||
|
rp->pattern));
|
||||||
|
|
||||||
state->ptr = ptr;
|
state->ptr = ptr;
|
||||||
|
|
||||||
|
@ -1009,6 +1017,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
||||||
/* free(rp); */
|
/* free(rp); */
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
state->ptr = ptr;
|
||||||
state->repeat = rp;
|
state->repeat = rp;
|
||||||
|
|
||||||
if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
|
if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
|
||||||
|
@ -1020,6 +1029,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
||||||
if (i)
|
if (i)
|
||||||
return i;
|
return i;
|
||||||
rp->count = count - 1;
|
rp->count = count - 1;
|
||||||
|
state->ptr = ptr;
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -1965,7 +1975,7 @@ match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
|
||||||
|
|
||||||
PyObject* def = Py_None;
|
PyObject* def = Py_None;
|
||||||
static char* kwlist[] = { "default", NULL };
|
static char* kwlist[] = { "default", NULL };
|
||||||
if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
|
if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
result = PyDict_New();
|
result = PyDict_New();
|
||||||
|
@ -1973,35 +1983,35 @@ match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
|
||||||
return result;
|
return result;
|
||||||
|
|
||||||
keys = PyMapping_Keys(self->pattern->groupindex);
|
keys = PyMapping_Keys(self->pattern->groupindex);
|
||||||
if (!keys) {
|
if (!keys)
|
||||||
Py_DECREF(result);
|
goto failed;
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (index = 0; index < PyList_GET_SIZE(keys); index++) {
|
for (index = 0; index < PyList_GET_SIZE(keys); index++) {
|
||||||
|
int status;
|
||||||
PyObject* key;
|
PyObject* key;
|
||||||
PyObject* item;
|
PyObject* value;
|
||||||
key = PyList_GET_ITEM(keys, index);
|
key = PyList_GET_ITEM(keys, index);
|
||||||
if (!key) {
|
if (!key)
|
||||||
Py_DECREF(keys);
|
goto failed;
|
||||||
Py_DECREF(result);
|
value = match_getslice(self, key, def);
|
||||||
return NULL;
|
if (!value) {
|
||||||
}
|
|
||||||
item = match_getslice(self, key, def);
|
|
||||||
if (!item) {
|
|
||||||
Py_DECREF(key);
|
Py_DECREF(key);
|
||||||
Py_DECREF(keys);
|
goto failed;
|
||||||
Py_DECREF(result);
|
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
/* FIXME: <fl> this can fail, right? */
|
status = PyDict_SetItem(result, key, value);
|
||||||
PyDict_SetItem(result, key, item);
|
Py_DECREF(value);
|
||||||
Py_DECREF(item);
|
if (status < 0)
|
||||||
|
goto failed;
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_DECREF(keys);
|
Py_DECREF(keys);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
|
||||||
|
failed:
|
||||||
|
Py_DECREF(keys);
|
||||||
|
Py_DECREF(result);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
* NOTE: This file is generated by sre_constants.py. If you need
|
* NOTE: This file is generated by sre_constants.py. If you need
|
||||||
* to change anything in here, edit sre_constants.py and run it.
|
* to change anything in here, edit sre_constants.py and run it.
|
||||||
*
|
*
|
||||||
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
|
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
|
||||||
*
|
*
|
||||||
* See the _sre.c file for information on usage and redistribution.
|
* See the _sre.c file for information on usage and redistribution.
|
||||||
*/
|
*/
|
||||||
|
@ -42,10 +42,12 @@
|
||||||
#define SRE_OP_SUBPATTERN 28
|
#define SRE_OP_SUBPATTERN 28
|
||||||
#define SRE_AT_BEGINNING 0
|
#define SRE_AT_BEGINNING 0
|
||||||
#define SRE_AT_BEGINNING_LINE 1
|
#define SRE_AT_BEGINNING_LINE 1
|
||||||
#define SRE_AT_BOUNDARY 2
|
#define SRE_AT_BEGINNING_STRING 2
|
||||||
#define SRE_AT_NON_BOUNDARY 3
|
#define SRE_AT_BOUNDARY 3
|
||||||
#define SRE_AT_END 4
|
#define SRE_AT_NON_BOUNDARY 4
|
||||||
#define SRE_AT_END_LINE 5
|
#define SRE_AT_END 5
|
||||||
|
#define SRE_AT_END_LINE 6
|
||||||
|
#define SRE_AT_END_STRING 7
|
||||||
#define SRE_CATEGORY_DIGIT 0
|
#define SRE_CATEGORY_DIGIT 0
|
||||||
#define SRE_CATEGORY_NOT_DIGIT 1
|
#define SRE_CATEGORY_NOT_DIGIT 1
|
||||||
#define SRE_CATEGORY_SPACE 2
|
#define SRE_CATEGORY_SPACE 2
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue