Added Fredrik Lundh's sre module and its supporting cast.

NOTE: THIS IS VERY ROUGH ALPHA CODE!
2025-09-27 02:39:58 +00:00 · 2000-03-31 14:58:54 +00:00 · 2000-03-31 14:58:54 +00:00 · 7627c0de69
commit 7627c0de69
parent 7a5b796322
4 changed files with 856 additions and 0 deletions
--- a/Lib/sre.py
+++ b/Lib/sre.py
@ -0,0 +1,46 @@
 # -*- Mode: Python; tab-width: 4 -*-
 #
 # Secret Labs' Regular Expression Engine
 # $Id$
 #
 # re-compatible interface for the sre matching engine
 #
 # Copyright (c) 1998-2000 by Secret Labs AB.  All rights reserved.
 #
 # This code can only be used for 1.6 alpha testing.  All other use
 # require explicit permission from Secret Labs AB.
 #
 # Portions of this engine have been developed in cooperation with
 # CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 # other compatibility work.
 #
 """
 this is a long string
 """
 import sre_compile
 # --------------------------------------------------------------------
 # public interface
 def compile(pattern, flags=0):
    return sre_compile.compile(pattern, _fixflags(flags))
 def match(pattern, string, flags=0):
    return compile(pattern, _fixflags(flags)).match(string)
 def search(pattern, string, flags=0):
    assert flags == 0
    return compile(pattern, _fixflags(flags)).search(string)
 # FIXME: etc
 # --------------------------------------------------------------------
 # helpers
 def _fixflags(flags):
    # convert flag bitmask to sequence
    assert flags == 0
    return ()
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@ -0,0 +1,187 @@
 #
 # Secret Labs' Regular Expression Engine
 # $Id$
 #
 # convert template to internal format
 #
 # Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
 #
 # This code can only be used for 1.6 alpha testing.  All other use
 # require explicit permission from Secret Labs AB.
 #
 # Portions of this engine have been developed in cooperation with
 # CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 # other compatibility work.
 #
 # FIXME: <fl> formalize (objectify?) and document the compiler code
 # format, so that other frontends can use this compiler
 import array, string, sys
 import _sre
 from sre_constants import *
 # find an array type code that matches the engine's code size
 for WORDSIZE in "BHil":
    if len(array.array(WORDSIZE, [0]).tostring()) == _sre.getcodesize():
 	break
 else:
    raise RuntimeError, "cannot find a useable array type"
 # FIXME: <fl> should move some optimizations from the parser to here!
 class Code:
    def __init__(self):
 	self.data = []
    def __len__(self):
 	return len(self.data)
    def __getitem__(self, index):
 	return self.data[index]
    def __setitem__(self, index, code):
 	self.data[index] = code
    def append(self, code):
 	self.data.append(code)
    def todata(self):
 	# print self.data
 	return array.array(WORDSIZE, self.data).tostring()
 def _lower(literal):
    # return _sre._lower(literal) # FIXME
    return string.lower(literal)
 def _compile(code, pattern, flags):
    append = code.append
    for op, av in pattern:
 	if op is ANY:
 	    if "s" in flags:
 		append(CODES[op]) # any character at all!
 	    else:
 		append(CODES[NOT_LITERAL])
 		append(10)
 	elif op in (SUCCESS, FAILURE):
 	    append(CODES[op])
 	elif op is AT:
 	    append(CODES[op])
 	    append(POSITIONS[av])
 	elif op is BRANCH:
 	    append(CODES[op])
 	    tail = []
 	    for av in av[1]:
 		skip = len(code); append(0)
 		_compile(code, av, flags)
 		append(CODES[JUMP])
 		tail.append(len(code)); append(0)
 		code[skip] = len(code) - skip
 	    append(0) # end of branch
 	    for tail in tail:
 		code[tail] = len(code) - tail
 	elif op is CALL:
 	    append(CODES[op])
 	    skip = len(code); append(0)
 	    _compile(code, av, flags)
 	    append(CODES[SUCCESS])
 	    code[skip] = len(code) - skip
 	elif op is CATEGORY: # not used by current parser
 	    append(CODES[op])
 	    append(CATEGORIES[av])
 	elif op is GROUP:
 	    if "i" in flags:
 		append(CODES[MAP_IGNORE[op]])
 	    else:
 		append(CODES[op])
 	    append(av)
 	elif op is IN:
 	    if "i" in flags:
 		append(CODES[MAP_IGNORE[op]])
 		def fixup(literal):
 		    return ord(_lower(literal))
 	    else:
 		append(CODES[op])
 		fixup = ord
 	    skip = len(code); append(0)
 	    for op, av in av:
 		append(CODES[op])
 		if op is NEGATE:
 		    pass
 		elif op is LITERAL:
 		    append(fixup(av))
 		elif op is RANGE:
 		    append(fixup(av[0]))
 		    append(fixup(av[1]))
 		elif op is CATEGORY:
 		    append(CATEGORIES[av])
 		else:
 		    raise ValueError, "unsupported set operator"
 	    append(CODES[FAILURE])
 	    code[skip] = len(code) - skip
 	elif op in (LITERAL, NOT_LITERAL):
 	    if "i" in flags:
 		append(CODES[MAP_IGNORE[op]])
 		append(ord(_lower(av)))
 	    else:
 		append(CODES[op])
 		append(ord(av))
 	elif op is MARK:
 	    append(CODES[op])
 	    append(av)
 	elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
 	    lo, hi = av[2].getwidth()
 	    if lo == 0:
 		raise SyntaxError, "cannot repeat zero-width items"
 	    if lo == hi == 1 and op is MAX_REPEAT:
 		append(CODES[MAX_REPEAT_ONE])
 		skip = len(code); append(0)
 		append(av[0])
 		append(av[1])
 		_compile(code, av[2], flags)
 		append(CODES[SUCCESS])
 		code[skip] = len(code) - skip
 	    else:
 		append(CODES[op])
 		skip = len(code); append(0)
 		append(av[0])
 		append(av[1])
 		_compile(code, av[2], flags)
 		if op is MIN_REPEAT:
 		    append(CODES[MIN_UNTIL])
 		else:
 		    # FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?)
 		    append(CODES[MAX_UNTIL])
 		code[skip] = len(code) - skip
 	elif op is SUBPATTERN:
 ## 	    group = av[0]
 ## 	    if group:
 ## 		append(CODES[MARK])
 ## 		append((group-1)*2)
 	    _compile(code, av[1], flags)
 ## 	    if group:
 ## 		append(CODES[MARK])
 ## 		append((group-1)*2+1)
 	else:
 	    raise ValueError, ("unsupported operand type", op)
 def compile(p, flags=()):
    # convert pattern list to internal format
    if type(p) is type(""):
 	import sre_parse
 	pattern = p
 	p = sre_parse.parse(p)
    else:
 	pattern = None
    # print p.getwidth()
    # print p
    code = Code()
    _compile(code, p.data, p.pattern.flags)
    code.append(CODES[SUCCESS])
    # print list(code.data)
    data = code.todata()
    if 0: # debugging
 	print
 	print "-" * 68
 	import sre_disasm
 	sre_disasm.disasm(data)
 	print "-" * 68
    # print len(data), p.pattern.groups, len(p.pattern.groupdict)
    return _sre.compile(pattern, data, p.pattern.groups-1, p.pattern.groupdict)
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
@ -0,0 +1,131 @@
 #
 # Secret Labs' Regular Expression Engine
 # $Id$
 #
 # various symbols used by the regular expression engine.
 # run this script to update the _sre include files!
 #
 # Copyright (c) 1998-2000 by Secret Labs AB.  All rights reserved.
 #
 # This code can only be used for 1.6 alpha testing.  All other use
 # require explicit permission from Secret Labs AB.
 #
 # Portions of this engine have been developed in cooperation with
 # CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 # other compatibility work.
 #
 # operators
 FAILURE = "failure"
 SUCCESS = "success"
 ANY = "any"
 ASSERT = "assert"
 AT = "at"
 BRANCH = "branch"
 CALL = "call"
 CATEGORY = "category"
 GROUP = "group"
 GROUP_IGNORE = "group_ignore"
 IN = "in"
 IN_IGNORE = "in_ignore"
 JUMP = "jump"
 LITERAL = "literal"
 LITERAL_IGNORE = "literal_ignore"
 MARK = "mark"
 MAX_REPEAT = "max_repeat"
 MAX_REPEAT_ONE = "max_repeat_one"
 MAX_UNTIL = "max_until"
 MIN_REPEAT = "min_repeat"
 MIN_UNTIL = "min_until"
 NEGATE = "negate"
 NOT_LITERAL = "not_literal"
 NOT_LITERAL_IGNORE = "not_literal_ignore"
 RANGE = "range"
 REPEAT = "repeat"
 SUBPATTERN = "subpattern"
 # positions
 AT_BEGINNING = "at_beginning"
 AT_BOUNDARY = "at_boundary"
 AT_NON_BOUNDARY = "at_non_boundary"
 AT_END = "at_end"
 # categories
 CATEGORY_DIGIT = "category_digit"
 CATEGORY_NOT_DIGIT = "category_not_digit"
 CATEGORY_SPACE = "category_space"
 CATEGORY_NOT_SPACE = "category_not_space"
 CATEGORY_WORD = "category_word"
 CATEGORY_NOT_WORD = "category_not_word"
 CODES = [
    # failure=0 success=1 (just because it looks better that way :-)
    FAILURE, SUCCESS,
    ANY,
    ASSERT,
    AT,
    BRANCH,
    CALL,
    CATEGORY,
    GROUP, GROUP_IGNORE,
    IN, IN_IGNORE,
    JUMP,
    LITERAL, LITERAL_IGNORE,
    MARK,
    MAX_REPEAT, MAX_UNTIL,
    MAX_REPEAT_ONE,
    MIN_REPEAT, MIN_UNTIL,
    NOT_LITERAL, NOT_LITERAL_IGNORE,
    NEGATE,
    RANGE,
    REPEAT
 ]
 # convert to dictionary
 c = {}
 i = 0
 for code in CODES:
    c[code] = i
    i = i + 1
 CODES = c
 # replacement operations for "ignore case" mode
 MAP_IGNORE = {
    GROUP: GROUP_IGNORE,
    IN: IN_IGNORE,
    LITERAL: LITERAL_IGNORE,
    NOT_LITERAL: NOT_LITERAL_IGNORE
 }
 POSITIONS = {
    AT_BEGINNING: ord("a"),
    AT_BOUNDARY: ord("b"),
    AT_NON_BOUNDARY: ord("B"),
    AT_END: ord("z"),
 }
 CATEGORIES = {
    CATEGORY_DIGIT: ord("d"),
    CATEGORY_NOT_DIGIT: ord("D"),
    CATEGORY_SPACE: ord("s"),
    CATEGORY_NOT_SPACE: ord("S"),
    CATEGORY_WORD: ord("w"),
    CATEGORY_NOT_WORD: ord("W"),
 }
 if __name__ == "__main__":
    import string
    items = CODES.items()
    items.sort(lambda a, b: cmp(a[1], b[1]))
    f = open("sre_constants.h", "w")
    f.write("/* generated by sre_constants.py */\n")
    for k, v in items:
 	f.write("#define SRE_OP_" + string.upper(k) + " " + str(v) + "\n")
    f.close()
    print "done"
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@ -0,0 +1,492 @@
 #
 # Secret Labs' Regular Expression Engine
 # $Id$
 #
 # convert re-style regular expression to SRE template.  the current
 # implementation is somewhat incomplete, and not very fast.  should
 # definitely be rewritten before Python 1.6 goes beta.
 #
 # Copyright (c) 1998-2000 by Secret Labs AB.  All rights reserved.
 #
 # This code can only be used for 1.6 alpha testing.  All other use
 # require explicit permission from Secret Labs AB.
 #
 # Portions of this engine have been developed in cooperation with
 # CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 # other compatibility work.
 #
 # FIXME: comments marked with the FIXME tag are open issues.  all such
 # issues should be closed before the final beta.
 import string, sys
 from sre_constants import *
 SPECIAL_CHARS = ".\\[{()*+?^$|"
 REPEAT_CHARS  = "*+?{"
 OCTDIGITS = "01234567"
 HEXDIGITS = "0123456789abcdefABCDEF"
 ESCAPES = {
    "\\a": (LITERAL, chr(7)),
    "\\b": (LITERAL, chr(8)),
    "\\f": (LITERAL, chr(12)),
    "\\n": (LITERAL, chr(10)),
    "\\r": (LITERAL, chr(13)),
    "\\t": (LITERAL, chr(9)),
    "\\v": (LITERAL, chr(11))
 }
 CATEGORIES = {
    "\\A": (AT, AT_BEGINNING), # start of string
    "\\b": (AT, AT_BOUNDARY),
    "\\B": (AT, AT_NON_BOUNDARY),
    "\\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
    "\\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
    "\\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
    "\\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
    "\\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
    "\\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
    "\\Z": (AT, AT_END), # end of string
 }
 class Pattern:
    # FIXME: <fl> rename class, and store flags in here too!
    def __init__(self):
 	self.flags = []
 	self.groups = 1
 	self.groupdict = {}
    def getgroup(self, name=None):
 	gid = self.groups
 	self.groups = gid + 1
 	if name:
 	    self.groupdict[name] = gid
 	return gid
    def setflag(self, flag):
 	if flag not in self.flags:
 	    self.flags.append(flag)
 class SubPattern:
    # a subpattern, in intermediate form
    def __init__(self, pattern, data=None):
 	self.pattern = pattern
 	if not data:
 	    data = []
 	self.data = data
 	self.flags = []
 	self.width = None
    def __repr__(self):
 	return repr(self.data)
    def __len__(self):
 	return len(self.data)
    def __delitem__(self, index):
 	del self.data[index]
    def __getitem__(self, index):
 	return self.data[index]
    def __setitem__(self, index, code):
 	self.data[index] = code
    def __getslice__(self, start, stop):
 	return SubPattern(self.pattern, self.data[start:stop])
    def insert(self, index, code):
 	self.data.insert(index, code)
    def append(self, code):
 	self.data.append(code)
    def getwidth(self):
 	# determine the width (min, max) for this subpattern
 	if self.width:
 	    return self.width
 	lo = hi = 0L
 	for op, av in self.data:
 	    if op is BRANCH:
 		l = sys.maxint
 		h = 0
 		for av in av[1]:
 		    i, j = av.getwidth()
 		    l = min(l, i)
 		    h = min(h, j)
 		lo = lo + i
 		hi = hi + j
 	    elif op is CALL:
 		i, j = av.getwidth()
 		lo = lo + i
 		hi = hi + j
 	    elif op is SUBPATTERN:
 		i, j = av[1].getwidth()
 		lo = lo + i
 		hi = hi + j
 	    elif op in (MIN_REPEAT, MAX_REPEAT):
 		i, j = av[2].getwidth()
 		lo = lo + i * av[0]
 		hi = hi + j * av[1]
 	    elif op in (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY):
 		lo = lo + 1
 		hi = hi + 1
 	    elif op == SUCCESS:
 		break
 	self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint))
 	return self.width
    def set(self, flag):
 	if not flag in self.flags:
 	    self.flags.append(flag)
    def reset(self, flag):
 	if flag in self.flags:
 	    self.flags.remove(flag)
 class Tokenizer:
    def __init__(self, string):
 	self.string = list(string)
 	self.next = self.__next()
    def __next(self):
 	if not self.string:
 	    return None
 	char = self.string[0]
 	if char[0] == "\\":
 	    try:
 		c = self.string[1]
 	    except IndexError:
 		raise SyntaxError, "bogus escape"
 	    char = char + c
 	    try:
 		if c == "x":
 		    # hexadecimal constant
 		    for i in xrange(2, sys.maxint):
 			c = self.string[i]
 			if c not in HEXDIGITS:
 			    break
 			char = char + c
 		elif c in string.digits:
 		    # decimal (or octal) number
 		    for i in xrange(2, sys.maxint):
 			c = self.string[i]
 			# FIXME: if larger than current number of
 			# groups, interpret as an octal number 
 			if c not in string.digits:
 			    break
 			char = char + c
 	    except IndexError:
 		pass # use what we've got this far
 	del self.string[0:len(char)]
 	return char
    def match(self, char):
 	if char == self.next:
 	    self.next = self.__next()
 	    return 1
 	return 0
    def match_set(self, set):
 	if self.next in set:
 	    self.next = self.__next()
 	    return 1
 	return 0
    def get(self):
 	this = self.next
 	self.next = self.__next()
 	return this
 def _fixescape(escape, character_class=0):
    # convert escape to (type, value)
    if character_class:
 	# inside a character class, we'll look in the character
 	# escapes dictionary first
 	code = ESCAPES.get(escape)
 	if code:
 	    return code
 	code = CATEGORIES.get(escape)
    else:
 	code = CATEGORIES.get(escape)
 	if code:
 	    return code
 	code = ESCAPES.get(escape)
    if code:
 	return code
    if not character_class:
 	try:
 	    group = int(escape[1:])
 	    # FIXME: only valid if group <= current number of groups
 	    return GROUP, group
 	except ValueError:
 	    pass
    try:
 	if escape[1:2] == "x":
 	    escape = escape[2:]
 	    return LITERAL, chr(string.atoi(escape[-2:], 16) & 0xff)
 	elif escape[1:2] in string.digits:
 	    return LITERAL, chr(string.atoi(escape[1:], 8) & 0xff)
 	elif len(escape) == 2:
 	    return LITERAL, escape[1]
    except ValueError:
 	pass
    raise SyntaxError, "bogus escape: %s" % repr(escape)
 def _branch(subpattern, items):
    # form a branch operator from a set of items (FIXME: move this
    # optimization to the compiler module!)
    # check if all items share a common prefix
    while 1:
 	prefix = None
 	for item in items:
 	    if not item:
 		break
 	    if prefix is None:
 		prefix = item[0]
 	    elif item[0] != prefix:
 		break
 	else:
 	    # all subitems start with a common "prefix".
 	    # move it out of the branch
 	    for item in items:
 		del item[0]
 	    subpattern.append(prefix)
 	    continue # check next one
 	break
    # check if the branch can be replaced by a character set
    for item in items:
 	if len(item) != 1 or item[0][0] != LITERAL:
 	    break
    else:
 	# we can store this as a character set instead of a
 	# branch (FIXME: use a range if possible)
 	set = []
 	for item in items:
 	    set.append(item[0])
 	subpattern.append((IN, set))
 	return
    subpattern.append((BRANCH, (None, items)))
 def _parse(source, pattern, flags=()):
    # parse regular expression pattern into an operator list.
    subpattern = SubPattern(pattern)
    this = None
    while 1:
 	if source.next in ("|", ")"):
 	    break # end of subpattern
 	this = source.get()
 	if this is None:
 	    break # end of pattern
 	if this and this[0] not in SPECIAL_CHARS:
 	    subpattern.append((LITERAL, this))
 	elif this == "[":
 	    # character set
 	    set = []
 ## 	    if source.match(":"):
 ## 		pass # handle character classes
 	    if source.match("^"):
 		set.append((NEGATE, None))
 	    # check remaining characters
 	    start = set[:]
 	    while 1:
 		this = source.get()
 		if this == "]" and set != start:
 		    break
 		elif this and this[0] == "\\":
 		    code1 = _fixescape(this, 1)
 		elif this:
 		    code1 = LITERAL, this
 		else:
 		    raise SyntaxError, "unexpected end of regular expression"
 		if source.match("-"):
 		    # potential range
 		    this = source.get()
 		    if this == "]":
 			set.append(code1)
 			set.append((LITERAL, "-"))
 			break
 		    else:
 			if this[0] == "\\":
 			    code2 = _fixescape(this, 1)
 			else:
 			    code2 = LITERAL, this
 			if code1[0] != LITERAL or code2[0] != LITERAL:
 			    raise SyntaxError, "illegal range"
 			if len(code1[1]) != 1 or len(code2[1]) != 1:
 			    raise SyntaxError, "illegal range"
 			set.append((RANGE, (code1[1], code2[1])))
 		else:
 		    if code1[0] is IN:
 			code1 = code1[1][0]
 		    set.append(code1)
 	    # FIXME: <fl> move set optimization to support function
 	    if len(set)==1 and set[0][0] is LITERAL:
 		subpattern.append(set[0]) # optimization
 	    elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
 		subpattern.append((NOT_LITERAL, set[1][1])) # optimization
 	    else:
 		# FIXME: <fl> add charmap optimization
 		subpattern.append((IN, set))
 	elif this and this[0] in REPEAT_CHARS:
 	    # repeat previous item
 	    if this == "?":
 		min, max = 0, 1
 	    elif this == "*":
 		min, max = 0, sys.maxint
 	    elif this == "+":
 		min, max = 1, sys.maxint
 	    elif this == "{":
 		min, max = 0, sys.maxint
 		lo = hi = ""
 		while source.next in string.digits:
 		    lo = lo + source.get()
 		if source.match(","):
 		    while source.next in string.digits:
 			hi = hi + source.get()
 		else:
 		    hi = lo
 		if not source.match("}"):
 		    raise SyntaxError, "bogus range"
 		if lo:
 		    min = int(lo)
 		if hi:
 		    max = int(hi)
 		# FIXME: <fl> check that hi >= lo!
 	    else:
 		raise SyntaxError, "not supported"
 	    # figure out which item to repeat
 	    # FIXME: should back up to the right mark, right?
 	    if subpattern:
 		index = len(subpattern)-1
 		while subpattern[index][0] is MARK:
 		    index = index - 1
 		item = subpattern[index:index+1]
 	    else:
 		raise SyntaxError, "nothing to repeat"
 	    if source.match("?"):
 		subpattern[index] = (MIN_REPEAT, (min, max, item))
 	    else:
 		subpattern[index] = (MAX_REPEAT, (min, max, item))
 	elif this == ".":
 	    subpattern.append((ANY, None))
 	elif this == "(":
 	    group = 1
 	    name = None
 	    if source.match("?"):
 		group = 0
 		# options
 		if source.match("P"):
 		    # named group: skip forward to end of name
 		    if source.match("<"):
 			name = ""
 			while 1:
 			    char = source.get()
 			    if char in (">", None):
 				break
 			    name = name + char
 			group = 1
 		elif source.match(":"):
 		    # non-capturing group
 		    group = 2
 		elif source.match_set("iI"):
 		    pattern.setflag("i")
 		elif source.match_set("lL"):
 		    pattern.setflag("l")
 		elif source.match_set("mM"):
 		    pattern.setflag("m")
 		elif source.match_set("sS"):
 		    pattern.setflag("s")
 		elif source.match_set("xX"):
 		    pattern.setflag("x")
 	    if group:
 		# parse group contents
 		b = []
 		if group == 2:
 		    # anonymous group
 		    group = None
 		else:
 		    group = pattern.getgroup(name)
 		if group:
 		    subpattern.append((MARK, (group-1)*2))
 		while 1:
 		    p = _parse(source, pattern, flags)
 		    if source.match(")"):
 			if b:
 			    b.append(p)
 			    _branch(subpattern, b)
 			else:
 			    subpattern.append((SUBPATTERN, (group, p)))
 			break
 		    elif source.match("|"):
 			b.append(p)
 		    else:
 			raise SyntaxError, "group not properly closed"
 		if group:
 		    subpattern.append((MARK, (group-1)*2+1))
 	    else:
 		# FIXME: should this really be a while loop?
 		while source.get() not in (")", None):
 		    pass
 	elif this == "^":
 	    subpattern.append((AT, AT_BEGINNING))
 	elif this == "$":
 	    subpattern.append((AT, AT_END))
 	elif this and this[0] == "\\":
 	    code =_fixescape(this)
 	    subpattern.append(code)
 	else:
 	    raise SyntaxError, "parser error"
    return subpattern
 def parse(source, flags=()):
    s = Tokenizer(source)
    g = Pattern()
    b = []
    while 1:
 	p = _parse(s, g, flags)
 	tail = s.get()
 	if tail == "|":
 	    b.append(p)
 	elif tail == ")":
 	    raise SyntaxError, "unbalanced parenthesis"
 	elif tail is None:
 	    if b:
 		b.append(p)
 		p = SubPattern(g)
 		_branch(p, b)
 	    break
 	else:
 	    raise SyntaxError, "bogus characters at end of regular expression"
    return p
 if __name__ == "__main__":
    from pprint import pprint
    from testpatterns import PATTERNS
    a = b = c = 0
    for pattern, flags in PATTERNS:
 	if flags:
 	    continue
 	print "-"*68
 	try:
 	    p = parse(pattern)
 	    print repr(pattern), "->"
 	    pprint(p.data)
 	    import sre_compile
 	    try:
 		code = sre_compile.compile(p)
 		c = c + 1
 	    except:
 		pass
 	    a = a + 1
 	except SyntaxError, v:
 	    print "**", repr(pattern), v
 	b = b + 1
    print "-"*68
    print a, "of", b, "patterns successfully parsed"
    print c, "of", b, "patterns successfully compiled"