mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 11:49:12 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			659 lines
		
	
	
	
		
			21 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			659 lines
		
	
	
	
		
			21 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
#
 | 
						|
# Secret Labs' Regular Expression Engine
 | 
						|
#
 | 
						|
# convert re-style regular expression to sre pattern
 | 
						|
#
 | 
						|
# Copyright (c) 1998-2000 by Secret Labs AB.  All rights reserved.
 | 
						|
#
 | 
						|
# Portions of this engine have been developed in cooperation with
 | 
						|
# CNRI.  Hewlett-Packard provided funding for 2.0 integration and
 | 
						|
# other compatibility work.
 | 
						|
#
 | 
						|
 | 
						|
import string, sys
 | 
						|
 | 
						|
import _sre
 | 
						|
 | 
						|
from sre_constants import *
 | 
						|
 | 
						|
# upper repeat bound used for the open-ended "*", "+" and "{m,}" operators;
# also the largest value accepted by the "(?P#index)" extension
MAXREPEAT = 65535

# FIXME: the following might change in 2.0 final.  but for now, this
# seems to be the best way to be compatible with 1.5.2
# (numeric \x.. and octal escapes are masked with this value)
CHARMASK = 0xff

# characters that _parse does not treat as plain literals
SPECIAL_CHARS = ".\\[{()*+?^$|"
# characters that introduce a repeat operator
REPEAT_CHARS  = "*+?{"

# the digit and whitespace sets are stored as tuples of one-character
# strings, so that "token in DIGITS" is an exact match against a single
# character (tokens can also be None or two-character escapes)
DIGITS = tuple(string.digits)

OCTDIGITS = tuple("01234567")
HEXDIGITS = tuple("0123456789abcdefABCDEF")

WHITESPACE = tuple(string.whitespace)

# escape sequences that stand for a single literal character.  note that
# r"\b" appears both here (backspace) and in CATEGORIES (word boundary);
# which one wins depends on which table the caller consults first.
ESCAPES = {
    r"\a": (LITERAL, 7),
    r"\b": (LITERAL, 8),
    r"\f": (LITERAL, 12),
    r"\n": (LITERAL, 10),
    r"\r": (LITERAL, 13),
    r"\t": (LITERAL, 9),
    r"\v": (LITERAL, 11),
    r"\\": (LITERAL, ord("\\"))
}

# escape sequences that stand for a position assertion (AT) or for a
# character category (wrapped in a one-element IN set)
CATEGORIES = {
    r"\A": (AT, AT_BEGINNING), # start of string
    r"\b": (AT, AT_BOUNDARY),
    r"\B": (AT, AT_NON_BOUNDARY),
    r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
    r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
    r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
    r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
    r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
    r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
    r"\Z": (AT, AT_END), # end of string
}

# maps the flag letters accepted by the inline "(?...)" syntax to the
# corresponding SRE_FLAG_* bit
FLAGS = {
    # standard flags
    "i": SRE_FLAG_IGNORECASE,
    "L": SRE_FLAG_LOCALE,
    "m": SRE_FLAG_MULTILINE,
    "s": SRE_FLAG_DOTALL,
    "x": SRE_FLAG_VERBOSE,
    # extensions
    "t": SRE_FLAG_TEMPLATE,
    "u": SRE_FLAG_UNICODE,
}
 | 
						|
 | 
						|
class State:
    # parser state shared by every subpattern of one pattern: the flag
    # bits, the next free group number, and the name -> number mapping
    # of named groups
    def __init__(self):
        self.flags = 0
        self.groups = 1 # number that the next group will receive
        self.groupdict = {}
    def getgroup(self, name=None):
        # allocate and return the next group number; when a (non-empty)
        # name is given, also record it in groupdict
        allocated = self.groups
        if name:
            self.groupdict[name] = allocated
        self.groups = allocated + 1
        return allocated
 | 
						|
 | 
						|
class SubPattern:
    # a subpattern, in intermediate form
    #
    # wraps a list of (opcode, argument) tuples (self.data) and behaves
    # like a sequence of them.  self.pattern is the object handed to
    # the constructor (the parser passes its State instance), and
    # self.width caches the result of getwidth().
    def __init__(self, pattern, data=None):
        self.pattern = pattern
        if not data:
            # note: an empty list passed by the caller is replaced too
            data = []
        self.data = data
        self.width = None
    def __repr__(self):
        return repr(self.data)
    def __len__(self):
        return len(self.data)
    def __delitem__(self, index):
        del self.data[index]
    def __getitem__(self, index):
        return self.data[index]
    def __setitem__(self, index, code):
        self.data[index] = code
    def __getslice__(self, start, stop):
        # slicing yields a new SubPattern that shares self.pattern
        return SubPattern(self.pattern, self.data[start:stop])
    def insert(self, index, code):
        self.data.insert(index, code)
    def append(self, code):
        self.data.append(code)
    def getwidth(self):
        # determine the width (min, max) for this subpattern
        #
        # returns a (lo, hi) tuple of ints, each clamped to sys.maxint.
        # the result is cached in self.width, so later changes to
        # self.data are not reflected.
        if self.width:
            return self.width
        # accumulate in longs so that repeat products cannot overflow
        lo = hi = long(0)
        for op, av in self.data:
            if op is BRANCH:
                # an alternation is as narrow as its narrowest branch
                # and as wide as its widest one
                branch_lo = sys.maxint
                branch_hi = 0
                for alternative in av[1]:
                    i, j = alternative.getwidth()
                    branch_lo = min(branch_lo, i)
                    branch_hi = max(branch_hi, j)
                lo = lo + branch_lo
                hi = hi + branch_hi
            elif op is CALL:
                i, j = av.getwidth()
                lo = lo + i
                hi = hi + j
            elif op is SUBPATTERN:
                i, j = av[1].getwidth()
                lo = lo + i
                hi = hi + j
            elif op in (MIN_REPEAT, MAX_REPEAT):
                # av is (min count, max count, repeated item)
                i, j = av[2].getwidth()
                lo = lo + long(i) * av[0]
                hi = hi + long(j) * av[1]
            elif op in (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY):
                # each of these consumes exactly one character
                lo = lo + 1
                hi = hi + 1
            elif op == SUCCESS:
                break
        self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint))
        return self.width
 | 
						|
 | 
						|
class Tokenizer:
    # splits a pattern string into tokens.  a token is a single
    # character, or a backslash together with the character following
    # it.  the pending token is available as self.next (None once the
    # string is exhausted); self.index points just past that token.
    def __init__(self, string):
        self.string = string
        self.index = 0
        self.__next()
    def __next(self):
        # load the token starting at self.index into self.next
        text = self.string
        pos = self.index
        if pos >= len(text):
            self.next = None
            return
        token = text[pos]
        if token[0] == "\\":
            # an escape always includes the character after the backslash
            try:
                token = token + text[pos + 1]
            except IndexError:
                raise error("bogus escape")
        self.index = pos + len(token)
        self.next = token
    def match(self, char):
        # consume the pending token if it equals char; return 1 if it
        # was consumed and 0 otherwise
        if char != self.next:
            return 0
        self.__next()
        return 1
    def get(self):
        # return the pending token (or None) and advance
        token = self.next
        self.__next()
        return token
    def tell(self):
        # return an opaque position that can be handed to seek()
        return self.index, self.next
    def seek(self, index):
        # restore a position previously returned by tell()
        self.index, self.next = index
 | 
						|
 | 
						|
def isident(char):
    # true if char is an ASCII letter or an underscore
    return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"

def isdigit(char):
    # true if char is an ASCII decimal digit
    return "0" <= char <= "9"

def isname(name):
    # check that group name is a valid string
    #
    # returns 1 if name starts with a letter or underscore and otherwise
    # only contains letters, underscores and digits; returns 0 if not.
    # an empty name is rejected as well, rather than failing with an
    # IndexError on name[0].
    if not name or not isident(name[0]):
        return 0
    for char in name:
        if not isident(char) and not isdigit(char):
            return 0
    return 1
 | 
						|
 | 
						|
def _group(escape, groups):
 | 
						|
    # check if the escape string represents a valid group
 | 
						|
    try:
 | 
						|
        gid = int(escape[1:])
 | 
						|
        if gid and gid < groups:
 | 
						|
            return gid
 | 
						|
    except ValueError:
 | 
						|
        pass
 | 
						|
    return None # not a valid group
 | 
						|
 | 
						|
def _class_escape(source, escape):
    # handle escape code inside character class
    #
    # escape is the two-character token just read from source (a
    # Tokenizer).  the ESCAPES table is consulted before CATEGORIES, so
    # r"\b" means backspace here.  further hex or octal digits are
    # pulled from source as needed.  returns an (opcode, argument)
    # tuple, or raises error if the escape cannot be interpreted.
    for table in (ESCAPES, CATEGORIES):
        code = table.get(escape)
        if code:
            return code
    try:
        selector = escape[1:2]
        if selector == "x":
            # hexadecimal escape: swallow every hex digit that follows,
            # but only the last four take part in the value
            while source.next in HEXDIGITS:
                escape = escape + source.get()
            escape = escape[2:]
            return LITERAL, int(escape[-4:], 16) & CHARMASK
        if str(selector) in OCTDIGITS:
            # octal escape: only the last six digits take part in the value
            while source.next in OCTDIGITS:
                escape = escape + source.get()
            escape = escape[1:]
            return LITERAL, int(escape[-6:], 8) & CHARMASK
        if len(escape) == 2:
            # any other escaped character stands for itself
            return LITERAL, ord(escape[1])
    except ValueError:
        pass
    raise error("bogus escape: %s" % repr(escape))
 | 
						|
 | 
						|
def _escape(source, escape, state):
    # handle escape code in expression
    #
    # escape is the two-character token just read from source (a
    # Tokenizer); state supplies the number of groups seen so far.  the
    # CATEGORIES table is consulted before ESCAPES, so r"\b" means word
    # boundary here.  returns an (opcode, argument) tuple, or raises
    # error if the escape cannot be interpreted.
    for table in (CATEGORIES, ESCAPES):
        code = table.get(escape)
        if code:
            return code
    try:
        selector = escape[1:2]
        if selector == "x":
            # hexadecimal escape: swallow every hex digit that follows,
            # but only the last four take part in the value
            while source.next in HEXDIGITS:
                escape = escape + source.get()
            escape = escape[2:]
            return LITERAL, int(escape[-4:], 16) & CHARMASK
        if selector in DIGITS:
            # either a group reference or an octal escape.  keep adding
            # digits for as long as the result still names an existing
            # group; otherwise collect octal digits.
            while 1:
                group = _group(escape, state.groups)
                if group:
                    upcoming = source.next
                    if (not upcoming or
                        not _group(escape + upcoming, state.groups)):
                        return GROUPREF, group
                elif source.next not in OCTDIGITS:
                    break
                escape = escape + source.get()
            escape = escape[1:]
            return LITERAL, int(escape[-6:], 8) & CHARMASK
        if len(escape) == 2:
            # any other escaped character stands for itself
            return LITERAL, ord(escape[1])
    except ValueError:
        pass
    raise error("bogus escape: %s" % repr(escape))
 | 
						|
 | 
						|
def _branch(pattern, items):
    # form a branch operator from a set of items
    #
    # items is a list of SubPattern alternatives (modified in place:
    # shared leading codes are removed from each of them).  returns a
    # new SubPattern holding the shared prefix followed by either an IN
    # set or a BRANCH operator.

    result = SubPattern(pattern)

    # hoist leading codes that all alternatives have in common
    while 1:
        prefix = None
        shared = 1
        for item in items:
            if not item:
                shared = 0
                break
            if prefix is None:
                prefix = item[0]
            elif item[0] != prefix:
                shared = 0
                break
        if not shared:
            break
        # all subitems start with a common "prefix".
        # move it out of the branch, then check the next one
        for item in items:
            del item[0]
        result.append(prefix)

    # if every alternative is now a single literal, the branch can be
    # stored as a character set instead (FIXME: use a range if possible)
    charset = []
    for item in items:
        if len(item) != 1 or item[0][0] != LITERAL:
            result.append((BRANCH, (None, items)))
            return result
        charset.append(item[0])

    result.append((IN, charset))
    return result
 | 
						|
 | 
						|
def _parse(source, state):

    # parse regular expression pattern into an operator list.
    #
    # source is a Tokenizer positioned at the start of a (sub)pattern and
    # state is the State object shared by the whole pattern (flags, group
    # counter, group names).  parsing stops, without consuming the token,
    # at an unnested "|" or ")", or at the end of the pattern.  returns a
    # SubPattern of (opcode, argument) tuples and raises error on
    # malformed input.

    subpattern = SubPattern(state)

    while 1:

        if source.next in ("|", ")"):
            break # end of subpattern
        this = source.get()
        if this is None:
            break # end of pattern

        if state.flags & SRE_FLAG_VERBOSE:
            # skip whitespace and comments
            if this in WHITESPACE:
                continue
            if this == "#":
                while 1:
                    this = source.get()
                    if this in (None, "\n"):
                        break
                continue

        if this and this[0] not in SPECIAL_CHARS:
            subpattern.append((LITERAL, ord(this)))

        elif this == "[":
            # character set
            charset = []
##          if source.match(":"):
##              pass # handle character classes
            if source.match("^"):
                charset.append((NEGATE, None))
            # check remaining characters
            start = charset[:]
            while 1:
                this = source.get()
                # a "]" right after "[" or "[^" is a literal, not the end
                if this == "]" and charset != start:
                    break
                elif this and this[0] == "\\":
                    code1 = _class_escape(source, this)
                elif this:
                    code1 = LITERAL, ord(this)
                else:
                    raise error("unexpected end of regular expression")
                if source.match("-"):
                    # potential range
                    this = source.get()
                    if this is None:
                        # pattern ended right after the "-"; report it
                        # instead of failing on this[0] below
                        raise error("unexpected end of regular expression")
                    if this == "]":
                        # trailing "-" is a literal
                        charset.append(code1)
                        charset.append((LITERAL, ord("-")))
                        break
                    else:
                        if this[0] == "\\":
                            code2 = _class_escape(source, this)
                        else:
                            code2 = LITERAL, ord(this)
                        if code1[0] != LITERAL or code2[0] != LITERAL:
                            raise error("illegal range")
                        charset.append((RANGE, (code1[1], code2[1])))
                else:
                    if code1[0] is IN:
                        # unwrap a category that came back as a set
                        code1 = code1[1][0]
                    charset.append(code1)

            # FIXME: <fl> move set optimization to compiler!
            if len(charset)==1 and charset[0][0] is LITERAL:
                subpattern.append(charset[0]) # optimization
            elif (len(charset)==2 and charset[0][0] is NEGATE
                  and charset[1][0] is LITERAL):
                subpattern.append((NOT_LITERAL, charset[1][1])) # optimization
            else:
                # FIXME: <fl> add charmap optimization
                subpattern.append((IN, charset))

        elif this and this[0] in REPEAT_CHARS:
            # repeat previous item
            if this == "?":
                minrep, maxrep = 0, 1
            elif this == "*":
                minrep, maxrep = 0, MAXREPEAT
            elif this == "+":
                minrep, maxrep = 1, MAXREPEAT
            elif this == "{":
                here = source.tell()
                minrep, maxrep = 0, MAXREPEAT
                lo = hi = ""
                while source.next in DIGITS:
                    lo = lo + source.get()
                if source.match(","):
                    while source.next in DIGITS:
                        hi = hi + source.get()
                else:
                    hi = lo
                if not source.match("}"):
                    # not a repeat specifier after all: treat the "{"
                    # as a literal and rescan what followed it
                    subpattern.append((LITERAL, ord(this)))
                    source.seek(here)
                    continue
                if lo:
                    minrep = int(lo)
                if hi:
                    maxrep = int(hi)
                if maxrep < minrep:
                    raise error("bad repeat interval")
            else:
                raise error("not supported")
            # figure out which item to repeat
            if subpattern:
                item = subpattern[-1:]
            else:
                raise error("nothing to repeat")
            if source.match("?"):
                subpattern[-1] = (MIN_REPEAT, (minrep, maxrep, item))
            else:
                subpattern[-1] = (MAX_REPEAT, (minrep, maxrep, item))

        elif this == ".":
            subpattern.append((ANY, None))

        elif this == "(":
            # group: 1 = capturing, 2 = non-capturing, 0 = an extension
            # that has no contents to parse
            group = 1
            name = None
            if source.match("?"):
                group = 0
                # options
                if source.match("P"):
                    # python extensions
                    if source.match("<"):
                        # named group: skip forward to end of name
                        name = ""
                        while 1:
                            char = source.get()
                            if char is None:
                                raise error("unterminated name")
                            if char == ">":
                                break
                            name = name + char
                        group = 1
                        if not isname(name):
                            raise error("illegal character in group name")
                    elif source.match("="):
                        # named backreference
                        name = ""
                        while 1:
                            char = source.get()
                            if char is None:
                                raise error("unterminated name")
                            if char == ")":
                                break
                            name = name + char
                        if not isname(name):
                            raise error("illegal character in group name")
                        gid = state.groupdict.get(name)
                        if gid is None:
                            raise error("unknown group name")
                        subpattern.append((GROUPREF, gid))
                        # the closing ")" has been consumed already, so
                        # do not fall through to the code below that
                        # expects to find it
                        continue
                    elif source.match("#"):
                        index = ""
                        while 1:
                            char = source.get()
                            if char is None:
                                raise error("unterminated index")
                            if char == ")":
                                break
                            index = index + char
                        try:
                            index = int(index)
                            if index < 0 or index > MAXREPEAT:
                                raise ValueError
                        except ValueError:
                            raise error("illegal index")
                        subpattern.append((INDEX, index))
                        continue
                    else:
                        char = source.get()
                        if char is None:
                            raise error("unexpected end of pattern")
                        raise error("unknown specifier: ?P%s" % char)
                elif source.match(":"):
                    # non-capturing group
                    group = 2
                elif source.match("#"):
                    # comment
                    while 1:
                        if source.next is None or source.next == ")":
                            break
                        source.get()
                elif source.next in ("=", "!", "<"):
                    # lookahead assertions
                    char = source.get()
                    direction = 1
                    if char == "<":
                        if source.next not in ("=", "!"):
                            raise error("syntax error")
                        direction = -1 # lookbehind
                        char = source.get()
                    b = []
                    while 1:
                        p = _parse(source, state)
                        if source.next == ")":
                            # the ")" itself is consumed further down
                            if b:
                                b.append(p)
                                p = _branch(state, b)
                            if char == "=":
                                subpattern.append((ASSERT, (direction, p)))
                            else:
                                subpattern.append((ASSERT_NOT, (direction, p)))
                            break
                        elif source.match("|"):
                            b.append(p)
                        else:
                            raise error("pattern not properly closed")
                else:
                    # flags
                    while FLAGS.has_key(source.next):
                        state.flags = state.flags | FLAGS[source.get()]
            if group:
                # parse group contents
                b = []
                if group == 2:
                    # anonymous group
                    group = None
                else:
                    group = state.getgroup(name)
                while 1:
                    p = _parse(source, state)
                    if group is not None:
                        p.append((INDEX, group))
                    if source.match(")"):
                        if b:
                            b.append(p)
                            p = _branch(state, b)
                        subpattern.append((SUBPATTERN, (group, p)))
                        break
                    elif source.match("|"):
                        b.append(p)
                    else:
                        raise error("group not properly closed")
            else:
                # extension without contents: the next token must be
                # the closing ")" (or the end of the pattern)
                while 1:
                    char = source.get()
                    if char is None or char == ")":
                        break
                    raise error("unknown extension")

        elif this == "^":
            subpattern.append((AT, AT_BEGINNING))

        elif this == "$":
            subpattern.append((AT, AT_END))

        elif this and this[0] == "\\":
            code = _escape(source, this, state)
            subpattern.append(code)

        else:
            raise error("parser error")

    return subpattern
 | 
						|
 | 
						|
def parse(pattern, flags=0):
    # parse 're' pattern into list of (opcode, argument) tuples
    #
    # pattern is the pattern string and flags the initial SRE_FLAG_*
    # bits.  returns the SubPattern for the whole pattern; top-level
    # alternatives are combined by _branch.  raises error on malformed
    # input.
    source = Tokenizer(pattern)
    state = State()
    state.flags = flags
    alternatives = []
    while 1:
        p = _parse(source, state)
        tail = source.get()
        if tail is None:
            break # end of pattern
        if tail == ")":
            raise error("unbalanced parenthesis")
        if tail != "|":
            raise error("bogus characters at end of regular expression")
        alternatives.append(p)
    if alternatives:
        # the piece parsed last is the final alternative
        alternatives.append(p)
        p = _branch(state, alternatives)
    return p
 | 
						|
 | 
						|
def parse_template(source, pattern):
    # parse 're' replacement string into list of literals and
    # group references
    #
    # source is the replacement string; pattern must provide a "groups"
    # count and a "groupindex" name -> number mapping.  returns a list
    # of (LITERAL, character code) and (MARK, group number) tuples.
    # raises error for malformed templates, and IndexError for a
    # \g<name> reference to a name that is not in pattern.groupindex.
    s = Tokenizer(source)
    p = []
    a = p.append
    while 1:
        this = s.get()
        if this is None:
            break # end of replacement string
        if this and this[0] == "\\":
            # group
            if this == "\\g":
                name = ""
                if s.match("<"):
                    while 1:
                        char = s.get()
                        if char is None:
                            raise error("unterminated group name")
                        if char == ">":
                            break
                        name = name + char
                if not name:
                    raise error("bad group name")
                try:
                    index = int(name)
                except ValueError:
                    if not isname(name):
                        raise error("illegal character in group name")
                    try:
                        index = pattern.groupindex[name]
                    except KeyError:
                        raise IndexError("unknown group name")
                a((MARK, index))
            elif len(this) > 1 and this[1] in DIGITS:
                # either a group reference or an octal escape.  keep
                # adding digits for as long as the result still names
                # an existing group; otherwise collect octal digits.
                code = None
                while 1:
                    group = _group(this, pattern.groups+1)
                    if group:
                        if (not s.next or
                            not _group(this + s.next, pattern.groups+1)):
                            code = MARK, int(group)
                            break
                        # the longer reference is valid as well, so take
                        # the digit (without this the loop never ends)
                        this = this + s.get()
                    elif s.next in OCTDIGITS:
                        this = this + s.get()
                    else:
                        break
                if not code:
                    digits = this[1:]
                    try:
                        code = LITERAL, int(digits[-6:], 8) & CHARMASK
                    except ValueError:
                        # e.g. "\9" when there is no such group; report
                        # it like the pattern parser does
                        raise error("bogus escape: %s" % repr(this))
                a(code)
            else:
                try:
                    a(ESCAPES[this])
                except KeyError:
                    # unknown escapes are copied through unchanged,
                    # backslash included
                    for c in this:
                        a((LITERAL, ord(c)))
        else:
            a((LITERAL, ord(this)))
    return p
 | 
						|
 | 
						|
def expand_template(template, match):
    # build the replacement text for one match
    #
    # template is a list of (LITERAL, character code) and (MARK, group)
    # tuples, as produced by parse_template; match must provide
    # "string" and "group".  returns a string of the same type as
    # match.string.  raises error if a referenced group is None.
    #
    # FIXME: <fl> this is sooooo slow.  drop in the slicelist
    # code instead
    empty = match.string[:0] # empty string of the subject's type
    if type(empty) is type(""):
        tochar = chr
    else:
        tochar = unichr
    pieces = []
    for code, arg in template:
        if code is LITERAL:
            pieces.append(tochar(arg))
        elif code is MARK:
            text = match.group(arg)
            if text is None:
                raise error("empty group")
            pieces.append(text)
    return empty.join(pieces)
 |