mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
Fredrik Lundh: new snapshot. Mostly reindented.
This one should work with unicode expressions, and compile a bit more silently.
This commit is contained in:
parent
5de435a245
commit
b81e70ebdb
3 changed files with 482 additions and 477 deletions
|
@ -164,7 +164,7 @@ def _compile(code, pattern, flags):
|
|||
|
||||
def compile(p, flags=()):
|
||||
# convert pattern list to internal format
|
||||
if type(p) is type(""):
|
||||
if type(p) in (type(""), type(u"")):
|
||||
import sre_parse
|
||||
pattern = p
|
||||
p = sre_parse.parse(p)
|
||||
|
|
|
@ -26,8 +26,11 @@ from sre_constants import *
|
|||
SPECIAL_CHARS = ".\\[{()*+?^$|"
|
||||
REPEAT_CHARS = "*+?{"
|
||||
|
||||
OCTDIGITS = "01234567"
|
||||
HEXDIGITS = "0123456789abcdefABCDEF"
|
||||
# FIXME: "string in tuple" tests may explode if char is unicode :-(
|
||||
DIGITS = tuple(string.digits)
|
||||
|
||||
OCTDIGITS = tuple("01234567")
|
||||
HEXDIGITS = tuple("0123456789abcdefABCDEF")
|
||||
|
||||
ESCAPES = {
|
||||
"\\a": (LITERAL, chr(7)),
|
||||
|
@ -65,7 +68,7 @@ class Pattern:
|
|||
self.groupdict[name] = gid
|
||||
return gid
|
||||
def setflag(self, flag):
|
||||
if flag not in self.flags:
|
||||
if flag in self.flags:
|
||||
self.flags.append(flag)
|
||||
|
||||
class SubPattern:
|
||||
|
@ -153,16 +156,16 @@ class Tokenizer:
|
|||
# hexadecimal constant
|
||||
for i in xrange(2, sys.maxint):
|
||||
c = self.string[i]
|
||||
if c not in HEXDIGITS:
|
||||
if str(c) not in HEXDIGITS:
|
||||
break
|
||||
char = char + c
|
||||
elif c in string.digits:
|
||||
elif str(c) in DIGITS:
|
||||
# decimal (or octal) number
|
||||
for i in xrange(2, sys.maxint):
|
||||
c = self.string[i]
|
||||
# FIXME: if larger than current number of
|
||||
# groups, interpret as an octal number
|
||||
if c not in string.digits:
|
||||
if str(c) not in DIGITS:
|
||||
break
|
||||
char = char + c
|
||||
except IndexError:
|
||||
|
@ -175,7 +178,7 @@ class Tokenizer:
|
|||
return 1
|
||||
return 0
|
||||
def match_set(self, set):
|
||||
if self.next in set:
|
||||
if self.next and self.next in set:
|
||||
self.next = self.__next()
|
||||
return 1
|
||||
return 0
|
||||
|
@ -210,9 +213,9 @@ def _fixescape(escape, character_class=0):
|
|||
try:
|
||||
if escape[1:2] == "x":
|
||||
escape = escape[2:]
|
||||
return LITERAL, chr(string.atoi(escape[-2:], 16) & 0xff)
|
||||
elif escape[1:2] in string.digits:
|
||||
return LITERAL, chr(string.atoi(escape[1:], 8) & 0xff)
|
||||
return LITERAL, chr(int(escape[-2:], 16) & 0xff)
|
||||
elif str(escape[1:2]) in DIGITS:
|
||||
return LITERAL, chr(int(escape[1:], 8) & 0xff)
|
||||
elif len(escape) == 2:
|
||||
return LITERAL, escape[1]
|
||||
except ValueError:
|
||||
|
@ -268,7 +271,7 @@ def _parse(source, pattern, flags=()):
|
|||
|
||||
while 1:
|
||||
|
||||
if source.next in ("|", ")"):
|
||||
if str(source.next) in ("|", ")"):
|
||||
break # end of subpattern
|
||||
this = source.get()
|
||||
if this is None:
|
||||
|
@ -338,10 +341,10 @@ def _parse(source, pattern, flags=()):
|
|||
elif this == "{":
|
||||
min, max = 0, sys.maxint
|
||||
lo = hi = ""
|
||||
while source.next in string.digits:
|
||||
while str(source.next) in DIGITS:
|
||||
lo = lo + source.get()
|
||||
if source.match(","):
|
||||
while source.next in string.digits:
|
||||
while str(source.next) in DIGITS:
|
||||
hi = hi + source.get()
|
||||
else:
|
||||
hi = lo
|
||||
|
@ -381,7 +384,7 @@ def _parse(source, pattern, flags=()):
|
|||
name = ""
|
||||
while 1:
|
||||
char = source.get()
|
||||
if char in (">", None):
|
||||
if char is None or char == ">":
|
||||
break
|
||||
name = name + char
|
||||
group = 1
|
||||
|
@ -425,8 +428,10 @@ def _parse(source, pattern, flags=()):
|
|||
subpattern.append((MARK, (group-1)*2+1))
|
||||
else:
|
||||
# FIXME: should this really be a while loop?
|
||||
while source.get() not in (")", None):
|
||||
pass
|
||||
while 1:
|
||||
char = source.get()
|
||||
if char is None or char == ")":
|
||||
break
|
||||
|
||||
elif this == "^":
|
||||
subpattern.append((AT, AT_BEGINNING))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue