mirror of
https://github.com/python/cpython.git
synced 2025-09-27 02:39:58 +00:00
Fredrik Lundh: new snapshot. Mostly reindented.
This one should work with unicode expressions, and compile a bit more silently.
This commit is contained in:
parent
5de435a245
commit
b81e70ebdb
3 changed files with 482 additions and 477 deletions
|
@ -164,7 +164,7 @@ def _compile(code, pattern, flags):
|
||||||
|
|
||||||
def compile(p, flags=()):
|
def compile(p, flags=()):
|
||||||
# convert pattern list to internal format
|
# convert pattern list to internal format
|
||||||
if type(p) is type(""):
|
if type(p) in (type(""), type(u"")):
|
||||||
import sre_parse
|
import sre_parse
|
||||||
pattern = p
|
pattern = p
|
||||||
p = sre_parse.parse(p)
|
p = sre_parse.parse(p)
|
||||||
|
|
|
@ -26,8 +26,11 @@ from sre_constants import *
|
||||||
SPECIAL_CHARS = ".\\[{()*+?^$|"
|
SPECIAL_CHARS = ".\\[{()*+?^$|"
|
||||||
REPEAT_CHARS = "*+?{"
|
REPEAT_CHARS = "*+?{"
|
||||||
|
|
||||||
OCTDIGITS = "01234567"
|
# FIXME: string-in-tuple tests may explode if char is unicode :-(
|
||||||
HEXDIGITS = "0123456789abcdefABCDEF"
|
DIGITS = tuple(string.digits)
|
||||||
|
|
||||||
|
OCTDIGITS = tuple("01234567")
|
||||||
|
HEXDIGITS = tuple("0123456789abcdefABCDEF")
|
||||||
|
|
||||||
ESCAPES = {
|
ESCAPES = {
|
||||||
"\\a": (LITERAL, chr(7)),
|
"\\a": (LITERAL, chr(7)),
|
||||||
|
@ -65,7 +68,7 @@ class Pattern:
|
||||||
self.groupdict[name] = gid
|
self.groupdict[name] = gid
|
||||||
return gid
|
return gid
|
||||||
def setflag(self, flag):
|
def setflag(self, flag):
|
||||||
if flag not in self.flags:
|
if flag in self.flags:
|
||||||
self.flags.append(flag)
|
self.flags.append(flag)
|
||||||
|
|
||||||
class SubPattern:
|
class SubPattern:
|
||||||
|
@ -153,16 +156,16 @@ class Tokenizer:
|
||||||
# hexadecimal constant
|
# hexadecimal constant
|
||||||
for i in xrange(2, sys.maxint):
|
for i in xrange(2, sys.maxint):
|
||||||
c = self.string[i]
|
c = self.string[i]
|
||||||
if c not in HEXDIGITS:
|
if str(c) not in HEXDIGITS:
|
||||||
break
|
break
|
||||||
char = char + c
|
char = char + c
|
||||||
elif c in string.digits:
|
elif str(c) in DIGITS:
|
||||||
# decimal (or octal) number
|
# decimal (or octal) number
|
||||||
for i in xrange(2, sys.maxint):
|
for i in xrange(2, sys.maxint):
|
||||||
c = self.string[i]
|
c = self.string[i]
|
||||||
# FIXME: if larger than current number of
|
# FIXME: if larger than current number of
|
||||||
# groups, interpret as an octal number
|
# groups, interpret as an octal number
|
||||||
if c not in string.digits:
|
if str(c) not in DIGITS:
|
||||||
break
|
break
|
||||||
char = char + c
|
char = char + c
|
||||||
except IndexError:
|
except IndexError:
|
||||||
|
@ -175,7 +178,7 @@ class Tokenizer:
|
||||||
return 1
|
return 1
|
||||||
return 0
|
return 0
|
||||||
def match_set(self, set):
|
def match_set(self, set):
|
||||||
if self.next in set:
|
if self.next and self.next in set:
|
||||||
self.next = self.__next()
|
self.next = self.__next()
|
||||||
return 1
|
return 1
|
||||||
return 0
|
return 0
|
||||||
|
@ -210,9 +213,9 @@ def _fixescape(escape, character_class=0):
|
||||||
try:
|
try:
|
||||||
if escape[1:2] == "x":
|
if escape[1:2] == "x":
|
||||||
escape = escape[2:]
|
escape = escape[2:]
|
||||||
return LITERAL, chr(string.atoi(escape[-2:], 16) & 0xff)
|
return LITERAL, chr(int(escape[-2:], 16) & 0xff)
|
||||||
elif escape[1:2] in string.digits:
|
elif str(escape[1:2]) in DIGITS:
|
||||||
return LITERAL, chr(string.atoi(escape[1:], 8) & 0xff)
|
return LITERAL, chr(int(escape[1:], 8) & 0xff)
|
||||||
elif len(escape) == 2:
|
elif len(escape) == 2:
|
||||||
return LITERAL, escape[1]
|
return LITERAL, escape[1]
|
||||||
except ValueError:
|
except ValueError:
|
||||||
|
@ -268,7 +271,7 @@ def _parse(source, pattern, flags=()):
|
||||||
|
|
||||||
while 1:
|
while 1:
|
||||||
|
|
||||||
if source.next in ("|", ")"):
|
if str(source.next) in ("|", ")"):
|
||||||
break # end of subpattern
|
break # end of subpattern
|
||||||
this = source.get()
|
this = source.get()
|
||||||
if this is None:
|
if this is None:
|
||||||
|
@ -338,10 +341,10 @@ def _parse(source, pattern, flags=()):
|
||||||
elif this == "{":
|
elif this == "{":
|
||||||
min, max = 0, sys.maxint
|
min, max = 0, sys.maxint
|
||||||
lo = hi = ""
|
lo = hi = ""
|
||||||
while source.next in string.digits:
|
while str(source.next) in DIGITS:
|
||||||
lo = lo + source.get()
|
lo = lo + source.get()
|
||||||
if source.match(","):
|
if source.match(","):
|
||||||
while source.next in string.digits:
|
while str(source.next) in DIGITS:
|
||||||
hi = hi + source.get()
|
hi = hi + source.get()
|
||||||
else:
|
else:
|
||||||
hi = lo
|
hi = lo
|
||||||
|
@ -381,7 +384,7 @@ def _parse(source, pattern, flags=()):
|
||||||
name = ""
|
name = ""
|
||||||
while 1:
|
while 1:
|
||||||
char = source.get()
|
char = source.get()
|
||||||
if char in (">", None):
|
if char is None or char == ">":
|
||||||
break
|
break
|
||||||
name = name + char
|
name = name + char
|
||||||
group = 1
|
group = 1
|
||||||
|
@ -425,8 +428,10 @@ def _parse(source, pattern, flags=()):
|
||||||
subpattern.append((MARK, (group-1)*2+1))
|
subpattern.append((MARK, (group-1)*2+1))
|
||||||
else:
|
else:
|
||||||
# FIXME: should this really be a while loop?
|
# FIXME: should this really be a while loop?
|
||||||
while source.get() not in (")", None):
|
while 1:
|
||||||
pass
|
char = source.get()
|
||||||
|
if char is None or char == ")":
|
||||||
|
break
|
||||||
|
|
||||||
elif this == "^":
|
elif this == "^":
|
||||||
subpattern.append((AT, AT_BEGINNING))
|
subpattern.append((AT, AT_BEGINNING))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue