Fredrik Lundh: new snapshot. Mostly reindented.

This one should work with unicode expressions, and compile a bit more silently.
2025-09-27 02:39:58 +00:00 · 2000-04-10 17:10:48 +00:00 · 2000-04-10 17:10:48 +00:00 · b81e70ebdb
commit b81e70ebdb
parent 5de435a245
3 changed files with 482 additions and 477 deletions
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@ -164,7 +164,7 @@ def _compile(code, pattern, flags):
 def compile(p, flags=()):
    # convert pattern list to internal format
-    if type(p) is type(""):
+    if type(p) in (type(""), type(u"")):
 	import sre_parse
 	pattern = p
 	p = sre_parse.parse(p)
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@ -26,8 +26,11 @@ from sre_constants import *
 SPECIAL_CHARS = ".\\[{()*+?^$|"
 REPEAT_CHARS  = "*+?{"
-OCTDIGITS = "01234567"
+# FIXME: string-in-tuple tests may explode if char is unicode :-(
-HEXDIGITS = "0123456789abcdefABCDEF"
+DIGITS = tuple(string.digits)
+OCTDIGITS = tuple("01234567")
+HEXDIGITS = tuple("0123456789abcdefABCDEF")
 ESCAPES = {
    "\\a": (LITERAL, chr(7)),
@ -65,7 +68,7 @@ class Pattern:
 	    self.groupdict[name] = gid
 	return gid
    def setflag(self, flag):
-        if flag not in self.flags:
+	if flag in self.flags:
 	    self.flags.append(flag)
 class SubPattern:
@ -153,16 +156,16 @@ class Tokenizer:
 		    # hexadecimal constant
 		    for i in xrange(2, sys.maxint):
 			c = self.string[i]
-                        if c not in HEXDIGITS:
+			if str(c) not in HEXDIGITS:
 			    break
 			char = char + c
-                elif c in string.digits:
+		elif str(c) in DIGITS:
 		    # decimal (or octal) number
 		    for i in xrange(2, sys.maxint):
 			c = self.string[i]
 			# FIXME: if larger than current number of
 			# groups, interpret as an octal number 
-                        if c not in string.digits:
+			if str(c) not in DIGITS:
 			    break
 			char = char + c
 	    except IndexError:
@ -175,7 +178,7 @@ class Tokenizer:
 	    return 1
 	return 0
    def match_set(self, set):
-        if self.next in set:
+	if self.next and self.next in set:
 	    self.next = self.__next()
 	    return 1
 	return 0
@ -210,9 +213,9 @@ def _fixescape(escape, character_class=0):
    try:
 	if escape[1:2] == "x":
 	    escape = escape[2:]
-            return LITERAL, chr(string.atoi(escape[-2:], 16) & 0xff)
+	    return LITERAL, chr(int(escape[-2:], 16) & 0xff)
-        elif escape[1:2] in string.digits:
+	elif str(escape[1:2]) in DIGITS:
-            return LITERAL, chr(string.atoi(escape[1:], 8) & 0xff)
+	    return LITERAL, chr(int(escape[1:], 8) & 0xff)
 	elif len(escape) == 2:
 	    return LITERAL, escape[1]
    except ValueError:
@ -268,7 +271,7 @@ def _parse(source, pattern, flags=()):
    while 1:
-        if source.next in ("|", ")"):
+	if str(source.next) in ("|", ")"):
 	    break # end of subpattern
 	this = source.get()
 	if this is None:
@ -338,10 +341,10 @@ def _parse(source, pattern, flags=()):
 	    elif this == "{":
 		min, max = 0, sys.maxint
 		lo = hi = ""
-                while source.next in string.digits:
+		while str(source.next) in DIGITS:
 		    lo = lo + source.get()
 		if source.match(","):
-                    while source.next in string.digits:
+		    while str(source.next) in DIGITS:
 			hi = hi + source.get()
 		else:
 		    hi = lo
@ -381,7 +384,7 @@ def _parse(source, pattern, flags=()):
 			name = ""
 			while 1:
 			    char = source.get()
-                            if char in (">", None):
+			    if char is None or char == ">":
 				break
 			    name = name + char
 			group = 1
@ -425,8 +428,10 @@ def _parse(source, pattern, flags=()):
 		    subpattern.append((MARK, (group-1)*2+1))
 	    else:
 		# FIXME: should this really be a while loop?
-                while source.get() not in (")", None):
+		while 1:
-                    pass
+		    char = source.get()
+		    if char is None or char == ")":
+			break
 	elif this == "^":
 	    subpattern.append((AT, AT_BEGINNING))