1.5a3 prerelease 1 from AMK

2025-09-28 03:13:48 +00:00 · 1997-08-13 22:34:14 +00:00 · 1997-08-13 22:34:14 +00:00 · 95e8053a9f
commit 95e8053a9f
parent a74ef66ac8
8 changed files with 226 additions and 114 deletions
--- a/Lib/re.py
+++ b/Lib/re.py
@ -317,10 +317,19 @@ class Eol(Instruction):
 class Set(Instruction):
    name = 'set'
-    def __init__(self, set):
+    def __init__(self, set, flags=0):
 	self.set = set
-	Instruction.__init__(self, chr(3), 33)
+	if flags & IGNORECASE: self.set=map(string.lower, self.set)
 	if len(set)==1: 
 	    # If only one element, use the "exact" opcode (it'll be faster)
 	    Instruction.__init__(self, chr(4), 2)
 	else:
 	    # Use the "set" opcode
 	    Instruction.__init__(self, chr(3), 33)
    def assemble(self, position, labels):
 	if len(self.set)==1:
 	    # If only one character in set, generate an "exact" opcode
 	    return self.opcode + self.set[0]
 	result = self.opcode
 	temp = 0
 	for i, c in map(lambda x: (x, chr(x)), range(256)):
@ -333,14 +342,16 @@ class Set(Instruction):
    def __repr__(self):
 	result = '%-15s' % (self.name)
 	self.set.sort()
 	# XXX this should print more intelligently
 	for char in self.set:
 	    result = result + char
 	return result
 class Exact(Instruction):
    name = 'exact'
-    def __init__(self, char):
+    def __init__(self, char, flags):
 	self.char = char
 	if flags & IGNORECASE: self.char=string.lower(self.char)
 	Instruction.__init__(self, chr(4), 2)
    def assemble(self, position, labels):
 	return self.opcode + self.char
@ -881,7 +892,7 @@ def compile(pattern, flags=0):
 	    escape_type, value, index = expand_escape(pattern, index)
 	    if escape_type == CHAR:
-		stack.append([Exact(value)])
+		stack.append([Exact(value, flags)])
 		lastop = '\\' + value
 	    elif escape_type == MEMORY_REFERENCE:
@ -1306,7 +1317,7 @@ def compile(pattern, flags=0):
 	elif char == '.':
 	    if flags & DOTALL:
-		stack.append([Set(map(chr, range(256)))])
+		stack.append([Set(map(chr, range(256)), flags)])
 	    else:
 		stack.append([AnyChar()])
 	    lastop = '.'
@ -1336,12 +1347,12 @@ def compile(pattern, flags=0):
 		    index = end + 1
 		# do not change lastop
 	    else:
-		stack.append([Exact(char)])
+		stack.append([Exact(char, flags)])
 		lastop = '#'
 	elif char in string.whitespace:
 	    if not (flags & VERBOSE):
-		stack.append([Exact(char)])
+		stack.append([Exact(char, flags)])
 		lastop = char
 	elif char == '[':
@ -1449,22 +1460,25 @@ def compile(pattern, flags=0):
 	    index = index + 1
 	    if negate:
 		# If case is being ignored, then both upper- and lowercase
 		# versions of the letters must be excluded.
 		if flags & IGNORECASE: set=set+map(string.upper, set)
 		notset = []
 		for char in map(chr, range(256)):
 		    if char not in set:
 			notset.append(char)
 		if len(notset) == 0:
 		    raise error, 'empty negated set'
-		stack.append([Set(notset)])
+		stack.append([Set(notset, flags)])
 	    else:
 		if len(set) == 0:
 		    raise error, 'empty set'
-		stack.append([Set(set)])
+		stack.append([Set(set, flags)])
 	    lastop = '[]'
 	else:
-	    stack.append([Exact(char)])
+	    stack.append([Exact(char, flags)])
 	    lastop = char
    code = []
@ -1485,6 +1499,7 @@ def compile(pattern, flags=0):
 	code.append(Label(label))
 	label = label + 1
    code.append(End())
 #    print code
    return RegexObject(pattern, flags, code, register, groupindex)
 # Replace expand_escape and _expand functions with their C equivalents.
--- a/Lib/test/re_tests.py
+++ b/Lib/test/re_tests.py
@ -318,6 +318,7 @@ tests = [
 #    ('((((((((((a))))))))))\\41', 'aa', FAIL),
 #    ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
    ('((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
    ('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
    ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
    ('multiple words of text', 'uh-uh', FAIL),
    ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
@ -448,7 +449,6 @@ tests = [
    ('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'),
    #('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
    #('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
    ('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
    ('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'),
    ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'),
    ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'),
@ -506,10 +506,21 @@ xyzabc
    ('a.b', 'a\nb', FAIL),
    ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
-    # test \w, etc.
+    # test \w, etc. both inside and outside character classes
    ('\\w+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
    ('[\\w]+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
    ('\\D+', '1234abc5678', SUCCEED, 'found', 'abc'),
    ('[\\D]+', '1234abc5678', SUCCEED, 'found', 'abc'),
    ('[\\da-fA-F]+', '123abc', SUCCEED, 'found', '123abc'),
    ('[\\d-x]', '-', SYNTAX_ERROR),
    (r'([\s]*)([\S]*)([\s]*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
    (r'(\s*)(\S*)(\s*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
    (r'\xff', '\377', SUCCEED, 'found', chr(255)),
    (r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
    (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
    ('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
    (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
    (r'[\t][\n][\v][\r][\f][\a][\A][\b][\B][\Z][\g]', '\t\n\v\r\f\aA\bBZg', SUCCEED, 'found', '\t\n\v\r\f\aA\bBZg'),
 ]
--- a/Lib/test/regex_tests.py
+++ b/Lib/test/regex_tests.py
@ -278,6 +278,12 @@ tests = [
 ('\\([xyz]*\\)x', 'abcx', SUCCEED,
 'found+"-"+g1', 'x-'),
 ('\\(a\\)+b\\|aac', 'aac', SUCCEED,
- 'found+"-"+g1', 'aac-None')
+ 'found+"-"+g1', 'aac-None'),
 ('\<a', 'a', SUCCEED, 'found', 'a'),
 ('\<a', '!', FAIL),
 ('a\<b', 'ab', FAIL),
 ('a\>', 'ab', FAIL),
 ('a\>', 'a!', SUCCEED, 'found', 'a'),
 ('a\>', 'a', SUCCEED, 'found', 'a'),
 ]
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@ -31,6 +31,10 @@ try:
    assert re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx') == 'xxxx'
    assert re.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a') == '\t\n\v\r\f\a\bBZ\aAwWsSdD'
    assert re.sub('a', '\t\n\v\r\f\a', 'a') == '\t\n\v\r\f\a'
    assert re.sub('a', '\t\n\v\r\f\a', 'a') == (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7))
 except AssertionError:
    raise TestFailed, "re.sub"
@ -120,7 +124,6 @@ if verbose:
    print 'Running re_tests test suite'
 for t in tests:
    print t
    sys.stdout.flush()
    pattern=s=outcome=repl=expected=None
    if len(t)==5:
@ -136,6 +139,7 @@ for t in tests:
 	if outcome==SYNTAX_ERROR: pass	# Expected a syntax error
 	else: 
 	    print '=== Syntax error:', t
    except KeyboardInterrupt: raise KeyboardInterrupt
    except:
 	print '*** Unexpected error ***'
 	if verbose:
@ -182,3 +186,10 @@ for t in tests:
 		    print repr(repl)+' should be '+repr(expected)
 	    else:
 		print '=== Failed incorrectly', t
            # Try the match with IGNORECASE enabled, and check that it
 	    # still succeeds.
            obj=re.compile(pattern, re.IGNORECASE)
            result=obj.search(s)
            if result==None:
                print '=== Fails on case-insensitive match', t
--- a/Modules/regexmodule.c
+++ b/Modules/regexmodule.c
@ -132,8 +132,10 @@ regobj_match(re, args)
 	re->re_lastok = NULL;
 	result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs);
 	if (result < -1) {
-		/* Failure like stack overflow */
+		/* Serious failure of some sort; if re_match didn't 
-		PyErr_SetString(RegexError, "match failure");
+		   set an exception, raise a generic error */
 	        if (!PyErr_Occurred())
 		        PyErr_SetString(RegexError, "match failure");
 		return NULL;
 	}
 	if (result >= 0) {
@ -174,8 +176,10 @@ regobj_search(re, args)
 	result = re_search(&re->re_patbuf, buffer, size, offset, range,
 			   &re->re_regs);
 	if (result < -1) {
-		/* Failure like stack overflow */
+		/* Serious failure of some sort; if re_search didn't 
-		PyErr_SetString(RegexError, "match failure");
+		   set an exception, raise a generic error */
 	        if (!PyErr_Occurred())
 	  	        PyErr_SetString(RegexError, "match failure");
 		return NULL;
 	}
 	if (result >= 0) {
--- a/Modules/regexpr.c
+++ b/Modules/regexpr.c
@ -33,6 +33,7 @@
 #include "myproto.h" /* For PROTO macro --Guido */
 #include <stdio.h>
 #include "Python.h"
 #ifndef NDEBUG
 #define NDEBUG 1
@ -85,16 +86,16 @@ typedef union item_t
 	{
 		int num;
 		int level;
-		char *start;
+		unsigned char *start;
-		char *end;
+		unsigned char *end;
 	} reg;
 	struct
 	{
 		int count;
 		int level;
 		int phantom;
-		char *code;
+		unsigned char *code;
-		char *text;
+		unsigned char *text;
 	} fail;
 	struct
 	{
@ -139,8 +140,8 @@ typedef struct match_state
 	 * offsets from the beginning of the string before returning the
 	 * registers to the calling program. */
-	char *start[NUM_REGISTERS];
+	unsigned char *start[NUM_REGISTERS];
-	char *end[NUM_REGISTERS];
+	unsigned char *end[NUM_REGISTERS];
 	/* Keeps track of whether a register has changed recently. */
@ -422,7 +423,7 @@ enum regexp_compiled_ops /* opcodes for compiled regexp */
 	Cwordbound,	      /* match if at word boundary */
 	Cnotwordbound,        /* match if not at word boundary */
 	Csyntaxspec,	      /* matches syntax code (1 byte follows) */
-	Cnotsyntaxspec,       /* matches if syntax code does not match (1 byte foll)*/
+	Cnotsyntaxspec,       /* matches if syntax code does not match (1 byte follows) */
 	Crepeat1
 };
@ -469,7 +470,7 @@ static int regexp_ansi_sequences;
 #define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)]
-char re_syntax_table[256];
+unsigned char re_syntax_table[256];
 void re_compile_initialize(void)
 {
@ -593,11 +594,11 @@ static int hex_char_to_decimal(int ch)
 	return 16;
 }
-static void re_compile_fastmap_aux(char *code,
+static void re_compile_fastmap_aux(unsigned char *code,
 				   int pos,
-				   char *visited,
+				   unsigned char *visited,
-				   char *can_be_null,
+				   unsigned char *can_be_null,
-				   char *fastmap)
+				   unsigned char *fastmap)
 {
 	int a;
 	int b;
@ -717,19 +718,20 @@ static void re_compile_fastmap_aux(char *code,
 		}
 		default:
 		{
-			abort();  /* probably some opcode is missing from this switch */
+		        PyErr_SetString(PyExc_SystemError, "Unknown regex opcode: memory corrupted?");
 		        return;
 			/*NOTREACHED*/
 		}
 		}
 }
-static int re_do_compile_fastmap(char *buffer,
+static int re_do_compile_fastmap(unsigned char *buffer,
 				 int used,
 				 int pos,
-				 char *can_be_null,
+				 unsigned char *can_be_null,
-				 char *fastmap)
+				 unsigned char *fastmap)
 {
-	char small_visited[512], *visited;
+	unsigned char small_visited[512], *visited;
 	if (used <= sizeof(small_visited))
 		visited = small_visited;
@ -759,6 +761,7 @@ void re_compile_fastmap(regexp_t bufp)
 				   &bufp->can_be_null,
 				   bufp->fastmap))
 		return;
 	if (PyErr_Occurred()) return;
 	if (bufp->buffer[0] == Cbol)
 		bufp->anchor = 1;   /* begline */
 	else
@ -792,13 +795,13 @@ void re_compile_fastmap(regexp_t bufp)
 *
 */
-static int re_optimize_star_jump(regexp_t bufp, char *code)
+static int re_optimize_star_jump(regexp_t bufp, unsigned char *code)
 {
-	char map[256];
+	unsigned char map[256];
-	char can_be_null;
+	unsigned char can_be_null;
-	char *p1;
+	unsigned char *p1;
-	char *p2;
+	unsigned char *p2;
-	char ch;
+	unsigned char ch;
 	int a;
 	int b;
 	int num_instructions = 0;
@ -808,6 +811,13 @@ static int re_optimize_star_jump(regexp_t bufp, char *code)
 	a = (int)SHORT(a);
 	p1 = code + a + 3; /* skip the failure_jump */
 	/* Check that the jump is within the pattern */
 	if (p1<bufp->buffer || bufp->buffer+bufp->used<p1)
 	  {
 	    PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (failure_jump opt)");
 	    return 0;
 	  }
 	assert(p1[-3] == Cfailure_jump);
 	p2 = code;
 	/* p1 points inside loop, p2 points to after loop */
@ -923,7 +933,7 @@ static int re_optimize_star_jump(regexp_t bufp, char *code)
 		}
 	}
-  make_update_jump:
+	/* make_update_jump: */
 	code -= 3;
 	a += 3;  /* jump to after the Cfailure_jump */
 	code[0] = Cupdate_failure_jump;
@ -948,7 +958,7 @@ static int re_optimize_star_jump(regexp_t bufp, char *code)
 static int re_optimize(regexp_t bufp)
 {
-	char *code;
+	unsigned char *code;
 	code = bufp->buffer;
@ -1073,7 +1083,7 @@ else \
 #define GETHEX(var) \
 { \
-	char gethex_ch, gethex_value; \
+	unsigned char gethex_ch, gethex_value; \
 	NEXTCHAR(gethex_ch); \
 	gethex_value = hex_char_to_decimal(gethex_ch); \
 	if (gethex_value == 16) \
@ -1147,7 +1157,7 @@ else \
 	} \
 }
-char *re_compile_pattern(char *regex, int size, regexp_t bufp)
+unsigned char *re_compile_pattern(unsigned char *regex, int size, regexp_t bufp)
 {
 	int a;
 	int pos;
@ -1161,8 +1171,8 @@ char *re_compile_pattern(char *regex, int size, regexp_t bufp)
 	int future_jumps[MAX_NESTING];
 	int num_jumps;
 	unsigned char ch = '\0';
-	char *pattern;
+	unsigned char *pattern;
-	char *translate;
+	unsigned char *translate;
 	int next_register;
 	int paren_depth;
 	int num_open_registers;
@ -1580,23 +1590,23 @@ if (translate) \
 	var = translate[var]
 int re_match(regexp_t bufp,
-	     char *string,
+	     unsigned char *string,
 	     int size,
 	     int pos,
 	     regexp_registers_t old_regs)
 {
-	char *code;
+	unsigned char *code;
-	char *translate;
+	unsigned char *translate;
-	char *text;
+	unsigned char *text;
-	char *textstart;
+	unsigned char *textstart;
-	char *textend;
+	unsigned char *textend;
 	int a;
 	int b;
 	int ch;
 	int reg;
 	int match_end;
-	char *regstart;
+	unsigned char *regstart;
-	char *regend;
+	unsigned char *regend;
 	int regsize;
 	match_state state;
@ -1738,18 +1748,36 @@ int re_match(regexp_t bufp,
 		a = (unsigned char)*code++;
 		a |= (unsigned char)*code++ << 8;
 		code += (int)SHORT(a);
 		if (code<bufp->buffer || bufp->buffer+bufp->used<code) {
 		        PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cjump)");
 			FREE_STATE(state);
            	        return -2;
         	}
 		goto continue_matching;
 	}
 	case Cdummy_failure_jump:
 	{
                unsigned char *failuredest;
 		a = (unsigned char)*code++;
 		a |= (unsigned char)*code++ << 8;
 		a = (int)SHORT(a);
 		assert(*code == Cfailure_jump);
 		b = (unsigned char)code[1];
 		b |= (unsigned char)code[2] << 8;
-		PUSH_FAILURE(state, code + (int)SHORT(b) + 3, NULL, goto error);
+                failuredest = code + (int)SHORT(b) + 3;
 		if (failuredest<bufp->buffer || bufp->buffer+bufp->used < failuredest) {
 		        PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cdummy_failure_jump failuredest)");
 			FREE_STATE(state);
            	        return -2;
 		}
 		PUSH_FAILURE(state, failuredest, NULL, goto error);
 		code += a;
 		if (code<bufp->buffer || bufp->buffer+bufp->used < code) {
 		        PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cdummy_failure_jump code)");
 			FREE_STATE(state);
            	        return -2;
         	}
 		goto continue_matching;
 	}
 	case Cfailure_jump:
@ -1757,16 +1785,26 @@ int re_match(regexp_t bufp,
 		a = (unsigned char)*code++;
 		a |= (unsigned char)*code++ << 8;
 		a = (int)SHORT(a);
 		if (code+a<bufp->buffer || bufp->buffer+bufp->used < code+a) {
 		        PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cfailure_jump)");
 			FREE_STATE(state);
            	        return -2;
         	}
 		PUSH_FAILURE(state, code + a, text, goto error);
 		goto continue_matching;
 	}
 	case Crepeat1:
 	{
-		char *pinst;
+		unsigned char *pinst;
 		a = (unsigned char)*code++;
 		a |= (unsigned char)*code++ << 8;
 		a = (int)SHORT(a);
 		pinst = code + a;
 		if (pinst<bufp->buffer || bufp->buffer+bufp->used<pinst) {
 		        PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Crepeat1)");
 			FREE_STATE(state);
            	        return -2;
         	}
 		/* pinst is sole instruction in loop, and it matches a
 		 * single character.  Since Crepeat1 was originally a
 		 * Cupdate_failure_jump, we also know that backtracking
@ -1777,8 +1815,8 @@ int re_match(regexp_t bufp,
 		switch (*pinst++)
 		{
 		case Cset:
-		{
+		  {
-			if (translate)
+		        if (translate)
 			{
 				while (text < textend)
 				{
@ -1801,7 +1839,7 @@ int re_match(regexp_t bufp,
 				}
 			}
 			break;
-		}
+                }
 		case Cexact:
 		{
 			ch = (unsigned char)*pinst;
@ -1858,7 +1896,9 @@ int re_match(regexp_t bufp,
 		}
 		default:
 		{
-			abort();
+		        FREE_STATE(state);
 		        PyErr_SetString(PyExc_SystemError, "Unknown regex opcode: memory corrupted?");
 		        return -2;
 			/*NOTREACHED*/
 		}
 		}
@ -1900,9 +1940,9 @@ int re_match(regexp_t bufp,
 			goto fail;
 		if (text == textend)
 			goto continue_matching;
-		if (SYNTAX(*text) & Sword)
+		if (!(SYNTAX(*text) & Sword))
-			goto fail;
+		        goto continue_matching;
-		goto continue_matching;
+                goto fail;
 	}
 	case Cwordbound:
 	{
@ -1936,16 +1976,20 @@ int re_match(regexp_t bufp,
 	{
 		NEXTCHAR(ch);
 		if (SYNTAX(ch) & (unsigned char)*code++)
-			break;
+			goto fail;
 		goto continue_matching;
 	}
 	default:
 	{
-		abort();
+	        FREE_STATE(state);
 	        PyErr_SetString(PyExc_SystemError, "Unknown regex opcode: memory corrupted?");
 		return -2;
 		/*NOTREACHED*/
 	}
 	}
 #if 0 /* This line is never reached --Guido */
 	abort();
 #endif
@ -1953,6 +1997,7 @@ int re_match(regexp_t bufp,
 	 *NOTREACHED
 	 */
 	/* Using "break;" in the above switch statement is equivalent to "goto fail;" */
  fail:
 	POP_FAILURE(state, code, text, goto done_matching, goto error);
 	goto continue_matching;
@ -1970,32 +2015,36 @@ int re_match(regexp_t bufp,
 	return -2;
 }
 #undef PREFETCH
 #undef NEXTCHAR
 int re_search(regexp_t bufp,
-	      char *string,
+	      unsigned char *string,
 	      int size,
 	      int pos,
 	      int range,
 	      regexp_registers_t regs)
 {
-	char *fastmap;
+	unsigned char *fastmap;
-	char *translate;
+	unsigned char *translate;
-	char *text;
+	unsigned char *text;
-	char *partstart;
+	unsigned char *partstart;
-	char *partend;
+	unsigned char *partend;
 	int dir;
 	int ret;
-	char anchor;
+	unsigned char anchor;
 	assert(size >= 0 && pos >= 0);
 	assert(pos + range >= 0 && pos + range <= size); /* Bugfix by ylo */
 	fastmap = bufp->fastmap;
 	translate = bufp->translate;
-	if (fastmap && !bufp->fastmap_accurate)
+	if (fastmap && !bufp->fastmap_accurate) {
-		re_compile_fastmap(bufp);
+                re_compile_fastmap(bufp);
 	        if (PyErr_Occurred()) return -2;
 	}
 	anchor = bufp->anchor;
 	if (bufp->can_be_null == 1) /* can_be_null == 2: can match null at eob */
 		fastmap = NULL;
--- a/Modules/regexpr.h
+++ b/Modules/regexpr.h
@ -33,16 +33,16 @@ extern "C" {
 typedef struct re_pattern_buffer
 {
-	char *buffer;          /* compiled pattern */
+	unsigned char *buffer;          /* compiled pattern */
 	int allocated;         /* allocated size of compiled pattern */
 	int used;              /* actual length of compiled pattern */
-	char *fastmap;         /* fastmap[ch] is true if ch can start pattern */
+	unsigned char *fastmap;         /* fastmap[ch] is true if ch can start pattern */
-	char *translate;       /* translation to apply during compilation/matching */
+	unsigned char *translate;       /* translation to apply during compilation/matching */
-	char fastmap_accurate; /* true if fastmap is valid */
+	unsigned char fastmap_accurate; /* true if fastmap is valid */
-	char can_be_null;      /* true if can match empty string */
+	unsigned char can_be_null;      /* true if can match empty string */
-	char uses_registers;   /* registers are used and need to be initialized */
+	unsigned char uses_registers;   /* registers are used and need to be initialized */
 	int num_registers;     /* number of registers used */
-	char anchor;           /* anchor: 0=none 1=begline 2=begbuf */
+	unsigned char anchor;           /* anchor: 0=none 1=begline 2=begbuf */
 } *regexp_t;
 typedef struct re_registers
@ -93,7 +93,7 @@ extern int re_syntax;
 /* This is the actual syntax mask.  It was added so that Python could do
 * syntax-dependent munging of patterns before compilation. */
-extern char re_syntax_table[256];
+extern unsigned char re_syntax_table[256];
 void re_compile_initialize(void);
@ -101,7 +101,7 @@ int re_set_syntax(int syntax);
 /* This sets the syntax to use and returns the previous syntax.  The
 * syntax is specified by a bit mask of the above defined bits. */
-char *re_compile_pattern(char *regex, int regex_size, regexp_t compiled);
+unsigned char *re_compile_pattern(unsigned char *regex, int regex_size, regexp_t compiled);
 /* This compiles the regexp (given in regex and length in regex_size).
 * This returns NULL if the regexp compiled successfully, and an error
 * message if an error was encountered.  The buffer field must be
@ -110,14 +110,14 @@ char *re_compile_pattern(char *regex, int regex_size, regexp_t compiled);
 * buffer is NULL).  Also, the translate field must be set to point to a
 * valid translation table, or NULL if it is not used. */
-int re_match(regexp_t compiled, char *string, int size, int pos,
+int re_match(regexp_t compiled, unsigned char *string, int size, int pos,
 	     regexp_registers_t old_regs);
 /* This tries to match the regexp against the string.  This returns the
 * length of the matched portion, or -1 if the pattern could not be
 * matched and -2 if an error (such as failure stack overflow) is
 * encountered. */
-int re_search(regexp_t compiled, char *string, int size, int startpos,
+int re_search(regexp_t compiled, unsigned char *string, int size, int startpos,
 	      int range, regexp_registers_t regs);
 /* This searches for a substring matching the regexp.  This returns the
 * first index at which a match is found.  range specifies at how many
@ -132,28 +132,16 @@ void re_compile_fastmap(regexp_t compiled);
 * the calling program must have initialized the fastmap field to point
 * to an array of 256 characters. */
 char *re_comp(char *s);
 /* BSD 4.2 regex library routine re_comp.  This compiles the regexp into
 * an internal buffer.  This returns NULL if the regexp was compiled
 * successfully, and an error message if there was an error. */
 int re_exec(char *s);
 /* BSD 4.2 regexp library routine re_exec.  This returns true if the
 * string matches the regular expression (that is, a matching part is
 * found anywhere in the string). */
 #else /* HAVE_PROTOTYPES */
 extern int re_syntax;
-extern char re_syntax_table[256];
+extern unsigned char re_syntax_table[256];
 void re_compile_initialize();
 int re_set_syntax();
-char *re_compile_pattern();
+unsigned char *re_compile_pattern();
 int re_match();
 int re_search();
 void re_compile_fastmap();
 char *re_comp();
 int re_exec();
 #endif /* HAVE_PROTOTYPES */
--- a/Modules/reopmodule.c
+++ b/Modules/reopmodule.c
@ -62,7 +62,7 @@ static PyObject *ReopError;	/* Exception */
 #define BEGINNING_OF_BUFFER	7
 #define END_OF_BUFFER		8
-static char *reop_casefold;
+static unsigned char *reop_casefold;
 static PyObject *
 makeresult(regs, num_regs)
@ -105,7 +105,7 @@ reop_match(self, args)
 	PyObject *self;
 	PyObject *args;
 {
-	char *string;
+	unsigned char *string;
 	int fastmaplen, stringlen;
 	int can_be_null, anchor, i;
 	int flags, pos, result;
@ -163,8 +163,8 @@ reop_match(self, args)
 	if (result < -1) {
 		/* Failure like stack overflow */
-		PyErr_SetString(ReopError, "match failure");
+	        if (!PyErr_Occurred())
-		
+	  	        PyErr_SetString(ReopError, "match failure");
 		return NULL;
 	}
 	if (result == -1) {
@ -174,12 +174,38 @@ reop_match(self, args)
 	return makeresult(&re_regs, bufp.num_registers);
 }
 #if 0
 static PyObject *
 reop_optimize(self, args)
 	PyObject *self;
 	PyObject *args;
 {
  unsigned char *buffer;
  int buflen;
  struct re_pattern_buffer bufp;
  PyObject *opt_code;
  if (!PyArg_Parse(args, "(s#)", &buffer, &buflen)) return NULL;
  /* Create a new string for the optimized code */
  opt_code=PyString_FromStringAndSize(buffer, buflen);
  if (opt_code!=NULL)
    {
      bufp.buffer = PyString_AsString(opt_code);
      bufp.used=bufp.allocated=buflen;
    }
  return opt_code;
 }
 #endif
 static PyObject *
 reop_search(self, args)
 	PyObject *self;
 	PyObject *args;
 {
-	char *string;
+	unsigned char *string;
 	int fastmaplen, stringlen;
 	int can_be_null, anchor, i;
 	int flags, pos, result;
@ -237,7 +263,8 @@ reop_search(self, args)
 	if (result < -1) {
 		/* Failure like stack overflow */
-		PyErr_SetString(ReopError, "match failure");
+	        if (!PyErr_Occurred())
 	  	        PyErr_SetString(ReopError, "match failure");
 		return NULL;
 	}
@ -626,7 +653,7 @@ reop__expand(self, args)
 {
  PyObject *results, *match_obj;
  PyObject *repl_obj, *newstring;
-  char *repl;
+  unsigned char *repl;
  int size, total_len, i, start, pos;
  if (!PyArg_ParseTuple(args, "OS", &match_obj, &repl_obj)) 
@ -810,7 +837,7 @@ internal_split(args, retain)
  reopobject *pattern;
  int maxsplit=0, count=0, length, next=0, result;
  int match_end=0; /* match_start is defined below */
-  char *start;
+  unsigned char *start;
  if (!PyArg_ParseTuple(args, "s#Oi", &start, &length, &pattern,
 			&maxsplit))
@ -911,6 +938,7 @@ static struct PyMethodDef reop_global_methods[] = {
 	{"expand_escape", reop_expand_escape, 1},
 	{"_expand", reop__expand, 1},
 #if 0
 	{"_optimize",	reop_optimize, 0},
 	{"split",  reop_split, 0},
 	{"splitx",  reop_splitx, 0},
 #endif
@ -922,8 +950,8 @@ initreop()
 {
 	PyObject *m, *d, *k, *v, *o;
 	int i;
-	char *s;
+	unsigned char *s;
-	char j[2];
+	unsigned char j[2];
 	re_compile_initialize();
@ -936,7 +964,7 @@ initreop()
 		goto finally;
 	/* Initialize reop.casefold constant */
-	if (!(v = PyString_FromStringAndSize((char *)NULL, 256)))
+	if (!(v = PyString_FromStringAndSize((unsigned char *)NULL, 256)))
 		goto finally;
 	if (!(s = PyString_AsString(v)))