mirror of
https://github.com/python/cpython.git
synced 2025-09-28 03:13:48 +00:00
1.5a3 prerelease 1 from AMK
This commit is contained in:
parent
a74ef66ac8
commit
95e8053a9f
8 changed files with 226 additions and 114 deletions
35
Lib/re.py
35
Lib/re.py
|
@ -317,10 +317,19 @@ class Eol(Instruction):
|
||||||
|
|
||||||
class Set(Instruction):
|
class Set(Instruction):
|
||||||
name = 'set'
|
name = 'set'
|
||||||
def __init__(self, set):
|
def __init__(self, set, flags=0):
|
||||||
self.set = set
|
self.set = set
|
||||||
Instruction.__init__(self, chr(3), 33)
|
if flags & IGNORECASE: self.set=map(string.lower, self.set)
|
||||||
|
if len(set)==1:
|
||||||
|
# If only one element, use the "exact" opcode (it'll be faster)
|
||||||
|
Instruction.__init__(self, chr(4), 2)
|
||||||
|
else:
|
||||||
|
# Use the "set" opcode
|
||||||
|
Instruction.__init__(self, chr(3), 33)
|
||||||
def assemble(self, position, labels):
|
def assemble(self, position, labels):
|
||||||
|
if len(self.set)==1:
|
||||||
|
# If only one character in set, generate an "exact" opcode
|
||||||
|
return self.opcode + self.set[0]
|
||||||
result = self.opcode
|
result = self.opcode
|
||||||
temp = 0
|
temp = 0
|
||||||
for i, c in map(lambda x: (x, chr(x)), range(256)):
|
for i, c in map(lambda x: (x, chr(x)), range(256)):
|
||||||
|
@ -333,14 +342,16 @@ class Set(Instruction):
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
result = '%-15s' % (self.name)
|
result = '%-15s' % (self.name)
|
||||||
self.set.sort()
|
self.set.sort()
|
||||||
|
# XXX this should print more intelligently
|
||||||
for char in self.set:
|
for char in self.set:
|
||||||
result = result + char
|
result = result + char
|
||||||
return result
|
return result
|
||||||
|
|
||||||
class Exact(Instruction):
|
class Exact(Instruction):
|
||||||
name = 'exact'
|
name = 'exact'
|
||||||
def __init__(self, char):
|
def __init__(self, char, flags):
|
||||||
self.char = char
|
self.char = char
|
||||||
|
if flags & IGNORECASE: self.char=string.lower(self.char)
|
||||||
Instruction.__init__(self, chr(4), 2)
|
Instruction.__init__(self, chr(4), 2)
|
||||||
def assemble(self, position, labels):
|
def assemble(self, position, labels):
|
||||||
return self.opcode + self.char
|
return self.opcode + self.char
|
||||||
|
@ -881,7 +892,7 @@ def compile(pattern, flags=0):
|
||||||
escape_type, value, index = expand_escape(pattern, index)
|
escape_type, value, index = expand_escape(pattern, index)
|
||||||
|
|
||||||
if escape_type == CHAR:
|
if escape_type == CHAR:
|
||||||
stack.append([Exact(value)])
|
stack.append([Exact(value, flags)])
|
||||||
lastop = '\\' + value
|
lastop = '\\' + value
|
||||||
|
|
||||||
elif escape_type == MEMORY_REFERENCE:
|
elif escape_type == MEMORY_REFERENCE:
|
||||||
|
@ -1306,7 +1317,7 @@ def compile(pattern, flags=0):
|
||||||
|
|
||||||
elif char == '.':
|
elif char == '.':
|
||||||
if flags & DOTALL:
|
if flags & DOTALL:
|
||||||
stack.append([Set(map(chr, range(256)))])
|
stack.append([Set(map(chr, range(256)), flags)])
|
||||||
else:
|
else:
|
||||||
stack.append([AnyChar()])
|
stack.append([AnyChar()])
|
||||||
lastop = '.'
|
lastop = '.'
|
||||||
|
@ -1336,12 +1347,12 @@ def compile(pattern, flags=0):
|
||||||
index = end + 1
|
index = end + 1
|
||||||
# do not change lastop
|
# do not change lastop
|
||||||
else:
|
else:
|
||||||
stack.append([Exact(char)])
|
stack.append([Exact(char, flags)])
|
||||||
lastop = '#'
|
lastop = '#'
|
||||||
|
|
||||||
elif char in string.whitespace:
|
elif char in string.whitespace:
|
||||||
if not (flags & VERBOSE):
|
if not (flags & VERBOSE):
|
||||||
stack.append([Exact(char)])
|
stack.append([Exact(char, flags)])
|
||||||
lastop = char
|
lastop = char
|
||||||
|
|
||||||
elif char == '[':
|
elif char == '[':
|
||||||
|
@ -1449,22 +1460,25 @@ def compile(pattern, flags=0):
|
||||||
index = index + 1
|
index = index + 1
|
||||||
|
|
||||||
if negate:
|
if negate:
|
||||||
|
# If case is being ignored, then both upper- and lowercase
|
||||||
|
# versions of the letters must be excluded.
|
||||||
|
if flags & IGNORECASE: set=set+map(string.upper, set)
|
||||||
notset = []
|
notset = []
|
||||||
for char in map(chr, range(256)):
|
for char in map(chr, range(256)):
|
||||||
if char not in set:
|
if char not in set:
|
||||||
notset.append(char)
|
notset.append(char)
|
||||||
if len(notset) == 0:
|
if len(notset) == 0:
|
||||||
raise error, 'empty negated set'
|
raise error, 'empty negated set'
|
||||||
stack.append([Set(notset)])
|
stack.append([Set(notset, flags)])
|
||||||
else:
|
else:
|
||||||
if len(set) == 0:
|
if len(set) == 0:
|
||||||
raise error, 'empty set'
|
raise error, 'empty set'
|
||||||
stack.append([Set(set)])
|
stack.append([Set(set, flags)])
|
||||||
|
|
||||||
lastop = '[]'
|
lastop = '[]'
|
||||||
|
|
||||||
else:
|
else:
|
||||||
stack.append([Exact(char)])
|
stack.append([Exact(char, flags)])
|
||||||
lastop = char
|
lastop = char
|
||||||
|
|
||||||
code = []
|
code = []
|
||||||
|
@ -1485,6 +1499,7 @@ def compile(pattern, flags=0):
|
||||||
code.append(Label(label))
|
code.append(Label(label))
|
||||||
label = label + 1
|
label = label + 1
|
||||||
code.append(End())
|
code.append(End())
|
||||||
|
# print code
|
||||||
return RegexObject(pattern, flags, code, register, groupindex)
|
return RegexObject(pattern, flags, code, register, groupindex)
|
||||||
|
|
||||||
# Replace expand_escape and _expand functions with their C equivalents.
|
# Replace expand_escape and _expand functions with their C equivalents.
|
||||||
|
|
|
@ -318,6 +318,7 @@ tests = [
|
||||||
# ('((((((((((a))))))))))\\41', 'aa', FAIL),
|
# ('((((((((((a))))))))))\\41', 'aa', FAIL),
|
||||||
# ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
|
# ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
|
||||||
('((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
|
('((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
|
||||||
|
('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
|
||||||
('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
|
('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
|
||||||
('multiple words of text', 'uh-uh', FAIL),
|
('multiple words of text', 'uh-uh', FAIL),
|
||||||
('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
|
('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
|
||||||
|
@ -448,7 +449,6 @@ tests = [
|
||||||
('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'),
|
('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'),
|
||||||
#('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
|
#('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
|
||||||
#('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
|
#('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
|
||||||
('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
|
|
||||||
('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'),
|
('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'),
|
||||||
('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'),
|
('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'),
|
||||||
('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'),
|
('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'),
|
||||||
|
@ -506,10 +506,21 @@ xyzabc
|
||||||
('a.b', 'a\nb', FAIL),
|
('a.b', 'a\nb', FAIL),
|
||||||
('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
|
('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
|
||||||
|
|
||||||
# test \w, etc.
|
# test \w, etc. both inside and outside character classes
|
||||||
|
|
||||||
('\\w+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
|
('\\w+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
|
||||||
|
('[\\w]+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
|
||||||
('\\D+', '1234abc5678', SUCCEED, 'found', 'abc'),
|
('\\D+', '1234abc5678', SUCCEED, 'found', 'abc'),
|
||||||
|
('[\\D]+', '1234abc5678', SUCCEED, 'found', 'abc'),
|
||||||
('[\\da-fA-F]+', '123abc', SUCCEED, 'found', '123abc'),
|
('[\\da-fA-F]+', '123abc', SUCCEED, 'found', '123abc'),
|
||||||
('[\\d-x]', '-', SYNTAX_ERROR),
|
('[\\d-x]', '-', SYNTAX_ERROR),
|
||||||
|
(r'([\s]*)([\S]*)([\s]*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
|
||||||
|
(r'(\s*)(\S*)(\s*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
|
||||||
|
|
||||||
|
(r'\xff', '\377', SUCCEED, 'found', chr(255)),
|
||||||
|
(r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
|
||||||
|
(r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
|
||||||
|
('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
|
||||||
|
(r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
|
||||||
|
(r'[\t][\n][\v][\r][\f][\a][\A][\b][\B][\Z][\g]', '\t\n\v\r\f\aA\bBZg', SUCCEED, 'found', '\t\n\v\r\f\aA\bBZg'),
|
||||||
]
|
]
|
||||||
|
|
|
@ -278,6 +278,12 @@ tests = [
|
||||||
('\\([xyz]*\\)x', 'abcx', SUCCEED,
|
('\\([xyz]*\\)x', 'abcx', SUCCEED,
|
||||||
'found+"-"+g1', 'x-'),
|
'found+"-"+g1', 'x-'),
|
||||||
('\\(a\\)+b\\|aac', 'aac', SUCCEED,
|
('\\(a\\)+b\\|aac', 'aac', SUCCEED,
|
||||||
'found+"-"+g1', 'aac-None')
|
'found+"-"+g1', 'aac-None'),
|
||||||
|
('\<a', 'a', SUCCEED, 'found', 'a'),
|
||||||
|
('\<a', '!', FAIL),
|
||||||
|
('a\<b', 'ab', FAIL),
|
||||||
|
('a\>', 'ab', FAIL),
|
||||||
|
('a\>', 'a!', SUCCEED, 'found', 'a'),
|
||||||
|
('a\>', 'a', SUCCEED, 'found', 'a'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
@ -31,6 +31,10 @@ try:
|
||||||
|
|
||||||
assert re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx') == 'xxxx'
|
assert re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx') == 'xxxx'
|
||||||
|
|
||||||
|
assert re.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a') == '\t\n\v\r\f\a\bBZ\aAwWsSdD'
|
||||||
|
assert re.sub('a', '\t\n\v\r\f\a', 'a') == '\t\n\v\r\f\a'
|
||||||
|
assert re.sub('a', '\t\n\v\r\f\a', 'a') == (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7))
|
||||||
|
|
||||||
except AssertionError:
|
except AssertionError:
|
||||||
raise TestFailed, "re.sub"
|
raise TestFailed, "re.sub"
|
||||||
|
|
||||||
|
@ -120,7 +124,6 @@ if verbose:
|
||||||
print 'Running re_tests test suite'
|
print 'Running re_tests test suite'
|
||||||
|
|
||||||
for t in tests:
|
for t in tests:
|
||||||
print t
|
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
pattern=s=outcome=repl=expected=None
|
pattern=s=outcome=repl=expected=None
|
||||||
if len(t)==5:
|
if len(t)==5:
|
||||||
|
@ -136,6 +139,7 @@ for t in tests:
|
||||||
if outcome==SYNTAX_ERROR: pass # Expected a syntax error
|
if outcome==SYNTAX_ERROR: pass # Expected a syntax error
|
||||||
else:
|
else:
|
||||||
print '=== Syntax error:', t
|
print '=== Syntax error:', t
|
||||||
|
except KeyboardInterrupt: raise KeyboardInterrupt
|
||||||
except:
|
except:
|
||||||
print '*** Unexpected error ***'
|
print '*** Unexpected error ***'
|
||||||
if verbose:
|
if verbose:
|
||||||
|
@ -182,3 +186,10 @@ for t in tests:
|
||||||
print repr(repl)+' should be '+repr(expected)
|
print repr(repl)+' should be '+repr(expected)
|
||||||
else:
|
else:
|
||||||
print '=== Failed incorrectly', t
|
print '=== Failed incorrectly', t
|
||||||
|
|
||||||
|
# Try the match with IGNORECASE enabled, and check that it
|
||||||
|
# still succeeds.
|
||||||
|
obj=re.compile(pattern, re.IGNORECASE)
|
||||||
|
result=obj.search(s)
|
||||||
|
if result==None:
|
||||||
|
print '=== Fails on case-insensitive match', t
|
||||||
|
|
|
@ -132,8 +132,10 @@ regobj_match(re, args)
|
||||||
re->re_lastok = NULL;
|
re->re_lastok = NULL;
|
||||||
result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs);
|
result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs);
|
||||||
if (result < -1) {
|
if (result < -1) {
|
||||||
/* Failure like stack overflow */
|
/* Serious failure of some sort; if re_match didn't
|
||||||
PyErr_SetString(RegexError, "match failure");
|
set an exception, raise a generic error */
|
||||||
|
if (!PyErr_Occurred())
|
||||||
|
PyErr_SetString(RegexError, "match failure");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
if (result >= 0) {
|
if (result >= 0) {
|
||||||
|
@ -174,8 +176,10 @@ regobj_search(re, args)
|
||||||
result = re_search(&re->re_patbuf, buffer, size, offset, range,
|
result = re_search(&re->re_patbuf, buffer, size, offset, range,
|
||||||
&re->re_regs);
|
&re->re_regs);
|
||||||
if (result < -1) {
|
if (result < -1) {
|
||||||
/* Failure like stack overflow */
|
/* Serious failure of some sort; if re_match didn't
|
||||||
PyErr_SetString(RegexError, "match failure");
|
set an exception, raise a generic error */
|
||||||
|
if (!PyErr_Occurred())
|
||||||
|
PyErr_SetString(RegexError, "match failure");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
if (result >= 0) {
|
if (result >= 0) {
|
||||||
|
|
|
@ -33,6 +33,7 @@
|
||||||
#include "myproto.h" /* For PROTO macro --Guido */
|
#include "myproto.h" /* For PROTO macro --Guido */
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include "Python.h"
|
||||||
|
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
#define NDEBUG 1
|
#define NDEBUG 1
|
||||||
|
@ -85,16 +86,16 @@ typedef union item_t
|
||||||
{
|
{
|
||||||
int num;
|
int num;
|
||||||
int level;
|
int level;
|
||||||
char *start;
|
unsigned char *start;
|
||||||
char *end;
|
unsigned char *end;
|
||||||
} reg;
|
} reg;
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
int count;
|
int count;
|
||||||
int level;
|
int level;
|
||||||
int phantom;
|
int phantom;
|
||||||
char *code;
|
unsigned char *code;
|
||||||
char *text;
|
unsigned char *text;
|
||||||
} fail;
|
} fail;
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
|
@ -139,8 +140,8 @@ typedef struct match_state
|
||||||
* offsets from the beginning of the string before returning the
|
* offsets from the beginning of the string before returning the
|
||||||
* registers to the calling program. */
|
* registers to the calling program. */
|
||||||
|
|
||||||
char *start[NUM_REGISTERS];
|
unsigned char *start[NUM_REGISTERS];
|
||||||
char *end[NUM_REGISTERS];
|
unsigned char *end[NUM_REGISTERS];
|
||||||
|
|
||||||
/* Keeps track of whether a register has changed recently. */
|
/* Keeps track of whether a register has changed recently. */
|
||||||
|
|
||||||
|
@ -422,7 +423,7 @@ enum regexp_compiled_ops /* opcodes for compiled regexp */
|
||||||
Cwordbound, /* match if at word boundary */
|
Cwordbound, /* match if at word boundary */
|
||||||
Cnotwordbound, /* match if not at word boundary */
|
Cnotwordbound, /* match if not at word boundary */
|
||||||
Csyntaxspec, /* matches syntax code (1 byte follows) */
|
Csyntaxspec, /* matches syntax code (1 byte follows) */
|
||||||
Cnotsyntaxspec, /* matches if syntax code does not match (1 byte foll)*/
|
Cnotsyntaxspec, /* matches if syntax code does not match (1 byte follows) */
|
||||||
Crepeat1
|
Crepeat1
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -469,7 +470,7 @@ static int regexp_ansi_sequences;
|
||||||
|
|
||||||
#define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)]
|
#define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)]
|
||||||
|
|
||||||
char re_syntax_table[256];
|
unsigned char re_syntax_table[256];
|
||||||
|
|
||||||
void re_compile_initialize(void)
|
void re_compile_initialize(void)
|
||||||
{
|
{
|
||||||
|
@ -593,11 +594,11 @@ static int hex_char_to_decimal(int ch)
|
||||||
return 16;
|
return 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void re_compile_fastmap_aux(char *code,
|
static void re_compile_fastmap_aux(unsigned char *code,
|
||||||
int pos,
|
int pos,
|
||||||
char *visited,
|
unsigned char *visited,
|
||||||
char *can_be_null,
|
unsigned char *can_be_null,
|
||||||
char *fastmap)
|
unsigned char *fastmap)
|
||||||
{
|
{
|
||||||
int a;
|
int a;
|
||||||
int b;
|
int b;
|
||||||
|
@ -717,19 +718,20 @@ static void re_compile_fastmap_aux(char *code,
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
abort(); /* probably some opcode is missing from this switch */
|
PyErr_SetString(PyExc_SystemError, "Unknown regex opcode: memory corrupted?");
|
||||||
|
return;
|
||||||
/*NOTREACHED*/
|
/*NOTREACHED*/
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int re_do_compile_fastmap(char *buffer,
|
static int re_do_compile_fastmap(unsigned char *buffer,
|
||||||
int used,
|
int used,
|
||||||
int pos,
|
int pos,
|
||||||
char *can_be_null,
|
unsigned char *can_be_null,
|
||||||
char *fastmap)
|
unsigned char *fastmap)
|
||||||
{
|
{
|
||||||
char small_visited[512], *visited;
|
unsigned char small_visited[512], *visited;
|
||||||
|
|
||||||
if (used <= sizeof(small_visited))
|
if (used <= sizeof(small_visited))
|
||||||
visited = small_visited;
|
visited = small_visited;
|
||||||
|
@ -759,6 +761,7 @@ void re_compile_fastmap(regexp_t bufp)
|
||||||
&bufp->can_be_null,
|
&bufp->can_be_null,
|
||||||
bufp->fastmap))
|
bufp->fastmap))
|
||||||
return;
|
return;
|
||||||
|
if (PyErr_Occurred()) return;
|
||||||
if (bufp->buffer[0] == Cbol)
|
if (bufp->buffer[0] == Cbol)
|
||||||
bufp->anchor = 1; /* begline */
|
bufp->anchor = 1; /* begline */
|
||||||
else
|
else
|
||||||
|
@ -792,13 +795,13 @@ void re_compile_fastmap(regexp_t bufp)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int re_optimize_star_jump(regexp_t bufp, char *code)
|
static int re_optimize_star_jump(regexp_t bufp, unsigned char *code)
|
||||||
{
|
{
|
||||||
char map[256];
|
unsigned char map[256];
|
||||||
char can_be_null;
|
unsigned char can_be_null;
|
||||||
char *p1;
|
unsigned char *p1;
|
||||||
char *p2;
|
unsigned char *p2;
|
||||||
char ch;
|
unsigned char ch;
|
||||||
int a;
|
int a;
|
||||||
int b;
|
int b;
|
||||||
int num_instructions = 0;
|
int num_instructions = 0;
|
||||||
|
@ -808,6 +811,13 @@ static int re_optimize_star_jump(regexp_t bufp, char *code)
|
||||||
a = (int)SHORT(a);
|
a = (int)SHORT(a);
|
||||||
|
|
||||||
p1 = code + a + 3; /* skip the failure_jump */
|
p1 = code + a + 3; /* skip the failure_jump */
|
||||||
|
/* Check that the jump is within the pattern */
|
||||||
|
if (p1<bufp->buffer || bufp->buffer+bufp->used<p1)
|
||||||
|
{
|
||||||
|
PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (failure_jump opt)");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
assert(p1[-3] == Cfailure_jump);
|
assert(p1[-3] == Cfailure_jump);
|
||||||
p2 = code;
|
p2 = code;
|
||||||
/* p1 points inside loop, p2 points to after loop */
|
/* p1 points inside loop, p2 points to after loop */
|
||||||
|
@ -923,7 +933,7 @@ static int re_optimize_star_jump(regexp_t bufp, char *code)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
make_update_jump:
|
/* make_update_jump: */
|
||||||
code -= 3;
|
code -= 3;
|
||||||
a += 3; /* jump to after the Cfailure_jump */
|
a += 3; /* jump to after the Cfailure_jump */
|
||||||
code[0] = Cupdate_failure_jump;
|
code[0] = Cupdate_failure_jump;
|
||||||
|
@ -948,7 +958,7 @@ static int re_optimize_star_jump(regexp_t bufp, char *code)
|
||||||
|
|
||||||
static int re_optimize(regexp_t bufp)
|
static int re_optimize(regexp_t bufp)
|
||||||
{
|
{
|
||||||
char *code;
|
unsigned char *code;
|
||||||
|
|
||||||
code = bufp->buffer;
|
code = bufp->buffer;
|
||||||
|
|
||||||
|
@ -1073,7 +1083,7 @@ else \
|
||||||
|
|
||||||
#define GETHEX(var) \
|
#define GETHEX(var) \
|
||||||
{ \
|
{ \
|
||||||
char gethex_ch, gethex_value; \
|
unsigned char gethex_ch, gethex_value; \
|
||||||
NEXTCHAR(gethex_ch); \
|
NEXTCHAR(gethex_ch); \
|
||||||
gethex_value = hex_char_to_decimal(gethex_ch); \
|
gethex_value = hex_char_to_decimal(gethex_ch); \
|
||||||
if (gethex_value == 16) \
|
if (gethex_value == 16) \
|
||||||
|
@ -1147,7 +1157,7 @@ else \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
char *re_compile_pattern(char *regex, int size, regexp_t bufp)
|
unsigned char *re_compile_pattern(unsigned char *regex, int size, regexp_t bufp)
|
||||||
{
|
{
|
||||||
int a;
|
int a;
|
||||||
int pos;
|
int pos;
|
||||||
|
@ -1161,8 +1171,8 @@ char *re_compile_pattern(char *regex, int size, regexp_t bufp)
|
||||||
int future_jumps[MAX_NESTING];
|
int future_jumps[MAX_NESTING];
|
||||||
int num_jumps;
|
int num_jumps;
|
||||||
unsigned char ch = '\0';
|
unsigned char ch = '\0';
|
||||||
char *pattern;
|
unsigned char *pattern;
|
||||||
char *translate;
|
unsigned char *translate;
|
||||||
int next_register;
|
int next_register;
|
||||||
int paren_depth;
|
int paren_depth;
|
||||||
int num_open_registers;
|
int num_open_registers;
|
||||||
|
@ -1580,23 +1590,23 @@ if (translate) \
|
||||||
var = translate[var]
|
var = translate[var]
|
||||||
|
|
||||||
int re_match(regexp_t bufp,
|
int re_match(regexp_t bufp,
|
||||||
char *string,
|
unsigned char *string,
|
||||||
int size,
|
int size,
|
||||||
int pos,
|
int pos,
|
||||||
regexp_registers_t old_regs)
|
regexp_registers_t old_regs)
|
||||||
{
|
{
|
||||||
char *code;
|
unsigned char *code;
|
||||||
char *translate;
|
unsigned char *translate;
|
||||||
char *text;
|
unsigned char *text;
|
||||||
char *textstart;
|
unsigned char *textstart;
|
||||||
char *textend;
|
unsigned char *textend;
|
||||||
int a;
|
int a;
|
||||||
int b;
|
int b;
|
||||||
int ch;
|
int ch;
|
||||||
int reg;
|
int reg;
|
||||||
int match_end;
|
int match_end;
|
||||||
char *regstart;
|
unsigned char *regstart;
|
||||||
char *regend;
|
unsigned char *regend;
|
||||||
int regsize;
|
int regsize;
|
||||||
match_state state;
|
match_state state;
|
||||||
|
|
||||||
|
@ -1738,18 +1748,36 @@ int re_match(regexp_t bufp,
|
||||||
a = (unsigned char)*code++;
|
a = (unsigned char)*code++;
|
||||||
a |= (unsigned char)*code++ << 8;
|
a |= (unsigned char)*code++ << 8;
|
||||||
code += (int)SHORT(a);
|
code += (int)SHORT(a);
|
||||||
|
if (code<bufp->buffer || bufp->buffer+bufp->used<code) {
|
||||||
|
PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cjump)");
|
||||||
|
FREE_STATE(state);
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
goto continue_matching;
|
goto continue_matching;
|
||||||
}
|
}
|
||||||
case Cdummy_failure_jump:
|
case Cdummy_failure_jump:
|
||||||
{
|
{
|
||||||
|
unsigned char *failuredest;
|
||||||
|
|
||||||
a = (unsigned char)*code++;
|
a = (unsigned char)*code++;
|
||||||
a |= (unsigned char)*code++ << 8;
|
a |= (unsigned char)*code++ << 8;
|
||||||
a = (int)SHORT(a);
|
a = (int)SHORT(a);
|
||||||
assert(*code == Cfailure_jump);
|
assert(*code == Cfailure_jump);
|
||||||
b = (unsigned char)code[1];
|
b = (unsigned char)code[1];
|
||||||
b |= (unsigned char)code[2] << 8;
|
b |= (unsigned char)code[2] << 8;
|
||||||
PUSH_FAILURE(state, code + (int)SHORT(b) + 3, NULL, goto error);
|
failuredest = code + (int)SHORT(b) + 3;
|
||||||
|
if (failuredest<bufp->buffer || bufp->buffer+bufp->used < failuredest) {
|
||||||
|
PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cdummy_failure_jump failuredest)");
|
||||||
|
FREE_STATE(state);
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
PUSH_FAILURE(state, failuredest, NULL, goto error);
|
||||||
code += a;
|
code += a;
|
||||||
|
if (code<bufp->buffer || bufp->buffer+bufp->used < code) {
|
||||||
|
PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cdummy_failure_jump code)");
|
||||||
|
FREE_STATE(state);
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
goto continue_matching;
|
goto continue_matching;
|
||||||
}
|
}
|
||||||
case Cfailure_jump:
|
case Cfailure_jump:
|
||||||
|
@ -1757,16 +1785,26 @@ int re_match(regexp_t bufp,
|
||||||
a = (unsigned char)*code++;
|
a = (unsigned char)*code++;
|
||||||
a |= (unsigned char)*code++ << 8;
|
a |= (unsigned char)*code++ << 8;
|
||||||
a = (int)SHORT(a);
|
a = (int)SHORT(a);
|
||||||
|
if (code+a<bufp->buffer || bufp->buffer+bufp->used < code+a) {
|
||||||
|
PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cfailure_jump)");
|
||||||
|
FREE_STATE(state);
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
PUSH_FAILURE(state, code + a, text, goto error);
|
PUSH_FAILURE(state, code + a, text, goto error);
|
||||||
goto continue_matching;
|
goto continue_matching;
|
||||||
}
|
}
|
||||||
case Crepeat1:
|
case Crepeat1:
|
||||||
{
|
{
|
||||||
char *pinst;
|
unsigned char *pinst;
|
||||||
a = (unsigned char)*code++;
|
a = (unsigned char)*code++;
|
||||||
a |= (unsigned char)*code++ << 8;
|
a |= (unsigned char)*code++ << 8;
|
||||||
a = (int)SHORT(a);
|
a = (int)SHORT(a);
|
||||||
pinst = code + a;
|
pinst = code + a;
|
||||||
|
if (pinst<bufp->buffer || bufp->buffer+bufp->used<pinst) {
|
||||||
|
PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Crepeat1)");
|
||||||
|
FREE_STATE(state);
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
/* pinst is sole instruction in loop, and it matches a
|
/* pinst is sole instruction in loop, and it matches a
|
||||||
* single character. Since Crepeat1 was originally a
|
* single character. Since Crepeat1 was originally a
|
||||||
* Cupdate_failure_jump, we also know that backtracking
|
* Cupdate_failure_jump, we also know that backtracking
|
||||||
|
@ -1777,8 +1815,8 @@ int re_match(regexp_t bufp,
|
||||||
switch (*pinst++)
|
switch (*pinst++)
|
||||||
{
|
{
|
||||||
case Cset:
|
case Cset:
|
||||||
{
|
{
|
||||||
if (translate)
|
if (translate)
|
||||||
{
|
{
|
||||||
while (text < textend)
|
while (text < textend)
|
||||||
{
|
{
|
||||||
|
@ -1801,7 +1839,7 @@ int re_match(regexp_t bufp,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Cexact:
|
case Cexact:
|
||||||
{
|
{
|
||||||
ch = (unsigned char)*pinst;
|
ch = (unsigned char)*pinst;
|
||||||
|
@ -1858,7 +1896,9 @@ int re_match(regexp_t bufp,
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
abort();
|
FREE_STATE(state);
|
||||||
|
PyErr_SetString(PyExc_SystemError, "Unknown regex opcode: memory corrupted?");
|
||||||
|
return -2;
|
||||||
/*NOTREACHED*/
|
/*NOTREACHED*/
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1900,9 +1940,9 @@ int re_match(regexp_t bufp,
|
||||||
goto fail;
|
goto fail;
|
||||||
if (text == textend)
|
if (text == textend)
|
||||||
goto continue_matching;
|
goto continue_matching;
|
||||||
if (SYNTAX(*text) & Sword)
|
if (!(SYNTAX(*text) & Sword))
|
||||||
goto fail;
|
goto continue_matching;
|
||||||
goto continue_matching;
|
goto fail;
|
||||||
}
|
}
|
||||||
case Cwordbound:
|
case Cwordbound:
|
||||||
{
|
{
|
||||||
|
@ -1936,16 +1976,20 @@ int re_match(regexp_t bufp,
|
||||||
{
|
{
|
||||||
NEXTCHAR(ch);
|
NEXTCHAR(ch);
|
||||||
if (SYNTAX(ch) & (unsigned char)*code++)
|
if (SYNTAX(ch) & (unsigned char)*code++)
|
||||||
break;
|
goto fail;
|
||||||
goto continue_matching;
|
goto continue_matching;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
abort();
|
FREE_STATE(state);
|
||||||
|
PyErr_SetString(PyExc_SystemError, "Unknown regex opcode: memory corrupted?");
|
||||||
|
return -2;
|
||||||
/*NOTREACHED*/
|
/*NOTREACHED*/
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#if 0 /* This line is never reached --Guido */
|
#if 0 /* This line is never reached --Guido */
|
||||||
abort();
|
abort();
|
||||||
#endif
|
#endif
|
||||||
|
@ -1953,6 +1997,7 @@ int re_match(regexp_t bufp,
|
||||||
*NOTREACHED
|
*NOTREACHED
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/* Using "break;" in the above switch statement is equivalent to "goto fail;" */
|
||||||
fail:
|
fail:
|
||||||
POP_FAILURE(state, code, text, goto done_matching, goto error);
|
POP_FAILURE(state, code, text, goto done_matching, goto error);
|
||||||
goto continue_matching;
|
goto continue_matching;
|
||||||
|
@ -1970,32 +2015,36 @@ int re_match(regexp_t bufp,
|
||||||
return -2;
|
return -2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#undef PREFETCH
|
#undef PREFETCH
|
||||||
#undef NEXTCHAR
|
#undef NEXTCHAR
|
||||||
|
|
||||||
int re_search(regexp_t bufp,
|
int re_search(regexp_t bufp,
|
||||||
char *string,
|
unsigned char *string,
|
||||||
int size,
|
int size,
|
||||||
int pos,
|
int pos,
|
||||||
int range,
|
int range,
|
||||||
regexp_registers_t regs)
|
regexp_registers_t regs)
|
||||||
{
|
{
|
||||||
char *fastmap;
|
unsigned char *fastmap;
|
||||||
char *translate;
|
unsigned char *translate;
|
||||||
char *text;
|
unsigned char *text;
|
||||||
char *partstart;
|
unsigned char *partstart;
|
||||||
char *partend;
|
unsigned char *partend;
|
||||||
int dir;
|
int dir;
|
||||||
int ret;
|
int ret;
|
||||||
char anchor;
|
unsigned char anchor;
|
||||||
|
|
||||||
assert(size >= 0 && pos >= 0);
|
assert(size >= 0 && pos >= 0);
|
||||||
assert(pos + range >= 0 && pos + range <= size); /* Bugfix by ylo */
|
assert(pos + range >= 0 && pos + range <= size); /* Bugfix by ylo */
|
||||||
|
|
||||||
fastmap = bufp->fastmap;
|
fastmap = bufp->fastmap;
|
||||||
translate = bufp->translate;
|
translate = bufp->translate;
|
||||||
if (fastmap && !bufp->fastmap_accurate)
|
if (fastmap && !bufp->fastmap_accurate) {
|
||||||
re_compile_fastmap(bufp);
|
re_compile_fastmap(bufp);
|
||||||
|
if (PyErr_Occurred()) return -2;
|
||||||
|
}
|
||||||
|
|
||||||
anchor = bufp->anchor;
|
anchor = bufp->anchor;
|
||||||
if (bufp->can_be_null == 1) /* can_be_null == 2: can match null at eob */
|
if (bufp->can_be_null == 1) /* can_be_null == 2: can match null at eob */
|
||||||
fastmap = NULL;
|
fastmap = NULL;
|
||||||
|
|
|
@ -33,16 +33,16 @@ extern "C" {
|
||||||
|
|
||||||
typedef struct re_pattern_buffer
|
typedef struct re_pattern_buffer
|
||||||
{
|
{
|
||||||
char *buffer; /* compiled pattern */
|
unsigned char *buffer; /* compiled pattern */
|
||||||
int allocated; /* allocated size of compiled pattern */
|
int allocated; /* allocated size of compiled pattern */
|
||||||
int used; /* actual length of compiled pattern */
|
int used; /* actual length of compiled pattern */
|
||||||
char *fastmap; /* fastmap[ch] is true if ch can start pattern */
|
unsigned char *fastmap; /* fastmap[ch] is true if ch can start pattern */
|
||||||
char *translate; /* translation to apply during compilation/matching */
|
unsigned char *translate; /* translation to apply during compilation/matching */
|
||||||
char fastmap_accurate; /* true if fastmap is valid */
|
unsigned char fastmap_accurate; /* true if fastmap is valid */
|
||||||
char can_be_null; /* true if can match empty string */
|
unsigned char can_be_null; /* true if can match empty string */
|
||||||
char uses_registers; /* registers are used and need to be initialized */
|
unsigned char uses_registers; /* registers are used and need to be initialized */
|
||||||
int num_registers; /* number of registers used */
|
int num_registers; /* number of registers used */
|
||||||
char anchor; /* anchor: 0=none 1=begline 2=begbuf */
|
unsigned char anchor; /* anchor: 0=none 1=begline 2=begbuf */
|
||||||
} *regexp_t;
|
} *regexp_t;
|
||||||
|
|
||||||
typedef struct re_registers
|
typedef struct re_registers
|
||||||
|
@ -93,7 +93,7 @@ extern int re_syntax;
|
||||||
/* This is the actual syntax mask. It was added so that Python could do
|
/* This is the actual syntax mask. It was added so that Python could do
|
||||||
* syntax-dependent munging of patterns before compilation. */
|
* syntax-dependent munging of patterns before compilation. */
|
||||||
|
|
||||||
extern char re_syntax_table[256];
|
extern unsigned char re_syntax_table[256];
|
||||||
|
|
||||||
void re_compile_initialize(void);
|
void re_compile_initialize(void);
|
||||||
|
|
||||||
|
@ -101,7 +101,7 @@ int re_set_syntax(int syntax);
|
||||||
/* This sets the syntax to use and returns the previous syntax. The
|
/* This sets the syntax to use and returns the previous syntax. The
|
||||||
* syntax is specified by a bit mask of the above defined bits. */
|
* syntax is specified by a bit mask of the above defined bits. */
|
||||||
|
|
||||||
char *re_compile_pattern(char *regex, int regex_size, regexp_t compiled);
|
unsigned char *re_compile_pattern(unsigned char *regex, int regex_size, regexp_t compiled);
|
||||||
/* This compiles the regexp (given in regex and length in regex_size).
|
/* This compiles the regexp (given in regex and length in regex_size).
|
||||||
* This returns NULL if the regexp compiled successfully, and an error
|
* This returns NULL if the regexp compiled successfully, and an error
|
||||||
* message if an error was encountered. The buffer field must be
|
* message if an error was encountered. The buffer field must be
|
||||||
|
@ -110,14 +110,14 @@ char *re_compile_pattern(char *regex, int regex_size, regexp_t compiled);
|
||||||
* buffer is NULL). Also, the translate field must be set to point to a
|
* buffer is NULL). Also, the translate field must be set to point to a
|
||||||
* valid translation table, or NULL if it is not used. */
|
* valid translation table, or NULL if it is not used. */
|
||||||
|
|
||||||
int re_match(regexp_t compiled, char *string, int size, int pos,
|
int re_match(regexp_t compiled, unsigned char *string, int size, int pos,
|
||||||
regexp_registers_t old_regs);
|
regexp_registers_t old_regs);
|
||||||
/* This tries to match the regexp against the string. This returns the
|
/* This tries to match the regexp against the string. This returns the
|
||||||
* length of the matched portion, or -1 if the pattern could not be
|
* length of the matched portion, or -1 if the pattern could not be
|
||||||
* matched and -2 if an error (such as failure stack overflow) is
|
* matched and -2 if an error (such as failure stack overflow) is
|
||||||
* encountered. */
|
* encountered. */
|
||||||
|
|
||||||
int re_search(regexp_t compiled, char *string, int size, int startpos,
|
int re_search(regexp_t compiled, unsigned char *string, int size, int startpos,
|
||||||
int range, regexp_registers_t regs);
|
int range, regexp_registers_t regs);
|
||||||
/* This searches for a substring matching the regexp. This returns the
|
/* This searches for a substring matching the regexp. This returns the
|
||||||
* first index at which a match is found. range specifies at how many
|
* first index at which a match is found. range specifies at how many
|
||||||
|
@ -132,28 +132,16 @@ void re_compile_fastmap(regexp_t compiled);
|
||||||
* the calling program must have initialized the fastmap field to point
|
* the calling program must have initialized the fastmap field to point
|
||||||
* to an array of 256 characters. */
|
* to an array of 256 characters. */
|
||||||
|
|
||||||
char *re_comp(char *s);
|
|
||||||
/* BSD 4.2 regex library routine re_comp. This compiles the regexp into
|
|
||||||
* an internal buffer. This returns NULL if the regexp was compiled
|
|
||||||
* successfully, and an error message if there was an error. */
|
|
||||||
|
|
||||||
int re_exec(char *s);
|
|
||||||
/* BSD 4.2 regexp library routine re_exec. This returns true if the
|
|
||||||
* string matches the regular expression (that is, a matching part is
|
|
||||||
* found anywhere in the string). */
|
|
||||||
|
|
||||||
#else /* HAVE_PROTOTYPES */
|
#else /* HAVE_PROTOTYPES */
|
||||||
|
|
||||||
extern int re_syntax;
|
extern int re_syntax;
|
||||||
extern char re_syntax_table[256];
|
extern unsigned char re_syntax_table[256];
|
||||||
void re_compile_initialize();
|
void re_compile_initialize();
|
||||||
int re_set_syntax();
|
int re_set_syntax();
|
||||||
char *re_compile_pattern();
|
unsigned char *re_compile_pattern();
|
||||||
int re_match();
|
int re_match();
|
||||||
int re_search();
|
int re_search();
|
||||||
void re_compile_fastmap();
|
void re_compile_fastmap();
|
||||||
char *re_comp();
|
|
||||||
int re_exec();
|
|
||||||
|
|
||||||
#endif /* HAVE_PROTOTYPES */
|
#endif /* HAVE_PROTOTYPES */
|
||||||
|
|
||||||
|
|
|
@ -62,7 +62,7 @@ static PyObject *ReopError; /* Exception */
|
||||||
#define BEGINNING_OF_BUFFER 7
|
#define BEGINNING_OF_BUFFER 7
|
||||||
#define END_OF_BUFFER 8
|
#define END_OF_BUFFER 8
|
||||||
|
|
||||||
static char *reop_casefold;
|
static unsigned char *reop_casefold;
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
makeresult(regs, num_regs)
|
makeresult(regs, num_regs)
|
||||||
|
@ -105,7 +105,7 @@ reop_match(self, args)
|
||||||
PyObject *self;
|
PyObject *self;
|
||||||
PyObject *args;
|
PyObject *args;
|
||||||
{
|
{
|
||||||
char *string;
|
unsigned char *string;
|
||||||
int fastmaplen, stringlen;
|
int fastmaplen, stringlen;
|
||||||
int can_be_null, anchor, i;
|
int can_be_null, anchor, i;
|
||||||
int flags, pos, result;
|
int flags, pos, result;
|
||||||
|
@ -163,8 +163,8 @@ reop_match(self, args)
|
||||||
|
|
||||||
if (result < -1) {
|
if (result < -1) {
|
||||||
/* Failure like stack overflow */
|
/* Failure like stack overflow */
|
||||||
PyErr_SetString(ReopError, "match failure");
|
if (!PyErr_Occurred())
|
||||||
|
PyErr_SetString(ReopError, "match failure");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
if (result == -1) {
|
if (result == -1) {
|
||||||
|
@ -174,12 +174,38 @@ reop_match(self, args)
|
||||||
return makeresult(&re_regs, bufp.num_registers);
|
return makeresult(&re_regs, bufp.num_registers);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
static PyObject *
|
||||||
|
reop_optimize(self, args)
|
||||||
|
PyObject *self;
|
||||||
|
PyObject *args;
|
||||||
|
{
|
||||||
|
unsigned char *buffer;
|
||||||
|
int buflen;
|
||||||
|
struct re_pattern_buffer bufp;
|
||||||
|
|
||||||
|
PyObject *opt_code;
|
||||||
|
|
||||||
|
if (!PyArg_Parse(args, "(s#)", &buffer, &buflen)) return NULL;
|
||||||
|
/* Create a new string for the optimized code */
|
||||||
|
opt_code=PyString_FromStringAndSize(buffer, buflen);
|
||||||
|
if (opt_code!=NULL)
|
||||||
|
{
|
||||||
|
bufp.buffer = PyString_AsString(opt_code);
|
||||||
|
bufp.used=bufp.allocated=buflen;
|
||||||
|
|
||||||
|
}
|
||||||
|
return opt_code;
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
reop_search(self, args)
|
reop_search(self, args)
|
||||||
PyObject *self;
|
PyObject *self;
|
||||||
PyObject *args;
|
PyObject *args;
|
||||||
{
|
{
|
||||||
char *string;
|
unsigned char *string;
|
||||||
int fastmaplen, stringlen;
|
int fastmaplen, stringlen;
|
||||||
int can_be_null, anchor, i;
|
int can_be_null, anchor, i;
|
||||||
int flags, pos, result;
|
int flags, pos, result;
|
||||||
|
@ -237,7 +263,8 @@ reop_search(self, args)
|
||||||
|
|
||||||
if (result < -1) {
|
if (result < -1) {
|
||||||
/* Failure like stack overflow */
|
/* Failure like stack overflow */
|
||||||
PyErr_SetString(ReopError, "match failure");
|
if (!PyErr_Occurred())
|
||||||
|
PyErr_SetString(ReopError, "match failure");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -626,7 +653,7 @@ reop__expand(self, args)
|
||||||
{
|
{
|
||||||
PyObject *results, *match_obj;
|
PyObject *results, *match_obj;
|
||||||
PyObject *repl_obj, *newstring;
|
PyObject *repl_obj, *newstring;
|
||||||
char *repl;
|
unsigned char *repl;
|
||||||
int size, total_len, i, start, pos;
|
int size, total_len, i, start, pos;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "OS", &match_obj, &repl_obj))
|
if (!PyArg_ParseTuple(args, "OS", &match_obj, &repl_obj))
|
||||||
|
@ -810,7 +837,7 @@ internal_split(args, retain)
|
||||||
reopobject *pattern;
|
reopobject *pattern;
|
||||||
int maxsplit=0, count=0, length, next=0, result;
|
int maxsplit=0, count=0, length, next=0, result;
|
||||||
int match_end=0; /* match_start is defined below */
|
int match_end=0; /* match_start is defined below */
|
||||||
char *start;
|
unsigned char *start;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "s#Oi", &start, &length, &pattern,
|
if (!PyArg_ParseTuple(args, "s#Oi", &start, &length, &pattern,
|
||||||
&maxsplit))
|
&maxsplit))
|
||||||
|
@ -911,6 +938,7 @@ static struct PyMethodDef reop_global_methods[] = {
|
||||||
{"expand_escape", reop_expand_escape, 1},
|
{"expand_escape", reop_expand_escape, 1},
|
||||||
{"_expand", reop__expand, 1},
|
{"_expand", reop__expand, 1},
|
||||||
#if 0
|
#if 0
|
||||||
|
{"_optimize", reop_optimize, 0},
|
||||||
{"split", reop_split, 0},
|
{"split", reop_split, 0},
|
||||||
{"splitx", reop_splitx, 0},
|
{"splitx", reop_splitx, 0},
|
||||||
#endif
|
#endif
|
||||||
|
@ -922,8 +950,8 @@ initreop()
|
||||||
{
|
{
|
||||||
PyObject *m, *d, *k, *v, *o;
|
PyObject *m, *d, *k, *v, *o;
|
||||||
int i;
|
int i;
|
||||||
char *s;
|
unsigned char *s;
|
||||||
char j[2];
|
unsigned char j[2];
|
||||||
|
|
||||||
re_compile_initialize();
|
re_compile_initialize();
|
||||||
|
|
||||||
|
@ -936,7 +964,7 @@ initreop()
|
||||||
goto finally;
|
goto finally;
|
||||||
|
|
||||||
/* Initialize reop.casefold constant */
|
/* Initialize reop.casefold constant */
|
||||||
if (!(v = PyString_FromStringAndSize((char *)NULL, 256)))
|
if (!(v = PyString_FromStringAndSize((unsigned char *)NULL, 256)))
|
||||||
goto finally;
|
goto finally;
|
||||||
|
|
||||||
if (!(s = PyString_AsString(v)))
|
if (!(s = PyString_AsString(v)))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue