Bumped SRE version number to 2.1. Cleaned up the code and added Python 1.5.2 compatibility patches.
This commit is contained in:
Fredrik Lundh 2001-01-16 07:37:30 +00:00
parent dfb673b457
commit 1c5aa6901f
4 changed files with 76 additions and 44 deletions

View file

@ -181,7 +181,7 @@ def _split(pattern, string, maxsplit=0):
continue continue
append(string[i:b]) append(string[i:b])
if g and b != e: if g and b != e:
extend(m.groups()) extend(list(m.groups()))
i = e i = e
n = n + 1 n = n + 1
append(string[i:]) append(string[i:])

View file

@ -60,6 +60,12 @@ FLAGS = {
"u": SRE_FLAG_UNICODE, "u": SRE_FLAG_UNICODE,
} }
try:
int("10", 8)
atoi = int
except TypeError:
atoi = string.atoi
class Pattern: class Pattern:
# master pattern object. keeps track of global attributes # master pattern object. keeps track of global attributes
def __init__(self): def __init__(self):
@ -216,7 +222,7 @@ def isname(name):
def _group(escape, groups): def _group(escape, groups):
# check if the escape string represents a valid group # check if the escape string represents a valid group
try: try:
gid = int(escape[1:]) gid = atoi(escape[1:])
if gid and gid < groups: if gid and gid < groups:
return gid return gid
except ValueError: except ValueError:
@ -239,13 +245,13 @@ def _class_escape(source, escape):
escape = escape[2:] escape = escape[2:]
if len(escape) != 2: if len(escape) != 2:
raise error, "bogus escape: %s" % repr("\\" + escape) raise error, "bogus escape: %s" % repr("\\" + escape)
return LITERAL, int(escape, 16) & 0xff return LITERAL, atoi(escape, 16) & 0xff
elif str(escape[1:2]) in OCTDIGITS: elif str(escape[1:2]) in OCTDIGITS:
# octal escape (up to three digits) # octal escape (up to three digits)
while source.next in OCTDIGITS and len(escape) < 5: while source.next in OCTDIGITS and len(escape) < 5:
escape = escape + source.get() escape = escape + source.get()
escape = escape[1:] escape = escape[1:]
return LITERAL, int(escape, 8) & 0xff return LITERAL, atoi(escape, 8) & 0xff
if len(escape) == 2: if len(escape) == 2:
return LITERAL, ord(escape[1]) return LITERAL, ord(escape[1])
except ValueError: except ValueError:
@ -267,12 +273,12 @@ def _escape(source, escape, state):
escape = escape + source.get() escape = escape + source.get()
if len(escape) != 4: if len(escape) != 4:
raise ValueError raise ValueError
return LITERAL, int(escape[2:], 16) & 0xff return LITERAL, atoi(escape[2:], 16) & 0xff
elif escape[1:2] == "0": elif escape[1:2] == "0":
# octal escape # octal escape
while source.next in OCTDIGITS and len(escape) < 4: while source.next in OCTDIGITS and len(escape) < 4:
escape = escape + source.get() escape = escape + source.get()
return LITERAL, int(escape[1:], 8) & 0xff return LITERAL, atoi(escape[1:], 8) & 0xff
elif escape[1:2] in DIGITS: elif escape[1:2] in DIGITS:
# octal escape *or* decimal group reference (sigh) # octal escape *or* decimal group reference (sigh)
here = source.tell() here = source.tell()
@ -282,7 +288,7 @@ def _escape(source, escape, state):
source.next in OCTDIGITS): source.next in OCTDIGITS):
# got three octal digits; this is an octal escape # got three octal digits; this is an octal escape
escape = escape + source.get() escape = escape + source.get()
return LITERAL, int(escape[1:], 8) & 0xff return LITERAL, atoi(escape[1:], 8) & 0xff
# got at least one decimal digit; this is a group reference # got at least one decimal digit; this is a group reference
group = _group(escape, state.groups) group = _group(escape, state.groups)
if group: if group:
@ -456,9 +462,9 @@ def _parse(source, state):
source.seek(here) source.seek(here)
continue continue
if lo: if lo:
min = int(lo) min = atoi(lo)
if hi: if hi:
max = int(hi) max = atoi(hi)
if max < min: if max < min:
raise error, "bad repeat interval" raise error, "bad repeat interval"
else: else:
@ -646,7 +652,7 @@ def parse_template(source, pattern):
if not name: if not name:
raise error, "bad group name" raise error, "bad group name"
try: try:
index = int(name) index = atoi(name)
except ValueError: except ValueError:
if not isname(name): if not isname(name):
raise error, "bad character in group name" raise error, "bad character in group name"
@ -662,7 +668,7 @@ def parse_template(source, pattern):
if group: if group:
if (s.next not in DIGITS or if (s.next not in DIGITS or
not _group(this + s.next, pattern.groups+1)): not _group(this + s.next, pattern.groups+1)):
code = MARK, int(group) code = MARK, group
break break
elif s.next in OCTDIGITS: elif s.next in OCTDIGITS:
this = this + s.get() this = this + s.get()
@ -670,7 +676,7 @@ def parse_template(source, pattern):
break break
if not code: if not code:
this = this[1:] this = this[1:]
code = LITERAL, int(this[-6:], 8) & 0xff code = LITERAL, atoi(this[-6:], 8) & 0xff
a(code) a(code)
else: else:
try: try:

View file

@ -325,16 +325,26 @@ for t in tests:
# Try the match on a unicode string, and check that it # Try the match on a unicode string, and check that it
# still succeeds. # still succeeds.
result=obj.search(unicode(s, "latin-1")) try:
if result==None: u = unicode(s, "latin-1")
print '=== Fails on unicode match', t except NameError:
pass
else:
result=obj.search(u)
if result==None:
print '=== Fails on unicode match', t
# Try the match on a unicode pattern, and check that it # Try the match on a unicode pattern, and check that it
# still succeeds. # still succeeds.
obj=sre.compile(unicode(pattern, "latin-1")) try:
result=obj.search(s) u = unicode(pattern, "latin-1")
if result==None: except NameError:
print '=== Fails on unicode pattern match', t pass
else:
obj=sre.compile(u)
result=obj.search(s)
if result==None:
print '=== Fails on unicode pattern match', t
# Try the match with the search area limited to the extent # Try the match with the search area limited to the extent
# of the match and see if it still succeeds. \B will # of the match and see if it still succeeds. \B will

View file

@ -24,7 +24,7 @@
* 2000-10-24 fl really fixed assert_not; reset groups in findall * 2000-10-24 fl really fixed assert_not; reset groups in findall
* 2000-12-21 fl fixed memory leak in groupdict * 2000-12-21 fl fixed memory leak in groupdict
* 2001-01-02 fl properly reset pointer after failed assertion in MIN_UNTIL * 2001-01-02 fl properly reset pointer after failed assertion in MIN_UNTIL
* 2001-01-15 fl don't use recursion for unbounded MIN_UTIL; fixed * 2001-01-15 fl avoid recursion for MIN_UTIL; fixed uppercase literal bug
* 2001-01-16 fl fixed memory leak in pattern destructor * 2001-01-16 fl fixed memory leak in pattern destructor
* *
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
@ -40,7 +40,7 @@
#ifndef SRE_RECURSIVE #ifndef SRE_RECURSIVE
char copyright[] = " SRE 0.9.9 Copyright (c) 1997-2001 by Secret Labs AB "; char copyright[] = " SRE 2.1 Copyright (c) 1997-2001 by Secret Labs AB ";
#include "Python.h" #include "Python.h"
@ -49,7 +49,9 @@ char copyright[] = " SRE 0.9.9 Copyright (c) 1997-2001 by Secret Labs AB ";
#include <ctype.h> #include <ctype.h>
/* name of this module, minus the leading underscore */ /* name of this module, minus the leading underscore */
#define MODULE "sre" #if !defined(SRE_MODULE)
#define SRE_MODULE "sre"
#endif
/* defining this one enables tracing */ /* defining this one enables tracing */
#undef VERBOSE #undef VERBOSE
@ -81,6 +83,10 @@ char copyright[] = " SRE 0.9.9 Copyright (c) 1997-2001 by Secret Labs AB ";
/* enables aggressive inlining (always on for Visual C) */ /* enables aggressive inlining (always on for Visual C) */
#undef USE_INLINE #undef USE_INLINE
#if PY_VERSION_HEX < 0x01060000
#define PyObject_DEL(op) PyMem_DEL((op))
#endif
/* -------------------------------------------------------------------- */ /* -------------------------------------------------------------------- */
#if defined(_MSC_VER) #if defined(_MSC_VER)
@ -221,6 +227,23 @@ sre_category(SRE_CODE category, unsigned int ch)
return SRE_UNI_IS_LINEBREAK(ch); return SRE_UNI_IS_LINEBREAK(ch);
case SRE_CATEGORY_UNI_NOT_LINEBREAK: case SRE_CATEGORY_UNI_NOT_LINEBREAK:
return !SRE_UNI_IS_LINEBREAK(ch); return !SRE_UNI_IS_LINEBREAK(ch);
#else
case SRE_CATEGORY_UNI_DIGIT:
return SRE_IS_DIGIT(ch);
case SRE_CATEGORY_UNI_NOT_DIGIT:
return !SRE_IS_DIGIT(ch);
case SRE_CATEGORY_UNI_SPACE:
return SRE_IS_SPACE(ch);
case SRE_CATEGORY_UNI_NOT_SPACE:
return !SRE_IS_SPACE(ch);
case SRE_CATEGORY_UNI_WORD:
return SRE_LOC_IS_WORD(ch);
case SRE_CATEGORY_UNI_NOT_WORD:
return !SRE_LOC_IS_WORD(ch);
case SRE_CATEGORY_UNI_LINEBREAK:
return SRE_IS_LINEBREAK(ch);
case SRE_CATEGORY_UNI_NOT_LINEBREAK:
return !SRE_IS_LINEBREAK(ch);
#endif #endif
} }
return 0; return 0;
@ -1208,33 +1231,22 @@ _compile(PyObject* self_, PyObject* args)
int groups = 0; int groups = 0;
PyObject* groupindex = NULL; PyObject* groupindex = NULL;
PyObject* indexgroup = NULL; PyObject* indexgroup = NULL;
if (!PyArg_ParseTuple(args, "OiO|iOO", &pattern, &flags, &code, if (!PyArg_ParseTuple(args, "OiO!|iOO", &pattern, &flags,
&groups, &groupindex, &indexgroup)) &PyList_Type, &code, &groups,
&groupindex, &indexgroup))
return NULL; return NULL;
code = PySequence_Fast(code, "code argument must be a sequence"); n = PyList_GET_SIZE(code);
if (!code)
return NULL;
#if PY_VERSION_HEX >= 0x01060000
n = PySequence_Size(code);
#else
n = PySequence_Length(code);
#endif
self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n); self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
if (!self) { if (!self)
Py_DECREF(code);
return NULL; return NULL;
}
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
PyObject *o = PySequence_Fast_GET_ITEM(code, i); PyObject *o = PyList_GET_ITEM(code, i);
self->code[i] = (SRE_CODE) PyInt_AsLong(o); self->code[i] = (SRE_CODE) PyInt_AsLong(o);
} }
Py_DECREF(code);
if (PyErr_Occurred()) { if (PyErr_Occurred()) {
PyObject_DEL(self); PyObject_DEL(self);
return NULL; return NULL;
@ -1270,9 +1282,11 @@ sre_getlower(PyObject* self, PyObject* args)
return NULL; return NULL;
if (flags & SRE_FLAG_LOCALE) if (flags & SRE_FLAG_LOCALE)
return Py_BuildValue("i", sre_lower_locale(character)); return Py_BuildValue("i", sre_lower_locale(character));
#if defined(HAVE_UNICODE)
if (flags & SRE_FLAG_UNICODE) if (flags & SRE_FLAG_UNICODE)
#if defined(HAVE_UNICODE)
return Py_BuildValue("i", sre_lower_unicode(character)); return Py_BuildValue("i", sre_lower_unicode(character));
#else
return Py_BuildValue("i", sre_lower_locale(character));
#endif #endif
return Py_BuildValue("i", sre_lower(character)); return Py_BuildValue("i", sre_lower(character));
} }
@ -1380,9 +1394,11 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
if (pattern->flags & SRE_FLAG_LOCALE) if (pattern->flags & SRE_FLAG_LOCALE)
state->lower = sre_lower_locale; state->lower = sre_lower_locale;
#if defined(HAVE_UNICODE)
else if (pattern->flags & SRE_FLAG_UNICODE) else if (pattern->flags & SRE_FLAG_UNICODE)
#if defined(HAVE_UNICODE)
state->lower = sre_lower_unicode; state->lower = sre_lower_unicode;
#else
state->lower = sre_lower_locale;
#endif #endif
else else
state->lower = sre_lower; state->lower = sre_lower;
@ -1520,7 +1536,7 @@ pattern_scanner(PatternObject* pattern, PyObject* args)
string = state_init(&self->state, pattern, string, start, end); string = state_init(&self->state, pattern, string, start, end);
if (!string) { if (!string) {
PyObject_Del(self); PyObject_DEL(self);
return NULL; return NULL;
} }
@ -1619,7 +1635,7 @@ call(char* function, PyObject* args)
PyObject* func; PyObject* func;
PyObject* result; PyObject* result;
name = PyString_FromString(MODULE); name = PyString_FromString(SRE_MODULE);
if (!name) if (!name)
return NULL; return NULL;
module = PyImport_Import(name); module = PyImport_Import(name);
@ -2366,7 +2382,7 @@ init_sre(void)
Pattern_Type.ob_type = Match_Type.ob_type = Pattern_Type.ob_type = Match_Type.ob_type =
Scanner_Type.ob_type = &PyType_Type; Scanner_Type.ob_type = &PyType_Type;
m = Py_InitModule("_" MODULE, _functions); m = Py_InitModule("_" SRE_MODULE, _functions);
d = PyModule_GetDict(m); d = PyModule_GetDict(m);
PyDict_SetItemString( PyDict_SetItemString(