mirror of
https://github.com/python/cpython.git
synced 2025-09-28 03:13:48 +00:00
bumped SRE version number to 2.1. cleaned up and added 1.5.2
compatibility patches.
This commit is contained in:
parent
dfb673b457
commit
1c5aa6901f
4 changed files with 76 additions and 44 deletions
|
@ -181,7 +181,7 @@ def _split(pattern, string, maxsplit=0):
|
||||||
continue
|
continue
|
||||||
append(string[i:b])
|
append(string[i:b])
|
||||||
if g and b != e:
|
if g and b != e:
|
||||||
extend(m.groups())
|
extend(list(m.groups()))
|
||||||
i = e
|
i = e
|
||||||
n = n + 1
|
n = n + 1
|
||||||
append(string[i:])
|
append(string[i:])
|
||||||
|
|
|
@ -60,6 +60,12 @@ FLAGS = {
|
||||||
"u": SRE_FLAG_UNICODE,
|
"u": SRE_FLAG_UNICODE,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
try:
|
||||||
|
int("10", 8)
|
||||||
|
atoi = int
|
||||||
|
except TypeError:
|
||||||
|
atoi = string.atoi
|
||||||
|
|
||||||
class Pattern:
|
class Pattern:
|
||||||
# master pattern object. keeps track of global attributes
|
# master pattern object. keeps track of global attributes
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
@ -216,7 +222,7 @@ def isname(name):
|
||||||
def _group(escape, groups):
|
def _group(escape, groups):
|
||||||
# check if the escape string represents a valid group
|
# check if the escape string represents a valid group
|
||||||
try:
|
try:
|
||||||
gid = int(escape[1:])
|
gid = atoi(escape[1:])
|
||||||
if gid and gid < groups:
|
if gid and gid < groups:
|
||||||
return gid
|
return gid
|
||||||
except ValueError:
|
except ValueError:
|
||||||
|
@ -239,13 +245,13 @@ def _class_escape(source, escape):
|
||||||
escape = escape[2:]
|
escape = escape[2:]
|
||||||
if len(escape) != 2:
|
if len(escape) != 2:
|
||||||
raise error, "bogus escape: %s" % repr("\\" + escape)
|
raise error, "bogus escape: %s" % repr("\\" + escape)
|
||||||
return LITERAL, int(escape, 16) & 0xff
|
return LITERAL, atoi(escape, 16) & 0xff
|
||||||
elif str(escape[1:2]) in OCTDIGITS:
|
elif str(escape[1:2]) in OCTDIGITS:
|
||||||
# octal escape (up to three digits)
|
# octal escape (up to three digits)
|
||||||
while source.next in OCTDIGITS and len(escape) < 5:
|
while source.next in OCTDIGITS and len(escape) < 5:
|
||||||
escape = escape + source.get()
|
escape = escape + source.get()
|
||||||
escape = escape[1:]
|
escape = escape[1:]
|
||||||
return LITERAL, int(escape, 8) & 0xff
|
return LITERAL, atoi(escape, 8) & 0xff
|
||||||
if len(escape) == 2:
|
if len(escape) == 2:
|
||||||
return LITERAL, ord(escape[1])
|
return LITERAL, ord(escape[1])
|
||||||
except ValueError:
|
except ValueError:
|
||||||
|
@ -267,12 +273,12 @@ def _escape(source, escape, state):
|
||||||
escape = escape + source.get()
|
escape = escape + source.get()
|
||||||
if len(escape) != 4:
|
if len(escape) != 4:
|
||||||
raise ValueError
|
raise ValueError
|
||||||
return LITERAL, int(escape[2:], 16) & 0xff
|
return LITERAL, atoi(escape[2:], 16) & 0xff
|
||||||
elif escape[1:2] == "0":
|
elif escape[1:2] == "0":
|
||||||
# octal escape
|
# octal escape
|
||||||
while source.next in OCTDIGITS and len(escape) < 4:
|
while source.next in OCTDIGITS and len(escape) < 4:
|
||||||
escape = escape + source.get()
|
escape = escape + source.get()
|
||||||
return LITERAL, int(escape[1:], 8) & 0xff
|
return LITERAL, atoi(escape[1:], 8) & 0xff
|
||||||
elif escape[1:2] in DIGITS:
|
elif escape[1:2] in DIGITS:
|
||||||
# octal escape *or* decimal group reference (sigh)
|
# octal escape *or* decimal group reference (sigh)
|
||||||
here = source.tell()
|
here = source.tell()
|
||||||
|
@ -282,7 +288,7 @@ def _escape(source, escape, state):
|
||||||
source.next in OCTDIGITS):
|
source.next in OCTDIGITS):
|
||||||
# got three octal digits; this is an octal escape
|
# got three octal digits; this is an octal escape
|
||||||
escape = escape + source.get()
|
escape = escape + source.get()
|
||||||
return LITERAL, int(escape[1:], 8) & 0xff
|
return LITERAL, atoi(escape[1:], 8) & 0xff
|
||||||
# got at least one decimal digit; this is a group reference
|
# got at least one decimal digit; this is a group reference
|
||||||
group = _group(escape, state.groups)
|
group = _group(escape, state.groups)
|
||||||
if group:
|
if group:
|
||||||
|
@ -456,9 +462,9 @@ def _parse(source, state):
|
||||||
source.seek(here)
|
source.seek(here)
|
||||||
continue
|
continue
|
||||||
if lo:
|
if lo:
|
||||||
min = int(lo)
|
min = atoi(lo)
|
||||||
if hi:
|
if hi:
|
||||||
max = int(hi)
|
max = atoi(hi)
|
||||||
if max < min:
|
if max < min:
|
||||||
raise error, "bad repeat interval"
|
raise error, "bad repeat interval"
|
||||||
else:
|
else:
|
||||||
|
@ -646,7 +652,7 @@ def parse_template(source, pattern):
|
||||||
if not name:
|
if not name:
|
||||||
raise error, "bad group name"
|
raise error, "bad group name"
|
||||||
try:
|
try:
|
||||||
index = int(name)
|
index = atoi(name)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
if not isname(name):
|
if not isname(name):
|
||||||
raise error, "bad character in group name"
|
raise error, "bad character in group name"
|
||||||
|
@ -662,7 +668,7 @@ def parse_template(source, pattern):
|
||||||
if group:
|
if group:
|
||||||
if (s.next not in DIGITS or
|
if (s.next not in DIGITS or
|
||||||
not _group(this + s.next, pattern.groups+1)):
|
not _group(this + s.next, pattern.groups+1)):
|
||||||
code = MARK, int(group)
|
code = MARK, group
|
||||||
break
|
break
|
||||||
elif s.next in OCTDIGITS:
|
elif s.next in OCTDIGITS:
|
||||||
this = this + s.get()
|
this = this + s.get()
|
||||||
|
@ -670,7 +676,7 @@ def parse_template(source, pattern):
|
||||||
break
|
break
|
||||||
if not code:
|
if not code:
|
||||||
this = this[1:]
|
this = this[1:]
|
||||||
code = LITERAL, int(this[-6:], 8) & 0xff
|
code = LITERAL, atoi(this[-6:], 8) & 0xff
|
||||||
a(code)
|
a(code)
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -325,13 +325,23 @@ for t in tests:
|
||||||
|
|
||||||
# Try the match on a unicode string, and check that it
|
# Try the match on a unicode string, and check that it
|
||||||
# still succeeds.
|
# still succeeds.
|
||||||
result=obj.search(unicode(s, "latin-1"))
|
try:
|
||||||
|
u = unicode(s, "latin-1")
|
||||||
|
except NameError:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
result=obj.search(u)
|
||||||
if result==None:
|
if result==None:
|
||||||
print '=== Fails on unicode match', t
|
print '=== Fails on unicode match', t
|
||||||
|
|
||||||
# Try the match on a unicode pattern, and check that it
|
# Try the match on a unicode pattern, and check that it
|
||||||
# still succeeds.
|
# still succeeds.
|
||||||
obj=sre.compile(unicode(pattern, "latin-1"))
|
try:
|
||||||
|
u = unicode(pattern, "latin-1")
|
||||||
|
except NameError:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
obj=sre.compile(u)
|
||||||
result=obj.search(s)
|
result=obj.search(s)
|
||||||
if result==None:
|
if result==None:
|
||||||
print '=== Fails on unicode pattern match', t
|
print '=== Fails on unicode pattern match', t
|
||||||
|
|
|
@ -24,7 +24,7 @@
|
||||||
* 2000-10-24 fl really fixed assert_not; reset groups in findall
|
* 2000-10-24 fl really fixed assert_not; reset groups in findall
|
||||||
* 2000-12-21 fl fixed memory leak in groupdict
|
* 2000-12-21 fl fixed memory leak in groupdict
|
||||||
* 2001-01-02 fl properly reset pointer after failed assertion in MIN_UNTIL
|
* 2001-01-02 fl properly reset pointer after failed assertion in MIN_UNTIL
|
||||||
* 2001-01-15 fl don't use recursion for unbounded MIN_UNTIL; fixed
|
* 2001-01-15 fl avoid recursion for MIN_UNTIL; fixed uppercase literal bug
|
||||||
* 2001-01-16 fl fixed memory leak in pattern destructor
|
* 2001-01-16 fl fixed memory leak in pattern destructor
|
||||||
*
|
*
|
||||||
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
|
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
|
||||||
|
@ -40,7 +40,7 @@
|
||||||
|
|
||||||
#ifndef SRE_RECURSIVE
|
#ifndef SRE_RECURSIVE
|
||||||
|
|
||||||
char copyright[] = " SRE 0.9.9 Copyright (c) 1997-2001 by Secret Labs AB ";
|
char copyright[] = " SRE 2.1 Copyright (c) 1997-2001 by Secret Labs AB ";
|
||||||
|
|
||||||
#include "Python.h"
|
#include "Python.h"
|
||||||
|
|
||||||
|
@ -49,7 +49,9 @@ char copyright[] = " SRE 0.9.9 Copyright (c) 1997-2001 by Secret Labs AB ";
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
|
||||||
/* name of this module, minus the leading underscore */
|
/* name of this module, minus the leading underscore */
|
||||||
#define MODULE "sre"
|
#if !defined(SRE_MODULE)
|
||||||
|
#define SRE_MODULE "sre"
|
||||||
|
#endif
|
||||||
|
|
||||||
/* defining this one enables tracing */
|
/* defining this one enables tracing */
|
||||||
#undef VERBOSE
|
#undef VERBOSE
|
||||||
|
@ -81,6 +83,10 @@ char copyright[] = " SRE 0.9.9 Copyright (c) 1997-2001 by Secret Labs AB ";
|
||||||
/* enables aggressive inlining (always on for Visual C) */
|
/* enables aggressive inlining (always on for Visual C) */
|
||||||
#undef USE_INLINE
|
#undef USE_INLINE
|
||||||
|
|
||||||
|
#if PY_VERSION_HEX < 0x01060000
|
||||||
|
#define PyObject_DEL(op) PyMem_DEL((op))
|
||||||
|
#endif
|
||||||
|
|
||||||
/* -------------------------------------------------------------------- */
|
/* -------------------------------------------------------------------- */
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
|
@ -221,6 +227,23 @@ sre_category(SRE_CODE category, unsigned int ch)
|
||||||
return SRE_UNI_IS_LINEBREAK(ch);
|
return SRE_UNI_IS_LINEBREAK(ch);
|
||||||
case SRE_CATEGORY_UNI_NOT_LINEBREAK:
|
case SRE_CATEGORY_UNI_NOT_LINEBREAK:
|
||||||
return !SRE_UNI_IS_LINEBREAK(ch);
|
return !SRE_UNI_IS_LINEBREAK(ch);
|
||||||
|
#else
|
||||||
|
case SRE_CATEGORY_UNI_DIGIT:
|
||||||
|
return SRE_IS_DIGIT(ch);
|
||||||
|
case SRE_CATEGORY_UNI_NOT_DIGIT:
|
||||||
|
return !SRE_IS_DIGIT(ch);
|
||||||
|
case SRE_CATEGORY_UNI_SPACE:
|
||||||
|
return SRE_IS_SPACE(ch);
|
||||||
|
case SRE_CATEGORY_UNI_NOT_SPACE:
|
||||||
|
return !SRE_IS_SPACE(ch);
|
||||||
|
case SRE_CATEGORY_UNI_WORD:
|
||||||
|
return SRE_LOC_IS_WORD(ch);
|
||||||
|
case SRE_CATEGORY_UNI_NOT_WORD:
|
||||||
|
return !SRE_LOC_IS_WORD(ch);
|
||||||
|
case SRE_CATEGORY_UNI_LINEBREAK:
|
||||||
|
return SRE_IS_LINEBREAK(ch);
|
||||||
|
case SRE_CATEGORY_UNI_NOT_LINEBREAK:
|
||||||
|
return !SRE_IS_LINEBREAK(ch);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -1208,33 +1231,22 @@ _compile(PyObject* self_, PyObject* args)
|
||||||
int groups = 0;
|
int groups = 0;
|
||||||
PyObject* groupindex = NULL;
|
PyObject* groupindex = NULL;
|
||||||
PyObject* indexgroup = NULL;
|
PyObject* indexgroup = NULL;
|
||||||
if (!PyArg_ParseTuple(args, "OiO|iOO", &pattern, &flags, &code,
|
if (!PyArg_ParseTuple(args, "OiO!|iOO", &pattern, &flags,
|
||||||
&groups, &groupindex, &indexgroup))
|
&PyList_Type, &code, &groups,
|
||||||
|
&groupindex, &indexgroup))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
code = PySequence_Fast(code, "code argument must be a sequence");
|
n = PyList_GET_SIZE(code);
|
||||||
if (!code)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
#if PY_VERSION_HEX >= 0x01060000
|
|
||||||
n = PySequence_Size(code);
|
|
||||||
#else
|
|
||||||
n = PySequence_Length(code);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
|
self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
|
||||||
if (!self) {
|
if (!self)
|
||||||
Py_DECREF(code);
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < n; i++) {
|
for (i = 0; i < n; i++) {
|
||||||
PyObject *o = PySequence_Fast_GET_ITEM(code, i);
|
PyObject *o = PyList_GET_ITEM(code, i);
|
||||||
self->code[i] = (SRE_CODE) PyInt_AsLong(o);
|
self->code[i] = (SRE_CODE) PyInt_AsLong(o);
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_DECREF(code);
|
|
||||||
|
|
||||||
if (PyErr_Occurred()) {
|
if (PyErr_Occurred()) {
|
||||||
PyObject_DEL(self);
|
PyObject_DEL(self);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -1270,9 +1282,11 @@ sre_getlower(PyObject* self, PyObject* args)
|
||||||
return NULL;
|
return NULL;
|
||||||
if (flags & SRE_FLAG_LOCALE)
|
if (flags & SRE_FLAG_LOCALE)
|
||||||
return Py_BuildValue("i", sre_lower_locale(character));
|
return Py_BuildValue("i", sre_lower_locale(character));
|
||||||
#if defined(HAVE_UNICODE)
|
|
||||||
if (flags & SRE_FLAG_UNICODE)
|
if (flags & SRE_FLAG_UNICODE)
|
||||||
|
#if defined(HAVE_UNICODE)
|
||||||
return Py_BuildValue("i", sre_lower_unicode(character));
|
return Py_BuildValue("i", sre_lower_unicode(character));
|
||||||
|
#else
|
||||||
|
return Py_BuildValue("i", sre_lower_locale(character));
|
||||||
#endif
|
#endif
|
||||||
return Py_BuildValue("i", sre_lower(character));
|
return Py_BuildValue("i", sre_lower(character));
|
||||||
}
|
}
|
||||||
|
@ -1380,9 +1394,11 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
|
||||||
|
|
||||||
if (pattern->flags & SRE_FLAG_LOCALE)
|
if (pattern->flags & SRE_FLAG_LOCALE)
|
||||||
state->lower = sre_lower_locale;
|
state->lower = sre_lower_locale;
|
||||||
#if defined(HAVE_UNICODE)
|
|
||||||
else if (pattern->flags & SRE_FLAG_UNICODE)
|
else if (pattern->flags & SRE_FLAG_UNICODE)
|
||||||
|
#if defined(HAVE_UNICODE)
|
||||||
state->lower = sre_lower_unicode;
|
state->lower = sre_lower_unicode;
|
||||||
|
#else
|
||||||
|
state->lower = sre_lower_locale;
|
||||||
#endif
|
#endif
|
||||||
else
|
else
|
||||||
state->lower = sre_lower;
|
state->lower = sre_lower;
|
||||||
|
@ -1520,7 +1536,7 @@ pattern_scanner(PatternObject* pattern, PyObject* args)
|
||||||
|
|
||||||
string = state_init(&self->state, pattern, string, start, end);
|
string = state_init(&self->state, pattern, string, start, end);
|
||||||
if (!string) {
|
if (!string) {
|
||||||
PyObject_Del(self);
|
PyObject_DEL(self);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1619,7 +1635,7 @@ call(char* function, PyObject* args)
|
||||||
PyObject* func;
|
PyObject* func;
|
||||||
PyObject* result;
|
PyObject* result;
|
||||||
|
|
||||||
name = PyString_FromString(MODULE);
|
name = PyString_FromString(SRE_MODULE);
|
||||||
if (!name)
|
if (!name)
|
||||||
return NULL;
|
return NULL;
|
||||||
module = PyImport_Import(name);
|
module = PyImport_Import(name);
|
||||||
|
@ -2366,7 +2382,7 @@ init_sre(void)
|
||||||
Pattern_Type.ob_type = Match_Type.ob_type =
|
Pattern_Type.ob_type = Match_Type.ob_type =
|
||||||
Scanner_Type.ob_type = &PyType_Type;
|
Scanner_Type.ob_type = &PyType_Type;
|
||||||
|
|
||||||
m = Py_InitModule("_" MODULE, _functions);
|
m = Py_InitModule("_" SRE_MODULE, _functions);
|
||||||
d = PyModule_GetDict(m);
|
d = PyModule_GetDict(m);
|
||||||
|
|
||||||
PyDict_SetItemString(
|
PyDict_SetItemString(
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue