mirror of
https://github.com/python/cpython.git
synced 2025-10-18 12:48:57 +00:00
another major speedup: let sre.sub/subn check for escapes in the
template string, and don't call the template compiler if we can avoid it.
This commit is contained in:
parent
d05e051aa7
commit
6de22ef677
1 changed files with 89 additions and 30 deletions
119
Modules/_sre.c
119
Modules/_sre.c
|
@ -34,6 +34,7 @@
|
||||||
* 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
|
* 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
|
||||||
* 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
|
* 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
|
||||||
* 2001-10-21 fl added sub/subn primitive
|
* 2001-10-21 fl added sub/subn primitive
|
||||||
|
* 2001-10-22 fl check for literal sub/subn templates
|
||||||
*
|
*
|
||||||
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
|
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
|
||||||
*
|
*
|
||||||
|
@ -359,6 +360,7 @@ mark_restore(SRE_STATE* state, int lo, int hi)
|
||||||
#define SRE_INFO sre_info
|
#define SRE_INFO sre_info
|
||||||
#define SRE_MATCH sre_match
|
#define SRE_MATCH sre_match
|
||||||
#define SRE_SEARCH sre_search
|
#define SRE_SEARCH sre_search
|
||||||
|
#define SRE_LITERAL_TEMPLATE sre_literal_template
|
||||||
|
|
||||||
#if defined(HAVE_UNICODE)
|
#if defined(HAVE_UNICODE)
|
||||||
|
|
||||||
|
@ -366,6 +368,7 @@ mark_restore(SRE_STATE* state, int lo, int hi)
|
||||||
#include "_sre.c"
|
#include "_sre.c"
|
||||||
#undef SRE_RECURSIVE
|
#undef SRE_RECURSIVE
|
||||||
|
|
||||||
|
#undef SRE_LITERAL_TEMPLATE
|
||||||
#undef SRE_SEARCH
|
#undef SRE_SEARCH
|
||||||
#undef SRE_MATCH
|
#undef SRE_MATCH
|
||||||
#undef SRE_INFO
|
#undef SRE_INFO
|
||||||
|
@ -383,6 +386,7 @@ mark_restore(SRE_STATE* state, int lo, int hi)
|
||||||
#define SRE_INFO sre_uinfo
|
#define SRE_INFO sre_uinfo
|
||||||
#define SRE_MATCH sre_umatch
|
#define SRE_MATCH sre_umatch
|
||||||
#define SRE_SEARCH sre_usearch
|
#define SRE_SEARCH sre_usearch
|
||||||
|
#define SRE_LITERAL_TEMPLATE sre_uliteral_template
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif /* SRE_RECURSIVE */
|
#endif /* SRE_RECURSIVE */
|
||||||
|
@ -1282,6 +1286,15 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LOCAL(int)
|
||||||
|
SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, int len)
|
||||||
|
{
|
||||||
|
/* check if given string is a literal template (i.e. no escapes) */
|
||||||
|
while (len-- > 0)
|
||||||
|
if (*ptr++ == '\\')
|
||||||
|
return 0;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
#if !defined(SRE_RECURSIVE)
|
#if !defined(SRE_RECURSIVE)
|
||||||
|
|
||||||
|
@ -1388,27 +1401,24 @@ state_reset(SRE_STATE* state)
|
||||||
mark_fini(state);
|
mark_fini(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
LOCAL(PyObject*)
|
static void*
|
||||||
state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
|
getstring(PyObject* string, int* p_length, int* p_charsize)
|
||||||
int start, int end)
|
|
||||||
{
|
{
|
||||||
/* prepare state object */
|
/* given a python object, return a data pointer, a length (in
|
||||||
|
characters), and a character size. return NULL if the object
|
||||||
|
is not a string (or not compatible) */
|
||||||
|
|
||||||
PyBufferProcs *buffer;
|
PyBufferProcs *buffer;
|
||||||
int size, bytes;
|
int size, bytes, charsize;
|
||||||
void* ptr;
|
void* ptr;
|
||||||
|
|
||||||
memset(state, 0, sizeof(SRE_STATE));
|
|
||||||
|
|
||||||
state->lastindex = -1;
|
|
||||||
|
|
||||||
#if defined(HAVE_UNICODE)
|
#if defined(HAVE_UNICODE)
|
||||||
if (PyUnicode_Check(string)) {
|
if (PyUnicode_Check(string)) {
|
||||||
/* unicode strings don't always support the buffer interface */
|
/* unicode strings don't always support the buffer interface */
|
||||||
ptr = (void*) PyUnicode_AS_DATA(string);
|
ptr = (void*) PyUnicode_AS_DATA(string);
|
||||||
bytes = PyUnicode_GET_DATA_SIZE(string);
|
bytes = PyUnicode_GET_DATA_SIZE(string);
|
||||||
size = PyUnicode_GET_SIZE(string);
|
size = PyUnicode_GET_SIZE(string);
|
||||||
state->charsize = sizeof(Py_UNICODE);
|
charsize = sizeof(Py_UNICODE);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
#endif
|
#endif
|
||||||
|
@ -1436,10 +1446,10 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (PyString_Check(string) || bytes == size)
|
if (PyString_Check(string) || bytes == size)
|
||||||
state->charsize = 1;
|
charsize = 1;
|
||||||
#if defined(HAVE_UNICODE)
|
#if defined(HAVE_UNICODE)
|
||||||
else if (bytes == (int) (size * sizeof(Py_UNICODE)))
|
else if (bytes == (int) (size * sizeof(Py_UNICODE)))
|
||||||
state->charsize = sizeof(Py_UNICODE);
|
charsize = sizeof(Py_UNICODE);
|
||||||
#endif
|
#endif
|
||||||
else {
|
else {
|
||||||
PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
|
PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
|
||||||
|
@ -1450,16 +1460,42 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
*p_length = size;
|
||||||
|
*p_charsize = charsize;
|
||||||
|
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOCAL(PyObject*)
|
||||||
|
state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
|
||||||
|
int start, int end)
|
||||||
|
{
|
||||||
|
/* prepare state object */
|
||||||
|
|
||||||
|
int length;
|
||||||
|
int charsize;
|
||||||
|
void* ptr;
|
||||||
|
|
||||||
|
memset(state, 0, sizeof(SRE_STATE));
|
||||||
|
|
||||||
|
state->lastindex = -1;
|
||||||
|
|
||||||
|
ptr = getstring(string, &length, &charsize);
|
||||||
|
if (!ptr)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
/* adjust boundaries */
|
/* adjust boundaries */
|
||||||
if (start < 0)
|
if (start < 0)
|
||||||
start = 0;
|
start = 0;
|
||||||
else if (start > size)
|
else if (start > length)
|
||||||
start = size;
|
start = length;
|
||||||
|
|
||||||
if (end < 0)
|
if (end < 0)
|
||||||
end = 0;
|
end = 0;
|
||||||
else if (end > size)
|
else if (end > length)
|
||||||
end = size;
|
end = length;
|
||||||
|
|
||||||
|
state->charsize = charsize;
|
||||||
|
|
||||||
state->beginning = ptr;
|
state->beginning = ptr;
|
||||||
|
|
||||||
|
@ -2038,6 +2074,7 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
|
||||||
PyObject* filter;
|
PyObject* filter;
|
||||||
PyObject* args;
|
PyObject* args;
|
||||||
PyObject* match;
|
PyObject* match;
|
||||||
|
void* ptr;
|
||||||
int status;
|
int status;
|
||||||
int n;
|
int n;
|
||||||
int i, b, e;
|
int i, b, e;
|
||||||
|
@ -2049,15 +2086,35 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
|
||||||
Py_INCREF(filter);
|
Py_INCREF(filter);
|
||||||
filter_is_callable = 1;
|
filter_is_callable = 1;
|
||||||
} else {
|
} else {
|
||||||
/* if not callable, call the template compiler. it may return
|
/* if not callable, check if it's a literal string */
|
||||||
either a filter function or a literal string */
|
int literal;
|
||||||
filter = call(
|
ptr = getstring(template, &n, &b);
|
||||||
SRE_MODULE, "_subx",
|
if (ptr) {
|
||||||
Py_BuildValue("OO", self, template)
|
if (b == 1) {
|
||||||
);
|
literal = sre_literal_template(ptr, n);
|
||||||
if (!filter)
|
} else {
|
||||||
return NULL;
|
#if defined(HAVE_UNICODE)
|
||||||
filter_is_callable = PyCallable_Check(filter);
|
literal = sre_uliteral_template(ptr, n);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
PyErr_Clear();
|
||||||
|
literal = 0;
|
||||||
|
}
|
||||||
|
if (literal) {
|
||||||
|
filter = template;
|
||||||
|
Py_INCREF(filter);
|
||||||
|
filter_is_callable = 0;
|
||||||
|
} else {
|
||||||
|
/* not a literal; hand it over to the template compiler */
|
||||||
|
filter = call(
|
||||||
|
SRE_MODULE, "_subx",
|
||||||
|
Py_BuildValue("OO", self, template)
|
||||||
|
);
|
||||||
|
if (!filter)
|
||||||
|
return NULL;
|
||||||
|
filter_is_callable = PyCallable_Check(filter);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
string = state_init(&state, self, string, 0, INT_MAX);
|
string = state_init(&state, self, string, 0, INT_MAX);
|
||||||
|
@ -2132,10 +2189,12 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* add to list */
|
/* add to list */
|
||||||
status = PyList_Append(list, item);
|
if (item != Py_None) {
|
||||||
Py_DECREF(item);
|
status = PyList_Append(list, item);
|
||||||
if (status < 0)
|
Py_DECREF(item);
|
||||||
goto error;
|
if (status < 0)
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
i = e;
|
i = e;
|
||||||
n = n + 1;
|
n = n + 1;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue