mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
_compile(): raise an exception if downcasting to SRE_CODE loses information:

    OverflowError: regular expression code size limit exceeded

Otherwise the compiled code is gibberish, possibly leading at least to wrong results or (as reported on c.l.py, i.e. comp.lang.python) internal sre errors at match time.

I'm not sure how to test this. SRE_CODE is a 2-byte type on my box, and it's easy to create a regexp that causes the new exception to trigger here. But it may be a 4-byte type on other boxes, and creating a regexp large enough to trigger problems there would be pretty crazy.

Bugfix candidate.
This commit is contained in:
parent
887c080a80
commit
3d56350910
1 changed files with 40 additions and 37 deletions
|
@ -451,7 +451,7 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
|
||||||
if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
|
if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
|
||||||
return ok;
|
return ok;
|
||||||
set += 16;
|
set += 16;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* <CHARSET> <bitmap> (32 bits per code word) */
|
/* <CHARSET> <bitmap> (32 bits per code word) */
|
||||||
if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31))))
|
if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31))))
|
||||||
|
@ -492,7 +492,7 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
|
||||||
else
|
else
|
||||||
block = -1;
|
block = -1;
|
||||||
set += 64;
|
set += 64;
|
||||||
if (block >=0 &&
|
if (block >=0 &&
|
||||||
(set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31))))
|
(set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31))))
|
||||||
return ok;
|
return ok;
|
||||||
set += count*8;
|
set += count*8;
|
||||||
|
@ -568,7 +568,7 @@ SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount)
|
||||||
while (ptr < end && (SRE_CODE) *ptr != chr)
|
while (ptr < end && (SRE_CODE) *ptr != chr)
|
||||||
ptr++;
|
ptr++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SRE_OP_NOT_LITERAL_IGNORE:
|
case SRE_OP_NOT_LITERAL_IGNORE:
|
||||||
/* repeated non-literal */
|
/* repeated non-literal */
|
||||||
chr = pattern[1];
|
chr = pattern[1];
|
||||||
|
@ -644,7 +644,7 @@ SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
* - Recursive SRE_MATCH() returned false, and will continue the
|
* - Recursive SRE_MATCH() returned false, and will continue the
|
||||||
* outside 'for' loop: must be protected when breaking, since the next
|
* outside 'for' loop: must be protected when breaking, since the next
|
||||||
* OP could potentially depend on lastmark;
|
* OP could potentially depend on lastmark;
|
||||||
*
|
*
|
||||||
* - Recursive SRE_MATCH() returned false, and will be called again
|
* - Recursive SRE_MATCH() returned false, and will be called again
|
||||||
* inside a local for/while loop: must be protected between each
|
* inside a local for/while loop: must be protected between each
|
||||||
* loop iteration, since the recursive SRE_MATCH() could do anything,
|
* loop iteration, since the recursive SRE_MATCH() could do anything,
|
||||||
|
@ -848,7 +848,7 @@ entrance:
|
||||||
/* state->lastmark is the highest valid index in the
|
/* state->lastmark is the highest valid index in the
|
||||||
state->mark array. If it is increased by more than 1,
|
state->mark array. If it is increased by more than 1,
|
||||||
the intervening marks must be set to NULL to signal
|
the intervening marks must be set to NULL to signal
|
||||||
that these marks have not been encountered. */
|
that these marks have not been encountered. */
|
||||||
int j = state->lastmark + 1;
|
int j = state->lastmark + 1;
|
||||||
while (j < i)
|
while (j < i)
|
||||||
state->mark[j++] = NULL;
|
state->mark[j++] = NULL;
|
||||||
|
@ -1065,9 +1065,9 @@ entrance:
|
||||||
RETURN_ON_ERROR(ret);
|
RETURN_ON_ERROR(ret);
|
||||||
RETURN_SUCCESS;
|
RETURN_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
LASTMARK_RESTORE();
|
LASTMARK_RESTORE();
|
||||||
|
|
||||||
ctx->ptr--;
|
ctx->ptr--;
|
||||||
ctx->count--;
|
ctx->count--;
|
||||||
}
|
}
|
||||||
|
@ -1115,7 +1115,7 @@ entrance:
|
||||||
RETURN_ON_ERROR(ret);
|
RETURN_ON_ERROR(ret);
|
||||||
DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
|
DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
|
||||||
if (ret < (int) ctx->pattern[1])
|
if (ret < (int) ctx->pattern[1])
|
||||||
/* didn't match minimum number of times */
|
/* didn't match minimum number of times */
|
||||||
RETURN_FAILURE;
|
RETURN_FAILURE;
|
||||||
/* advance past minimum matches of repeat */
|
/* advance past minimum matches of repeat */
|
||||||
ctx->count = ret;
|
ctx->count = ret;
|
||||||
|
@ -1546,7 +1546,6 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
ptr++;
|
ptr++;
|
||||||
}
|
}
|
||||||
|
@ -1601,7 +1600,7 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
|
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOCAL(int)
|
LOCAL(int)
|
||||||
SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, int len)
|
SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, int len)
|
||||||
{
|
{
|
||||||
|
@ -1652,10 +1651,14 @@ _compile(PyObject* self_, PyObject* args)
|
||||||
|
|
||||||
for (i = 0; i < n; i++) {
|
for (i = 0; i < n; i++) {
|
||||||
PyObject *o = PyList_GET_ITEM(code, i);
|
PyObject *o = PyList_GET_ITEM(code, i);
|
||||||
if (PyInt_Check(o))
|
unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o)
|
||||||
self->code[i] = (SRE_CODE) PyInt_AsLong(o);
|
: PyLong_AsUnsignedLong(o);
|
||||||
else
|
self->code[i] = (SRE_CODE) value;
|
||||||
self->code[i] = (SRE_CODE) PyLong_AsUnsignedLong(o);
|
if ((unsigned long) self->code[i] != value) {
|
||||||
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
|
"regular expression code size limit exceeded");
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (PyErr_Occurred()) {
|
if (PyErr_Occurred()) {
|
||||||
|
@ -1724,7 +1727,7 @@ getstring(PyObject* string, int* p_length, int* p_charsize)
|
||||||
/* given a python object, return a data pointer, a length (in
|
/* given a python object, return a data pointer, a length (in
|
||||||
characters), and a character size. return NULL if the object
|
characters), and a character size. return NULL if the object
|
||||||
is not a string (or not compatible) */
|
is not a string (or not compatible) */
|
||||||
|
|
||||||
PyBufferProcs *buffer;
|
PyBufferProcs *buffer;
|
||||||
int size, bytes, charsize;
|
int size, bytes, charsize;
|
||||||
void* ptr;
|
void* ptr;
|
||||||
|
@ -2203,7 +2206,7 @@ pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
|
||||||
while (state.start <= state.end) {
|
while (state.start <= state.end) {
|
||||||
|
|
||||||
PyObject* item;
|
PyObject* item;
|
||||||
|
|
||||||
state_reset(&state);
|
state_reset(&state);
|
||||||
|
|
||||||
state.ptr = state.start;
|
state.ptr = state.start;
|
||||||
|
@ -2222,7 +2225,7 @@ pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
|
||||||
pattern_error(status);
|
pattern_error(status);
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* don't bother to build a match object */
|
/* don't bother to build a match object */
|
||||||
switch (self->groups) {
|
switch (self->groups) {
|
||||||
case 0:
|
case 0:
|
||||||
|
@ -2271,7 +2274,7 @@ error:
|
||||||
Py_DECREF(list);
|
Py_DECREF(list);
|
||||||
state_fini(&state);
|
state_fini(&state);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if PY_VERSION_HEX >= 0x02020000
|
#if PY_VERSION_HEX >= 0x02020000
|
||||||
|
@ -2349,7 +2352,7 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
|
||||||
pattern_error(status);
|
pattern_error(status);
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (state.start == state.ptr) {
|
if (state.start == state.ptr) {
|
||||||
if (last == state.end)
|
if (last == state.end)
|
||||||
break;
|
break;
|
||||||
|
@ -2405,7 +2408,7 @@ error:
|
||||||
Py_DECREF(list);
|
Py_DECREF(list);
|
||||||
state_fini(&state);
|
state_fini(&state);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
|
@ -2496,7 +2499,7 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
|
||||||
pattern_error(status);
|
pattern_error(status);
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
b = STATE_OFFSET(&state, state.start);
|
b = STATE_OFFSET(&state, state.start);
|
||||||
e = STATE_OFFSET(&state, state.ptr);
|
e = STATE_OFFSET(&state, state.ptr);
|
||||||
|
|
||||||
|
@ -2542,7 +2545,7 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
|
||||||
if (status < 0)
|
if (status < 0)
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
i = e;
|
i = e;
|
||||||
n = n + 1;
|
n = n + 1;
|
||||||
|
|
||||||
|
@ -2586,7 +2589,7 @@ error:
|
||||||
state_fini(&state);
|
state_fini(&state);
|
||||||
Py_DECREF(filter);
|
Py_DECREF(filter);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
|
@ -2626,7 +2629,7 @@ pattern_copy(PatternObject* self, PyObject* args)
|
||||||
|
|
||||||
if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__"))
|
if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__"))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
|
copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
|
||||||
if (!copy)
|
if (!copy)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -2653,7 +2656,7 @@ pattern_deepcopy(PatternObject* self, PyObject* args)
|
||||||
{
|
{
|
||||||
#ifdef USE_BUILTIN_COPY
|
#ifdef USE_BUILTIN_COPY
|
||||||
PatternObject* copy;
|
PatternObject* copy;
|
||||||
|
|
||||||
PyObject* memo;
|
PyObject* memo;
|
||||||
if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
|
if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -2701,28 +2704,28 @@ PyDoc_STRVAR(pattern_finditer_doc,
|
||||||
PyDoc_STRVAR(pattern_sub_doc,
|
PyDoc_STRVAR(pattern_sub_doc,
|
||||||
"sub(repl, string[, count = 0]) --> newstring\n\
|
"sub(repl, string[, count = 0]) --> newstring\n\
|
||||||
Return the string obtained by replacing the leftmost non-overlapping\n\
|
Return the string obtained by replacing the leftmost non-overlapping\n\
|
||||||
occurrences of pattern in string by the replacement repl.");
|
occurrences of pattern in string by the replacement repl.");
|
||||||
|
|
||||||
PyDoc_STRVAR(pattern_subn_doc,
|
PyDoc_STRVAR(pattern_subn_doc,
|
||||||
"subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\
|
"subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\
|
||||||
Return the tuple (new_string, number_of_subs_made) found by replacing\n\
|
Return the tuple (new_string, number_of_subs_made) found by replacing\n\
|
||||||
the leftmost non-overlapping occurrences of pattern with the\n\
|
the leftmost non-overlapping occurrences of pattern with the\n\
|
||||||
replacement repl.");
|
replacement repl.");
|
||||||
|
|
||||||
PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
|
PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
|
||||||
|
|
||||||
static PyMethodDef pattern_methods[] = {
|
static PyMethodDef pattern_methods[] = {
|
||||||
{"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
|
{"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
|
||||||
pattern_match_doc},
|
pattern_match_doc},
|
||||||
{"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
|
{"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
|
||||||
pattern_search_doc},
|
pattern_search_doc},
|
||||||
{"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
|
{"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
|
||||||
pattern_sub_doc},
|
pattern_sub_doc},
|
||||||
{"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
|
{"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
|
||||||
pattern_subn_doc},
|
pattern_subn_doc},
|
||||||
{"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
|
{"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
|
||||||
pattern_split_doc},
|
pattern_split_doc},
|
||||||
{"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
|
{"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
|
||||||
pattern_findall_doc},
|
pattern_findall_doc},
|
||||||
#if PY_VERSION_HEX >= 0x02020000
|
#if PY_VERSION_HEX >= 0x02020000
|
||||||
{"finditer", (PyCFunction) pattern_finditer, METH_VARARGS,
|
{"finditer", (PyCFunction) pattern_finditer, METH_VARARGS,
|
||||||
|
@ -2734,7 +2737,7 @@ static PyMethodDef pattern_methods[] = {
|
||||||
{NULL, NULL}
|
{NULL, NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
pattern_getattr(PatternObject* self, char* name)
|
pattern_getattr(PatternObject* self, char* name)
|
||||||
{
|
{
|
||||||
PyObject* res;
|
PyObject* res;
|
||||||
|
@ -3113,7 +3116,7 @@ match_copy(MatchObject* self, PyObject* args)
|
||||||
#ifdef USE_BUILTIN_COPY
|
#ifdef USE_BUILTIN_COPY
|
||||||
MatchObject* copy;
|
MatchObject* copy;
|
||||||
int slots, offset;
|
int slots, offset;
|
||||||
|
|
||||||
if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__"))
|
if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__"))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
@ -3146,7 +3149,7 @@ match_deepcopy(MatchObject* self, PyObject* args)
|
||||||
{
|
{
|
||||||
#ifdef USE_BUILTIN_COPY
|
#ifdef USE_BUILTIN_COPY
|
||||||
MatchObject* copy;
|
MatchObject* copy;
|
||||||
|
|
||||||
PyObject* memo;
|
PyObject* memo;
|
||||||
if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
|
if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -3181,7 +3184,7 @@ static PyMethodDef match_methods[] = {
|
||||||
{NULL, NULL}
|
{NULL, NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
match_getattr(MatchObject* self, char* name)
|
match_getattr(MatchObject* self, char* name)
|
||||||
{
|
{
|
||||||
PyObject* res;
|
PyObject* res;
|
||||||
|
@ -3337,7 +3340,7 @@ static PyMethodDef scanner_methods[] = {
|
||||||
{NULL, NULL}
|
{NULL, NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
scanner_getattr(ScannerObject* self, char* name)
|
scanner_getattr(ScannerObject* self, char* name)
|
||||||
{
|
{
|
||||||
PyObject* res;
|
PyObject* res;
|
||||||
|
@ -3374,7 +3377,7 @@ static PyMethodDef _functions[] = {
|
||||||
{NULL, NULL}
|
{NULL, NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
#if PY_VERSION_HEX < 0x02030000
|
#if PY_VERSION_HEX < 0x02030000
|
||||||
DL_EXPORT(void) init_sre(void)
|
DL_EXPORT(void) init_sre(void)
|
||||||
#else
|
#else
|
||||||
PyMODINIT_FUNC init_sre(void)
|
PyMODINIT_FUNC init_sre(void)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue