mirror of
https://github.com/python/cpython.git
synced 2025-11-17 01:25:57 +00:00
bpo-47256: Increasing the depth of backtracking in RE (GH-32411)
Limit the maximum number of capturing groups to 2**30-1 on 64-bit platforms (it was 2**31-1). No change on 32-bit platforms (2**28-1). This makes it possible to reduce the size of SRE(match_context): - On 32-bit platforms: 36 bytes, no change. (msvc2022) - On 64-bit platforms: 72 bytes -> 56 bytes. (msvc2022/gcc9.4) The smaller context in turn increases the achievable depth of backtracking.
This commit is contained in:
parent
1c2fcebf3c
commit
a29f858124
3 changed files with 46 additions and 44 deletions
|
|
@ -0,0 +1,2 @@
|
||||||
|
:mod:`re` module: limit the maximum number of capturing groups to 1,073,741,823 in
|
||||||
|
64-bit builds; this increases the depth of backtracking.
|
||||||
|
|
@ -18,10 +18,10 @@
|
||||||
#define SRE_CODE Py_UCS4
|
#define SRE_CODE Py_UCS4
|
||||||
#if SIZEOF_SIZE_T > 4
|
#if SIZEOF_SIZE_T > 4
|
||||||
# define SRE_MAXREPEAT (~(SRE_CODE)0)
|
# define SRE_MAXREPEAT (~(SRE_CODE)0)
|
||||||
# define SRE_MAXGROUPS ((~(SRE_CODE)0) / 2)
|
# define SRE_MAXGROUPS ((SRE_CODE)INT32_MAX / 2)
|
||||||
#else
|
#else
|
||||||
# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
|
# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
|
||||||
# define SRE_MAXGROUPS ((SRE_CODE)PY_SSIZE_T_MAX / SIZEOF_SIZE_T / 2)
|
# define SRE_MAXGROUPS ((SRE_CODE)PY_SSIZE_T_MAX / SIZEOF_VOID_P / 2)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|
@ -73,12 +73,12 @@ typedef struct {
|
||||||
Py_ssize_t pos, endpos;
|
Py_ssize_t pos, endpos;
|
||||||
int isbytes;
|
int isbytes;
|
||||||
int charsize; /* character size */
|
int charsize; /* character size */
|
||||||
/* registers */
|
|
||||||
Py_ssize_t lastindex;
|
|
||||||
Py_ssize_t lastmark;
|
|
||||||
const void** mark;
|
|
||||||
int match_all;
|
int match_all;
|
||||||
int must_advance;
|
int must_advance;
|
||||||
|
/* marks */
|
||||||
|
int lastmark;
|
||||||
|
int lastindex;
|
||||||
|
const void** mark;
|
||||||
/* dynamically allocated stuff */
|
/* dynamically allocated stuff */
|
||||||
char* data_stack;
|
char* data_stack;
|
||||||
size_t data_stack_size;
|
size_t data_stack_size;
|
||||||
|
|
|
||||||
|
|
@ -450,20 +450,23 @@ do { \
|
||||||
|
|
||||||
#define MARK_PUSH(lastmark) \
|
#define MARK_PUSH(lastmark) \
|
||||||
do if (lastmark >= 0) { \
|
do if (lastmark >= 0) { \
|
||||||
i = lastmark; /* ctx->lastmark may change if reallocated */ \
|
size_t _marks_size = (lastmark+1) * sizeof(void*); \
|
||||||
DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
|
DATA_STACK_PUSH(state, state->mark, _marks_size); \
|
||||||
} while (0)
|
} while (0)
|
||||||
#define MARK_POP(lastmark) \
|
#define MARK_POP(lastmark) \
|
||||||
do if (lastmark >= 0) { \
|
do if (lastmark >= 0) { \
|
||||||
DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
|
size_t _marks_size = (lastmark+1) * sizeof(void*); \
|
||||||
|
DATA_STACK_POP(state, state->mark, _marks_size, 1); \
|
||||||
} while (0)
|
} while (0)
|
||||||
#define MARK_POP_KEEP(lastmark) \
|
#define MARK_POP_KEEP(lastmark) \
|
||||||
do if (lastmark >= 0) { \
|
do if (lastmark >= 0) { \
|
||||||
DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
|
size_t _marks_size = (lastmark+1) * sizeof(void*); \
|
||||||
|
DATA_STACK_POP(state, state->mark, _marks_size, 0); \
|
||||||
} while (0)
|
} while (0)
|
||||||
#define MARK_POP_DISCARD(lastmark) \
|
#define MARK_POP_DISCARD(lastmark) \
|
||||||
do if (lastmark >= 0) { \
|
do if (lastmark >= 0) { \
|
||||||
DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
|
size_t _marks_size = (lastmark+1) * sizeof(void*); \
|
||||||
|
DATA_STACK_POP_DISCARD(state, _marks_size); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define JUMP_NONE 0
|
#define JUMP_NONE 0
|
||||||
|
|
@ -488,10 +491,10 @@ do { \
|
||||||
ctx->pattern = pattern; \
|
ctx->pattern = pattern; \
|
||||||
ctx->ptr = ptr; \
|
ctx->ptr = ptr; \
|
||||||
DATA_ALLOC(SRE(match_context), nextctx); \
|
DATA_ALLOC(SRE(match_context), nextctx); \
|
||||||
nextctx->last_ctx_pos = ctx_pos; \
|
|
||||||
nextctx->jump = jumpvalue; \
|
|
||||||
nextctx->pattern = nextpattern; \
|
nextctx->pattern = nextpattern; \
|
||||||
nextctx->toplevel = toplevel_; \
|
nextctx->toplevel = toplevel_; \
|
||||||
|
nextctx->jump = jumpvalue; \
|
||||||
|
nextctx->last_ctx_pos = ctx_pos; \
|
||||||
pattern = nextpattern; \
|
pattern = nextpattern; \
|
||||||
ctx_pos = alloc_pos; \
|
ctx_pos = alloc_pos; \
|
||||||
ctx = nextctx; \
|
ctx = nextctx; \
|
||||||
|
|
@ -507,18 +510,18 @@ do { \
|
||||||
DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
|
DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
Py_ssize_t last_ctx_pos;
|
|
||||||
Py_ssize_t jump;
|
|
||||||
const SRE_CHAR* ptr;
|
|
||||||
const SRE_CODE* pattern;
|
|
||||||
Py_ssize_t count;
|
Py_ssize_t count;
|
||||||
Py_ssize_t lastmark;
|
|
||||||
Py_ssize_t lastindex;
|
|
||||||
union {
|
union {
|
||||||
SRE_CODE chr;
|
SRE_CODE chr;
|
||||||
SRE_REPEAT* rep;
|
SRE_REPEAT* rep;
|
||||||
} u;
|
} u;
|
||||||
|
int lastmark;
|
||||||
|
int lastindex;
|
||||||
|
const SRE_CODE* pattern;
|
||||||
|
const SRE_CHAR* ptr;
|
||||||
int toplevel;
|
int toplevel;
|
||||||
|
int jump;
|
||||||
|
Py_ssize_t last_ctx_pos;
|
||||||
} SRE(match_context);
|
} SRE(match_context);
|
||||||
|
|
||||||
#define MAYBE_CHECK_SIGNALS \
|
#define MAYBE_CHECK_SIGNALS \
|
||||||
|
|
@ -558,8 +561,8 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
|
||||||
{
|
{
|
||||||
const SRE_CHAR* end = (const SRE_CHAR *)state->end;
|
const SRE_CHAR* end = (const SRE_CHAR *)state->end;
|
||||||
Py_ssize_t alloc_pos, ctx_pos = -1;
|
Py_ssize_t alloc_pos, ctx_pos = -1;
|
||||||
Py_ssize_t i, ret = 0;
|
Py_ssize_t ret = 0;
|
||||||
Py_ssize_t jump;
|
int jump;
|
||||||
unsigned int sigcount=0;
|
unsigned int sigcount=0;
|
||||||
|
|
||||||
SRE(match_context)* ctx;
|
SRE(match_context)* ctx;
|
||||||
|
|
@ -607,7 +610,8 @@ dispatch:
|
||||||
/* <MARK> <gid> */
|
/* <MARK> <gid> */
|
||||||
TRACE(("|%p|%p|MARK %d\n", pattern,
|
TRACE(("|%p|%p|MARK %d\n", pattern,
|
||||||
ptr, pattern[0]));
|
ptr, pattern[0]));
|
||||||
i = pattern[0];
|
{
|
||||||
|
int i = pattern[0];
|
||||||
if (i & 1)
|
if (i & 1)
|
||||||
state->lastindex = i/2 + 1;
|
state->lastindex = i/2 + 1;
|
||||||
if (i > state->lastmark) {
|
if (i > state->lastmark) {
|
||||||
|
|
@ -615,12 +619,13 @@ dispatch:
|
||||||
state->mark array. If it is increased by more than 1,
|
state->mark array. If it is increased by more than 1,
|
||||||
the intervening marks must be set to NULL to signal
|
the intervening marks must be set to NULL to signal
|
||||||
that these marks have not been encountered. */
|
that these marks have not been encountered. */
|
||||||
Py_ssize_t j = state->lastmark + 1;
|
int j = state->lastmark + 1;
|
||||||
while (j < i)
|
while (j < i)
|
||||||
state->mark[j++] = NULL;
|
state->mark[j++] = NULL;
|
||||||
state->lastmark = i;
|
state->lastmark = i;
|
||||||
}
|
}
|
||||||
state->mark[i] = ptr;
|
state->mark[i] = ptr;
|
||||||
|
}
|
||||||
pattern++;
|
pattern++;
|
||||||
DISPATCH;
|
DISPATCH;
|
||||||
|
|
||||||
|
|
@ -1373,9 +1378,8 @@ dispatch:
|
||||||
/* match backreference */
|
/* match backreference */
|
||||||
TRACE(("|%p|%p|GROUPREF %d\n", pattern,
|
TRACE(("|%p|%p|GROUPREF %d\n", pattern,
|
||||||
ptr, pattern[0]));
|
ptr, pattern[0]));
|
||||||
i = pattern[0];
|
|
||||||
{
|
{
|
||||||
Py_ssize_t groupref = i+i;
|
int groupref = pattern[0] * 2;
|
||||||
if (groupref >= state->lastmark) {
|
if (groupref >= state->lastmark) {
|
||||||
RETURN_FAILURE;
|
RETURN_FAILURE;
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -1398,9 +1402,8 @@ dispatch:
|
||||||
/* match backreference */
|
/* match backreference */
|
||||||
TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
|
TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
|
||||||
ptr, pattern[0]));
|
ptr, pattern[0]));
|
||||||
i = pattern[0];
|
|
||||||
{
|
{
|
||||||
Py_ssize_t groupref = i+i;
|
int groupref = pattern[0] * 2;
|
||||||
if (groupref >= state->lastmark) {
|
if (groupref >= state->lastmark) {
|
||||||
RETURN_FAILURE;
|
RETURN_FAILURE;
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -1424,9 +1427,8 @@ dispatch:
|
||||||
/* match backreference */
|
/* match backreference */
|
||||||
TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
|
TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
|
||||||
ptr, pattern[0]));
|
ptr, pattern[0]));
|
||||||
i = pattern[0];
|
|
||||||
{
|
{
|
||||||
Py_ssize_t groupref = i+i;
|
int groupref = pattern[0] * 2;
|
||||||
if (groupref >= state->lastmark) {
|
if (groupref >= state->lastmark) {
|
||||||
RETURN_FAILURE;
|
RETURN_FAILURE;
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -1450,9 +1452,8 @@ dispatch:
|
||||||
/* match backreference */
|
/* match backreference */
|
||||||
TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
|
TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
|
||||||
ptr, pattern[0]));
|
ptr, pattern[0]));
|
||||||
i = pattern[0];
|
|
||||||
{
|
{
|
||||||
Py_ssize_t groupref = i+i;
|
int groupref = pattern[0] * 2;
|
||||||
if (groupref >= state->lastmark) {
|
if (groupref >= state->lastmark) {
|
||||||
RETURN_FAILURE;
|
RETURN_FAILURE;
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -1476,9 +1477,8 @@ dispatch:
|
||||||
TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
|
TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
|
||||||
ptr, pattern[0]));
|
ptr, pattern[0]));
|
||||||
/* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
|
/* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
|
||||||
i = pattern[0];
|
|
||||||
{
|
{
|
||||||
Py_ssize_t groupref = i+i;
|
int groupref = pattern[0] * 2;
|
||||||
if (groupref >= state->lastmark) {
|
if (groupref >= state->lastmark) {
|
||||||
pattern += pattern[1];
|
pattern += pattern[1];
|
||||||
DISPATCH;
|
DISPATCH;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue