mirror of
https://github.com/python/cpython.git
synced 2025-09-27 02:39:58 +00:00
New versions straight from Jeffrey Ollie's web site
This commit is contained in:
parent
db9e20f418
commit
db25f32849
3 changed files with 698 additions and 167 deletions
|
@ -1,3 +1,7 @@
|
||||||
|
/*
|
||||||
|
* -*- mode: c-mode; c-file-style: python -*-
|
||||||
|
*/
|
||||||
|
|
||||||
/* regexpr.c
|
/* regexpr.c
|
||||||
*
|
*
|
||||||
* Author: Tatu Ylonen <ylo@ngs.fi>
|
* Author: Tatu Ylonen <ylo@ngs.fi>
|
||||||
|
@ -57,6 +61,12 @@ char *realloc();
|
||||||
#endif /* __STDC__ */
|
#endif /* __STDC__ */
|
||||||
#endif /* THINK_C */
|
#endif /* THINK_C */
|
||||||
|
|
||||||
|
/* The original code blithely assumed that sizeof(short) == 2. Not
|
||||||
|
* always true. Original instances of "(short)x" were replaced by
|
||||||
|
* SHORT(x), where SHORT is #defined below. */
|
||||||
|
|
||||||
|
#define SHORT(x) ((x) & 0x8000 ? (x) - 0x10000 : (x))
|
||||||
|
|
||||||
/* The stack implementation is taken from an idea by Andrew Kuchling.
|
/* The stack implementation is taken from an idea by Andrew Kuchling.
|
||||||
* It's a doubly linked list of arrays. The advantages of this over a
|
* It's a doubly linked list of arrays. The advantages of this over a
|
||||||
* simple linked list are that the number of mallocs required are
|
* simple linked list are that the number of mallocs required are
|
||||||
|
@ -75,27 +85,27 @@ char *realloc();
|
||||||
|
|
||||||
typedef union item_t
|
typedef union item_t
|
||||||
{
|
{
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
int num;
|
int num;
|
||||||
int level;
|
int level;
|
||||||
char *start;
|
char *start;
|
||||||
char *end;
|
char *end;
|
||||||
} reg;
|
} reg;
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
int count;
|
int count;
|
||||||
int level;
|
int level;
|
||||||
int phantom;
|
int phantom;
|
||||||
char *code;
|
char *code;
|
||||||
char *text;
|
char *text;
|
||||||
} fail;
|
} fail;
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
int num;
|
int num;
|
||||||
int level;
|
int level;
|
||||||
int count;
|
int count;
|
||||||
} cntr;
|
} cntr;
|
||||||
} item_t;
|
} item_t;
|
||||||
|
|
||||||
#define STACK_PAGE_SIZE 256
|
#define STACK_PAGE_SIZE 256
|
||||||
|
@ -105,43 +115,98 @@ typedef union item_t
|
||||||
|
|
||||||
typedef struct item_page_t
|
typedef struct item_page_t
|
||||||
{
|
{
|
||||||
item_t items[STACK_PAGE_SIZE];
|
item_t items[STACK_PAGE_SIZE];
|
||||||
struct item_page_t *prev;
|
struct item_page_t *prev;
|
||||||
struct item_page_t *next;
|
struct item_page_t *next;
|
||||||
} item_page_t;
|
} item_page_t;
|
||||||
|
|
||||||
|
|
||||||
typedef struct match_state
|
typedef struct match_state
|
||||||
{
|
{
|
||||||
/* Structure to encapsulate the stack. */
|
/* The number of registers that have been pushed onto the stack
|
||||||
struct
|
* since the last failure point. */
|
||||||
{
|
|
||||||
/* index into the curent page. If index == 0 and you need
|
|
||||||
* to pop and item, move to the previous page and set
|
|
||||||
* index = STACK_PAGE_SIZE - 1. Otherwise decrement index
|
|
||||||
* to push a page. If index == STACK_PAGE_SIZE and you
|
|
||||||
* need to push a page move to the next page and set index
|
|
||||||
* = 0. If there is no new next page, allocate a new page
|
|
||||||
* and link it in. Otherwise, increment index to push a
|
|
||||||
* page. */
|
|
||||||
int index;
|
|
||||||
item_page_t *current; /* Pointer to the current page. */
|
|
||||||
item_page_t first; /* First page is statically allocated. */
|
|
||||||
} stack;
|
|
||||||
char *start[NUM_REGISTERS];
|
|
||||||
char *end[NUM_REGISTERS];
|
|
||||||
|
|
||||||
int changed[NUM_REGISTERS];
|
int count;
|
||||||
/* The number of registers that have been pushed onto the stack
|
|
||||||
* since the last failure point. */
|
/* Used to control when registers need to be pushed onto the
|
||||||
int count;
|
* stack. */
|
||||||
/* Used to control when registers need to be pushed onto the
|
|
||||||
* stack. */
|
int level;
|
||||||
int level;
|
|
||||||
/* The number of failure points on the stack. */
|
/* The number of failure points on the stack. */
|
||||||
int point;
|
|
||||||
|
int point;
|
||||||
|
|
||||||
|
/* Storage for the registers. Each register consists of two
|
||||||
|
* pointers to characters. So register N is represented as
|
||||||
|
* start[N] and end[N]. The pointers must be converted to
|
||||||
|
* offsets from the beginning of the string before returning the
|
||||||
|
* registers to the calling program. */
|
||||||
|
|
||||||
|
char *start[NUM_REGISTERS];
|
||||||
|
char *end[NUM_REGISTERS];
|
||||||
|
|
||||||
|
/* Keeps track of whether a register has changed recently. */
|
||||||
|
|
||||||
|
int changed[NUM_REGISTERS];
|
||||||
|
|
||||||
|
/* Structure to encapsulate the stack. */
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
/* index into the current page. If index == 0 and you need
|
||||||
|
* to pop an item, move to the previous page and set index
|
||||||
|
* = STACK_PAGE_SIZE - 1. Otherwise decrement index to
|
||||||
|
* pop an item. If index == STACK_PAGE_SIZE and you need
|
||||||
|
* to push an item, move to the next page and set index =
|
||||||
|
* 0. If there is no new next page, allocate a new page
|
||||||
|
* and link it in. Otherwise, increment index to push an
|
||||||
|
* item. */
|
||||||
|
|
||||||
|
int index;
|
||||||
|
item_page_t *current; /* Pointer to the current page. */
|
||||||
|
item_page_t first; /* First page is statically allocated. */
|
||||||
|
} stack;
|
||||||
} match_state;
|
} match_state;
|
||||||
|
|
||||||
|
/* Initialize a state object */
|
||||||
|
|
||||||
|
/* #define NEW_STATE(state) \ */
|
||||||
|
/* memset(&state, 0, (void *)(&state.stack) - (void *)(&state)); \ */
|
||||||
|
/* state.stack.current = &state.stack.first; \ */
|
||||||
|
/* state.stack.first.prev = NULL; \ */
|
||||||
|
/* state.stack.first.next = NULL; \ */
|
||||||
|
/* state.stack.index = 0; \ */
|
||||||
|
/* state.level = 1 */
|
||||||
|
|
||||||
|
#define NEW_STATE(state, nregs) \
|
||||||
|
{ \
|
||||||
|
int i; \
|
||||||
|
for (i = 0; i < nregs; i++) \
|
||||||
|
{ \
|
||||||
|
state.start[i] = NULL; \
|
||||||
|
state.end[i] = NULL; \
|
||||||
|
state.changed[i] = 0; \
|
||||||
|
} \
|
||||||
|
state.stack.current = &state.stack.first; \
|
||||||
|
state.stack.first.prev = NULL; \
|
||||||
|
state.stack.first.next = NULL; \
|
||||||
|
state.stack.index = 0; \
|
||||||
|
state.level = 1; \
|
||||||
|
state.count = 0; \
|
||||||
|
state.level = 0; \
|
||||||
|
state.point = 0; \
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Free any memory that might have been malloc'd */
|
||||||
|
|
||||||
|
#define FREE_STATE(state) \
|
||||||
|
while(state.stack.first.next != NULL) \
|
||||||
|
{ \
|
||||||
|
state.stack.current = state.stack.first.next; \
|
||||||
|
state.stack.first.next = state.stack.current->next; \
|
||||||
|
free(state.stack.current); \
|
||||||
|
}
|
||||||
|
|
||||||
/* Discard the top 'count' stack items. */
|
/* Discard the top 'count' stack items. */
|
||||||
|
|
||||||
#define STACK_DISCARD(stack, count, on_error) \
|
#define STACK_DISCARD(stack, count, on_error) \
|
||||||
|
@ -226,24 +291,6 @@ else \
|
||||||
#define STACK_EMPTY(stack) ((stack.index == 0) && \
|
#define STACK_EMPTY(stack) ((stack.index == 0) && \
|
||||||
(stack.current->prev == NULL))
|
(stack.current->prev == NULL))
|
||||||
|
|
||||||
|
|
||||||
/* Initialize a state object */
|
|
||||||
|
|
||||||
#define NEW_STATE(state) \
|
|
||||||
memset(&state, 0, sizeof(match_state)); \
|
|
||||||
state.stack.current = &state.stack.first; \
|
|
||||||
state.level = 1
|
|
||||||
|
|
||||||
/* Free any memory that might have been malloc'd */
|
|
||||||
|
|
||||||
#define FREE_STATE(state) \
|
|
||||||
while(state.stack.first.next != NULL) \
|
|
||||||
{ \
|
|
||||||
state.stack.current = state.stack.first.next; \
|
|
||||||
state.stack.first.next = state.stack.current->next; \
|
|
||||||
free(state.stack.current); \
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Return the start of register 'reg' */
|
/* Return the start of register 'reg' */
|
||||||
|
|
||||||
#define GET_REG_START(state, reg) (state.start[reg])
|
#define GET_REG_START(state, reg) (state.start[reg])
|
||||||
|
@ -302,22 +349,6 @@ state.end[reg] = text
|
||||||
|
|
||||||
/* Update the last failure point with a new position in the text. */
|
/* Update the last failure point with a new position in the text. */
|
||||||
|
|
||||||
/* #define UPDATE_FAILURE(state, xtext, on_error) \ */
|
|
||||||
/* { \ */
|
|
||||||
/* item_t *item; \ */
|
|
||||||
/* STACK_DISCARD(state.stack, state.count, on_error); \ */
|
|
||||||
/* STACK_TOP(state.stack, item, on_error); \ */
|
|
||||||
/* item->fail.text = xtext; \ */
|
|
||||||
/* state.count = 0; \ */
|
|
||||||
/* } */
|
|
||||||
|
|
||||||
/* #define UPDATE_FAILURE(state, xtext, on_error) \ */
|
|
||||||
/* { \ */
|
|
||||||
/* item_t *item; \ */
|
|
||||||
/* STACK_BACK(state.stack, item, state.count + 1, on_error); \ */
|
|
||||||
/* item->fail.text = xtext; \ */
|
|
||||||
/* } */
|
|
||||||
|
|
||||||
#define UPDATE_FAILURE(state, xtext, on_error) \
|
#define UPDATE_FAILURE(state, xtext, on_error) \
|
||||||
{ \
|
{ \
|
||||||
item_t *item; \
|
item_t *item; \
|
||||||
|
@ -391,7 +422,8 @@ enum regexp_compiled_ops /* opcodes for compiled regexp */
|
||||||
Cwordbound, /* match if at word boundary */
|
Cwordbound, /* match if at word boundary */
|
||||||
Cnotwordbound, /* match if not at word boundary */
|
Cnotwordbound, /* match if not at word boundary */
|
||||||
Csyntaxspec, /* matches syntax code (1 byte follows) */
|
Csyntaxspec, /* matches syntax code (1 byte follows) */
|
||||||
Cnotsyntaxspec /* matches if syntax code does not match (1 byte foll)*/
|
Cnotsyntaxspec, /* matches if syntax code does not match (1 byte foll)*/
|
||||||
|
Crepeat1
|
||||||
};
|
};
|
||||||
|
|
||||||
enum regexp_syntax_op /* syntax codes for plain and quoted characters */
|
enum regexp_syntax_op /* syntax codes for plain and quoted characters */
|
||||||
|
@ -581,6 +613,8 @@ static void re_compile_fastmap_aux(char *code,
|
||||||
case Cwordbound:
|
case Cwordbound:
|
||||||
case Cnotwordbound:
|
case Cnotwordbound:
|
||||||
{
|
{
|
||||||
|
for (a = 0; a < 256; a++)
|
||||||
|
fastmap[a] = 1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Csyntaxspec:
|
case Csyntaxspec:
|
||||||
|
@ -648,7 +682,7 @@ static void re_compile_fastmap_aux(char *code,
|
||||||
{
|
{
|
||||||
a = (unsigned char)code[pos++];
|
a = (unsigned char)code[pos++];
|
||||||
a |= (unsigned char)code[pos++] << 8;
|
a |= (unsigned char)code[pos++] << 8;
|
||||||
pos += (int)(short)a;
|
pos += (int)SHORT(a);
|
||||||
if (visited[pos])
|
if (visited[pos])
|
||||||
{
|
{
|
||||||
/* argh... the regexp contains empty loops. This is not
|
/* argh... the regexp contains empty loops. This is not
|
||||||
|
@ -664,10 +698,15 @@ static void re_compile_fastmap_aux(char *code,
|
||||||
{
|
{
|
||||||
a = (unsigned char)code[pos++];
|
a = (unsigned char)code[pos++];
|
||||||
a |= (unsigned char)code[pos++] << 8;
|
a |= (unsigned char)code[pos++] << 8;
|
||||||
a = pos + (int)(short)a;
|
a = pos + (int)SHORT(a);
|
||||||
re_compile_fastmap_aux(code, a, visited, can_be_null, fastmap);
|
re_compile_fastmap_aux(code, a, visited, can_be_null, fastmap);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case Crepeat1:
|
||||||
|
{
|
||||||
|
pos += 2;
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
abort(); /* probably some opcode is missing from this switch */
|
abort(); /* probably some opcode is missing from this switch */
|
||||||
|
@ -754,10 +793,11 @@ static int re_optimize_star_jump(regexp_t bufp, char *code)
|
||||||
char ch;
|
char ch;
|
||||||
int a;
|
int a;
|
||||||
int b;
|
int b;
|
||||||
|
int num_instructions = 0;
|
||||||
|
|
||||||
a = (unsigned char)*code++;
|
a = (unsigned char)*code++;
|
||||||
a |= (unsigned char)*code++ << 8;
|
a |= (unsigned char)*code++ << 8;
|
||||||
a = (int)(short)a;
|
a = (int)SHORT(a);
|
||||||
|
|
||||||
p1 = code + a + 3; /* skip the failure_jump */
|
p1 = code + a + 3; /* skip the failure_jump */
|
||||||
assert(p1[-3] == Cfailure_jump);
|
assert(p1[-3] == Cfailure_jump);
|
||||||
|
@ -775,6 +815,7 @@ static int re_optimize_star_jump(regexp_t bufp, char *code)
|
||||||
|
|
||||||
/* loop until we find something that consumes a character */
|
/* loop until we find something that consumes a character */
|
||||||
loop_p1:
|
loop_p1:
|
||||||
|
num_instructions++;
|
||||||
switch (*p1++)
|
switch (*p1++)
|
||||||
{
|
{
|
||||||
case Cbol:
|
case Cbol:
|
||||||
|
@ -824,6 +865,7 @@ static int re_optimize_star_jump(regexp_t bufp, char *code)
|
||||||
/* now we know that we can't backtrack. */
|
/* now we know that we can't backtrack. */
|
||||||
while (p1 != p2 - 3)
|
while (p1 != p2 - 3)
|
||||||
{
|
{
|
||||||
|
num_instructions++;
|
||||||
switch (*p1++)
|
switch (*p1++)
|
||||||
{
|
{
|
||||||
case Cend:
|
case Cend:
|
||||||
|
@ -873,11 +915,22 @@ static int re_optimize_star_jump(regexp_t bufp, char *code)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
make_update_jump:
|
||||||
code -= 3;
|
code -= 3;
|
||||||
a += 3; /* jump to after the Cfailure_jump */
|
a += 3; /* jump to after the Cfailure_jump */
|
||||||
code[0] = Cupdate_failure_jump;
|
code[0] = Cupdate_failure_jump;
|
||||||
code[1] = a & 0xff;
|
code[1] = a & 0xff;
|
||||||
code[2] = a >> 8;
|
code[2] = a >> 8;
|
||||||
|
if (num_instructions > 1)
|
||||||
|
return 1;
|
||||||
|
assert(num_instructions == 1);
|
||||||
|
/* if the only instruction matches a single character, we can do
|
||||||
|
* better
|
||||||
|
*/
|
||||||
|
p1 = code + 3 + a; /* start of sole instruction */
|
||||||
|
if (*p1 == Cset || *p1 == Cexact || *p1 == Canychar ||
|
||||||
|
*p1 == Csyntaxspec || *p1 == Cnotsyntaxspec)
|
||||||
|
code[0] = Crepeat1;
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
make_normal_jump:
|
make_normal_jump:
|
||||||
|
@ -939,6 +992,7 @@ static int re_optimize(regexp_t bufp)
|
||||||
case Cjump:
|
case Cjump:
|
||||||
case Cdummy_failure_jump:
|
case Cdummy_failure_jump:
|
||||||
case Cfailure_jump:
|
case Cfailure_jump:
|
||||||
|
case Crepeat1:
|
||||||
{
|
{
|
||||||
code += 2;
|
code += 2;
|
||||||
break;
|
break;
|
||||||
|
@ -1111,7 +1165,8 @@ char *re_compile_pattern(char *regex, int size, regexp_t bufp)
|
||||||
re_compile_initialize();
|
re_compile_initialize();
|
||||||
bufp->used = 0;
|
bufp->used = 0;
|
||||||
bufp->fastmap_accurate = 0;
|
bufp->fastmap_accurate = 0;
|
||||||
bufp->uses_registers = 0;
|
bufp->uses_registers = 1;
|
||||||
|
bufp->num_registers = 1;
|
||||||
translate = bufp->translate;
|
translate = bufp->translate;
|
||||||
pattern = bufp->buffer;
|
pattern = bufp->buffer;
|
||||||
alloc = bufp->allocated;
|
alloc = bufp->allocated;
|
||||||
|
@ -1289,6 +1344,7 @@ char *re_compile_pattern(char *regex, int size, regexp_t bufp)
|
||||||
STORE(Cstart_memory);
|
STORE(Cstart_memory);
|
||||||
STORE(next_register);
|
STORE(next_register);
|
||||||
open_registers[num_open_registers++] = next_register;
|
open_registers[num_open_registers++] = next_register;
|
||||||
|
bufp->num_registers++;
|
||||||
next_register++;
|
next_register++;
|
||||||
}
|
}
|
||||||
paren_depth++;
|
paren_depth++;
|
||||||
|
@ -1545,27 +1601,8 @@ int re_match(regexp_t bufp,
|
||||||
code = bufp->buffer;
|
code = bufp->buffer;
|
||||||
|
|
||||||
translate = bufp->translate;
|
translate = bufp->translate;
|
||||||
/* translated = NULL; */
|
|
||||||
/* if (bufp->translate) */
|
|
||||||
/* { */
|
|
||||||
/* char *t1; */
|
|
||||||
/* char *t2; */
|
|
||||||
|
|
||||||
/* translated = malloc(size); */
|
|
||||||
/* if (translated == NULL) */
|
|
||||||
/* goto error; */
|
|
||||||
|
|
||||||
/* t1 = string; */
|
|
||||||
/* t2 = translated; */
|
|
||||||
/* while(t1 < textend) */
|
|
||||||
/* *t2++ = bufp->translate[*t1++]; */
|
|
||||||
|
|
||||||
/* text = translated + pos; */
|
|
||||||
/* textstart = translated; */
|
|
||||||
/* textend = translated + size; */
|
|
||||||
/* } */
|
|
||||||
|
|
||||||
NEW_STATE(state);
|
NEW_STATE(state, bufp->num_registers);
|
||||||
|
|
||||||
continue_matching:
|
continue_matching:
|
||||||
switch (*code++)
|
switch (*code++)
|
||||||
|
@ -1587,7 +1624,7 @@ int re_match(regexp_t bufp,
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (a = 1; a < RE_NREGS; a++)
|
for (a = 1; a < bufp->num_registers; a++)
|
||||||
{
|
{
|
||||||
if ((GET_REG_START(state, a) == NULL) ||
|
if ((GET_REG_START(state, a) == NULL) ||
|
||||||
(GET_REG_END(state, a) == NULL))
|
(GET_REG_END(state, a) == NULL))
|
||||||
|
@ -1599,10 +1636,13 @@ int re_match(regexp_t bufp,
|
||||||
old_regs->start[a] = GET_REG_START(state, a) - textstart;
|
old_regs->start[a] = GET_REG_START(state, a) - textstart;
|
||||||
old_regs->end[a] = GET_REG_END(state, a) - textstart;
|
old_regs->end[a] = GET_REG_END(state, a) - textstart;
|
||||||
}
|
}
|
||||||
|
for (; a < RE_NREGS; a++)
|
||||||
|
{
|
||||||
|
old_regs->start[a] = -1;
|
||||||
|
old_regs->end[a] = -1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* if(translated) */
|
|
||||||
/* free(translated); */
|
|
||||||
FREE_STATE(state);
|
FREE_STATE(state);
|
||||||
return match_end - pos;
|
return match_end - pos;
|
||||||
}
|
}
|
||||||
|
@ -1703,18 +1743,18 @@ int re_match(regexp_t bufp,
|
||||||
{
|
{
|
||||||
a = (unsigned char)*code++;
|
a = (unsigned char)*code++;
|
||||||
a |= (unsigned char)*code++ << 8;
|
a |= (unsigned char)*code++ << 8;
|
||||||
code += (int)(short)a;
|
code += (int)SHORT(a);
|
||||||
goto continue_matching;
|
goto continue_matching;
|
||||||
}
|
}
|
||||||
case Cdummy_failure_jump:
|
case Cdummy_failure_jump:
|
||||||
{
|
{
|
||||||
a = (unsigned char)*code++;
|
a = (unsigned char)*code++;
|
||||||
a |= (unsigned char)*code++ << 8;
|
a |= (unsigned char)*code++ << 8;
|
||||||
a = (int)(short)a;
|
a = (int)SHORT(a);
|
||||||
assert(*code == Cfailure_jump);
|
assert(*code == Cfailure_jump);
|
||||||
b = (unsigned char)code[1];
|
b = (unsigned char)code[1];
|
||||||
b |= (unsigned char)code[2] << 8;
|
b |= (unsigned char)code[2] << 8;
|
||||||
PUSH_FAILURE(state, code + (int)(short)b + 3, NULL, goto error);
|
PUSH_FAILURE(state, code + (int)SHORT(b) + 3, NULL, goto error);
|
||||||
code += a;
|
code += a;
|
||||||
goto continue_matching;
|
goto continue_matching;
|
||||||
}
|
}
|
||||||
|
@ -1722,10 +1762,120 @@ int re_match(regexp_t bufp,
|
||||||
{
|
{
|
||||||
a = (unsigned char)*code++;
|
a = (unsigned char)*code++;
|
||||||
a |= (unsigned char)*code++ << 8;
|
a |= (unsigned char)*code++ << 8;
|
||||||
a = (int)(short)a;
|
a = (int)SHORT(a);
|
||||||
PUSH_FAILURE(state, code + a, text, goto error);
|
PUSH_FAILURE(state, code + a, text, goto error);
|
||||||
goto continue_matching;
|
goto continue_matching;
|
||||||
}
|
}
|
||||||
|
case Crepeat1:
|
||||||
|
{
|
||||||
|
char *pinst;
|
||||||
|
a = (unsigned char)*code++;
|
||||||
|
a |= (unsigned char)*code++ << 8;
|
||||||
|
a = (int)SHORT(a);
|
||||||
|
pinst = code + a;
|
||||||
|
/* pinst is sole instruction in loop, and it matches a
|
||||||
|
* single character. Since Crepeat1 was originally a
|
||||||
|
* Cupdate_failure_jump, we also know that backtracking is
|
||||||
|
* useless: so long as the single-character expression matches,
|
||||||
|
* it must be used. Also, in the case of +, we've already
|
||||||
|
* matched one character, so + can't fail: nothing here can
|
||||||
|
* cause a failure.
|
||||||
|
*/
|
||||||
|
switch (*pinst++)
|
||||||
|
{
|
||||||
|
case Cset:
|
||||||
|
{
|
||||||
|
if (translate)
|
||||||
|
{
|
||||||
|
while (text < textend)
|
||||||
|
{
|
||||||
|
ch = translate[(unsigned char)*text];
|
||||||
|
if (pinst[ch/8] & (1<<(ch & 7)))
|
||||||
|
text++;
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
while (text < textend)
|
||||||
|
{
|
||||||
|
ch = (unsigned char)*text;
|
||||||
|
if (pinst[ch/8] & (1<<(ch & 7)))
|
||||||
|
text++;
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case Cexact:
|
||||||
|
{
|
||||||
|
ch = (unsigned char)*pinst;
|
||||||
|
if (translate)
|
||||||
|
{
|
||||||
|
while (text < textend &&
|
||||||
|
translate[(unsigned char)*text] == ch)
|
||||||
|
text++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
while (text < textend && (unsigned char)*text == ch)
|
||||||
|
text++;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case Canychar:
|
||||||
|
{
|
||||||
|
while (text < textend && (unsigned char)*text != '\n')
|
||||||
|
text++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case Csyntaxspec:
|
||||||
|
{
|
||||||
|
a = (unsigned char)*pinst;
|
||||||
|
if (translate)
|
||||||
|
{
|
||||||
|
while (text < textend &&
|
||||||
|
translate[SYNTAX(*text)] == a)
|
||||||
|
text++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
while (text < textend && SYNTAX(*text) == a)
|
||||||
|
text++;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case Cnotsyntaxspec:
|
||||||
|
{
|
||||||
|
a = (unsigned char)*pinst;
|
||||||
|
if (translate)
|
||||||
|
{
|
||||||
|
while (text < textend &&
|
||||||
|
translate[SYNTAX(*text)] != a)
|
||||||
|
text++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
while (text < textend && SYNTAX(*text) != a)
|
||||||
|
text++;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
abort();
|
||||||
|
/*NOTREACHED*/
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* due to the funky way + and * are compiled, the top failure-
|
||||||
|
* stack entry at this point is actually a success entry --
|
||||||
|
* update it & pop it
|
||||||
|
*/
|
||||||
|
UPDATE_FAILURE(state, text, goto error);
|
||||||
|
goto fail; /* i.e., succeed <wink/sigh> */
|
||||||
|
}
|
||||||
case Cbegbuf:
|
case Cbegbuf:
|
||||||
{
|
{
|
||||||
if (text == textstart)
|
if (text == textstart)
|
||||||
|
|
|
@ -1,3 +1,7 @@
|
||||||
|
/*
|
||||||
|
* -*- mode: c-mode; c-file-style: python -*-
|
||||||
|
*/
|
||||||
|
|
||||||
#ifndef Py_REGEXPR_H
|
#ifndef Py_REGEXPR_H
|
||||||
#define Py_REGEXPR_H
|
#define Py_REGEXPR_H
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
@ -5,22 +9,22 @@ extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
* regexpr.h
|
||||||
|
*
|
||||||
|
* Author: Tatu Ylonen <ylo@ngs.fi>
|
||||||
|
*
|
||||||
|
* Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
|
||||||
|
*
|
||||||
|
* Permission to use, copy, modify, distribute, and sell this software
|
||||||
|
* and its documentation for any purpose is hereby granted without fee,
|
||||||
|
* provided that the above copyright notice appear in all copies. This
|
||||||
|
* software is provided "as is" without express or implied warranty.
|
||||||
|
*
|
||||||
|
* Created: Thu Sep 26 17:15:36 1991 ylo
|
||||||
|
* Last modified: Mon Nov 4 15:49:46 1991 ylo
|
||||||
|
*/
|
||||||
|
|
||||||
regexpr.h
|
/* $Id$ */
|
||||||
|
|
||||||
Author: Tatu Ylonen <ylo@ngs.fi>
|
|
||||||
|
|
||||||
Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
|
|
||||||
|
|
||||||
Permission to use, copy, modify, distribute, and sell this software
|
|
||||||
and its documentation for any purpose is hereby granted without fee,
|
|
||||||
provided that the above copyright notice appear in all copies. This
|
|
||||||
software is provided "as is" without express or implied warranty.
|
|
||||||
|
|
||||||
Created: Thu Sep 26 17:15:36 1991 ylo
|
|
||||||
Last modified: Mon Nov 4 15:49:46 1991 ylo
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef REGEXPR_H
|
#ifndef REGEXPR_H
|
||||||
#define REGEXPR_H
|
#define REGEXPR_H
|
||||||
|
@ -29,21 +33,22 @@ Last modified: Mon Nov 4 15:49:46 1991 ylo
|
||||||
|
|
||||||
typedef struct re_pattern_buffer
|
typedef struct re_pattern_buffer
|
||||||
{
|
{
|
||||||
char *buffer; /* compiled pattern */
|
char *buffer; /* compiled pattern */
|
||||||
int allocated; /* allocated size of compiled pattern */
|
int allocated; /* allocated size of compiled pattern */
|
||||||
int used; /* actual length of compiled pattern */
|
int used; /* actual length of compiled pattern */
|
||||||
char *fastmap; /* fastmap[ch] is true if ch can start pattern */
|
char *fastmap; /* fastmap[ch] is true if ch can start pattern */
|
||||||
char *translate; /* translation to apply during compilation/matching */
|
char *translate; /* translation to apply during compilation/matching */
|
||||||
char fastmap_accurate; /* true if fastmap is valid */
|
char fastmap_accurate; /* true if fastmap is valid */
|
||||||
char can_be_null; /* true if can match empty string */
|
char can_be_null; /* true if can match empty string */
|
||||||
char uses_registers; /* registers are used and need to be initialized */
|
char uses_registers; /* registers are used and need to be initialized */
|
||||||
char anchor; /* anchor: 0=none 1=begline 2=begbuf */
|
int num_registers; /* number of registers used */
|
||||||
|
char anchor; /* anchor: 0=none 1=begline 2=begbuf */
|
||||||
} *regexp_t;
|
} *regexp_t;
|
||||||
|
|
||||||
typedef struct re_registers
|
typedef struct re_registers
|
||||||
{
|
{
|
||||||
int start[RE_NREGS]; /* start offset of region */
|
int start[RE_NREGS]; /* start offset of region */
|
||||||
int end[RE_NREGS]; /* end offset of region */
|
int end[RE_NREGS]; /* end offset of region */
|
||||||
} *regexp_registers_t;
|
} *regexp_registers_t;
|
||||||
|
|
||||||
/* bit definitions for syntax */
|
/* bit definitions for syntax */
|
||||||
|
@ -77,52 +82,53 @@ typedef struct re_registers
|
||||||
#ifdef HAVE_PROTOTYPES
|
#ifdef HAVE_PROTOTYPES
|
||||||
|
|
||||||
extern int re_syntax;
|
extern int re_syntax;
|
||||||
/* This is the actual syntax mask. It was added so that Python
|
/* This is the actual syntax mask. It was added so that Python could do
|
||||||
could do syntax-dependent munging of patterns before compilation. */
|
* syntax-dependent munging of patterns before compilation. */
|
||||||
|
|
||||||
int re_set_syntax(int syntax);
|
int re_set_syntax(int syntax);
|
||||||
/* This sets the syntax to use and returns the previous syntax. The
|
/* This sets the syntax to use and returns the previous syntax. The
|
||||||
syntax is specified by a bit mask of the above defined bits. */
|
* syntax is specified by a bit mask of the above defined bits. */
|
||||||
|
|
||||||
char *re_compile_pattern(char *regex, int regex_size, regexp_t compiled);
|
char *re_compile_pattern(char *regex, int regex_size, regexp_t compiled);
|
||||||
/* This compiles the regexp (given in regex and length in regex_size).
|
/* This compiles the regexp (given in regex and length in regex_size).
|
||||||
This returns NULL if the regexp compiled successfully, and an error
|
* This returns NULL if the regexp compiled successfully, and an error
|
||||||
message if an error was encountered. The buffer field must be
|
* message if an error was encountered. The buffer field must be
|
||||||
initialized to a memory area allocated by malloc (or to NULL) before
|
* initialized to a memory area allocated by malloc (or to NULL) before
|
||||||
use, and the allocated field must be set to its length (or 0 if buffer is
|
* use, and the allocated field must be set to its length (or 0 if
|
||||||
NULL). Also, the translate field must be set to point to a valid
|
* buffer is NULL). Also, the translate field must be set to point to a
|
||||||
translation table, or NULL if it is not used. */
|
* valid translation table, or NULL if it is not used. */
|
||||||
|
|
||||||
int re_match(regexp_t compiled, char *string, int size, int pos,
|
int re_match(regexp_t compiled, char *string, int size, int pos,
|
||||||
regexp_registers_t old_regs);
|
regexp_registers_t old_regs);
|
||||||
/* This tries to match the regexp against the string. This returns the
|
/* This tries to match the regexp against the string. This returns the
|
||||||
length of the matched portion, or -1 if the pattern could not be
|
* length of the matched portion, or -1 if the pattern could not be
|
||||||
matched and -2 if an error (such as failure stack overflow) is
|
* matched and -2 if an error (such as failure stack overflow) is
|
||||||
encountered. */
|
* encountered. */
|
||||||
|
|
||||||
int re_search(regexp_t compiled, char *string, int size, int startpos,
|
int re_search(regexp_t compiled, char *string, int size, int startpos,
|
||||||
int range, regexp_registers_t regs);
|
int range, regexp_registers_t regs);
|
||||||
/* This rearches for a substring matching the regexp. This returns the first
|
/* This searches for a substring matching the regexp. This returns the
|
||||||
index at which a match is found. range specifies at how many positions to
|
* first index at which a match is found. range specifies at how many
|
||||||
try matching; positive values indicate searching forwards, and negative
|
* positions to try matching; positive values indicate searching
|
||||||
values indicate searching backwards. mstop specifies the offset beyond
|
* forwards, and negative values indicate searching backwards. mstop
|
||||||
which a match must not go. This returns -1 if no match is found, and
|
* specifies the offset beyond which a match must not go. This returns
|
||||||
-2 if an error (such as failure stack overflow) is encountered. */
|
* -1 if no match is found, and -2 if an error (such as failure stack
|
||||||
|
* overflow) is encountered. */
|
||||||
|
|
||||||
void re_compile_fastmap(regexp_t compiled);
|
void re_compile_fastmap(regexp_t compiled);
|
||||||
/* This computes the fastmap for the regexp. For this to have any effect,
|
/* This computes the fastmap for the regexp. For this to have any effect,
|
||||||
the calling program must have initialized the fastmap field to point
|
* the calling program must have initialized the fastmap field to point
|
||||||
to an array of 256 characters. */
|
* to an array of 256 characters. */
|
||||||
|
|
||||||
char *re_comp(char *s);
|
char *re_comp(char *s);
|
||||||
/* BSD 4.2 regex library routine re_comp. This compiles the regexp into
|
/* BSD 4.2 regex library routine re_comp. This compiles the regexp into
|
||||||
an internal buffer. This returns NULL if the regexp was compiled
|
* an internal buffer. This returns NULL if the regexp was compiled
|
||||||
successfully, and an error message if there was an error. */
|
* successfully, and an error message if there was an error. */
|
||||||
|
|
||||||
int re_exec(char *s);
|
int re_exec(char *s);
|
||||||
/* BSD 4.2 regexp library routine re_exec. This returns true if the string
|
/* BSD 4.2 regexp library routine re_exec. This returns true if the
|
||||||
matches the regular expression (that is, a matching part is found
|
* string matches the regular expression (that is, a matching part is
|
||||||
anywhere in the string). */
|
* found anywhere in the string). */
|
||||||
|
|
||||||
#else /* HAVE_PROTOTYPES */
|
#else /* HAVE_PROTOTYPES */
|
||||||
|
|
||||||
|
|
375
Modules/reopmodule.c
Normal file
375
Modules/reopmodule.c
Normal file
|
@ -0,0 +1,375 @@
|
||||||
|
/***********************************************************
|
||||||
|
Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
|
||||||
|
The Netherlands.
|
||||||
|
|
||||||
|
All Rights Reserved
|
||||||
|
|
||||||
|
Permission to use, copy, modify, and distribute this software and its
|
||||||
|
documentation for any purpose and without fee is hereby granted,
|
||||||
|
provided that the above copyright notice appear in all copies and that
|
||||||
|
both that copyright notice and this permission notice appear in
|
||||||
|
supporting documentation, and that the names of Stichting Mathematisch
|
||||||
|
Centrum or CWI or Corporation for National Research Initiatives or
|
||||||
|
CNRI not be used in advertising or publicity pertaining to
|
||||||
|
distribution of the software without specific, written prior
|
||||||
|
permission.
|
||||||
|
|
||||||
|
While CWI is the initial source for this software, a modified version
|
||||||
|
is made available by the Corporation for National Research Initiatives
|
||||||
|
(CNRI) at the Internet address ftp://ftp.python.org.
|
||||||
|
|
||||||
|
STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
|
||||||
|
REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||||
|
MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
|
||||||
|
CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
|
||||||
|
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
|
||||||
|
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
||||||
|
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||||
|
PERFORMANCE OF THIS SOFTWARE.
|
||||||
|
|
||||||
|
******************************************************************/
|
||||||
|
|
||||||
|
/* $Id$ */
|
||||||
|
|
||||||
|
/* Regular expression objects */
|
||||||
|
/* This uses Tatu Ylonen's copyleft-free reimplementation of
|
||||||
|
GNU regular expressions */
|
||||||
|
|
||||||
|
#include "Python.h"
|
||||||
|
|
||||||
|
#include <ctype.h>
|
||||||
|
|
||||||
|
#include "regexpr.h"
|
||||||
|
|
||||||
|
static PyObject *ReopError; /* Exception */
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
makeresult(regs, num_regs)
|
||||||
|
struct re_registers *regs;
|
||||||
|
int num_regs;
|
||||||
|
{
|
||||||
|
PyObject *v;
|
||||||
|
int i;
|
||||||
|
static PyObject *filler = NULL;
|
||||||
|
|
||||||
|
if (filler == NULL) {
|
||||||
|
filler = Py_BuildValue("(ii)", -1, -1);
|
||||||
|
if (filler == NULL)
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
v = PyTuple_New(num_regs);
|
||||||
|
if (v == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
for (i = 0; i < num_regs; i++) {
|
||||||
|
int lo = regs->start[i];
|
||||||
|
int hi = regs->end[i];
|
||||||
|
PyObject *w;
|
||||||
|
if (lo == -1 && hi == -1) {
|
||||||
|
w = filler;
|
||||||
|
Py_INCREF(w);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
w = Py_BuildValue("(ii)", lo, hi);
|
||||||
|
if (w == NULL || PyTuple_SetItem(v, i, w) < 0) {
|
||||||
|
Py_DECREF(v);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
reop_match(self, args)
|
||||||
|
PyObject *self;
|
||||||
|
PyObject *args;
|
||||||
|
{
|
||||||
|
char *string;
|
||||||
|
int fastmaplen, stringlen;
|
||||||
|
int can_be_null, anchor, i;
|
||||||
|
int num_regs, flags, pos, result;
|
||||||
|
struct re_pattern_buffer bufp;
|
||||||
|
struct re_registers re_regs;
|
||||||
|
|
||||||
|
if (!PyArg_Parse(args, "(s#iiis#is#i)",
|
||||||
|
&(bufp.buffer), &(bufp.allocated),
|
||||||
|
&num_regs, &flags, &can_be_null,
|
||||||
|
&(bufp.fastmap), &fastmaplen,
|
||||||
|
&anchor,
|
||||||
|
&string, &stringlen,
|
||||||
|
&pos))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
/* XXX sanity-check the input data */
|
||||||
|
bufp.used=bufp.allocated;
|
||||||
|
bufp.translate=NULL;
|
||||||
|
bufp.fastmap_accurate=1;
|
||||||
|
bufp.can_be_null=can_be_null;
|
||||||
|
bufp.uses_registers=1;
|
||||||
|
bufp.num_registers=num_regs;
|
||||||
|
bufp.anchor=anchor;
|
||||||
|
|
||||||
|
for(i=0; i<num_regs; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
|
||||||
|
|
||||||
|
result = re_match(&bufp,
|
||||||
|
string, stringlen, pos,
|
||||||
|
&re_regs);
|
||||||
|
if (result < -1) {
|
||||||
|
/* Failure like stack overflow */
|
||||||
|
PyErr_SetString(ReopError, "match failure");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return makeresult(&re_regs, num_regs);
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
reop_search(self, args)
|
||||||
|
PyObject *self;
|
||||||
|
PyObject *args;
|
||||||
|
{
|
||||||
|
char *string;
|
||||||
|
int fastmaplen, stringlen;
|
||||||
|
int can_be_null, anchor, i;
|
||||||
|
int num_regs, flags, pos, result;
|
||||||
|
struct re_pattern_buffer bufp;
|
||||||
|
struct re_registers re_regs;
|
||||||
|
|
||||||
|
if (!PyArg_Parse(args, "(s#iiis#is#i)",
|
||||||
|
&(bufp.buffer), &(bufp.allocated),
|
||||||
|
&num_regs, &flags, &can_be_null,
|
||||||
|
&(bufp.fastmap), &fastmaplen,
|
||||||
|
&anchor,
|
||||||
|
&string, &stringlen,
|
||||||
|
&pos))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
/* XXX sanity-check the input data */
|
||||||
|
bufp.used=bufp.allocated;
|
||||||
|
bufp.translate=NULL;
|
||||||
|
bufp.fastmap_accurate=1;
|
||||||
|
bufp.can_be_null=can_be_null;
|
||||||
|
bufp.uses_registers=1;
|
||||||
|
bufp.num_registers=1;
|
||||||
|
bufp.anchor=anchor;
|
||||||
|
|
||||||
|
for(i=0; i<num_regs; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
|
||||||
|
|
||||||
|
result = re_search(&bufp,
|
||||||
|
string, stringlen, pos, stringlen-pos,
|
||||||
|
&re_regs);
|
||||||
|
if (result < -1) {
|
||||||
|
/* Failure like stack overflow */
|
||||||
|
PyErr_SetString(ReopError, "match failure");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return makeresult(&re_regs, num_regs);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
/* Functions originally in the regsub module.
|
||||||
|
Added June 1, 1997.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* A cache of previously used patterns is maintained. Notice that if
|
||||||
|
you change the reop syntax flag, entries in the cache are
|
||||||
|
invalidated.
|
||||||
|
XXX Solution: use (syntax flag, pattern) as keys? Clear the cache
|
||||||
|
every so often, or once it gets past a certain size?
|
||||||
|
*/
|
||||||
|
|
||||||
|
static PyObject *cache_dict=NULL;
|
||||||
|
|
||||||
|
/* Accept an object; if it's a reop pattern, Py_INCREF it and return
|
||||||
|
it. If it's a string, a reop object is compiled and cached.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static reopobject *
|
||||||
|
cached_compile(pattern)
|
||||||
|
PyObject *pattern;
|
||||||
|
{
|
||||||
|
reopobject *p2;
|
||||||
|
|
||||||
|
if (!PyString_Check(pattern))
|
||||||
|
{
|
||||||
|
/* It's not a string, so assume it's a compiled reop object */
|
||||||
|
/* XXX check that! */
|
||||||
|
Py_INCREF(pattern);
|
||||||
|
return (reopobject*)pattern;
|
||||||
|
}
|
||||||
|
if (cache_dict==NULL)
|
||||||
|
{
|
||||||
|
cache_dict=PyDict_New();
|
||||||
|
if (cache_dict==NULL)
|
||||||
|
{
|
||||||
|
return (reopobject*)NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* See if the pattern has already been cached; if so, return that
|
||||||
|
reop object */
|
||||||
|
p2=(reopobject*)PyDict_GetItem(cache_dict, pattern);
|
||||||
|
if (p2)
|
||||||
|
{
|
||||||
|
Py_INCREF(p2);
|
||||||
|
return (reopobject*)p2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Compile the pattern and cache it */
|
||||||
|
p2=(reopobject*)newreopobject(pattern, NULL, pattern, NULL);
|
||||||
|
if (!p2) return p2;
|
||||||
|
PyDict_SetItem(cache_dict, pattern, (PyObject*)p2);
|
||||||
|
return p2;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
internal_split(args, retain)
|
||||||
|
PyObject *args;
|
||||||
|
int retain;
|
||||||
|
{
|
||||||
|
PyObject *newlist, *s;
|
||||||
|
reopobject *pattern;
|
||||||
|
int maxsplit=0, count=0, length, next=0, result;
|
||||||
|
int match_end=0; /* match_start is defined below */
|
||||||
|
char *start;
|
||||||
|
|
||||||
|
if (!PyArg_ParseTuple(args, "s#Oi", &start, &length, &pattern,
|
||||||
|
&maxsplit))
|
||||||
|
{
|
||||||
|
PyErr_Clear();
|
||||||
|
if (!PyArg_ParseTuple(args, "s#O", &start, &length, &pattern))
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
pattern=cached_compile((PyObject *)pattern);
|
||||||
|
if (!pattern) return NULL;
|
||||||
|
|
||||||
|
newlist=PyList_New(0);
|
||||||
|
if (!newlist) return NULL;
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
result = re_search(&pattern->re_patbuf,
|
||||||
|
start, length, next, length-next,
|
||||||
|
&pattern->re_regs);
|
||||||
|
if (result < -1)
|
||||||
|
{ /* Erk... an error happened during the reop search */
|
||||||
|
Py_DECREF(newlist);
|
||||||
|
PyErr_SetString(ReopError, "match failure");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if (next<=result)
|
||||||
|
{
|
||||||
|
int match_start=pattern->re_regs.start[0];
|
||||||
|
int oldmatch_end=match_end;
|
||||||
|
match_end=pattern->re_regs.end[0];
|
||||||
|
|
||||||
|
if (match_start==match_end)
|
||||||
|
{ /* A zero-length match; increment to the next position */
|
||||||
|
next=result+1;
|
||||||
|
match_end=oldmatch_end;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Append the string up to the start of the match */
|
||||||
|
s=PyString_FromStringAndSize(start+oldmatch_end, match_start-oldmatch_end);
|
||||||
|
if (!s)
|
||||||
|
{
|
||||||
|
Py_DECREF(newlist);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
PyList_Append(newlist, s);
|
||||||
|
Py_DECREF(s);
|
||||||
|
|
||||||
|
if (retain)
|
||||||
|
{
|
||||||
|
/* Append a string containing whatever matched */
|
||||||
|
s=PyString_FromStringAndSize(start+match_start, match_end-match_start);
|
||||||
|
if (!s)
|
||||||
|
{
|
||||||
|
Py_DECREF(newlist);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
PyList_Append(newlist, s);
|
||||||
|
Py_DECREF(s);
|
||||||
|
}
|
||||||
|
/* Update the pointer, and increment the count of splits */
|
||||||
|
next=match_end; count++;
|
||||||
|
}
|
||||||
|
} while (result!=-1 && !(maxsplit && maxsplit==count) &&
|
||||||
|
next<length);
|
||||||
|
s=PyString_FromStringAndSize(start+match_end, length-match_end);
|
||||||
|
if (!s)
|
||||||
|
{
|
||||||
|
Py_DECREF(newlist);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
PyList_Append(newlist, s);
|
||||||
|
Py_DECREF(s);
|
||||||
|
Py_DECREF(pattern);
|
||||||
|
return newlist;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* split(): delegate to internal_split() with retain off, so the matched
 * text is not added to the result list. */
static PyObject *
reop_split(self, args)
	PyObject *self;
	PyObject *args;
{
	PyObject *pieces;

	pieces = internal_split(args, 0);
	return pieces;
}
|
||||||
|
|
||||||
|
/* splitx(): delegate to internal_split() with retain on, so the matched
 * text is added to the result list as well. */
static PyObject *
reop_splitx(self, args)
	PyObject *self;
	PyObject *args;
{
	PyObject *pieces;

	pieces = internal_split(args, 1);
	return pieces;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static struct PyMethodDef reop_global_methods[] = {
|
||||||
|
{"match", reop_match, 0},
|
||||||
|
{"search", reop_search, 0},
|
||||||
|
#if 0
|
||||||
|
{"split", reop_split, 0},
|
||||||
|
{"splitx", reop_splitx, 0},
|
||||||
|
#endif
|
||||||
|
{NULL, NULL} /* sentinel */
|
||||||
|
};
|
||||||
|
|
||||||
|
void
|
||||||
|
initreop()
|
||||||
|
{
|
||||||
|
PyObject *m, *d, *v;
|
||||||
|
int i;
|
||||||
|
char *s;
|
||||||
|
|
||||||
|
m = Py_InitModule("reop", reop_global_methods);
|
||||||
|
d = PyModule_GetDict(m);
|
||||||
|
|
||||||
|
/* Initialize reop.error exception */
|
||||||
|
v = ReopError = PyString_FromString("reop.error");
|
||||||
|
if (v == NULL || PyDict_SetItemString(d, "error", v) != 0)
|
||||||
|
goto finally;
|
||||||
|
|
||||||
|
/* Initialize reop.casefold constant */
|
||||||
|
if (!(v = PyString_FromStringAndSize((char *)NULL, 256)))
|
||||||
|
goto finally;
|
||||||
|
|
||||||
|
if (!(s = PyString_AsString(v)))
|
||||||
|
goto finally;
|
||||||
|
|
||||||
|
for (i = 0; i < 256; i++) {
|
||||||
|
if (isupper(i))
|
||||||
|
s[i] = tolower(i);
|
||||||
|
else
|
||||||
|
s[i] = i;
|
||||||
|
}
|
||||||
|
if (PyDict_SetItemString(d, "casefold", v) < 0)
|
||||||
|
goto finally;
|
||||||
|
Py_DECREF(v);
|
||||||
|
|
||||||
|
if (!PyErr_Occurred())
|
||||||
|
return;
|
||||||
|
finally:
|
||||||
|
Py_FatalError("can't initialize reop module");
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue