mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
AMK's latest
This commit is contained in:
parent
104be4a4a7
commit
042ff9eb3a
4 changed files with 168 additions and 90 deletions
|
@ -3,7 +3,7 @@
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
|
|
||||||
#define PCRE_VERSION "1.04 22-Dec-1997"
|
#define PCRE_VERSION "1.07 16-Feb-1998"
|
||||||
|
|
||||||
|
|
||||||
/* This is a library of functions to support regular expressions whose syntax
|
/* This is a library of functions to support regular expressions whose syntax
|
||||||
|
@ -12,7 +12,7 @@ the file Tech.Notes for some information on the internals.
|
||||||
|
|
||||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
Written by: Philip Hazel <ph10@cam.ac.uk>
|
||||||
|
|
||||||
Copyright (c) 1997 University of Cambridge
|
Copyright (c) 1998 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Permission is granted to anyone to use this software for any purpose on any
|
Permission is granted to anyone to use this software for any purpose on any
|
||||||
|
@ -192,6 +192,7 @@ enum {
|
||||||
OP_CRMINRANGE,
|
OP_CRMINRANGE,
|
||||||
|
|
||||||
OP_CLASS, /* Match a character class */
|
OP_CLASS, /* Match a character class */
|
||||||
|
OP_NEGCLASS, /* Match a character class, specified negatively */
|
||||||
OP_CLASS_L, /* Match a character class */
|
OP_CLASS_L, /* Match a character class */
|
||||||
OP_REF, /* Match a back reference */
|
OP_REF, /* Match a back reference */
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
* Perl-Compatible Regular Expressions *
|
* Perl-Compatible Regular Expressions *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* Copyright (c) 1997 University of Cambridge */
|
/* Copyright (c) 1998 University of Cambridge */
|
||||||
|
|
||||||
#ifndef _PCRE_H
|
#ifndef _PCRE_H
|
||||||
#define _PCRE_H
|
#define _PCRE_H
|
||||||
|
@ -17,6 +17,12 @@ it is needed here for malloc. */
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
/* Allow for C++ users */
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Options */
|
/* Options */
|
||||||
|
|
||||||
#define PCRE_CASELESS 0x0001
|
#define PCRE_CASELESS 0x0001
|
||||||
|
@ -68,4 +74,8 @@ extern int pcre_info(const pcre *, int *, int *);
|
||||||
extern pcre_extra *pcre_study(const pcre *, int, const char **);
|
extern pcre_extra *pcre_study(const pcre *, int, const char **);
|
||||||
extern const char *pcre_version(void);
|
extern const char *pcre_version(void);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
} /* extern "C" */
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* End of pcre.h */
|
#endif /* End of pcre.h */
|
||||||
|
|
|
@ -72,7 +72,7 @@ staticforward PyTypeObject Pcre_Type;
|
||||||
#define NOT_WORD_BOUNDARY 6
|
#define NOT_WORD_BOUNDARY 6
|
||||||
#define BEGINNING_OF_BUFFER 7
|
#define BEGINNING_OF_BUFFER 7
|
||||||
#define END_OF_BUFFER 8
|
#define END_OF_BUFFER 8
|
||||||
|
#define STRING 9
|
||||||
|
|
||||||
static PcreObject *
|
static PcreObject *
|
||||||
newPcreObject(arg)
|
newPcreObject(arg)
|
||||||
|
@ -191,49 +191,20 @@ PyPcre_compile(self, args)
|
||||||
{
|
{
|
||||||
PcreObject *rv;
|
PcreObject *rv;
|
||||||
PyObject *dictionary;
|
PyObject *dictionary;
|
||||||
char *pattern, *newpattern;
|
char *pattern;
|
||||||
const char *error;
|
const char *error;
|
||||||
int num_zeros, i, j;
|
int num_zeros, i, j;
|
||||||
|
|
||||||
int patternlen, options, erroroffset;
|
int options, erroroffset;
|
||||||
if (!PyArg_ParseTuple(args, "s#iO!", &pattern, &patternlen, &options,
|
if (!PyArg_ParseTuple(args, "siO!", &pattern, &options,
|
||||||
&PyDict_Type, &dictionary))
|
&PyDict_Type, &dictionary))
|
||||||
return NULL;
|
return NULL;
|
||||||
rv = newPcreObject(args);
|
rv = newPcreObject(args);
|
||||||
if ( rv == NULL )
|
if ( rv == NULL )
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
/* PCRE doesn't like having null bytes in its pattern, so we have to replace
|
rv->regex = pcre_compile((char*)pattern, options,
|
||||||
any zeros in the string with the characters '\000'. This increases the size
|
|
||||||
of the string by 3*num_zeros, plus 1 byte for the terminating \0. */
|
|
||||||
num_zeros=1; /* Start at 1; this will give 3 extra bytes of leeway */
|
|
||||||
for(i=0; i<patternlen; i++) {
|
|
||||||
if (pattern[i]==0) num_zeros++;
|
|
||||||
}
|
|
||||||
newpattern=malloc(patternlen + num_zeros*3 + 4);
|
|
||||||
if (newpattern==NULL) {
|
|
||||||
PyErr_SetString(PyExc_MemoryError, "can't allocate memory for new pattern");
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
for (i=j=0; i<patternlen; i++, j++)
|
|
||||||
{
|
|
||||||
if (pattern[i]!=0) newpattern[j]=pattern[i];
|
|
||||||
else {
|
|
||||||
newpattern[j++] ='\\';
|
|
||||||
newpattern[j++] = '0';
|
|
||||||
newpattern[j++] = '0';
|
|
||||||
newpattern[j ] = '0';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* Keep purify happy; for pcre, one null byte is enough! */
|
|
||||||
newpattern[j++]='\0';
|
|
||||||
newpattern[j++]='\0';
|
|
||||||
newpattern[j++]='\0';
|
|
||||||
newpattern[j]='\0';
|
|
||||||
|
|
||||||
rv->regex = pcre_compile((char*)newpattern, options,
|
|
||||||
&error, &erroroffset, dictionary);
|
&error, &erroroffset, dictionary);
|
||||||
free(newpattern);
|
|
||||||
if (rv->regex==NULL)
|
if (rv->regex==NULL)
|
||||||
{
|
{
|
||||||
PyMem_DEL(rv);
|
PyMem_DEL(rv);
|
||||||
|
@ -312,6 +283,10 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
|
||||||
*indexptr=index;
|
*indexptr=index;
|
||||||
return Py_BuildValue("c", (char)8);
|
return Py_BuildValue("c", (char)8);
|
||||||
break;
|
break;
|
||||||
|
case('\\'):
|
||||||
|
*indexptr=index;
|
||||||
|
return Py_BuildValue("c", '\\');
|
||||||
|
break;
|
||||||
|
|
||||||
case('x'):
|
case('x'):
|
||||||
{
|
{
|
||||||
|
@ -348,6 +323,8 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
|
||||||
case('g'):
|
case('g'):
|
||||||
{
|
{
|
||||||
int end, i;
|
int end, i;
|
||||||
|
int group_num = 0, is_number=0;
|
||||||
|
|
||||||
if (pattern_len<=index)
|
if (pattern_len<=index)
|
||||||
{
|
{
|
||||||
PyErr_SetString(ErrorObject, "unfinished symbolic reference");
|
PyErr_SetString(ErrorObject, "unfinished symbolic reference");
|
||||||
|
@ -374,16 +351,22 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
|
||||||
PyErr_SetString(ErrorObject, "zero-length symbolic reference");
|
PyErr_SetString(ErrorObject, "zero-length symbolic reference");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
if (!(pcre_ctypes[pattern[index]] & ctype_word) /* First char. not alphanumeric */
|
if ((pcre_ctypes[pattern[index]] & ctype_digit)) /* First char. a digit */
|
||||||
|| (pcre_ctypes[pattern[index]] & ctype_digit) ) /* First char. a digit */
|
|
||||||
{
|
{
|
||||||
/* XXX should include the text of the reference */
|
is_number = 1;
|
||||||
PyErr_SetString(ErrorObject, "first character of symbolic reference not a letter or _");
|
group_num = pattern[index] - '0';
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for(i=index+1; i<end; i++)
|
for(i=index+1; i<end; i++)
|
||||||
{
|
{
|
||||||
|
if (is_number &&
|
||||||
|
!(pcre_ctypes[pattern[i]] & ctype_digit) )
|
||||||
|
{
|
||||||
|
/* XXX should include the text of the reference */
|
||||||
|
PyErr_SetString(ErrorObject, "illegal non-digit character in \\g<...> starting with digit");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
else {group_num = group_num * 10 + pattern[i] - '0';}
|
||||||
if (!(pcre_ctypes[pattern[i]] & ctype_word) )
|
if (!(pcre_ctypes[pattern[i]] & ctype_word) )
|
||||||
{
|
{
|
||||||
/* XXX should include the text of the reference */
|
/* XXX should include the text of the reference */
|
||||||
|
@ -394,6 +377,9 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
|
||||||
|
|
||||||
*typeptr = MEMORY_REFERENCE;
|
*typeptr = MEMORY_REFERENCE;
|
||||||
*indexptr = end+1;
|
*indexptr = end+1;
|
||||||
|
/* If it's a number, return the integer value of the group */
|
||||||
|
if (is_number) return Py_BuildValue("i", group_num);
|
||||||
|
/* Otherwise, return a string containing the group name */
|
||||||
return Py_BuildValue("s#", pattern+index, end-index);
|
return Py_BuildValue("s#", pattern+index, end-index);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -478,8 +464,11 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
/* It's some unknown escape like \s, so return a string containing
|
||||||
|
\s */
|
||||||
|
*typeptr = STRING;
|
||||||
*indexptr = index;
|
*indexptr = index;
|
||||||
return Py_BuildValue("c", c);
|
return Py_BuildValue("s#", pattern+index-2, 2);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -571,6 +560,12 @@ PyPcre_expand(self, args)
|
||||||
Py_DECREF(result);
|
Py_DECREF(result);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case(STRING):
|
||||||
|
{
|
||||||
|
PyList_Append(results, value);
|
||||||
|
total_len += PyString_Size(value);
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
Py_DECREF(results);
|
Py_DECREF(results);
|
||||||
PyErr_SetString(ErrorObject,
|
PyErr_SetString(ErrorObject,
|
||||||
|
|
164
Modules/pypcre.c
164
Modules/pypcre.c
|
@ -211,7 +211,7 @@ the file Tech.Notes for some information on the internals.
|
||||||
|
|
||||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
Written by: Philip Hazel <ph10@cam.ac.uk>
|
||||||
|
|
||||||
Copyright (c) 1997 University of Cambridge
|
Copyright (c) 1998 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Permission is granted to anyone to use this software for any purpose on any
|
Permission is granted to anyone to use this software for any purpose on any
|
||||||
|
@ -409,6 +409,7 @@ do
|
||||||
according to the repeat count. */
|
according to the repeat count. */
|
||||||
|
|
||||||
case OP_CLASS:
|
case OP_CLASS:
|
||||||
|
case OP_NEGCLASS:
|
||||||
{
|
{
|
||||||
tcode++;
|
tcode++;
|
||||||
for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
|
for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
|
||||||
|
@ -547,7 +548,7 @@ the file Tech.Notes for some information on the internals.
|
||||||
|
|
||||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
Written by: Philip Hazel <ph10@cam.ac.uk>
|
||||||
|
|
||||||
Copyright (c) 1997 University of Cambridge
|
Copyright (c) 1998 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Permission is granted to anyone to use this software for any purpose on any
|
Permission is granted to anyone to use this software for any purpose on any
|
||||||
|
@ -586,18 +587,26 @@ the external pcre header. */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef Py_eval_input
|
#ifndef Py_eval_input
|
||||||
/* For Python 1.4, graminit.h has to be explicitly included */
|
/* For Python 1.4, graminit.h has to be explicitly included */
|
||||||
#define Py_eval_input eval_input
|
#define Py_eval_input eval_input
|
||||||
|
|
||||||
#endif /* FOR_PYTHON */
|
#endif /* FOR_PYTHON */
|
||||||
|
|
||||||
|
/* Allow compilation as C++ source code, should anybody want to do that. */
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
#define class pcre_class
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/* Min and max values for the common repeats; for the maxima, 0 => infinity */
|
/* Min and max values for the common repeats; for the maxima, 0 => infinity */
|
||||||
|
|
||||||
static char rep_min[] = { 0, 0, 1, 1, 0, 0 };
|
static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
|
||||||
static char rep_max[] = { 0, 0, 0, 0, 1, 1 };
|
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
|
||||||
|
|
||||||
/* Text forms of OP_ values and things, for debugging */
|
/* Text forms of OP_ values and things, for debugging (not all used) */
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
static const char *OP_names[] = {
|
static const char *OP_names[] = {
|
||||||
|
@ -610,7 +619,7 @@ static const char *OP_names[] = {
|
||||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{",
|
"*", "*?", "+", "+?", "?", "??", "{", "{", "{",
|
||||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{",
|
"*", "*?", "+", "+?", "?", "??", "{", "{", "{",
|
||||||
"*", "*?", "+", "+?", "?", "??", "{", "{",
|
"*", "*?", "+", "+?", "?", "??", "{", "{",
|
||||||
"class", "classL", "Ref",
|
"class", "negclass", "classL", "Ref",
|
||||||
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",
|
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",
|
||||||
"Brazero", "Braminzero", "Bra"
|
"Brazero", "Braminzero", "Bra"
|
||||||
};
|
};
|
||||||
|
@ -621,7 +630,7 @@ are simple data values; negative values are for special things like \d and so
|
||||||
on. Zero means further processing is needed (for things like \x), or the escape
|
on. Zero means further processing is needed (for things like \x), or the escape
|
||||||
is invalid. */
|
is invalid. */
|
||||||
|
|
||||||
static short int escapes[] = {
|
static const short int escapes[] = {
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */
|
0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */
|
||||||
0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */
|
0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */
|
||||||
'@', -ESC_A, -ESC_B, 0, -ESC_D, 0, 0, 0, /* @ - G */
|
'@', -ESC_A, -ESC_B, 0, -ESC_D, 0, 0, 0, /* @ - G */
|
||||||
|
@ -636,8 +645,9 @@ static short int escapes[] = {
|
||||||
|
|
||||||
/* Definition to allow mutual recursion */
|
/* Definition to allow mutual recursion */
|
||||||
|
|
||||||
static BOOL compile_regex(int, int *, uschar **, const uschar **,
|
static BOOL
|
||||||
const char **, PyObject *);
|
compile_regex(int, int *, uschar **, const uschar **, const char **,
|
||||||
|
PyObject *);
|
||||||
|
|
||||||
/* Structure for passing "static" information around between the functions
|
/* Structure for passing "static" information around between the functions
|
||||||
doing the matching, so that they are thread-safe. */
|
doing the matching, so that they are thread-safe. */
|
||||||
|
@ -866,12 +876,13 @@ do {
|
||||||
/* Check a class or a back reference for a zero minimum */
|
/* Check a class or a back reference for a zero minimum */
|
||||||
|
|
||||||
case OP_CLASS:
|
case OP_CLASS:
|
||||||
|
case OP_NEGCLASS:
|
||||||
case OP_REF:
|
case OP_REF:
|
||||||
case OP_CLASS_L:
|
case OP_CLASS_L:
|
||||||
switch(*cc)
|
switch(*cc)
|
||||||
{
|
{
|
||||||
case (OP_REF): cc += 2; break;
|
case (OP_REF): cc += 2; break;
|
||||||
case (OP_CLASS): cc += 1+32; break;
|
case (OP_CLASS): case (OP_NEGCLASS): cc += 1+32; break;
|
||||||
case (OP_CLASS_L): cc += 1+1+32; break;
|
case (OP_CLASS_L): cc += 1+1+32; break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1017,15 +1028,17 @@ else
|
||||||
|
|
||||||
{
|
{
|
||||||
/* PYTHON: Try to compute an octal value for a character */
|
/* PYTHON: Try to compute an octal value for a character */
|
||||||
for(c=0, i=0; c!=-1 && ptr[i]!=0 && i<3; i++)
|
for(c=0, i=0; ptr[i]!=0 && i<3; i++)
|
||||||
{
|
{
|
||||||
if (( pcre_ctypes[ ptr[i] ] & ctype_odigit) != 0)
|
if (( pcre_ctypes[ ptr[i] ] & ctype_odigit) != 0)
|
||||||
c = c * 8 + ptr[i]-'0';
|
c = c * 8 + ptr[i]-'0';
|
||||||
else
|
else
|
||||||
c = -1; /* Non-octal character */
|
break; /* Non-octal character--break out of the loop */
|
||||||
}
|
}
|
||||||
/* Aha! There were 3 octal digits, so it must be a character */
|
/* It's a character if there were exactly 3 octal digits, or if
|
||||||
if (c != -1 && i == 3)
|
we're inside a character class and there was at least one
|
||||||
|
octal digit. */
|
||||||
|
if ( (i == 3) || (isclass && i!=0) )
|
||||||
{
|
{
|
||||||
ptr += i-1;
|
ptr += i-1;
|
||||||
break;
|
break;
|
||||||
|
@ -1278,11 +1291,14 @@ for (;; ptr++)
|
||||||
class_flag = NULL;
|
class_flag = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If the first character is '^', set the negation flag */
|
/* If the first character is '^', set the negation flag, and use a
|
||||||
|
different opcode. This only matters if caseless matching is specified at
|
||||||
|
runtime. */
|
||||||
|
|
||||||
if ((c = *(++ptr)) == '^')
|
if ((c = *(++ptr)) == '^')
|
||||||
{
|
{
|
||||||
negate_class = TRUE;
|
negate_class = TRUE;
|
||||||
|
if (*(code-1)==OP_CLASS) *(code-1) = OP_NEGCLASS;
|
||||||
c = *(++ptr);
|
c = *(++ptr);
|
||||||
}
|
}
|
||||||
else negate_class = FALSE;
|
else negate_class = FALSE;
|
||||||
|
@ -1648,7 +1664,8 @@ for (;; ptr++)
|
||||||
/* If previous was a character class or a back reference, we put the repeat
|
/* If previous was a character class or a back reference, we put the repeat
|
||||||
stuff after it. */
|
stuff after it. */
|
||||||
|
|
||||||
else if (*previous == OP_CLASS || *previous==OP_CLASS_L || *previous == OP_REF)
|
else if (*previous == OP_CLASS || *previous == OP_NEGCLASS ||
|
||||||
|
*previous==OP_CLASS_L || *previous == OP_REF)
|
||||||
{
|
{
|
||||||
if (repeat_min == 0 && repeat_max == -1)
|
if (repeat_min == 0 && repeat_max == -1)
|
||||||
*code++ = OP_CRSTAR + repeat_type;
|
*code++ = OP_CRSTAR + repeat_type;
|
||||||
|
@ -2003,7 +2020,7 @@ for (;; ptr++)
|
||||||
the next state. */
|
the next state. */
|
||||||
|
|
||||||
previous[1] = length;
|
previous[1] = length;
|
||||||
ptr--;
|
if (length < 255) ptr--;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} /* end of big loop */
|
} /* end of big loop */
|
||||||
|
@ -2832,6 +2849,7 @@ while (code < code_end)
|
||||||
goto CLASS_REF_REPEAT;
|
goto CLASS_REF_REPEAT;
|
||||||
|
|
||||||
case OP_CLASS:
|
case OP_CLASS:
|
||||||
|
case OP_NEGCLASS:
|
||||||
case OP_CLASS_L:
|
case OP_CLASS_L:
|
||||||
{
|
{
|
||||||
int i, min, max;
|
int i, min, max;
|
||||||
|
@ -2840,11 +2858,14 @@ while (code < code_end)
|
||||||
{
|
{
|
||||||
code++;
|
code++;
|
||||||
printf("Locflag = %i ", *code++);
|
printf("Locflag = %i ", *code++);
|
||||||
|
printf(" [");
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
code++;
|
{
|
||||||
|
if (*code++ == OP_CLASS) printf(" [");
|
||||||
|
else printf(" ^[");
|
||||||
|
}
|
||||||
|
|
||||||
printf(" [");
|
|
||||||
|
|
||||||
for (i = 0; i < 256; i++)
|
for (i = 0; i < 256; i++)
|
||||||
{
|
{
|
||||||
|
@ -3601,10 +3622,14 @@ for (;;)
|
||||||
item to see if there is repeat information following. Then obey similar
|
item to see if there is repeat information following. Then obey similar
|
||||||
code to character type repeats - written out again for speed. If caseless
|
code to character type repeats - written out again for speed. If caseless
|
||||||
matching was set at runtime but not at compile time, we have to check both
|
matching was set at runtime but not at compile time, we have to check both
|
||||||
versions of a character. */
|
versions of a character, and we have to behave differently for positive and
|
||||||
|
negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are
|
||||||
|
treated differently. */
|
||||||
|
|
||||||
case OP_CLASS:
|
case OP_CLASS:
|
||||||
|
case OP_NEGCLASS:
|
||||||
{
|
{
|
||||||
|
BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless;
|
||||||
const uschar *data = ecode + 1; /* Save for matching */
|
const uschar *data = ecode + 1; /* Save for matching */
|
||||||
ecode += 33; /* Advance past the item */
|
ecode += 33; /* Advance past the item */
|
||||||
|
|
||||||
|
@ -3633,15 +3658,8 @@ for (;;)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default: /* No repeat follows */
|
default: /* No repeat follows */
|
||||||
if (eptr >= md->end_subject) FAIL;
|
min = max = 1;
|
||||||
c = *eptr++;
|
break;
|
||||||
if ((data[c/8] & (1 << (c&7))) != 0) continue; /* With main loop */
|
|
||||||
if (md->runtime_caseless)
|
|
||||||
{
|
|
||||||
c = pcre_fcc[c];
|
|
||||||
if ((data[c/8] & (1 << (c&7))) != 0) continue; /* With main loop */
|
|
||||||
}
|
|
||||||
FAIL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* First, ensure the minimum number of matches are present. */
|
/* First, ensure the minimum number of matches are present. */
|
||||||
|
@ -3650,13 +3668,31 @@ for (;;)
|
||||||
{
|
{
|
||||||
if (eptr >= md->end_subject) FAIL;
|
if (eptr >= md->end_subject) FAIL;
|
||||||
c = *eptr++;
|
c = *eptr++;
|
||||||
if ((data[c/8] & (1 << (c&7))) != 0) continue;
|
|
||||||
if (md->runtime_caseless)
|
/* Either not runtime caseless, or it was a positive class. For
|
||||||
|
runtime caseless, continue if either case is in the map. */
|
||||||
|
|
||||||
|
if (!nasty_case)
|
||||||
{
|
{
|
||||||
c = pcre_fcc[c];
|
|
||||||
if ((data[c/8] & (1 << (c&7))) != 0) continue;
|
if ((data[c/8] & (1 << (c&7))) != 0) continue;
|
||||||
|
if (md->runtime_caseless)
|
||||||
|
{
|
||||||
|
c = pcre_fcc[c];
|
||||||
|
if ((data[c/8] & (1 << (c&7))) != 0) continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
FAIL;
|
|
||||||
|
/* Runtime caseless and it was a negative class. Continue only if
|
||||||
|
both cases are in the map. */
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if ((data[c/8] & (1 << (c&7))) == 0) FAIL;
|
||||||
|
c = pcre_fcc[c];
|
||||||
|
if ((data[c/8] & (1 << (c&7))) != 0) continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
FAIL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If max == min we can continue with the main loop without the
|
/* If max == min we can continue with the main loop without the
|
||||||
|
@ -3674,12 +3710,30 @@ for (;;)
|
||||||
if (match(eptr, ecode, offset_top, md)) SUCCEED;
|
if (match(eptr, ecode, offset_top, md)) SUCCEED;
|
||||||
if (i >= max || eptr >= md->end_subject) FAIL;
|
if (i >= max || eptr >= md->end_subject) FAIL;
|
||||||
c = *eptr++;
|
c = *eptr++;
|
||||||
if ((data[c/8] & (1 << (c&7))) != 0) continue;
|
|
||||||
if (md->runtime_caseless)
|
/* Either not runtime caseless, or it was a positive class. For
|
||||||
|
runtime caseless, continue if either case is in the map. */
|
||||||
|
|
||||||
|
if (!nasty_case)
|
||||||
{
|
{
|
||||||
c = pcre_fcc[c];
|
|
||||||
if ((data[c/8] & (1 << (c&7))) != 0) continue;
|
if ((data[c/8] & (1 << (c&7))) != 0) continue;
|
||||||
|
if (md->runtime_caseless)
|
||||||
|
{
|
||||||
|
c = pcre_fcc[c];
|
||||||
|
if ((data[c/8] & (1 << (c&7))) != 0) continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Runtime caseless and it was a negative class. Continue only if
|
||||||
|
both cases are in the map. */
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
|
||||||
|
c = pcre_fcc[c];
|
||||||
|
if ((data[c/8] & (1 << (c&7))) != 0) continue;
|
||||||
|
}
|
||||||
|
|
||||||
FAIL;
|
FAIL;
|
||||||
}
|
}
|
||||||
/* Control never gets here */
|
/* Control never gets here */
|
||||||
|
@ -3694,12 +3748,30 @@ for (;;)
|
||||||
{
|
{
|
||||||
if (eptr >= md->end_subject) break;
|
if (eptr >= md->end_subject) break;
|
||||||
c = *eptr;
|
c = *eptr;
|
||||||
if ((data[c/8] & (1 << (c&7))) != 0) continue;
|
|
||||||
if (md->runtime_caseless)
|
/* Either not runtime caseless, or it was a positive class. For
|
||||||
|
runtime caseless, continue if either case is in the map. */
|
||||||
|
|
||||||
|
if (!nasty_case)
|
||||||
{
|
{
|
||||||
|
if ((data[c/8] & (1 << (c&7))) != 0) continue;
|
||||||
|
if (md->runtime_caseless)
|
||||||
|
{
|
||||||
|
c = pcre_fcc[c];
|
||||||
|
if ((data[c/8] & (1 << (c&7))) != 0) continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Runtime caseless and it was a negative class. Continue only if
|
||||||
|
both cases are in the map. */
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if ((data[c/8] & (1 << (c&7))) == 0) break;
|
||||||
c = pcre_fcc[c];
|
c = pcre_fcc[c];
|
||||||
if ((data[c/8] & (1 << (c&7))) != 0) continue;
|
if ((data[c/8] & (1 << (c&7))) != 0) continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4430,17 +4502,17 @@ pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
|
||||||
/* The "volatile" directives are to make gcc -Wall stop complaining
|
/* The "volatile" directives are to make gcc -Wall stop complaining
|
||||||
that these variables can be clobbered by the longjmp. Hopefully
|
that these variables can be clobbered by the longjmp. Hopefully
|
||||||
they won't cost too much performance. */
|
they won't cost too much performance. */
|
||||||
int resetcount, ocount;
|
volatile int resetcount, ocount;
|
||||||
int first_char = -1;
|
volatile int first_char = -1;
|
||||||
match_data match_block;
|
match_data match_block;
|
||||||
const uschar *start_bits = NULL;
|
const uschar *start_bits = NULL;
|
||||||
const uschar *start_match = (const uschar *)subject + start_pos;
|
const uschar *start_match = (const uschar *)subject + start_pos;
|
||||||
const uschar *end_subject;
|
const uschar *end_subject;
|
||||||
const real_pcre *re = (const real_pcre *)external_re;
|
const real_pcre *re = (const real_pcre *)external_re;
|
||||||
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
|
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
|
||||||
BOOL using_temporary_offsets = FALSE;
|
volatile BOOL using_temporary_offsets = FALSE;
|
||||||
BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
|
volatile BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
|
||||||
BOOL startline = (re->options & PCRE_STARTLINE) != 0;
|
volatile BOOL startline = (re->options & PCRE_STARTLINE) != 0;
|
||||||
|
|
||||||
if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
|
if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
|
||||||
|
|
||||||
|
@ -4480,7 +4552,7 @@ ocount = offsetcount & (-2);
|
||||||
if (re->top_backref > 0 && re->top_backref >= ocount/2)
|
if (re->top_backref > 0 && re->top_backref >= ocount/2)
|
||||||
{
|
{
|
||||||
ocount = re->top_backref * 2 + 2;
|
ocount = re->top_backref * 2 + 2;
|
||||||
match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));
|
match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
|
||||||
if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
|
if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
|
||||||
using_temporary_offsets = TRUE;
|
using_temporary_offsets = TRUE;
|
||||||
DPRINTF(("Got memory to hold back references\n"));
|
DPRINTF(("Got memory to hold back references\n"));
|
||||||
|
@ -4639,10 +4711,10 @@ do
|
||||||
free_stack(&match_block);
|
free_stack(&match_block);
|
||||||
return rc;
|
return rc;
|
||||||
} /* End of (if setjmp(match_block.error_env)...) */
|
} /* End of (if setjmp(match_block.error_env)...) */
|
||||||
|
free_stack(&match_block);
|
||||||
|
|
||||||
/* Return an error code; pcremodule.c will preserve the exception */
|
/* Return an error code; pcremodule.c will preserve the exception */
|
||||||
if (PyErr_Occurred()) return PCRE_ERROR_NOMEMORY;
|
if (PyErr_Occurred()) return PCRE_ERROR_NOMEMORY;
|
||||||
|
|
||||||
free_stack(&match_block);
|
|
||||||
}
|
}
|
||||||
while (!anchored &&
|
while (!anchored &&
|
||||||
match_block.errorcode == PCRE_ERROR_NOMATCH &&
|
match_block.errorcode == PCRE_ERROR_NOMATCH &&
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue