New versions straight from Jeffrey Ollie's web site

2025-12-04 08:34:25 +00:00 · 1997-07-10 14:31:32 +00:00 · 1997-07-10 14:31:32 +00:00 · db25f32849
commit db25f32849
parent db9e20f418
3 changed files with 698 additions and 167 deletions
--- a/Modules/regexpr.c
+++ b/Modules/regexpr.c
@ -1,3 +1,7 @@
 /*
 * -*- mode: c-mode; c-file-style: python -*-
 */
 /* regexpr.c
 *
 * Author: Tatu Ylonen <ylo@ngs.fi>
@ -57,6 +61,12 @@ char *realloc();
 #endif /* __STDC__ */
 #endif /* THINK_C */
 /* The original code blithely assumed that sizeof(short) == 2.  Not
 * always true.  Original instances of "(short)x" were replaced by
 * SHORT(x), where SHORT is #defined below.
 *
 * SHORT(x) reinterprets the low 16 bits of x as a signed quantity:
 * when bit 15 (0x8000) is set the result is x - 0x10000, otherwise
 * it is x unchanged.  The argument is expanded more than once, so it
 * must not have side effects.  */
 #define SHORT(x) ((x) & 0x8000 ? (x) - 0x10000 : (x))
 /* The stack implementation is taken from an idea by Andrew Kuchling.
 * It's a doubly linked list of arrays. The advantages of this over a
 * simple linked list are that the number of mallocs required are
@ -75,27 +85,27 @@ char *realloc();
 typedef union item_t
 {
-      struct
+	struct
-      {
+	{
-	    int num;
+		int num;
-	    int level;
+		int level;
-	    char *start;
+		char *start;
-	    char *end;
+		char *end;
-      } reg;
+	} reg;
-      struct
+	struct
-      {
+	{
-	    int count;
+		int count;
-	    int level;
+		int level;
-	    int phantom;
+		int phantom;
-	    char *code;
+		char *code;
-	    char *text;
+		char *text;
-      } fail;
+	} fail;
-      struct
+	struct
-      {
+	{
-	    int num;
+		int num;
-	    int level;
+		int level;
-	    int count;
+		int count;
-      } cntr;
+	} cntr;
 } item_t;
 #define STACK_PAGE_SIZE 256
@ -105,43 +115,98 @@ typedef union item_t
 typedef struct item_page_t
 {
-      item_t items[STACK_PAGE_SIZE];
+	item_t items[STACK_PAGE_SIZE];
-      struct item_page_t *prev;
+	struct item_page_t *prev;
-      struct item_page_t *next;
+	struct item_page_t *next;
 } item_page_t;
 typedef struct match_state
 {
-      /* Structure to encapsulate the stack. */
+	/* The number of registers that have been pushed onto the stack
-      struct
+	 * since the last failure point. */
      {
 	    /* index into the curent page.  If index == 0 and you need
 	     * to pop and item, move to the previous page and set
 	     * index = STACK_PAGE_SIZE - 1.  Otherwise decrement index
 	     * to push a page. If index == STACK_PAGE_SIZE and you
 	     * need to push a page move to the next page and set index
 	     * = 0. If there is no new next page, allocate a new page
 	     * and link it in. Otherwise, increment index to push a
 	     * page. */
 	    int index;
 	    item_page_t *current; /* Pointer to the current page. */
 	    item_page_t first; /* First page is statically allocated. */
      } stack;
      char *start[NUM_REGISTERS];
      char *end[NUM_REGISTERS];
-      int changed[NUM_REGISTERS];
+	int count;
-      /* The number of registers that have been pushed onto the stack
+
-       * since the last failure point. */
+	/* Used to control when registers need to be pushed onto the
-      int count;
+	 * stack. */
-      /* Used to control when registers need to be pushed onto the
+	
-       * stack. */
+	int level;
-      int level;
+	
-      /* The number of failure points on the stack. */
+	/* The number of failure points on the stack. */
-      int point;
+	
 	int point;
 	/* Storage for the registers.  Each register consists of two
 	 * pointers to characters.  So register N is represented as
 	 * start[N] and end[N].  The pointers must be converted to
 	 * offsets from the beginning of the string before returning the
 	 * registers to the calling program. */
 	char *start[NUM_REGISTERS];
 	char *end[NUM_REGISTERS];
 	/* Keeps track of whether a register has changed recently. */
 	int changed[NUM_REGISTERS];
 	/* Structure to encapsulate the stack. */
 	struct
 	{
 		/* index into the current page.  If index == 0 and you need
 		 * to pop an item, move to the previous page and set index
 		 * = STACK_PAGE_SIZE - 1.  Otherwise decrement index to
 		 * pop an item. If index == STACK_PAGE_SIZE and you need
 		 * to push an item move to the next page and set index =
 		 * 0. If there is no new next page, allocate a new page
 		 * and link it in. Otherwise, increment index to push an
 		 * item. */
 		int index;
 		item_page_t *current; /* Pointer to the current page. */
 		item_page_t first; /* First page is statically allocated. */
 	} stack;
 } match_state;
 /* Initialize a state object */
 /* #define NEW_STATE(state) \ */
 /* memset(&state, 0, (void *)(&state.stack) - (void *)(&state)); \ */
 /* state.stack.current = &state.stack.first; \ */
 /* state.stack.first.prev = NULL; \ */
 /* state.stack.first.next = NULL; \ */
 /* state.stack.index = 0; \ */
 /* state.level = 1 */
 /* Reset a match_state so it can be used for a fresh matching attempt.
 *
 * state - the match_state object itself (an lvalue, not a pointer).
 * nregs - number of registers to clear; start[], end[] and changed[]
 *         are reset for indices 0 .. nregs-1 only, higher slots are
 *         left untouched.
 *
 * The stack is rewound to the statically allocated first page, and
 * count, level and point all start at 0.  (The previous version stored
 * 1 into level and then immediately overwrote it with 0; only the
 * effective value is kept.)  Both arguments are expanded several
 * times, so they must be free of side effects. */
 #define NEW_STATE(state, nregs) \
 { \
 	int new_state_reg; \
 	for (new_state_reg = 0; new_state_reg < (nregs); new_state_reg++) \
 	{ \
 		(state).start[new_state_reg] = NULL; \
 		(state).end[new_state_reg] = NULL; \
 		(state).changed[new_state_reg] = 0; \
 	} \
 	(state).stack.current = &(state).stack.first; \
 	(state).stack.first.prev = NULL; \
 	(state).stack.first.next = NULL; \
 	(state).stack.index = 0; \
 	(state).count = 0; \
 	(state).level = 0; \
 	(state).point = 0; \
 }
 /* Free any memory that might have been malloc'd */
 /* Releases every stack page that was chained after the statically
 * allocated first page.  Pages are unlinked from the front of the
 * chain one at a time and handed to free(); on completion
 * stack.first.next is NULL.  stack.current is used as scratch and is
 * left pointing at the last page freed. */
 #define FREE_STATE(state) \
 while (NULL != (state).stack.first.next) \
 { \
 	(state).stack.current = (state).stack.first.next; \
 	(state).stack.first.next = (state).stack.first.next->next; \
 	free((state).stack.current); \
 }
 /* Discard the top 'count' stack items. */
 #define STACK_DISCARD(stack, count, on_error) \
@ -226,24 +291,6 @@ else \
 #define STACK_EMPTY(stack) ((stack.index == 0) && \
 			    (stack.current->prev == NULL))
 /* Initialize a state object */
 #define NEW_STATE(state) \
 memset(&state, 0, sizeof(match_state)); \
 state.stack.current = &state.stack.first; \
 state.level = 1
 /* Free any memory that might have been malloc'd */
 #define FREE_STATE(state) \
 while(state.stack.first.next != NULL) \
 { \
   state.stack.current = state.stack.first.next; \
   state.stack.first.next = state.stack.current->next; \
   free(state.stack.current); \
 }
 /* Return the start of register 'reg' */
 #define GET_REG_START(state, reg) (state.start[reg])
@ -302,22 +349,6 @@ state.end[reg] = text
 /* Update the last failure point with a new position in the text. */
 /* #define UPDATE_FAILURE(state, xtext, on_error) \ */
 /* { \ */
 /*    item_t *item; \ */
 /*    STACK_DISCARD(state.stack, state.count, on_error); \ */
 /*    STACK_TOP(state.stack, item, on_error); \ */
 /*    item->fail.text = xtext; \ */
 /*    state.count = 0; \ */
 /* } */
 /* #define UPDATE_FAILURE(state, xtext, on_error) \ */
 /* { \ */
 /*    item_t *item; \ */
 /*    STACK_BACK(state.stack, item, state.count + 1, on_error); \ */
 /*    item->fail.text = xtext; \ */
 /* } */
 #define UPDATE_FAILURE(state, xtext, on_error) \
 { \
   item_t *item; \
@ -391,7 +422,8 @@ enum regexp_compiled_ops /* opcodes for compiled regexp */
  Cwordbound,		/* match if at word boundary */
  Cnotwordbound,	/* match if not at word boundary */
  Csyntaxspec,		/* matches syntax code (1 byte follows) */
-  Cnotsyntaxspec	/* matches if syntax code does not match (1 byte foll)*/
+  Cnotsyntaxspec,	/* matches if syntax code does not match (1 byte foll)*/
  Crepeat1
 };
 enum regexp_syntax_op	/* syntax codes for plain and quoted characters */
@ -581,6 +613,8 @@ static void re_compile_fastmap_aux(char *code,
 	 case Cwordbound:
 	 case Cnotwordbound:
 	 {
 	    for (a = 0; a < 256; a++)
 	       fastmap[a] = 1;
 	    break;
 	 }
 	 case Csyntaxspec:
@ -648,7 +682,7 @@ static void re_compile_fastmap_aux(char *code,
 	 {
 	    a = (unsigned char)code[pos++];
 	    a |= (unsigned char)code[pos++] << 8;
-	    pos += (int)(short)a;
+	    pos += (int)SHORT(a);
 	    if (visited[pos])
 	    {
 	       /* argh... the regexp contains empty loops.  This is not
@ -664,10 +698,15 @@ static void re_compile_fastmap_aux(char *code,
 	 {
 	    a = (unsigned char)code[pos++];
 	    a |= (unsigned char)code[pos++] << 8;
-	    a = pos + (int)(short)a;
+	    a = pos + (int)SHORT(a);
 	    re_compile_fastmap_aux(code, a, visited, can_be_null, fastmap);
 	    break;
 	 }
 	 case Crepeat1:
 	 {
 	    pos += 2;
 	    break;
 	 }
 	 default:
 	 {
 	    abort();  /* probably some opcode is missing from this switch */
@ -754,10 +793,11 @@ static int re_optimize_star_jump(regexp_t bufp, char *code)
   char ch;
   int a;
   int b;
   int num_instructions = 0;
   a = (unsigned char)*code++;
   a |= (unsigned char)*code++ << 8;
-   a = (int)(short)a;
+   a = (int)SHORT(a);
   p1 = code + a + 3; /* skip the failure_jump */
   assert(p1[-3] == Cfailure_jump);
@ -775,6 +815,7 @@ static int re_optimize_star_jump(regexp_t bufp, char *code)
   /* loop until we find something that consumes a character */
  loop_p1:
   num_instructions++;
   switch (*p1++)
   {
      case Cbol:
@ -824,6 +865,7 @@ static int re_optimize_star_jump(regexp_t bufp, char *code)
   /* now we know that we can't backtrack. */
   while (p1 != p2 - 3)
   {
      num_instructions++;
      switch (*p1++)
      {
 	 case Cend:
@ -873,11 +915,22 @@ static int re_optimize_star_jump(regexp_t bufp, char *code)
      }
   }
  make_update_jump:
   code -= 3;
   a += 3;  /* jump to after the Cfailure_jump */
   code[0] = Cupdate_failure_jump;
   code[1] = a & 0xff;
   code[2] = a >> 8;
   if (num_instructions > 1)
      return 1;
   assert(num_instructions == 1);
   /* if the only instruction matches a        single character, we can do
    * better
    */
   p1 = code + 3 + a;   /* start of sole instruction */
   if (*p1 == Cset || *p1 == Cexact || *p1 == Canychar ||
       *p1 == Csyntaxspec || *p1 == Cnotsyntaxspec)
      code[0] =        Crepeat1;
   return 1;
  make_normal_jump:
@ -939,6 +992,7 @@ static int re_optimize(regexp_t bufp)
 	 case Cjump:
 	 case Cdummy_failure_jump:
 	 case Cfailure_jump:
 	 case Crepeat1:
 	 {
 	    code += 2;
 	    break;
@ -1111,7 +1165,8 @@ char *re_compile_pattern(char *regex, int size, regexp_t bufp)
      re_compile_initialize();
   bufp->used = 0;
   bufp->fastmap_accurate = 0;
-   bufp->uses_registers = 0;
+   bufp->uses_registers = 1;
   bufp->num_registers = 1;
   translate = bufp->translate;
   pattern = bufp->buffer;
   alloc = bufp->allocated;
@ -1289,6 +1344,7 @@ char *re_compile_pattern(char *regex, int size, regexp_t bufp)
 	       STORE(Cstart_memory);
 	       STORE(next_register);
 	       open_registers[num_open_registers++] = next_register;
 	       bufp->num_registers++;
 	       next_register++;
 	    }
 	    paren_depth++;
@ -1545,27 +1601,8 @@ int re_match(regexp_t bufp,
  code = bufp->buffer;
  translate = bufp->translate;
 /*   translated = NULL; */
 /*   if (bufp->translate) */
 /*   { */
 /*      char *t1; */
 /*      char *t2; */
 /*      translated = malloc(size); */
 /*      if (translated == NULL) */
 /* 	goto error; */
 /*      t1 = string; */
 /*      t2 = translated; */
 /*      while(t1 < textend) */
 /* 	*t2++ = bufp->translate[*t1++]; */
 /*      text = translated + pos; */
 /*      textstart = translated; */
 /*      textend = translated + size; */
 /*   } */
-  NEW_STATE(state);
+  NEW_STATE(state, bufp->num_registers);
  continue_matching:
  switch (*code++)
@ -1587,7 +1624,7 @@ int re_match(regexp_t bufp,
 	   }
 	   else
 	   {
-	      for (a = 1; a < RE_NREGS; a++)
+	      for (a = 1; a < bufp->num_registers; a++)
 	      {
 		 if ((GET_REG_START(state, a) == NULL) ||
 		     (GET_REG_END(state, a) == NULL))
@ -1599,10 +1636,13 @@ int re_match(regexp_t bufp,
 		 old_regs->start[a] = GET_REG_START(state, a) - textstart;
 		 old_regs->end[a] = GET_REG_END(state, a) - textstart;
 	      }
 	      for (; a < RE_NREGS; a++)
 	      {
 		 old_regs->start[a] = -1;
 		 old_regs->end[a] = -1;
 	      }
 	   }
 	}
 /* 	if(translated) */
 /* 	   free(translated); */
 	FREE_STATE(state);
 	return match_end - pos;
     }
@ -1703,18 +1743,18 @@ int re_match(regexp_t bufp,
     {
 	a = (unsigned char)*code++;
 	a |= (unsigned char)*code++ << 8;
-	code += (int)(short)a;
+	code += (int)SHORT(a);
 	goto continue_matching;
     }
     case Cdummy_failure_jump:
     {
 	a = (unsigned char)*code++;
 	a |= (unsigned char)*code++ << 8;
-	a = (int)(short)a;
+	a = (int)SHORT(a);
 	assert(*code == Cfailure_jump);
 	b = (unsigned char)code[1];
 	b |= (unsigned char)code[2] << 8;
-	PUSH_FAILURE(state, code + (int)(short)b + 3, NULL, goto error);
+	PUSH_FAILURE(state, code + (int)SHORT(b) + 3, NULL, goto error);
 	code += a;
 	goto continue_matching;
     }
@ -1722,10 +1762,120 @@ int re_match(regexp_t bufp,
     {
 	a = (unsigned char)*code++;
 	a |= (unsigned char)*code++ << 8;
-	a = (int)(short)a;
+	a = (int)SHORT(a);
 	PUSH_FAILURE(state, code + a, text, goto error);
 	goto continue_matching;
     }
     case Crepeat1:
     {
 	char *pinst;
 	a = (unsigned char)*code++;
 	a |= (unsigned char)*code++ << 8;
 	a = (int)SHORT(a);
 	pinst = code + a;
 	/* pinst is sole instruction in loop, and it matches a
 	 * single character.  Since Crepeat1 was originally a
 	 * Cupdate_failure_jump, we also know that backtracking is
 	 * useless:  so long as the single-character expression matches,
 	 * it must be used.  Also, in the case of +, we've already
 	 * matched one character, so + can't fail:  nothing here can
 	 * cause a failure.
 	 */
 	switch (*pinst++)
 	{
 	   case Cset:
 	   {
              if (translate)
 	      {
 		 while (text < textend)
 		 {
 		    ch = translate[(unsigned char)*text];
 		    if (pinst[ch/8] & (1<<(ch & 7)))
 		       text++;
 		    else
 		       break;
 		 }
              }
              else
              {
 		 while (text < textend)
 		 {
 		    ch = (unsigned char)*text;
 		    if (pinst[ch/8] & (1<<(ch & 7)))
 		       text++;
 		    else
 		       break;
 		 }
              }
 	      break;
 	   }
 	   case Cexact:
 	   {
 	      ch = (unsigned char)*pinst;
              if (translate)
 	      {
 		 while (text < textend &&
 			translate[(unsigned char)*text] == ch)
 		    text++;
              }
              else
              {
 		 while (text < textend && (unsigned char)*text == ch)
 		    text++;
              }
 	      break;
 	   }
 	   case Canychar:
 	   {
 	      while (text < textend && (unsigned char)*text != '\n')
 		 text++;
 	      break;
 	   }
 	   case Csyntaxspec:
 	   {
 	      a = (unsigned char)*pinst;
              if (translate)
 	      {
 		 while (text < textend &&
 			translate[SYNTAX(*text)] == a)
 		    text++;
              }
              else
              {
                while (text < textend && SYNTAX(*text) == a)
                   text++;
              }
 	      break;
 	   }
 	   case Cnotsyntaxspec:
 	   {
 	      a = (unsigned char)*pinst;
              if (translate)
 	      {
 		 while (text < textend &&
 			translate[SYNTAX(*text)] != a)
 		    text++;
              }
              else
              {
 		 while (text < textend && SYNTAX(*text) != a)
 		    text++;
              }
 	      break;
 	   }
 	   default:
 	   {
 	      abort();
 	      /*NOTREACHED*/
 	   }
 	}
 	/* due to the funky way + and * are compiled, the top failure-
 	 * stack entry at this point is actually a success entry --
 	 * update it & pop it
 	 */
 	UPDATE_FAILURE(state, text, goto error);
 	goto fail;      /* i.e., succeed <wink/sigh> */
     }
     case Cbegbuf:
     {
 	if (text == textstart)
--- a/Modules/regexpr.h
+++ b/Modules/regexpr.h
@ -1,3 +1,7 @@
 /*
 * -*- mode: c-mode; c-file-style: python -*-
 */
 #ifndef Py_REGEXPR_H
 #define Py_REGEXPR_H
 #ifdef __cplusplus
@ -5,22 +9,22 @@ extern "C" {
 #endif
 /*
 * regexpr.h
 *
 * Author: Tatu Ylonen <ylo@ngs.fi>
 *
 * Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
 *
 * Permission to use, copy, modify, distribute, and sell this software
 * and its documentation for any purpose is hereby granted without fee,
 * provided that the above copyright notice appear in all copies.  This
 * software is provided "as is" without express or implied warranty.
 *
 * Created: Thu Sep 26 17:15:36 1991 ylo
 * Last modified: Mon Nov  4 15:49:46 1991 ylo
 */
-regexpr.h
+/* $Id$ */
 Author: Tatu Ylonen <ylo@ngs.fi>
 Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
 Permission to use, copy, modify, distribute, and sell this software
 and its documentation for any purpose is hereby granted without fee,
 provided that the above copyright notice appear in all copies.  This
 software is provided "as is" without express or implied warranty.
 Created: Thu Sep 26 17:15:36 1991 ylo
 Last modified: Mon Nov  4 15:49:46 1991 ylo
 */
 #ifndef REGEXPR_H
 #define REGEXPR_H
@ -29,21 +33,22 @@ Last modified: Mon Nov  4 15:49:46 1991 ylo
 typedef struct re_pattern_buffer
 {
-  char *buffer; 	 /* compiled pattern */
+	char *buffer;          /* compiled pattern */
-  int allocated;	 /* allocated size of compiled pattern */
+	int allocated;         /* allocated size of compiled pattern */
-  int used;		 /* actual length of compiled pattern */
+	int used;              /* actual length of compiled pattern */
-  char *fastmap;	 /* fastmap[ch] is true if ch can start pattern */
+	char *fastmap;         /* fastmap[ch] is true if ch can start pattern */
-  char *translate;	 /* translation to apply during compilation/matching */
+	char *translate;       /* translation to apply during compilation/matching */
-  char fastmap_accurate; /* true if fastmap is valid */
+	char fastmap_accurate; /* true if fastmap is valid */
-  char can_be_null;	 /* true if can match empty string */
+	char can_be_null;      /* true if can match empty string */
-  char uses_registers;	 /* registers are used and need to be initialized */
+	char uses_registers;   /* registers are used and need to be initialized */
-  char anchor;		 /* anchor: 0=none 1=begline 2=begbuf */
+	int num_registers;     /* number of registers used */
 	char anchor;           /* anchor: 0=none 1=begline 2=begbuf */
 } *regexp_t;
 typedef struct re_registers
 {
-  int start[RE_NREGS];  /* start offset of region */
+	int start[RE_NREGS];  /* start offset of region */
-  int end[RE_NREGS];    /* end offset of region */
+	int end[RE_NREGS];    /* end offset of region */
 } *regexp_registers_t;
 /* bit definitions for syntax */
@ -77,52 +82,53 @@ typedef struct re_registers
 #ifdef HAVE_PROTOTYPES
 extern int re_syntax;
-/* This is the actual syntax mask.  It was added so that Python
+/* This is the actual syntax mask.  It was added so that Python could do
-   could do syntax-dependent munging of patterns before compilation. */
+ * syntax-dependent munging of patterns before compilation. */
 int re_set_syntax(int syntax);
 /* This sets the syntax to use and returns the previous syntax.  The
-   syntax is specified by a bit mask of the above defined bits. */
+ * syntax is specified by a bit mask of the above defined bits. */
 char *re_compile_pattern(char *regex, int regex_size, regexp_t compiled);
 /* This compiles the regexp (given in regex and length in regex_size).
-   This returns NULL if the regexp compiled successfully, and an error
+ * This returns NULL if the regexp compiled successfully, and an error
-   message if an error was encountered.  The buffer field must be
+ * message if an error was encountered.  The buffer field must be
-   initialized to a memory area allocated by malloc (or to NULL) before
+ * initialized to a memory area allocated by malloc (or to NULL) before
-   use, and the allocated field must be set to its length (or 0 if buffer is
+ * use, and the allocated field must be set to its length (or 0 if
-   NULL).  Also, the translate field must be set to point to a valid
+ * buffer is NULL).  Also, the translate field must be set to point to a
-   translation table, or NULL if it is not used. */
+ * valid translation table, or NULL if it is not used. */
 int re_match(regexp_t compiled, char *string, int size, int pos,
 	     regexp_registers_t old_regs);
 /* This tries to match the regexp against the string.  This returns the
-   length of the matched portion, or -1 if the pattern could not be
+ * length of the matched portion, or -1 if the pattern could not be
-   matched and -2 if an error (such as failure stack overflow) is
+ * matched and -2 if an error (such as failure stack overflow) is
-   encountered. */
+ * encountered. */
 int re_search(regexp_t compiled, char *string, int size, int startpos,
 	      int range, regexp_registers_t regs);
-/* This rearches for a substring matching the regexp.  This returns the first
+/* This searches for a substring matching the regexp.  This returns the
-   index at which a match is found.  range specifies at how many positions to
+ * first index at which a match is found.  range specifies at how many
-   try matching; positive values indicate searching forwards, and negative
+ * positions to try matching; positive values indicate searching
-   values indicate searching backwards.  mstop specifies the offset beyond
+ * forwards, and negative values indicate searching backwards.  mstop
-   which a match must not go.  This returns -1 if no match is found, and
+ * specifies the offset beyond which a match must not go.  This returns
-   -2 if an error (such as failure stack overflow) is encountered. */
+ * -1 if no match is found, and -2 if an error (such as failure stack
 * overflow) is encountered. */
 void re_compile_fastmap(regexp_t compiled);
 /* This computes the fastmap for the regexp.  For this to have any effect,
-   the calling program must have initialized the fastmap field to point
+ * the calling program must have initialized the fastmap field to point
-   to an array of 256 characters. */
+ * to an array of 256 characters. */
 char *re_comp(char *s);
 /* BSD 4.2 regex library routine re_comp.  This compiles the regexp into
-   an internal buffer.  This returns NULL if the regexp was compiled
+ * an internal buffer.  This returns NULL if the regexp was compiled
-   successfully, and an error message if there was an error. */
+ * successfully, and an error message if there was an error. */
 int re_exec(char *s);
-/* BSD 4.2 regexp library routine re_exec.  This returns true if the string
+/* BSD 4.2 regexp library routine re_exec.  This returns true if the
-   matches the regular expression (that is, a matching part is found
+ * string matches the regular expression (that is, a matching part is
-   anywhere in the string). */
+ * found anywhere in the string). */
 #else /* HAVE_PROTOTYPES */
--- a/Modules/reopmodule.c
+++ b/Modules/reopmodule.c
@ -0,0 +1,375 @@
 /***********************************************************
 Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
 The Netherlands.
                        All Rights Reserved
 Permission to use, copy, modify, and distribute this software and its
 documentation for any purpose and without fee is hereby granted,
 provided that the above copyright notice appear in all copies and that
 both that copyright notice and this permission notice appear in
 supporting documentation, and that the names of Stichting Mathematisch
 Centrum or CWI or Corporation for National Research Initiatives or
 CNRI not be used in advertising or publicity pertaining to
 distribution of the software without specific, written prior
 permission.
 While CWI is the initial source for this software, a modified version
 is made available by the Corporation for National Research Initiatives
 (CNRI) at the Internet address ftp://ftp.python.org.
 STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
 REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
 MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
 CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
 DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
 PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
 TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 PERFORMANCE OF THIS SOFTWARE.
 ******************************************************************/
 /* $Id$ */
 /* Regular expression objects */
 /* This uses Tatu Ylonen's copyleft-free reimplementation of
   GNU regular expressions */
 #include "Python.h"
 #include <ctype.h>
 #include "regexpr.h"
 static PyObject *ReopError;	/* Exception */	
 /* Convert the first num_regs entries of a register set into a Python
 * tuple of (start, end) integer pairs.  Entries whose start and end
 * are both -1 all share one cached (-1, -1) tuple that is built on
 * first use.  Returns a new reference, or NULL if building any object
 * fails. */
 static PyObject *
 makeresult(regs, num_regs)
 	struct re_registers *regs;
 	int num_regs;
 {
 	static PyObject *filler = NULL;
 	PyObject *result;
 	int idx;
 	if (filler == NULL) {
 		filler = Py_BuildValue("(ii)", -1, -1);
 		if (filler == NULL)
 			return NULL;
 	}
 	result = PyTuple_New(num_regs);
 	if (result == NULL)
 		return NULL;
 	for (idx = 0; idx < num_regs; idx++) {
 		PyObject *pair;
 		if (regs->start[idx] == -1 && regs->end[idx] == -1) {
 			/* Unset register: hand out another reference to
 			 * the shared placeholder. */
 			Py_INCREF(filler);
 			pair = filler;
 		}
 		else {
 			pair = Py_BuildValue("(ii)", regs->start[idx],
 					     regs->end[idx]);
 		}
 		if (pair != NULL && PyTuple_SetItem(result, idx, pair) >= 0)
 			continue;
 		Py_DECREF(result);
 		return NULL;
 	}
 	return result;
 }
 static PyObject *
 reop_match(self, args)
 	PyObject *self;
 	PyObject *args;
 {
 	char *string;
 	int fastmaplen, stringlen;
 	int can_be_null, anchor, i;
 	int num_regs, flags, pos, result;
 	struct re_pattern_buffer bufp;
 	struct re_registers re_regs;
 	if (!PyArg_Parse(args, "(s#iiis#is#i)", 
 			 &(bufp.buffer), &(bufp.allocated), 
 			 &num_regs, &flags, &can_be_null,
 			 &(bufp.fastmap), &fastmaplen,
 			 &anchor,
 			 &string, &stringlen, 
 			 &pos))
 	  return NULL;
 	/* XXX sanity-check the input data */
 	bufp.used=bufp.allocated;
 	bufp.translate=NULL;
 	bufp.fastmap_accurate=1;
 	bufp.can_be_null=can_be_null;
 	bufp.uses_registers=1;
 	bufp.num_registers=num_regs;
 	bufp.anchor=anchor;
 	for(i=0; i<num_regs; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
 	result = re_match(&bufp, 
 			  string, stringlen, pos, 
 			  &re_regs);
 	if (result < -1) {
 		/* Failure like stack overflow */
 		PyErr_SetString(ReopError, "match failure");
 		return NULL;
 	}
 	return makeresult(&re_regs, num_regs);
 }
 static PyObject *
 reop_search(self, args)
 	PyObject *self;
 	PyObject *args;
 {
 	char *string;
 	int fastmaplen, stringlen;
 	int can_be_null, anchor, i;
 	int num_regs, flags, pos, result;
 	struct re_pattern_buffer bufp;
 	struct re_registers re_regs;
 	if (!PyArg_Parse(args, "(s#iiis#is#i)", 
 			 &(bufp.buffer), &(bufp.allocated), 
 			 &num_regs, &flags, &can_be_null,
 			 &(bufp.fastmap), &fastmaplen,
 			 &anchor,
 			 &string, &stringlen, 
 			 &pos))
 	  return NULL;
 	/* XXX sanity-check the input data */
 	bufp.used=bufp.allocated;
 	bufp.translate=NULL;
 	bufp.fastmap_accurate=1;
 	bufp.can_be_null=can_be_null;
 	bufp.uses_registers=1;
 	bufp.num_registers=1;
 	bufp.anchor=anchor;
 	for(i=0; i<num_regs; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
 	result = re_search(&bufp, 
 			   string, stringlen, pos, stringlen-pos,
 			   &re_regs);
 	if (result < -1) {
 		/* Failure like stack overflow */
 		PyErr_SetString(ReopError, "match failure");
 		return NULL;
 	}
 	return makeresult(&re_regs, num_regs);
 }
 #if 0
 /* Functions originally in the regsub module.
   Added June 1, 1997. 
   */
 /* A cache of previously used patterns is maintained.  Notice that if
   you change the reop syntax flag, entries in the cache are
   invalidated.  
   XXX Solution: use (syntax flag, pattern) as keys?  Clear the cache
   every so often, or once it gets past a certain size? 
 */
 static PyObject *cache_dict=NULL;
 /* Accept an object; if it's a reop pattern, Py_INCREF it and return
   it.  If it's a string, a reop object is compiled and cached.
 */
 /* Returns an owned reference that the caller must release, or NULL
   with an exception set.  A failure to store a freshly compiled
   pattern in the cache is reported as an error instead of being
   ignored. */
 static reopobject *
 cached_compile(pattern)
      PyObject *pattern;
 {
   reopobject *p2;
   if (!PyString_Check(pattern)) 
     {
       /* It's not a string, so assume it's a compiled reop object */
       /* XXX check that! */
       Py_INCREF(pattern);
       return (reopobject*)pattern;
     }
   /* The cache dictionary is created lazily on first use. */
   if (cache_dict==NULL)
     {
       cache_dict=PyDict_New();
       if (cache_dict==NULL) 
 	{
 	  return (reopobject*)NULL;
 	}
     }
   /* See if the pattern has already been cached; if so, return that
      reop object */
   p2=(reopobject*)PyDict_GetItem(cache_dict, pattern);
   if (p2)
     {
       /* PyDict_GetItem returns a borrowed reference */
       Py_INCREF(p2);
       return (reopobject*)p2;
     }
   /* Compile the pattern and cache it */
   p2=(reopobject*)newreopobject(pattern, NULL, pattern, NULL);
   if (!p2) return p2;
   if (PyDict_SetItem(cache_dict, pattern, (PyObject*)p2) < 0)
     {
       /* Don't hand back an object while an exception is pending */
       Py_DECREF(p2);
       return (reopobject*)NULL;
     }
   return p2;
 }
 /* Split a string on every non-empty match of a pattern.
   args is (string, pattern) or (string, pattern, maxsplit); the
   pattern may be a string or an already compiled object (it is passed
   through cached_compile).  A non-zero maxsplit stops the loop once
   that many splits have been made.  When retain is non-zero the text
   of each match is appended to the result as well.
   Returns a new list, or NULL with an exception set. */
 static PyObject *
 internal_split(args, retain)
 	PyObject *args;
 	int retain;
 {
   PyObject *newlist=NULL, *s;
   reopobject *pattern;
   int maxsplit=0, count=0, length, next=0, result, status;
   int match_end=0; /* match_start is defined below */
   char *start;
   if (!PyArg_ParseTuple(args, "s#Oi", &start, &length, &pattern,
 			&maxsplit))
     {
       PyErr_Clear();
       if (!PyArg_ParseTuple(args, "s#O", &start, &length, &pattern))
 	return NULL;
     }
   /* cached_compile hands back an owned reference (it INCREFs or
      creates the object); every exit below has to release it. */
   pattern=cached_compile((PyObject *)pattern);
   if (!pattern) return NULL;
   newlist=PyList_New(0);
   if (!newlist) goto error;
   do
     {
       result = re_search(&pattern->re_patbuf, 
 			     start, length, next, length-next,
 			     &pattern->re_regs);
       if (result < -1)
 	{  /* Erk... an error happened during the reop search */
 	  PyErr_SetString(ReopError, "match failure");
 	  goto error;
 	}
       if (next<=result) 
 	{
 	  int match_start=pattern->re_regs.start[0];
 	  int oldmatch_end=match_end;
 	  match_end=pattern->re_regs.end[0];
 	  if (match_start==match_end) 
 	    { /* A zero-length match; increment to the next position */
 	      next=result+1;
 	      match_end=oldmatch_end;
 	      continue;
 	    }
 	  /* Append the string up to the start of the match */
 	  s=PyString_FromStringAndSize(start+oldmatch_end, match_start-oldmatch_end);
 	  if (!s) goto error;
 	  status=PyList_Append(newlist, s);
 	  Py_DECREF(s);
 	  if (status<0) goto error;
 	  if (retain)
 	    {
 	      /* Append a string containing whatever matched */
 	      s=PyString_FromStringAndSize(start+match_start, match_end-match_start);
 	      if (!s) goto error;
 	      status=PyList_Append(newlist, s);
 	      Py_DECREF(s);
 	      if (status<0) goto error;
 	    }
 	  /* Update the pointer, and increment the count of splits */
 	  next=match_end; count++;
 	}
     } while (result!=-1 && !(maxsplit && maxsplit==count) &&
 	     next<length);
   /* Whatever follows the last match becomes the final element */
   s=PyString_FromStringAndSize(start+match_end, length-match_end);
   if (!s) goto error;
   status=PyList_Append(newlist, s);
   Py_DECREF(s);
   if (status<0) goto error;
   Py_DECREF(pattern);
   return newlist;
  error:
   /* Common failure exit: drop the partial list (if any) and the
      pattern reference taken above. */
   Py_XDECREF(newlist);
   Py_DECREF(pattern);
   return NULL;
 }
 /* split: forwards to internal_split() with retain off, so the text of
   the matches themselves is not added to the result. */
 static PyObject *
 reop_split(self, args)
 	PyObject *self;
 	PyObject *args;
 {
  int keep_matches = 0;
  return internal_split(args, keep_matches);
 }
 /* splitx: forwards to internal_split() with retain on, so the text of
   each match is added to the result as well. */
 static PyObject *
 reop_splitx(self, args)
 	PyObject *self;
 	PyObject *args;
 {
  int keep_matches = 1;
  return internal_split(args, keep_matches);
 }
 #endif
 /* Table of the module-level functions, handed to Py_InitModule() by
 * initreop().  The split/splitx entries stay compiled out together
 * with their implementations. */
 static struct PyMethodDef reop_global_methods[] = {
 	{ "match",  reop_match,  0 },
 	{ "search", reop_search, 0 },
 #if 0
 	{ "split",  reop_split,  0 },
 	{ "splitx", reop_splitx, 0 },
 #endif
 	{ NULL, NULL }	/* end-of-table sentinel */
 };
 /* Module initialisation for "reop".
 *
 * Creates the module, then publishes two names in its dictionary:
 *   error    - the string object used as the reop.error exception
 *   casefold - a 256-byte string mapping every upper-case character
 *              (per isupper) to its lower-case form and every other
 *              byte to itself
 * Any failure along the way is fatal (Py_FatalError). */
 void
 initreop()
 {
 	PyObject *m, *d, *v;
 	int i;
 	char *s;
 	m = Py_InitModule("reop", reop_global_methods);
 	/* Don't hand a NULL module to PyModule_GetDict */
 	if (m == NULL)
 		goto finally;
 	d = PyModule_GetDict(m);
 	/* Initialize reop.error exception */
 	v = ReopError = PyString_FromString("reop.error");
 	if (v == NULL || PyDict_SetItemString(d, "error", v) != 0)
 		goto finally;
 	/* Initialize reop.casefold constant */
 	if (!(v = PyString_FromStringAndSize((char *)NULL, 256)))
 		goto finally;
 	if (!(s = PyString_AsString(v)))
 		goto finally;
 	/* Fill the freshly allocated string buffer in place */
 	for (i = 0; i < 256; i++) {
 		if (isupper(i))
 			s[i] = tolower(i);
 		else
 			s[i] = i;
 	}
 	if (PyDict_SetItemString(d, "casefold", v) < 0)
 		goto finally;
 	/* The dictionary now holds its own reference to the table */
 	Py_DECREF(v);
 	if (!PyErr_Occurred())
 		return;
  finally:
 	Py_FatalError("can't initialize reop module");
 }