mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 10:26:02 +00:00 
			
		
		
		
	 0cb96de269
			
		
	
	
		0cb96de269
		
	
	
	
	
		
			
			(1) Use PyErr_NewException("module.class", NULL, NULL) to create the
    exception object.
(2) Remove all calls to Py_FatalError(); instead, return or
    ignore the errors -- the import code now checks PyErr_Occurred()
    after calling a module's init function, so it's no longer a
    fatal error for the initialization to fail.
Also did some small cleanups, e.g. removed unnecessary test for
"already initialized" from initfpectl(), and unified
initposix()/initnt().
I haven't checked this very thoroughly, so while the changes are
pretty trivial -- beware of untested code!
		
	
			
		
			
				
	
	
		
			1053 lines
		
	
	
	
		
			25 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			1053 lines
		
	
	
	
		
			25 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /***********************************************************
 | |
| Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
 | |
| The Netherlands.
 | |
| 
 | |
|                         All Rights Reserved
 | |
| 
 | |
| Permission to use, copy, modify, and distribute this software and its
 | |
| documentation for any purpose and without fee is hereby granted,
 | |
| provided that the above copyright notice appear in all copies and that
 | |
| both that copyright notice and this permission notice appear in
 | |
| supporting documentation, and that the names of Stichting Mathematisch
 | |
| Centrum or CWI or Corporation for National Research Initiatives or
 | |
| CNRI not be used in advertising or publicity pertaining to
 | |
| distribution of the software without specific, written prior
 | |
| permission.
 | |
| 
 | |
| While CWI is the initial source for this software, a modified version
 | |
| is made available by the Corporation for National Research Initiatives
 | |
| (CNRI) at the Internet address ftp://ftp.python.org.
 | |
| 
 | |
| STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
 | |
| REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
 | |
| MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
 | |
| CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
 | |
| DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
 | |
| PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
 | |
| TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 | |
| PERFORMANCE OF THIS SOFTWARE.
 | |
| 
 | |
| ******************************************************************/
 | |
| 
 | |
| /* $Id$ */
 | |
| 
 | |
| /* Regular expression objects */
 | |
| /* This uses Tatu Ylonen's copyleft-free reimplementation of
 | |
|    GNU regular expressions */
 | |
| 
 | |
| #include "Python.h"
 | |
| 
 | |
| #include <ctype.h>
 | |
| 
 | |
| #include "regexpr.h"
 | |
| 
 | |
| static PyObject *ReopError;	/* Exception */	
 | |
| 
 | |
| #define IGNORECASE 0x01
 | |
| #define MULTILINE  0x02
 | |
| #define DOTALL     0x04
 | |
| #define VERBOSE    0x08
 | |
| 
 | |
| #define NORMAL			0
 | |
| #define CHARCLASS		1
 | |
| #define REPLACEMENT		2
 | |
| 
 | |
| #define CHAR 			0
 | |
| #define MEMORY_REFERENCE 	1
 | |
| #define SYNTAX 			2
 | |
| #define NOT_SYNTAX 		3
 | |
| #define SET			4
 | |
| #define WORD_BOUNDARY		5
 | |
| #define NOT_WORD_BOUNDARY	6
 | |
| #define BEGINNING_OF_BUFFER	7
 | |
| #define END_OF_BUFFER		8
 | |
| 
 | |
| static PyObject *
 | |
| makeresult(regs, num_regs)
 | |
| 	struct re_registers *regs;
 | |
| 	int num_regs;
 | |
| {
 | |
| 	PyObject *v;
 | |
| 	int i;
 | |
| 	static PyObject *filler = NULL;
 | |
| 
 | |
| 	if (filler == NULL) {
 | |
| 		filler = Py_BuildValue("(ii)", -1, -1);
 | |
| 		if (filler == NULL)
 | |
| 			return NULL;
 | |
| 	}
 | |
| 	v = PyTuple_New(num_regs);
 | |
| 	if (v == NULL)
 | |
| 		return NULL;
 | |
| 
 | |
| 	for (i = 0; i < num_regs; i++) {
 | |
| 		int lo = regs->start[i];
 | |
| 		int hi = regs->end[i];
 | |
| 		PyObject *w;
 | |
| 		if (lo == -1 && hi == -1) {
 | |
| 			w = filler;
 | |
| 			Py_INCREF(w);
 | |
| 		}
 | |
| 		else
 | |
| 			w = Py_BuildValue("(ii)", lo, hi);
 | |
| 		if (w == NULL || PyTuple_SetItem(v, i, w) < 0) {
 | |
| 			Py_DECREF(v);
 | |
| 			return NULL;
 | |
| 		}
 | |
| 	}
 | |
| 	return v;
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| reop_match(self, args)
 | |
| 	PyObject *self;
 | |
| 	PyObject *args;
 | |
| {
 | |
| 	unsigned char *string;
 | |
| 	int fastmaplen, stringlen;
 | |
| 	int can_be_null, anchor, i;
 | |
| 	int flags, pos, result;
 | |
| 	struct re_pattern_buffer bufp;
 | |
| 	struct re_registers re_regs;
 | |
| 	PyObject *modules = NULL;
 | |
| 	PyObject *reopmodule = NULL;
 | |
| 	PyObject *reopdict = NULL;
 | |
| 	PyObject *casefold = NULL;
 | |
| 	
 | |
| 	if (!PyArg_Parse(args, "(s#iiis#is#i)", 
 | |
| 			 &(bufp.buffer), &(bufp.allocated), 
 | |
| 			 &(bufp.num_registers), &flags, &can_be_null,
 | |
| 			 &(bufp.fastmap), &fastmaplen,
 | |
| 			 &anchor,
 | |
| 			 &string, &stringlen, 
 | |
| 			 &pos))
 | |
| 	  return NULL;
 | |
| 
 | |
| 	/* XXX sanity-check the input data */
 | |
| 	bufp.used=bufp.allocated;
 | |
| 	if (flags & IGNORECASE)
 | |
| 	{
 | |
| 		if ((modules = PyImport_GetModuleDict()) == NULL)
 | |
| 			return NULL;
 | |
| 
 | |
| 		if ((reopmodule = PyDict_GetItemString(modules,
 | |
| 						       "reop")) == NULL)
 | |
| 			return NULL;
 | |
| 
 | |
| 		if ((reopdict = PyModule_GetDict(reopmodule)) == NULL)
 | |
| 			return NULL;
 | |
| 
 | |
| 		if ((casefold = PyDict_GetItemString(reopdict,
 | |
| 						     "casefold")) == NULL)
 | |
| 			return NULL;
 | |
| 
 | |
| 		bufp.translate = (unsigned char*)PyString_AsString(casefold);
 | |
| 	}
 | |
| 	else
 | |
| 		bufp.translate=NULL;
 | |
| 	bufp.fastmap_accurate=1;
 | |
| 	bufp.can_be_null=can_be_null;
 | |
| 	bufp.uses_registers=1;
 | |
| 	bufp.anchor=anchor;
 | |
| 	
 | |
| 	for(i=0; i<bufp.num_registers; i++) {
 | |
| 		re_regs.start[i]=-1;
 | |
| 		re_regs.end[i]=-1;
 | |
| 	}
 | |
| 	
 | |
| 	result = re_match(&bufp, 
 | |
| 			  string, stringlen, pos, 
 | |
| 			  &re_regs);
 | |
| 
 | |
| 	if (result < -1) {
 | |
| 		/* Failure like stack overflow */
 | |
| 	        if (!PyErr_Occurred())
 | |
| 	  	        PyErr_SetString(ReopError, "match failure");
 | |
| 		return NULL;
 | |
| 	}
 | |
| 	if (result == -1) {
 | |
| 		Py_INCREF(Py_None);
 | |
| 		return Py_None;
 | |
| 	}
 | |
| 	return makeresult(&re_regs, bufp.num_registers);
 | |
| }
 | |
| 
 | |
| #if 0
 | |
| static PyObject *
 | |
| reop_optimize(self, args)
 | |
| 	PyObject *self;
 | |
| 	PyObject *args;
 | |
| {
 | |
|   unsigned char *buffer;
 | |
|   int buflen;
 | |
|   struct re_pattern_buffer bufp;
 | |
| 
 | |
|   PyObject *opt_code;
 | |
|   
 | |
|   if (!PyArg_Parse(args, "(s#)", &buffer, &buflen)) return NULL;
 | |
|   /* Create a new string for the optimized code */
 | |
|   opt_code=PyString_FromStringAndSize(buffer, buflen);
 | |
|   if (opt_code!=NULL)
 | |
|     {
 | |
|       bufp.buffer = PyString_AsString(opt_code);
 | |
|       bufp.used=bufp.allocated=buflen;
 | |
|       
 | |
|     }
 | |
|   return opt_code;
 | |
|   
 | |
| }
 | |
| #endif
 | |
| 
 | |
| static PyObject *
 | |
| reop_search(self, args)
 | |
| 	PyObject *self;
 | |
| 	PyObject *args;
 | |
| {
 | |
| 	unsigned char *string;
 | |
| 	int fastmaplen, stringlen;
 | |
| 	int can_be_null, anchor, i;
 | |
| 	int flags, pos, result;
 | |
| 	struct re_pattern_buffer bufp;
 | |
| 	struct re_registers re_regs;
 | |
| 	PyObject *modules = NULL;
 | |
| 	PyObject *reopmodule = NULL;
 | |
| 	PyObject *reopdict = NULL;
 | |
| 	PyObject *casefold = NULL;
 | |
| 	
 | |
| 	if (!PyArg_Parse(args, "(s#iiis#is#i)", 
 | |
| 			 &(bufp.buffer), &(bufp.allocated), 
 | |
| 			 &(bufp.num_registers), &flags, &can_be_null,
 | |
| 			 &(bufp.fastmap), &fastmaplen,
 | |
| 			 &anchor,
 | |
| 			 &string, &stringlen, 
 | |
| 			 &pos))
 | |
| 	  return NULL;
 | |
| 
 | |
| 	/* XXX sanity-check the input data */
 | |
| 	bufp.used=bufp.allocated;
 | |
| 	if (flags & IGNORECASE)
 | |
| 	{
 | |
| 		if ((modules = PyImport_GetModuleDict()) == NULL)
 | |
| 			return NULL;
 | |
| 
 | |
| 		if ((reopmodule = PyDict_GetItemString(modules,
 | |
| 						       "reop")) == NULL)
 | |
| 			return NULL;
 | |
| 
 | |
| 		if ((reopdict = PyModule_GetDict(reopmodule)) == NULL)
 | |
| 			return NULL;
 | |
| 
 | |
| 		if ((casefold = PyDict_GetItemString(reopdict,
 | |
| 						     "casefold")) == NULL)
 | |
| 			return NULL;
 | |
| 
 | |
| 		bufp.translate = (unsigned char *)PyString_AsString(casefold);
 | |
| 	}
 | |
| 	else
 | |
| 		bufp.translate=NULL;
 | |
| 	bufp.fastmap_accurate=1;
 | |
| 	bufp.can_be_null=can_be_null;
 | |
| 	bufp.uses_registers=1;
 | |
| 	bufp.anchor=anchor;
 | |
| 
 | |
| 	for(i = 0; i < bufp.num_registers; i++) {
 | |
| 		re_regs.start[i] = -1;
 | |
| 		re_regs.end[i] = -1;
 | |
| 	}
 | |
| 	
 | |
| 	result = re_search(&bufp, 
 | |
| 			   string, stringlen, pos, stringlen-pos,
 | |
| 			   &re_regs);
 | |
| 
 | |
| 	if (result < -1) {
 | |
| 		/* Failure like stack overflow */
 | |
| 	        if (!PyErr_Occurred())
 | |
| 	  	        PyErr_SetString(ReopError, "match failure");
 | |
| 		return NULL;
 | |
| 	}
 | |
| 
 | |
| 	if (result == -1) {
 | |
| 		Py_INCREF(Py_None);
 | |
| 		return Py_None;
 | |
| 	}
 | |
| 
 | |
| 	return makeresult(&re_regs, bufp.num_registers);
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| reop_expand_escape(self, args)
 | |
| 	PyObject *self;
 | |
| 	PyObject *args;
 | |
| {
 | |
|   unsigned char c, *pattern;
 | |
|   int index, context=NORMAL, pattern_len;
 | |
| 
 | |
|   if (!PyArg_ParseTuple(args, "s#i|i", &pattern, &pattern_len, &index,
 | |
| 			&context)) 
 | |
|     return NULL;
 | |
|   if (pattern_len<=index)
 | |
|     {
 | |
|       PyErr_SetString(ReopError, "escape ends too soon");
 | |
|       return NULL;
 | |
|     }
 | |
|   c=pattern[index]; index++;
 | |
|   switch (c)
 | |
|     {
 | |
|     case('t'):
 | |
|       return Py_BuildValue("ici", CHAR, (char)9, index);
 | |
|       break;
 | |
|     case('n'):
 | |
|       return Py_BuildValue("ici", CHAR, (char)10, index);
 | |
|       break;
 | |
|     case('v'):
 | |
|       return Py_BuildValue("ici", CHAR, (char)11, index);
 | |
|       break;
 | |
|     case('r'):
 | |
|       return Py_BuildValue("ici", CHAR, (char)13, index);
 | |
|       break;
 | |
|     case('f'):
 | |
|       return Py_BuildValue("ici", CHAR, (char)12, index);
 | |
|       break;
 | |
|     case('a'):
 | |
|       return Py_BuildValue("ici", CHAR, (char)7, index);
 | |
|       break;
 | |
|     case('x'):
 | |
|       {
 | |
| 	int end, length;
 | |
| 	unsigned char *string;
 | |
| 	PyObject *v, *result;
 | |
| 
 | |
| 	end=index; 
 | |
| 	while (end<pattern_len && 
 | |
| 	       ( re_syntax_table[ pattern[end] ] & Shexdigit ) )
 | |
| 	  end++;
 | |
| 	if (end==index)
 | |
| 	  {
 | |
| 	    PyErr_SetString(ReopError, "\\x must be followed by hex digits");
 | |
| 	    return NULL;
 | |
| 	  }
 | |
| 	length=end-index;
 | |
| 	string=malloc(length+4+1);
 | |
| 	if (string==NULL)
 | |
| 	  {
 | |
| 	    PyErr_SetString(PyExc_MemoryError, "can't allocate memory for \\x string");
 | |
| 	    return NULL;
 | |
| 	  }
 | |
| 	/* Create a string containing "\x<hexdigits>", which will be
 | |
| 	   passed to eval() */
 | |
| 	string[0]=string[length+3]='"';
 | |
| 	string[1]='\\';
 | |
| 	string[length+4]='\0';
 | |
| 	memcpy(string+2, pattern+index-1, length+1);
 | |
| 	v=PyRun_String((char *)string, Py_eval_input, 
 | |
| 		       PyEval_GetGlobals(), PyEval_GetLocals());
 | |
| 	free(string);
 | |
| 	/* The evaluation raised an exception */
 | |
| 	if (v==NULL) return NULL;
 | |
| 	result=Py_BuildValue("iOi", CHAR, v, end);
 | |
| 	Py_DECREF(v);
 | |
| 	return result;
 | |
|       }
 | |
|       break;
 | |
| 
 | |
|     case('b'):
 | |
|       if (context!=NORMAL)
 | |
| 	return Py_BuildValue("ici", CHAR, (char)8, index);
 | |
|       else 
 | |
| 	{
 | |
| 	  unsigned char empty_string[1];
 | |
| 	  empty_string[0]='\0';
 | |
| 	  return Py_BuildValue("isi", WORD_BOUNDARY, empty_string, index);
 | |
| 	}
 | |
|       break;
 | |
|     case('B'):
 | |
|       if (context!=NORMAL)
 | |
| 	return Py_BuildValue("ici", CHAR, 'B', index);
 | |
|       else 
 | |
| 	{
 | |
| 	  unsigned char empty_string[1];
 | |
| 	  empty_string[0]='\0';
 | |
| 	  return Py_BuildValue("isi", NOT_WORD_BOUNDARY, empty_string, index);
 | |
| 	}
 | |
|       break;
 | |
|     case('A'):
 | |
|       if (context!=NORMAL)
 | |
| 	return Py_BuildValue("ici", CHAR, 'A', index);
 | |
|       else 
 | |
| 	{
 | |
| 	  unsigned char empty_string[1];
 | |
| 	  empty_string[0]='\0';
 | |
| 	  return Py_BuildValue("isi", BEGINNING_OF_BUFFER, empty_string, index);
 | |
| 	}
 | |
|       break;
 | |
|     case('Z'):
 | |
|       if (context!=NORMAL)
 | |
| 	return Py_BuildValue("ici", CHAR, 'Z', index);
 | |
|       else 
 | |
| 	{
 | |
| 	  unsigned char empty_string[1];
 | |
| 	  empty_string[0]='\0';
 | |
| 	  return Py_BuildValue("isi", END_OF_BUFFER, empty_string, index);
 | |
| 	}
 | |
|       break;
 | |
|     case('E'):    case('G'):    case('L'):    case('Q'):
 | |
|     case('U'):    case('l'):    case('u'):
 | |
|       {
 | |
| 	char message[50];
 | |
| 	sprintf(message, "\\%c is not allowed", c);
 | |
| 	PyErr_SetString(ReopError, message);
 | |
| 	return NULL;
 | |
|       }
 | |
| 
 | |
|     case ('w'):
 | |
|       if (context==NORMAL)
 | |
| 	return Py_BuildValue("iii", SYNTAX, Sword, index);
 | |
|       if (context!=CHARCLASS)
 | |
| 	return Py_BuildValue("ici", CHAR, 'w', index);
 | |
|       {
 | |
| 	/* context==CHARCLASS */
 | |
| 	unsigned char set[256];
 | |
| 	int i, j;
 | |
| 	for(i=j=0; i<256; i++)
 | |
| 	  if (re_syntax_table[i] & Sword) 
 | |
| 	    {
 | |
| 	      set[j++] = i;
 | |
| 	    }
 | |
| 	return Py_BuildValue("is#i", SET, set, j, index);
 | |
|       }
 | |
|       break;
 | |
|     case ('W'):
 | |
|       if (context==NORMAL)
 | |
| 	return Py_BuildValue("iii", NOT_SYNTAX, Sword, index);
 | |
|       if (context!=CHARCLASS)
 | |
| 	return Py_BuildValue("ici", CHAR, 'W', index);
 | |
|       {
 | |
| 	/* context==CHARCLASS */
 | |
| 	unsigned char set[256];
 | |
| 	int i, j;
 | |
| 	for(i=j=0; i<256; i++)
 | |
| 	  if (! (re_syntax_table[i] & Sword))
 | |
| 	    {
 | |
| 	      set[j++] = i;
 | |
| 	    }
 | |
| 	return Py_BuildValue("is#i", SET, set, j, index);
 | |
|       }
 | |
|       break;
 | |
|     case ('s'):
 | |
|       if (context==NORMAL)
 | |
| 	return Py_BuildValue("iii", SYNTAX, Swhitespace, index);
 | |
|       if (context!=CHARCLASS)
 | |
| 	return Py_BuildValue("ici", CHAR, 's', index);
 | |
|       {
 | |
| 	/* context==CHARCLASS */
 | |
| 	unsigned char set[256];
 | |
| 	int i, j;
 | |
| 	for(i=j=0; i<256; i++)
 | |
| 	  if (re_syntax_table[i] & Swhitespace) 
 | |
| 	    {
 | |
| 	      set[j++] = i;
 | |
| 	    }
 | |
| 	return Py_BuildValue("is#i", SET, set, j, index);
 | |
|       }
 | |
|       break;
 | |
|     case ('S'):
 | |
|       if (context==NORMAL)
 | |
| 	return Py_BuildValue("iii", NOT_SYNTAX, Swhitespace, index);
 | |
|       if (context!=CHARCLASS)
 | |
| 	return Py_BuildValue("ici", CHAR, 'S', index);
 | |
|       {
 | |
| 	/* context==CHARCLASS */
 | |
| 	unsigned char set[256];
 | |
| 	int i, j;
 | |
| 	for(i=j=0; i<256; i++)
 | |
| 	  if (! (re_syntax_table[i] & Swhitespace) )
 | |
| 	    {
 | |
| 	      set[j++] = i;
 | |
| 	    }
 | |
| 	return Py_BuildValue("is#i", SET, set, j, index);
 | |
|       }
 | |
|       break;
 | |
| 
 | |
|     case ('d'):
 | |
|       if (context==NORMAL)
 | |
| 	return Py_BuildValue("iii", SYNTAX, Sdigit, index);
 | |
|       if (context!=CHARCLASS)
 | |
| 	return Py_BuildValue("ici", CHAR, 'd', index);
 | |
|       {
 | |
| 	/* context==CHARCLASS */
 | |
| 	unsigned char set[256];
 | |
| 	int i, j;
 | |
| 	for(i=j=0; i<256; i++)
 | |
| 	  if (re_syntax_table[i] & Sdigit) 
 | |
| 	    {
 | |
| 	      set[j++] = i;
 | |
| 	    }
 | |
| 	return Py_BuildValue("is#i", SET, set, j, index);
 | |
|       }
 | |
|       break;
 | |
|     case ('D'):
 | |
|       if (context==NORMAL)
 | |
| 	return Py_BuildValue("iii", NOT_SYNTAX, Sdigit, index);
 | |
|       if (context!=CHARCLASS)
 | |
| 	return Py_BuildValue("ici", CHAR, 'D', index);
 | |
|       {
 | |
| 	/* context==CHARCLASS */
 | |
| 	unsigned char set[256];
 | |
| 	int i, j;
 | |
| 	for(i=j=0; i<256; i++)
 | |
| 	  if ( !(re_syntax_table[i] & Sdigit) )
 | |
| 	    {
 | |
| 	      set[j++] = i;
 | |
| 	    }
 | |
| 	return Py_BuildValue("is#i", SET, set, j, index);
 | |
|       }
 | |
|       break;
 | |
| 
 | |
|     case('g'):
 | |
|       {
 | |
| 	int end, valid, i;
 | |
| 	if (context!=REPLACEMENT)
 | |
| 	  return Py_BuildValue("ici", CHAR, 'g', index);
 | |
| 	if (pattern_len<=index)
 | |
| 	  {
 | |
| 	    PyErr_SetString(ReopError, "unfinished symbolic reference");
 | |
| 	    return NULL;
 | |
| 	  }
 | |
| 	if (pattern[index]!='<')
 | |
| 	  {
 | |
| 	    PyErr_SetString(ReopError, "missing < in symbolic reference");
 | |
| 	    return NULL;
 | |
| 	  }
 | |
| 	index++;
 | |
| 	end=index;
 | |
| 	while (end<pattern_len && pattern[end]!='>')
 | |
| 	  end++;
 | |
| 	if (end==pattern_len)
 | |
| 	  {
 | |
| 	    PyErr_SetString(ReopError, "unfinished symbolic reference");
 | |
| 	    return NULL;
 | |
| 	  }
 | |
| 	valid=1;
 | |
| 	if (index==end		/* Zero-length name */
 | |
| 	    || !(re_syntax_table[pattern[index]] & Sword) /* First char. not alphanumeric */
 | |
| 	    || (re_syntax_table[pattern[index]] & Sdigit) ) /* First char. a digit */
 | |
| 	  valid=0;
 | |
| 
 | |
| 	for(i=index+1; i<end; i++)
 | |
| 	  {
 | |
| 	    if (!(re_syntax_table[pattern[i]] & Sword) )
 | |
| 	      valid=0;
 | |
| 	  }	
 | |
| 	if (!valid)
 | |
| 	  {
 | |
| 	    /* XXX should include the text of the reference */
 | |
| 	    PyErr_SetString(ReopError, "illegal symbolic reference");
 | |
| 	    return NULL;
 | |
| 	  }
 | |
| 	    
 | |
| 	return Py_BuildValue("is#i", MEMORY_REFERENCE, 
 | |
| 			             pattern+index, end-index, 
 | |
| 			             end+1);
 | |
|       }
 | |
|     break;
 | |
| 
 | |
|     case('0'):
 | |
|       {
 | |
| 	/* \0 always indicates an octal escape, so we consume up to 3
 | |
| 	   characters, as long as they're all octal digits */
 | |
| 	int octval=0, i;
 | |
| 	index--;
 | |
| 	for(i=index;
 | |
| 	    i<=index+2 && i<pattern_len 
 | |
| 	      && (re_syntax_table[ pattern[i] ] & Soctaldigit );
 | |
| 	    i++)
 | |
| 	  {
 | |
| 	    octval = octval * 8 + pattern[i] - '0';
 | |
| 	  }
 | |
| 	if (octval>255)
 | |
| 	  {
 | |
| 	    PyErr_SetString(ReopError, "octal value out of range");
 | |
| 	    return NULL;
 | |
| 	  }
 | |
| 	return Py_BuildValue("ici", CHAR, (unsigned char)octval, i);
 | |
|       }
 | |
|       break;
 | |
|     case('1'):    case('2'):    case('3'):    case('4'):
 | |
|     case('5'):    case('6'):    case('7'):    case('8'):
 | |
|     case('9'):
 | |
|       {
 | |
| 	/* Handle \?, where ? is from 1 through 9 */
 | |
| 	int value=0;
 | |
| 	index--;
 | |
| 	/* If it's at least a two-digit reference, like \34, it might
 | |
|            either be a 3-digit octal escape (\123) or a 2-digit
 | |
|            decimal memory reference (\34) */
 | |
| 
 | |
| 	if ( (index+1) <pattern_len && 
 | |
| 	    (re_syntax_table[ pattern[index+1] ] & Sdigit) )
 | |
| 	  {
 | |
| 	    if ( (index+2) <pattern_len && 
 | |
| 		(re_syntax_table[ pattern[index+2] ] & Soctaldigit) &&
 | |
| 		(re_syntax_table[ pattern[index+1] ] & Soctaldigit) &&
 | |
| 		(re_syntax_table[ pattern[index  ] ] & Soctaldigit)
 | |
| 		)
 | |
| 	      {
 | |
| 		/* 3 octal digits */
 | |
| 		value= 8*8*(pattern[index  ]-'0') +
 | |
| 		         8*(pattern[index+1]-'0') +
 | |
| 		           (pattern[index+2]-'0');
 | |
| 		if (value>255)
 | |
| 		  {
 | |
| 		    PyErr_SetString(ReopError, "octal value out of range");
 | |
| 		    return NULL;
 | |
| 		  }
 | |
| 		return Py_BuildValue("ici", CHAR, (unsigned char)value, index+3);
 | |
| 	      }
 | |
| 	    else
 | |
| 	      {
 | |
| 		/* 2-digit form, so it's a memory reference */
 | |
| 		if (context==CHARCLASS)
 | |
| 		  {
 | |
| 		    PyErr_SetString(ReopError, 
 | |
| 				    "cannot reference a register from inside a character class");
 | |
| 		    return NULL;
 | |
| 		  }
 | |
| 		value= 10*(pattern[index  ]-'0') +
 | |
| 		          (pattern[index+1]-'0');
 | |
| 		if (value<1 || RE_NREGS<=value)
 | |
| 		  {
 | |
| 		    PyErr_SetString(ReopError, "memory reference out of range");
 | |
| 		    return NULL;
 | |
| 		  }
 | |
| 		return Py_BuildValue("iii", MEMORY_REFERENCE, 
 | |
| 				     value, index+2);
 | |
| 	      }
 | |
| 	  }
 | |
| 	else 
 | |
| 	  {
 | |
| 	    /* Single-digit form, like \2, so it's a memory reference */
 | |
| 	    if (context==CHARCLASS)
 | |
| 	      {
 | |
| 		PyErr_SetString(ReopError, 
 | |
| 				"cannot reference a register from inside a character class");
 | |
| 		return NULL;
 | |
| 	      }
 | |
| 	    return Py_BuildValue("iii", MEMORY_REFERENCE, 
 | |
| 				 pattern[index]-'0', index+1);
 | |
| 	  }
 | |
|       }
 | |
|       break;
 | |
| 
 | |
|     default:
 | |
| 	return Py_BuildValue("ici", CHAR, c, index);
 | |
| 	break;
 | |
|     }
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| reop__expand(self, args)
 | |
| 	PyObject *self;
 | |
| 	PyObject *args;
 | |
| {
 | |
|   PyObject *results, *match_obj;
 | |
|   PyObject *repl_obj, *newstring;
 | |
|   unsigned char *repl;
 | |
|   int size, total_len, i, start, pos;
 | |
| 
 | |
|   if (!PyArg_ParseTuple(args, "OS", &match_obj, &repl_obj)) 
 | |
|     return NULL;
 | |
| 
 | |
|   repl=(unsigned char *)PyString_AsString(repl_obj);
 | |
|   size=PyString_Size(repl_obj);
 | |
|   results=PyList_New(0);
 | |
|   if (results==NULL) return NULL;
 | |
|   for(start=total_len=i=0; i<size; i++)
 | |
|     {
 | |
|       if (repl[i]=='\\')
 | |
| 	{
 | |
| 	  PyObject *args, *t, *value;
 | |
| 	  int escape_type;
 | |
| 
 | |
| 	  if (start!=i)
 | |
| 	    {
 | |
| 	      PyList_Append(results, 
 | |
| 			    PyString_FromStringAndSize((char *)repl+start, i-start));
 | |
| 	      total_len += i-start;
 | |
| 	    }
 | |
| 	  i++;
 | |
| 	  args=Py_BuildValue("Oii", repl_obj, i, REPLACEMENT);
 | |
| 	  t=reop_expand_escape(NULL, args);
 | |
| 	  Py_DECREF(args);
 | |
| 	  if (t==NULL)
 | |
| 	    {
 | |
| 	      /* reop_expand_escape triggered an exception of some sort,
 | |
| 		 so just return */
 | |
| 	      Py_DECREF(results);
 | |
| 	      return NULL;
 | |
| 	    }
 | |
| 	  value=PyTuple_GetItem(t, 1);
 | |
| 	  escape_type=PyInt_AsLong(PyTuple_GetItem(t, 0));
 | |
| 	  switch (escape_type)
 | |
| 	    {
 | |
| 	    case (CHAR):
 | |
| 	      PyList_Append(results, value);
 | |
| 	      total_len += PyString_Size(value);
 | |
| 	      break;
 | |
| 	    case(MEMORY_REFERENCE):
 | |
| 	      {
 | |
| 		PyObject *r, *tuple, *result;
 | |
| 		r=PyObject_GetAttrString(match_obj, "group");
 | |
| 		tuple=PyTuple_New(1);
 | |
| 		Py_INCREF(value);
 | |
| 		PyTuple_SetItem(tuple, 0, value);
 | |
| 		result=PyEval_CallObject(r, tuple);
 | |
| 		Py_DECREF(r); Py_DECREF(tuple);
 | |
| 		if (result==NULL)
 | |
| 		  {
 | |
| 		    /* The group() method trigged an exception of some sort */
 | |
| 		    Py_DECREF(results);
 | |
| 		    return NULL;
 | |
| 		  }
 | |
| 		if (result==Py_None)
 | |
| 		  {
 | |
| 		    char message[50];
 | |
| 		    sprintf(message, 
 | |
| 			    "group %li did not contribute to the match",
 | |
| 			    PyInt_AsLong(value));
 | |
| 		    PyErr_SetString(ReopError, 
 | |
| 				    message);
 | |
| 		    Py_DECREF(result);
 | |
| 		    Py_DECREF(t);
 | |
| 		    Py_DECREF(results);
 | |
| 		    return NULL;
 | |
| 		  }
 | |
| 		/* xxx typecheck that it's a string! */
 | |
| 		PyList_Append(results, result);
 | |
| 		total_len += PyString_Size(result);
 | |
| 		Py_DECREF(result);
 | |
| 	      }
 | |
| 	      break;
 | |
| 	    default:
 | |
| 	      Py_DECREF(t);
 | |
| 	      Py_DECREF(results);
 | |
| 	      PyErr_SetString(ReopError, 
 | |
| 			      "bad escape in replacement");
 | |
| 	      return NULL;
 | |
| 	    }
 | |
| 	  i=start=PyInt_AsLong(PyTuple_GetItem(t, 2));
 | |
| 	  i--; /* Decrement now, because the 'for' loop will increment it */
 | |
| 	  Py_DECREF(t);
 | |
| 	}
 | |
|     } /* endif repl[i]!='\\' */
 | |
| 
 | |
|   if (start!=i)
 | |
|     {
 | |
|       PyList_Append(results, PyString_FromStringAndSize((char *)repl+start, i-start));
 | |
|       total_len += i-start;
 | |
|     }
 | |
| 
 | |
|   /* Whew!  Now we've constructed a list containing various pieces of
 | |
|      strings that will make up our final result.  So, iterate over 
 | |
|      the list concatenating them.  A new string measuring total_len
 | |
|      bytes is allocated and filled in. */
 | |
|      
 | |
|   newstring=PyString_FromStringAndSize(NULL, total_len);
 | |
|   if (newstring==NULL)
 | |
|     {
 | |
|       Py_DECREF(results);
 | |
|       return NULL;
 | |
|     }
 | |
| 
 | |
|   repl=(unsigned char *)PyString_AsString(newstring);
 | |
|   for (pos=i=0; i<PyList_Size(results); i++)
 | |
|     {
 | |
|       PyObject *item=PyList_GetItem(results, i);
 | |
|       memcpy(repl+pos, PyString_AsString(item), PyString_Size(item) );
 | |
|       pos += PyString_Size(item);
 | |
|     }
 | |
|   Py_DECREF(results);
 | |
|   return newstring;
 | |
| }
 | |
| 
 | |
| 
 | |
| #if 0
 | |
| /* Functions originally in the regsub module.
 | |
|    Added June 1, 1997. 
 | |
|    */
 | |
| 
 | |
| /* A cache of previously used patterns is maintained.  Notice that if
 | |
|    you change the reop syntax flag, entries in the cache are
 | |
|    invalidated.  
 | |
|    XXX Solution: use (syntax flag, pattern) as keys?  Clear the cache
 | |
|    every so often, or once it gets past a certain size? 
 | |
| */
 | |
| 
 | |
| static PyObject *cache_dict=NULL;
 | |
| 
 | |
| /* Accept an object; if it's a reop pattern, Py_INCREF it and return
 | |
|    it.  If it's a string, a reop object is compiled and cached.
 | |
| */
 | |
|    
 | |
| static reopobject *
 | |
| cached_compile(pattern)
 | |
|      PyObject *pattern;
 | |
| {
 | |
|   reopobject *p2;
 | |
| 
 | |
|   if (!PyString_Check(pattern)) 
 | |
|     {
 | |
|       /* It's not a string, so assume it's a compiled reop object */
 | |
|       /* XXX check that! */
 | |
|       Py_INCREF(pattern);
 | |
|       return (reopobject*)pattern;
 | |
|     }
 | |
|   if (cache_dict==NULL)
 | |
|     {
 | |
|       cache_dict=PyDict_New();
 | |
|       if (cache_dict==NULL) 
 | |
| 	{
 | |
| 	  return (reopobject*)NULL;
 | |
| 	}
 | |
|     }
 | |
| 
 | |
|   /* See if the pattern has already been cached; if so, return that
 | |
|      reop object */
 | |
|   p2=(reopobject*)PyDict_GetItem(cache_dict, pattern);
 | |
|   if (p2)
 | |
|     {
 | |
|       Py_INCREF(p2);
 | |
|       return (reopobject*)p2;
 | |
|     }
 | |
| 
 | |
|   /* Compile the pattern and cache it */
 | |
|   p2=(reopobject*)newreopobject(pattern, NULL, pattern, NULL);
 | |
|   if (!p2) return p2;
 | |
|   PyDict_SetItem(cache_dict, pattern, (PyObject*)p2);
 | |
|   return p2;
 | |
| }
 | |
| 
 | |
| 
 | |
| static PyObject *
 | |
| internal_split(args, retain)
 | |
| 	PyObject *args;
 | |
| 	int retain;
 | |
| {
 | |
|   PyObject *newlist, *s;
 | |
|   reopobject *pattern;
 | |
|   int maxsplit=0, count=0, length, next=0, result;
 | |
|   int match_end=0; /* match_start is defined below */
 | |
|   unsigned char *start;
 | |
| 
 | |
|   if (!PyArg_ParseTuple(args, "s#Oi", &start, &length, &pattern,
 | |
| 			&maxsplit))
 | |
|     {
 | |
|       PyErr_Clear();
 | |
|       if (!PyArg_ParseTuple(args, "s#O", &start, &length, &pattern))
 | |
| 	return NULL;
 | |
|     }
 | |
|   pattern=cached_compile((PyObject *)pattern);
 | |
|   if (!pattern) return NULL;
 | |
| 
 | |
|   newlist=PyList_New(0);
 | |
|   if (!newlist) return NULL;
 | |
|   
 | |
|   do
 | |
|     {
 | |
|       result = re_search(&pattern->re_patbuf, 
 | |
| 			     start, length, next, length-next,
 | |
| 			     &pattern->re_regs);
 | |
|       if (result < -1)
 | |
| 	{  /* Erk... an error happened during the reop search */
 | |
| 	  Py_DECREF(newlist);
 | |
| 	  PyErr_SetString(ReopError, "match failure");
 | |
| 	  return NULL;
 | |
| 	}
 | |
|       if (next<=result) 
 | |
| 	{
 | |
| 	  int match_start=pattern->re_regs.start[0];
 | |
| 	  int oldmatch_end=match_end;
 | |
| 	  match_end=pattern->re_regs.end[0];
 | |
| 
 | |
| 	  if (match_start==match_end) 
 | |
| 	    { /* A zero-length match; increment to the next position */
 | |
| 	      next=result+1;
 | |
| 	      match_end=oldmatch_end;
 | |
| 	      continue;
 | |
| 	    }
 | |
| 
 | |
| 	  /* Append the string up to the start of the match */
 | |
| 	  s=PyString_FromStringAndSize(start+oldmatch_end, match_start-oldmatch_end);
 | |
| 	  if (!s) 
 | |
| 	    {
 | |
| 	      Py_DECREF(newlist);
 | |
| 	      return NULL;
 | |
| 	    }
 | |
| 	  PyList_Append(newlist, s);
 | |
| 	  Py_DECREF(s);
 | |
| 
 | |
| 	  if (retain)
 | |
| 	    {
 | |
| 	      /* Append a string containing whatever matched */
 | |
| 	      s=PyString_FromStringAndSize(start+match_start, match_end-match_start);
 | |
| 	      if (!s) 
 | |
| 		{
 | |
| 		  Py_DECREF(newlist);
 | |
| 		  return NULL;
 | |
| 		}
 | |
| 	      PyList_Append(newlist, s);
 | |
| 	      Py_DECREF(s);
 | |
| 	    }
 | |
| 	  /* Update the pointer, and increment the count of splits */
 | |
| 	  next=match_end; count++;
 | |
| 	}
 | |
|     } while (result!=-1 && !(maxsplit && maxsplit==count) &&
 | |
| 	     next<length);
 | |
|   s=PyString_FromStringAndSize(start+match_end, length-match_end);
 | |
|   if (!s) 
 | |
|     {
 | |
|       Py_DECREF(newlist);
 | |
|       return NULL;
 | |
|     }
 | |
|   PyList_Append(newlist, s);
 | |
|   Py_DECREF(s);
 | |
|   Py_DECREF(pattern);
 | |
|   return newlist;
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| reop_split(self, args)
 | |
| 	PyObject *self;
 | |
| 	PyObject *args;
 | |
| {
 | |
|   return internal_split(args, 0);
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| reop_splitx(self, args)
 | |
| 	PyObject *self;
 | |
| 	PyObject *args;
 | |
| {
 | |
|   return internal_split(args, 1);
 | |
| }
 | |
| #endif
 | |
| 
 | |
| static struct PyMethodDef reop_global_methods[] = {
 | |
| 	{"match",	reop_match, 0},
 | |
| 	{"search",	reop_search, 0},
 | |
| 	{"expand_escape", reop_expand_escape, 1},
 | |
| 	{"_expand", reop__expand, 1},
 | |
| #if 0
 | |
| 	{"_optimize",	reop_optimize, 0},
 | |
| 	{"split",  reop_split, 0},
 | |
| 	{"splitx",  reop_splitx, 0},
 | |
| #endif
 | |
| 	{NULL,		NULL}		     /* sentinel */
 | |
| };
 | |
| 
 | |
| void
 | |
| initreop()
 | |
| {
 | |
| 	PyObject *m, *d, *k, *v, *o;
 | |
| 	int i;
 | |
| 	unsigned char *s;
 | |
| 	unsigned char j[2];
 | |
| 
 | |
| 	re_compile_initialize();
 | |
| 
 | |
| 	m = Py_InitModule("reop", reop_global_methods);
 | |
| 	d = PyModule_GetDict(m);
 | |
| 	
 | |
| 	/* Initialize reop.error exception */
 | |
| 	v = ReopError = PyErr_NewException("reop.error", NULL, NULL);
 | |
| 	if (v == NULL || PyDict_SetItemString(d, "error", v) != 0)
 | |
| 		goto finally;
 | |
| 	
 | |
| 	/* Initialize reop.casefold constant */
 | |
| 	if (!(v = PyString_FromStringAndSize((char *)NULL, 256)))
 | |
| 		goto finally;
 | |
| 	
 | |
| 	if (!(s = (unsigned char *)PyString_AsString(v)))
 | |
| 		goto finally;
 | |
| 
 | |
| 	for (i = 0; i < 256; i++) {
 | |
| 		if (isupper(i))
 | |
| 			s[i] = tolower(i);
 | |
| 		else
 | |
| 			s[i] = i;
 | |
| 	}
 | |
| 
 | |
| 	if (PyDict_SetItemString(d, "casefold", v) < 0)
 | |
| 		goto finally;
 | |
| 	Py_DECREF(v);
 | |
| 
 | |
| 	/* Initialize the syntax table */
 | |
| 
 | |
| 	o = PyDict_New();
 | |
| 	if (o == NULL)
 | |
| 	   goto finally;
 | |
| 
 | |
| 	j[1] = '\0';
 | |
| 	for (i = 0; i < 256; i++)
 | |
| 	{
 | |
| 	   j[0] = i;
 | |
| 	   k = PyString_FromStringAndSize((char *)j, 1);
 | |
| 	   if (k == NULL)
 | |
| 	      goto finally;
 | |
| 	   v = PyInt_FromLong(re_syntax_table[i]);
 | |
| 	   if (v == NULL)
 | |
| 	      goto finally;
 | |
| 	   if (PyDict_SetItem(o, k, v) < 0)
 | |
| 	      goto finally;
 | |
| 	   Py_DECREF(k);
 | |
| 	   Py_DECREF(v);
 | |
| 	}
 | |
| 
 | |
| 	if (PyDict_SetItemString(d, "syntax_table", o) < 0)
 | |
| 	   goto finally;
 | |
| 	Py_DECREF(o);
 | |
| 
 | |
| 	v = PyInt_FromLong(Sword);
 | |
| 	if (v == NULL)
 | |
| 	   goto finally;
 | |
| 
 | |
| 	if (PyDict_SetItemString(d, "word", v) < 0)
 | |
| 	   goto finally;
 | |
| 	Py_DECREF(v);
 | |
| 
 | |
| 	v = PyInt_FromLong(Swhitespace);
 | |
| 	if (v == NULL)
 | |
| 	   goto finally;
 | |
| 
 | |
| 	if (PyDict_SetItemString(d, "whitespace", v) < 0)
 | |
| 	   goto finally;
 | |
| 	Py_DECREF(v);
 | |
| 
 | |
| 	v = PyInt_FromLong(Sdigit);
 | |
| 	if (v == NULL)
 | |
| 	   goto finally;
 | |
| 
 | |
| 	if (PyDict_SetItemString(d, "digit", v) < 0)
 | |
| 	   goto finally;
 | |
| 	Py_DECREF(v);
 | |
| 
 | |
| 	PyDict_SetItemString(d, "NORMAL", PyInt_FromLong(NORMAL));
 | |
| 	PyDict_SetItemString(d, "CHARCLASS", PyInt_FromLong(CHARCLASS));
 | |
| 	PyDict_SetItemString(d, "REPLACEMENT", PyInt_FromLong(REPLACEMENT));
 | |
| 
 | |
| 	PyDict_SetItemString(d, "CHAR", PyInt_FromLong(CHAR));
 | |
| 	PyDict_SetItemString(d, "MEMORY_REFERENCE", PyInt_FromLong(MEMORY_REFERENCE));
 | |
| 	PyDict_SetItemString(d, "SYNTAX", PyInt_FromLong(SYNTAX));
 | |
| 	PyDict_SetItemString(d, "NOT_SYNTAX", PyInt_FromLong(NOT_SYNTAX));
 | |
| 	PyDict_SetItemString(d, "SET", PyInt_FromLong(SET));
 | |
| 	PyDict_SetItemString(d, "WORD_BOUNDARY", PyInt_FromLong(WORD_BOUNDARY));
 | |
| 	PyDict_SetItemString(d, "NOT_WORD_BOUNDARY", PyInt_FromLong(NOT_WORD_BOUNDARY));
 | |
| 	PyDict_SetItemString(d, "BEGINNING_OF_BUFFER", PyInt_FromLong(BEGINNING_OF_BUFFER));
 | |
| 	PyDict_SetItemString(d, "END_OF_BUFFER", PyInt_FromLong(END_OF_BUFFER));
 | |
| 
 | |
| 	if (!PyErr_Occurred())
 | |
| 		return;
 | |
| 
 | |
|   finally:
 | |
| 	/* Nothing */;
 | |
| }
 | |
| 
 |