mirror of
https://github.com/python/cpython.git
synced 2025-11-03 03:22:27 +00:00
New versions straight from Jeffrey Ollie's web site
This commit is contained in:
parent
db9e20f418
commit
db25f32849
3 changed files with 698 additions and 167 deletions
375
Modules/reopmodule.c
Normal file
375
Modules/reopmodule.c
Normal file
|
|
@ -0,0 +1,375 @@
|
|||
/***********************************************************
|
||||
Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
|
||||
The Netherlands.
|
||||
|
||||
All Rights Reserved
|
||||
|
||||
Permission to use, copy, modify, and distribute this software and its
|
||||
documentation for any purpose and without fee is hereby granted,
|
||||
provided that the above copyright notice appear in all copies and that
|
||||
both that copyright notice and this permission notice appear in
|
||||
supporting documentation, and that the names of Stichting Mathematisch
|
||||
Centrum or CWI or Corporation for National Research Initiatives or
|
||||
CNRI not be used in advertising or publicity pertaining to
|
||||
distribution of the software without specific, written prior
|
||||
permission.
|
||||
|
||||
While CWI is the initial source for this software, a modified version
|
||||
is made available by the Corporation for National Research Initiatives
|
||||
(CNRI) at the Internet address ftp://ftp.python.org.
|
||||
|
||||
STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
|
||||
REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
|
||||
CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
|
||||
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
|
||||
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
||||
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
******************************************************************/
|
||||
|
||||
/* $Id$ */
|
||||
|
||||
/* Regular expression objects */
|
||||
/* This uses Tatu Ylonen's copyleft-free reimplementation of
|
||||
GNU regular expressions */
|
||||
|
||||
#include "Python.h"
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
#include "regexpr.h"
|
||||
|
||||
static PyObject *ReopError; /* Exception */
|
||||
|
||||
static PyObject *
|
||||
makeresult(regs, num_regs)
|
||||
struct re_registers *regs;
|
||||
int num_regs;
|
||||
{
|
||||
PyObject *v;
|
||||
int i;
|
||||
static PyObject *filler = NULL;
|
||||
|
||||
if (filler == NULL) {
|
||||
filler = Py_BuildValue("(ii)", -1, -1);
|
||||
if (filler == NULL)
|
||||
return NULL;
|
||||
}
|
||||
v = PyTuple_New(num_regs);
|
||||
if (v == NULL)
|
||||
return NULL;
|
||||
|
||||
for (i = 0; i < num_regs; i++) {
|
||||
int lo = regs->start[i];
|
||||
int hi = regs->end[i];
|
||||
PyObject *w;
|
||||
if (lo == -1 && hi == -1) {
|
||||
w = filler;
|
||||
Py_INCREF(w);
|
||||
}
|
||||
else
|
||||
w = Py_BuildValue("(ii)", lo, hi);
|
||||
if (w == NULL || PyTuple_SetItem(v, i, w) < 0) {
|
||||
Py_DECREF(v);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
reop_match(self, args)
|
||||
PyObject *self;
|
||||
PyObject *args;
|
||||
{
|
||||
char *string;
|
||||
int fastmaplen, stringlen;
|
||||
int can_be_null, anchor, i;
|
||||
int num_regs, flags, pos, result;
|
||||
struct re_pattern_buffer bufp;
|
||||
struct re_registers re_regs;
|
||||
|
||||
if (!PyArg_Parse(args, "(s#iiis#is#i)",
|
||||
&(bufp.buffer), &(bufp.allocated),
|
||||
&num_regs, &flags, &can_be_null,
|
||||
&(bufp.fastmap), &fastmaplen,
|
||||
&anchor,
|
||||
&string, &stringlen,
|
||||
&pos))
|
||||
return NULL;
|
||||
|
||||
/* XXX sanity-check the input data */
|
||||
bufp.used=bufp.allocated;
|
||||
bufp.translate=NULL;
|
||||
bufp.fastmap_accurate=1;
|
||||
bufp.can_be_null=can_be_null;
|
||||
bufp.uses_registers=1;
|
||||
bufp.num_registers=num_regs;
|
||||
bufp.anchor=anchor;
|
||||
|
||||
for(i=0; i<num_regs; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
|
||||
|
||||
result = re_match(&bufp,
|
||||
string, stringlen, pos,
|
||||
&re_regs);
|
||||
if (result < -1) {
|
||||
/* Failure like stack overflow */
|
||||
PyErr_SetString(ReopError, "match failure");
|
||||
return NULL;
|
||||
}
|
||||
return makeresult(&re_regs, num_regs);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
reop_search(self, args)
|
||||
PyObject *self;
|
||||
PyObject *args;
|
||||
{
|
||||
char *string;
|
||||
int fastmaplen, stringlen;
|
||||
int can_be_null, anchor, i;
|
||||
int num_regs, flags, pos, result;
|
||||
struct re_pattern_buffer bufp;
|
||||
struct re_registers re_regs;
|
||||
|
||||
if (!PyArg_Parse(args, "(s#iiis#is#i)",
|
||||
&(bufp.buffer), &(bufp.allocated),
|
||||
&num_regs, &flags, &can_be_null,
|
||||
&(bufp.fastmap), &fastmaplen,
|
||||
&anchor,
|
||||
&string, &stringlen,
|
||||
&pos))
|
||||
return NULL;
|
||||
|
||||
/* XXX sanity-check the input data */
|
||||
bufp.used=bufp.allocated;
|
||||
bufp.translate=NULL;
|
||||
bufp.fastmap_accurate=1;
|
||||
bufp.can_be_null=can_be_null;
|
||||
bufp.uses_registers=1;
|
||||
bufp.num_registers=1;
|
||||
bufp.anchor=anchor;
|
||||
|
||||
for(i=0; i<num_regs; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
|
||||
|
||||
result = re_search(&bufp,
|
||||
string, stringlen, pos, stringlen-pos,
|
||||
&re_regs);
|
||||
if (result < -1) {
|
||||
/* Failure like stack overflow */
|
||||
PyErr_SetString(ReopError, "match failure");
|
||||
return NULL;
|
||||
}
|
||||
return makeresult(&re_regs, num_regs);
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* Functions originally in the regsub module.
|
||||
Added June 1, 1997.
|
||||
*/
|
||||
|
||||
/* A cache of previously used patterns is maintained. Notice that if
|
||||
you change the reop syntax flag, entries in the cache are
|
||||
invalidated.
|
||||
XXX Solution: use (syntax flag, pattern) as keys? Clear the cache
|
||||
every so often, or once it gets past a certain size?
|
||||
*/
|
||||
|
||||
static PyObject *cache_dict=NULL;
|
||||
|
||||
/* Accept an object; if it's a reop pattern, Py_INCREF it and return
|
||||
it. If it's a string, a reop object is compiled and cached.
|
||||
*/
|
||||
|
||||
static reopobject *
|
||||
cached_compile(pattern)
|
||||
PyObject *pattern;
|
||||
{
|
||||
reopobject *p2;
|
||||
|
||||
if (!PyString_Check(pattern))
|
||||
{
|
||||
/* It's not a string, so assume it's a compiled reop object */
|
||||
/* XXX check that! */
|
||||
Py_INCREF(pattern);
|
||||
return (reopobject*)pattern;
|
||||
}
|
||||
if (cache_dict==NULL)
|
||||
{
|
||||
cache_dict=PyDict_New();
|
||||
if (cache_dict==NULL)
|
||||
{
|
||||
return (reopobject*)NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* See if the pattern has already been cached; if so, return that
|
||||
reop object */
|
||||
p2=(reopobject*)PyDict_GetItem(cache_dict, pattern);
|
||||
if (p2)
|
||||
{
|
||||
Py_INCREF(p2);
|
||||
return (reopobject*)p2;
|
||||
}
|
||||
|
||||
/* Compile the pattern and cache it */
|
||||
p2=(reopobject*)newreopobject(pattern, NULL, pattern, NULL);
|
||||
if (!p2) return p2;
|
||||
PyDict_SetItem(cache_dict, pattern, (PyObject*)p2);
|
||||
return p2;
|
||||
}
|
||||
|
||||
|
||||
static PyObject *
|
||||
internal_split(args, retain)
|
||||
PyObject *args;
|
||||
int retain;
|
||||
{
|
||||
PyObject *newlist, *s;
|
||||
reopobject *pattern;
|
||||
int maxsplit=0, count=0, length, next=0, result;
|
||||
int match_end=0; /* match_start is defined below */
|
||||
char *start;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "s#Oi", &start, &length, &pattern,
|
||||
&maxsplit))
|
||||
{
|
||||
PyErr_Clear();
|
||||
if (!PyArg_ParseTuple(args, "s#O", &start, &length, &pattern))
|
||||
return NULL;
|
||||
}
|
||||
pattern=cached_compile((PyObject *)pattern);
|
||||
if (!pattern) return NULL;
|
||||
|
||||
newlist=PyList_New(0);
|
||||
if (!newlist) return NULL;
|
||||
|
||||
do
|
||||
{
|
||||
result = re_search(&pattern->re_patbuf,
|
||||
start, length, next, length-next,
|
||||
&pattern->re_regs);
|
||||
if (result < -1)
|
||||
{ /* Erk... an error happened during the reop search */
|
||||
Py_DECREF(newlist);
|
||||
PyErr_SetString(ReopError, "match failure");
|
||||
return NULL;
|
||||
}
|
||||
if (next<=result)
|
||||
{
|
||||
int match_start=pattern->re_regs.start[0];
|
||||
int oldmatch_end=match_end;
|
||||
match_end=pattern->re_regs.end[0];
|
||||
|
||||
if (match_start==match_end)
|
||||
{ /* A zero-length match; increment to the next position */
|
||||
next=result+1;
|
||||
match_end=oldmatch_end;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Append the string up to the start of the match */
|
||||
s=PyString_FromStringAndSize(start+oldmatch_end, match_start-oldmatch_end);
|
||||
if (!s)
|
||||
{
|
||||
Py_DECREF(newlist);
|
||||
return NULL;
|
||||
}
|
||||
PyList_Append(newlist, s);
|
||||
Py_DECREF(s);
|
||||
|
||||
if (retain)
|
||||
{
|
||||
/* Append a string containing whatever matched */
|
||||
s=PyString_FromStringAndSize(start+match_start, match_end-match_start);
|
||||
if (!s)
|
||||
{
|
||||
Py_DECREF(newlist);
|
||||
return NULL;
|
||||
}
|
||||
PyList_Append(newlist, s);
|
||||
Py_DECREF(s);
|
||||
}
|
||||
/* Update the pointer, and increment the count of splits */
|
||||
next=match_end; count++;
|
||||
}
|
||||
} while (result!=-1 && !(maxsplit && maxsplit==count) &&
|
||||
next<length);
|
||||
s=PyString_FromStringAndSize(start+match_end, length-match_end);
|
||||
if (!s)
|
||||
{
|
||||
Py_DECREF(newlist);
|
||||
return NULL;
|
||||
}
|
||||
PyList_Append(newlist, s);
|
||||
Py_DECREF(s);
|
||||
Py_DECREF(pattern);
|
||||
return newlist;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
reop_split(self, args)
|
||||
PyObject *self;
|
||||
PyObject *args;
|
||||
{
|
||||
return internal_split(args, 0);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
reop_splitx(self, args)
|
||||
PyObject *self;
|
||||
PyObject *args;
|
||||
{
|
||||
return internal_split(args, 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
static struct PyMethodDef reop_global_methods[] = {
|
||||
{"match", reop_match, 0},
|
||||
{"search", reop_search, 0},
|
||||
#if 0
|
||||
{"split", reop_split, 0},
|
||||
{"splitx", reop_splitx, 0},
|
||||
#endif
|
||||
{NULL, NULL} /* sentinel */
|
||||
};
|
||||
|
||||
void
|
||||
initreop()
|
||||
{
|
||||
PyObject *m, *d, *v;
|
||||
int i;
|
||||
char *s;
|
||||
|
||||
m = Py_InitModule("reop", reop_global_methods);
|
||||
d = PyModule_GetDict(m);
|
||||
|
||||
/* Initialize reop.error exception */
|
||||
v = ReopError = PyString_FromString("reop.error");
|
||||
if (v == NULL || PyDict_SetItemString(d, "error", v) != 0)
|
||||
goto finally;
|
||||
|
||||
/* Initialize reop.casefold constant */
|
||||
if (!(v = PyString_FromStringAndSize((char *)NULL, 256)))
|
||||
goto finally;
|
||||
|
||||
if (!(s = PyString_AsString(v)))
|
||||
goto finally;
|
||||
|
||||
for (i = 0; i < 256; i++) {
|
||||
if (isupper(i))
|
||||
s[i] = tolower(i);
|
||||
else
|
||||
s[i] = i;
|
||||
}
|
||||
if (PyDict_SetItemString(d, "casefold", v) < 0)
|
||||
goto finally;
|
||||
Py_DECREF(v);
|
||||
|
||||
if (!PyErr_Occurred())
|
||||
return;
|
||||
finally:
|
||||
Py_FatalError("can't initialize reop module");
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue