mirror of
https://github.com/python/cpython.git
synced 2025-08-03 16:39:00 +00:00
Initial revision
This commit is contained in:
parent
7e73fd0024
commit
6f4c43d4fd
1 changed files with 306 additions and 0 deletions
306
Modules/regexmodule.c
Normal file
306
Modules/regexmodule.c
Normal file
|
@ -0,0 +1,306 @@
|
|||
/*
|
||||
XXX support translate table
|
||||
XXX support range parameter on search
|
||||
XXX support mstop parameter on search
|
||||
*/
|
||||
|
||||
/***********************************************************
|
||||
Copyright 1991 by Stichting Mathematisch Centrum, Amsterdam, The
|
||||
Netherlands.
|
||||
|
||||
All Rights Reserved
|
||||
|
||||
Permission to use, copy, modify, and distribute this software and its
|
||||
documentation for any purpose and without fee is hereby granted,
|
||||
provided that the above copyright notice appear in all copies and that
|
||||
both that copyright notice and this permission notice appear in
|
||||
supporting documentation, and that the names of Stichting Mathematisch
|
||||
Centrum or CWI not be used in advertising or publicity pertaining to
|
||||
distribution of the software without specific, written prior permission.
|
||||
|
||||
STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
|
||||
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
|
||||
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
******************************************************************/
|
||||
|
||||
/* Regular expression objects */
|
||||
/* This uses GNU regex.c, from subdirectory regex !!! */
|
||||
|
||||
#include "allobjects.h"
|
||||
#include "modsupport.h"
|
||||
|
||||
#include "regex.h"
|
||||
|
||||
static object *RegexError; /* Exception */
|
||||
|
||||
typedef struct {
|
||||
OB_HEAD
|
||||
object *re_string; /* The string (for printing) */
|
||||
struct re_pattern_buffer re_patbuf; /* The compiled expression */
|
||||
struct re_registers re_regs; /* The registers from the last match */
|
||||
int re_regs_valid; /* Nonzero if the registers are valid */
|
||||
char re_fastmap[256]; /* Storage for fastmap */
|
||||
} regexobject;
|
||||
|
||||
/* Regex object methods */
|
||||
|
||||
static void
|
||||
reg_dealloc(re)
|
||||
regexobject *re;
|
||||
{
|
||||
XDECREF(re->re_string);
|
||||
XDEL(re->re_patbuf.buffer);
|
||||
XDEL(re->re_patbuf.translate);
|
||||
DEL(re);
|
||||
}
|
||||
|
||||
static object *
|
||||
makeresult(regs)
|
||||
struct re_registers *regs;
|
||||
{
|
||||
object *v = newtupleobject(RE_NREGS);
|
||||
if (v != NULL) {
|
||||
int i;
|
||||
for (i = 0; i < RE_NREGS; i++) {
|
||||
object *w, *u;
|
||||
if ( (w = newtupleobject(2)) == NULL ||
|
||||
(u = newintobject(regs->start[i])) == NULL ||
|
||||
settupleitem(w, 0, u) != 0 ||
|
||||
(u = newintobject(regs->end[i])) == NULL ||
|
||||
settupleitem(w, 1, u) != 0) {
|
||||
XDECREF(w);
|
||||
DECREF(v);
|
||||
return NULL;
|
||||
}
|
||||
settupleitem(v, i, w);
|
||||
}
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
static object *
|
||||
reg_match(re, args)
|
||||
regexobject *re;
|
||||
object *args;
|
||||
{
|
||||
object *v;
|
||||
char *buffer;
|
||||
int offset;
|
||||
int result;
|
||||
if (args != NULL && is_stringobject(args)) {
|
||||
v = args;
|
||||
offset = 0;
|
||||
}
|
||||
else if (!getstrintarg(args, &v, &offset))
|
||||
return NULL;
|
||||
buffer = getstringvalue(v);
|
||||
re->re_regs_valid = 0;
|
||||
result = re_match(&re->re_patbuf, buffer, getstringsize(v),
|
||||
offset, &re->re_regs);
|
||||
if (result < -1) {
|
||||
/* Failure like stack overflow */
|
||||
err_setstr(RegexError, "match failure");
|
||||
return NULL;
|
||||
}
|
||||
re->re_regs_valid = result >= 0;
|
||||
return newintobject((long)result); /* Length of the match or -1 */
|
||||
}
|
||||
static object *
|
||||
reg_search(re, args)
|
||||
regexobject *re;
|
||||
object *args;
|
||||
{
|
||||
object *v;
|
||||
char *buffer;
|
||||
int size;
|
||||
int offset;
|
||||
int range;
|
||||
int result;
|
||||
if (args != NULL && is_stringobject(args)) {
|
||||
v = args;
|
||||
offset = 0;
|
||||
}
|
||||
else if (!getstrintarg(args, &v, &offset))
|
||||
return NULL;
|
||||
buffer = getstringvalue(v);
|
||||
size = getstringsize(v);
|
||||
if (offset < 0 || offset > size) {
|
||||
err_setstr(RegexError, "search offset out of range");
|
||||
return NULL;
|
||||
}
|
||||
range = size - offset + 1;
|
||||
re->re_regs_valid = 0;
|
||||
result = re_search(&re->re_patbuf, buffer, size, offset, range,
|
||||
&re->re_regs);
|
||||
if (result < -1) {
|
||||
/* Failure like stack overflow */
|
||||
err_setstr(RegexError, "match failure");
|
||||
return NULL;
|
||||
}
|
||||
re->re_regs_valid = result >= 0;
|
||||
return newintobject((long)result); /* Position of the match or -1 */
|
||||
}
|
||||
|
||||
static object *
|
||||
reg_regs(re, args)
|
||||
regexobject *re;
|
||||
object *args;
|
||||
{
|
||||
if (!re->re_regs_valid) {
|
||||
err_setstr(RegexError,
|
||||
"regs only valid after successful match/search");
|
||||
return NULL;
|
||||
}
|
||||
return makeresult(&re->re_regs);
|
||||
}
|
||||
|
||||
static struct methodlist reg_methods[] = {
|
||||
{"match", reg_match},
|
||||
{"search", reg_search},
|
||||
{"regs", reg_regs},
|
||||
{NULL, NULL} /* sentinel */
|
||||
};
|
||||
|
||||
static object *
|
||||
reg_getattr(re, name)
|
||||
regexobject *re;
|
||||
char *name;
|
||||
{
|
||||
return findmethod(reg_methods, (object *)re, name);
|
||||
}
|
||||
|
||||
static typeobject Regextype = {
|
||||
OB_HEAD_INIT(&Typetype)
|
||||
0, /*ob_size*/
|
||||
"regex", /*tp_name*/
|
||||
sizeof(regexobject), /*tp_size*/
|
||||
0, /*tp_itemsize*/
|
||||
/* methods */
|
||||
reg_dealloc, /*tp_dealloc*/
|
||||
0, /*tp_print*/
|
||||
reg_getattr, /*tp_getattr*/
|
||||
0, /*tp_setattr*/
|
||||
0, /*tp_compare*/
|
||||
0, /*tp_repr*/
|
||||
};
|
||||
|
||||
static regexobject *
|
||||
newregexobject(string)
|
||||
object *string;
|
||||
{
|
||||
regexobject *re;
|
||||
re = NEWOBJ(regexobject, &Regextype);
|
||||
if (re != NULL) {
|
||||
char *error;
|
||||
INCREF(string);
|
||||
re->re_string = string;
|
||||
re->re_patbuf.buffer = NULL;
|
||||
re->re_patbuf.allocated = 0;
|
||||
re->re_patbuf.fastmap = re->re_fastmap;
|
||||
re->re_patbuf.translate = NULL;
|
||||
re->re_regs_valid = 0;
|
||||
error = re_compile_pattern(getstringvalue(string),
|
||||
getstringsize(string),
|
||||
&re->re_patbuf);
|
||||
if (error != NULL) {
|
||||
err_setstr(RegexError, error);
|
||||
DECREF(re);
|
||||
re = NULL;
|
||||
}
|
||||
}
|
||||
return re;
|
||||
}
|
||||
|
||||
static object *
|
||||
regex_compile(self, args)
|
||||
object *self;
|
||||
object *args;
|
||||
{
|
||||
object *string;
|
||||
if (!getstrarg(args, &string))
|
||||
return NULL;
|
||||
return (object *)newregexobject(string);
|
||||
}
|
||||
|
||||
static object *cache_pat;
|
||||
static object *cache_prog;
|
||||
|
||||
static int
|
||||
update_cache(pat)
|
||||
object *pat;
|
||||
{
|
||||
if (pat != cache_pat) {
|
||||
XDECREF(cache_pat);
|
||||
cache_pat = NULL;
|
||||
XDECREF(cache_prog);
|
||||
cache_prog = regex_compile((object *)NULL, pat);
|
||||
if (cache_prog == NULL)
|
||||
return -1;
|
||||
cache_pat = pat;
|
||||
INCREF(cache_pat);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static object *
|
||||
regex_match(self, args)
|
||||
object *self;
|
||||
object *args;
|
||||
{
|
||||
object *pat, *string;
|
||||
if (!getstrstrarg(args, &pat, &string))
|
||||
return NULL;
|
||||
if (update_cache(pat) < 0)
|
||||
return NULL;
|
||||
return reg_match((regexobject *)cache_prog, string);
|
||||
}
|
||||
|
||||
static object *
|
||||
regex_search(self, args)
|
||||
object *self;
|
||||
object *args;
|
||||
{
|
||||
object *pat, *string;
|
||||
if (!getstrstrarg(args, &pat, &string))
|
||||
return NULL;
|
||||
if (update_cache(pat) < 0)
|
||||
return NULL;
|
||||
return reg_search((regexobject *)cache_prog, string);
|
||||
}
|
||||
|
||||
static object *
|
||||
regex_set_syntax(self, args)
|
||||
object *self, *args;
|
||||
{
|
||||
int syntax;
|
||||
if (!getintarg(args, &syntax))
|
||||
return NULL;
|
||||
syntax = re_set_syntax(syntax);
|
||||
return newintobject((long)syntax);
|
||||
}
|
||||
|
||||
static struct methodlist regex_global_methods[] = {
|
||||
{"compile", regex_compile},
|
||||
{"match", regex_match},
|
||||
{"search", regex_search},
|
||||
{"set_syntax", regex_set_syntax},
|
||||
{NULL, NULL} /* sentinel */
|
||||
};
|
||||
|
||||
initregex()
|
||||
{
|
||||
object *m, *d;
|
||||
|
||||
m = initmodule("regex", regex_global_methods);
|
||||
d = getmoduledict(m);
|
||||
|
||||
/* Initialize regex.error exception */
|
||||
RegexError = newstringobject("regex.error");
|
||||
if (RegexError == NULL || dictinsert(d, "error", RegexError) != 0)
|
||||
fatal("can't define regex.error");
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue