fixed #449964: sre.sub raises an exception if the template contains a \g<x> group reference followed by a character escape

(also restructured a few things on the way to fixing #449000)
This commit is contained in:
Fredrik Lundh 2001-09-18 20:55:24 +00:00
parent ab3b0343b8
commit 59b68656f8
4 changed files with 30 additions and 21 deletions

View file

@ -251,11 +251,13 @@ def _subn(pattern, template, text, count=0, sub=0):
else: else:
template = _compile_repl(template, pattern) template = _compile_repl(template, pattern)
literals = template[1] literals = template[1]
sub = 0 # temporarly disabled, see bug #449000 if sub and not count:
if (sub and not count and pattern._isliteral() and literal = pattern._getliteral()
len(literals) == 1 and literals[0]): if literal and "\\" in literal:
# shortcut: both pattern and string are literals literal = None # may contain untranslated escapes
return string.replace(text, pattern.pattern, literals[0]), 0 if literal is not None and len(literals) == 1 and literals[0]:
# shortcut: both pattern and string are literals
return string.replace(text, pattern.pattern, literals[0]), 0
def filter(match, template=template): def filter(match, template=template):
return sre_parse.expand_template(template, match) return sre_parse.expand_template(template, match)
n = i = 0 n = i = 0

View file

@ -647,9 +647,9 @@ def parse_template(source, pattern):
p.append((LITERAL, literal)) p.append((LITERAL, literal))
sep = source[:0] sep = source[:0]
if type(sep) is type(""): if type(sep) is type(""):
char = chr makechar = chr
else: else:
char = unichr makechar = unichr
while 1: while 1:
this = s.get() this = s.get()
if this is None: if this is None:
@ -693,14 +693,14 @@ def parse_template(source, pattern):
break break
if not code: if not code:
this = this[1:] this = this[1:]
code = LITERAL, char(atoi(this[-6:], 8) & 0xff) code = LITERAL, makechar(atoi(this[-6:], 8) & 0xff)
if code[0] is LITERAL: if code[0] is LITERAL:
literal(code[1]) literal(code[1])
else: else:
a(code) a(code)
else: else:
try: try:
this = char(ESCAPES[this][1]) this = makechar(ESCAPES[this][1])
except KeyError: except KeyError:
pass pass
literal(this) literal(this)

View file

@ -104,6 +104,9 @@ test(r"""sre.sub(r'(?P<a>x)', '\g<a>\g<1>', 'xx')""", 'xxxx')
test(r"""sre.sub(r'(?P<unk>x)', '\g<unk>\g<unk>', 'xx')""", 'xxxx') test(r"""sre.sub(r'(?P<unk>x)', '\g<unk>\g<unk>', 'xx')""", 'xxxx')
test(r"""sre.sub(r'(?P<unk>x)', '\g<1>\g<1>', 'xx')""", 'xxxx') test(r"""sre.sub(r'(?P<unk>x)', '\g<1>\g<1>', 'xx')""", 'xxxx')
# bug 449964: fails for group followed by other escape
test(r"""sre.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx')""", 'xx\bxx\b')
test(r"""sre.sub(r'a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a')""", '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D') test(r"""sre.sub(r'a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a')""", '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
test(r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", '\t\n\v\r\f\a') test(r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", '\t\n\v\r\f\a')
test(r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7))) test(r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

View file

@ -31,7 +31,7 @@
* 2001-04-28 fl added __copy__ methods (work in progress) * 2001-04-28 fl added __copy__ methods (work in progress)
* 2001-05-14 fl fixes for 1.5.2 * 2001-05-14 fl fixes for 1.5.2
* 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis) * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
* 2001-09-18 fl * 2001-09-18 fl added _getliteral helper
* *
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
* *
@ -1959,25 +1959,29 @@ pattern_deepcopy(PatternObject* self, PyObject* args)
} }
static PyObject* static PyObject*
pattern_isliteral(PatternObject* self, PyObject* args) pattern_getliteral(PatternObject* self, PyObject* args)
{ {
/* internal: return true if pattern consists of literal text only */ /* internal: if the pattern is a literal string, return that
string. otherwise, return None */
SRE_CODE* code; SRE_CODE* code;
PyObject* isliteral; PyObject* literal;
if (!PyArg_ParseTuple(args, ":_isliteral")) if (!PyArg_ParseTuple(args, ":_getliteral"))
return NULL; return NULL;
code = PatternObject_GetCode(self); code = PatternObject_GetCode(self);
if (code[0] == SRE_OP_INFO && code[2] & SRE_INFO_LITERAL) if (code[0] == SRE_OP_INFO && code[2] & SRE_INFO_LITERAL) {
isliteral = Py_True; /* FIXME: extract literal string from code buffer. we can't
else use the pattern member, since it may contain untranslated
isliteral = Py_False; escape codes (see SF bug 449000) */
literal = Py_None;
} else
literal = Py_None; /* no literal */
Py_INCREF(isliteral); Py_INCREF(literal);
return isliteral; return literal;
} }
static PyMethodDef pattern_methods[] = { static PyMethodDef pattern_methods[] = {
@ -1990,7 +1994,7 @@ static PyMethodDef pattern_methods[] = {
{"scanner", (PyCFunction) pattern_scanner, METH_VARARGS}, {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
{"__copy__", (PyCFunction) pattern_copy, METH_VARARGS}, {"__copy__", (PyCFunction) pattern_copy, METH_VARARGS},
{"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_VARARGS}, {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_VARARGS},
{"_isliteral", (PyCFunction) pattern_isliteral, METH_VARARGS}, {"_getliteral", (PyCFunction) pattern_getliteral, METH_VARARGS},
{NULL, NULL} {NULL, NULL}
}; };