mirror of
https://github.com/python/cpython.git
synced 2025-08-30 21:48:47 +00:00
- fixed split behaviour on empty matches
- fixed compiler problems when using locale/unicode flags
- fixed group/octal code parsing in sub/subn templates
This commit is contained in:
parent
5d6ae76c09
commit
01016fe972
4 changed files with 55 additions and 42 deletions
15
Lib/sre.py
15
Lib/sre.py
|
@ -109,16 +109,13 @@ def _subn(pattern, template, string, count=0):
|
||||||
m = c.search()
|
m = c.search()
|
||||||
if not m:
|
if not m:
|
||||||
break
|
break
|
||||||
j = m.start()
|
b, e = m.span()
|
||||||
if j > i:
|
if i < b:
|
||||||
append(string[i:j])
|
append(string[i:b])
|
||||||
append(filter(m))
|
append(filter(m))
|
||||||
i = m.end()
|
i = e
|
||||||
if i <= j:
|
|
||||||
break
|
|
||||||
n = n + 1
|
n = n + 1
|
||||||
if i < len(string):
|
append(string[i:])
|
||||||
append(string[i:])
|
|
||||||
return string[:0].join(s), n
|
return string[:0].join(s), n
|
||||||
|
|
||||||
def _split(pattern, string, maxsplit=0):
|
def _split(pattern, string, maxsplit=0):
|
||||||
|
@ -128,7 +125,7 @@ def _split(pattern, string, maxsplit=0):
|
||||||
append = s.append
|
append = s.append
|
||||||
extend = s.extend
|
extend = s.extend
|
||||||
c = pattern.scanner(string)
|
c = pattern.scanner(string)
|
||||||
g = c.groups
|
g = pattern.groups
|
||||||
while not maxsplit or n < maxsplit:
|
while not maxsplit or n < maxsplit:
|
||||||
m = c.search()
|
m = c.search()
|
||||||
if not m:
|
if not m:
|
||||||
|
|
|
@ -61,9 +61,9 @@ def _compile(code, pattern, flags):
|
||||||
elif op is CATEGORY:
|
elif op is CATEGORY:
|
||||||
emit(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
if flags & SRE_FLAG_LOCALE:
|
if flags & SRE_FLAG_LOCALE:
|
||||||
emit(CH_LOCALE[CHCODES[av]])
|
emit(CHCODES[CH_LOCALE[av]])
|
||||||
elif flags & SRE_FLAG_UNICODE:
|
elif flags & SRE_FLAG_UNICODE:
|
||||||
emit(CH_UNICODE[CHCODES[av]])
|
emit(CHCODES[CH_UNICODE[av]])
|
||||||
else:
|
else:
|
||||||
emit(CHCODES[av])
|
emit(CHCODES[av])
|
||||||
elif op is GROUP:
|
elif op is GROUP:
|
||||||
|
@ -92,9 +92,9 @@ def _compile(code, pattern, flags):
|
||||||
emit(fixup(av[1]))
|
emit(fixup(av[1]))
|
||||||
elif op is CATEGORY:
|
elif op is CATEGORY:
|
||||||
if flags & SRE_FLAG_LOCALE:
|
if flags & SRE_FLAG_LOCALE:
|
||||||
emit(CH_LOCALE[CHCODES[av]])
|
emit(CHCODES[CH_LOCALE[av]])
|
||||||
elif flags & SRE_FLAG_UNICODE:
|
elif flags & SRE_FLAG_UNICODE:
|
||||||
emit(CH_UNICODE[CHCODES[av]])
|
emit(CHCODES[CH_UNICODE[av]])
|
||||||
else:
|
else:
|
||||||
emit(CHCODES[av])
|
emit(CHCODES[av])
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -30,26 +30,27 @@ HEXDIGITS = tuple("0123456789abcdefABCDEF")
|
||||||
WHITESPACE = string.whitespace
|
WHITESPACE = string.whitespace
|
||||||
|
|
||||||
ESCAPES = {
|
ESCAPES = {
|
||||||
"\\a": (LITERAL, chr(7)),
|
r"\a": (LITERAL, chr(7)),
|
||||||
"\\b": (LITERAL, chr(8)),
|
r"\b": (LITERAL, chr(8)),
|
||||||
"\\f": (LITERAL, chr(12)),
|
r"\f": (LITERAL, chr(12)),
|
||||||
"\\n": (LITERAL, chr(10)),
|
r"\n": (LITERAL, chr(10)),
|
||||||
"\\r": (LITERAL, chr(13)),
|
r"\r": (LITERAL, chr(13)),
|
||||||
"\\t": (LITERAL, chr(9)),
|
r"\t": (LITERAL, chr(9)),
|
||||||
"\\v": (LITERAL, chr(11))
|
r"\v": (LITERAL, chr(11)),
|
||||||
|
r"\\": (LITERAL, "\\")
|
||||||
}
|
}
|
||||||
|
|
||||||
CATEGORIES = {
|
CATEGORIES = {
|
||||||
"\\A": (AT, AT_BEGINNING), # start of string
|
r"\A": (AT, AT_BEGINNING), # start of string
|
||||||
"\\b": (AT, AT_BOUNDARY),
|
r"\b": (AT, AT_BOUNDARY),
|
||||||
"\\B": (AT, AT_NON_BOUNDARY),
|
r"\B": (AT, AT_NON_BOUNDARY),
|
||||||
"\\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
|
r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
|
||||||
"\\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
|
r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
|
||||||
"\\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
|
r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
|
||||||
"\\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
|
r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
|
||||||
"\\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
|
r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
|
||||||
"\\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
|
r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
|
||||||
"\\Z": (AT, AT_END), # end of string
|
r"\Z": (AT, AT_END), # end of string
|
||||||
}
|
}
|
||||||
|
|
||||||
FLAGS = {
|
FLAGS = {
|
||||||
|
@ -185,11 +186,11 @@ def isname(name):
|
||||||
return 0
|
return 0
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
def _group(escape, state):
|
def _group(escape, groups):
|
||||||
# check if the escape string represents a valid group
|
# check if the escape string represents a valid group
|
||||||
try:
|
try:
|
||||||
group = int(escape[1:])
|
group = int(escape[1:])
|
||||||
if group and group < state.groups:
|
if group and group < groups:
|
||||||
return group
|
return group
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
|
@ -239,10 +240,10 @@ def _escape(source, escape, state):
|
||||||
return LITERAL, chr(int(escape[-4:], 16) & 0xff)
|
return LITERAL, chr(int(escape[-4:], 16) & 0xff)
|
||||||
elif escape[1:2] in DIGITS:
|
elif escape[1:2] in DIGITS:
|
||||||
while 1:
|
while 1:
|
||||||
group = _group(escape, state)
|
group = _group(escape, state.groups)
|
||||||
if group:
|
if group:
|
||||||
if (not source.next or
|
if (not source.next or
|
||||||
not _group(escape + source.next, state)):
|
not _group(escape + source.next, state.groups)):
|
||||||
return GROUP, group
|
return GROUP, group
|
||||||
escape = escape + source.get()
|
escape = escape + source.get()
|
||||||
elif source.next in OCTDIGITS:
|
elif source.next in OCTDIGITS:
|
||||||
|
@ -534,6 +535,7 @@ def parse_template(source, pattern):
|
||||||
if this is None:
|
if this is None:
|
||||||
break # end of replacement string
|
break # end of replacement string
|
||||||
if this and this[0] == "\\":
|
if this and this[0] == "\\":
|
||||||
|
# group
|
||||||
if this == "\\g":
|
if this == "\\g":
|
||||||
name = ""
|
name = ""
|
||||||
if s.match("<"):
|
if s.match("<"):
|
||||||
|
@ -557,15 +559,29 @@ def parse_template(source, pattern):
|
||||||
raise IndexError, "unknown group name"
|
raise IndexError, "unknown group name"
|
||||||
a((MARK, index))
|
a((MARK, index))
|
||||||
elif len(this) > 1 and this[1] in DIGITS:
|
elif len(this) > 1 and this[1] in DIGITS:
|
||||||
while s.next in DIGITS:
|
code = None
|
||||||
this = this + s.get()
|
while 1:
|
||||||
a((MARK, int(this[1:])))
|
group = _group(this, pattern.groups+1)
|
||||||
|
if group:
|
||||||
|
if (not s.next or
|
||||||
|
not _group(this + s.next, pattern.groups+1)):
|
||||||
|
code = MARK, int(group)
|
||||||
|
break
|
||||||
|
elif s.next in OCTDIGITS:
|
||||||
|
this = this + s.get()
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
if not code:
|
||||||
|
this = this[1:]
|
||||||
|
# FIXME: support unicode characters!
|
||||||
|
code = LITERAL, chr(int(this[-6:], 8) & 0xff)
|
||||||
|
a(code)
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
a(ESCAPES[this])
|
a(ESCAPES[this])
|
||||||
except KeyError:
|
except KeyError:
|
||||||
for char in this:
|
for c in this:
|
||||||
a((LITERAL, char))
|
a((LITERAL, c))
|
||||||
else:
|
else:
|
||||||
a((LITERAL, this))
|
a((LITERAL, this))
|
||||||
return p
|
return p
|
||||||
|
|
|
@ -1534,6 +1534,9 @@ pattern_getattr(PatternObject* self, char* name)
|
||||||
if (!strcmp(name, "flags"))
|
if (!strcmp(name, "flags"))
|
||||||
return Py_BuildValue("i", self->flags);
|
return Py_BuildValue("i", self->flags);
|
||||||
|
|
||||||
|
if (!strcmp(name, "groups"))
|
||||||
|
return Py_BuildValue("i", self->groups);
|
||||||
|
|
||||||
if (!strcmp(name, "groupindex") && self->groupindex) {
|
if (!strcmp(name, "groupindex") && self->groupindex) {
|
||||||
Py_INCREF(self->groupindex);
|
Py_INCREF(self->groupindex);
|
||||||
return self->groupindex;
|
return self->groupindex;
|
||||||
|
@ -1939,9 +1942,6 @@ scanner_getattr(ScannerObject* self, char* name)
|
||||||
return self->pattern;
|
return self->pattern;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!strcmp(name, "groups"))
|
|
||||||
return Py_BuildValue("i", ((PatternObject*) self->pattern)->groups);
|
|
||||||
|
|
||||||
PyErr_SetString(PyExc_AttributeError, name);
|
PyErr_SetString(PyExc_AttributeError, name);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue