mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
Issue #22578: Added attributes to the re.error class.
This commit is contained in:
parent
eb99e51574
commit
ad446d57a9
5 changed files with 174 additions and 62 deletions
|
@ -733,13 +733,36 @@ form.
|
||||||
Clear the regular expression cache.
|
Clear the regular expression cache.
|
||||||
|
|
||||||
|
|
||||||
.. exception:: error
|
.. exception:: error(msg, pattern=None, pos=None)
|
||||||
|
|
||||||
Exception raised when a string passed to one of the functions here is not a
|
Exception raised when a string passed to one of the functions here is not a
|
||||||
valid regular expression (for example, it might contain unmatched parentheses)
|
valid regular expression (for example, it might contain unmatched parentheses)
|
||||||
or when some other error occurs during compilation or matching. It is never an
|
or when some other error occurs during compilation or matching. It is never an
|
||||||
error if a string contains no match for a pattern.
|
error if a string contains no match for a pattern. The error instance has
|
||||||
|
the following additional attributes:
|
||||||
|
|
||||||
|
.. attribute:: msg
|
||||||
|
|
||||||
|
The unformatted error message.
|
||||||
|
|
||||||
|
.. attribute:: pattern
|
||||||
|
|
||||||
|
The regular expression pattern.
|
||||||
|
|
||||||
|
.. attribute:: pos
|
||||||
|
|
||||||
|
The index in *pattern* where compilation failed (may be ``None``).
|
||||||
|
|
||||||
|
.. attribute:: lineno
|
||||||
|
|
||||||
|
The line corresponding to *pos* (may be ``None``).
|
||||||
|
|
||||||
|
.. attribute:: colno
|
||||||
|
|
||||||
|
The column corresponding to *pos* (may be ``None``).
|
||||||
|
|
||||||
|
.. versionchanged:: 3.5
|
||||||
|
Added additional attributes.
|
||||||
|
|
||||||
.. _re-objects:
|
.. _re-objects:
|
||||||
|
|
||||||
|
|
|
@ -21,7 +21,35 @@ from _sre import MAXREPEAT, MAXGROUPS
|
||||||
# should this really be here?
|
# should this really be here?
|
||||||
|
|
||||||
class error(Exception):
    """Exception raised when a regular expression is not valid.

    Attributes:
        msg: the unformatted error message.
        pattern: the regular expression pattern (str or bytes), or None.
        pos: the index in *pattern* where compilation failed, or None.
        lineno: the 1-based line corresponding to *pos*, or None.
        colno: the 1-based column corresponding to *pos*, or None.
    """

    def __init__(self, msg, pattern=None, pos=None):
        self.msg = msg
        self.pattern = pattern
        self.pos = pos
        if pattern is None or pos is None:
            # Without both a pattern and a position there is nothing to locate.
            self.lineno = self.colno = None
            text = msg
        else:
            text = '%s at position %d' % (msg, pos)
            # Search with a separator of the same type as the pattern.
            newline = '\n' if isinstance(pattern, str) else b'\n'
            self.lineno = pattern.count(newline, 0, pos) + 1
            # rfind() yields -1 when no newline precedes pos, which makes the
            # column pos + 1 on the first line.
            self.colno = pos - pattern.rfind(newline, 0, pos)
            if newline in pattern:
                # Line/column details are only shown for multi-line patterns.
                text = '%s (line %d, column %d)' % (text, self.lineno, self.colno)
        super().__init__(text)
|
||||||
|
|
||||||
|
def linecol(doc, pos):
    """Return the 1-based ``(lineno, colno)`` pair for index *pos* in *doc*.

    *doc* is the pattern text, either str or bytes; the newline separator
    searched for is chosen to match its type.  Only newlines located before
    *pos* are taken into account.
    """
    # The original body referred to an undefined name ``pattern`` instead of
    # the ``doc`` parameter, so every call raised NameError.
    newline = '\n' if isinstance(doc, str) else b'\n'
    lineno = doc.count(newline, 0, pos) + 1
    # rfind() returns -1 when no newline precedes pos, giving pos + 1 on the
    # first line; otherwise the column is the distance from the last newline.
    colno = pos - doc.rfind(newline, 0, pos)
    return lineno, colno
|
||||||
|
|
||||||
|
|
||||||
class _NamedIntConstant(int):
|
class _NamedIntConstant(int):
|
||||||
|
|
139
Lib/sre_parse.py
139
Lib/sre_parse.py
|
@ -81,8 +81,8 @@ class Pattern:
|
||||||
if name is not None:
|
if name is not None:
|
||||||
ogid = self.groupdict.get(name, None)
|
ogid = self.groupdict.get(name, None)
|
||||||
if ogid is not None:
|
if ogid is not None:
|
||||||
raise error("redefinition of group name %s as group %d; "
|
raise error("redefinition of group name %r as group %d; "
|
||||||
"was group %d" % (repr(name), gid, ogid))
|
"was group %d" % (name, gid, ogid))
|
||||||
self.groupdict[name] = gid
|
self.groupdict[name] = gid
|
||||||
return gid
|
return gid
|
||||||
def closegroup(self, gid, p):
|
def closegroup(self, gid, p):
|
||||||
|
@ -206,24 +206,25 @@ class SubPattern:
|
||||||
class Tokenizer:
|
class Tokenizer:
|
||||||
def __init__(self, string):
|
def __init__(self, string):
|
||||||
self.istext = isinstance(string, str)
|
self.istext = isinstance(string, str)
|
||||||
|
self.string = string
|
||||||
if not self.istext:
|
if not self.istext:
|
||||||
string = str(string, 'latin1')
|
string = str(string, 'latin1')
|
||||||
self.string = string
|
self.decoded_string = string
|
||||||
self.index = 0
|
self.index = 0
|
||||||
self.__next()
|
self.__next()
|
||||||
def __next(self):
|
def __next(self):
|
||||||
index = self.index
|
index = self.index
|
||||||
try:
|
try:
|
||||||
char = self.string[index]
|
char = self.decoded_string[index]
|
||||||
except IndexError:
|
except IndexError:
|
||||||
self.next = None
|
self.next = None
|
||||||
return
|
return
|
||||||
if char == "\\":
|
if char == "\\":
|
||||||
index += 1
|
index += 1
|
||||||
try:
|
try:
|
||||||
char += self.string[index]
|
char += self.decoded_string[index]
|
||||||
except IndexError:
|
except IndexError:
|
||||||
raise error("bogus escape (end of line)")
|
raise self.error("bogus escape (end of line)") from None
|
||||||
self.index = index + 1
|
self.index = index + 1
|
||||||
self.next = char
|
self.next = char
|
||||||
def match(self, char):
|
def match(self, char):
|
||||||
|
@ -250,15 +251,19 @@ class Tokenizer:
|
||||||
c = self.next
|
c = self.next
|
||||||
self.__next()
|
self.__next()
|
||||||
if c is None:
|
if c is None:
|
||||||
raise error("unterminated name")
|
raise self.error("unterminated name")
|
||||||
if c == terminator:
|
if c == terminator:
|
||||||
break
|
break
|
||||||
result += c
|
result += c
|
||||||
return result
|
return result
|
||||||
def tell(self):
|
def tell(self):
|
||||||
return self.index, self.next
|
return self.index - len(self.next or '')
|
||||||
def seek(self, index):
|
def seek(self, index):
|
||||||
self.index, self.next = index
|
self.index = index
|
||||||
|
self.__next()
|
||||||
|
|
||||||
|
def error(self, msg, offset=0):
|
||||||
|
return error(msg, self.string, self.tell() - offset)
|
||||||
|
|
||||||
# The following three functions are not used in this module anymore, but we keep
|
# The following three functions are not used in this module anymore, but we keep
|
||||||
# them here (with DeprecationWarnings) for backwards compatibility.
|
# them here (with DeprecationWarnings) for backwards compatibility.
|
||||||
|
@ -322,8 +327,8 @@ def _class_escape(source, escape):
|
||||||
escape += source.getwhile(2, OCTDIGITS)
|
escape += source.getwhile(2, OCTDIGITS)
|
||||||
c = int(escape[1:], 8)
|
c = int(escape[1:], 8)
|
||||||
if c > 0o377:
|
if c > 0o377:
|
||||||
raise error('octal escape value %r outside of '
|
raise source.error('octal escape value %r outside of '
|
||||||
'range 0-0o377' % escape)
|
'range 0-0o377' % escape, len(escape))
|
||||||
return LITERAL, c
|
return LITERAL, c
|
||||||
elif c in DIGITS:
|
elif c in DIGITS:
|
||||||
raise ValueError
|
raise ValueError
|
||||||
|
@ -331,7 +336,7 @@ def _class_escape(source, escape):
|
||||||
return LITERAL, ord(escape[1])
|
return LITERAL, ord(escape[1])
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
raise error("bogus escape: %s" % repr(escape))
|
raise source.error("bogus escape: %r" % escape, len(escape))
|
||||||
|
|
||||||
def _escape(source, escape, state):
|
def _escape(source, escape, state):
|
||||||
# handle escape code in expression
|
# handle escape code in expression
|
||||||
|
@ -377,21 +382,23 @@ def _escape(source, escape, state):
|
||||||
escape += source.get()
|
escape += source.get()
|
||||||
c = int(escape[1:], 8)
|
c = int(escape[1:], 8)
|
||||||
if c > 0o377:
|
if c > 0o377:
|
||||||
raise error('octal escape value %r outside of '
|
raise source.error('octal escape value %r outside of '
|
||||||
'range 0-0o377' % escape)
|
'range 0-0o377' % escape,
|
||||||
|
len(escape))
|
||||||
return LITERAL, c
|
return LITERAL, c
|
||||||
# not an octal escape, so this is a group reference
|
# not an octal escape, so this is a group reference
|
||||||
group = int(escape[1:])
|
group = int(escape[1:])
|
||||||
if group < state.groups:
|
if group < state.groups:
|
||||||
if not state.checkgroup(group):
|
if not state.checkgroup(group):
|
||||||
raise error("cannot refer to open group")
|
raise source.error("cannot refer to open group",
|
||||||
|
len(escape))
|
||||||
return GROUPREF, group
|
return GROUPREF, group
|
||||||
raise ValueError
|
raise ValueError
|
||||||
if len(escape) == 2:
|
if len(escape) == 2:
|
||||||
return LITERAL, ord(escape[1])
|
return LITERAL, ord(escape[1])
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
raise error("bogus escape: %s" % repr(escape))
|
raise source.error("bogus escape: %r" % escape, len(escape))
|
||||||
|
|
||||||
def _parse_sub(source, state, nested=True):
|
def _parse_sub(source, state, nested=True):
|
||||||
# parse an alternation: a|b|c
|
# parse an alternation: a|b|c
|
||||||
|
@ -404,7 +411,7 @@ def _parse_sub(source, state, nested=True):
|
||||||
if not sourcematch("|"):
|
if not sourcematch("|"):
|
||||||
break
|
break
|
||||||
if nested and source.next is not None and source.next != ")":
|
if nested and source.next is not None and source.next != ")":
|
||||||
raise error("pattern not properly closed")
|
raise source.error("pattern not properly closed")
|
||||||
|
|
||||||
if len(items) == 1:
|
if len(items) == 1:
|
||||||
return items[0]
|
return items[0]
|
||||||
|
@ -449,11 +456,11 @@ def _parse_sub_cond(source, state, condgroup):
|
||||||
if source.match("|"):
|
if source.match("|"):
|
||||||
item_no = _parse(source, state)
|
item_no = _parse(source, state)
|
||||||
if source.next == "|":
|
if source.next == "|":
|
||||||
raise error("conditional backref with more than two branches")
|
raise source.error("conditional backref with more than two branches")
|
||||||
else:
|
else:
|
||||||
item_no = None
|
item_no = None
|
||||||
if source.next is not None and source.next != ")":
|
if source.next is not None and source.next != ")":
|
||||||
raise error("pattern not properly closed")
|
raise source.error("pattern not properly closed")
|
||||||
subpattern = SubPattern(state)
|
subpattern = SubPattern(state)
|
||||||
subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
|
subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
|
||||||
return subpattern
|
return subpattern
|
||||||
|
@ -510,7 +517,7 @@ def _parse(source, state):
|
||||||
while True:
|
while True:
|
||||||
this = sourceget()
|
this = sourceget()
|
||||||
if this is None:
|
if this is None:
|
||||||
raise error("unexpected end of regular expression")
|
raise source.error("unexpected end of regular expression")
|
||||||
if this == "]" and set != start:
|
if this == "]" and set != start:
|
||||||
break
|
break
|
||||||
elif this[0] == "\\":
|
elif this[0] == "\\":
|
||||||
|
@ -521,7 +528,7 @@ def _parse(source, state):
|
||||||
# potential range
|
# potential range
|
||||||
this = sourceget()
|
this = sourceget()
|
||||||
if this is None:
|
if this is None:
|
||||||
raise error("unexpected end of regular expression")
|
raise source.error("unexpected end of regular expression")
|
||||||
if this == "]":
|
if this == "]":
|
||||||
if code1[0] is IN:
|
if code1[0] is IN:
|
||||||
code1 = code1[1][0]
|
code1 = code1[1][0]
|
||||||
|
@ -533,11 +540,11 @@ def _parse(source, state):
|
||||||
else:
|
else:
|
||||||
code2 = LITERAL, _ord(this)
|
code2 = LITERAL, _ord(this)
|
||||||
if code1[0] != LITERAL or code2[0] != LITERAL:
|
if code1[0] != LITERAL or code2[0] != LITERAL:
|
||||||
raise error("bad character range")
|
raise source.error("bad character range", len(this))
|
||||||
lo = code1[1]
|
lo = code1[1]
|
||||||
hi = code2[1]
|
hi = code2[1]
|
||||||
if hi < lo:
|
if hi < lo:
|
||||||
raise error("bad character range")
|
raise source.error("bad character range", len(this))
|
||||||
setappend((RANGE, (lo, hi)))
|
setappend((RANGE, (lo, hi)))
|
||||||
else:
|
else:
|
||||||
if code1[0] is IN:
|
if code1[0] is IN:
|
||||||
|
@ -555,6 +562,7 @@ def _parse(source, state):
|
||||||
|
|
||||||
elif this in REPEAT_CHARS:
|
elif this in REPEAT_CHARS:
|
||||||
# repeat previous item
|
# repeat previous item
|
||||||
|
here = source.tell()
|
||||||
if this == "?":
|
if this == "?":
|
||||||
min, max = 0, 1
|
min, max = 0, 1
|
||||||
elif this == "*":
|
elif this == "*":
|
||||||
|
@ -566,7 +574,6 @@ def _parse(source, state):
|
||||||
if source.next == "}":
|
if source.next == "}":
|
||||||
subpatternappend((LITERAL, _ord(this)))
|
subpatternappend((LITERAL, _ord(this)))
|
||||||
continue
|
continue
|
||||||
here = source.tell()
|
|
||||||
min, max = 0, MAXREPEAT
|
min, max = 0, MAXREPEAT
|
||||||
lo = hi = ""
|
lo = hi = ""
|
||||||
while source.next in DIGITS:
|
while source.next in DIGITS:
|
||||||
|
@ -589,18 +596,21 @@ def _parse(source, state):
|
||||||
if max >= MAXREPEAT:
|
if max >= MAXREPEAT:
|
||||||
raise OverflowError("the repetition number is too large")
|
raise OverflowError("the repetition number is too large")
|
||||||
if max < min:
|
if max < min:
|
||||||
raise error("bad repeat interval")
|
raise source.error("bad repeat interval",
|
||||||
|
source.tell() - here)
|
||||||
else:
|
else:
|
||||||
raise error("not supported")
|
raise source.error("not supported", len(this))
|
||||||
# figure out which item to repeat
|
# figure out which item to repeat
|
||||||
if subpattern:
|
if subpattern:
|
||||||
item = subpattern[-1:]
|
item = subpattern[-1:]
|
||||||
else:
|
else:
|
||||||
item = None
|
item = None
|
||||||
if not item or (_len(item) == 1 and item[0][0] == AT):
|
if not item or (_len(item) == 1 and item[0][0] == AT):
|
||||||
raise error("nothing to repeat")
|
raise source.error("nothing to repeat",
|
||||||
|
source.tell() - here + len(this))
|
||||||
if item[0][0] in _REPEATCODES:
|
if item[0][0] in _REPEATCODES:
|
||||||
raise error("multiple repeat")
|
raise source.error("multiple repeat",
|
||||||
|
source.tell() - here + len(this))
|
||||||
if sourcematch("?"):
|
if sourcematch("?"):
|
||||||
subpattern[-1] = (MIN_REPEAT, (min, max, item))
|
subpattern[-1] = (MIN_REPEAT, (min, max, item))
|
||||||
else:
|
else:
|
||||||
|
@ -618,7 +628,7 @@ def _parse(source, state):
|
||||||
# options
|
# options
|
||||||
char = sourceget()
|
char = sourceget()
|
||||||
if char is None:
|
if char is None:
|
||||||
raise error("unexpected end of pattern")
|
raise self.error("unexpected end of pattern")
|
||||||
if char == "P":
|
if char == "P":
|
||||||
# python extensions
|
# python extensions
|
||||||
if sourcematch("<"):
|
if sourcematch("<"):
|
||||||
|
@ -626,28 +636,32 @@ def _parse(source, state):
|
||||||
name = source.getuntil(">")
|
name = source.getuntil(">")
|
||||||
group = 1
|
group = 1
|
||||||
if not name:
|
if not name:
|
||||||
raise error("missing group name")
|
raise source.error("missing group name", 1)
|
||||||
if not name.isidentifier():
|
if not name.isidentifier():
|
||||||
raise error("bad character in group name %r" % name)
|
raise source.error("bad character in group name "
|
||||||
|
"%r" % name,
|
||||||
|
len(name) + 1)
|
||||||
elif sourcematch("="):
|
elif sourcematch("="):
|
||||||
# named backreference
|
# named backreference
|
||||||
name = source.getuntil(")")
|
name = source.getuntil(")")
|
||||||
if not name:
|
if not name:
|
||||||
raise error("missing group name")
|
raise source.error("missing group name", 1)
|
||||||
if not name.isidentifier():
|
if not name.isidentifier():
|
||||||
raise error("bad character in backref group name "
|
raise source.error("bad character in backref "
|
||||||
"%r" % name)
|
"group name %r" % name,
|
||||||
|
len(name) + 1)
|
||||||
gid = state.groupdict.get(name)
|
gid = state.groupdict.get(name)
|
||||||
if gid is None:
|
if gid is None:
|
||||||
msg = "unknown group name: {0!r}".format(name)
|
msg = "unknown group name: {0!r}".format(name)
|
||||||
raise error(msg)
|
raise source.error(msg, len(name) + 1)
|
||||||
subpatternappend((GROUPREF, gid))
|
subpatternappend((GROUPREF, gid))
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
char = sourceget()
|
char = sourceget()
|
||||||
if char is None:
|
if char is None:
|
||||||
raise error("unexpected end of pattern")
|
raise source.error("unexpected end of pattern")
|
||||||
raise error("unknown specifier: ?P%s" % char)
|
raise source.error("unknown specifier: ?P%s" % char,
|
||||||
|
len(char))
|
||||||
elif char == ":":
|
elif char == ":":
|
||||||
# non-capturing group
|
# non-capturing group
|
||||||
group = 2
|
group = 2
|
||||||
|
@ -655,7 +669,7 @@ def _parse(source, state):
|
||||||
# comment
|
# comment
|
||||||
while True:
|
while True:
|
||||||
if source.next is None:
|
if source.next is None:
|
||||||
raise error("unbalanced parenthesis")
|
raise source.error("unbalanced parenthesis")
|
||||||
if sourceget() == ")":
|
if sourceget() == ")":
|
||||||
break
|
break
|
||||||
continue
|
continue
|
||||||
|
@ -665,11 +679,11 @@ def _parse(source, state):
|
||||||
if char == "<":
|
if char == "<":
|
||||||
char = sourceget()
|
char = sourceget()
|
||||||
if char is None or char not in "=!":
|
if char is None or char not in "=!":
|
||||||
raise error("syntax error")
|
raise source.error("syntax error")
|
||||||
dir = -1 # lookbehind
|
dir = -1 # lookbehind
|
||||||
p = _parse_sub(source, state)
|
p = _parse_sub(source, state)
|
||||||
if not sourcematch(")"):
|
if not sourcematch(")"):
|
||||||
raise error("unbalanced parenthesis")
|
raise source.error("unbalanced parenthesis")
|
||||||
if char == "=":
|
if char == "=":
|
||||||
subpatternappend((ASSERT, (dir, p)))
|
subpatternappend((ASSERT, (dir, p)))
|
||||||
else:
|
else:
|
||||||
|
@ -680,23 +694,26 @@ def _parse(source, state):
|
||||||
condname = source.getuntil(")")
|
condname = source.getuntil(")")
|
||||||
group = 2
|
group = 2
|
||||||
if not condname:
|
if not condname:
|
||||||
raise error("missing group name")
|
raise source.error("missing group name", 1)
|
||||||
if condname.isidentifier():
|
if condname.isidentifier():
|
||||||
condgroup = state.groupdict.get(condname)
|
condgroup = state.groupdict.get(condname)
|
||||||
if condgroup is None:
|
if condgroup is None:
|
||||||
msg = "unknown group name: {0!r}".format(condname)
|
msg = "unknown group name: {0!r}".format(condname)
|
||||||
raise error(msg)
|
raise source.error(msg, len(condname) + 1)
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
condgroup = int(condname)
|
condgroup = int(condname)
|
||||||
if condgroup < 0:
|
if condgroup < 0:
|
||||||
raise ValueError
|
raise ValueError
|
||||||
except ValueError:
|
except ValueError:
|
||||||
raise error("bad character in group name")
|
raise source.error("bad character in group name",
|
||||||
|
len(condname) + 1)
|
||||||
if not condgroup:
|
if not condgroup:
|
||||||
raise error("bad group number")
|
raise source.error("bad group number",
|
||||||
|
len(condname) + 1)
|
||||||
if condgroup >= MAXGROUPS:
|
if condgroup >= MAXGROUPS:
|
||||||
raise error("the group number is too large")
|
raise source.error("the group number is too large",
|
||||||
|
len(condname) + 1)
|
||||||
elif char in FLAGS:
|
elif char in FLAGS:
|
||||||
# flags
|
# flags
|
||||||
state.flags |= FLAGS[char]
|
state.flags |= FLAGS[char]
|
||||||
|
@ -704,20 +721,23 @@ def _parse(source, state):
|
||||||
state.flags |= FLAGS[sourceget()]
|
state.flags |= FLAGS[sourceget()]
|
||||||
verbose = state.flags & SRE_FLAG_VERBOSE
|
verbose = state.flags & SRE_FLAG_VERBOSE
|
||||||
else:
|
else:
|
||||||
raise error("unexpected end of pattern " + char)
|
raise source.error("unexpected end of pattern")
|
||||||
if group:
|
if group:
|
||||||
# parse group contents
|
# parse group contents
|
||||||
if group == 2:
|
if group == 2:
|
||||||
# anonymous group
|
# anonymous group
|
||||||
group = None
|
group = None
|
||||||
else:
|
else:
|
||||||
|
try:
|
||||||
group = state.opengroup(name)
|
group = state.opengroup(name)
|
||||||
|
except error as err:
|
||||||
|
raise source.error(err.msg, len(name) + 1)
|
||||||
if condgroup:
|
if condgroup:
|
||||||
p = _parse_sub_cond(source, state, condgroup)
|
p = _parse_sub_cond(source, state, condgroup)
|
||||||
else:
|
else:
|
||||||
p = _parse_sub(source, state)
|
p = _parse_sub(source, state)
|
||||||
if not sourcematch(")"):
|
if not sourcematch(")"):
|
||||||
raise error("unbalanced parenthesis")
|
raise source.error("unbalanced parenthesis")
|
||||||
if group is not None:
|
if group is not None:
|
||||||
state.closegroup(group, p)
|
state.closegroup(group, p)
|
||||||
subpatternappend((SUBPATTERN, (group, p)))
|
subpatternappend((SUBPATTERN, (group, p)))
|
||||||
|
@ -725,10 +745,10 @@ def _parse(source, state):
|
||||||
while True:
|
while True:
|
||||||
char = sourceget()
|
char = sourceget()
|
||||||
if char is None:
|
if char is None:
|
||||||
raise error("unexpected end of pattern")
|
raise source.error("unexpected end of pattern")
|
||||||
if char == ")":
|
if char == ")":
|
||||||
break
|
break
|
||||||
raise error("unknown extension")
|
raise source.error("unknown extension", len(char))
|
||||||
|
|
||||||
elif this == "^":
|
elif this == "^":
|
||||||
subpatternappend((AT, AT_BEGINNING))
|
subpatternappend((AT, AT_BEGINNING))
|
||||||
|
@ -737,7 +757,7 @@ def _parse(source, state):
|
||||||
subpattern.append((AT, AT_END))
|
subpattern.append((AT, AT_END))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise error("parser error")
|
raise source.error("parser error", len(this))
|
||||||
|
|
||||||
return subpattern
|
return subpattern
|
||||||
|
|
||||||
|
@ -768,9 +788,10 @@ def parse(str, flags=0, pattern=None):
|
||||||
|
|
||||||
if source.next is not None:
|
if source.next is not None:
|
||||||
if source.next == ")":
|
if source.next == ")":
|
||||||
raise error("unbalanced parenthesis")
|
raise source.error("unbalanced parenthesis")
|
||||||
else:
|
else:
|
||||||
raise error("bogus characters at end of regular expression")
|
raise source.error("bogus characters at end of regular expression",
|
||||||
|
len(tail))
|
||||||
|
|
||||||
if flags & SRE_FLAG_DEBUG:
|
if flags & SRE_FLAG_DEBUG:
|
||||||
p.dump()
|
p.dump()
|
||||||
|
@ -809,16 +830,18 @@ def parse_template(source, pattern):
|
||||||
if s.match("<"):
|
if s.match("<"):
|
||||||
name = s.getuntil(">")
|
name = s.getuntil(">")
|
||||||
if not name:
|
if not name:
|
||||||
raise error("missing group name")
|
raise s.error("missing group name", 1)
|
||||||
try:
|
try:
|
||||||
index = int(name)
|
index = int(name)
|
||||||
if index < 0:
|
if index < 0:
|
||||||
raise error("negative group number")
|
raise s.error("negative group number", len(name) + 1)
|
||||||
if index >= MAXGROUPS:
|
if index >= MAXGROUPS:
|
||||||
raise error("the group number is too large")
|
raise s.error("the group number is too large",
|
||||||
|
len(name) + 1)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
if not name.isidentifier():
|
if not name.isidentifier():
|
||||||
raise error("bad character in group name")
|
raise s.error("bad character in group name",
|
||||||
|
len(name) + 1)
|
||||||
try:
|
try:
|
||||||
index = pattern.groupindex[name]
|
index = pattern.groupindex[name]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
|
@ -841,8 +864,8 @@ def parse_template(source, pattern):
|
||||||
isoctal = True
|
isoctal = True
|
||||||
c = int(this[1:], 8)
|
c = int(this[1:], 8)
|
||||||
if c > 0o377:
|
if c > 0o377:
|
||||||
raise error('octal escape value %r outside of '
|
raise s.error('octal escape value %r outside of '
|
||||||
'range 0-0o377' % this)
|
'range 0-0o377' % this, len(this))
|
||||||
lappend(chr(c))
|
lappend(chr(c))
|
||||||
if not isoctal:
|
if not isoctal:
|
||||||
addgroup(int(this[1:]))
|
addgroup(int(this[1:]))
|
||||||
|
|
|
@ -1419,6 +1419,42 @@ SUBPATTERN None
|
||||||
self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5'))
|
self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5'))
|
||||||
self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5'))
|
self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5'))
|
||||||
|
|
||||||
|
def test_error(self):
|
||||||
|
with self.assertRaises(re.error) as cm:
|
||||||
|
re.compile('(\u20ac))')
|
||||||
|
err = cm.exception
|
||||||
|
self.assertIsInstance(err.pattern, str)
|
||||||
|
self.assertEqual(err.pattern, '(\u20ac))')
|
||||||
|
self.assertEqual(err.pos, 3)
|
||||||
|
self.assertEqual(err.lineno, 1)
|
||||||
|
self.assertEqual(err.colno, 4)
|
||||||
|
self.assertIn(err.msg, str(err))
|
||||||
|
self.assertIn(' at position 3', str(err))
|
||||||
|
self.assertNotIn(' at position 3', err.msg)
|
||||||
|
# Bytes pattern
|
||||||
|
with self.assertRaises(re.error) as cm:
|
||||||
|
re.compile(b'(\xa4))')
|
||||||
|
err = cm.exception
|
||||||
|
self.assertIsInstance(err.pattern, bytes)
|
||||||
|
self.assertEqual(err.pattern, b'(\xa4))')
|
||||||
|
self.assertEqual(err.pos, 3)
|
||||||
|
# Multiline pattern
|
||||||
|
with self.assertRaises(re.error) as cm:
|
||||||
|
re.compile("""
|
||||||
|
(
|
||||||
|
abc
|
||||||
|
)
|
||||||
|
)
|
||||||
|
(
|
||||||
|
""", re.VERBOSE)
|
||||||
|
err = cm.exception
|
||||||
|
self.assertEqual(err.pos, 77)
|
||||||
|
self.assertEqual(err.lineno, 5)
|
||||||
|
self.assertEqual(err.colno, 17)
|
||||||
|
self.assertIn(err.msg, str(err))
|
||||||
|
self.assertIn(' at position 77', str(err))
|
||||||
|
self.assertIn('(line 5, column 17)', str(err))
|
||||||
|
|
||||||
|
|
||||||
class PatternReprTests(unittest.TestCase):
|
class PatternReprTests(unittest.TestCase):
|
||||||
def check(self, pattern, expected):
|
def check(self, pattern, expected):
|
||||||
|
|
|
@ -183,6 +183,8 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #22578: Added attributes (msg, pattern, pos, lineno, colno) to the re.error class.
|
||||||
|
|
||||||
- Issue #12728: Different Unicode characters having the same uppercase but
|
- Issue #12728: Different Unicode characters having the same uppercase but
|
||||||
different lowercase are now matched in case-insensitive regular expressions.
|
different lowercase are now matched in case-insensitive regular expressions.
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue