mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
Issue #25953: re.sub() now raises an error for an invalid numerical group
reference in the replacement template even if the pattern is not found in the string. The error message for an invalid group reference now includes the group index and the position of the reference. Based on patch by SilentGhost.
This commit is contained in:
commit
ee8337a99d
3 changed files with 38 additions and 29 deletions
|
@ -395,7 +395,7 @@ def _escape(source, escape, state):
|
||||||
len(escape))
|
len(escape))
|
||||||
state.checklookbehindgroup(group, source)
|
state.checklookbehindgroup(group, source)
|
||||||
return GROUPREF, group
|
return GROUPREF, group
|
||||||
raise source.error("invalid group reference", len(escape))
|
raise source.error("invalid group reference %d" % group, len(escape) - 1)
|
||||||
if len(escape) == 2:
|
if len(escape) == 2:
|
||||||
if c in ASCIILETTERS:
|
if c in ASCIILETTERS:
|
||||||
raise source.error("bad escape %s" % escape, len(escape))
|
raise source.error("bad escape %s" % escape, len(escape))
|
||||||
|
@ -725,8 +725,8 @@ def _parse(source, state, verbose):
|
||||||
raise source.error("bad group number",
|
raise source.error("bad group number",
|
||||||
len(condname) + 1)
|
len(condname) + 1)
|
||||||
if condgroup >= MAXGROUPS:
|
if condgroup >= MAXGROUPS:
|
||||||
raise source.error("invalid group reference",
|
msg = "invalid group reference %d" % condgroup
|
||||||
len(condname) + 1)
|
raise source.error(msg, len(condname) + 1)
|
||||||
state.checklookbehindgroup(condgroup, source)
|
state.checklookbehindgroup(condgroup, source)
|
||||||
elif char in FLAGS or char == "-":
|
elif char in FLAGS or char == "-":
|
||||||
# flags
|
# flags
|
||||||
|
@ -883,7 +883,9 @@ def parse_template(source, pattern):
|
||||||
literals = []
|
literals = []
|
||||||
literal = []
|
literal = []
|
||||||
lappend = literal.append
|
lappend = literal.append
|
||||||
def addgroup(index):
|
def addgroup(index, pos):
|
||||||
|
if index > pattern.groups:
|
||||||
|
raise s.error("invalid group reference %d" % index, pos)
|
||||||
if literal:
|
if literal:
|
||||||
literals.append(''.join(literal))
|
literals.append(''.join(literal))
|
||||||
del literal[:]
|
del literal[:]
|
||||||
|
@ -916,9 +918,9 @@ def parse_template(source, pattern):
|
||||||
raise s.error("bad character in group name %r" % name,
|
raise s.error("bad character in group name %r" % name,
|
||||||
len(name) + 1) from None
|
len(name) + 1) from None
|
||||||
if index >= MAXGROUPS:
|
if index >= MAXGROUPS:
|
||||||
raise s.error("invalid group reference",
|
raise s.error("invalid group reference %d" % index,
|
||||||
len(name) + 1)
|
len(name) + 1)
|
||||||
addgroup(index)
|
addgroup(index, len(name) + 1)
|
||||||
elif c == "0":
|
elif c == "0":
|
||||||
if s.next in OCTDIGITS:
|
if s.next in OCTDIGITS:
|
||||||
this += sget()
|
this += sget()
|
||||||
|
@ -939,7 +941,7 @@ def parse_template(source, pattern):
|
||||||
'range 0-0o377' % this, len(this))
|
'range 0-0o377' % this, len(this))
|
||||||
lappend(chr(c))
|
lappend(chr(c))
|
||||||
if not isoctal:
|
if not isoctal:
|
||||||
addgroup(int(this[1:]))
|
addgroup(int(this[1:]), len(this) - 1)
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
this = chr(ESCAPES[this][1])
|
this = chr(ESCAPES[this][1])
|
||||||
|
@ -966,5 +968,5 @@ def expand_template(template, match):
|
||||||
for index, group in groups:
|
for index, group in groups:
|
||||||
literals[index] = g(group) or empty
|
literals[index] = g(group) or empty
|
||||||
except IndexError:
|
except IndexError:
|
||||||
raise error("invalid group reference")
|
raise error("invalid group reference %d" % index)
|
||||||
return empty.join(literals)
|
return empty.join(literals)
|
||||||
|
|
|
@ -5,7 +5,6 @@ import locale
|
||||||
import re
|
import re
|
||||||
from re import Scanner
|
from re import Scanner
|
||||||
import sre_compile
|
import sre_compile
|
||||||
import sre_constants
|
|
||||||
import sys
|
import sys
|
||||||
import string
|
import string
|
||||||
import traceback
|
import traceback
|
||||||
|
@ -186,18 +185,19 @@ class ReTests(unittest.TestCase):
|
||||||
r'octal escape value \777 outside of '
|
r'octal escape value \777 outside of '
|
||||||
r'range 0-0o377', 0)
|
r'range 0-0o377', 0)
|
||||||
|
|
||||||
self.checkTemplateError('x', r'\1', 'x', 'invalid group reference')
|
self.checkTemplateError('x', r'\1', 'x', 'invalid group reference 1', 1)
|
||||||
self.checkTemplateError('x', r'\8', 'x', 'invalid group reference')
|
self.checkTemplateError('x', r'\8', 'x', 'invalid group reference 8', 1)
|
||||||
self.checkTemplateError('x', r'\9', 'x', 'invalid group reference')
|
self.checkTemplateError('x', r'\9', 'x', 'invalid group reference 9', 1)
|
||||||
self.checkTemplateError('x', r'\11', 'x', 'invalid group reference')
|
self.checkTemplateError('x', r'\11', 'x', 'invalid group reference 11', 1)
|
||||||
self.checkTemplateError('x', r'\18', 'x', 'invalid group reference')
|
self.checkTemplateError('x', r'\18', 'x', 'invalid group reference 18', 1)
|
||||||
self.checkTemplateError('x', r'\1a', 'x', 'invalid group reference')
|
self.checkTemplateError('x', r'\1a', 'x', 'invalid group reference 1', 1)
|
||||||
self.checkTemplateError('x', r'\90', 'x', 'invalid group reference')
|
self.checkTemplateError('x', r'\90', 'x', 'invalid group reference 90', 1)
|
||||||
self.checkTemplateError('x', r'\99', 'x', 'invalid group reference')
|
self.checkTemplateError('x', r'\99', 'x', 'invalid group reference 99', 1)
|
||||||
self.checkTemplateError('x', r'\118', 'x', 'invalid group reference') # r'\11' + '8'
|
self.checkTemplateError('x', r'\118', 'x', 'invalid group reference 11', 1)
|
||||||
self.checkTemplateError('x', r'\11a', 'x', 'invalid group reference')
|
self.checkTemplateError('x', r'\11a', 'x', 'invalid group reference 11', 1)
|
||||||
self.checkTemplateError('x', r'\181', 'x', 'invalid group reference') # r'\18' + '1'
|
self.checkTemplateError('x', r'\181', 'x', 'invalid group reference 18', 1)
|
||||||
self.checkTemplateError('x', r'\800', 'x', 'invalid group reference') # r'\80' + '0'
|
self.checkTemplateError('x', r'\800', 'x', 'invalid group reference 80', 1)
|
||||||
|
self.checkTemplateError('x', r'\8', '', 'invalid group reference 8', 1)
|
||||||
|
|
||||||
# in python2.3 (etc), these loop endlessly in sre_parser.py
|
# in python2.3 (etc), these loop endlessly in sre_parser.py
|
||||||
self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
|
self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
|
||||||
|
@ -271,9 +271,9 @@ class ReTests(unittest.TestCase):
|
||||||
self.checkTemplateError('(?P<a>x)', r'\g<1a1>', 'xx',
|
self.checkTemplateError('(?P<a>x)', r'\g<1a1>', 'xx',
|
||||||
"bad character in group name '1a1'", 3)
|
"bad character in group name '1a1'", 3)
|
||||||
self.checkTemplateError('(?P<a>x)', r'\g<2>', 'xx',
|
self.checkTemplateError('(?P<a>x)', r'\g<2>', 'xx',
|
||||||
'invalid group reference')
|
'invalid group reference 2', 3)
|
||||||
self.checkTemplateError('(?P<a>x)', r'\2', 'xx',
|
self.checkTemplateError('(?P<a>x)', r'\2', 'xx',
|
||||||
'invalid group reference')
|
'invalid group reference 2', 1)
|
||||||
with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"):
|
with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"):
|
||||||
re.sub('(?P<a>x)', r'\g<ab>', 'xx')
|
re.sub('(?P<a>x)', r'\g<ab>', 'xx')
|
||||||
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
|
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
|
||||||
|
@ -558,10 +558,11 @@ class ReTests(unittest.TestCase):
|
||||||
'two branches', 10)
|
'two branches', 10)
|
||||||
|
|
||||||
def test_re_groupref_overflow(self):
|
def test_re_groupref_overflow(self):
|
||||||
self.checkTemplateError('()', r'\g<%s>' % sre_constants.MAXGROUPS, 'xx',
|
from sre_constants import MAXGROUPS
|
||||||
'invalid group reference', 3)
|
self.checkTemplateError('()', r'\g<%s>' % MAXGROUPS, 'xx',
|
||||||
self.checkPatternError(r'(?P<a>)(?(%d))' % sre_constants.MAXGROUPS,
|
'invalid group reference %d' % MAXGROUPS, 3)
|
||||||
'invalid group reference', 10)
|
self.checkPatternError(r'(?P<a>)(?(%d))' % MAXGROUPS,
|
||||||
|
'invalid group reference %d' % MAXGROUPS, 10)
|
||||||
|
|
||||||
def test_re_groupref(self):
|
def test_re_groupref(self):
|
||||||
self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
|
self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
|
||||||
|
@ -1007,7 +1008,7 @@ class ReTests(unittest.TestCase):
|
||||||
self.checkPatternError(r"\567",
|
self.checkPatternError(r"\567",
|
||||||
r'octal escape value \567 outside of '
|
r'octal escape value \567 outside of '
|
||||||
r'range 0-0o377', 0)
|
r'range 0-0o377', 0)
|
||||||
self.checkPatternError(r"\911", 'invalid group reference', 0)
|
self.checkPatternError(r"\911", 'invalid group reference 91', 1)
|
||||||
self.checkPatternError(r"\x1", r'incomplete escape \x1', 0)
|
self.checkPatternError(r"\x1", r'incomplete escape \x1', 0)
|
||||||
self.checkPatternError(r"\x1z", r'incomplete escape \x1', 0)
|
self.checkPatternError(r"\x1z", r'incomplete escape \x1', 0)
|
||||||
self.checkPatternError(r"\u123", r'incomplete escape \u123', 0)
|
self.checkPatternError(r"\u123", r'incomplete escape \u123', 0)
|
||||||
|
@ -1061,7 +1062,7 @@ class ReTests(unittest.TestCase):
|
||||||
self.checkPatternError(br"\567",
|
self.checkPatternError(br"\567",
|
||||||
r'octal escape value \567 outside of '
|
r'octal escape value \567 outside of '
|
||||||
r'range 0-0o377', 0)
|
r'range 0-0o377', 0)
|
||||||
self.checkPatternError(br"\911", 'invalid group reference', 0)
|
self.checkPatternError(br"\911", 'invalid group reference 91', 1)
|
||||||
self.checkPatternError(br"\x1", r'incomplete escape \x1', 0)
|
self.checkPatternError(br"\x1", r'incomplete escape \x1', 0)
|
||||||
self.checkPatternError(br"\x1z", r'incomplete escape \x1', 0)
|
self.checkPatternError(br"\x1z", r'incomplete escape \x1', 0)
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,12 @@ Core and Builtins
|
||||||
- Issue #23782: Fixed possible memory leak in _PyTraceback_Add() and exception
|
- Issue #23782: Fixed possible memory leak in _PyTraceback_Add() and exception
|
||||||
loss in PyTraceBack_Here().
|
loss in PyTraceBack_Here().
|
||||||
|
|
||||||
|
- Issue #25953: re.sub() now raises an error for invalid numerical group
|
||||||
|
reference in replacement template even if the pattern is not found in
|
||||||
|
the string. Error message for invalid group reference now includes the
|
||||||
|
group index and the position of the reference.
|
||||||
|
Based on patch by SilentGhost.
|
||||||
|
|
||||||
- Issue #28183: Optimize and cleanup dict iteration.
|
- Issue #28183: Optimize and cleanup dict iteration.
|
||||||
|
|
||||||
- Issue #26081: Added C implementation of asyncio.Future.
|
- Issue #26081: Added C implementation of asyncio.Future.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue