mirror of
https://github.com/python/cpython.git
synced 2025-10-14 18:59:46 +00:00
gh-91760: More strict rules for numerical group references and group names in RE (GH-91792)
Only a sequence of ASCII digits is now accepted as a numerical reference. Group names in bytes patterns and replacement strings can now contain only ASCII letters, digits, and underscores.
This commit is contained in:
parent
7b024e3a3f
commit
a84a56d80f
5 changed files with 62 additions and 91 deletions
|
@ -275,21 +275,12 @@ class ReTests(unittest.TestCase):
|
|||
self.checkPatternError('(?P<©>x)', "bad character in group name '©'", 4)
|
||||
self.checkPatternError('(?P=©)', "bad character in group name '©'", 4)
|
||||
self.checkPatternError('(?(©)y)', "bad character in group name '©'", 3)
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r"bad character in group name '\\xc2\\xb5' "
|
||||
r"at position 4") as w:
|
||||
re.compile(b'(?P<\xc2\xb5>x)')
|
||||
self.assertEqual(w.filename, __file__)
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r"bad character in group name '\\xc2\\xb5' "
|
||||
r"at position 4"):
|
||||
self.checkPatternError(b'(?P=\xc2\xb5)',
|
||||
r"unknown group name '\xc2\xb5'", 4)
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r"bad character in group name '\\xc2\\xb5' "
|
||||
r"at position 3"):
|
||||
self.checkPatternError(b'(?(\xc2\xb5)y)',
|
||||
r"unknown group name '\xc2\xb5'", 3)
|
||||
self.checkPatternError(b'(?P<\xc2\xb5>x)',
|
||||
r"bad character in group name '\xc2\xb5'", 4)
|
||||
self.checkPatternError(b'(?P=\xc2\xb5)',
|
||||
r"bad character in group name '\xc2\xb5'", 4)
|
||||
self.checkPatternError(b'(?(\xc2\xb5)y)',
|
||||
r"bad character in group name '\xc2\xb5'", 3)
|
||||
|
||||
def test_symbolic_refs(self):
|
||||
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
|
||||
|
@ -322,35 +313,22 @@ class ReTests(unittest.TestCase):
|
|||
re.sub('(?P<a>x)', r'\g<ab>', 'xx')
|
||||
self.checkTemplateError('(?P<a>x)', r'\g<-1>', 'xx',
|
||||
"bad character in group name '-1'", 3)
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r"bad character in group name '\+1' "
|
||||
r"at position 3") as w:
|
||||
re.sub('(?P<a>x)', r'\g<+1>', 'xx')
|
||||
self.assertEqual(w.filename, __file__)
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r"bad character in group name '1_0' "
|
||||
r"at position 3"):
|
||||
re.sub('()'*10, r'\g<1_0>', 'xx')
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r"bad character in group name ' 1 ' "
|
||||
r"at position 3"):
|
||||
re.sub('(?P<a>x)', r'\g< 1 >', 'xx')
|
||||
self.checkTemplateError('(?P<a>x)', r'\g<+1>', 'xx',
|
||||
"bad character in group name '+1'", 3)
|
||||
self.checkTemplateError('()'*10, r'\g<1_0>', 'xx',
|
||||
"bad character in group name '1_0'", 3)
|
||||
self.checkTemplateError('(?P<a>x)', r'\g< 1 >', 'xx',
|
||||
"bad character in group name ' 1 '", 3)
|
||||
self.checkTemplateError('(?P<a>x)', r'\g<©>', 'xx',
|
||||
"bad character in group name '©'", 3)
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r"bad character in group name '\\xc2\\xb5' "
|
||||
r"at position 3") as w:
|
||||
with self.assertRaisesRegex(IndexError, "unknown group name '\xc2\xb5'"):
|
||||
re.sub(b'(?P<a>x)', b'\\g<\xc2\xb5>', b'xx')
|
||||
self.assertEqual(w.filename, __file__)
|
||||
self.checkTemplateError(b'(?P<a>x)', b'\\g<\xc2\xb5>', b'xx',
|
||||
r"bad character in group name '\xc2\xb5'", 3)
|
||||
self.checkTemplateError('(?P<a>x)', r'\g<㊀>', 'xx',
|
||||
"bad character in group name '㊀'", 3)
|
||||
self.checkTemplateError('(?P<a>x)', r'\g<¹>', 'xx',
|
||||
"bad character in group name '¹'", 3)
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r"bad character in group name '१' "
|
||||
r"at position 3"):
|
||||
re.sub('(?P<a>x)', r'\g<१>', 'xx')
|
||||
self.checkTemplateError('(?P<a>x)', r'\g<१>', 'xx',
|
||||
"bad character in group name '१'", 3)
|
||||
|
||||
def test_re_subn(self):
|
||||
self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
|
||||
|
@ -616,27 +594,18 @@ class ReTests(unittest.TestCase):
|
|||
self.checkPatternError(r'(?P<a>)(?(0)a|b)', 'bad group number', 10)
|
||||
self.checkPatternError(r'()(?(-1)a|b)',
|
||||
"bad character in group name '-1'", 5)
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r"bad character in group name '\+1' "
|
||||
r"at position 5") as w:
|
||||
re.compile(r'()(?(+1)a|b)')
|
||||
self.assertEqual(w.filename, __file__)
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r"bad character in group name '1_0' "
|
||||
r"at position 23"):
|
||||
re.compile(r'()'*10 + r'(?(1_0)a|b)')
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r"bad character in group name ' 1 ' "
|
||||
r"at position 5"):
|
||||
re.compile(r'()(?( 1 )a|b)')
|
||||
self.checkPatternError(r'()(?(+1)a|b)',
|
||||
"bad character in group name '+1'", 5)
|
||||
self.checkPatternError(r'()'*10 + r'(?(1_0)a|b)',
|
||||
"bad character in group name '1_0'", 23)
|
||||
self.checkPatternError(r'()(?( 1 )a|b)',
|
||||
"bad character in group name ' 1 '", 5)
|
||||
self.checkPatternError(r'()(?(㊀)a|b)',
|
||||
"bad character in group name '㊀'", 5)
|
||||
self.checkPatternError(r'()(?(¹)a|b)',
|
||||
"bad character in group name '¹'", 5)
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r"bad character in group name '१' "
|
||||
r"at position 5"):
|
||||
re.compile(r'()(?(१)a|b)')
|
||||
self.checkPatternError(r'()(?(१)a|b)',
|
||||
"bad character in group name '१'", 5)
|
||||
self.checkPatternError(r'()(?(1',
|
||||
"missing ), unterminated name", 5)
|
||||
self.checkPatternError(r'()(?(1)a',
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue