Add more tests for group names and refs in RE (GH-91695)

(cherry picked from commit 74070085da)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
Miss Islington (bot) 2022-04-19 07:35:21 -07:00 committed by GitHub
parent 1c27a1a7a5
commit 63af7b3b11
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -217,6 +217,16 @@ class ReTests(unittest.TestCase):
re.compile(r'(?P<a>x)(?P=a)(?(a)y)') re.compile(r'(?P<a>x)(?P=a)(?(a)y)')
re.compile(r'(?P<a1>x)(?P=a1)(?(a1)y)') re.compile(r'(?P<a1>x)(?P=a1)(?(a1)y)')
re.compile(r'(?P<a1>x)\1(?(1)y)') re.compile(r'(?P<a1>x)\1(?(1)y)')
re.compile(b'(?P<a1>x)(?P=a1)(?(a1)y)')
# New valid identifiers in Python 3
re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
# Support > 100 groups.
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
pat = '(?:%s)(?(200)z|t)' % pat
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
def test_symbolic_groups_errors(self):
self.checkPatternError(r'(?P<a>)(?P<a>)', self.checkPatternError(r'(?P<a>)(?P<a>)',
"redefinition of group name 'a' as group 2; " "redefinition of group name 'a' as group 2; "
"was group 1") "was group 1")
@ -242,16 +252,22 @@ class ReTests(unittest.TestCase):
self.checkPatternError(r'(?(-1))', "bad character in group name '-1'", 3) self.checkPatternError(r'(?(-1))', "bad character in group name '-1'", 3)
self.checkPatternError(r'(?(1a))', "bad character in group name '1a'", 3) self.checkPatternError(r'(?(1a))', "bad character in group name '1a'", 3)
self.checkPatternError(r'(?(a.))', "bad character in group name 'a.'", 3) self.checkPatternError(r'(?(a.))', "bad character in group name 'a.'", 3)
# New valid/invalid identifiers in Python 3
re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
self.checkPatternError('(?P<©>x)', "bad character in group name '©'", 4) self.checkPatternError('(?P<©>x)', "bad character in group name '©'", 4)
# Support > 100 groups. self.checkPatternError('(?P=©)', "bad character in group name '©'", 4)
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1)) self.checkPatternError('(?(©)y)', "bad character in group name '©'", 3)
pat = '(?:%s)(?(200)z|t)' % pat
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
def test_symbolic_refs(self): def test_symbolic_refs(self):
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
self.assertEqual(re.sub(b'(?P<a1>x)', br'\g<a1>', b'xx'), b'xx')
# New valid identifiers in Python 3
self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
# Support > 100 groups.
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8')
def test_symbolic_refs_errors(self):
self.checkTemplateError('(?P<a>x)', r'\g<a', 'xx', self.checkTemplateError('(?P<a>x)', r'\g<a', 'xx',
'missing >, unterminated name', 3) 'missing >, unterminated name', 3)
self.checkTemplateError('(?P<a>x)', r'\g<', 'xx', self.checkTemplateError('(?P<a>x)', r'\g<', 'xx',
@ -269,18 +285,14 @@ class ReTests(unittest.TestCase):
'invalid group reference 2', 1) 'invalid group reference 2', 1)
with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"): with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"):
re.sub('(?P<a>x)', r'\g<ab>', 'xx') re.sub('(?P<a>x)', r'\g<ab>', 'xx')
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
self.checkTemplateError('(?P<a>x)', r'\g<-1>', 'xx', self.checkTemplateError('(?P<a>x)', r'\g<-1>', 'xx',
"bad character in group name '-1'", 3) "bad character in group name '-1'", 3)
# New valid/invalid identifiers in Python 3
self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
self.checkTemplateError('(?P<a>x)', r'\g<©>', 'xx', self.checkTemplateError('(?P<a>x)', r'\g<©>', 'xx',
"bad character in group name '©'", 3) "bad character in group name '©'", 3)
# Support > 100 groups. self.checkTemplateError('(?P<a>x)', r'\g<㊀>', 'xx',
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1)) "bad character in group name '㊀'", 3)
self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8') self.checkTemplateError('(?P<a>x)', r'\g<¹>', 'xx',
"bad character in group name '¹'", 3)
def test_re_subn(self): def test_re_subn(self):
self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2)) self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
@ -542,9 +554,23 @@ class ReTests(unittest.TestCase):
pat = '(?:%s)(?(200)z)' % pat pat = '(?:%s)(?(200)z)' % pat
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5)) self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
self.checkPatternError(r'(?P<a>)(?(0))', 'bad group number', 10) def test_re_groupref_exists_errors(self):
self.checkPatternError(r'(?P<a>)(?(0)a|b)', 'bad group number', 10)
self.checkPatternError(r'()(?(-1)a|b)',
"bad character in group name '-1'", 5)
self.checkPatternError(r'()(?(㊀)a|b)',
"bad character in group name '㊀'", 5)
self.checkPatternError(r'()(?(¹)a|b)',
"bad character in group name '¹'", 5)
self.checkPatternError(r'()(?(1',
"missing ), unterminated name", 5)
self.checkPatternError(r'()(?(1)a',
"missing ), unterminated subpattern", 2)
self.checkPatternError(r'()(?(1)a|b', self.checkPatternError(r'()(?(1)a|b',
'missing ), unterminated subpattern', 2) 'missing ), unterminated subpattern', 2)
self.checkPatternError(r'()(?(1)a|b|c',
'conditional backref with more than '
'two branches', 10)
self.checkPatternError(r'()(?(1)a|b|c)', self.checkPatternError(r'()(?(1)a|b|c)',
'conditional backref with more than ' 'conditional backref with more than '
'two branches', 10) 'two branches', 10)