Mirror of https://github.com/python/cpython.git (synced 2025-10-14 18:59:46 +00:00)
Issue #3665: \u and \U escapes are now supported in unicode regular expressions.
Patch by Serhiy Storchaka.
This commit is contained in:
Parent: c9aa8425c4
Commit: 463badf06c
4 changed files with 144 additions and 34 deletions
|
@ -526,24 +526,92 @@ class ReTests(unittest.TestCase):
|
|||
self.assertNotEqual(re.compile('^pattern$', flag), None)
|
||||
|
||||
def test_sre_character_literals(self):
|
||||
for i in [0, 8, 16, 32, 64, 127, 128, 255]:
|
||||
self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
|
||||
self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
|
||||
self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
|
||||
self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
|
||||
self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
|
||||
self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
|
||||
self.assertRaises(re.error, re.match, "\911", "")
|
||||
for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
|
||||
if i < 256:
|
||||
self.assertIsNotNone(re.match(r"\%03o" % i, chr(i)))
|
||||
self.assertIsNotNone(re.match(r"\%03o0" % i, chr(i)+"0"))
|
||||
self.assertIsNotNone(re.match(r"\%03o8" % i, chr(i)+"8"))
|
||||
self.assertIsNotNone(re.match(r"\x%02x" % i, chr(i)))
|
||||
self.assertIsNotNone(re.match(r"\x%02x0" % i, chr(i)+"0"))
|
||||
self.assertIsNotNone(re.match(r"\x%02xz" % i, chr(i)+"z"))
|
||||
if i < 0x10000:
|
||||
self.assertIsNotNone(re.match(r"\u%04x" % i, chr(i)))
|
||||
self.assertIsNotNone(re.match(r"\u%04x0" % i, chr(i)+"0"))
|
||||
self.assertIsNotNone(re.match(r"\u%04xz" % i, chr(i)+"z"))
|
||||
self.assertIsNotNone(re.match(r"\U%08x" % i, chr(i)))
|
||||
self.assertIsNotNone(re.match(r"\U%08x0" % i, chr(i)+"0"))
|
||||
self.assertIsNotNone(re.match(r"\U%08xz" % i, chr(i)+"z"))
|
||||
self.assertIsNotNone(re.match(r"\0", "\000"))
|
||||
self.assertIsNotNone(re.match(r"\08", "\0008"))
|
||||
self.assertIsNotNone(re.match(r"\01", "\001"))
|
||||
self.assertIsNotNone(re.match(r"\018", "\0018"))
|
||||
self.assertIsNotNone(re.match(r"\567", chr(0o167)))
|
||||
self.assertRaises(re.error, re.match, r"\911", "")
|
||||
self.assertRaises(re.error, re.match, r"\x1", "")
|
||||
self.assertRaises(re.error, re.match, r"\x1z", "")
|
||||
self.assertRaises(re.error, re.match, r"\u123", "")
|
||||
self.assertRaises(re.error, re.match, r"\u123z", "")
|
||||
self.assertRaises(re.error, re.match, r"\U0001234", "")
|
||||
self.assertRaises(re.error, re.match, r"\U0001234z", "")
|
||||
self.assertRaises(re.error, re.match, r"\U00110000", "")
|
||||
|
||||
def test_sre_character_class_literals(self):
|
||||
for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
|
||||
if i < 256:
|
||||
self.assertIsNotNone(re.match(r"[\%o]" % i, chr(i)))
|
||||
self.assertIsNotNone(re.match(r"[\%o8]" % i, chr(i)))
|
||||
self.assertIsNotNone(re.match(r"[\%03o]" % i, chr(i)))
|
||||
self.assertIsNotNone(re.match(r"[\%03o0]" % i, chr(i)))
|
||||
self.assertIsNotNone(re.match(r"[\%03o8]" % i, chr(i)))
|
||||
self.assertIsNotNone(re.match(r"[\x%02x]" % i, chr(i)))
|
||||
self.assertIsNotNone(re.match(r"[\x%02x0]" % i, chr(i)))
|
||||
self.assertIsNotNone(re.match(r"[\x%02xz]" % i, chr(i)))
|
||||
if i < 0x10000:
|
||||
self.assertIsNotNone(re.match(r"[\u%04x]" % i, chr(i)))
|
||||
self.assertIsNotNone(re.match(r"[\u%04x0]" % i, chr(i)))
|
||||
self.assertIsNotNone(re.match(r"[\u%04xz]" % i, chr(i)))
|
||||
self.assertIsNotNone(re.match(r"[\U%08x]" % i, chr(i)))
|
||||
self.assertIsNotNone(re.match(r"[\U%08x0]" % i, chr(i)+"0"))
|
||||
self.assertIsNotNone(re.match(r"[\U%08xz]" % i, chr(i)+"z"))
|
||||
self.assertRaises(re.error, re.match, r"[\911]", "")
|
||||
self.assertRaises(re.error, re.match, r"[\x1z]", "")
|
||||
self.assertRaises(re.error, re.match, r"[\u123z]", "")
|
||||
self.assertRaises(re.error, re.match, r"[\U0001234z]", "")
|
||||
self.assertRaises(re.error, re.match, r"[\U00110000]", "")
|
||||
|
||||
def test_sre_byte_literals(self):
|
||||
for i in [0, 8, 16, 32, 64, 127, 128, 255]:
|
||||
self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
|
||||
self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
|
||||
self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
|
||||
self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
|
||||
self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
|
||||
self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
|
||||
self.assertRaises(re.error, re.match, "[\911]", "")
|
||||
self.assertIsNotNone(re.match((r"\%03o" % i).encode(), bytes([i])))
|
||||
self.assertIsNotNone(re.match((r"\%03o0" % i).encode(), bytes([i])+b"0"))
|
||||
self.assertIsNotNone(re.match((r"\%03o8" % i).encode(), bytes([i])+b"8"))
|
||||
self.assertIsNotNone(re.match((r"\x%02x" % i).encode(), bytes([i])))
|
||||
self.assertIsNotNone(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0"))
|
||||
self.assertIsNotNone(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z"))
|
||||
self.assertIsNotNone(re.match(br"\u", b'u'))
|
||||
self.assertIsNotNone(re.match(br"\U", b'U'))
|
||||
self.assertIsNotNone(re.match(br"\0", b"\000"))
|
||||
self.assertIsNotNone(re.match(br"\08", b"\0008"))
|
||||
self.assertIsNotNone(re.match(br"\01", b"\001"))
|
||||
self.assertIsNotNone(re.match(br"\018", b"\0018"))
|
||||
self.assertIsNotNone(re.match(br"\567", bytes([0o167])))
|
||||
self.assertRaises(re.error, re.match, br"\911", b"")
|
||||
self.assertRaises(re.error, re.match, br"\x1", b"")
|
||||
self.assertRaises(re.error, re.match, br"\x1z", b"")
|
||||
|
||||
def test_sre_byte_class_literals(self):
|
||||
for i in [0, 8, 16, 32, 64, 127, 128, 255]:
|
||||
self.assertIsNotNone(re.match((r"[\%o]" % i).encode(), bytes([i])))
|
||||
self.assertIsNotNone(re.match((r"[\%o8]" % i).encode(), bytes([i])))
|
||||
self.assertIsNotNone(re.match((r"[\%03o]" % i).encode(), bytes([i])))
|
||||
self.assertIsNotNone(re.match((r"[\%03o0]" % i).encode(), bytes([i])))
|
||||
self.assertIsNotNone(re.match((r"[\%03o8]" % i).encode(), bytes([i])))
|
||||
self.assertIsNotNone(re.match((r"[\x%02x]" % i).encode(), bytes([i])))
|
||||
self.assertIsNotNone(re.match((r"[\x%02x0]" % i).encode(), bytes([i])))
|
||||
self.assertIsNotNone(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
|
||||
self.assertIsNotNone(re.match(br"[\u]", b'u'))
|
||||
self.assertIsNotNone(re.match(br"[\U]", b'U'))
|
||||
self.assertRaises(re.error, re.match, br"[\911]", "")
|
||||
self.assertRaises(re.error, re.match, br"[\x1z]", "")
|
||||
|
||||
def test_bug_113254(self):
|
||||
self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue