bpo-30215: Make re.compile() locale agnostic. (#1361)

Compiled regular expression objects with the re.LOCALE flag no longer
depend on the locale at compile time.  Only the locale at matching
time affects the result of matching.
This commit is contained in:
Serhiy Storchaka 2017-05-05 08:53:40 +03:00 committed by GitHub
parent 647c3d381e
commit 898ff03e1e
9 changed files with 141 additions and 23 deletions

View file

@ -1730,6 +1730,38 @@ SUBPATTERN None 0 0
self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5'))
self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5'))
def test_locale_compiled(self):
oldlocale = locale.setlocale(locale.LC_CTYPE)
self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
for loc in 'en_US.iso88591', 'en_US.utf8':
try:
locale.setlocale(locale.LC_CTYPE, loc)
except locale.Error:
# Unsupported locale on this system
self.skipTest('test needs %s locale' % loc)
locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
p1 = re.compile(b'\xc5\xe5', re.L|re.I)
p2 = re.compile(b'[a\xc5][a\xe5]', re.L|re.I)
p3 = re.compile(b'[az\xc5][az\xe5]', re.L|re.I)
p4 = re.compile(b'[^\xc5][^\xe5]', re.L|re.I)
for p in p1, p2, p3:
self.assertTrue(p.match(b'\xc5\xe5'))
self.assertTrue(p.match(b'\xe5\xe5'))
self.assertTrue(p.match(b'\xc5\xc5'))
self.assertIsNone(p4.match(b'\xe5\xc5'))
self.assertIsNone(p4.match(b'\xe5\xe5'))
self.assertIsNone(p4.match(b'\xc5\xc5'))
locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
for p in p1, p2, p3:
self.assertTrue(p.match(b'\xc5\xe5'))
self.assertIsNone(p.match(b'\xe5\xe5'))
self.assertIsNone(p.match(b'\xc5\xc5'))
self.assertTrue(p4.match(b'\xe5\xc5'))
self.assertIsNone(p4.match(b'\xe5\xe5'))
self.assertIsNone(p4.match(b'\xc5\xc5'))
def test_error(self):
with self.assertRaises(re.error) as cm:
re.compile('(\u20ac))')