Issue #6561: '\d' regular expression should not match characters of
category [No]; only those of category [Nd].  (Backport of r74237
from py3k.)
This commit is contained in:
Mark Dickinson 2009-07-28 20:35:03 +00:00
parent 8d31f5413c
commit fe67bd9168
4 changed files with 28 additions and 2 deletions

View file

@ -636,6 +636,27 @@ class ReTests(unittest.TestCase):
self.assertEqual(iter.next().span(), (4, 4))
self.assertRaises(StopIteration, iter.next)
def test_bug_6561(self):
# '\d' should match characters in Unicode category 'Nd'
# (Number, Decimal Digit), but not those in 'Nl' (Number,
# Letter) or 'No' (Number, Other).
decimal_digits = [
u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
]
for x in decimal_digits:
self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
not_decimal_digits = [
u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
]
for x in not_decimal_digits:
self.assertIsNone(re.match('^\d$', x, re.UNICODE))
def test_empty_array(self):
# SF bug 1647541
import array