Issue #6561: '\d' in a regular expression should match only Unicode
character category [Nd], not [No].
2025-10-17 12:18:23 +00:00 · 2009-07-28 17:22:36 +00:00 · 2009-07-28 17:22:36 +00:00 · 1f268285ff
commit 1f268285ff
parent 6bd13fbbc8
4 changed files with 32 additions and 6 deletions
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -605,6 +605,27 @@ class ReTests(unittest.TestCase):
        self.assertEqual(next(iter).span(), (4, 4))
        self.assertRaises(StopIteration, next, iter)

+    def test_bug_6561(self):
+        # '\d' should match characters in Unicode category 'Nd'
+        # (Number, Decimal Digit), but not those in 'Nl' (Number,
+        # Letter) or 'No' (Number, Other).
+        decimal_digits = [
+            '\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
+            '\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
+            '\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
+            ]
+        for x in decimal_digits:
+            self.assertEqual(re.match('^\d$', x).group(0), x)
+
+        not_decimal_digits = [
+            '\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
+            '\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
+            '\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
+            '\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
+            ]
+        for x in not_decimal_digits:
+            self.assertIsNone(re.match('^\d$', x))
+
    def test_empty_array(self):
        # SF buf 1647541
        import array