[3.9] gh-90568: Fix exception type for \N with a named sequence in RE (GH-91665) (GH-91830) (GH-91834)

re.error is now raised instead of TypeError.
(cherry picked from commit 6ccfa31421)
(cherry picked from commit 9c18d783c3)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
Miss Islington (bot) 2022-04-22 11:34:31 -07:00 committed by GitHub
parent 2f75d43f1e
commit 97d14e1dfb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 9 additions and 2 deletions

View file

@ -330,7 +330,7 @@ def _class_escape(source, escape):
charname = source.getuntil('}', 'character name')
try:
c = ord(unicodedata.lookup(charname))
except KeyError:
except (KeyError, TypeError):
raise source.error("undefined character name %r" % charname,
len(charname) + len(r'\N{}'))
return LITERAL, c
@ -390,7 +390,7 @@ def _escape(source, escape, state):
charname = source.getuntil('}', 'character name')
try:
c = ord(unicodedata.lookup(charname))
except KeyError:
except (KeyError, TypeError):
raise source.error("undefined character name %r" % charname,
len(charname) + len(r'\N{}'))
return LITERAL, c

View file

@ -753,6 +753,10 @@ class ReTests(unittest.TestCase):
"undefined character name 'SPAM'", 0)
self.checkPatternError(r'[\N{SPAM}]',
"undefined character name 'SPAM'", 1)
self.checkPatternError(r'\N{KEYCAP NUMBER SIGN}',
"undefined character name 'KEYCAP NUMBER SIGN'", 0)
self.checkPatternError(r'[\N{KEYCAP NUMBER SIGN}]',
"undefined character name 'KEYCAP NUMBER SIGN'", 1)
self.checkPatternError(br'\N{LESS-THAN SIGN}', r'bad escape \N', 0)
self.checkPatternError(br'[\N{LESS-THAN SIGN}]', r'bad escape \N', 1)

View file

@ -0,0 +1,3 @@
Parsing ``\N`` escapes of Unicode Named Character Sequences in a
:mod:`regular expression <re>` now raises :exc:`re.error` instead of
``TypeError``.