mirror of
https://github.com/python/cpython.git
synced 2025-07-29 14:15:07 +00:00
[3.13] gh-126505: Fix bugs in compiling case-insensitive character classes (GH-126557) (GH-126689)
* upper-case non-BMP character was ignored
* the ASCII flag was ignored when matching a character range whose
upper bound is beyond the BMP region
(cherry picked from commit 819830f34a)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
parent
fc10908f7d
commit
7db6d4282f
3 changed files with 73 additions and 9 deletions
|
@ -248,11 +248,11 @@ def _optimize_charset(charset, iscased=None, fixup=None, fixes=None):
|
|||
while True:
|
||||
try:
|
||||
if op is LITERAL:
|
||||
if fixup:
|
||||
lo = fixup(av)
|
||||
charmap[lo] = 1
|
||||
if fixes and lo in fixes:
|
||||
for k in fixes[lo]:
|
||||
if fixup: # IGNORECASE and not LOCALE
|
||||
av = fixup(av)
|
||||
charmap[av] = 1
|
||||
if fixes and av in fixes:
|
||||
for k in fixes[av]:
|
||||
charmap[k] = 1
|
||||
if not hascased and iscased(av):
|
||||
hascased = True
|
||||
|
@ -260,7 +260,7 @@ def _optimize_charset(charset, iscased=None, fixup=None, fixes=None):
|
|||
charmap[av] = 1
|
||||
elif op is RANGE:
|
||||
r = range(av[0], av[1]+1)
|
||||
if fixup:
|
||||
if fixup: # IGNORECASE and not LOCALE
|
||||
if fixes:
|
||||
for i in map(fixup, r):
|
||||
charmap[i] = 1
|
||||
|
@ -287,8 +287,7 @@ def _optimize_charset(charset, iscased=None, fixup=None, fixes=None):
|
|||
# Character set contains non-BMP character codes.
|
||||
# For range, all BMP characters in the range are already
|
||||
# processed.
|
||||
if fixup:
|
||||
hascased = True
|
||||
if fixup: # IGNORECASE and not LOCALE
|
||||
# For now, IN_UNI_IGNORE+LITERAL and
|
||||
# IN_UNI_IGNORE+RANGE_UNI_IGNORE work for all non-BMP
|
||||
# characters, because two characters (at least one of
|
||||
|
@ -299,7 +298,13 @@ def _optimize_charset(charset, iscased=None, fixup=None, fixes=None):
|
|||
# Also, both c.lower() and c.lower().upper() are single
|
||||
# characters for every non-BMP character.
|
||||
if op is RANGE:
|
||||
op = RANGE_UNI_IGNORE
|
||||
if fixes: # not ASCII
|
||||
op = RANGE_UNI_IGNORE
|
||||
hascased = True
|
||||
else:
|
||||
assert op is LITERAL
|
||||
if not hascased and iscased(av):
|
||||
hascased = True
|
||||
tail.append((op, av))
|
||||
break
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue