gh-89973: Fix re.error in the fnmatch module. (GH-93072)

Character ranges with upper bound less than the lower bound (e.g. [c-a])
are now interpreted as empty ranges, for compatibility with other glob
pattern implementations. Previously such ranges raised re.error.
(cherry picked from commit 0902c3d8ed)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
Miss Islington (bot) 2022-06-05 02:39:03 -07:00 committed by GitHub
parent 60adc4b92a
commit 2f8aae38b9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 140 additions and 7 deletions

View file

@ -108,7 +108,7 @@ def translate(pat):
add('\\[')
else:
stuff = pat[i:j]
if '--' not in stuff:
if '-' not in stuff:
stuff = stuff.replace('\\', r'\\')
else:
chunks = []
@ -120,7 +120,16 @@ def translate(pat):
chunks.append(pat[i:k])
i = k+1
k = k+3
chunks.append(pat[i:j])
chunk = pat[i:j]
if chunk:
chunks.append(chunk)
else:
chunks[-1] += '-'
# Remove empty ranges -- invalid in RE.
for k in range(len(chunks)-1, 0, -1):
if chunks[k-1][-1] > chunks[k][0]:
chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
del chunks[k]
# Escape backslashes and hyphens for set difference (--).
# Hyphens that create ranges shouldn't be escaped.
stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
@ -128,11 +137,18 @@ def translate(pat):
# Escape set operations (&&, ~~ and ||).
stuff = re.sub(r'([&~|])', r'\\\1', stuff)
i = j+1
if stuff[0] == '!':
stuff = '^' + stuff[1:]
elif stuff[0] in ('^', '['):
stuff = '\\' + stuff
add(f'[{stuff}]')
if not stuff:
# Empty range: never match.
add('(?!)')
elif stuff == '!':
# Negated empty range: match any character.
add('.')
else:
if stuff[0] == '!':
stuff = '^' + stuff[1:]
elif stuff[0] in ('^', '['):
stuff = '\\' + stuff
add(f'[{stuff}]')
else:
add(re.escape(c))
assert i == n