mirror of
https://github.com/python/cpython.git
synced 2025-08-04 08:59:19 +00:00
bpo-40480 "fnmatch" exponential execution time (GH-19908)
bpo-40480: create different regexps in the presence of multiple `*` patterns to prevent fnmatch() from taking exponential time.
This commit is contained in:
parent
96074de573
commit
b9c46a2c2d
3 changed files with 71 additions and 7 deletions
|
@ -77,15 +77,19 @@ def translate(pat):
|
|||
There is no way to quote meta-characters.
|
||||
"""
|
||||
|
||||
STAR = object()
|
||||
res = []
|
||||
add = res.append
|
||||
i, n = 0, len(pat)
|
||||
res = ''
|
||||
while i < n:
|
||||
c = pat[i]
|
||||
i = i+1
|
||||
if c == '*':
|
||||
res = res + '.*'
|
||||
# compress consecutive `*` into one
|
||||
if (not res) or res[-1] is not STAR:
|
||||
add(STAR)
|
||||
elif c == '?':
|
||||
res = res + '.'
|
||||
add('.')
|
||||
elif c == '[':
|
||||
j = i
|
||||
if j < n and pat[j] == '!':
|
||||
|
@ -95,7 +99,7 @@ def translate(pat):
|
|||
while j < n and pat[j] != ']':
|
||||
j = j+1
|
||||
if j >= n:
|
||||
res = res + '\\['
|
||||
add('\\[')
|
||||
else:
|
||||
stuff = pat[i:j]
|
||||
if '--' not in stuff:
|
||||
|
@ -122,7 +126,49 @@ def translate(pat):
|
|||
stuff = '^' + stuff[1:]
|
||||
elif stuff[0] in ('^', '['):
|
||||
stuff = '\\' + stuff
|
||||
res = '%s[%s]' % (res, stuff)
|
||||
add(f'[{stuff}]')
|
||||
else:
|
||||
res = res + re.escape(c)
|
||||
return r'(?s:%s)\Z' % res
|
||||
add(re.escape(c))
|
||||
assert i == n
|
||||
|
||||
# Deal with STARs.
|
||||
inp = res
|
||||
res = []
|
||||
add = res.append
|
||||
i, n = 0, len(inp)
|
||||
# Fixed pieces at the start?
|
||||
while i < n and inp[i] is not STAR:
|
||||
add(inp[i])
|
||||
i += 1
|
||||
# Now deal with STAR fixed STAR fixed ...
|
||||
# For an interior `STAR fixed` pairing, we want to do a minimal
|
||||
# .*? match followed by `fixed`, with no possibility of backtracking.
|
||||
# We can't spell that directly, but can trick it into working by matching
|
||||
# .*?fixed
|
||||
# in a lookahead assertion, save the matched part in a group, then
|
||||
# consume that group via a backreference. If the overall match fails,
|
||||
# the lookahead assertion won't try alternatives. So the translation is:
|
||||
# (?=(?P<name>.*?fixed))(?P=name)
|
||||
# Group names are created as needed: g1, g2, g3, ...
|
||||
groupnum = 0
|
||||
while i < n:
|
||||
assert inp[i] is STAR
|
||||
i += 1
|
||||
if i == n:
|
||||
add(".*")
|
||||
break
|
||||
assert inp[i] is not STAR
|
||||
fixed = []
|
||||
while i < n and inp[i] is not STAR:
|
||||
fixed.append(inp[i])
|
||||
i += 1
|
||||
fixed = "".join(fixed)
|
||||
if i == n:
|
||||
add(".*")
|
||||
add(fixed)
|
||||
else:
|
||||
groupnum += 1
|
||||
add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
|
||||
assert i == n
|
||||
res = "".join(res)
|
||||
return fr'(?s:{res})\Z'
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue