mirror of
https://github.com/python/cpython.git
synced 2025-10-17 04:08:28 +00:00
gh-122288: Improve performances of fnmatch.translate
(#122289)
Improve performance of this function by a factor of 1.7x. Co-authored-by: Barney Gale <barney.gale@gmail.com>
This commit is contained in:
parent
14a05a8f43
commit
78cb377c62
4 changed files with 112 additions and 47 deletions
|
@ -77,24 +77,30 @@ def translate(pat):
|
|||
There is no way to quote meta-characters.
|
||||
"""
|
||||
|
||||
STAR = object()
|
||||
parts = _translate(pat, STAR, '.')
|
||||
return _join_translated_parts(parts, STAR)
|
||||
parts, star_indices = _translate(pat, '*', '.')
|
||||
return _join_translated_parts(parts, star_indices)
|
||||
|
||||
_re_setops_sub = re.compile(r'([&~|])').sub
|
||||
_re_escape = functools.lru_cache(maxsize=512)(re.escape)
|
||||
|
||||
def _translate(pat, STAR, QUESTION_MARK):
|
||||
def _translate(pat, star, question_mark):
|
||||
res = []
|
||||
add = res.append
|
||||
star_indices = []
|
||||
|
||||
i, n = 0, len(pat)
|
||||
while i < n:
|
||||
c = pat[i]
|
||||
i = i+1
|
||||
if c == '*':
|
||||
# store the position of the wildcard
|
||||
star_indices.append(len(res))
|
||||
add(star)
|
||||
# compress consecutive `*` into one
|
||||
if (not res) or res[-1] is not STAR:
|
||||
add(STAR)
|
||||
while i < n and pat[i] == '*':
|
||||
i += 1
|
||||
elif c == '?':
|
||||
add(QUESTION_MARK)
|
||||
add(question_mark)
|
||||
elif c == '[':
|
||||
j = i
|
||||
if j < n and pat[j] == '!':
|
||||
|
@ -133,8 +139,6 @@ def _translate(pat, STAR, QUESTION_MARK):
|
|||
# Hyphens that create ranges shouldn't be escaped.
|
||||
stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
|
||||
for s in chunks)
|
||||
# Escape set operations (&&, ~~ and ||).
|
||||
stuff = re.sub(r'([&~|])', r'\\\1', stuff)
|
||||
i = j+1
|
||||
if not stuff:
|
||||
# Empty range: never match.
|
||||
|
@ -143,50 +147,40 @@ def _translate(pat, STAR, QUESTION_MARK):
|
|||
# Negated empty range: match any character.
|
||||
add('.')
|
||||
else:
|
||||
# Escape set operations (&&, ~~ and ||).
|
||||
stuff = _re_setops_sub(r'\\\1', stuff)
|
||||
if stuff[0] == '!':
|
||||
stuff = '^' + stuff[1:]
|
||||
elif stuff[0] in ('^', '['):
|
||||
stuff = '\\' + stuff
|
||||
add(f'[{stuff}]')
|
||||
else:
|
||||
add(re.escape(c))
|
||||
add(_re_escape(c))
|
||||
assert i == n
|
||||
return res
|
||||
return res, star_indices
|
||||
|
||||
|
||||
def _join_translated_parts(inp, STAR):
|
||||
# Deal with STARs.
|
||||
res = []
|
||||
add = res.append
|
||||
i, n = 0, len(inp)
|
||||
# Fixed pieces at the start?
|
||||
while i < n and inp[i] is not STAR:
|
||||
add(inp[i])
|
||||
i += 1
|
||||
# Now deal with STAR fixed STAR fixed ...
|
||||
# For an interior `STAR fixed` pairing, we want to do a minimal
|
||||
# .*? match followed by `fixed`, with no possibility of backtracking.
|
||||
# Atomic groups ("(?>...)") allow us to spell that directly.
|
||||
# Note: people rely on the undocumented ability to join multiple
|
||||
# translate() results together via "|" to build large regexps matching
|
||||
# "one of many" shell patterns.
|
||||
while i < n:
|
||||
assert inp[i] is STAR
|
||||
i += 1
|
||||
if i == n:
|
||||
add(".*")
|
||||
break
|
||||
assert inp[i] is not STAR
|
||||
fixed = []
|
||||
while i < n and inp[i] is not STAR:
|
||||
fixed.append(inp[i])
|
||||
i += 1
|
||||
fixed = "".join(fixed)
|
||||
if i == n:
|
||||
add(".*")
|
||||
add(fixed)
|
||||
else:
|
||||
add(f"(?>.*?{fixed})")
|
||||
assert i == n
|
||||
res = "".join(res)
|
||||
def _join_translated_parts(parts, star_indices):
|
||||
if not star_indices:
|
||||
return fr'(?s:{"".join(parts)})\Z'
|
||||
iter_star_indices = iter(star_indices)
|
||||
j = next(iter_star_indices)
|
||||
buffer = parts[:j] # fixed pieces at the start
|
||||
append, extend = buffer.append, buffer.extend
|
||||
i = j + 1
|
||||
for j in iter_star_indices:
|
||||
# Now deal with STAR fixed STAR fixed ...
|
||||
# For an interior `STAR fixed` pairing, we want to do a minimal
|
||||
# .*? match followed by `fixed`, with no possibility of backtracking.
|
||||
# Atomic groups ("(?>...)") allow us to spell that directly.
|
||||
# Note: people rely on the undocumented ability to join multiple
|
||||
# translate() results together via "|" to build large regexps matching
|
||||
# "one of many" shell patterns.
|
||||
append('(?>.*?')
|
||||
extend(parts[i:j])
|
||||
append(')')
|
||||
i = j + 1
|
||||
append('.*')
|
||||
extend(parts[i:])
|
||||
res = ''.join(buffer)
|
||||
return fr'(?s:{res})\Z'
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue