mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 03:44:55 +00:00 
			
		
		
		
	Improve performance of this function by a factor of 1.7x. Co-authored-by: Barney Gale <barney.gale@gmail.com>
		
			
				
	
	
		
			186 lines
		
	
	
	
		
			6.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			186 lines
		
	
	
	
		
			6.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
"""Filename matching with shell patterns.
 | 
						|
 | 
						|
fnmatch(FILENAME, PATTERN) matches according to the local convention.
 | 
						|
fnmatchcase(FILENAME, PATTERN) always takes case in account.
 | 
						|
 | 
						|
The functions operate by translating the pattern into a regular
 | 
						|
expression.  They cache the compiled regular expressions for speed.
 | 
						|
 | 
						|
The function translate(PATTERN) returns a regular expression
 | 
						|
corresponding to PATTERN.  (It does not compile it.)
 | 
						|
"""
 | 
						|
import os
 | 
						|
import posixpath
 | 
						|
import re
 | 
						|
import functools
 | 
						|
 | 
						|
# Public names exported by a star-import of this module.
__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
 | 
						|
 | 
						|
def fnmatch(name, pat):
    """Test whether FILENAME matches PATTERN.

    Patterns are Unix shell style:

    *       matches everything
    ?       matches any single character
    [seq]   matches any character in seq
    [!seq]  matches any char not in seq

    An initial period in FILENAME is not special.
    Both FILENAME and PATTERN are first case-normalized
    if the operating system requires it.
    If you don't want this, use fnmatchcase(FILENAME, PATTERN).

    Returns True when the normalized name matches the normalized
    pattern, False otherwise.
    """
    # os.path.normcase applies the platform's case/separator convention;
    # both operands are normalized so they are compared on equal terms.
    name = os.path.normcase(name)
    pat = os.path.normcase(pat)
    return fnmatchcase(name, pat)
 | 
						|
 | 
						|
@functools.lru_cache(maxsize=32768, typed=True)
def _compile_pattern(pat):
    """Return the bound ``match`` method of the regex compiled from *pat*.

    *pat* is a shell pattern given as either ``str`` or ``bytes``; the
    returned matcher accepts names of the same type.  Results are kept
    in an LRU cache so a pattern is translated and compiled only once
    while it stays in the cache.
    """
    if isinstance(pat, bytes):
        # translate() operates on str.  ISO-8859-1 maps each byte value to
        # the code point with the same number, so decoding the pattern and
        # re-encoding the resulting regex round-trips the original bytes.
        pat_str = str(pat, 'ISO-8859-1')
        res_str = translate(pat_str)
        res = bytes(res_str, 'ISO-8859-1')
    else:
        res = translate(pat)
    return re.compile(res).match
 | 
						|
 | 
						|
def filter(names, pat):
    """Construct a list from those elements of the iterable NAMES that match PAT.

    PAT is case-normalized with os.path.normcase before compilation.  Each
    name is normalized the same way for the comparison only; the list that
    is returned contains the names exactly as they were supplied.
    """
    pat = os.path.normcase(pat)
    match = _compile_pattern(pat)
    if os.path is posixpath:
        # normcase on posix is NOP. Optimize it away from the loop.
        return [name for name in names if match(name)]
    return [name for name in names if match(os.path.normcase(name))]
 | 
						|
 | 
						|
def fnmatchcase(name, pat):
    """Test whether FILENAME matches PATTERN, including case.

    This is a version of fnmatch() which doesn't case-normalize
    its arguments.

    Returns True when the whole of *name* matches *pat*, else False.
    """
    match = _compile_pattern(pat)
    # The matcher yields a match object or None; expose a plain bool.
    return match(name) is not None
 | 
						|
 | 
						|
 | 
						|
def translate(pat):
    """Translate a shell PATTERN to a regular expression.

    There is no way to quote meta-characters.

    Returns the regular expression as a string; it is not compiled.
    """
    # '*' is only a placeholder recorded at each wildcard position; the
    # join step decides what regex text every wildcard actually becomes.
    parts, star_indices = _translate(pat, '*', '.')
    return _join_translated_parts(parts, star_indices)
 | 
						|
 | 
						|
# Bound ``sub`` of a regex matching any single '&', '~' or '|' character.
# _translate() calls it to backslash-escape those characters inside a
# character set ("Escape set operations (&&, ~~ and ||)").
_re_setops_sub = re.compile(r'([&~|])').sub
 | 
						|
# re.escape wrapped in a 512-entry LRU cache; _translate() calls it once per
# literal pattern character, so repeated characters are served from the cache.
_re_escape = functools.lru_cache(maxsize=512)(re.escape)
 | 
						|
 | 
						|
def _translate(pat, star, question_mark):
 | 
						|
    res = []
 | 
						|
    add = res.append
 | 
						|
    star_indices = []
 | 
						|
 | 
						|
    i, n = 0, len(pat)
 | 
						|
    while i < n:
 | 
						|
        c = pat[i]
 | 
						|
        i = i+1
 | 
						|
        if c == '*':
 | 
						|
            # store the position of the wildcard
 | 
						|
            star_indices.append(len(res))
 | 
						|
            add(star)
 | 
						|
            # compress consecutive `*` into one
 | 
						|
            while i < n and pat[i] == '*':
 | 
						|
                i += 1
 | 
						|
        elif c == '?':
 | 
						|
            add(question_mark)
 | 
						|
        elif c == '[':
 | 
						|
            j = i
 | 
						|
            if j < n and pat[j] == '!':
 | 
						|
                j = j+1
 | 
						|
            if j < n and pat[j] == ']':
 | 
						|
                j = j+1
 | 
						|
            while j < n and pat[j] != ']':
 | 
						|
                j = j+1
 | 
						|
            if j >= n:
 | 
						|
                add('\\[')
 | 
						|
            else:
 | 
						|
                stuff = pat[i:j]
 | 
						|
                if '-' not in stuff:
 | 
						|
                    stuff = stuff.replace('\\', r'\\')
 | 
						|
                else:
 | 
						|
                    chunks = []
 | 
						|
                    k = i+2 if pat[i] == '!' else i+1
 | 
						|
                    while True:
 | 
						|
                        k = pat.find('-', k, j)
 | 
						|
                        if k < 0:
 | 
						|
                            break
 | 
						|
                        chunks.append(pat[i:k])
 | 
						|
                        i = k+1
 | 
						|
                        k = k+3
 | 
						|
                    chunk = pat[i:j]
 | 
						|
                    if chunk:
 | 
						|
                        chunks.append(chunk)
 | 
						|
                    else:
 | 
						|
                        chunks[-1] += '-'
 | 
						|
                    # Remove empty ranges -- invalid in RE.
 | 
						|
                    for k in range(len(chunks)-1, 0, -1):
 | 
						|
                        if chunks[k-1][-1] > chunks[k][0]:
 | 
						|
                            chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
 | 
						|
                            del chunks[k]
 | 
						|
                    # Escape backslashes and hyphens for set difference (--).
 | 
						|
                    # Hyphens that create ranges shouldn't be escaped.
 | 
						|
                    stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
 | 
						|
                                     for s in chunks)
 | 
						|
                i = j+1
 | 
						|
                if not stuff:
 | 
						|
                    # Empty range: never match.
 | 
						|
                    add('(?!)')
 | 
						|
                elif stuff == '!':
 | 
						|
                    # Negated empty range: match any character.
 | 
						|
                    add('.')
 | 
						|
                else:
 | 
						|
                    # Escape set operations (&&, ~~ and ||).
 | 
						|
                    stuff = _re_setops_sub(r'\\\1', stuff)
 | 
						|
                    if stuff[0] == '!':
 | 
						|
                        stuff = '^' + stuff[1:]
 | 
						|
                    elif stuff[0] in ('^', '['):
 | 
						|
                        stuff = '\\' + stuff
 | 
						|
                    add(f'[{stuff}]')
 | 
						|
        else:
 | 
						|
            add(_re_escape(c))
 | 
						|
    assert i == n
 | 
						|
    return res, star_indices
 | 
						|
 | 
						|
 | 
						|
def _join_translated_parts(parts, star_indices):
 | 
						|
    if not star_indices:
 | 
						|
        return fr'(?s:{"".join(parts)})\Z'
 | 
						|
    iter_star_indices = iter(star_indices)
 | 
						|
    j = next(iter_star_indices)
 | 
						|
    buffer = parts[:j]  # fixed pieces at the start
 | 
						|
    append, extend = buffer.append, buffer.extend
 | 
						|
    i = j + 1
 | 
						|
    for j in iter_star_indices:
 | 
						|
        # Now deal with STAR fixed STAR fixed ...
 | 
						|
        # For an interior `STAR fixed` pairing, we want to do a minimal
 | 
						|
        # .*? match followed by `fixed`, with no possibility of backtracking.
 | 
						|
        # Atomic groups ("(?>...)") allow us to spell that directly.
 | 
						|
        # Note: people rely on the undocumented ability to join multiple
 | 
						|
        # translate() results together via "|" to build large regexps matching
 | 
						|
        # "one of many" shell patterns.
 | 
						|
        append('(?>.*?')
 | 
						|
        extend(parts[i:j])
 | 
						|
        append(')')
 | 
						|
        i = j + 1
 | 
						|
    append('.*')
 | 
						|
    extend(parts[i:])
 | 
						|
    res = ''.join(buffer)
 | 
						|
    return fr'(?s:{res})\Z'
 |