mirror of
https://github.com/python/cpython.git
synced 2025-07-12 22:05:16 +00:00
GH-117586: Speed up pathlib.Path.glob()
by working with strings (#117589)
Move the pathlib globbing implementation into a new private class, `glob._Globber`, which implements fast string-based globbing. It is called by `pathlib.Path.glob()`, which then converts the resulting strings back to path objects. In the private pathlib ABCs, add a `pathlib._abc.Globber` subclass that works with `PathBase` objects rather than strings, and calls user-defined path methods like `PathBase.stat()` rather than `os.stat()`. This sets the stage for two more improvements: (1) GH-115060: query non-wildcard segments with `lstat()`; (2) GH-116380: unify the `pathlib` and `glob` implementations of globbing. The implementations of `glob.glob()` and `glob.iglob()` are unchanged.
This commit is contained in:
parent
689ada7915
commit
6258844c27
4 changed files with 269 additions and 195 deletions
186
Lib/glob.py
186
Lib/glob.py
|
@ -4,7 +4,9 @@ import contextlib
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import fnmatch
|
import fnmatch
|
||||||
|
import functools
|
||||||
import itertools
|
import itertools
|
||||||
|
import operator
|
||||||
import stat
|
import stat
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
@ -256,7 +258,9 @@ def escape(pathname):
|
||||||
return drive + pathname
|
return drive + pathname
|
||||||
|
|
||||||
|
|
||||||
|
_special_parts = ('', '.', '..')
|
||||||
_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)
|
_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)
|
||||||
|
_no_recurse_symlinks = object()
|
||||||
|
|
||||||
|
|
||||||
def translate(pat, *, recursive=False, include_hidden=False, seps=None):
|
def translate(pat, *, recursive=False, include_hidden=False, seps=None):
|
||||||
|
@ -312,3 +316,185 @@ def translate(pat, *, recursive=False, include_hidden=False, seps=None):
|
||||||
results.append(any_sep)
|
results.append(any_sep)
|
||||||
res = ''.join(results)
|
res = ''.join(results)
|
||||||
return fr'(?s:{res})\Z'
|
return fr'(?s:{res})\Z'
|
||||||
|
|
||||||
|
|
||||||
|
@functools.lru_cache(maxsize=512)
def _compile_pattern(pat, sep, case_sensitive, recursive=True):
    """Compile a glob pattern and return the compiled pattern's ``match`` method.

    The pattern is converted to a regular expression with translate(),
    always with ``include_hidden=True`` and with *sep* as the separator(s).
    When *case_sensitive* is false the expression is compiled with
    ``re.IGNORECASE``.  *recursive* is forwarded unchanged to translate().

    Note that the return value is the bound ``match`` method of the compiled
    ``re.Pattern``, not the pattern object itself, so callers invoke it
    directly: ``match(string)`` or ``match(string, pos)``.

    Results are memoized (up to 512 distinct argument combinations), so all
    arguments must be hashable.
    """
    flags = re.NOFLAG if case_sensitive else re.IGNORECASE
    regex = translate(pat, recursive=recursive, include_hidden=True, seps=sep)
    return re.compile(regex, flags=flags).match
|
||||||
|
|
||||||
|
|
||||||
|
class _Globber:
|
||||||
|
"""Class providing shell-style pattern matching and globbing.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, sep, case_sensitive, recursive=False):
|
||||||
|
self.sep = sep
|
||||||
|
self.case_sensitive = case_sensitive
|
||||||
|
self.recursive = recursive
|
||||||
|
|
||||||
|
# Low-level methods
|
||||||
|
|
||||||
|
lstat = staticmethod(os.lstat)
|
||||||
|
scandir = staticmethod(os.scandir)
|
||||||
|
parse_entry = operator.attrgetter('path')
|
||||||
|
concat_path = operator.add
|
||||||
|
|
||||||
|
if os.name == 'nt':
|
||||||
|
@staticmethod
|
||||||
|
def add_slash(pathname):
|
||||||
|
tail = os.path.splitroot(pathname)[2]
|
||||||
|
if not tail or tail[-1] in '\\/':
|
||||||
|
return pathname
|
||||||
|
return f'{pathname}\\'
|
||||||
|
else:
|
||||||
|
@staticmethod
|
||||||
|
def add_slash(pathname):
|
||||||
|
if not pathname or pathname[-1] == '/':
|
||||||
|
return pathname
|
||||||
|
return f'{pathname}/'
|
||||||
|
|
||||||
|
# High-level methods
|
||||||
|
|
||||||
|
def compile(self, pat):
|
||||||
|
return _compile_pattern(pat, self.sep, self.case_sensitive, self.recursive)
|
||||||
|
|
||||||
|
def selector(self, parts):
|
||||||
|
"""Returns a function that selects from a given path, walking and
|
||||||
|
filtering according to the glob-style pattern parts in *parts*.
|
||||||
|
"""
|
||||||
|
if not parts:
|
||||||
|
return self.select_exists
|
||||||
|
part = parts.pop()
|
||||||
|
if self.recursive and part == '**':
|
||||||
|
selector = self.recursive_selector
|
||||||
|
elif part in _special_parts:
|
||||||
|
selector = self.special_selector
|
||||||
|
else:
|
||||||
|
selector = self.wildcard_selector
|
||||||
|
return selector(part, parts)
|
||||||
|
|
||||||
|
def special_selector(self, part, parts):
|
||||||
|
"""Returns a function that selects special children of the given path.
|
||||||
|
"""
|
||||||
|
select_next = self.selector(parts)
|
||||||
|
|
||||||
|
def select_special(path, exists=False):
|
||||||
|
path = self.concat_path(self.add_slash(path), part)
|
||||||
|
return select_next(path, exists)
|
||||||
|
return select_special
|
||||||
|
|
||||||
|
def wildcard_selector(self, part, parts):
|
||||||
|
"""Returns a function that selects direct children of a given path,
|
||||||
|
filtering by pattern.
|
||||||
|
"""
|
||||||
|
|
||||||
|
match = None if part == '*' else self.compile(part)
|
||||||
|
dir_only = bool(parts)
|
||||||
|
if dir_only:
|
||||||
|
select_next = self.selector(parts)
|
||||||
|
|
||||||
|
def select_wildcard(path, exists=False):
|
||||||
|
try:
|
||||||
|
# We must close the scandir() object before proceeding to
|
||||||
|
# avoid exhausting file descriptors when globbing deep trees.
|
||||||
|
with self.scandir(path) as scandir_it:
|
||||||
|
entries = list(scandir_it)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
for entry in entries:
|
||||||
|
if match is None or match(entry.name):
|
||||||
|
if dir_only:
|
||||||
|
try:
|
||||||
|
if not entry.is_dir():
|
||||||
|
continue
|
||||||
|
except OSError:
|
||||||
|
continue
|
||||||
|
entry_path = self.parse_entry(entry)
|
||||||
|
if dir_only:
|
||||||
|
yield from select_next(entry_path, exists=True)
|
||||||
|
else:
|
||||||
|
yield entry_path
|
||||||
|
return select_wildcard
|
||||||
|
|
||||||
|
def recursive_selector(self, part, parts):
|
||||||
|
"""Returns a function that selects a given path and all its children,
|
||||||
|
recursively, filtering by pattern.
|
||||||
|
"""
|
||||||
|
# Optimization: consume following '**' parts, which have no effect.
|
||||||
|
while parts and parts[-1] == '**':
|
||||||
|
parts.pop()
|
||||||
|
|
||||||
|
# Optimization: consume and join any following non-special parts here,
|
||||||
|
# rather than leaving them for the next selector. They're used to
|
||||||
|
# build a regular expression, which we use to filter the results of
|
||||||
|
# the recursive walk. As a result, non-special pattern segments
|
||||||
|
# following a '**' wildcard don't require additional filesystem access
|
||||||
|
# to expand.
|
||||||
|
follow_symlinks = self.recursive is not _no_recurse_symlinks
|
||||||
|
if follow_symlinks:
|
||||||
|
while parts and parts[-1] not in _special_parts:
|
||||||
|
part += self.sep + parts.pop()
|
||||||
|
|
||||||
|
match = None if part == '**' else self.compile(part)
|
||||||
|
dir_only = bool(parts)
|
||||||
|
select_next = self.selector(parts)
|
||||||
|
|
||||||
|
def select_recursive(path, exists=False):
|
||||||
|
path = self.add_slash(path)
|
||||||
|
match_pos = len(str(path))
|
||||||
|
if match is None or match(str(path), match_pos):
|
||||||
|
yield from select_next(path, exists)
|
||||||
|
stack = [path]
|
||||||
|
while stack:
|
||||||
|
yield from select_recursive_step(stack, match_pos)
|
||||||
|
|
||||||
|
def select_recursive_step(stack, match_pos):
|
||||||
|
path = stack.pop()
|
||||||
|
try:
|
||||||
|
# We must close the scandir() object before proceeding to
|
||||||
|
# avoid exhausting file descriptors when globbing deep trees.
|
||||||
|
with self.scandir(path) as scandir_it:
|
||||||
|
entries = list(scandir_it)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
for entry in entries:
|
||||||
|
is_dir = False
|
||||||
|
try:
|
||||||
|
if entry.is_dir(follow_symlinks=follow_symlinks):
|
||||||
|
is_dir = True
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if is_dir or not dir_only:
|
||||||
|
entry_path = self.parse_entry(entry)
|
||||||
|
if match is None or match(str(entry_path), match_pos):
|
||||||
|
if dir_only:
|
||||||
|
yield from select_next(entry_path, exists=True)
|
||||||
|
else:
|
||||||
|
# Optimization: directly yield the path if this is
|
||||||
|
# last pattern part.
|
||||||
|
yield entry_path
|
||||||
|
if is_dir:
|
||||||
|
stack.append(entry_path)
|
||||||
|
|
||||||
|
return select_recursive
|
||||||
|
|
||||||
|
def select_exists(self, path, exists=False):
|
||||||
|
"""Yields the given path, if it exists.
|
||||||
|
"""
|
||||||
|
if exists:
|
||||||
|
# Optimization: this path is already known to exist, e.g. because
|
||||||
|
# it was returned from os.scandir(), so we skip calling lstat().
|
||||||
|
yield path
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
self.lstat(path)
|
||||||
|
yield path
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
|
@ -5,8 +5,10 @@ paths with operations that have semantics appropriate for different
|
||||||
operating systems.
|
operating systems.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import glob
|
||||||
import io
|
import io
|
||||||
import ntpath
|
import ntpath
|
||||||
|
import operator
|
||||||
import os
|
import os
|
||||||
import posixpath
|
import posixpath
|
||||||
import sys
|
import sys
|
||||||
|
@ -111,6 +113,7 @@ class PurePath(_abc.PurePathBase):
|
||||||
'_hash',
|
'_hash',
|
||||||
)
|
)
|
||||||
parser = os.path
|
parser = os.path
|
||||||
|
_globber = glob._Globber
|
||||||
|
|
||||||
def __new__(cls, *args, **kwargs):
|
def __new__(cls, *args, **kwargs):
|
||||||
"""Construct a PurePath from one or several strings and or existing
|
"""Construct a PurePath from one or several strings and or existing
|
||||||
|
@ -253,14 +256,17 @@ class PurePath(_abc.PurePathBase):
|
||||||
return cls.parser.sep.join(tail)
|
return cls.parser.sep.join(tail)
|
||||||
|
|
||||||
def _from_parsed_parts(self, drv, root, tail):
|
def _from_parsed_parts(self, drv, root, tail):
|
||||||
path_str = self._format_parsed_parts(drv, root, tail)
|
path = self._from_parsed_string(self._format_parsed_parts(drv, root, tail))
|
||||||
path = self.with_segments(path_str)
|
|
||||||
path._str = path_str or '.'
|
|
||||||
path._drv = drv
|
path._drv = drv
|
||||||
path._root = root
|
path._root = root
|
||||||
path._tail_cached = tail
|
path._tail_cached = tail
|
||||||
return path
|
return path
|
||||||
|
|
||||||
|
def _from_parsed_string(self, path_str):
|
||||||
|
path = self.with_segments(path_str)
|
||||||
|
path._str = path_str or '.'
|
||||||
|
return path
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _parse_path(cls, path):
|
def _parse_path(cls, path):
|
||||||
if not path:
|
if not path:
|
||||||
|
@ -453,21 +459,6 @@ class PurePath(_abc.PurePathBase):
|
||||||
from urllib.parse import quote_from_bytes
|
from urllib.parse import quote_from_bytes
|
||||||
return prefix + quote_from_bytes(os.fsencode(path))
|
return prefix + quote_from_bytes(os.fsencode(path))
|
||||||
|
|
||||||
@property
|
|
||||||
def _pattern_stack(self):
|
|
||||||
"""Stack of path components, to be used with patterns in glob()."""
|
|
||||||
parts = self._tail.copy()
|
|
||||||
pattern = self._raw_path
|
|
||||||
if self.anchor:
|
|
||||||
raise NotImplementedError("Non-relative patterns are unsupported")
|
|
||||||
elif not parts:
|
|
||||||
raise ValueError("Unacceptable pattern: {!r}".format(pattern))
|
|
||||||
elif pattern[-1] in (self.parser.sep, self.parser.altsep):
|
|
||||||
# GH-65238: pathlib doesn't preserve trailing slash. Add it back.
|
|
||||||
parts.append('')
|
|
||||||
parts.reverse()
|
|
||||||
return parts
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def _pattern_str(self):
|
def _pattern_str(self):
|
||||||
"""The path expressed as a string, for use in pattern-matching."""
|
"""The path expressed as a string, for use in pattern-matching."""
|
||||||
|
@ -576,6 +567,17 @@ class Path(_abc.PathBase, PurePath):
|
||||||
encoding = io.text_encoding(encoding)
|
encoding = io.text_encoding(encoding)
|
||||||
return _abc.PathBase.write_text(self, data, encoding, errors, newline)
|
return _abc.PathBase.write_text(self, data, encoding, errors, newline)
|
||||||
|
|
||||||
|
_remove_leading_dot = operator.itemgetter(slice(2, None))
|
||||||
|
_remove_trailing_slash = operator.itemgetter(slice(-1))
|
||||||
|
|
||||||
|
def _filter_trailing_slash(self, paths):
|
||||||
|
sep = self.parser.sep
|
||||||
|
anchor_len = len(self.anchor)
|
||||||
|
for path_str in paths:
|
||||||
|
if len(path_str) > anchor_len and path_str[-1] == sep:
|
||||||
|
path_str = path_str[:-1]
|
||||||
|
yield path_str
|
||||||
|
|
||||||
def iterdir(self):
|
def iterdir(self):
|
||||||
"""Yield path objects of the directory contents.
|
"""Yield path objects of the directory contents.
|
||||||
|
|
||||||
|
@ -587,13 +589,9 @@ class Path(_abc.PathBase, PurePath):
|
||||||
def _scandir(self):
|
def _scandir(self):
|
||||||
return os.scandir(self)
|
return os.scandir(self)
|
||||||
|
|
||||||
def _direntry_str(self, entry):
|
|
||||||
# Transform an entry yielded from _scandir() into a path string.
|
|
||||||
return entry.name if str(self) == '.' else entry.path
|
|
||||||
|
|
||||||
def _make_child_direntry(self, entry):
|
def _make_child_direntry(self, entry):
|
||||||
# Transform an entry yielded from _scandir() into a path object.
|
# Transform an entry yielded from _scandir() into a path object.
|
||||||
path_str = self._direntry_str(entry)
|
path_str = entry.name if str(self) == '.' else entry.path
|
||||||
path = self.with_segments(path_str)
|
path = self.with_segments(path_str)
|
||||||
path._str = path_str
|
path._str = path_str
|
||||||
path._drv = self.drive
|
path._drv = self.drive
|
||||||
|
@ -626,8 +624,30 @@ class Path(_abc.PathBase, PurePath):
|
||||||
sys.audit("pathlib.Path.glob", self, pattern)
|
sys.audit("pathlib.Path.glob", self, pattern)
|
||||||
if not isinstance(pattern, PurePath):
|
if not isinstance(pattern, PurePath):
|
||||||
pattern = self.with_segments(pattern)
|
pattern = self.with_segments(pattern)
|
||||||
return _abc.PathBase.glob(
|
if pattern.anchor:
|
||||||
self, pattern, case_sensitive=case_sensitive, recurse_symlinks=recurse_symlinks)
|
raise NotImplementedError("Non-relative patterns are unsupported")
|
||||||
|
parts = pattern._tail.copy()
|
||||||
|
if not parts:
|
||||||
|
raise ValueError("Unacceptable pattern: {!r}".format(pattern))
|
||||||
|
raw = pattern._raw_path
|
||||||
|
if raw[-1] in (self.parser.sep, self.parser.altsep):
|
||||||
|
# GH-65238: pathlib doesn't preserve trailing slash. Add it back.
|
||||||
|
parts.append('')
|
||||||
|
if not self.is_dir():
|
||||||
|
return iter([])
|
||||||
|
select = self._glob_selector(parts[::-1], case_sensitive, recurse_symlinks)
|
||||||
|
root = str(self)
|
||||||
|
paths = select(root, exists=True)
|
||||||
|
|
||||||
|
# Normalize results
|
||||||
|
if root == '.':
|
||||||
|
paths = map(self._remove_leading_dot, paths)
|
||||||
|
if parts[-1] == '':
|
||||||
|
paths = map(self._remove_trailing_slash, paths)
|
||||||
|
elif parts[-1] == '**':
|
||||||
|
paths = self._filter_trailing_slash(paths)
|
||||||
|
paths = map(self._from_parsed_string, paths)
|
||||||
|
return paths
|
||||||
|
|
||||||
def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=False):
|
def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=False):
|
||||||
"""Recursively yield all existing files (of any kind, including
|
"""Recursively yield all existing files (of any kind, including
|
||||||
|
@ -638,8 +658,7 @@ class Path(_abc.PathBase, PurePath):
|
||||||
if not isinstance(pattern, PurePath):
|
if not isinstance(pattern, PurePath):
|
||||||
pattern = self.with_segments(pattern)
|
pattern = self.with_segments(pattern)
|
||||||
pattern = '**' / pattern
|
pattern = '**' / pattern
|
||||||
return _abc.PathBase.glob(
|
return self.glob(pattern, case_sensitive=case_sensitive, recurse_symlinks=recurse_symlinks)
|
||||||
self, pattern, case_sensitive=case_sensitive, recurse_symlinks=recurse_symlinks)
|
|
||||||
|
|
||||||
def walk(self, top_down=True, on_error=None, follow_symlinks=False):
|
def walk(self, top_down=True, on_error=None, follow_symlinks=False):
|
||||||
"""Walk the directory tree from this directory, similar to os.walk()."""
|
"""Walk the directory tree from this directory, similar to os.walk()."""
|
||||||
|
@ -669,9 +688,7 @@ class Path(_abc.PathBase, PurePath):
|
||||||
# of joining, and we exploit the fact that getcwd() returns a
|
# of joining, and we exploit the fact that getcwd() returns a
|
||||||
# fully-normalized string by storing it in _str. This is used to
|
# fully-normalized string by storing it in _str. This is used to
|
||||||
# implement Path.cwd().
|
# implement Path.cwd().
|
||||||
result = self.with_segments(cwd)
|
return self._from_parsed_string(cwd)
|
||||||
result._str = cwd
|
|
||||||
return result
|
|
||||||
drive, root, rel = os.path.splitroot(cwd)
|
drive, root, rel = os.path.splitroot(cwd)
|
||||||
if not rel:
|
if not rel:
|
||||||
return self._from_parsed_parts(drive, root, self._tail)
|
return self._from_parsed_parts(drive, root, self._tail)
|
||||||
|
|
|
@ -12,6 +12,8 @@ resemble pathlib's PurePath and Path respectively.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import functools
|
import functools
|
||||||
|
import glob
|
||||||
|
import operator
|
||||||
from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL
|
from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL
|
||||||
from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
|
from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
|
||||||
|
|
||||||
|
@ -40,109 +42,23 @@ def _ignore_error(exception):
|
||||||
def _is_case_sensitive(parser):
|
def _is_case_sensitive(parser):
|
||||||
return parser.normcase('Aa') == 'Aa'
|
return parser.normcase('Aa') == 'Aa'
|
||||||
|
|
||||||
#
|
|
||||||
# Globbing helpers
|
|
||||||
#
|
|
||||||
|
|
||||||
re = glob = None
|
class Globber(glob._Globber):
|
||||||
|
lstat = operator.methodcaller('lstat')
|
||||||
|
scandir = operator.methodcaller('_scandir')
|
||||||
|
add_slash = operator.methodcaller('joinpath', '')
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def concat_path(path, text):
|
||||||
|
"""Appends text to the given path.
|
||||||
|
"""
|
||||||
|
return path.with_segments(path._raw_path + text)
|
||||||
|
|
||||||
@functools.lru_cache(maxsize=512)
|
@staticmethod
|
||||||
def _compile_pattern(pat, sep, case_sensitive, recursive=True):
|
def parse_entry(entry):
|
||||||
"""Compile given glob pattern to a re.Pattern object (observing case
|
"""Returns the path of an entry yielded from scandir().
|
||||||
sensitivity)."""
|
"""
|
||||||
global re, glob
|
return entry
|
||||||
if re is None:
|
|
||||||
import re, glob
|
|
||||||
|
|
||||||
flags = re.NOFLAG if case_sensitive else re.IGNORECASE
|
|
||||||
regex = glob.translate(pat, recursive=recursive, include_hidden=True, seps=sep)
|
|
||||||
return re.compile(regex, flags=flags).match
|
|
||||||
|
|
||||||
|
|
||||||
def _select_special(paths, part):
|
|
||||||
"""Yield special literal children of the given paths."""
|
|
||||||
for path in paths:
|
|
||||||
yield path._make_child_relpath(part)
|
|
||||||
|
|
||||||
|
|
||||||
def _select_children(parent_paths, dir_only, match):
|
|
||||||
"""Yield direct children of given paths, filtering by name and type."""
|
|
||||||
for parent_path in parent_paths:
|
|
||||||
try:
|
|
||||||
# We must close the scandir() object before proceeding to
|
|
||||||
# avoid exhausting file descriptors when globbing deep trees.
|
|
||||||
with parent_path._scandir() as scandir_it:
|
|
||||||
entries = list(scandir_it)
|
|
||||||
except OSError:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
for entry in entries:
|
|
||||||
if dir_only:
|
|
||||||
try:
|
|
||||||
if not entry.is_dir():
|
|
||||||
continue
|
|
||||||
except OSError:
|
|
||||||
continue
|
|
||||||
# Avoid cost of making a path object for non-matching paths by
|
|
||||||
# matching against the os.DirEntry.name string.
|
|
||||||
if match is None or match(entry.name):
|
|
||||||
yield parent_path._make_child_direntry(entry)
|
|
||||||
|
|
||||||
|
|
||||||
def _select_recursive(parent_paths, dir_only, follow_symlinks, match):
|
|
||||||
"""Yield given paths and all their children, recursively, filtering by
|
|
||||||
string and type.
|
|
||||||
"""
|
|
||||||
for parent_path in parent_paths:
|
|
||||||
if match is not None:
|
|
||||||
# If we're filtering paths through a regex, record the length of
|
|
||||||
# the parent path. We'll pass it to match(path, pos=...) later.
|
|
||||||
parent_len = len(str(parent_path._make_child_relpath('_'))) - 1
|
|
||||||
paths = [parent_path._make_child_relpath('')]
|
|
||||||
while paths:
|
|
||||||
path = paths.pop()
|
|
||||||
if match is None or match(str(path), parent_len):
|
|
||||||
# Yield *directory* path that matches pattern (if any).
|
|
||||||
yield path
|
|
||||||
try:
|
|
||||||
# We must close the scandir() object before proceeding to
|
|
||||||
# avoid exhausting file descriptors when globbing deep trees.
|
|
||||||
with path._scandir() as scandir_it:
|
|
||||||
entries = list(scandir_it)
|
|
||||||
except OSError:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
for entry in entries:
|
|
||||||
# Handle directory entry.
|
|
||||||
try:
|
|
||||||
if entry.is_dir(follow_symlinks=follow_symlinks):
|
|
||||||
# Recurse into this directory.
|
|
||||||
paths.append(path._make_child_direntry(entry))
|
|
||||||
continue
|
|
||||||
except OSError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Handle file entry.
|
|
||||||
if not dir_only:
|
|
||||||
# Avoid cost of making a path object for non-matching
|
|
||||||
# files by matching against the os.DirEntry object.
|
|
||||||
if match is None or match(path._direntry_str(entry), parent_len):
|
|
||||||
# Yield *file* path that matches pattern (if any).
|
|
||||||
yield path._make_child_direntry(entry)
|
|
||||||
|
|
||||||
|
|
||||||
def _select_unique(paths):
|
|
||||||
"""Yields the given paths, filtering out duplicates."""
|
|
||||||
yielded = set()
|
|
||||||
try:
|
|
||||||
for path in paths:
|
|
||||||
path_str = str(path)
|
|
||||||
if path_str not in yielded:
|
|
||||||
yield path
|
|
||||||
yielded.add(path_str)
|
|
||||||
finally:
|
|
||||||
yielded.clear()
|
|
||||||
|
|
||||||
|
|
||||||
class UnsupportedOperation(NotImplementedError):
|
class UnsupportedOperation(NotImplementedError):
|
||||||
|
@ -218,6 +134,7 @@ class PurePathBase:
|
||||||
'_resolving',
|
'_resolving',
|
||||||
)
|
)
|
||||||
parser = ParserBase()
|
parser = ParserBase()
|
||||||
|
_globber = Globber
|
||||||
|
|
||||||
def __init__(self, path, *paths):
|
def __init__(self, path, *paths):
|
||||||
self._raw_path = self.parser.join(path, *paths) if paths else path
|
self._raw_path = self.parser.join(path, *paths) if paths else path
|
||||||
|
@ -454,14 +371,6 @@ class PurePathBase:
|
||||||
a drive)."""
|
a drive)."""
|
||||||
return self.parser.isabs(self._raw_path)
|
return self.parser.isabs(self._raw_path)
|
||||||
|
|
||||||
@property
|
|
||||||
def _pattern_stack(self):
|
|
||||||
"""Stack of path components, to be used with patterns in glob()."""
|
|
||||||
anchor, parts = self._stack
|
|
||||||
if anchor:
|
|
||||||
raise NotImplementedError("Non-relative patterns are unsupported")
|
|
||||||
return parts
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def _pattern_str(self):
|
def _pattern_str(self):
|
||||||
"""The path expressed as a string, for use in pattern-matching."""
|
"""The path expressed as a string, for use in pattern-matching."""
|
||||||
|
@ -487,8 +396,9 @@ class PurePathBase:
|
||||||
return False
|
return False
|
||||||
if len(path_parts) > len(pattern_parts) and path_pattern.anchor:
|
if len(path_parts) > len(pattern_parts) and path_pattern.anchor:
|
||||||
return False
|
return False
|
||||||
|
globber = self._globber(sep, case_sensitive)
|
||||||
for path_part, pattern_part in zip(path_parts, pattern_parts):
|
for path_part, pattern_part in zip(path_parts, pattern_parts):
|
||||||
match = _compile_pattern(pattern_part, sep, case_sensitive, recursive=False)
|
match = globber.compile(pattern_part)
|
||||||
if match(path_part) is None:
|
if match(path_part) is None:
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
@ -502,7 +412,8 @@ class PurePathBase:
|
||||||
pattern = self.with_segments(pattern)
|
pattern = self.with_segments(pattern)
|
||||||
if case_sensitive is None:
|
if case_sensitive is None:
|
||||||
case_sensitive = _is_case_sensitive(self.parser)
|
case_sensitive = _is_case_sensitive(self.parser)
|
||||||
match = _compile_pattern(pattern._pattern_str, pattern.parser.sep, case_sensitive)
|
globber = self._globber(pattern.parser.sep, case_sensitive, recursive=True)
|
||||||
|
match = globber.compile(pattern._pattern_str)
|
||||||
return match(self._pattern_str) is not None
|
return match(self._pattern_str) is not None
|
||||||
|
|
||||||
|
|
||||||
|
@ -772,11 +683,6 @@ class PathBase(PurePathBase):
|
||||||
from contextlib import nullcontext
|
from contextlib import nullcontext
|
||||||
return nullcontext(self.iterdir())
|
return nullcontext(self.iterdir())
|
||||||
|
|
||||||
def _direntry_str(self, entry):
|
|
||||||
# Transform an entry yielded from _scandir() into a path string.
|
|
||||||
# PathBase._scandir() yields PathBase objects, so use str().
|
|
||||||
return str(entry)
|
|
||||||
|
|
||||||
def _make_child_direntry(self, entry):
|
def _make_child_direntry(self, entry):
|
||||||
# Transform an entry yielded from _scandir() into a path object.
|
# Transform an entry yielded from _scandir() into a path object.
|
||||||
# PathBase._scandir() yields PathBase objects, so this is a no-op.
|
# PathBase._scandir() yields PathBase objects, so this is a no-op.
|
||||||
|
@ -785,62 +691,26 @@ class PathBase(PurePathBase):
|
||||||
def _make_child_relpath(self, name):
|
def _make_child_relpath(self, name):
|
||||||
return self.joinpath(name)
|
return self.joinpath(name)
|
||||||
|
|
||||||
|
def _glob_selector(self, parts, case_sensitive, recurse_symlinks):
|
||||||
|
if case_sensitive is None:
|
||||||
|
case_sensitive = _is_case_sensitive(self.parser)
|
||||||
|
recursive = True if recurse_symlinks else glob._no_recurse_symlinks
|
||||||
|
globber = self._globber(self.parser.sep, case_sensitive, recursive)
|
||||||
|
return globber.selector(parts)
|
||||||
|
|
||||||
def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=True):
|
def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=True):
|
||||||
"""Iterate over this subtree and yield all existing files (of any
|
"""Iterate over this subtree and yield all existing files (of any
|
||||||
kind, including directories) matching the given relative pattern.
|
kind, including directories) matching the given relative pattern.
|
||||||
"""
|
"""
|
||||||
if not isinstance(pattern, PurePathBase):
|
if not isinstance(pattern, PurePathBase):
|
||||||
pattern = self.with_segments(pattern)
|
pattern = self.with_segments(pattern)
|
||||||
if case_sensitive is None:
|
anchor, parts = pattern._stack
|
||||||
# TODO: evaluate case-sensitivity of each directory in _select_children().
|
if anchor:
|
||||||
case_sensitive = _is_case_sensitive(self.parser)
|
raise NotImplementedError("Non-relative patterns are unsupported")
|
||||||
|
if not self.is_dir():
|
||||||
stack = pattern._pattern_stack
|
return iter([])
|
||||||
specials = ('', '.', '..')
|
select = self._glob_selector(parts, case_sensitive, recurse_symlinks)
|
||||||
deduplicate_paths = False
|
return select(self, exists=True)
|
||||||
sep = self.parser.sep
|
|
||||||
paths = iter([self] if self.is_dir() else [])
|
|
||||||
while stack:
|
|
||||||
part = stack.pop()
|
|
||||||
if part in specials:
|
|
||||||
# Join special component (e.g. '..') onto paths.
|
|
||||||
paths = _select_special(paths, part)
|
|
||||||
|
|
||||||
elif part == '**':
|
|
||||||
# Consume following '**' components, which have no effect.
|
|
||||||
while stack and stack[-1] == '**':
|
|
||||||
stack.pop()
|
|
||||||
|
|
||||||
# Consume following non-special components, provided we're
|
|
||||||
# treating symlinks consistently. Each component is joined
|
|
||||||
# onto 'part', which is used to generate an re.Pattern object.
|
|
||||||
if recurse_symlinks:
|
|
||||||
while stack and stack[-1] not in specials:
|
|
||||||
part += sep + stack.pop()
|
|
||||||
|
|
||||||
# If the previous loop consumed pattern components, compile an
|
|
||||||
# re.Pattern object based on those components.
|
|
||||||
match = _compile_pattern(part, sep, case_sensitive) if part != '**' else None
|
|
||||||
|
|
||||||
# Recursively walk directories, filtering by type and regex.
|
|
||||||
paths = _select_recursive(paths, bool(stack), recurse_symlinks, match)
|
|
||||||
|
|
||||||
# De-duplicate if we've already seen a '**' component.
|
|
||||||
if deduplicate_paths:
|
|
||||||
paths = _select_unique(paths)
|
|
||||||
deduplicate_paths = True
|
|
||||||
|
|
||||||
elif '**' in part:
|
|
||||||
raise ValueError("Invalid pattern: '**' can only be an entire path component")
|
|
||||||
|
|
||||||
else:
|
|
||||||
# If the pattern component isn't '*', compile an re.Pattern
|
|
||||||
# object based on the component.
|
|
||||||
match = _compile_pattern(part, sep, case_sensitive) if part != '*' else None
|
|
||||||
|
|
||||||
# Iterate over directories' children filtering by type and regex.
|
|
||||||
paths = _select_children(paths, bool(stack), match)
|
|
||||||
return paths
|
|
||||||
|
|
||||||
def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=True):
|
def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=True):
|
||||||
"""Recursively yield all existing files (of any kind, including
|
"""Recursively yield all existing files (of any kind, including
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Speed up :meth:`pathlib.Path.glob` by working with strings internally.
|
Loading…
Add table
Add a link
Reference in a new issue