Revert "GH-116380: Speed up glob.[i]glob() by making fewer system calls. (#116392)" (#130743)

This broke tests on the 'aarch64 Fedora Stable Clang Installed 3.x' and
'AMD64 Fedora Stable Clang Installed 3.x' build bots.

This reverts commit da4899b94a.
This commit is contained in:
Barney Gale 2025-03-01 20:04:01 +00:00 committed by GitHub
parent 5221d9ce0e
commit 5326c27fc6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 228 additions and 239 deletions

View file

@ -75,6 +75,10 @@ The :mod:`glob` module defines the following functions:
Using the "``**``" pattern in large directory trees may consume Using the "``**``" pattern in large directory trees may consume
an inordinate amount of time. an inordinate amount of time.
.. note::
This function may return duplicate path names if *pathname*
contains multiple "``**``" patterns and *recursive* is true.
.. versionchanged:: 3.5 .. versionchanged:: 3.5
Support for recursive globs using "``**``". Support for recursive globs using "``**``".
@ -84,11 +88,6 @@ The :mod:`glob` module defines the following functions:
.. versionchanged:: 3.11 .. versionchanged:: 3.11
Added the *include_hidden* parameter. Added the *include_hidden* parameter.
.. versionchanged:: 3.14
Matching path names are returned only once. In previous versions, this
function may return duplicate path names if *pathname* contains multiple
"``**``" patterns and *recursive* is true.
.. function:: iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False, \ .. function:: iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False, \
include_hidden=False) include_hidden=False)
@ -99,6 +98,10 @@ The :mod:`glob` module defines the following functions:
.. audit-event:: glob.glob pathname,recursive glob.iglob .. audit-event:: glob.glob pathname,recursive glob.iglob
.. audit-event:: glob.glob/2 pathname,recursive,root_dir,dir_fd glob.iglob .. audit-event:: glob.glob/2 pathname,recursive,root_dir,dir_fd glob.iglob
.. note::
This function may return duplicate path names if *pathname*
contains multiple "``**``" patterns and *recursive* is true.
.. versionchanged:: 3.5 .. versionchanged:: 3.5
Support for recursive globs using "``**``". Support for recursive globs using "``**``".
@ -108,11 +111,6 @@ The :mod:`glob` module defines the following functions:
.. versionchanged:: 3.11 .. versionchanged:: 3.11
Added the *include_hidden* parameter. Added the *include_hidden* parameter.
.. versionchanged:: 3.14
Matching path names are yielded only once. In previous versions, this
function may yield duplicate path names if *pathname* contains multiple
"``**``" patterns and *recursive* is true.
.. function:: escape(pathname) .. function:: escape(pathname)

View file

@ -968,14 +968,6 @@ base64
(Contributed by Bénédikt Tran, Chris Markiewicz, and Adam Turner in :gh:`118761`.) (Contributed by Bénédikt Tran, Chris Markiewicz, and Adam Turner in :gh:`118761`.)
glob
----
* Reduce the number of system calls in :func:`glob.glob` and :func:`~glob.iglob`,
thereby improving the speed of globbing operations by 20-80%.
(Contributed by Barney Gale in :gh:`116380`.)
io io
--- ---
* :mod:`io` which provides the built-in :func:`open` makes less system calls * :mod:`io` which provides the built-in :func:`open` makes less system calls

View file

@ -1,10 +1,13 @@
"""Filename globbing utility.""" """Filename globbing utility."""
import contextlib
import os import os
import re import re
import fnmatch import fnmatch
import functools import functools
import itertools
import operator import operator
import stat
import sys import sys
@ -42,35 +45,82 @@ def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False,
""" """
sys.audit("glob.glob", pathname, recursive) sys.audit("glob.glob", pathname, recursive)
sys.audit("glob.glob/2", pathname, recursive, root_dir, dir_fd) sys.audit("glob.glob/2", pathname, recursive, root_dir, dir_fd)
pathname = os.fspath(pathname) if root_dir is not None:
if isinstance(pathname, bytes): root_dir = os.fspath(root_dir)
pathname = os.fsdecode(pathname)
if root_dir is not None:
root_dir = os.fsdecode(root_dir)
paths = _iglob(pathname, root_dir, dir_fd, recursive, include_hidden)
return map(os.fsencode, paths)
else: else:
return _iglob(pathname, root_dir, dir_fd, recursive, include_hidden) root_dir = pathname[:0]
it = _iglob(pathname, root_dir, dir_fd, recursive, False,
include_hidden=include_hidden)
if not pathname or recursive and _isrecursive(pathname[:2]):
try:
s = next(it) # skip empty string
if s:
it = itertools.chain((s,), it)
except StopIteration:
pass
return it
def _iglob(pathname, root_dir, dir_fd, recursive, include_hidden): def _iglob(pathname, root_dir, dir_fd, recursive, dironly,
if os.path.altsep: include_hidden=False):
pathname = pathname.replace(os.path.altsep, os.path.sep) dirname, basename = os.path.split(pathname)
drive, root, tail = os.path.splitroot(pathname) if not has_magic(pathname):
parts = tail.split(os.path.sep)[::-1] if tail else [] assert not dironly
globber = _StringGlobber(recursive=recursive, include_hidden=include_hidden) if basename:
select = globber.selector(parts) if _lexists(_join(root_dir, pathname), dir_fd):
if drive: yield pathname
root = drive + root else:
return select(root, dir_fd, root) # Patterns ending with a slash should match only directories
elif root: if _isdir(_join(root_dir, dirname), dir_fd):
return select(root, dir_fd, root, exists=True) yield pathname
elif not root_dir: return
return select(root, dir_fd, root, empty=True) if not dirname:
if recursive and _isrecursive(basename):
yield from _glob2(root_dir, basename, dir_fd, dironly,
include_hidden=include_hidden)
else:
yield from _glob1(root_dir, basename, dir_fd, dironly,
include_hidden=include_hidden)
return
# `os.path.split()` returns the argument itself as a dirname if it is a
# drive or UNC path. Prevent an infinite recursion if a drive or UNC path
# contains magic characters (i.e. r'\\?\C:').
if dirname != pathname and has_magic(dirname):
dirs = _iglob(dirname, root_dir, dir_fd, recursive, True,
include_hidden=include_hidden)
else: else:
root = os.path.join(root_dir, '') dirs = [dirname]
root_len = len(root) if has_magic(basename):
paths = select(root, dir_fd, root, empty=True) if recursive and _isrecursive(basename):
return (path[root_len:] for path in paths) glob_in_dir = _glob2
else:
glob_in_dir = _glob1
else:
glob_in_dir = _glob0
for dirname in dirs:
for name in glob_in_dir(_join(root_dir, dirname), basename, dir_fd, dironly,
include_hidden=include_hidden):
yield os.path.join(dirname, name)
# These 2 helper functions non-recursively glob inside a literal directory.
# They return a list of basenames. _glob1 accepts a pattern while _glob0
# takes a literal basename (so it only has to check for its existence).
def _glob1(dirname, pattern, dir_fd, dironly, include_hidden=False):
    """Return the basenames in *dirname* that match the wildcard *pattern*.

    Names are read with _listdir().  Hidden names are dropped first unless
    *include_hidden* is true or *pattern* itself is hidden according to
    _ishidden(); the survivors are then filtered with fnmatch.filter().
    """
    candidates = _listdir(dirname, dir_fd, dironly)
    keep_hidden = include_hidden or _ishidden(pattern)
    if not keep_hidden:
        candidates = [name for name in candidates if not _ishidden(name)]
    return fnmatch.filter(candidates, pattern)
def _glob0(dirname, basename, dir_fd, dironly, include_hidden=False):
    """Return ``[basename]`` if the literal entry exists, else ``[]``.

    A non-empty *basename* is checked with _lexists() after joining it to
    *dirname*.  An empty *basename* is what `os.path.split()` returns for a
    path ending with a directory separator; in that case *dirname* itself
    must be a directory, so that 'q*x/' matches only directories.
    """
    if basename:
        found = _lexists(_join(dirname, basename), dir_fd)
    else:
        found = _isdir(dirname, dir_fd)
    return [basename] if found else []
_deprecated_function_message = ( _deprecated_function_message = (
"{name} is deprecated and will be removed in Python {remove}. Use " "{name} is deprecated and will be removed in Python {remove}. Use "
@ -80,16 +130,102 @@ _deprecated_function_message = (
def glob0(dirname, pattern): def glob0(dirname, pattern):
import warnings import warnings
warnings._deprecated("glob.glob0", _deprecated_function_message, remove=(3, 15)) warnings._deprecated("glob.glob0", _deprecated_function_message, remove=(3, 15))
dirname = os.path.join(dirname, '') return _glob0(dirname, pattern, None, False)
select = _StringGlobber().literal_selector(pattern, [])
return [path[len(dirname):] for path in select(dirname)]
def glob1(dirname, pattern): def glob1(dirname, pattern):
import warnings import warnings
warnings._deprecated("glob.glob1", _deprecated_function_message, remove=(3, 15)) warnings._deprecated("glob.glob1", _deprecated_function_message, remove=(3, 15))
dirname = os.path.join(dirname, '') return _glob1(dirname, pattern, None, False)
select = _StringGlobber().wildcard_selector(pattern, [])
return [path[len(dirname):] for path in select(dirname)] # This helper function recursively yields relative pathnames inside a literal
# directory.
def _glob2(dirname, pattern, dir_fd, dironly, include_hidden=False):
    """Yield relative pathnames below *dirname* for a recursive pattern.

    An empty slice of *pattern* (an empty str or bytes) is yielded first when
    *dirname* is empty or _isdir() reports it as a directory; the names
    produced by _rlistdir() follow.
    """
    assert _isrecursive(pattern)
    base_matches = not dirname or _isdir(dirname, dir_fd)
    if base_matches:
        # pattern[:0] keeps the str/bytes type of the caller's pattern.
        yield pattern[:0]
    yield from _rlistdir(dirname, dir_fd, dironly,
                         include_hidden=include_hidden)
# If dironly is false, yields all file names inside a directory.
# If dironly is true, yields only directory names.
def _iterdir(dirname, dir_fd, dironly):
    """Yield the names of the entries found by scanning a directory.

    With *dironly* true, only entries whose is_dir() is true are yielded.
    Any OSError (from opening, scanning or inspecting an entry) is
    suppressed, so an unreadable directory simply yields nothing.

    When *dir_fd* is given, a non-empty *dirname* is opened relative to it
    and that descriptor is closed once scanning finishes; an empty *dirname*
    scans *dir_fd* itself.  In the *dir_fd* case, names are passed through
    os.fsencode() when *dirname* is bytes.
    """
    try:
        opened_fd = None
        encode_name = None
        if dir_fd is None:
            if dirname:
                target = dirname
            elif isinstance(dirname, bytes):
                target = bytes(os.curdir, 'ASCII')
            else:
                target = os.curdir
        else:
            if dirname:
                opened_fd = target = os.open(dirname, _dir_open_flags,
                                             dir_fd=dir_fd)
            else:
                target = dir_fd
            if isinstance(dirname, bytes):
                encode_name = os.fsencode
        try:
            with os.scandir(target) as entries:
                for entry in entries:
                    try:
                        if dironly and not entry.is_dir():
                            continue
                        if encode_name is None:
                            yield entry.name
                        else:
                            yield encode_name(entry.name)
                    except OSError:
                        pass
        finally:
            # Only close a descriptor this function opened itself.
            if opened_fd is not None:
                os.close(opened_fd)
    except OSError:
        return
def _listdir(dirname, dir_fd, dironly):
    """Return everything _iterdir() yields for the arguments as a list.

    The generator is always closed before returning, so its cleanup runs
    even if collecting the names is interrupted.
    """
    names = _iterdir(dirname, dir_fd, dironly)
    try:
        return list(names)
    finally:
        names.close()
# Recursively yields relative pathnames inside a literal directory.
def _rlistdir(dirname, dir_fd, dironly, include_hidden=False):
    """Recursively yield pathnames relative to *dirname*.

    Each listed name is yielded, followed by the results of recursing into
    it, each joined onto that name.  Names that _ishidden() reports as
    hidden are skipped, and not descended into, unless *include_hidden* is
    true.
    """
    for name in _listdir(dirname, dir_fd, dironly):
        if not include_hidden and _ishidden(name):
            continue
        yield name
        child = _join(dirname, name) if dirname else name
        for nested in _rlistdir(child, dir_fd, dironly,
                                include_hidden=include_hidden):
            yield _join(name, nested)
def _lexists(pathname, dir_fd):
    """Same as os.path.lexists(), but with dir_fd.

    With a *dir_fd*, existence is decided by whether os.lstat() succeeds
    relative to it; OSError and ValueError both count as "does not exist".
    """
    if dir_fd is not None:
        try:
            os.lstat(pathname, dir_fd=dir_fd)
        except (OSError, ValueError):
            return False
        return True
    return os.path.lexists(pathname)
def _isdir(pathname, dir_fd):
    """Same as os.path.isdir(), but with dir_fd.

    With a *dir_fd*, the path is stat'ed relative to it and the resulting
    mode is tested with stat.S_ISDIR(); OSError and ValueError both give
    False.
    """
    if dir_fd is not None:
        try:
            mode = os.stat(pathname, dir_fd=dir_fd).st_mode
        except (OSError, ValueError):
            return False
        return stat.S_ISDIR(mode)
    return os.path.isdir(pathname)
def _join(dirname, basename):
    """Join two path components, short-circuiting when either is empty.

    Empty components are common here; when one is empty the other is
    returned unchanged rather than going through os.path.join().
    """
    if dirname and basename:
        return os.path.join(dirname, basename)
    return dirname or basename
magic_check = re.compile('([*?[])') magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])') magic_check_bytes = re.compile(b'([*?[])')
@ -101,6 +237,15 @@ def has_magic(s):
match = magic_check.search(s) match = magic_check.search(s)
return match is not None return match is not None
def _ishidden(path):
    """Return True if *path* (str or bytes) starts with a dot.

    An empty *path* is not hidden.  The previous form indexed ``path[0]``
    unconditionally and raised IndexError for an empty name, which is
    reachable through _glob1() when it is handed an empty pattern.
    """
    # Indexing bytes gives an int, hence the comparison against b'.'[0];
    # comparing element-wise this way never compares str with bytes.
    return bool(path) and path[0] in ('.', b'.'[0])
def _isrecursive(pattern):
    """Return True if *pattern* is exactly the recursive wildcard ``**``.

    The comparison is made against a literal of the same kind (bytes or str)
    as *pattern*.
    """
    double_star = b'**' if isinstance(pattern, bytes) else '**'
    return pattern == double_star
def escape(pathname): def escape(pathname):
"""Escape all special characters. """Escape all special characters.
""" """
@ -174,13 +319,12 @@ def translate(pat, *, recursive=False, include_hidden=False, seps=None):
return fr'(?s:{res})\Z' return fr'(?s:{res})\Z'
@functools.lru_cache(maxsize=1024) @functools.lru_cache(maxsize=512)
def _compile_pattern(pat, sep, case_sensitive, recursive, include_hidden): def _compile_pattern(pat, sep, case_sensitive, recursive=True):
"""Compile given glob pattern to a re.Pattern object (observing case """Compile given glob pattern to a re.Pattern object (observing case
sensitivity).""" sensitivity)."""
flags = re.NOFLAG if case_sensitive else re.IGNORECASE flags = re.NOFLAG if case_sensitive else re.IGNORECASE
regex = translate(pat, recursive=recursive, regex = translate(pat, recursive=recursive, include_hidden=True, seps=sep)
include_hidden=include_hidden, seps=sep)
return re.compile(regex, flags=flags).match return re.compile(regex, flags=flags).match
@ -188,13 +332,11 @@ class _GlobberBase:
"""Abstract class providing shell-style pattern matching and globbing. """Abstract class providing shell-style pattern matching and globbing.
""" """
def __init__(self, sep=os.path.sep, case_sensitive=os.name != 'nt', def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False):
case_pedantic=False, recursive=False, include_hidden=False):
self.sep = sep self.sep = sep
self.case_sensitive = case_sensitive self.case_sensitive = case_sensitive
self.case_pedantic = case_pedantic self.case_pedantic = case_pedantic
self.recursive = recursive self.recursive = recursive
self.include_hidden = include_hidden
# Abstract methods # Abstract methods
@ -204,38 +346,12 @@ class _GlobberBase:
""" """
raise NotImplementedError raise NotImplementedError
@staticmethod
def lstat(path, dir_fd=None):
"""Implements os.lstat()
"""
raise NotImplementedError
@staticmethod
def open(path, flags, dir_fd=None):
"""Implements os.open()
"""
raise NotImplementedError
@staticmethod @staticmethod
def scandir(path): def scandir(path):
"""Like os.scandir(), but generates (entry, name, path) tuples. """Like os.scandir(), but generates (entry, name, path) tuples.
""" """
raise NotImplementedError raise NotImplementedError
@staticmethod
def scandir_cwd():
raise NotImplementedError
@staticmethod
def scandir_fd(fd, prefix):
raise NotImplementedError
@staticmethod
def close(fd):
"""Implements os.close().
"""
raise NotImplementedError
@staticmethod @staticmethod
def concat_path(path, text): def concat_path(path, text):
"""Implements path concatenation. """Implements path concatenation.
@ -245,8 +361,7 @@ class _GlobberBase:
# High-level methods # High-level methods
def compile(self, pat): def compile(self, pat):
return _compile_pattern(pat, self.sep, self.case_sensitive, return _compile_pattern(pat, self.sep, self.case_sensitive, self.recursive)
self.recursive, self.include_hidden)
def selector(self, parts): def selector(self, parts):
"""Returns a function that selects from a given path, walking and """Returns a function that selects from a given path, walking and
@ -271,14 +386,10 @@ class _GlobberBase:
if parts: if parts:
part += self.sep part += self.sep
select_next = self.selector(parts) select_next = self.selector(parts)
if not part:
return select_next
def select_special(path, dir_fd=None, rel_path=None, exists=False, empty=False): def select_special(path, exists=False):
path = self.concat_path(path, part) path = self.concat_path(path, part)
if dir_fd is not None: return select_next(path, exists)
rel_path = self.concat_path(rel_path, part)
return select_next(path, dir_fd, rel_path, exists)
return select_special return select_special
def literal_selector(self, part, parts): def literal_selector(self, part, parts):
@ -295,11 +406,9 @@ class _GlobberBase:
select_next = self.selector(parts) select_next = self.selector(parts)
def select_literal(path, dir_fd=None, rel_path=None, exists=False, empty=False): def select_literal(path, exists=False):
path = self.concat_path(path, part) path = self.concat_path(path, part)
if dir_fd is not None: return select_next(path, exists=False)
rel_path = self.concat_path(rel_path, part)
return select_next(path, dir_fd, rel_path)
return select_literal return select_literal
def wildcard_selector(self, part, parts): def wildcard_selector(self, part, parts):
@ -307,24 +416,14 @@ class _GlobberBase:
filtering by pattern. filtering by pattern.
""" """
match = None if self.include_hidden and part == '*' else self.compile(part) match = None if part == '*' else self.compile(part)
dir_only = bool(parts) dir_only = bool(parts)
if dir_only: if dir_only:
select_next = self.selector(parts) select_next = self.selector(parts)
def select_wildcard(path, dir_fd=None, rel_path=None, exists=False, empty=False): def select_wildcard(path, exists=False):
close_fd = False
try: try:
if dir_fd is None: entries = self.scandir(path)
fd = None
entries = self.scandir(path) if path else self.scandir_cwd()
elif not rel_path:
fd = dir_fd
entries = self.scandir_fd(fd, path)
else:
fd = self.open(rel_path, _dir_open_flags, dir_fd=dir_fd)
close_fd = True
entries = self.scandir_fd(fd, path)
except OSError: except OSError:
pass pass
else: else:
@ -337,17 +436,9 @@ class _GlobberBase:
except OSError: except OSError:
continue continue
entry_path = self.concat_path(entry_path, self.sep) entry_path = self.concat_path(entry_path, self.sep)
if fd is not None: yield from select_next(entry_path, exists=True)
entry_name = entry_name + self.sep
yield from select_next(
entry_path, fd, entry_name, exists=True)
else: else:
# Optimization: directly yield the path if this is
# last pattern part.
yield entry_path yield entry_path
finally:
if close_fd:
self.close(fd)
return select_wildcard return select_wildcard
def recursive_selector(self, part, parts): def recursive_selector(self, part, parts):
@ -369,49 +460,26 @@ class _GlobberBase:
while parts and parts[-1] not in _special_parts: while parts and parts[-1] not in _special_parts:
part += self.sep + parts.pop() part += self.sep + parts.pop()
match = None if self.include_hidden and part == '**' else self.compile(part) match = None if part == '**' else self.compile(part)
dir_only = bool(parts) dir_only = bool(parts)
select_next = self.selector(parts) select_next = self.selector(parts)
def select_recursive(path, dir_fd=None, rel_path=None, exists=False, empty=False): def select_recursive(path, exists=False):
match_pos = len(str(path)) match_pos = len(str(path))
if match is None or match(str(path), match_pos): if match is None or match(str(path), match_pos):
yield from select_next(path, dir_fd, rel_path, exists, empty) yield from select_next(path, exists)
stack = [(path, dir_fd, rel_path)] stack = [path]
try: while stack:
while stack: yield from select_recursive_step(stack, match_pos)
yield from select_recursive_step(stack, match_pos)
finally:
# Close any file descriptors still on the stack.
while stack:
path, dir_fd, _rel_path = stack.pop()
if path is None:
try:
self.close(dir_fd)
except OSError:
pass
def select_recursive_step(stack, match_pos): def select_recursive_step(stack, match_pos):
path, dir_fd, rel_path = stack.pop() path = stack.pop()
try: try:
if path is None: entries = self.scandir(path)
self.close(dir_fd)
return
elif dir_fd is None:
fd = None
entries = self.scandir(path) if path else self.scandir_cwd()
elif not rel_path:
fd = dir_fd
entries = self.scandir_fd(fd, path)
else:
fd = self.open(rel_path, _dir_open_flags, dir_fd=dir_fd)
# Schedule the file descriptor to be closed next step.
stack.append((None, fd, None))
entries = self.scandir_fd(fd, path)
except OSError: except OSError:
pass pass
else: else:
for entry, entry_name, entry_path in entries: for entry, _entry_name, entry_path in entries:
is_dir = False is_dir = False
try: try:
if entry.is_dir(follow_symlinks=follow_symlinks): if entry.is_dir(follow_symlinks=follow_symlinks):
@ -423,38 +491,25 @@ class _GlobberBase:
entry_path_str = str(entry_path) entry_path_str = str(entry_path)
if dir_only: if dir_only:
entry_path = self.concat_path(entry_path, self.sep) entry_path = self.concat_path(entry_path, self.sep)
if fd is not None:
entry_name = entry_name + self.sep
if match is None or match(entry_path_str, match_pos): if match is None or match(entry_path_str, match_pos):
if dir_only: if dir_only:
yield from select_next( yield from select_next(entry_path, exists=True)
entry_path, fd, entry_name, exists=True)
else: else:
# Optimization: directly yield the path if this is # Optimization: directly yield the path if this is
# last pattern part. # last pattern part.
yield entry_path yield entry_path
if is_dir: if is_dir:
stack.append((entry_path, fd, entry_name)) stack.append(entry_path)
return select_recursive return select_recursive
def select_exists(self, path, dir_fd=None, rel_path=None, exists=False, empty=False): def select_exists(self, path, exists=False):
"""Yields the given path, if it exists. If *dir_fd* is given, we check """Yields the given path, if it exists.
whether *rel_path* exists relative to the fd.
""" """
if empty: if exists:
# Suppress initial path so iglob() doesn't yield the empty string.
pass
elif exists:
# Optimization: this path is already known to exist, e.g. because # Optimization: this path is already known to exist, e.g. because
# it was returned from os.scandir(), so we skip calling lstat(). # it was returned from os.scandir(), so we skip calling lstat().
yield path yield path
elif dir_fd is not None:
try:
self.lstat(rel_path, dir_fd=dir_fd)
yield path
except OSError:
pass
elif self.lexists(path): elif self.lexists(path):
yield path yield path
@ -463,9 +518,6 @@ class _StringGlobber(_GlobberBase):
"""Provides shell-style pattern matching and globbing for string paths. """Provides shell-style pattern matching and globbing for string paths.
""" """
lexists = staticmethod(os.path.lexists) lexists = staticmethod(os.path.lexists)
lstat = staticmethod(os.lstat)
open = staticmethod(os.open)
close = staticmethod(os.close)
concat_path = operator.add concat_path = operator.add
@staticmethod @staticmethod
@ -476,20 +528,6 @@ class _StringGlobber(_GlobberBase):
entries = list(scandir_it) entries = list(scandir_it)
return ((entry, entry.name, entry.path) for entry in entries) return ((entry, entry.name, entry.path) for entry in entries)
@staticmethod
def scandir_cwd():
with os.scandir() as scandir_it:
entries = list(scandir_it)
# Suppress leading dot when scanning current directory.
return ((entry, entry.name, entry.name) for entry in entries)
@staticmethod
def scandir_fd(fd, prefix):
prefix = os.path.join(prefix, prefix[:0])
with os.scandir(fd) as scandir_it:
entries = list(scandir_it)
return ((entry, entry.name, prefix + entry.name) for entry in entries)
class _PathGlobber(_GlobberBase): class _PathGlobber(_GlobberBase):
"""Provides shell-style pattern matching and globbing for pathlib paths. """Provides shell-style pattern matching and globbing for pathlib paths.

View file

@ -196,8 +196,7 @@ class JoinablePath(ABC):
pattern = self.with_segments(pattern) pattern = self.with_segments(pattern)
if case_sensitive is None: if case_sensitive is None:
case_sensitive = self.parser.normcase('Aa') == 'Aa' case_sensitive = self.parser.normcase('Aa') == 'Aa'
globber = _PathGlobber(pattern.parser.sep, case_sensitive, globber = _PathGlobber(pattern.parser.sep, case_sensitive, recursive=True)
recursive=True, include_hidden=True)
match = globber.compile(str(pattern)) match = globber.compile(str(pattern))
return match(str(self)) is not None return match(str(self)) is not None

View file

@ -558,8 +558,7 @@ class PurePath:
# paths shouldn't match wildcards, so we change it to the empty string. # paths shouldn't match wildcards, so we change it to the empty string.
path = str(self) if self.parts else '' path = str(self) if self.parts else ''
pattern = str(pattern) if pattern.parts else '' pattern = str(pattern) if pattern.parts else ''
globber = _StringGlobber(self.parser.sep, case_sensitive, globber = _StringGlobber(self.parser.sep, case_sensitive, recursive=True)
recursive=True, include_hidden=True)
return globber.compile(pattern)(path) is not None return globber.compile(pattern)(path) is not None
def match(self, path_pattern, *, case_sensitive=None): def match(self, path_pattern, *, case_sensitive=None):
@ -581,8 +580,7 @@ class PurePath:
return False return False
if len(path_parts) > len(pattern_parts) and path_pattern.anchor: if len(path_parts) > len(pattern_parts) and path_pattern.anchor:
return False return False
globber = _StringGlobber(self.parser.sep, case_sensitive, globber = _StringGlobber(self.parser.sep, case_sensitive)
include_hidden=True)
for path_part, pattern_part in zip(path_parts, pattern_parts): for path_part, pattern_part in zip(path_parts, pattern_parts):
match = globber.compile(pattern_part) match = globber.compile(pattern_part)
if match(path_part) is None: if match(path_part) is None:
@ -857,8 +855,7 @@ class Path(PurePath):
case_pedantic = True case_pedantic = True
parts = self._parse_pattern(pattern) parts = self._parse_pattern(pattern)
recursive = True if recurse_symlinks else _no_recurse_symlinks recursive = True if recurse_symlinks else _no_recurse_symlinks
globber = _StringGlobber(self.parser.sep, case_sensitive, case_pedantic, globber = _StringGlobber(self.parser.sep, case_sensitive, case_pedantic, recursive)
recursive, include_hidden=True)
select = globber.selector(parts[::-1]) select = globber.selector(parts[::-1])
root = str(self) root = str(self)
paths = select(self.parser.join(root, '')) paths = select(self.parser.join(root, ''))

View file

@ -4,18 +4,14 @@ import re
import shutil import shutil
import sys import sys
import unittest import unittest
import unittest.mock
import warnings import warnings
from test import support from test import support
from test.support import is_wasi, Py_DEBUG, infinite_recursion from test.support import is_wasi, Py_DEBUG
from test.support.os_helper import (TESTFN, skip_unless_symlink, from test.support.os_helper import (TESTFN, skip_unless_symlink,
can_symlink, create_empty_file, change_cwd) can_symlink, create_empty_file, change_cwd)
_supports_dir_fd = {os.open, os.stat} <= os.supports_dir_fd and os.scandir in os.supports_fd
class GlobTests(unittest.TestCase): class GlobTests(unittest.TestCase):
dir_fd = None dir_fd = None
@ -53,7 +49,7 @@ class GlobTests(unittest.TestCase):
def open_dirfd(self): def open_dirfd(self):
if self.dir_fd is not None: if self.dir_fd is not None:
os.close(self.dir_fd) os.close(self.dir_fd)
if _supports_dir_fd: if {os.open, os.stat} <= os.supports_dir_fd and os.scandir in os.supports_fd:
self.dir_fd = os.open(self.tempdir, os.O_RDONLY | os.O_DIRECTORY) self.dir_fd = os.open(self.tempdir, os.O_RDONLY | os.O_DIRECTORY)
else: else:
self.dir_fd = None self.dir_fd = None
@ -181,18 +177,20 @@ class GlobTests(unittest.TestCase):
self.assertEqual(glob.glob(self.norm('Z*Z') + sep), []) self.assertEqual(glob.glob(self.norm('Z*Z') + sep), [])
self.assertEqual(glob.glob(self.norm('ZZZ') + sep), []) self.assertEqual(glob.glob(self.norm('ZZZ') + sep), [])
self.assertEqual(glob.glob(self.norm('aaa') + sep), self.assertEqual(glob.glob(self.norm('aaa') + sep),
[self.norm('aaa') + os.sep]) [self.norm('aaa') + sep])
# Redundant separators are preserved and normalized # Preserving the redundant separators is an implementation detail.
self.assertEqual(glob.glob(self.norm('aaa') + sep*2), self.assertEqual(glob.glob(self.norm('aaa') + sep*2),
[self.norm('aaa') + os.sep*2]) [self.norm('aaa') + sep*2])
# When there is a wildcard pattern which ends with a pathname # When there is a wildcard pattern which ends with a pathname
# separator, glob() doesn't blow. # separator, glob() doesn't blow.
# The result should end with the pathname separator. # The result should end with the pathname separator.
# Normalizing the trailing separator is an implementation detail.
eq = self.assertSequencesEqual_noorder eq = self.assertSequencesEqual_noorder
eq(glob.glob(self.norm('aa*') + sep), eq(glob.glob(self.norm('aa*') + sep),
[self.norm('aaa') + os.sep, self.norm('aab') + os.sep]) [self.norm('aaa') + os.sep, self.norm('aab') + os.sep])
# Stripping the redundant separators is an implementation detail.
eq(glob.glob(self.norm('aa*') + sep*2), eq(glob.glob(self.norm('aa*') + sep*2),
[self.norm('aaa') + os.sep*2, self.norm('aab') + os.sep*2]) [self.norm('aaa') + os.sep, self.norm('aab') + os.sep])
def test_glob_bytes_directory_with_trailing_slash(self): def test_glob_bytes_directory_with_trailing_slash(self):
# Same as test_glob_directory_with_trailing_slash, but with a # Same as test_glob_directory_with_trailing_slash, but with a
@ -202,16 +200,16 @@ class GlobTests(unittest.TestCase):
self.assertEqual(glob.glob(os.fsencode(self.norm('Z*Z') + sep)), []) self.assertEqual(glob.glob(os.fsencode(self.norm('Z*Z') + sep)), [])
self.assertEqual(glob.glob(os.fsencode(self.norm('ZZZ') + sep)), []) self.assertEqual(glob.glob(os.fsencode(self.norm('ZZZ') + sep)), [])
self.assertEqual(glob.glob(os.fsencode(self.norm('aaa') + sep)), self.assertEqual(glob.glob(os.fsencode(self.norm('aaa') + sep)),
[os.fsencode(self.norm('aaa') + os.sep)]) [os.fsencode(self.norm('aaa') + sep)])
self.assertEqual(glob.glob(os.fsencode(self.norm('aaa') + sep*2)), self.assertEqual(glob.glob(os.fsencode(self.norm('aaa') + sep*2)),
[os.fsencode(self.norm('aaa') + os.sep*2)]) [os.fsencode(self.norm('aaa') + sep*2)])
eq = self.assertSequencesEqual_noorder eq = self.assertSequencesEqual_noorder
eq(glob.glob(os.fsencode(self.norm('aa*') + sep)), eq(glob.glob(os.fsencode(self.norm('aa*') + sep)),
[os.fsencode(self.norm('aaa') + os.sep), [os.fsencode(self.norm('aaa') + os.sep),
os.fsencode(self.norm('aab') + os.sep)]) os.fsencode(self.norm('aab') + os.sep)])
eq(glob.glob(os.fsencode(self.norm('aa*') + sep*2)), eq(glob.glob(os.fsencode(self.norm('aa*') + sep*2)),
[os.fsencode(self.norm('aaa') + os.sep*2), [os.fsencode(self.norm('aaa') + os.sep),
os.fsencode(self.norm('aab') + os.sep*2)]) os.fsencode(self.norm('aab') + os.sep)])
@skip_unless_symlink @skip_unless_symlink
def test_glob_symlinks(self): def test_glob_symlinks(self):
@ -328,12 +326,8 @@ class GlobTests(unittest.TestCase):
with change_cwd(self.tempdir): with change_cwd(self.tempdir):
join = os.path.join join = os.path.join
eq(glob.glob('**', recursive=True), [join(*i) for i in full]) eq(glob.glob('**', recursive=True), [join(*i) for i in full])
eq(glob.glob(join('**', '**'), recursive=True),
[join(*i) for i in full])
eq(glob.glob(join('**', ''), recursive=True), eq(glob.glob(join('**', ''), recursive=True),
[join(*i) for i in dirs]) [join(*i) for i in dirs])
eq(glob.glob(join('**', '**', ''), recursive=True),
[join(*i) for i in dirs])
eq(glob.glob(join('**', '*'), recursive=True), eq(glob.glob(join('**', '*'), recursive=True),
[join(*i) for i in full]) [join(*i) for i in full])
eq(glob.glob(join(os.curdir, '**'), recursive=True), eq(glob.glob(join(os.curdir, '**'), recursive=True),
@ -400,33 +394,6 @@ class GlobTests(unittest.TestCase):
for it in iters: for it in iters:
self.assertEqual(next(it), p) self.assertEqual(next(it), p)
def test_glob_above_recursion_limit(self):
depth = 30
base = os.path.join(self.tempdir, 'deep')
p = os.path.join(base, *(['d']*depth))
os.makedirs(p)
pattern = os.path.join(base, '**', 'd')
with infinite_recursion(depth - 5):
glob.glob(pattern, recursive=True)
@unittest.skipUnless(_supports_dir_fd, "Needs support for iglob(dir_fd=...)")
def test_iglob_iter_close(self):
base = os.path.join(self.tempdir, 'deep')
p = os.path.join(base, *(['d'] * 10))
os.makedirs(p)
with (
unittest.mock.patch("glob._StringGlobber.open", wraps=os.open) as os_open,
unittest.mock.patch("glob._StringGlobber.close", wraps=os.close) as os_close
):
self.assertEqual(os_open.call_count, os_close.call_count)
iter = glob.iglob('**/*/d', dir_fd=self.dir_fd, recursive=True)
self.assertEqual(os_open.call_count, os_close.call_count)
self.assertEqual(next(iter), 'deep/d')
self.assertEqual(next(iter), 'deep/d/d')
self.assertGreater(os_open.call_count, os_close.call_count)
iter.close()
self.assertEqual(os_open.call_count, os_close.call_count)
def test_glob0(self): def test_glob0(self):
with self.assertWarns(DeprecationWarning): with self.assertWarns(DeprecationWarning):
glob.glob0(self.tempdir, 'a') glob.glob0(self.tempdir, 'a')

View file

@ -1,2 +0,0 @@
Speed up :func:`glob.glob` and :func:`glob.iglob` by making use of
:func:`glob.translate` and tracking path existence more precisely.