bpo-38144: Add the root_dir and dir_fd parameters in glob.glob(). (GH-16075)

This commit is contained in:
Serhiy Storchaka 2020-06-18 22:08:27 +03:00 committed by GitHub
parent 8f192d12af
commit 8a64ceaf98
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 175 additions and 47 deletions

View file

@ -36,7 +36,7 @@ For example, ``'[?]'`` matches the character ``'?'``.
The :mod:`pathlib` module offers high-level path objects. The :mod:`pathlib` module offers high-level path objects.
.. function:: glob(pathname, *, recursive=False) .. function:: glob(pathname, *, root_dir=None, dir_fd=None, recursive=False)
Return a possibly-empty list of path names that match *pathname*, which must be Return a possibly-empty list of path names that match *pathname*, which must be
a string containing a path specification. *pathname* can be either absolute a string containing a path specification. *pathname* can be either absolute
@ -45,6 +45,15 @@ For example, ``'[?]'`` matches the character ``'?'``.
symlinks are included in the results (as in the shell). Whether or not the symlinks are included in the results (as in the shell). Whether or not the
results are sorted depends on the file system. results are sorted depends on the file system.
If *root_dir* is not ``None``, it should be a :term:`path-like object`
specifying the root directory for searching. It has the same effect on
:func:`glob` as changing the current directory before calling it. If
*pathname* is relative, the result will contain paths relative to
*root_dir*.
This function can support :ref:`paths relative to directory descriptors
<dir_fd>` with the *dir_fd* parameter.
.. index:: .. index::
single: **; in glob-style wildcards single: **; in glob-style wildcards
@ -62,8 +71,11 @@ For example, ``'[?]'`` matches the character ``'?'``.
.. versionchanged:: 3.5 .. versionchanged:: 3.5
Support for recursive globs using "``**``". Support for recursive globs using "``**``".
.. versionchanged:: 3.10
Added the *root_dir* and *dir_fd* parameters.
.. function:: iglob(pathname, *, recursive=False)
.. function:: iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False)
Return an :term:`iterator` which yields the same values as :func:`glob` Return an :term:`iterator` which yields the same values as :func:`glob`
without actually storing them all simultaneously. without actually storing them all simultaneously.

View file

@ -100,6 +100,14 @@ New Modules
Improved Modules Improved Modules
================ ================
glob
----
Added the *root_dir* and *dir_fd* parameters in :func:`~glob.glob` and
:func:`~glob.iglob`, which allow specifying the root directory for searching.
(Contributed by Serhiy Storchaka in :issue:`38144`.)
Optimizations Optimizations
============= =============

View file

@ -3,11 +3,13 @@
import os import os
import re import re
import fnmatch import fnmatch
import itertools
import stat
import sys import sys
__all__ = ["glob", "iglob", "escape"] __all__ = ["glob", "iglob", "escape"]
def glob(pathname, *, recursive=False): def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False):
"""Return a list of paths matching a pathname pattern. """Return a list of paths matching a pathname pattern.
The pattern may contain simple shell-style wildcards a la The pattern may contain simple shell-style wildcards a la
@ -18,9 +20,9 @@ def glob(pathname, *, recursive=False):
If recursive is true, the pattern '**' will match any files and If recursive is true, the pattern '**' will match any files and
zero or more directories and subdirectories. zero or more directories and subdirectories.
""" """
return list(iglob(pathname, recursive=recursive)) return list(iglob(pathname, root_dir=root_dir, dir_fd=dir_fd, recursive=recursive))
def iglob(pathname, *, recursive=False): def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False):
"""Return an iterator which yields the paths matching a pathname pattern. """Return an iterator which yields the paths matching a pathname pattern.
The pattern may contain simple shell-style wildcards a la The pattern may contain simple shell-style wildcards a la
@ -31,36 +33,43 @@ def iglob(pathname, *, recursive=False):
If recursive is true, the pattern '**' will match any files and If recursive is true, the pattern '**' will match any files and
zero or more directories and subdirectories. zero or more directories and subdirectories.
""" """
sys.audit("glob.glob", pathname, recursive) if root_dir is not None:
it = _iglob(pathname, recursive, False) root_dir = os.fspath(root_dir)
if recursive and _isrecursive(pathname): else:
s = next(it) # skip empty string root_dir = pathname[:0]
assert not s it = _iglob(pathname, root_dir, dir_fd, recursive, False)
if not pathname or recursive and _isrecursive(pathname[:2]):
try:
s = next(it) # skip empty string
if s:
it = itertools.chain((s,), it)
except StopIteration:
pass
return it return it
def _iglob(pathname, recursive, dironly): def _iglob(pathname, root_dir, dir_fd, recursive, dironly):
dirname, basename = os.path.split(pathname) dirname, basename = os.path.split(pathname)
if not has_magic(pathname): if not has_magic(pathname):
assert not dironly assert not dironly
if basename: if basename:
if os.path.lexists(pathname): if _lexists(_join(root_dir, pathname), dir_fd):
yield pathname yield pathname
else: else:
# Patterns ending with a slash should match only directories # Patterns ending with a slash should match only directories
if os.path.isdir(dirname): if _isdir(_join(root_dir, dirname), dir_fd):
yield pathname yield pathname
return return
if not dirname: if not dirname:
if recursive and _isrecursive(basename): if recursive and _isrecursive(basename):
yield from _glob2(dirname, basename, dironly) yield from _glob2(root_dir, basename, dir_fd, dironly)
else: else:
yield from _glob1(dirname, basename, dironly) yield from _glob1(root_dir, basename, dir_fd, dironly)
return return
# `os.path.split()` returns the argument itself as a dirname if it is a # `os.path.split()` returns the argument itself as a dirname if it is a
# drive or UNC path. Prevent an infinite recursion if a drive or UNC path # drive or UNC path. Prevent an infinite recursion if a drive or UNC path
# contains magic characters (i.e. r'\\?\C:'). # contains magic characters (i.e. r'\\?\C:').
if dirname != pathname and has_magic(dirname): if dirname != pathname and has_magic(dirname):
dirs = _iglob(dirname, recursive, True) dirs = _iglob(dirname, root_dir, dir_fd, recursive, True)
else: else:
dirs = [dirname] dirs = [dirname]
if has_magic(basename): if has_magic(basename):
@ -71,76 +80,121 @@ def _iglob(pathname, recursive, dironly):
else: else:
glob_in_dir = _glob0 glob_in_dir = _glob0
for dirname in dirs: for dirname in dirs:
for name in glob_in_dir(dirname, basename, dironly): for name in glob_in_dir(_join(root_dir, dirname), basename, dir_fd, dironly):
yield os.path.join(dirname, name) yield os.path.join(dirname, name)
# These 2 helper functions non-recursively glob inside a literal directory. # These 2 helper functions non-recursively glob inside a literal directory.
# They return a list of basenames. _glob1 accepts a pattern while _glob0 # They return a list of basenames. _glob1 accepts a pattern while _glob0
# takes a literal basename (so it only has to check for its existence). # takes a literal basename (so it only has to check for its existence).
def _glob1(dirname, pattern, dironly): def _glob1(dirname, pattern, dir_fd, dironly):
names = list(_iterdir(dirname, dironly)) names = list(_iterdir(dirname, dir_fd, dironly))
if not _ishidden(pattern): if not _ishidden(pattern):
names = (x for x in names if not _ishidden(x)) names = (x for x in names if not _ishidden(x))
return fnmatch.filter(names, pattern) return fnmatch.filter(names, pattern)
def _glob0(dirname, basename, dironly): def _glob0(dirname, basename, dir_fd, dironly):
if not basename: if basename:
# `os.path.split()` returns an empty basename for paths ending with a if _lexists(_join(dirname, basename), dir_fd):
# directory separator. 'q*x/' should match only directories.
if os.path.isdir(dirname):
return [basename] return [basename]
else: else:
if os.path.lexists(os.path.join(dirname, basename)): # `os.path.split()` returns an empty basename for paths ending with a
# directory separator. 'q*x/' should match only directories.
if _isdir(dirname, dir_fd):
return [basename] return [basename]
return [] return []
# Following functions are not public but can be used by third-party code. # Following functions are not public but can be used by third-party code.
def glob0(dirname, pattern): def glob0(dirname, pattern):
return _glob0(dirname, pattern, False) return _glob0(dirname, pattern, None, False)
def glob1(dirname, pattern): def glob1(dirname, pattern):
return _glob1(dirname, pattern, False) return _glob1(dirname, pattern, None, False)
# This helper function recursively yields relative pathnames inside a literal # This helper function recursively yields relative pathnames inside a literal
# directory. # directory.
def _glob2(dirname, pattern, dironly): def _glob2(dirname, pattern, dir_fd, dironly):
assert _isrecursive(pattern) assert _isrecursive(pattern)
yield pattern[:0] yield pattern[:0]
yield from _rlistdir(dirname, dironly) yield from _rlistdir(dirname, dir_fd, dironly)
# If dironly is false, yields all file names inside a directory. # If dironly is false, yields all file names inside a directory.
# If dironly is true, yields only directory names. # If dironly is true, yields only directory names.
def _iterdir(dirname, dironly): def _iterdir(dirname, dir_fd, dironly):
if not dirname:
if isinstance(dirname, bytes):
dirname = bytes(os.curdir, 'ASCII')
else:
dirname = os.curdir
try: try:
with os.scandir(dirname) as it: fd = None
for entry in it: fsencode = None
try: if dir_fd is not None:
if not dironly or entry.is_dir(): if dirname:
yield entry.name fd = arg = os.open(dirname, _dir_open_flags, dir_fd=dir_fd)
except OSError: else:
pass arg = dir_fd
if isinstance(dirname, bytes):
fsencode = os.fsencode
elif dirname:
arg = dirname
elif isinstance(dirname, bytes):
arg = bytes(os.curdir, 'ASCII')
else:
arg = os.curdir
try:
with os.scandir(arg) as it:
for entry in it:
try:
if not dironly or entry.is_dir():
if fsencode is not None:
yield fsencode(entry.name)
else:
yield entry.name
except OSError:
pass
finally:
if fd is not None:
os.close(fd)
except OSError: except OSError:
return return
# Recursively yields relative pathnames inside a literal directory. # Recursively yields relative pathnames inside a literal directory.
def _rlistdir(dirname, dironly): def _rlistdir(dirname, dir_fd, dironly):
names = list(_iterdir(dirname, dironly)) names = list(_iterdir(dirname, dir_fd, dironly))
for x in names: for x in names:
if not _ishidden(x): if not _ishidden(x):
yield x yield x
path = os.path.join(dirname, x) if dirname else x path = _join(dirname, x) if dirname else x
for y in _rlistdir(path, dironly): for y in _rlistdir(path, dir_fd, dironly):
yield os.path.join(x, y) yield _join(x, y)
def _lexists(pathname, dir_fd):
# Same as os.path.lexists(), but with dir_fd
if dir_fd is None:
return os.path.lexists(pathname)
try:
os.lstat(pathname, dir_fd=dir_fd)
except (OSError, ValueError):
return False
else:
return True
def _isdir(pathname, dir_fd):
# Same as os.path.isdir(), but with dir_fd
if dir_fd is None:
return os.path.isdir(pathname)
try:
st = os.stat(pathname, dir_fd=dir_fd)
except (OSError, ValueError):
return False
else:
return stat.S_ISDIR(st.st_mode)
def _join(dirname, basename):
# It is common if dirname or basename is empty
if not dirname or not basename:
return dirname or basename
return os.path.join(dirname, basename)
magic_check = re.compile('([*?[])') magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])') magic_check_bytes = re.compile(b'([*?[])')
@ -171,3 +225,6 @@ def escape(pathname):
else: else:
pathname = magic_check.sub(r'[\1]', pathname) pathname = magic_check.sub(r'[\1]', pathname)
return drive + pathname return drive + pathname
_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)

View file

@ -9,6 +9,7 @@ from test.support import (TESTFN, skip_unless_symlink,
class GlobTests(unittest.TestCase): class GlobTests(unittest.TestCase):
dir_fd = None
def norm(self, *parts): def norm(self, *parts):
return os.path.normpath(os.path.join(self.tempdir, *parts)) return os.path.normpath(os.path.join(self.tempdir, *parts))
@ -38,8 +39,14 @@ class GlobTests(unittest.TestCase):
os.symlink(self.norm('broken'), self.norm('sym1')) os.symlink(self.norm('broken'), self.norm('sym1'))
os.symlink('broken', self.norm('sym2')) os.symlink('broken', self.norm('sym2'))
os.symlink(os.path.join('a', 'bcd'), self.norm('sym3')) os.symlink(os.path.join('a', 'bcd'), self.norm('sym3'))
if {os.open, os.stat} <= os.supports_dir_fd and os.scandir in os.supports_fd:
self.dir_fd = os.open(self.tempdir, os.O_RDONLY | os.O_DIRECTORY)
else:
self.dir_fd = None
def tearDown(self): def tearDown(self):
if self.dir_fd is not None:
os.close(self.dir_fd)
shutil.rmtree(self.tempdir) shutil.rmtree(self.tempdir)
def glob(self, *parts, **kwargs): def glob(self, *parts, **kwargs):
@ -53,6 +60,41 @@ class GlobTests(unittest.TestCase):
bres = [os.fsencode(x) for x in res] bres = [os.fsencode(x) for x in res]
self.assertCountEqual(glob.glob(os.fsencode(p), **kwargs), bres) self.assertCountEqual(glob.glob(os.fsencode(p), **kwargs), bres)
self.assertCountEqual(glob.iglob(os.fsencode(p), **kwargs), bres) self.assertCountEqual(glob.iglob(os.fsencode(p), **kwargs), bres)
with change_cwd(self.tempdir):
res2 = glob.glob(pattern, **kwargs)
for x in res2:
self.assertFalse(os.path.isabs(x), x)
if pattern == '**' or pattern == '**' + os.sep:
expected = res[1:]
else:
expected = res
self.assertCountEqual([os.path.join(self.tempdir, x) for x in res2],
expected)
self.assertCountEqual(glob.iglob(pattern, **kwargs), res2)
bpattern = os.fsencode(pattern)
bres2 = [os.fsencode(x) for x in res2]
self.assertCountEqual(glob.glob(bpattern, **kwargs), bres2)
self.assertCountEqual(glob.iglob(bpattern, **kwargs), bres2)
self.assertCountEqual(glob.glob(pattern, root_dir=self.tempdir, **kwargs), res2)
self.assertCountEqual(glob.iglob(pattern, root_dir=self.tempdir, **kwargs), res2)
btempdir = os.fsencode(self.tempdir)
self.assertCountEqual(
glob.glob(bpattern, root_dir=btempdir, **kwargs), bres2)
self.assertCountEqual(
glob.iglob(bpattern, root_dir=btempdir, **kwargs), bres2)
if self.dir_fd is not None:
self.assertCountEqual(
glob.glob(pattern, dir_fd=self.dir_fd, **kwargs), res2)
self.assertCountEqual(
glob.iglob(pattern, dir_fd=self.dir_fd, **kwargs), res2)
self.assertCountEqual(
glob.glob(bpattern, dir_fd=self.dir_fd, **kwargs), bres2)
self.assertCountEqual(
glob.iglob(bpattern, dir_fd=self.dir_fd, **kwargs), bres2)
return res return res
def assertSequencesEqual_noorder(self, l1, l2): def assertSequencesEqual_noorder(self, l1, l2):
@ -78,6 +120,14 @@ class GlobTests(unittest.TestCase):
res = glob.glob(os.path.join(os.fsencode(os.curdir), b'*')) res = glob.glob(os.path.join(os.fsencode(os.curdir), b'*'))
self.assertEqual({type(r) for r in res}, {bytes}) self.assertEqual({type(r) for r in res}, {bytes})
def test_glob_empty_pattern(self):
self.assertEqual(glob.glob(''), [])
self.assertEqual(glob.glob(b''), [])
self.assertEqual(glob.glob('', root_dir=self.tempdir), [])
self.assertEqual(glob.glob(b'', root_dir=os.fsencode(self.tempdir)), [])
self.assertEqual(glob.glob('', dir_fd=self.dir_fd), [])
self.assertEqual(glob.glob(b'', dir_fd=self.dir_fd), [])
def test_glob_one_directory(self): def test_glob_one_directory(self):
eq = self.assertSequencesEqual_noorder eq = self.assertSequencesEqual_noorder
eq(self.glob('a*'), map(self.norm, ['a', 'aab', 'aaa'])) eq(self.glob('a*'), map(self.norm, ['a', 'aab', 'aaa']))

View file

@ -0,0 +1 @@
Added the *root_dir* and *dir_fd* parameters to :func:`glob.glob` and :func:`glob.iglob`.