mirror of
https://github.com/python/cpython.git
synced 2025-11-26 21:33:10 +00:00
Issue #13968: The glob module now supports recursive search in
subdirectories using the "**" pattern.
This commit is contained in:
parent
d5fd6188e2
commit
c2edcdd194
5 changed files with 199 additions and 19 deletions
|
|
@ -29,7 +29,7 @@ For example, ``'[?]'`` matches the character ``'?'``.
|
||||||
The :mod:`pathlib` module offers high-level path objects.
|
The :mod:`pathlib` module offers high-level path objects.
|
||||||
|
|
||||||
|
|
||||||
.. function:: glob(pathname)
|
.. function:: glob(pathname, *, recursive=False)
|
||||||
|
|
||||||
Return a possibly-empty list of path names that match *pathname*, which must be
|
Return a possibly-empty list of path names that match *pathname*, which must be
|
||||||
a string containing a path specification. *pathname* can be either absolute
|
a string containing a path specification. *pathname* can be either absolute
|
||||||
|
|
@ -37,8 +37,19 @@ For example, ``'[?]'`` matches the character ``'?'``.
|
||||||
:file:`../../Tools/\*/\*.gif`), and can contain shell-style wildcards. Broken
|
:file:`../../Tools/\*/\*.gif`), and can contain shell-style wildcards. Broken
|
||||||
symlinks are included in the results (as in the shell).
|
symlinks are included in the results (as in the shell).
|
||||||
|
|
||||||
|
If *recursive* is true, the pattern "``**``" will match any files and zero or
|
||||||
|
more directories and subdirectories.  If the pattern is followed by an
|
||||||
|
``os.sep``, only directories and subdirectories match.
|
||||||
|
|
||||||
.. function:: iglob(pathname)
|
.. note::
|
||||||
|
Using the "``**``" pattern in large directory trees may consume
|
||||||
|
an inordinate amount of time.
|
||||||
|
|
||||||
|
.. versionchanged:: 3.5
|
||||||
|
Support for recursive globs using "``**``".
|
||||||
|
|
||||||
|
|
||||||
|
.. function:: iglob(pathname, *, recursive=False)
|
||||||
|
|
||||||
Return an :term:`iterator` which yields the same values as :func:`glob`
|
Return an :term:`iterator` which yields the same values as :func:`glob`
|
||||||
without actually storing them all simultaneously.
|
without actually storing them all simultaneously.
|
||||||
|
|
@ -55,8 +66,9 @@ For example, ``'[?]'`` matches the character ``'?'``.
|
||||||
.. versionadded:: 3.4
|
.. versionadded:: 3.4
|
||||||
|
|
||||||
|
|
||||||
For example, consider a directory containing only the following files:
|
For example, consider a directory containing the following files:
|
||||||
:file:`1.gif`, :file:`2.txt`, and :file:`card.gif`. :func:`glob` will produce
|
:file:`1.gif`, :file:`2.txt`, :file:`card.gif` and a subdirectory :file:`sub`
|
||||||
|
which contains only the file :file:`3.txt`. :func:`glob` will produce
|
||||||
the following results. Notice how any leading components of the path are
|
the following results. Notice how any leading components of the path are
|
||||||
preserved. ::
|
preserved. ::
|
||||||
|
|
||||||
|
|
@ -67,6 +79,10 @@ preserved. ::
|
||||||
['1.gif', 'card.gif']
|
['1.gif', 'card.gif']
|
||||||
>>> glob.glob('?.gif')
|
>>> glob.glob('?.gif')
|
||||||
['1.gif']
|
['1.gif']
|
||||||
|
>>> glob.glob('**/*.txt', recursive=True)
|
||||||
|
['2.txt', 'sub/3.txt']
|
||||||
|
>>> glob.glob('./**/', recursive=True)
|
||||||
|
['./', './sub/']
|
||||||
|
|
||||||
If the directory contains files starting with ``.`` they won't be matched by
|
If the directory contains files starting with ``.`` they won't be matched by
|
||||||
default. For example, consider a directory containing :file:`card.gif` and
|
default. For example, consider a directory containing :file:`card.gif` and
|
||||||
|
|
|
||||||
|
|
@ -141,6 +141,13 @@ doctest
|
||||||
*module* contains no docstrings instead of raising :exc:`ValueError`
|
*module* contains no docstrings instead of raising :exc:`ValueError`
|
||||||
(contributed by Glenn Jones in :issue:`15916`).
|
(contributed by Glenn Jones in :issue:`15916`).
|
||||||
|
|
||||||
|
glob
|
||||||
|
----
|
||||||
|
|
||||||
|
* :func:`~glob.iglob` and :func:`~glob.glob` now support recursive search in
|
||||||
|
subdirectories using the "``**``" pattern.
|
||||||
|
(Contributed by Serhiy Storchaka in :issue:`13968`.)
|
||||||
|
|
||||||
imaplib
|
imaplib
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
|
|
||||||
56
Lib/glob.py
56
Lib/glob.py
|
|
@ -6,7 +6,7 @@ import fnmatch
|
||||||
|
|
||||||
__all__ = ["glob", "iglob"]
|
__all__ = ["glob", "iglob"]
|
||||||
|
|
||||||
def glob(pathname):
|
def glob(pathname, *, recursive=False):
|
||||||
"""Return a list of paths matching a pathname pattern.
|
"""Return a list of paths matching a pathname pattern.
|
||||||
|
|
||||||
The pattern may contain simple shell-style wildcards a la
|
The pattern may contain simple shell-style wildcards a la
|
||||||
|
|
@ -14,10 +14,12 @@ def glob(pathname):
|
||||||
dot are special cases that are not matched by '*' and '?'
|
dot are special cases that are not matched by '*' and '?'
|
||||||
patterns.
|
patterns.
|
||||||
|
|
||||||
|
If recursive is true, the pattern '**' will match any files and
|
||||||
|
zero or more directories and subdirectories.
|
||||||
"""
|
"""
|
||||||
return list(iglob(pathname))
|
return list(iglob(pathname, recursive=recursive))
|
||||||
|
|
||||||
def iglob(pathname):
|
def iglob(pathname, *, recursive=False):
|
||||||
"""Return an iterator which yields the paths matching a pathname pattern.
|
"""Return an iterator which yields the paths matching a pathname pattern.
|
||||||
|
|
||||||
The pattern may contain simple shell-style wildcards a la
|
The pattern may contain simple shell-style wildcards a la
|
||||||
|
|
@ -25,6 +27,8 @@ def iglob(pathname):
|
||||||
dot are special cases that are not matched by '*' and '?'
|
dot are special cases that are not matched by '*' and '?'
|
||||||
patterns.
|
patterns.
|
||||||
|
|
||||||
|
If recursive is true, the pattern '**' will match any files and
|
||||||
|
zero or more directories and subdirectories.
|
||||||
"""
|
"""
|
||||||
dirname, basename = os.path.split(pathname)
|
dirname, basename = os.path.split(pathname)
|
||||||
if not has_magic(pathname):
|
if not has_magic(pathname):
|
||||||
|
|
@ -37,17 +41,23 @@ def iglob(pathname):
|
||||||
yield pathname
|
yield pathname
|
||||||
return
|
return
|
||||||
if not dirname:
|
if not dirname:
|
||||||
yield from glob1(None, basename)
|
if recursive and _isrecursive(basename):
|
||||||
|
yield from glob2(dirname, basename)
|
||||||
|
else:
|
||||||
|
yield from glob1(dirname, basename)
|
||||||
return
|
return
|
||||||
# `os.path.split()` returns the argument itself as a dirname if it is a
|
# `os.path.split()` returns the argument itself as a dirname if it is a
|
||||||
# drive or UNC path. Prevent an infinite recursion if a drive or UNC path
|
# drive or UNC path. Prevent an infinite recursion if a drive or UNC path
|
||||||
# contains magic characters (e.g. r'\\?\C:').
|
# contains magic characters (e.g. r'\\?\C:').
|
||||||
if dirname != pathname and has_magic(dirname):
|
if dirname != pathname and has_magic(dirname):
|
||||||
dirs = iglob(dirname)
|
dirs = iglob(dirname, recursive=recursive)
|
||||||
else:
|
else:
|
||||||
dirs = [dirname]
|
dirs = [dirname]
|
||||||
if has_magic(basename):
|
if has_magic(basename):
|
||||||
glob_in_dir = glob1
|
if recursive and _isrecursive(basename):
|
||||||
|
glob_in_dir = glob2
|
||||||
|
else:
|
||||||
|
glob_in_dir = glob1
|
||||||
else:
|
else:
|
||||||
glob_in_dir = glob0
|
glob_in_dir = glob0
|
||||||
for dirname in dirs:
|
for dirname in dirs:
|
||||||
|
|
@ -83,6 +93,34 @@ def glob0(dirname, basename):
|
||||||
return [basename]
|
return [basename]
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
# This helper function recursively yields relative pathnames inside a literal
|
||||||
|
# directory.
|
||||||
|
|
||||||
|
def glob2(dirname, pattern):
|
||||||
|
assert _isrecursive(pattern)
|
||||||
|
if dirname:
|
||||||
|
yield pattern[:0]
|
||||||
|
yield from _rlistdir(dirname)
|
||||||
|
|
||||||
|
# Recursively yields relative pathnames inside a literal directory.
|
||||||
|
|
||||||
|
def _rlistdir(dirname):
|
||||||
|
if not dirname:
|
||||||
|
if isinstance(dirname, bytes):
|
||||||
|
dirname = bytes(os.curdir, 'ASCII')
|
||||||
|
else:
|
||||||
|
dirname = os.curdir
|
||||||
|
try:
|
||||||
|
names = os.listdir(dirname)
|
||||||
|
except os.error:
|
||||||
|
return
|
||||||
|
for x in names:
|
||||||
|
if not _ishidden(x):
|
||||||
|
yield x
|
||||||
|
path = os.path.join(dirname, x) if dirname else x
|
||||||
|
for y in _rlistdir(path):
|
||||||
|
yield os.path.join(x, y)
|
||||||
|
|
||||||
|
|
||||||
magic_check = re.compile('([*?[])')
|
magic_check = re.compile('([*?[])')
|
||||||
magic_check_bytes = re.compile(b'([*?[])')
|
magic_check_bytes = re.compile(b'([*?[])')
|
||||||
|
|
@ -97,6 +135,12 @@ def has_magic(s):
|
||||||
def _ishidden(path):
|
def _ishidden(path):
|
||||||
return path[0] in ('.', b'.'[0])
|
return path[0] in ('.', b'.'[0])
|
||||||
|
|
||||||
|
def _isrecursive(pattern):
|
||||||
|
if isinstance(pattern, bytes):
|
||||||
|
return pattern == b'**'
|
||||||
|
else:
|
||||||
|
return pattern == '**'
|
||||||
|
|
||||||
def escape(pathname):
|
def escape(pathname):
|
||||||
"""Escape all special characters.
|
"""Escape all special characters.
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ import shutil
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from test.support import (run_unittest, TESTFN, skip_unless_symlink,
|
from test.support import (TESTFN, skip_unless_symlink,
|
||||||
can_symlink, create_empty_file)
|
can_symlink, create_empty_file)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -13,6 +13,9 @@ class GlobTests(unittest.TestCase):
|
||||||
def norm(self, *parts):
|
def norm(self, *parts):
|
||||||
return os.path.normpath(os.path.join(self.tempdir, *parts))
|
return os.path.normpath(os.path.join(self.tempdir, *parts))
|
||||||
|
|
||||||
|
def joins(self, *tuples):
|
||||||
|
return [os.path.join(self.tempdir, *parts) for parts in tuples]
|
||||||
|
|
||||||
def mktemp(self, *parts):
|
def mktemp(self, *parts):
|
||||||
filename = self.norm(*parts)
|
filename = self.norm(*parts)
|
||||||
base, file = os.path.split(filename)
|
base, file = os.path.split(filename)
|
||||||
|
|
@ -38,17 +41,17 @@ class GlobTests(unittest.TestCase):
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
shutil.rmtree(self.tempdir)
|
shutil.rmtree(self.tempdir)
|
||||||
|
|
||||||
def glob(self, *parts):
|
def glob(self, *parts, **kwargs):
|
||||||
if len(parts) == 1:
|
if len(parts) == 1:
|
||||||
pattern = parts[0]
|
pattern = parts[0]
|
||||||
else:
|
else:
|
||||||
pattern = os.path.join(*parts)
|
pattern = os.path.join(*parts)
|
||||||
p = os.path.join(self.tempdir, pattern)
|
p = os.path.join(self.tempdir, pattern)
|
||||||
res = glob.glob(p)
|
res = glob.glob(p, **kwargs)
|
||||||
self.assertEqual(list(glob.iglob(p)), res)
|
self.assertEqual(list(glob.iglob(p, **kwargs)), res)
|
||||||
bres = [os.fsencode(x) for x in res]
|
bres = [os.fsencode(x) for x in res]
|
||||||
self.assertEqual(glob.glob(os.fsencode(p)), bres)
|
self.assertEqual(glob.glob(os.fsencode(p), **kwargs), bres)
|
||||||
self.assertEqual(list(glob.iglob(os.fsencode(p))), bres)
|
self.assertEqual(list(glob.iglob(os.fsencode(p), **kwargs)), bres)
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def assertSequencesEqual_noorder(self, l1, l2):
|
def assertSequencesEqual_noorder(self, l1, l2):
|
||||||
|
|
@ -192,9 +195,116 @@ class GlobTests(unittest.TestCase):
|
||||||
check('//?/c:/?', '//?/c:/[?]')
|
check('//?/c:/?', '//?/c:/[?]')
|
||||||
check('//*/*/*', '//*/*/[*]')
|
check('//*/*/*', '//*/*/[*]')
|
||||||
|
|
||||||
def test_main():
|
def rglob(self, *parts, **kwargs):
|
||||||
run_unittest(GlobTests)
|
return self.glob(*parts, recursive=True, **kwargs)
|
||||||
|
|
||||||
|
def test_recursive_glob(self):
|
||||||
|
eq = self.assertSequencesEqual_noorder
|
||||||
|
full = [('ZZZ',),
|
||||||
|
('a',), ('a', 'D'),
|
||||||
|
('a', 'bcd'),
|
||||||
|
('a', 'bcd', 'EF'),
|
||||||
|
('a', 'bcd', 'efg'),
|
||||||
|
('a', 'bcd', 'efg', 'ha'),
|
||||||
|
('aaa',), ('aaa', 'zzzF'),
|
||||||
|
('aab',), ('aab', 'F'),
|
||||||
|
]
|
||||||
|
if can_symlink():
|
||||||
|
full += [('sym1',), ('sym2',),
|
||||||
|
('sym3',),
|
||||||
|
('sym3', 'EF'),
|
||||||
|
('sym3', 'efg'),
|
||||||
|
('sym3', 'efg', 'ha'),
|
||||||
|
]
|
||||||
|
eq(self.rglob('**'), self.joins(('',), *full))
|
||||||
|
eq(self.rglob('.', '**'), self.joins(('.',''),
|
||||||
|
*(('.',) + i for i in full)))
|
||||||
|
dirs = [('a', ''), ('a', 'bcd', ''), ('a', 'bcd', 'efg', ''),
|
||||||
|
('aaa', ''), ('aab', '')]
|
||||||
|
if can_symlink():
|
||||||
|
dirs += [('sym3', ''), ('sym3', 'efg', '')]
|
||||||
|
eq(self.rglob('**', ''), self.joins(('',), *dirs))
|
||||||
|
|
||||||
|
eq(self.rglob('a', '**'), self.joins(
|
||||||
|
('a', ''), ('a', 'D'), ('a', 'bcd'), ('a', 'bcd', 'EF'),
|
||||||
|
('a', 'bcd', 'efg'), ('a', 'bcd', 'efg', 'ha')))
|
||||||
|
eq(self.rglob('a**'), self.joins(('a',), ('aaa',), ('aab',)))
|
||||||
|
expect = [('a', 'bcd', 'EF')]
|
||||||
|
if can_symlink():
|
||||||
|
expect += [('sym3', 'EF')]
|
||||||
|
eq(self.rglob('**', 'EF'), self.joins(*expect))
|
||||||
|
expect = [('a', 'bcd', 'EF'), ('aaa', 'zzzF'), ('aab', 'F')]
|
||||||
|
if can_symlink():
|
||||||
|
expect += [('sym3', 'EF')]
|
||||||
|
eq(self.rglob('**', '*F'), self.joins(*expect))
|
||||||
|
eq(self.rglob('**', '*F', ''), [])
|
||||||
|
eq(self.rglob('**', 'bcd', '*'), self.joins(
|
||||||
|
('a', 'bcd', 'EF'), ('a', 'bcd', 'efg')))
|
||||||
|
eq(self.rglob('a', '**', 'bcd'), self.joins(('a', 'bcd')))
|
||||||
|
|
||||||
|
predir = os.path.abspath(os.curdir)
|
||||||
|
try:
|
||||||
|
os.chdir(self.tempdir)
|
||||||
|
join = os.path.join
|
||||||
|
eq(glob.glob('**', recursive=True), [join(*i) for i in full])
|
||||||
|
eq(glob.glob(join('**', ''), recursive=True),
|
||||||
|
[join(*i) for i in dirs])
|
||||||
|
eq(glob.glob(join('**','zz*F'), recursive=True),
|
||||||
|
[join('aaa', 'zzzF')])
|
||||||
|
eq(glob.glob('**zz*F', recursive=True), [])
|
||||||
|
expect = [join('a', 'bcd', 'EF')]
|
||||||
|
if can_symlink():
|
||||||
|
expect += [join('sym3', 'EF')]
|
||||||
|
eq(glob.glob(join('**', 'EF'), recursive=True), expect)
|
||||||
|
finally:
|
||||||
|
os.chdir(predir)
|
||||||
|
|
||||||
|
|
||||||
|
@skip_unless_symlink
|
||||||
|
class SymlinkLoopGlobTests(unittest.TestCase):
|
||||||
|
|
||||||
|
def test_selflink(self):
|
||||||
|
tempdir = TESTFN + "_dir"
|
||||||
|
os.makedirs(tempdir)
|
||||||
|
create_empty_file(os.path.join(tempdir, 'file'))
|
||||||
|
os.symlink(os.curdir, os.path.join(tempdir, 'link'))
|
||||||
|
self.addCleanup(shutil.rmtree, tempdir)
|
||||||
|
|
||||||
|
results = glob.glob('**', recursive=True)
|
||||||
|
self.assertEqual(len(results), len(set(results)))
|
||||||
|
results = set(results)
|
||||||
|
depth = 0
|
||||||
|
while results:
|
||||||
|
path = os.path.join(*([tempdir] + ['link'] * depth))
|
||||||
|
self.assertIn(path, results)
|
||||||
|
results.remove(path)
|
||||||
|
if not results:
|
||||||
|
break
|
||||||
|
path = os.path.join(path, 'file')
|
||||||
|
self.assertIn(path, results)
|
||||||
|
results.remove(path)
|
||||||
|
depth += 1
|
||||||
|
|
||||||
|
results = glob.glob(os.path.join('**', 'file'), recursive=True)
|
||||||
|
self.assertEqual(len(results), len(set(results)))
|
||||||
|
results = set(results)
|
||||||
|
depth = 0
|
||||||
|
while results:
|
||||||
|
path = os.path.join(*([tempdir] + ['link'] * depth + ['file']))
|
||||||
|
self.assertIn(path, results)
|
||||||
|
results.remove(path)
|
||||||
|
depth += 1
|
||||||
|
|
||||||
|
results = glob.glob(os.path.join('**', ''), recursive=True)
|
||||||
|
self.assertEqual(len(results), len(set(results)))
|
||||||
|
results = set(results)
|
||||||
|
depth = 0
|
||||||
|
while results:
|
||||||
|
path = os.path.join(*([tempdir] + ['link'] * depth + ['']))
|
||||||
|
self.assertIn(path, results)
|
||||||
|
results.remove(path)
|
||||||
|
depth += 1
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
test_main()
|
unittest.main()
|
||||||
|
|
|
||||||
|
|
@ -132,6 +132,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #13968: The glob module now supports recursive search in
|
||||||
|
subdirectories using the "**" pattern.
|
||||||
|
|
||||||
- Issue #21951: Fixed a crash in Tkinter on AIX when called Tcl command with
|
- Issue #21951: Fixed a crash in Tkinter on AIX when called Tcl command with
|
||||||
empty string or tuple argument.
|
empty string or tuple argument.
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue