mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
Patch #943206:
`glob.glob()` currently calls itself recursively to build a list of matches of the dirname part of the pattern and then filters by the basename part. This is effectively BFS. ``glob.glob('*/*/*/*/*/foo')`` will build a huge list of all directories 5 levels deep even if only a handful of them contain a ``foo`` entry. A generator-based recursion would never have to store these lists all at once, because it implements DFS. This patch converts the `glob` function to an `iglob` recursive generator. `glob()` now just returns ``list(iglob(pattern))``. I also cleaned up the code a bit (reduced duplicate `has_magic()` checks and created a second `glob0` helper function so that the main loop need not be duplicated). Thanks to Cherniavsky Beni for the patch!
This commit is contained in:
parent
e4172eadf3
commit
836f5433f7
3 changed files with 50 additions and 24 deletions
|
@ -16,7 +16,7 @@ and \function{os.path.expandvars()}.)
|
||||||
\index{filenames!pathname expansion}
|
\index{filenames!pathname expansion}
|
||||||
|
|
||||||
\begin{funcdesc}{glob}{pathname}
|
\begin{funcdesc}{glob}{pathname}
|
||||||
Returns a possibly-empty list of path names that match \var{pathname},
|
Return a possibly-empty list of path names that match \var{pathname},
|
||||||
which must be a string containing a path specification.
|
which must be a string containing a path specification.
|
||||||
\var{pathname} can be either absolute (like
|
\var{pathname} can be either absolute (like
|
||||||
\file{/usr/src/Python-1.5/Makefile}) or relative (like
|
\file{/usr/src/Python-1.5/Makefile}) or relative (like
|
||||||
|
@ -24,6 +24,12 @@ which must be a string containing a path specification.
|
||||||
Broken symlinks are included in the results (as in the shell).
|
Broken symlinks are included in the results (as in the shell).
|
||||||
\end{funcdesc}
|
\end{funcdesc}
|
||||||
|
|
||||||
|
\begin{funcdesc}{iglob}{pathname}
|
||||||
|
Return an iterator which yields the same values as \function{glob()}
|
||||||
|
without actually storing them all simultaneously.
|
||||||
|
\versionadded{2.5}
|
||||||
|
\end{funcdesc}
|
||||||
|
|
||||||
For example, consider a directory containing only the following files:
|
For example, consider a directory containing only the following files:
|
||||||
\file{1.gif}, \file{2.txt}, and \file{card.gif}. \function{glob()}
|
\file{1.gif}, \file{2.txt}, and \file{card.gif}. \function{glob()}
|
||||||
will produce the following results. Notice how any leading components
|
will produce the following results. Notice how any leading components
|
||||||
|
|
62
Lib/glob.py
62
Lib/glob.py
|
@ -4,43 +4,50 @@ import os
|
||||||
import fnmatch
|
import fnmatch
|
||||||
import re
|
import re
|
||||||
|
|
||||||
__all__ = ["glob"]
|
__all__ = ["glob", "iglob"]
|
||||||
|
|
||||||
def glob(pathname):
|
def glob(pathname):
|
||||||
"""Return a list of paths matching a pathname pattern.
|
"""Return a list of paths matching a pathname pattern.
|
||||||
|
|
||||||
The pattern may contain simple shell-style wildcards a la fnmatch.
|
The pattern may contain simple shell-style wildcards a la fnmatch.
|
||||||
|
|
||||||
|
"""
|
||||||
|
return list(iglob(pathname))
|
||||||
|
|
||||||
|
def iglob(pathname):
|
||||||
|
"""Return a list of paths matching a pathname pattern.
|
||||||
|
|
||||||
|
The pattern may contain simple shell-style wildcards a la fnmatch.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if not has_magic(pathname):
|
if not has_magic(pathname):
|
||||||
if os.path.lexists(pathname):
|
if os.path.lexists(pathname):
|
||||||
return [pathname]
|
yield pathname
|
||||||
else:
|
return
|
||||||
return []
|
|
||||||
dirname, basename = os.path.split(pathname)
|
dirname, basename = os.path.split(pathname)
|
||||||
if not dirname:
|
if not dirname:
|
||||||
return glob1(os.curdir, basename)
|
for name in glob1(os.curdir, basename):
|
||||||
elif has_magic(dirname):
|
yield name
|
||||||
list = glob(dirname)
|
return
|
||||||
|
if has_magic(dirname):
|
||||||
|
dirs = iglob(dirname)
|
||||||
else:
|
else:
|
||||||
list = [dirname]
|
dirs = [dirname]
|
||||||
if not has_magic(basename):
|
if has_magic(basename):
|
||||||
result = []
|
glob_in_dir = glob1
|
||||||
for dirname in list:
|
|
||||||
if basename or os.path.isdir(dirname):
|
|
||||||
name = os.path.join(dirname, basename)
|
|
||||||
if os.path.lexists(name):
|
|
||||||
result.append(name)
|
|
||||||
else:
|
else:
|
||||||
result = []
|
glob_in_dir = glob0
|
||||||
for dirname in list:
|
for dirname in dirs:
|
||||||
sublist = glob1(dirname, basename)
|
for name in glob_in_dir(dirname, basename):
|
||||||
for name in sublist:
|
yield os.path.join(dirname, name)
|
||||||
result.append(os.path.join(dirname, name))
|
|
||||||
return result
|
# These 2 helper functions non-recursively glob inside a literal directory.
|
||||||
|
# They return a list of basenames. `glob1` accepts a pattern while `glob0`
|
||||||
|
# takes a literal basename (so it only has to check for its existence).
|
||||||
|
|
||||||
def glob1(dirname, pattern):
|
def glob1(dirname, pattern):
|
||||||
if not dirname: dirname = os.curdir
|
if not dirname:
|
||||||
|
dirname = os.curdir
|
||||||
try:
|
try:
|
||||||
names = os.listdir(dirname)
|
names = os.listdir(dirname)
|
||||||
except os.error:
|
except os.error:
|
||||||
|
@ -49,6 +56,17 @@ def glob1(dirname, pattern):
|
||||||
names=filter(lambda x: x[0]!='.',names)
|
names=filter(lambda x: x[0]!='.',names)
|
||||||
return fnmatch.filter(names,pattern)
|
return fnmatch.filter(names,pattern)
|
||||||
|
|
||||||
|
def glob0(dirname, basename):
    """Return ``[basename]`` if it exists inside ``dirname``, else ``[]``.

    ``basename`` is taken literally (no wildcard expansion); only its
    existence is checked.  An empty ``basename`` stands for the directory
    itself and matches only when ``dirname`` is a directory.
    """
    if basename == '':
        # `os.path.split()` returns an empty basename for paths ending with a
        # directory separator.  'q*x/' should match only directories.
        # Note: the check lives in `os.path`; the `os` module itself has no
        # `isdir` attribute.
        if os.path.isdir(dirname):
            return [basename]
    else:
        # `lexists` (not `exists`) so that broken symlinks still match.
        if os.path.lexists(os.path.join(dirname, basename)):
            return [basename]
    return []
|
||||||
|
|
||||||
|
|
||||||
magic_check = re.compile('[*?[]')
|
magic_check = re.compile('[*?[]')
|
||||||
|
|
||||||
|
|
|
@ -61,7 +61,9 @@ class GlobTests(unittest.TestCase):
|
||||||
else:
|
else:
|
||||||
pattern = os.path.join(*parts)
|
pattern = os.path.join(*parts)
|
||||||
p = os.path.join(self.tempdir, pattern)
|
p = os.path.join(self.tempdir, pattern)
|
||||||
return glob.glob(p)
|
res = glob.glob(p)
|
||||||
|
self.assertEqual(list(glob.iglob(p)), res)
|
||||||
|
return res
|
||||||
|
|
||||||
def assertSequencesEqual_noorder(self, l1, l2):
|
def assertSequencesEqual_noorder(self, l1, l2):
|
||||||
self.assertEqual(set(l1), set(l2))
|
self.assertEqual(set(l1), set(l2))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue