mirror of
https://github.com/python/cpython.git
synced 2025-12-23 09:19:18 +00:00
New generator os.walk() does a bit more than os.path.walk() does, and
seems much easier to use. Code, docs, NEWS, and additions to test_os.py (testing this sucker is a bitch!).
This commit is contained in:
parent
e7adda9035
commit
c4e0940042
5 changed files with 242 additions and 5 deletions
|
|
@ -1050,6 +1050,71 @@ which is used to set the access and modified times, respectively.
|
||||||
Availability: Macintosh, \UNIX, Windows.
|
Availability: Macintosh, \UNIX, Windows.
|
||||||
\end{funcdesc}
|
\end{funcdesc}
|
||||||
|
|
||||||
|
\begin{funcdesc}{walk}{top\optional{, topdown=True}}
|
||||||
|
\index{directory!walking}
|
||||||
|
\index{directory!traversal}
|
||||||
|
|
||||||
|
\function{walk()} generates the file names in a directory tree.
|
||||||
|
For each directory in the tree rooted at directory \var{top} (including
|
||||||
|
\var{top} itself), it yields a 3-tuple
|
||||||
|
\code{(\var{dirpath}, \var{dirnames}, \var{filenames})}.
|
||||||
|
|
||||||
|
\var{dirpath} is a string, the path to the directory. \var{dirnames} is
|
||||||
|
a list of the names of the subdirectories in \var{dirpath}
|
||||||
|
(excluding \code{'.'} and \code{'..'}). \var{filenames} is a list of
|
||||||
|
the names of the non-directory files in \var{dirpath}. Note that the
|
||||||
|
names in the lists contain no path components. To get a full
|
||||||
|
path (which begins with \var{top}) to a file or directory in
|
||||||
|
\var{dirpath}, do \code{os.path.join(\var{dirpath}, \var{name})}.
|
||||||
|
|
||||||
|
If optional argument \var{topdown} is true or not specified, the triple
|
||||||
|
for a directory is generated before the triples for any of its
|
||||||
|
subdirectories (directories are generated top down). If \var{topdown} is
|
||||||
|
false, the triple for a directory is generated after the triples for all
|
||||||
|
of its subdirectories (directories are generated bottom up).
|
||||||
|
|
||||||
|
When \var{topdown} is true, the caller can modify the \var{dirnames} list
|
||||||
|
in-place (e.g., via \keyword{del} or slice assignment), and
|
||||||
|
\function{walk()} will only recurse into the subdirectories whose names
|
||||||
|
remain in \var{dirnames}; this can be used to prune the search,
|
||||||
|
impose a specific order of visiting, or even to inform \function{walk()}
|
||||||
|
about directories the caller creates or renames before it resumes
|
||||||
|
\function{walk()} again. Modifying \var{dirnames} when \var{topdown} is
|
||||||
|
false is ineffective, because in bottom-up mode the directories in
|
||||||
|
\var{dirnames} are generated before \var{dirnames} itself is generated.
|
||||||
|
|
||||||
|
\begin{notice}
|
||||||
|
If you pass a relative pathname, don't change the current working
|
||||||
|
directory between resumptions of \function{walk()}. \function{walk()}
|
||||||
|
never changes the current directory, and assumes that its caller
|
||||||
|
doesn't either.
|
||||||
|
\end{notice}
|
||||||
|
|
||||||
|
\begin{notice}
|
||||||
|
On systems that support symbolic links, links to subdirectories appear
|
||||||
|
in \var{dirnames} lists, but \function{walk()} will not visit them
|
||||||
|
(infinite loops are hard to avoid when following symbolic links).
|
||||||
|
To visit linked directories, you can identify them with
|
||||||
|
\code{os.path.islink(\var{path})}, and invoke \function{walk(\var{path})}
|
||||||
|
on each directly.
|
||||||
|
\end{notice}
|
||||||
|
|
||||||
|
This example displays the number of bytes taken by non-directory files
|
||||||
|
in each directory under the starting directory, except that it doesn't
|
||||||
|
look under any CVS subdirectory:
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
import os
|
||||||
|
from os.path import join, getsize
|
||||||
|
for root, dirs, files in os.walk('python/Lib/email'):
|
||||||
|
print root, "consumes",
|
||||||
|
print sum([getsize(join(root, name)) for name in files]),
|
||||||
|
print "bytes in", len(files), "non-directory files"
|
||||||
|
if 'CVS' in dirs:
|
||||||
|
dirs.remove('CVS') # don't visit CVS directories
|
||||||
|
\end{verbatim}
|
||||||
|
\versionadded{2.3}
|
||||||
|
\end{funcdesc}
|
||||||
|
|
||||||
\subsection{Process Management \label{os-process}}
|
\subsection{Process Management \label{os-process}}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -237,6 +237,12 @@ directories you must identify them with
|
||||||
\code{os.path.isdir(\var{file})}, and invoke \function{walk()} as
|
\code{os.path.isdir(\var{file})}, and invoke \function{walk()} as
|
||||||
necessary.
|
necessary.
|
||||||
\end{notice}
|
\end{notice}
|
||||||
|
|
||||||
|
\begin{seealso}
|
||||||
|
\seemodule{os}{The newer \function{os.walk()} generator supplies similar
|
||||||
|
functionality and can be easier to use.}
|
||||||
|
\end{seealso}
|
||||||
|
|
||||||
\end{funcdesc}
|
\end{funcdesc}
|
||||||
|
|
||||||
\begin{datadesc}{supports_unicode_filenames}
|
\begin{datadesc}{supports_unicode_filenames}
|
||||||
|
|
|
||||||
81
Lib/os.py
81
Lib/os.py
|
|
@ -26,6 +26,7 @@ import sys
|
||||||
|
|
||||||
_names = sys.builtin_module_names
|
_names = sys.builtin_module_names
|
||||||
|
|
||||||
|
# Note: more names are added to __all__ later.
|
||||||
__all__ = ["altsep", "curdir", "pardir", "sep", "pathsep", "linesep",
|
__all__ = ["altsep", "curdir", "pardir", "sep", "pathsep", "linesep",
|
||||||
"defpath", "name", "path"]
|
"defpath", "name", "path"]
|
||||||
|
|
||||||
|
|
@ -158,7 +159,7 @@ def removedirs(name):
|
||||||
Super-rmdir; remove a leaf directory and empty all intermediate
|
Super-rmdir; remove a leaf directory and empty all intermediate
|
||||||
ones. Works like rmdir except that, if the leaf directory is
|
ones. Works like rmdir except that, if the leaf directory is
|
||||||
successfully removed, directories corresponding to rightmost path
|
successfully removed, directories corresponding to rightmost path
|
||||||
segments will be pruned way until either the whole path is
|
segments will be pruned away until either the whole path is
|
||||||
consumed or an error occurs. Errors during this latter phase are
|
consumed or an error occurs. Errors during this latter phase are
|
||||||
ignored -- they generally mean that a directory was not empty.
|
ignored -- they generally mean that a directory was not empty.
|
||||||
|
|
||||||
|
|
@ -202,6 +203,84 @@ def renames(old, new):
|
||||||
|
|
||||||
__all__.extend(["makedirs", "removedirs", "renames"])
|
__all__.extend(["makedirs", "removedirs", "renames"])
|
||||||
|
|
||||||
|
def walk(top, topdown=True, onerror=None):
    """Directory tree generator.

    For each directory in the directory tree rooted at top (including top
    itself, but excluding '.' and '..'), yields a 3-tuple

        dirpath, dirnames, filenames

    dirpath is a string, the path to the directory.  dirnames is a list of
    the names of the subdirectories in dirpath (excluding '.' and '..').
    filenames is a list of the names of the non-directory files in dirpath.
    Note that the names in the lists are just names, with no path components.
    To get a full path (which begins with top) to a file or directory in
    dirpath, do os.path.join(dirpath, name).

    If optional arg 'topdown' is true or not specified, the triple for a
    directory is generated before the triples for any of its subdirectories
    (directories are generated top down).  If topdown is false, the triple
    for a directory is generated after the triples for all of its
    subdirectories (directories are generated bottom up).

    When topdown is true, the caller can modify the dirnames list in-place
    (e.g., via del or slice assignment), and walk will only recurse into the
    subdirectories whose names remain in dirnames; this can be used to prune
    the search, or to impose a specific order of visiting.  Modifying
    dirnames when topdown is false is ineffective, since the directories in
    dirnames have already been generated by the time dirnames itself is
    generated.

    By default a directory whose contents cannot be listed is skipped
    silently and nothing is yielded for it.  If optional arg 'onerror' is
    given, it must be a callable; it is called with one argument, the
    exception instance raised by listdir.  It can record the error and
    return, in which case the walk continues with the remaining
    directories, or raise an exception to abort the walk.

    Symbolic links to directories are listed in dirnames, but walk does not
    descend into them.

    Caution:  if you pass a relative pathname for top, don't change the
    current working directory between resumptions of walk.  walk never
    changes the current directory, and assumes that the client doesn't
    either.

    Example:

    from os.path import join, getsize
    for root, dirs, files in walk('python/Lib/email'):
        print root, "consumes",
        print sum([getsize(join(root, name)) for name in files]),
        print "bytes in", len(files), "non-directory files"
        if 'CVS' in dirs:
            dirs.remove('CVS')  # don't visit CVS directories
    """

    import sys
    from os.path import join, isdir, islink

    # We may not have read permission for top, in which case we can't
    # get a list of the files the directory contains.  os.path.walk
    # always suppressed the exception then, rather than blow up for a
    # minor reason when (say) a thousand readable directories are still
    # left to visit.  That logic is copied here, but the caller may ask
    # to be told about the failure through onerror.
    try:
        # Note that listdir and error are globals in this module due
        # to earlier import-*.
        names = listdir(top)
    except error:
        if onerror is not None:
            # sys.exc_info() fetches the active exception without relying
            # on exception-binding syntax.
            onerror(sys.exc_info()[1])
        return

    # Partition the entries into subdirectories and everything else.
    dirs, nondirs = [], []
    for name in names:
        if isdir(join(top, name)):
            dirs.append(name)
        else:
            nondirs.append(name)

    if topdown:
        yield top, dirs, nondirs
    # In top-down mode the caller may have edited dirs in place while the
    # generator was suspended; iterating it now honours those edits.
    for name in dirs:
        path = join(top, name)
        # Symlinked directories are reported but never followed, so a link
        # cycle cannot cause infinite recursion.
        if not islink(path):
            for x in walk(path, topdown, onerror):
                yield x
    if not topdown:
        yield top, dirs, nondirs
|
||||||
|
|
||||||
|
__all__.append("walk")
|
||||||
|
|
||||||
# Make sure os.environ exists, at least
|
# Make sure os.environ exists, at least
|
||||||
try:
|
try:
|
||||||
environ
|
environ
|
||||||
|
|
|
||||||
|
|
@ -202,11 +202,93 @@ class EnvironTests(TestMappingProtocol):
|
||||||
os.environ.clear()
|
os.environ.clear()
|
||||||
os.environ.update(self.__save)
|
os.environ.update(self.__save)
|
||||||
|
|
||||||
|
class WalkTests(unittest.TestCase):
    """Tests for os.walk().

    The directory tree is built in setUp and removed in tearDown, so a
    failing assertion does not leave the TESTFN tree behind on disk.
    """

    def setUp(self):
        """Create the directory tree that the traversal test walks."""
        import os
        from os.path import join

        # Build:
        #     TESTFN/               a file kid and two directory kids
        #         tmp1
        #         SUB1/             a file kid and a directory kid
        #             tmp2
        #             SUB11/        no kids
        #         SUB2/             just a file kid
        #             tmp3
        self.sub1_path = join(TESTFN, "SUB1")
        self.sub11_path = join(self.sub1_path, "SUB11")
        self.sub2_path = join(TESTFN, "SUB2")
        tmp1_path = join(TESTFN, "tmp1")
        tmp2_path = join(self.sub1_path, "tmp2")
        tmp3_path = join(self.sub2_path, "tmp3")

        # Create stuff.
        os.makedirs(self.sub11_path)
        os.makedirs(self.sub2_path)
        for path in tmp1_path, tmp2_path, tmp3_path:
            f = open(path, "w")
            try:
                f.write("I'm " + path + " and proud of it. Blame test_os.\n")
            finally:
                # Close even if the write fails, so the file can be removed.
                f.close()

    def tearDown(self):
        """Remove the tree created by setUp."""
        import os
        from os.path import join

        # Tear everything down.  This is a decent use for bottom-up on
        # Windows, which doesn't have a recursive delete command.  The
        # (not so) subtlety is that rmdir will fail unless the dir's
        # kids are removed first, so bottom up is essential.
        for root, dirs, files in os.walk(TESTFN, topdown=False):
            for name in files:
                os.remove(join(root, name))
            for name in dirs:
                os.rmdir(join(root, name))
        os.rmdir(TESTFN)

    def test_traversal(self):
        """Check top-down order, pruning via dirnames, and bottom-up order."""
        import os

        sub1_path = self.sub1_path
        sub11_path = self.sub11_path
        sub2_path = self.sub2_path

        # Walk top-down.
        walked = list(os.walk(TESTFN))
        self.assertEqual(len(walked), 4)
        # We can't know which order SUB1 and SUB2 will appear in.
        # Not flipped:  TESTFN, SUB1, SUB11, SUB2
        #     flipped:  TESTFN, SUB2, SUB1, SUB11
        flipped = walked[0][1][0] != "SUB1"
        walked[0][1].sort()
        self.assertEqual(walked[0], (TESTFN, ["SUB1", "SUB2"], ["tmp1"]))
        self.assertEqual(walked[1 + flipped], (sub1_path, ["SUB11"], ["tmp2"]))
        self.assertEqual(walked[2 + flipped], (sub11_path, [], []))
        self.assertEqual(walked[3 - 2 * flipped], (sub2_path, [], ["tmp3"]))

        # Prune the search.
        walked = []
        for root, dirs, files in os.walk(TESTFN):
            walked.append((root, dirs, files))
            # Don't descend into SUB1.
            if 'SUB1' in dirs:
                # Note that this also mutates the dirs we appended to walked!
                dirs.remove('SUB1')
        self.assertEqual(len(walked), 2)
        self.assertEqual(walked[0], (TESTFN, ["SUB2"], ["tmp1"]))
        self.assertEqual(walked[1], (sub2_path, [], ["tmp3"]))

        # Walk bottom-up.
        walked = list(os.walk(TESTFN, topdown=False))
        self.assertEqual(len(walked), 4)
        # We can't know which order SUB1 and SUB2 will appear in.
        # Not flipped:  SUB11, SUB1, SUB2, TESTFN
        #     flipped:  SUB2, SUB11, SUB1, TESTFN
        flipped = walked[3][1][0] != "SUB1"
        walked[3][1].sort()
        self.assertEqual(walked[3], (TESTFN, ["SUB1", "SUB2"], ["tmp1"]))
        self.assertEqual(walked[flipped], (sub11_path, [], []))
        self.assertEqual(walked[flipped + 1], (sub1_path, ["SUB11"], ["tmp2"]))
        self.assertEqual(walked[2 - 2 * flipped], (sub2_path, [], ["tmp3"]))
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
suite = unittest.TestSuite()
|
suite = unittest.TestSuite()
|
||||||
suite.addTest(unittest.makeSuite(TemporaryFileTests))
|
for cls in (TemporaryFileTests,
|
||||||
suite.addTest(unittest.makeSuite(StatAttributeTests))
|
StatAttributeTests,
|
||||||
suite.addTest(unittest.makeSuite(EnvironTests))
|
EnvironTests,
|
||||||
|
WalkTests,
|
||||||
|
):
|
||||||
|
suite.addTest(unittest.makeSuite(cls))
|
||||||
run_suite(suite)
|
run_suite(suite)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
|
|
@ -127,7 +127,7 @@ Extension modules
|
||||||
Subsumed the times() function into repeat().
|
Subsumed the times() function into repeat().
|
||||||
Added chain() and cycle().
|
Added chain() and cycle().
|
||||||
|
|
||||||
- The rotor module is now deprecated; the encryption algorithm it uses
|
- The rotor module is now deprecated; the encryption algorithm it uses
|
||||||
is not believed to be secure, and including crypto code with Python
|
is not believed to be secure, and including crypto code with Python
|
||||||
has implications for exporting and importing it in various countries.
|
has implications for exporting and importing it in various countries.
|
||||||
|
|
||||||
|
|
@ -139,6 +139,11 @@ Extension modules
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- New generator function os.walk() is an easy-to-use alternative to
|
||||||
|
os.path.walk(). See os module docs for details. os.path.walk()
|
||||||
|
isn't deprecated at this time, but may become deprecated in a
|
||||||
|
future release.
|
||||||
|
|
||||||
- Added new module "platform" which provides a wide range of tools
|
- Added new module "platform" which provides a wide range of tools
|
||||||
for querying platform dependent features.
|
for querying platform dependent features.
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue