bpo-25996: Added support for file descriptors in os.scandir() on Unix. (#502)

os.fwalk() is now about 2 times faster because it uses os.scandir().
This commit is contained in:
Serhiy Storchaka 2017-03-30 09:12:31 +03:00 committed by GitHub
parent 0a58f72762
commit ea720fe7e9
7 changed files with 160 additions and 44 deletions

View file

@ -2029,6 +2029,9 @@ features:
attributes of each :class:`os.DirEntry` will be ``bytes``; in all other attributes of each :class:`os.DirEntry` will be ``bytes``; in all other
circumstances, they will be of type ``str``. circumstances, they will be of type ``str``.
This function can also support :ref:`specifying a file descriptor
<path_fd>`; the file descriptor must refer to a directory.
The :func:`scandir` iterator supports the :term:`context manager` protocol The :func:`scandir` iterator supports the :term:`context manager` protocol
and has the following method: and has the following method:
@ -2075,6 +2078,9 @@ features:
The function accepts a :term:`path-like object`. The function accepts a :term:`path-like object`.
.. versionchanged:: 3.7
Added support for :ref:`file descriptors <path_fd>` on Unix.
.. class:: DirEntry .. class:: DirEntry
@ -2114,7 +2120,9 @@ features:
The entry's full path name: equivalent to ``os.path.join(scandir_path, The entry's full path name: equivalent to ``os.path.join(scandir_path,
entry.name)`` where *scandir_path* is the :func:`scandir` *path* entry.name)`` where *scandir_path* is the :func:`scandir` *path*
argument. The path is only absolute if the :func:`scandir` *path* argument. The path is only absolute if the :func:`scandir` *path*
argument was absolute. argument was absolute. If the :func:`scandir` *path*
argument was a :ref:`file descriptor <path_fd>`, the :attr:`path`
attribute is the same as the :attr:`name` attribute.
The :attr:`path` attribute will be ``bytes`` if the :func:`scandir` The :attr:`path` attribute will be ``bytes`` if the :func:`scandir`
*path* argument is of type ``bytes`` and ``str`` otherwise. Use *path* argument is of type ``bytes`` and ``str`` otherwise. Use

View file

@ -108,6 +108,9 @@ os
Added support for :class:`bytes` paths in :func:`~os.fwalk`. (Contributed by Added support for :class:`bytes` paths in :func:`~os.fwalk`. (Contributed by
Serhiy Storchaka in :issue:`28682`.) Serhiy Storchaka in :issue:`28682`.)
Added support for :ref:`file descriptors <path_fd>` in :func:`~os.scandir`
on Unix. (Contributed by Serhiy Storchaka in :issue:`25996`.)
unittest.mock unittest.mock
------------- -------------
@ -148,6 +151,10 @@ Optimizations
:func:`~math.erfc` in the :mod:`math` module. (Contributed by Serhiy :func:`~math.erfc` in the :mod:`math` module. (Contributed by Serhiy
Storchaka in :issue:`26121`.) Storchaka in :issue:`26121`.)
* The :func:`os.fwalk` function is now about 2 times faster. This was
achieved by using the :func:`os.scandir` function.
(Contributed by Serhiy Storchaka in :issue:`25996`.)
Build and C API Changes Build and C API Changes
======================= =======================

View file

@ -129,6 +129,7 @@ if _exists("_have_functions"):
_add("HAVE_FCHMOD", "chmod") _add("HAVE_FCHMOD", "chmod")
_add("HAVE_FCHOWN", "chown") _add("HAVE_FCHOWN", "chown")
_add("HAVE_FDOPENDIR", "listdir") _add("HAVE_FDOPENDIR", "listdir")
_add("HAVE_FDOPENDIR", "scandir")
_add("HAVE_FEXECVE", "execve") _add("HAVE_FEXECVE", "execve")
_set.add(stat) # fstat always works _set.add(stat) # fstat always works
_add("HAVE_FTRUNCATE", "truncate") _add("HAVE_FTRUNCATE", "truncate")
@ -416,7 +417,7 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
__all__.append("walk") __all__.append("walk")
if {open, stat} <= supports_dir_fd and {listdir, stat} <= supports_fd: if {open, stat} <= supports_dir_fd and {scandir, stat} <= supports_fd:
def fwalk(top=".", topdown=True, onerror=None, *, follow_symlinks=False, dir_fd=None): def fwalk(top=".", topdown=True, onerror=None, *, follow_symlinks=False, dir_fd=None):
"""Directory tree generator. """Directory tree generator.
@ -455,7 +456,8 @@ if {open, stat} <= supports_dir_fd and {listdir, stat} <= supports_fd:
top = fspath(top) top = fspath(top)
# Note: To guard against symlink races, we use the standard # Note: To guard against symlink races, we use the standard
# lstat()/open()/fstat() trick. # lstat()/open()/fstat() trick.
orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd) if not follow_symlinks:
orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd)
topfd = open(top, O_RDONLY, dir_fd=dir_fd) topfd = open(top, O_RDONLY, dir_fd=dir_fd)
try: try:
if (follow_symlinks or (st.S_ISDIR(orig_st.st_mode) and if (follow_symlinks or (st.S_ISDIR(orig_st.st_mode) and
@ -470,35 +472,41 @@ if {open, stat} <= supports_dir_fd and {listdir, stat} <= supports_fd:
# necessary, it can be adapted to only require O(1) FDs, see issue # necessary, it can be adapted to only require O(1) FDs, see issue
# #13734. # #13734.
names = listdir(topfd) scandir_it = scandir(topfd)
if isbytes: dirs = []
names = map(fsencode, names) nondirs = []
dirs, nondirs = [], [] entries = None if topdown or follow_symlinks else []
for name in names: for entry in scandir_it:
name = entry.name
if isbytes:
name = fsencode(name)
try: try:
# Here, we don't use AT_SYMLINK_NOFOLLOW to be consistent with if entry.is_dir():
# walk() which reports symlinks to directories as directories.
# We do however check for symlinks before recursing into
# a subdirectory.
if st.S_ISDIR(stat(name, dir_fd=topfd).st_mode):
dirs.append(name) dirs.append(name)
if entries is not None:
entries.append(entry)
else: else:
nondirs.append(name) nondirs.append(name)
except OSError: except OSError:
try: try:
# Add dangling symlinks, ignore disappeared files # Add dangling symlinks, ignore disappeared files
if st.S_ISLNK(stat(name, dir_fd=topfd, follow_symlinks=False) if entry.is_symlink():
.st_mode):
nondirs.append(name) nondirs.append(name)
except OSError: except OSError:
continue pass
if topdown: if topdown:
yield toppath, dirs, nondirs, topfd yield toppath, dirs, nondirs, topfd
for name in dirs: for name in dirs if entries is None else zip(dirs, entries):
try: try:
orig_st = stat(name, dir_fd=topfd, follow_symlinks=follow_symlinks) if not follow_symlinks:
if topdown:
orig_st = stat(name, dir_fd=topfd, follow_symlinks=False)
else:
assert entries is not None
name, entry = name
orig_st = entry.stat(follow_symlinks=False)
dirfd = open(name, O_RDONLY, dir_fd=topfd) dirfd = open(name, O_RDONLY, dir_fd=topfd)
except OSError as err: except OSError as err:
if onerror is not None: if onerror is not None:

View file

@ -3313,6 +3313,35 @@ class TestScandir(unittest.TestCase):
self.assertEqual(entry.path, self.assertEqual(entry.path,
os.fsencode(os.path.join(self.path, 'file.txt'))) os.fsencode(os.path.join(self.path, 'file.txt')))
# Checks os.scandir() when it is given an open directory file descriptor
# instead of a path. Skipped unless os.listdir is listed in os.supports_fd.
# NOTE(review): leading indentation was lost in this diff rendering; the
# statements are kept exactly as shown and their nesting is not reconstructed.
@unittest.skipUnless(os.listdir in os.supports_fd,
'fd support for listdir required for this test.')
def test_fd(self):
# scandir itself must be advertised as accepting a file descriptor.
self.assertIn(os.scandir, os.supports_fd)
self.create_file('file.txt')
expected_names = ['file.txt']
# Presumably can_symlink() reports whether symlinks can be created here;
# the 'link' entry is only expected in that case.
if support.can_symlink():
os.symlink('file.txt', os.path.join(self.path, 'link'))
expected_names.append('link')
# Open the test directory so its descriptor can be handed to scandir().
fd = os.open(self.path, os.O_RDONLY)
try:
with os.scandir(fd) as it:
entries = list(it)
names = [entry.name for entry in entries]
# The same set of names is expected, and the same order as os.listdir(fd).
self.assertEqual(sorted(names), expected_names)
self.assertEqual(names, os.listdir(fd))
for entry in entries:
# With an fd argument, entry.path and os.fspath(entry) equal the bare name.
self.assertEqual(entry.path, entry.name)
self.assertEqual(os.fspath(entry), entry.name)
self.assertEqual(entry.is_symlink(), entry.name == 'link')
# Compare entry.stat() with os.stat() resolved relative to the directory
# fd, both following and not following symlinks.
if os.stat in os.supports_dir_fd:
st = os.stat(entry.name, dir_fd=fd)
self.assertEqual(entry.stat(), st)
st = os.stat(entry.name, dir_fd=fd, follow_symlinks=False)
self.assertEqual(entry.stat(follow_symlinks=False), st)
# The descriptor opened above is always closed, whatever the outcome.
finally:
os.close(fd)
def test_empty_path(self): def test_empty_path(self):
self.assertRaises(FileNotFoundError, os.scandir, '') self.assertRaises(FileNotFoundError, os.scandir, '')
@ -3328,7 +3357,7 @@ class TestScandir(unittest.TestCase):
self.assertEqual(len(entries2), 0, entries2) self.assertEqual(len(entries2), 0, entries2)
def test_bad_path_type(self): def test_bad_path_type(self):
for obj in [1234, 1.234, {}, []]: for obj in [1.234, {}, []]:
self.assertRaises(TypeError, os.scandir, obj) self.assertRaises(TypeError, os.scandir, obj)
def test_close(self): def test_close(self):

View file

@ -294,6 +294,9 @@ Extension Modules
Library Library
------- -------
- bpo-25996: Added support for file descriptors in os.scandir() on Unix.
os.fwalk() is now about 2 times faster because it uses os.scandir().
- bpo-28699: Fixed a bug in pools in multiprocessing.pool that raising an - bpo-28699: Fixed a bug in pools in multiprocessing.pool that raising an
exception at the very first of an iterable may swallow the exception or exception at the very first of an iterable may swallow the exception or
make the program hang. Patch by Davin Potts and Xiang Zhang. make the program hang. Patch by Davin Potts and Xiang Zhang.

View file

@ -5926,7 +5926,7 @@ os_scandir(PyObject *module, PyObject **args, Py_ssize_t nargs, PyObject *kwname
PyObject *return_value = NULL; PyObject *return_value = NULL;
static const char * const _keywords[] = {"path", NULL}; static const char * const _keywords[] = {"path", NULL};
static _PyArg_Parser _parser = {"|O&:scandir", _keywords, 0}; static _PyArg_Parser _parser = {"|O&:scandir", _keywords, 0};
path_t path = PATH_T_INITIALIZE("scandir", "path", 1, 0); path_t path = PATH_T_INITIALIZE("scandir", "path", 1, PATH_HAVE_FDOPENDIR);
if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser,
path_converter, &path)) { path_converter, &path)) {
@ -6493,4 +6493,4 @@ exit:
#ifndef OS_GETRANDOM_METHODDEF #ifndef OS_GETRANDOM_METHODDEF
#define OS_GETRANDOM_METHODDEF #define OS_GETRANDOM_METHODDEF
#endif /* !defined(OS_GETRANDOM_METHODDEF) */ #endif /* !defined(OS_GETRANDOM_METHODDEF) */
/*[clinic end generated code: output=5a0be969e3f71660 input=a9049054013a1b77]*/ /*[clinic end generated code: output=5529857101c08b49 input=a9049054013a1b77]*/

View file

@ -11161,6 +11161,7 @@ typedef struct {
unsigned char d_type; unsigned char d_type;
#endif #endif
ino_t d_ino; ino_t d_ino;
int dir_fd;
#endif #endif
} DirEntry; } DirEntry;
@ -11210,19 +11211,31 @@ DirEntry_fetch_stat(DirEntry *self, int follow_symlinks)
PyObject *ub; PyObject *ub;
#ifdef MS_WINDOWS #ifdef MS_WINDOWS
if (PyUnicode_FSDecoder(self->path, &ub)) { if (!PyUnicode_FSDecoder(self->path, &ub))
const wchar_t *path = PyUnicode_AsUnicode(ub); return NULL;
const wchar_t *path = PyUnicode_AsUnicode(ub);
#else /* POSIX */ #else /* POSIX */
if (PyUnicode_FSConverter(self->path, &ub)) { if (!PyUnicode_FSConverter(self->path, &ub))
const char *path = PyBytes_AS_STRING(ub); return NULL;
const char *path = PyBytes_AS_STRING(ub);
if (self->dir_fd != DEFAULT_DIR_FD) {
#ifdef HAVE_FSTATAT
result = fstatat(self->dir_fd, path, &st,
follow_symlinks ? 0 : AT_SYMLINK_NOFOLLOW);
#else
PyErr_SetString(PyExc_NotImplementedError, "can't fetch stat");
return NULL;
#endif /* HAVE_FSTATAT */
}
else
#endif #endif
{
if (follow_symlinks) if (follow_symlinks)
result = STAT(path, &st); result = STAT(path, &st);
else else
result = LSTAT(path, &st); result = LSTAT(path, &st);
Py_DECREF(ub); }
} else Py_DECREF(ub);
return NULL;
if (result != 0) if (result != 0)
return path_object_error(self->path); return path_object_error(self->path);
@ -11633,20 +11646,36 @@ DirEntry_from_posix_info(path_t *path, const char *name, Py_ssize_t name_len,
entry->stat = NULL; entry->stat = NULL;
entry->lstat = NULL; entry->lstat = NULL;
joined_path = join_path_filename(path->narrow, name, name_len); if (path->fd != -1) {
if (!joined_path) entry->dir_fd = path->fd;
goto error; joined_path = NULL;
}
else {
entry->dir_fd = DEFAULT_DIR_FD;
joined_path = join_path_filename(path->narrow, name, name_len);
if (!joined_path)
goto error;
}
if (!path->narrow || !PyBytes_Check(path->object)) { if (!path->narrow || !PyBytes_Check(path->object)) {
entry->name = PyUnicode_DecodeFSDefaultAndSize(name, name_len); entry->name = PyUnicode_DecodeFSDefaultAndSize(name, name_len);
entry->path = PyUnicode_DecodeFSDefault(joined_path); if (joined_path)
entry->path = PyUnicode_DecodeFSDefault(joined_path);
} }
else { else {
entry->name = PyBytes_FromStringAndSize(name, name_len); entry->name = PyBytes_FromStringAndSize(name, name_len);
entry->path = PyBytes_FromString(joined_path); if (joined_path)
entry->path = PyBytes_FromString(joined_path);
} }
PyMem_Free(joined_path); PyMem_Free(joined_path);
if (!entry->name || !entry->path) if (!entry->name)
goto error;
if (path->fd != -1) {
entry->path = entry->name;
Py_INCREF(entry->path);
}
else if (!entry->path)
goto error; goto error;
#ifdef HAVE_DIRENT_D_TYPE #ifdef HAVE_DIRENT_D_TYPE
@ -11674,6 +11703,9 @@ typedef struct {
#else /* POSIX */ #else /* POSIX */
DIR *dirp; DIR *dirp;
#endif #endif
#ifdef HAVE_FDOPENDIR
int fd;
#endif
} ScandirIterator; } ScandirIterator;
#ifdef MS_WINDOWS #ifdef MS_WINDOWS
@ -11758,6 +11790,10 @@ ScandirIterator_closedir(ScandirIterator *iterator)
iterator->dirp = NULL; iterator->dirp = NULL;
Py_BEGIN_ALLOW_THREADS Py_BEGIN_ALLOW_THREADS
#ifdef HAVE_FDOPENDIR
if (iterator->path.fd != -1)
rewinddir(dirp);
#endif
closedir(dirp); closedir(dirp);
Py_END_ALLOW_THREADS Py_END_ALLOW_THREADS
return; return;
@ -11933,7 +11969,7 @@ static PyTypeObject ScandirIteratorType = {
/*[clinic input] /*[clinic input]
os.scandir os.scandir
path : path_t(nullable=True) = None path : path_t(nullable=True, allow_fd='PATH_HAVE_FDOPENDIR') = None
Return an iterator of DirEntry objects for given path. Return an iterator of DirEntry objects for given path.
@ -11946,13 +11982,16 @@ If path is None, uses the path='.'.
static PyObject * static PyObject *
os_scandir_impl(PyObject *module, path_t *path) os_scandir_impl(PyObject *module, path_t *path)
/*[clinic end generated code: output=6eb2668b675ca89e input=e62b08b3cd41f604]*/ /*[clinic end generated code: output=6eb2668b675ca89e input=b139dc1c57f60846]*/
{ {
ScandirIterator *iterator; ScandirIterator *iterator;
#ifdef MS_WINDOWS #ifdef MS_WINDOWS
wchar_t *path_strW; wchar_t *path_strW;
#else #else
const char *path_str; const char *path_str;
#ifdef HAVE_FDOPENDIR
int fd = -1;
#endif
#endif #endif
iterator = PyObject_New(ScandirIterator, &ScandirIteratorType); iterator = PyObject_New(ScandirIterator, &ScandirIteratorType);
@ -11988,18 +12027,40 @@ os_scandir_impl(PyObject *module, path_t *path)
goto error; goto error;
} }
#else /* POSIX */ #else /* POSIX */
if (iterator->path.narrow)
path_str = iterator->path.narrow;
else
path_str = ".";
errno = 0; errno = 0;
Py_BEGIN_ALLOW_THREADS #ifdef HAVE_FDOPENDIR
iterator->dirp = opendir(path_str); if (path->fd != -1) {
Py_END_ALLOW_THREADS /* closedir() closes the FD, so we duplicate it */
fd = _Py_dup(path->fd);
if (fd == -1)
goto error;
Py_BEGIN_ALLOW_THREADS
iterator->dirp = fdopendir(fd);
Py_END_ALLOW_THREADS
}
else
#endif
{
if (iterator->path.narrow)
path_str = iterator->path.narrow;
else
path_str = ".";
Py_BEGIN_ALLOW_THREADS
iterator->dirp = opendir(path_str);
Py_END_ALLOW_THREADS
}
if (!iterator->dirp) { if (!iterator->dirp) {
path_error(&iterator->path); path_error(&iterator->path);
#ifdef HAVE_FDOPENDIR
if (fd != -1) {
Py_BEGIN_ALLOW_THREADS
close(fd);
Py_END_ALLOW_THREADS
}
#endif
goto error; goto error;
} }
#endif #endif