mirror of
https://github.com/python/cpython.git
synced 2025-08-30 21:48:47 +00:00
bpo-33671: efficient zero-copy for shutil.copy* functions (Linux, OSX and Win) (#7160)
* have shutil.copyfileobj use sendfile() if possible * refactoring: use ctx manager * add test with non-regular file obj * emulate case where file size can't be determined * reference _copyfileobj_sendfile directly * add test for offset() at certain position * add test for empty file * add test for non regular file dst * small refactoring * leave copyfileobj() alone in order to not introduce any incompatibility * minor refactoring * remove old test * update docstring * update docstring; rename exception class * detect platforms which only support file to socket zero copy * don't run test on platforms where file-to-file zero copy is not supported * use tempfiles * reset verbosity * add test for smaller chunks * add big file size test * add comment * update doc * update whatsnew doc * update doc * catch Exception * remove unused import * add test case for error on second sendfile() call * turn docstring into comment * add one more test * update comment * add Misc/NEWS entry * get rid of COPY_BUFSIZE; it belongs to another PR * update doc * expose posix._fcopyfile() for OSX * merge from linux branch * merge from linux branch * expose fcopyfile * arg clinic for the win implementation * convert path type to path_t * expose CopyFileW * fix windows tests * release GIL * minor refactoring * update doc * update comment * update docstrings * rename functions * rename test classes * update doc * update doc * update docstrings and comments * avoid importing nt|posix modules if unnecessary * set nt|posix modules to None if not available * micro speedup * update description * add doc note * use better wording in doc * rename function using 'fastcopy' prefix instead of 'zerocopy' * use :ref: in rst doc * change wording in doc * add test to make sure sendfile() doesn't get called anymore in case it doesn't support file to file copies * move CopyFileW in _winapi and actually expose CopyFileExW instead * fix line endings * add tests for mode bits * add docstring * remove test file 
mode class; let's keep it for later when I start addressing OSX fcopyfile() specific copies * update doc to reflect new changes * update doc * adjust tests on win * fix argument clinic error * update doc * OSX: expose copyfile(3) instead of fcopyfile(3); also expose flags arg to python * osx / copyfile: use path_t instead of char * do not set dst name in the OSError exception in order to remain consistent with platforms which cannot do that (e.g. linux) * add same file test * add test for same file * have osx copyfile() pre-emptively check if src and dst are the same, otherwise it will return immediately and src file content gets deleted * turn PermissionError into appropriate SameFileError * expose ERROR_SHARING_VIOLATION in order to raise more appropriate SameFileError * honour follow_symlinks arg when using CopyFileEx * update Misc/NEWS * expose CreateDirectoryEx mock * change C type * CreateDirectoryExW actual implementation * provide specific makedirs() implementation for win * fix typo * skeleton for SetNamedSecurityInfo * get security info for src path * finally set security attrs * add unit tests * mimic os.makedirs() behavior and raise if dst dir exists * set 2 paths for OSError object * set 2 paths for OSError object * expand windows test * in case of exception on os.sendfile() set filename and filename2 exception attributes * set 2 filenames (src, dst) for OSError in case copyfile() fails on OSX * update doc * do not use CreateDirectoryEx() in copytree() if source dir is a symlink (breaks test_copytree_symlink_dir); instead just create a plain dir and remain consistent with POSIX implementation * use bytearray() and readinto() * use memoryview() with bytearray() * refactoring + introduce a new _fastcopy_binfileobj() fun * remove CopyFileEx and other C wrappers * remove code related to CopyFileEx * Recognize binary files in copyfileobj() ...and use fastest _fastcopy_binfileobj() when possible * set 1MB copy bufsize on win; also add a global _COPY_BUFSIZE 
variable * use ctx manager for memoryview() * update doc * remove outdated doc * remove last CopyFileEx remnants * OSX - use fcopyfile(3) instead of copyfile(3) ...as an extra safety measure: in case src/dst are "exotic" files (non regular or living on a network fs etc.) we better fail on open() instead of copyfile(3) as we're not quite sure what's gonna happen in that case. * update doc
This commit is contained in:
parent
33cd058f21
commit
4a172ccc73
8 changed files with 595 additions and 19 deletions
157
Lib/shutil.py
157
Lib/shutil.py
|
@ -10,6 +10,7 @@ import stat
|
|||
import fnmatch
|
||||
import collections
|
||||
import errno
|
||||
import io
|
||||
|
||||
try:
|
||||
import zlib
|
||||
|
@ -42,6 +43,16 @@ try:
|
|||
except ImportError:
|
||||
getgrnam = None
|
||||
|
||||
# Low-level platform modules. Both names always exist; the one that does not
# match the running OS stays None so later feature checks can test it.
posix = nt = None
if os.name == 'nt':
    import nt
elif os.name == 'posix':
    import posix

# Default chunk size for buffered copies: 1 MiB on Windows, 16 KiB elsewhere.
COPY_BUFSIZE = 16 * 1024 if os.name != 'nt' else 1024 * 1024
# Feature flags for the fast-copy paths; falsy when the posix module or the
# required function is missing.
_HAS_SENDFILE = posix and hasattr(os, "sendfile")
_HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile")  # OSX
|
||||
|
||||
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
|
||||
"copytree", "move", "rmtree", "Error", "SpecialFileError",
|
||||
"ExecError", "make_archive", "get_archive_formats",
|
||||
|
@ -72,14 +83,124 @@ class RegistryError(Exception):
|
|||
"""Raised when a registry operation with the archiving
|
||||
and unpacking registries fails"""
|
||||
|
||||
class _GiveupOnFastCopy(Exception):
    """Internal signal: a fast-copy function could not do the copy.

    Callers catch it and fall back on a raw read()/write() file copy.
    """
|
||||
|
||||
def copyfileobj(fsrc, fdst, length=16*1024):
|
||||
def _fastcopy_osx(fsrc, fdst, flags):
    """Copy between two open file objects with fcopyfile(3) (OSX).

    *flags* is passed through unchanged to posix._fcopyfile().

    Raises _GiveupOnFastCopy when a file descriptor cannot be obtained
    from either object, or when the call fails with EINVAL or ENOTSUP.
    Any other OSError is re-raised with its filename and filename2
    attributes set to the source and destination names.
    """
    try:
        src_fd, dst_fd = fsrc.fileno(), fdst.fileno()
    except Exception as exc:
        raise _GiveupOnFastCopy(exc)  # not a regular file

    try:
        posix._fcopyfile(src_fd, dst_fd, flags)
    except OSError as exc:
        # Attach both paths so the error is informative wherever it ends up.
        exc.filename = fsrc.name
        exc.filename2 = fdst.name
        if exc.errno not in {errno.EINVAL, errno.ENOTSUP}:
            raise exc from None
        raise _GiveupOnFastCopy(exc)
|
||||
|
||||
def _fastcopy_sendfile(fsrc, fdst):
    """Copy data from one regular mmap-like fd to another by using
    high-performance sendfile(2) syscall.
    This should work on Linux >= 2.6.33 and Solaris only.

    Returns None once sendfile() reports EOF (a 0 return).

    Raises _GiveupOnFastCopy when a file descriptor cannot be obtained,
    when sendfile() fails with ENOTSOCK (the module-level _HAS_SENDFILE
    flag is then set to False), or when the very first call fails before
    any data reached the destination. Other OSErrors are re-raised with
    filename and filename2 set to the source and destination names.
    """
    # Note: copyfileobj() is left alone in order to not introduce any
    # unexpected breakage. Possible risks by using zero-copy calls
    # in copyfileobj() are:
    # - fdst cannot be open in "a"(ppend) mode
    # - fsrc and fdst may be open in "t"(ext) mode
    # - fsrc may be a BufferedReader (which hides unread data in a buffer),
    #   GzipFile (which decompresses data), HTTPResponse (which decodes
    #   chunks).
    # - possibly others (e.g. encrypted fs/partition?)
    global _HAS_SENDFILE
    import sys  # local import: only needed for the 32-bit blocksize cap

    try:
        infd = fsrc.fileno()
        outfd = fdst.fileno()
    except Exception as err:
        raise _GiveupOnFastCopy(err)  # not a regular file

    # Hopefully the whole file will be copied in a single call.
    # sendfile() is called in a loop 'till EOF is reached (0 return)
    # so a bufsize smaller or bigger than the actual file size
    # should not make any difference, also in case the file content
    # changes while being copied.
    try:
        blocksize = max(os.fstat(infd).st_size, 2 ** 23)  # min 8MB
    except Exception:
        blocksize = 2 ** 27  # 128MB
    # On 32-bit builds a file larger than 2GB would make the count argument
    # of os.sendfile() overflow, so cap the request at 1GB there. The loop
    # below still copies the whole file, just in more than one call.
    if sys.maxsize < 2 ** 32:
        blocksize = min(blocksize, 2 ** 30)

    offset = 0
    while True:
        try:
            sent = os.sendfile(outfd, infd, offset, blocksize)
        except OSError as err:
            # ...in order to have a more informative exception.
            err.filename = fsrc.name
            err.filename2 = fdst.name

            if err.errno == errno.ENOTSOCK:
                # sendfile() on this platform (probably Linux < 2.6.33)
                # does not support copies between regular files (only
                # sockets).
                _HAS_SENDFILE = False
                raise _GiveupOnFastCopy(err)

            if err.errno == errno.ENOSPC:  # filesystem is full
                raise err from None

            # Give up on first call and if no data was copied.
            if offset == 0 and os.lseek(outfd, 0, os.SEEK_CUR) == 0:
                raise _GiveupOnFastCopy(err)

            raise err
        else:
            if sent == 0:
                break  # EOF
            offset += sent
|
||||
|
||||
def _copybinfileobj(fsrc, fdst, length=COPY_BUFSIZE):
    """Copy fsrc to fdst through one reusable buffer of *length* bytes.

    Data is pulled with fsrc.readinto() and pushed with fdst.write()
    until readinto() returns a falsy count.
    """
    # Bind the bound methods once; they are called on every iteration.
    read_into, write_out = fsrc.readinto, fdst.write
    with memoryview(bytearray(length)) as buf:
        while True:
            count = read_into(buf)
            if not count:
                return
            # Write only the filled part of the buffer on a short read.
            write_out(buf if count >= length else buf[:count])
|
||||
|
||||
def _is_binary_files_pair(fsrc, fdst):
    """Return True if fsrc and fdst can both be copied as raw bytes.

    The pair qualifies only when all three conditions hold:
    - fsrc has a readinto() method,
    - fsrc is an io.BytesIO or has a string ``mode`` containing 'b',
    - fdst is an io.BytesIO or has a string ``mode`` containing 'b'.

    The conditions are combined explicitly: a bare ``a and b or c and d
    or e`` chain groups as ``(a and b) or (c and d) or e``, which would
    accept any pair whose destination mode contains 'b'.
    """
    def _is_binary(fobj):
        if isinstance(fobj, io.BytesIO):
            return True
        # Some file-like objects expose a non-string mode (e.g. an int);
        # treat those as "not known to be binary" instead of raising.
        mode = getattr(fobj, 'mode', '')
        return isinstance(mode, str) and 'b' in mode

    return (hasattr(fsrc, 'readinto')
            and _is_binary(fsrc)
            and _is_binary(fdst))
|
||||
|
||||
def copyfileobj(fsrc, fdst, length=COPY_BUFSIZE):
|
||||
"""copy data from file-like object fsrc to file-like object fdst"""
# NOTE(review): this span is a rendered diff whose +/- markers were lost.
# The plain fsrc.read()/fdst.write() loop directly below appears to be the
# removed pre-change body, and the _is_binary_files_pair() dispatch after it
# the added body -- confirm against the real Lib/shutil.py.
|
||||
while 1:
|
||||
buf = fsrc.read(length)
|
||||
if not buf:
|
||||
break
|
||||
fdst.write(buf)
|
||||
# Pairs that _is_binary_files_pair() accepts are handed to _copybinfileobj();
# every other pair uses the read()/write() loop in the else branch, reading
# `length` at a time until read() returns an empty result.
if _is_binary_files_pair(fsrc, fdst):
|
||||
_copybinfileobj(fsrc, fdst, length=length)
|
||||
else:
|
||||
# Localize variable access to minimize overhead.
|
||||
fsrc_read = fsrc.read
|
||||
fdst_write = fdst.write
|
||||
while 1:
|
||||
buf = fsrc_read(length)
|
||||
if not buf:
|
||||
break
|
||||
fdst_write(buf)
|
||||
|
||||
def _samefile(src, dst):
|
||||
# Macintosh, Unix.
|
||||
|
@ -117,9 +238,23 @@ def copyfile(src, dst, *, follow_symlinks=True):
|
|||
if not follow_symlinks and os.path.islink(src):
|
||||
os.symlink(os.readlink(src), dst)
|
||||
else:
|
||||
with open(src, 'rb') as fsrc:
|
||||
with open(dst, 'wb') as fdst:
|
||||
copyfileobj(fsrc, fdst)
|
||||
with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
|
||||
if _HAS_SENDFILE:
|
||||
try:
|
||||
_fastcopy_sendfile(fsrc, fdst)
|
||||
return dst
|
||||
except _GiveupOnFastCopy:
|
||||
pass
|
||||
|
||||
if _HAS_FCOPYFILE:
|
||||
try:
|
||||
_fastcopy_osx(fsrc, fdst, posix._COPYFILE_DATA)
|
||||
return dst
|
||||
except _GiveupOnFastCopy:
|
||||
pass
|
||||
|
||||
_copybinfileobj(fsrc, fdst)
|
||||
|
||||
return dst
|
||||
|
||||
def copymode(src, dst, *, follow_symlinks=True):
|
||||
|
@ -244,13 +379,12 @@ def copy(src, dst, *, follow_symlinks=True):
|
|||
|
||||
def copy2(src, dst, *, follow_symlinks=True):
|
||||
"""Copy data and all stat info ("cp -p src dst"). Return the file's
|
||||
destination."
|
||||
destination.
|
||||
|
||||
The destination may be a directory.
|
||||
|
||||
If follow_symlinks is false, symlinks won't be followed. This
|
||||
resembles GNU's "cp -P src dst".
|
||||
|
||||
"""
|
||||
if os.path.isdir(dst):
|
||||
dst = os.path.join(dst, os.path.basename(src))
|
||||
|
@ -1015,7 +1149,6 @@ if hasattr(os, 'statvfs'):
|
|||
|
||||
elif os.name == 'nt':
|
||||
|
||||
import nt
|
||||
__all__.append('disk_usage')
|
||||
_ntuple_diskusage = collections.namedtuple('usage', 'total used free')
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue