mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
bpo-33671: efficient zero-copy for shutil.copy* functions (Linux, OSX and Win) (#7160)
* have shutil.copyfileobj use sendfile() if possible * refactoring: use ctx manager * add test with non-regular file obj * emulate case where file size can't be determined * reference _copyfileobj_sendfile directly * add test for offset() at certain position * add test for empty file * add test for non regular file dst * small refactoring * leave copyfileobj() alone in order to not introduce any incompatibility * minor refactoring * remove old test * update docstring * update docstring; rename exception class * detect platforms which only support file to socket zero copy * don't run test on platforms where file-to-file zero copy is not supported * use tempfiles * reset verbosity * add test for smaller chunks * add big file size test * add comment * update doc * update whatsnew doc * update doc * catch Exception * remove unused import * add test case for error on second sendfile() call * turn docstring into comment * add one more test * update comment * add Misc/NEWS entry * get rid of COPY_BUFSIZE; it belongs to another PR * update doc * expose posix._fcopyfile() for OSX * merge from linux branch * merge from linux branch * expose fcopyfile * arg clinic for the win implementation * convert path type to path_t * expose CopyFileW * fix windows tests * release GIL * minor refactoring * update doc * update comment * update docstrings * rename functions * rename test classes * update doc * update doc * update docstrings and comments * avoid importing nt|posix modules if unnecessary * set nt|posix modules to None if not available * micro speedup * update description * add doc note * use better wording in doc * rename function using 'fastcopy' prefix instead of 'zerocopy' * use :ref: in rst doc * change wording in doc * add test to make sure sendfile() doesn't get called anymore in case it doesn't support file to file copies * move CopyFileW in _winapi and actually expose CopyFileExW instead * fix line endings * add tests for mode bits * add docstring * remove test file 
mode class; let's keep it for later when I start addressing OSX fcopyfile() specific copies * update doc to reflect new changes * update doc * adjust tests on win * fix argument clinic error * update doc * OSX: expose copyfile(3) instead of fcopyfile(3); also expose flags arg to python * osx / copyfile: use path_t instead of char * do not set dst name in the OSError exception in order to remain consistent with platforms which cannot do that (e.g. linux) * add same file test * add test for same file * have osx copyfile() pre-emptively check if src and dst are the same, otherwise it will return immediately and src file content gets deleted * turn PermissionError into appropriate SameFileError * expose ERROR_SHARING_VIOLATION in order to raise more appropriate SameFileError * honour follow_symlinks arg when using CopyFileEx * update Misc/NEWS * expose CreateDirectoryEx mock * change C type * CreateDirectoryExW actual implementation * provide specific makedirs() implementation for win * fix typo * skeleton for SetNamedSecurityInfo * get security info for src path * finally set security attrs * add unit tests * mimic os.makedirs() behavior and raise if dst dir exists * set 2 paths for OSError object * set 2 paths for OSError object * expand windows test * in case of exception on os.sendfile() set filename and filename2 exception attributes * set 2 filenames (src, dst) for OSError in case copyfile() fails on OSX * update doc * do not use CreateDirectoryEx() in copytree() if source dir is a symlink (breaks test_copytree_symlink_dir); instead just create a plain dir and remain consistent with POSIX implementation * use bytearray() and readinto() * use memoryview() with bytearray() * refactoring + introduce a new _fastcopy_binfileobj() fun * remove CopyFileEx and other C wrappers * remove code related to CopyFileEx * Recognize binary files in copyfileobj() ...and use fastest _fastcopy_binfileobj() when possible * set 1MB copy bufsize on win; also add a global _COPY_BUFSIZE 
variable * use ctx manager for memoryview() * update doc * remove outdated doc * remove last CopyFileEx remnants * OSX - use fcopyfile(3) instead of copyfile(3) ...as an extra safety measure: in case src/dst are "exotic" files (non regular or living on a network fs etc.) we better fail on open() instead of copyfile(3) as we're not quite sure what's gonna happen in that case. * update doc
This commit is contained in:
parent
33cd058f21
commit
4a172ccc73
8 changed files with 595 additions and 19 deletions
|
@ -12,20 +12,28 @@ import errno
|
|||
import functools
|
||||
import pathlib
|
||||
import subprocess
|
||||
import random
|
||||
import string
|
||||
import contextlib
|
||||
import io
|
||||
from shutil import (make_archive,
|
||||
register_archive_format, unregister_archive_format,
|
||||
get_archive_formats, Error, unpack_archive,
|
||||
register_unpack_format, RegistryError,
|
||||
unregister_unpack_format, get_unpack_formats,
|
||||
SameFileError)
|
||||
SameFileError, _GiveupOnFastCopy)
|
||||
import tarfile
|
||||
import zipfile
|
||||
try:
|
||||
import posix
|
||||
except ImportError:
|
||||
posix = None
|
||||
|
||||
from test import support
|
||||
from test.support import TESTFN, FakePath
|
||||
|
||||
# Second scratch path, derived from TESTFN by appending "2".
TESTFN2 = TESTFN + "2"
|
||||
|
||||
# True when sys.platform starts with "darwin"; used to skip or enable
# platform-specific tests.
OSX = sys.platform.startswith("darwin")
|
||||
try:
|
||||
import grp
|
||||
import pwd
|
||||
|
@ -60,6 +68,24 @@ def write_file(path, content, binary=False):
|
|||
with open(path, 'wb' if binary else 'w') as fp:
|
||||
fp.write(content)
|
||||
|
||||
def write_test_file(path, size):
    """Create a test file with an arbitrary size and random text content.

    *path* is opened in 'wb' mode (so any existing content is replaced)
    and filled with exactly *size* bytes.  A single block of random
    ASCII letters, at most 8192 bytes long, is generated once and
    written repeatedly; the final write is truncated so that sizes
    which are not a multiple of the block length are honoured as well.

    Raises AssertionError if the resulting file is not *size* bytes long.
    """
    def chunks(total, step):
        # Yield the length of every successive write: *step* for each
        # full block, followed by the remainder (if there is one).
        while total > step:
            yield step
            total -= step
        if total:
            yield total

    bufsize = min(size, 8192)
    chunk = "".join(random.choices(string.ascii_letters, k=bufsize)).encode()
    with open(path, 'wb') as f:
        for csize in chunks(size, bufsize):
            # Slice the block: the last piece may be shorter than bufsize.
            f.write(chunk[:csize])
    # Explicit raise (rather than a bare assert) so the sanity check
    # still runs when Python is started with -O.
    actual = os.path.getsize(path)
    if actual != size:
        raise AssertionError(
            "expected %r bytes in %r, got %r" % (size, path, actual))
|
||||
|
||||
def read_file(path, binary=False):
|
||||
"""Return contents from a file located at *path*.
|
||||
|
||||
|
@ -84,6 +110,37 @@ def rlistdir(path):
|
|||
res.append(name)
|
||||
return res
|
||||
|
||||
def supports_file2file_sendfile():
    """Return True if os.sendfile() can copy between two regular files.

    Returns False when os.sendfile() does not exist or when a trial
    call between two temporary files raises OSError.  Both temporary
    files are removed before returning.
    """
    # ...apparently Linux and Solaris are the only ones
    if not hasattr(os, "sendfile"):
        return False
    srcname = None
    dstname = None
    try:
        with tempfile.NamedTemporaryFile("wb", delete=False) as f:
            srcname = f.name
            f.write(b"0123456789")

        with open(srcname, "rb") as src:
            with tempfile.NamedTemporaryFile("wb", delete=False) as dst:
                # Remember the *destination* name (not the source one
                # again) so the finally clause removes both files.
                dstname = dst.name
                infd = src.fileno()
                outfd = dst.fileno()
                try:
                    os.sendfile(outfd, infd, 0, 2)
                except OSError:
                    return False
                else:
                    return True
    finally:
        # The files were created with delete=False, so clean up by hand.
        if srcname is not None:
            support.unlink(srcname)
        if dstname is not None:
            support.unlink(dstname)
|
||||
|
||||
|
||||
# Evaluated once, when this module is imported.
SUPPORTS_SENDFILE = supports_file2file_sendfile()
|
||||
|
||||
|
||||
class TestShutil(unittest.TestCase):
|
||||
|
||||
|
@ -1401,6 +1458,8 @@ class TestShutil(unittest.TestCase):
|
|||
self.assertRaises(SameFileError, shutil.copyfile, src_file, src_file)
|
||||
# But Error should work too, to stay backward compatible.
|
||||
self.assertRaises(Error, shutil.copyfile, src_file, src_file)
|
||||
# Make sure file is not corrupted.
|
||||
self.assertEqual(read_file(src_file), 'foo')
|
||||
|
||||
def test_copytree_return_value(self):
|
||||
# copytree returns its destination path.
|
||||
|
@ -1749,6 +1808,7 @@ class TestCopyFile(unittest.TestCase):
|
|||
|
||||
self.assertRaises(OSError, shutil.copyfile, 'srcfile', 'destfile')
|
||||
|
||||
@unittest.skipIf(OSX, "skipped on OSX")
|
||||
def test_w_dest_open_fails(self):
|
||||
|
||||
srcfile = self.Faux()
|
||||
|
@ -1768,6 +1828,7 @@ class TestCopyFile(unittest.TestCase):
|
|||
self.assertEqual(srcfile._exited_with[1].args,
|
||||
('Cannot open "destfile"',))
|
||||
|
||||
@unittest.skipIf(OSX, "skipped on OSX")
|
||||
def test_w_dest_close_fails(self):
|
||||
|
||||
srcfile = self.Faux()
|
||||
|
@ -1790,6 +1851,7 @@ class TestCopyFile(unittest.TestCase):
|
|||
self.assertEqual(srcfile._exited_with[1].args,
|
||||
('Cannot close',))
|
||||
|
||||
@unittest.skipIf(OSX, "skipped on OSX")
|
||||
def test_w_source_close_fails(self):
|
||||
|
||||
srcfile = self.Faux(True)
|
||||
|
@ -1829,6 +1891,234 @@ class TestCopyFile(unittest.TestCase):
|
|||
finally:
|
||||
os.rmdir(dst_dir)
|
||||
|
||||
|
||||
class _ZeroCopyFileTest(object):
    """Tests common to all zero-copy APIs.

    Mixin to be combined with unittest.TestCase.  Subclasses set
    PATCHPOINT to the dotted name that gets mocked in the error tests
    and override zerocopy_fun() to call the implementation under test.
    """
    FILESIZE = (10 * 1024 * 1024)  # 10 MiB
    FILEDATA = b""  # content of TESTFN; filled in by setUpClass()
    PATCHPOINT = ""  # target passed to unittest.mock.patch()

    @classmethod
    def setUpClass(cls):
        """Create TESTFN with FILESIZE bytes and cache its content."""
        write_test_file(TESTFN, cls.FILESIZE)
        with open(TESTFN, 'rb') as f:
            cls.FILEDATA = f.read()
        # Explicit raise instead of a bare assert so that the fixture
        # check is not stripped when running under -O.
        if len(cls.FILEDATA) != cls.FILESIZE:
            raise AssertionError("unexpected size for test file content")

    @classmethod
    def tearDownClass(cls):
        """Remove the source file created by setUpClass()."""
        support.unlink(TESTFN)

    def tearDown(self):
        """Remove the destination file a test may have left behind."""
        support.unlink(TESTFN2)

    @contextlib.contextmanager
    def get_files(self):
        """Yield (src, dst): TESTFN opened 'rb' and TESTFN2 opened 'wb'."""
        with open(TESTFN, "rb") as src:
            with open(TESTFN2, "wb") as dst:
                yield (src, dst)

    def zerocopy_fun(self, *args, **kwargs):
        """Call the fast-copy function under test (subclass hook)."""
        raise NotImplementedError("must be implemented in subclass")

    def reset(self):
        """Tear down and rebuild both the per-test and per-class fixtures."""
        self.tearDown()
        self.tearDownClass()
        self.setUpClass()
        self.setUp()

    # ---

    def test_regular_copy(self):
        with self.get_files() as (src, dst):
            self.zerocopy_fun(src, dst)
        self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)
        # Make sure the fallback function is not called.
        with self.get_files() as (src, dst):
            with unittest.mock.patch('shutil.copyfileobj') as m:
                shutil.copyfile(TESTFN, TESTFN2)
            self.assertFalse(m.called)

    def test_same_file(self):
        # Rebuild the fixtures afterwards, whatever happens to TESTFN.
        self.addCleanup(self.reset)
        with self.get_files() as (src, dst):
            with self.assertRaises(Exception):
                self.zerocopy_fun(src, src)
        # Make sure src file is not corrupted.
        self.assertEqual(read_file(TESTFN, binary=True), self.FILEDATA)

    def test_non_existent_src(self):
        name = tempfile.mktemp()
        with self.assertRaises(FileNotFoundError) as cm:
            shutil.copyfile(name, "new")
        self.assertEqual(cm.exception.filename, name)

    def test_empty_file(self):
        srcname = TESTFN + 'src'
        dstname = TESTFN + 'dst'
        self.addCleanup(lambda: support.unlink(srcname))
        self.addCleanup(lambda: support.unlink(dstname))
        # Create an empty source file.
        with open(srcname, "wb"):
            pass

        with open(srcname, "rb") as src:
            with open(dstname, "wb") as dst:
                self.zerocopy_fun(src, dst)

        self.assertEqual(read_file(dstname, binary=True), b"")

    def test_unhandled_exception(self):
        # An exception raised by the patched function must propagate
        # out of shutil.copyfile() unchanged.
        with unittest.mock.patch(self.PATCHPOINT,
                                 side_effect=ZeroDivisionError):
            self.assertRaises(ZeroDivisionError,
                              shutil.copyfile, TESTFN, TESTFN2)

    def test_exception_on_first_call(self):
        # Emulate a case where the first call to the zero-copy
        # function raises an exception in which case the function is
        # supposed to give up immediately.
        with unittest.mock.patch(self.PATCHPOINT,
                                 side_effect=OSError(errno.EINVAL, "yo")):
            with self.get_files() as (src, dst):
                with self.assertRaises(_GiveupOnFastCopy):
                    self.zerocopy_fun(src, dst)

    def test_filesystem_full(self):
        # Emulate a case where filesystem is full and sendfile() fails
        # on first call.
        with unittest.mock.patch(self.PATCHPOINT,
                                 side_effect=OSError(errno.ENOSPC, "yo")):
            with self.get_files() as (src, dst):
                self.assertRaises(OSError, self.zerocopy_fun, src, dst)
|
||||
|
||||
|
||||
@unittest.skipIf(not SUPPORTS_SENDFILE, 'os.sendfile() not supported')
class TestZeroCopySendfile(_ZeroCopyFileTest, unittest.TestCase):
    """Run the common zero-copy tests against shutil._fastcopy_sendfile().

    Adds tests specific to the os.sendfile() based implementation.
    """
    PATCHPOINT = "os.sendfile"

    def zerocopy_fun(self, fsrc, fdst):
        """Copy *fsrc* to *fdst* via shutil._fastcopy_sendfile()."""
        return shutil._fastcopy_sendfile(fsrc, fdst)

    def test_non_regular_file_src(self):
        with io.BytesIO(self.FILEDATA) as src:
            with open(TESTFN2, "wb") as dst:
                with self.assertRaises(_GiveupOnFastCopy):
                    self.zerocopy_fun(src, dst)
                # The plain copy must still work after the give-up.
                shutil.copyfileobj(src, dst)

        self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)

    def test_non_regular_file_dst(self):
        with open(TESTFN, "rb") as src:
            with io.BytesIO() as dst:
                with self.assertRaises(_GiveupOnFastCopy):
                    self.zerocopy_fun(src, dst)
                # The plain copy must still work after the give-up.
                shutil.copyfileobj(src, dst)
                dst.seek(0)
                self.assertEqual(dst.read(), self.FILEDATA)

    def test_exception_on_second_call(self):
        def sendfile(*args, **kwargs):
            # First call goes through to the real os.sendfile(); every
            # later call fails with EBADF.
            if not flag:
                flag.append(None)
                return orig_sendfile(*args, **kwargs)
            else:
                raise OSError(errno.EBADF, "yo")

        flag = []
        orig_sendfile = os.sendfile
        with unittest.mock.patch('os.sendfile', create=True,
                                 side_effect=sendfile):
            with self.get_files() as (src, dst):
                with self.assertRaises(OSError) as cm:
                    shutil._fastcopy_sendfile(src, dst)
        # A non-empty flag proves the first (successful) call happened.
        self.assertTrue(flag)
        self.assertEqual(cm.exception.errno, errno.EBADF)

    def test_cant_get_size(self):
        # Emulate a case where src file size cannot be determined.
        # Internally bufsize will be set to a small value and
        # sendfile() will be called repeatedly.
        with unittest.mock.patch('os.fstat', side_effect=OSError) as m:
            with self.get_files() as (src, dst):
                shutil._fastcopy_sendfile(src, dst)
                self.assertTrue(m.called)
        self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)

    def test_small_chunks(self):
        # Force internal file size detection to be smaller than the
        # actual file size. We want to force sendfile() to be called
        # multiple times, also in order to emulate a src fd which gets
        # bigger while it is being copied.
        mock = unittest.mock.Mock()
        mock.st_size = 65536 + 1
        with unittest.mock.patch('os.fstat', return_value=mock) as m:
            with self.get_files() as (src, dst):
                shutil._fastcopy_sendfile(src, dst)
                self.assertTrue(m.called)
        self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)

    def test_big_chunk(self):
        # Force internal file size detection to be +100MB bigger than
        # the actual file size. Make sure sendfile() does not rely on
        # file size value except for (maybe) a better throughput /
        # performance.
        mock = unittest.mock.Mock()
        mock.st_size = self.FILESIZE + (100 * 1024 * 1024)
        with unittest.mock.patch('os.fstat', return_value=mock) as m:
            with self.get_files() as (src, dst):
                shutil._fastcopy_sendfile(src, dst)
                self.assertTrue(m.called)
        self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)

    def test_blocksize_arg(self):
        with unittest.mock.patch('os.sendfile',
                                 side_effect=ZeroDivisionError) as m:
            self.assertRaises(ZeroDivisionError,
                              shutil.copyfile, TESTFN, TESTFN2)
            # Fourth positional argument of the recorded sendfile() call.
            blocksize = m.call_args[0][3]
            # Make sure file size and the block size arg passed to
            # sendfile() are the same.
            self.assertEqual(blocksize, os.path.getsize(TESTFN))
            # ...unless we're dealing with a small file.
            support.unlink(TESTFN2)
            write_file(TESTFN2, b"hello", binary=True)
            self.addCleanup(support.unlink, TESTFN2 + '3')
            self.assertRaises(ZeroDivisionError,
                              shutil.copyfile, TESTFN2, TESTFN2 + '3')
            blocksize = m.call_args[0][3]
            self.assertEqual(blocksize, 2 ** 23)

    def test_file2file_not_supported(self):
        # Emulate a case where sendfile() only support file->socket
        # fds. In such a case copyfile() is supposed to skip the
        # fast-copy attempt from then on.
        self.assertTrue(shutil._HAS_SENDFILE)
        try:
            with unittest.mock.patch(
                    self.PATCHPOINT,
                    side_effect=OSError(errno.ENOTSOCK, "yo")) as m:
                with self.get_files() as (src, dst):
                    with self.assertRaises(_GiveupOnFastCopy):
                        shutil._fastcopy_sendfile(src, dst)
                self.assertTrue(m.called)
            self.assertFalse(shutil._HAS_SENDFILE)

            with unittest.mock.patch(self.PATCHPOINT) as m:
                shutil.copyfile(TESTFN, TESTFN2)
                self.assertFalse(m.called)
        finally:
            # Restore the module-level flag for the remaining tests.
            shutil._HAS_SENDFILE = True
|
||||
|
||||
|
||||
@unittest.skipUnless(OSX, 'OSX only')
class TestZeroCopyOSX(_ZeroCopyFileTest, unittest.TestCase):
    """Run the common zero-copy tests against shutil._fastcopy_osx()."""
    PATCHPOINT = "posix._fcopyfile"

    def zerocopy_fun(self, src, dst):
        """Copy *src* to *dst* passing posix._COPYFILE_DATA as flags."""
        flags = posix._COPYFILE_DATA
        return shutil._fastcopy_osx(src, dst, flags)
|
||||
|
||||
|
||||
class TermsizeTests(unittest.TestCase):
|
||||
def test_does_not_crash(self):
|
||||
"""Check if get_terminal_size() returns a meaningful value.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue