bpo-33671: efficient zero-copy for shutil.copy* functions (Linux, OSX and Win) (#7160)

* have shutil.copyfileobj use sendfile() if possible

* refactoring: use ctx manager

* add test with non-regular file obj

* emulate case where file size can't be determined

* reference _copyfileobj_sendfile directly

* add test for offset() at certain position

* add test for empty file

* add test for non regular file dst

* small refactoring

* leave copyfileobj() alone in order to not introduce any incompatibility

* minor refactoring

* remove old test

* update docstring

* update docstring; rename exception class

* detect platforms which only support file to socket zero copy

* don't run test on platforms where file-to-file zero copy is not supported

* use tempfiles

* reset verbosity

* add test for smaller chunks

* add big file size test

* add comment

* update doc

* update whatsnew doc

* update doc

* catch Exception

* remove unused import

* add test case for error on second sendfile() call

* turn docstring into comment

* add one more test

* update comment

* add Misc/NEWS entry

* get rid of COPY_BUFSIZE; it belongs to another PR

* update doc

* expose posix._fcopyfile() for OSX

* merge from linux branch

* merge from linux branch

* expose fcopyfile

* arg clinic for the win implementation

* convert path type to path_t

* expose CopyFileW

* fix windows tests

* release GIL

* minor refactoring

* update doc

* update comment

* update docstrings

* rename functions

* rename test classes

* update doc

* update doc

* update docstrings and comments

* avoid importing nt|posix modules if unnecessary

* set nt|posix modules to None if not available

* micro speedup

* update description

* add doc note

* use better wording in doc

* rename function using 'fastcopy' prefix instead of 'zerocopy'

* use :ref: in rst doc

* change wording in doc

* add test to make sure sendfile() doesn't get called anymore in case it doesn't support file to file copies

* move CopyFileW in _winapi and actually expose CopyFileExW instead

* fix line endings

* add tests for mode bits

* add docstring

* remove test file mode class; let's keep it for later when I start addressing OSX fcopyfile() specific copies

* update doc to reflect new changes

* update doc

* adjust tests on win

* fix argument clinic error

* update doc

* OSX: expose copyfile(3) instead of fcopyfile(3); also expose flags arg to python

* osx / copyfile: use path_t instead of char

* do not set dst name in the OSError exception in order to remain consistent with platforms which cannot do that (e.g. linux)

* add same file test

* add test for same file

* have osx copyfile() pre-emptively check if src and dst are the same, otherwise it will return immediately and src file content gets deleted

* turn PermissionError into appropriate SameFileError

* expose ERROR_SHARING_VIOLATION in order to raise more appropriate SameFileError

* honour follow_symlinks arg when using CopyFileEx

* update Misc/NEWS

* expose CreateDirectoryEx mock

* change C type

* CreateDirectoryExW actual implementation

* provide specific makedirs() implementation for win

* fix typo

* skeleton for SetNamedSecurityInfo

* get security info for src path

* finally set security attrs

* add unit tests

* mimic os.makedirs() behavior and raise if dst dir exists

* set 2 paths for OSError object

* set 2 paths for OSError object

* expand windows test

* in case of exception on os.sendfile() set filename and filename2 exception attributes

* set 2 filenames (src, dst) for OSError in case copyfile() fails on OSX

* update doc

* do not use CreateDirectoryEx() in copytree() if source dir is a symlink (breaks test_copytree_symlink_dir); instead just create a plain dir and remain consistent with POSIX implementation

* use bytearray() and readinto()

* use memoryview() with bytearray()

* refactoring + introduce a new _fastcopy_binfileobj() fun

* remove CopyFileEx and other C wrappers

* remove code related to CopyFileEx

* Recognize binary files in copyfileobj()
...and use fastest _fastcopy_binfileobj() when possible

* set 1MB copy bufsize on win; also add a global _COPY_BUFSIZE variable

* use ctx manager for memoryview()

* update doc

* remove outdated doc

* remove last CopyFileEx remnants

* OSX - use fcopyfile(3) instead of copyfile(3)

...as an extra safety measure: in case src/dst are "exotic" files (non
regular or living on a network fs etc.) we better fail on open() instead
of copyfile(3) as we're not quite sure what's gonna happen in that
case.

* update doc
This commit is contained in:
Giampaolo Rodola 2018-06-12 23:04:50 +02:00 committed by GitHub
parent 33cd058f21
commit 4a172ccc73
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 595 additions and 19 deletions

View file

@ -12,20 +12,28 @@ import errno
import functools
import pathlib
import subprocess
import random
import string
import contextlib
import io
from shutil import (make_archive,
register_archive_format, unregister_archive_format,
get_archive_formats, Error, unpack_archive,
register_unpack_format, RegistryError,
unregister_unpack_format, get_unpack_formats,
SameFileError)
SameFileError, _GiveupOnFastCopy)
import tarfile
import zipfile
try:
import posix
except ImportError:
posix = None
from test import support
from test.support import TESTFN, FakePath
TESTFN2 = TESTFN + "2"
OSX = sys.platform.startswith("darwin")
try:
import grp
import pwd
@ -60,6 +68,24 @@ def write_file(path, content, binary=False):
with open(path, 'wb' if binary else 'w') as fp:
fp.write(content)
def write_test_file(path, size):
    """Create a test file of exactly *size* bytes with random text content.

    A single block of at most 8192 random ASCII letters is generated and
    written repeatedly.  The last write is truncated to the number of
    bytes still missing, so the resulting file is exactly *size* bytes
    long even when *size* is not a multiple of the block length.

    Raises AssertionError if the size on disk does not match *size*.
    """
    def chunks(total, step):
        # Yield the successive write sizes: *step* while more than
        # *step* bytes remain, then whatever is left over (if anything).
        assert total >= step
        while total > step:
            yield step
            total -= step
        if total:
            yield total

    bufsize = min(size, 8192)
    chunk = b"".join([random.choice(string.ascii_letters).encode()
                      for i in range(bufsize)])
    with open(path, 'wb') as f:
        for csize in chunks(size, bufsize):
            # Slice to csize: the final chunk may be shorter than the
            # block, and writing the full block would overshoot *size*.
            f.write(chunk[:csize])
    assert os.path.getsize(path) == size
def read_file(path, binary=False):
"""Return contents from a file located at *path*.
@ -84,6 +110,37 @@ def rlistdir(path):
res.append(name)
return res
def supports_file2file_sendfile():
    """Return True if os.sendfile() works between two regular files.

    The check is done by actually calling os.sendfile() on a pair of
    temporary files.  Returns False when os.sendfile() does not exist or
    when the probe call raises OSError.  Both temporary files are removed
    before returning.
    """
    # ...apparently Linux and Solaris are the only ones
    if not hasattr(os, "sendfile"):
        return False
    srcname = None
    dstname = None
    try:
        with tempfile.NamedTemporaryFile("wb", delete=False) as f:
            srcname = f.name
            f.write(b"0123456789")

        with open(srcname, "rb") as src:
            with tempfile.NamedTemporaryFile("wb", delete=False) as dst:
                # Record the *destination* name (not the source handle's)
                # so the finally clause removes this file as well.
                dstname = dst.name
                infd = src.fileno()
                outfd = dst.fileno()
                try:
                    os.sendfile(outfd, infd, 0, 2)
                except OSError:
                    return False
                else:
                    return True
    finally:
        # delete=False was used above, so cleanup is our responsibility.
        if srcname is not None:
            support.unlink(srcname)
        if dstname is not None:
            support.unlink(dstname)
# Probed once at import time; used as the skip condition for the
# os.sendfile() based test class.
SUPPORTS_SENDFILE = supports_file2file_sendfile()
class TestShutil(unittest.TestCase):
@ -1401,6 +1458,8 @@ class TestShutil(unittest.TestCase):
self.assertRaises(SameFileError, shutil.copyfile, src_file, src_file)
# But Error should work too, to stay backward compatible.
self.assertRaises(Error, shutil.copyfile, src_file, src_file)
# Make sure file is not corrupted.
self.assertEqual(read_file(src_file), 'foo')
def test_copytree_return_value(self):
# copytree returns its destination path.
@ -1749,6 +1808,7 @@ class TestCopyFile(unittest.TestCase):
self.assertRaises(OSError, shutil.copyfile, 'srcfile', 'destfile')
@unittest.skipIf(OSX, "skipped on OSX")
def test_w_dest_open_fails(self):
srcfile = self.Faux()
@ -1768,6 +1828,7 @@ class TestCopyFile(unittest.TestCase):
self.assertEqual(srcfile._exited_with[1].args,
('Cannot open "destfile"',))
@unittest.skipIf(OSX, "skipped on OSX")
def test_w_dest_close_fails(self):
srcfile = self.Faux()
@ -1790,6 +1851,7 @@ class TestCopyFile(unittest.TestCase):
self.assertEqual(srcfile._exited_with[1].args,
('Cannot close',))
@unittest.skipIf(OSX, "skipped on OSX")
def test_w_source_close_fails(self):
srcfile = self.Faux(True)
@ -1829,6 +1891,234 @@ class TestCopyFile(unittest.TestCase):
finally:
os.rmdir(dst_dir)
class _ZeroCopyFileTest(object):
    """Tests common to all zero-copy APIs.

    Mixin meant to be combined with unittest.TestCase.  Subclasses set
    PATCHPOINT to the dotted name of the low-level function to mock and
    override zerocopy_fun() to call the fast-copy implementation under
    test.
    """
    FILESIZE = (10 * 1024 * 1024)  # 10 MiB
    FILEDATA = b""
    PATCHPOINT = ""

    @classmethod
    def setUpClass(cls):
        # Create the shared source file once and keep its content around
        # for comparisons.
        write_test_file(TESTFN, cls.FILESIZE)
        with open(TESTFN, 'rb') as f:
            cls.FILEDATA = f.read()
        # Explicit raise instead of ``assert`` so the sanity check is
        # not stripped when running under ``python -O``.
        if len(cls.FILEDATA) != cls.FILESIZE:
            raise AssertionError(
                "unexpected test file size: %d != %d"
                % (len(cls.FILEDATA), cls.FILESIZE))

    @classmethod
    def tearDownClass(cls):
        support.unlink(TESTFN)

    def tearDown(self):
        support.unlink(TESTFN2)

    @contextlib.contextmanager
    def get_files(self):
        """Yield (src, dst): TESTFN opened 'rb' and TESTFN2 opened 'wb'."""
        with open(TESTFN, "rb") as src:
            with open(TESTFN2, "wb") as dst:
                yield (src, dst)

    def zerocopy_fun(self, *args, **kwargs):
        raise NotImplementedError("must be implemented in subclass")

    def reset(self):
        """Tear down and re-create both the per-test and class fixtures."""
        self.tearDown()
        self.tearDownClass()
        self.setUpClass()
        self.setUp()

    # ---

    def test_regular_copy(self):
        with self.get_files() as (src, dst):
            self.zerocopy_fun(src, dst)
        self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)
        # Make sure the fallback function is not called.
        with self.get_files() as (src, dst):
            with unittest.mock.patch('shutil.copyfileobj') as m:
                shutil.copyfile(TESTFN, TESTFN2)
            self.assertFalse(m.called)

    def test_same_file(self):
        # Re-create the fixtures afterwards in case this test damaged
        # the shared source file.
        self.addCleanup(self.reset)
        with self.get_files() as (src, dst):
            with self.assertRaises(Exception):
                self.zerocopy_fun(src, src)
        # Make sure src file is not corrupted.
        self.assertEqual(read_file(TESTFN, binary=True), self.FILEDATA)

    def test_non_existent_src(self):
        name = tempfile.mktemp()
        with self.assertRaises(FileNotFoundError) as cm:
            shutil.copyfile(name, "new")
        self.assertEqual(cm.exception.filename, name)

    def test_empty_file(self):
        srcname = TESTFN + 'src'
        dstname = TESTFN + 'dst'
        self.addCleanup(support.unlink, srcname)
        self.addCleanup(support.unlink, dstname)
        # Create an empty source file.
        with open(srcname, "wb"):
            pass

        with open(srcname, "rb") as src:
            with open(dstname, "wb") as dst:
                self.zerocopy_fun(src, dst)

        self.assertEqual(read_file(dstname, binary=True), b"")

    def test_unhandled_exception(self):
        with unittest.mock.patch(self.PATCHPOINT,
                                 side_effect=ZeroDivisionError):
            self.assertRaises(ZeroDivisionError,
                              shutil.copyfile, TESTFN, TESTFN2)

    def test_exception_on_first_call(self):
        # Emulate a case where the first call to the zero-copy
        # function raises an exception in which case the function is
        # supposed to give up immediately.
        with unittest.mock.patch(self.PATCHPOINT,
                                 side_effect=OSError(errno.EINVAL, "yo")):
            with self.get_files() as (src, dst):
                with self.assertRaises(_GiveupOnFastCopy):
                    self.zerocopy_fun(src, dst)

    def test_filesystem_full(self):
        # Emulate a case where filesystem is full and sendfile() fails
        # on first call.
        with unittest.mock.patch(self.PATCHPOINT,
                                 side_effect=OSError(errno.ENOSPC, "yo")):
            with self.get_files() as (src, dst):
                self.assertRaises(OSError, self.zerocopy_fun, src, dst)
@unittest.skipIf(not SUPPORTS_SENDFILE, 'os.sendfile() not supported')
class TestZeroCopySendfile(_ZeroCopyFileTest, unittest.TestCase):
    """Run the shared zero-copy tests against shutil._fastcopy_sendfile()."""
    PATCHPOINT = "os.sendfile"

    def zerocopy_fun(self, fsrc, fdst):
        return shutil._fastcopy_sendfile(fsrc, fdst)

    def test_non_regular_file_src(self):
        with io.BytesIO(self.FILEDATA) as src:
            with open(TESTFN2, "wb") as dst:
                with self.assertRaises(_GiveupOnFastCopy):
                    self.zerocopy_fun(src, dst)
                # The plain copy must still work after giving up.
                shutil.copyfileobj(src, dst)

        self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)

    def test_non_regular_file_dst(self):
        with open(TESTFN, "rb") as src:
            with io.BytesIO() as dst:
                with self.assertRaises(_GiveupOnFastCopy):
                    self.zerocopy_fun(src, dst)
                # The plain copy must still work after giving up.
                shutil.copyfileobj(src, dst)
                dst.seek(0)
                self.assertEqual(dst.read(), self.FILEDATA)

    def test_exception_on_second_call(self):
        def sendfile(*args, **kwargs):
            # First call goes to the real os.sendfile(); every later
            # call fails with EBADF.
            if not flag:
                flag.append(None)
                return orig_sendfile(*args, **kwargs)
            else:
                raise OSError(errno.EBADF, "yo")

        flag = []
        orig_sendfile = os.sendfile
        with unittest.mock.patch('os.sendfile', create=True,
                                 side_effect=sendfile):
            with self.get_files() as (src, dst):
                with self.assertRaises(OSError) as cm:
                    shutil._fastcopy_sendfile(src, dst)
        # The wrapper must have been entered at least once.
        self.assertTrue(flag)
        self.assertEqual(cm.exception.errno, errno.EBADF)

    def test_cant_get_size(self):
        # Emulate a case where src file size cannot be determined.
        # Internally bufsize will be set to a small value and
        # sendfile() will be called repeatedly.
        with unittest.mock.patch('os.fstat', side_effect=OSError) as m:
            with self.get_files() as (src, dst):
                shutil._fastcopy_sendfile(src, dst)
                self.assertTrue(m.called)
        self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)

    def test_small_chunks(self):
        # Force internal file size detection to be smaller than the
        # actual file size. We want to force sendfile() to be called
        # multiple times, also in order to emulate a src fd which gets
        # bigger while it is being copied.
        fake_stat = unittest.mock.Mock()
        fake_stat.st_size = 65536 + 1
        with unittest.mock.patch('os.fstat', return_value=fake_stat) as m:
            with self.get_files() as (src, dst):
                shutil._fastcopy_sendfile(src, dst)
                self.assertTrue(m.called)
        self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)

    def test_big_chunk(self):
        # Force internal file size detection to be +100MB bigger than
        # the actual file size. Make sure sendfile() does not rely on
        # file size value except for (maybe) a better throughput /
        # performance.
        fake_stat = unittest.mock.Mock()
        fake_stat.st_size = self.FILESIZE + (100 * 1024 * 1024)
        with unittest.mock.patch('os.fstat', return_value=fake_stat) as m:
            with self.get_files() as (src, dst):
                shutil._fastcopy_sendfile(src, dst)
                self.assertTrue(m.called)
        self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)

    def test_blocksize_arg(self):
        with unittest.mock.patch('os.sendfile',
                                 side_effect=ZeroDivisionError) as m:
            self.assertRaises(ZeroDivisionError,
                              shutil.copyfile, TESTFN, TESTFN2)
            # Fourth positional argument of the mocked os.sendfile() call.
            blocksize = m.call_args[0][3]
            # Make sure file size and the block size arg passed to
            # sendfile() are the same.
            self.assertEqual(blocksize, os.path.getsize(TESTFN))
            # ...unless we're dealing with a small file.
            support.unlink(TESTFN2)
            write_file(TESTFN2, b"hello", binary=True)
            self.addCleanup(support.unlink, TESTFN2 + '3')
            self.assertRaises(ZeroDivisionError,
                              shutil.copyfile, TESTFN2, TESTFN2 + '3')
            blocksize = m.call_args[0][3]
            self.assertEqual(blocksize, 2 ** 23)

    def test_file2file_not_supported(self):
        # Emulate a case where sendfile() only support file->socket
        # fds. In such a case copyfile() is supposed to skip the
        # fast-copy attempt from then on.
        self.assertTrue(shutil._HAS_SENDFILE)
        try:
            with unittest.mock.patch(
                    self.PATCHPOINT,
                    side_effect=OSError(errno.ENOTSOCK, "yo")) as m:
                with self.get_files() as (src, dst):
                    with self.assertRaises(_GiveupOnFastCopy):
                        shutil._fastcopy_sendfile(src, dst)
                self.assertTrue(m.called)
            self.assertFalse(shutil._HAS_SENDFILE)

            with unittest.mock.patch(self.PATCHPOINT) as m:
                shutil.copyfile(TESTFN, TESTFN2)
                self.assertFalse(m.called)
        finally:
            # Restore the module-level flag flipped by the code under
            # test so later tests still exercise the sendfile() path.
            shutil._HAS_SENDFILE = True
@unittest.skipIf(not OSX, 'OSX only')
class TestZeroCopyOSX(_ZeroCopyFileTest, unittest.TestCase):
    """Run the shared zero-copy tests against shutil._fastcopy_osx()."""

    # Dotted name the inherited tests mock to inject failures.
    PATCHPOINT = "posix._fcopyfile"

    def zerocopy_fun(self, src, dst):
        """Copy *src* to *dst* through shutil._fastcopy_osx()."""
        flags = posix._COPYFILE_DATA
        return shutil._fastcopy_osx(src, dst, flags)
class TermsizeTests(unittest.TestCase):
def test_does_not_crash(self):
"""Check if get_terminal_size() returns a meaningful value.