mirror of
https://github.com/python/cpython.git
synced 2025-08-31 14:07:50 +00:00
gh-129205: Add os.readinto() API for reading data into a caller provided buffer (#129211)
Add a new OS API which will read data directly into a caller provided writeable buffer protocol object. Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Victor Stinner <vstinner@python.org>
This commit is contained in:
parent
0ef8d470b7
commit
1ed4487968
7 changed files with 267 additions and 1 deletions
|
@ -1659,6 +1659,33 @@ or `the MSDN <https://msdn.microsoft.com/en-us/library/z0kc8e3z.aspx>`_ on Windo
|
|||
:exc:`InterruptedError` exception (see :pep:`475` for the rationale).
|
||||
|
||||
|
||||
.. function:: readinto(fd, buffer, /)
|
||||
|
||||
Read from a file descriptor *fd* into a mutable
|
||||
:ref:`buffer object <bufferobjects>` *buffer*.
|
||||
|
||||
The *buffer* should be mutable and :term:`bytes-like <bytes-like object>`. On
|
||||
success, returns the number of bytes read. Fewer bytes may be read than the
|
||||
size of the buffer. The underlying system call will be retried when
|
||||
interrupted by a signal, unless the signal handler raises an exception.
|
||||
Other errors will not be retried and an error will be raised.
|
||||
|
||||
Returns 0 if *fd* is at end of file or if the provided *buffer* has
|
||||
length 0 (which can be used to check for errors without reading data).
|
||||
Never returns a negative value.
|
||||
|
||||
.. note::
|
||||
|
||||
This function is intended for low-level I/O and must be applied to a file
|
||||
descriptor as returned by :func:`os.open` or :func:`os.pipe`. To read a
|
||||
"file object" returned by the built-in function :func:`open`, or
|
||||
:data:`sys.stdin`, use its member functions, for example
|
||||
:meth:`io.BufferedIOBase.readinto`, :meth:`io.BufferedIOBase.read`, or
|
||||
:meth:`io.TextIOBase.read`.
|
||||
|
||||
.. versionadded:: next
|
||||
|
||||
|
||||
.. function:: sendfile(out_fd, in_fd, offset, count)
|
||||
sendfile(out_fd, in_fd, offset, count, headers=(), trailers=(), flags=0)
|
||||
|
||||
|
|
|
@ -561,6 +561,10 @@ os
|
|||
to the :mod:`os` module.
|
||||
(Contributed by James Roy in :gh:`127688`.)
|
||||
|
||||
* Add the :func:`os.readinto` function to read into a
|
||||
:ref:`buffer object <bufferobjects>` from a file descriptor.
|
||||
(Contributed by Cody Maloney in :gh:`129205`.)
|
||||
|
||||
|
||||
pathlib
|
||||
-------
|
||||
|
|
|
@ -152,6 +152,37 @@ class OSEINTRTest(EINTRBaseTest):
|
|||
self.assertEqual(data, os.read(rd, len(data)))
|
||||
self.assertEqual(proc.wait(), 0)
|
||||
|
||||
def test_readinto(self):
|
||||
rd, wr = os.pipe()
|
||||
self.addCleanup(os.close, rd)
|
||||
# wr closed explicitly by parent
|
||||
|
||||
# the payloads below are smaller than PIPE_BUF, hence the writes will be
|
||||
# atomic
|
||||
datas = [b"hello", b"world", b"spam"]
|
||||
|
||||
code = '\n'.join((
|
||||
'import os, sys, time',
|
||||
'',
|
||||
'wr = int(sys.argv[1])',
|
||||
'datas = %r' % datas,
|
||||
'sleep_time = %r' % self.sleep_time,
|
||||
'',
|
||||
'for data in datas:',
|
||||
' # let the parent block on read()',
|
||||
' time.sleep(sleep_time)',
|
||||
' os.write(wr, data)',
|
||||
))
|
||||
|
||||
proc = self.subprocess(code, str(wr), pass_fds=[wr])
|
||||
with kill_on_error(proc):
|
||||
os.close(wr)
|
||||
for data in datas:
|
||||
buffer = bytearray(len(data))
|
||||
self.assertEqual(os.readinto(rd, buffer), len(data))
|
||||
self.assertEqual(buffer, data)
|
||||
self.assertEqual(proc.wait(), 0)
|
||||
|
||||
def test_write(self):
|
||||
rd, wr = os.pipe()
|
||||
self.addCleanup(os.close, wr)
|
||||
|
|
|
@ -230,6 +230,93 @@ class FileTests(unittest.TestCase):
|
|||
self.assertEqual(type(s), bytes)
|
||||
self.assertEqual(s, b"spam")
|
||||
|
||||
def test_readinto(self):
|
||||
with open(os_helper.TESTFN, "w+b") as fobj:
|
||||
fobj.write(b"spam")
|
||||
fobj.flush()
|
||||
fd = fobj.fileno()
|
||||
os.lseek(fd, 0, 0)
|
||||
# Oversized so readinto returns all the data without filling the buffer.
|
||||
buffer = bytearray(7)
|
||||
s = os.readinto(fd, buffer)
|
||||
self.assertEqual(type(s), int)
|
||||
self.assertEqual(s, 4)
|
||||
# Should overwrite the first 4 bytes of the buffer.
|
||||
self.assertEqual(buffer[:4], b"spam")
|
||||
|
||||
# Readinto at EOF should return 0 and not touch buffer.
|
||||
buffer[:] = b"notspam"
|
||||
s = os.readinto(fd, buffer)
|
||||
self.assertEqual(type(s), int)
|
||||
self.assertEqual(s, 0)
|
||||
self.assertEqual(bytes(buffer), b"notspam")
|
||||
s = os.readinto(fd, buffer)
|
||||
self.assertEqual(s, 0)
|
||||
self.assertEqual(bytes(buffer), b"notspam")
|
||||
|
||||
# Readinto a 0 length bytearray when at EOF should return 0
|
||||
self.assertEqual(os.readinto(fd, bytearray()), 0)
|
||||
|
||||
# Readinto a 0 length bytearray with data available should return 0.
|
||||
os.lseek(fd, 0, 0)
|
||||
self.assertEqual(os.readinto(fd, bytearray()), 0)
|
||||
|
||||
@unittest.skipUnless(hasattr(os, 'get_blocking'),
|
||||
'needs os.get_blocking() and os.set_blocking()')
|
||||
@unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()")
|
||||
def test_readinto_non_blocking(self):
|
||||
# Verify behavior of a readinto which would block on a non-blocking fd.
|
||||
r, w = os.pipe()
|
||||
try:
|
||||
os.set_blocking(r, False)
|
||||
with self.assertRaises(BlockingIOError):
|
||||
os.readinto(r, bytearray(5))
|
||||
|
||||
# Pass some data through
|
||||
os.write(w, b"spam")
|
||||
self.assertEqual(os.readinto(r, bytearray(4)), 4)
|
||||
|
||||
# Still don't block or return 0.
|
||||
with self.assertRaises(BlockingIOError):
|
||||
os.readinto(r, bytearray(5))
|
||||
|
||||
# At EOF should return size 0
|
||||
os.close(w)
|
||||
w = None
|
||||
self.assertEqual(os.readinto(r, bytearray(5)), 0)
|
||||
self.assertEqual(os.readinto(r, bytearray(5)), 0) # Still EOF
|
||||
|
||||
finally:
|
||||
os.close(r)
|
||||
if w is not None:
|
||||
os.close(w)
|
||||
|
||||
def test_readinto_badarg(self):
|
||||
with open(os_helper.TESTFN, "w+b") as fobj:
|
||||
fobj.write(b"spam")
|
||||
fobj.flush()
|
||||
fd = fobj.fileno()
|
||||
os.lseek(fd, 0, 0)
|
||||
|
||||
for bad_arg in ("test", bytes(), 14):
|
||||
with self.subTest(f"bad buffer {type(bad_arg)}"):
|
||||
with self.assertRaises(TypeError):
|
||||
os.readinto(fd, bad_arg)
|
||||
|
||||
with self.subTest("doesn't work on file objects"):
|
||||
with self.assertRaises(TypeError):
|
||||
os.readinto(fobj, bytearray(5))
|
||||
|
||||
# takes two args
|
||||
with self.assertRaises(TypeError):
|
||||
os.readinto(fd)
|
||||
|
||||
# No data should have been read with the bad arguments.
|
||||
buffer = bytearray(4)
|
||||
s = os.readinto(fd, buffer)
|
||||
self.assertEqual(s, 4)
|
||||
self.assertEqual(buffer, b"spam")
|
||||
|
||||
@support.cpython_only
|
||||
# Skip the test on 32-bit platforms: the number of bytes must fit in a
|
||||
# Py_ssize_t type
|
||||
|
@ -249,6 +336,29 @@ class FileTests(unittest.TestCase):
|
|||
# operating system is free to return less bytes than requested.
|
||||
self.assertEqual(data, b'test')
|
||||
|
||||
|
||||
@support.cpython_only
|
||||
# Skip the test on 32-bit platforms: the number of bytes must fit in a
|
||||
# Py_ssize_t type
|
||||
@unittest.skipUnless(INT_MAX < PY_SSIZE_T_MAX,
|
||||
"needs INT_MAX < PY_SSIZE_T_MAX")
|
||||
@support.bigmemtest(size=INT_MAX + 10, memuse=1, dry_run=False)
|
||||
def test_large_readinto(self, size):
|
||||
self.addCleanup(os_helper.unlink, os_helper.TESTFN)
|
||||
create_file(os_helper.TESTFN, b'test')
|
||||
|
||||
# Issue #21932: For readinto the buffer contains the length rather than
|
||||
# a length being passed explicitly to read, should still get capped to a
|
||||
# valid size / not raise an OverflowError for sizes larger than INT_MAX.
|
||||
buffer = bytearray(INT_MAX + 10)
|
||||
with open(os_helper.TESTFN, "rb") as fp:
|
||||
length = os.readinto(fp.fileno(), buffer)
|
||||
|
||||
# The test does not try to read more than 2 GiB at once because the
|
||||
# operating system is free to return less bytes than requested.
|
||||
self.assertEqual(length, 4)
|
||||
self.assertEqual(buffer[:4], b'test')
|
||||
|
||||
def test_write(self):
|
||||
# os.write() accepts bytes- and buffer-like objects but not strings
|
||||
fd = os.open(os_helper.TESTFN, os.O_CREAT | os.O_WRONLY)
|
||||
|
@ -2467,6 +2577,10 @@ class TestInvalidFD(unittest.TestCase):
|
|||
def test_read(self):
|
||||
self.check(os.read, 1)
|
||||
|
||||
@unittest.skipUnless(hasattr(os, 'readinto'), 'test needs os.readinto()')
|
||||
def test_readinto(self):
|
||||
self.check(os.readinto, bytearray(5))
|
||||
|
||||
@unittest.skipUnless(hasattr(os, 'readv'), 'test needs os.readv()')
|
||||
def test_readv(self):
|
||||
buf = bytearray(10)
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Add :func:`os.readinto` to read into a :ref:`buffer object <bufferobjects>` from a file descriptor.
|
58
Modules/clinic/posixmodule.c.h
generated
58
Modules/clinic/posixmodule.c.h
generated
|
@ -7577,6 +7577,62 @@ exit:
|
|||
return return_value;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(os_readinto__doc__,
|
||||
"readinto($module, fd, buffer, /)\n"
|
||||
"--\n"
|
||||
"\n"
|
||||
"Read into a buffer object from a file descriptor.\n"
|
||||
"\n"
|
||||
"The buffer should be mutable and bytes-like. On success, returns the number of\n"
|
||||
"bytes read. Less bytes may be read than the size of the buffer. The underlying\n"
|
||||
"system call will be retried when interrupted by a signal, unless the signal\n"
|
||||
"handler raises an exception. Other errors will not be retried and an error will\n"
|
||||
"be raised.\n"
|
||||
"\n"
|
||||
"Returns 0 if *fd* is at end of file or if the provided *buffer* has length 0\n"
|
||||
"(which can be used to check for errors without reading data). Never returns\n"
|
||||
"negative.");
|
||||
|
||||
#define OS_READINTO_METHODDEF \
|
||||
{"readinto", _PyCFunction_CAST(os_readinto), METH_FASTCALL, os_readinto__doc__},
|
||||
|
||||
static Py_ssize_t
|
||||
os_readinto_impl(PyObject *module, int fd, Py_buffer *buffer);
|
||||
|
||||
static PyObject *
|
||||
os_readinto(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
|
||||
{
|
||||
PyObject *return_value = NULL;
|
||||
int fd;
|
||||
Py_buffer buffer = {NULL, NULL};
|
||||
Py_ssize_t _return_value;
|
||||
|
||||
if (!_PyArg_CheckPositional("readinto", nargs, 2, 2)) {
|
||||
goto exit;
|
||||
}
|
||||
fd = PyLong_AsInt(args[0]);
|
||||
if (fd == -1 && PyErr_Occurred()) {
|
||||
goto exit;
|
||||
}
|
||||
if (PyObject_GetBuffer(args[1], &buffer, PyBUF_WRITABLE) < 0) {
|
||||
_PyArg_BadArgument("readinto", "argument 2", "read-write bytes-like object", args[1]);
|
||||
goto exit;
|
||||
}
|
||||
_return_value = os_readinto_impl(module, fd, &buffer);
|
||||
if ((_return_value == -1) && PyErr_Occurred()) {
|
||||
goto exit;
|
||||
}
|
||||
return_value = PyLong_FromSsize_t(_return_value);
|
||||
|
||||
exit:
|
||||
/* Cleanup for buffer */
|
||||
if (buffer.obj) {
|
||||
PyBuffer_Release(&buffer);
|
||||
}
|
||||
|
||||
return return_value;
|
||||
}
|
||||
|
||||
#if defined(HAVE_READV)
|
||||
|
||||
PyDoc_STRVAR(os_readv__doc__,
|
||||
|
@ -13140,4 +13196,4 @@ os__emscripten_debugger(PyObject *module, PyObject *Py_UNUSED(ignored))
|
|||
#ifndef OS__EMSCRIPTEN_DEBUGGER_METHODDEF
|
||||
#define OS__EMSCRIPTEN_DEBUGGER_METHODDEF
|
||||
#endif /* !defined(OS__EMSCRIPTEN_DEBUGGER_METHODDEF) */
|
||||
/*[clinic end generated code: output=34cb96bd07bcef90 input=a9049054013a1b77]*/
|
||||
/*[clinic end generated code: output=8318c26fc2cd236c input=a9049054013a1b77]*/
|
||||
|
|
|
@ -11433,6 +11433,38 @@ os_read_impl(PyObject *module, int fd, Py_ssize_t length)
|
|||
return buffer;
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
os.readinto -> Py_ssize_t
|
||||
fd: int
|
||||
buffer: Py_buffer(accept={rwbuffer})
|
||||
/
|
||||
|
||||
Read into a buffer object from a file descriptor.
|
||||
|
||||
The buffer should be mutable and bytes-like. On success, returns the number of
|
||||
bytes read. Less bytes may be read than the size of the buffer. The underlying
|
||||
system call will be retried when interrupted by a signal, unless the signal
|
||||
handler raises an exception. Other errors will not be retried and an error will
|
||||
be raised.
|
||||
|
||||
Returns 0 if *fd* is at end of file or if the provided *buffer* has length 0
|
||||
(which can be used to check for errors without reading data). Never returns
|
||||
negative.
|
||||
[clinic start generated code]*/
|
||||
|
||||
static Py_ssize_t
|
||||
os_readinto_impl(PyObject *module, int fd, Py_buffer *buffer)
|
||||
/*[clinic end generated code: output=8091a3513c683a80 input=d40074d0a68de575]*/
|
||||
{
|
||||
assert(buffer->len >= 0);
|
||||
Py_ssize_t result = _Py_read(fd, buffer->buf, buffer->len);
|
||||
/* Ensure negative is never returned without an error. Simplifies calling
|
||||
code. _Py_read should succeed, possibly reading 0 bytes, _or_ set an
|
||||
error. */
|
||||
assert(result >= 0 || (result == -1 && PyErr_Occurred()));
|
||||
return result;
|
||||
}
|
||||
|
||||
#if (defined(HAVE_SENDFILE) && (defined(__FreeBSD__) || defined(__DragonFly__) \
|
||||
|| defined(__APPLE__))) \
|
||||
|| defined(HAVE_READV) || defined(HAVE_PREADV) || defined (HAVE_PREADV2) \
|
||||
|
@ -16973,6 +17005,7 @@ static PyMethodDef posix_methods[] = {
|
|||
OS_LOCKF_METHODDEF
|
||||
OS_LSEEK_METHODDEF
|
||||
OS_READ_METHODDEF
|
||||
OS_READINTO_METHODDEF
|
||||
OS_READV_METHODDEF
|
||||
OS_PREAD_METHODDEF
|
||||
OS_PREADV_METHODDEF
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue