gh-129205: Add os.readinto() API for reading data into a caller provided buffer (#129211)

Add a new OS API which will read data directly into a caller provided
writeable buffer protocol object.

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
Co-authored-by: Victor Stinner <vstinner@python.org>
This commit is contained in:
Cody Maloney 2025-01-26 05:21:03 -08:00 committed by GitHub
parent 0ef8d470b7
commit 1ed4487968
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 267 additions and 1 deletions

View file

@ -1659,6 +1659,33 @@ or `the MSDN <https://msdn.microsoft.com/en-us/library/z0kc8e3z.aspx>`_ on Windo
:exc:`InterruptedError` exception (see :pep:`475` for the rationale).
.. function:: readinto(fd, buffer, /)
Read from a file descriptor *fd* into a mutable
:ref:`buffer object <bufferobjects>` *buffer*.
The *buffer* should be mutable and :term:`bytes-like <bytes-like object>`. On
success, returns the number of bytes read. Fewer bytes may be read than the
size of the buffer. The underlying system call will be retried when
interrupted by a signal, unless the signal handler raises an exception.
Other errors will not be retried and an error will be raised.
Returns 0 if *fd* is at end of file or if the provided *buffer* has
length 0 (which can be used to check for errors without reading data).
Never returns negative.
.. note::
This function is intended for low-level I/O and must be applied to a file
descriptor as returned by :func:`os.open` or :func:`os.pipe`. To read a
"file object" returned by the built-in function :func:`open`, or
:data:`sys.stdin`, use its member functions, for example
:meth:`io.BufferedIOBase.readinto`, :meth:`io.BufferedIOBase.read`, or
:meth:`io.TextIOBase.read`.
.. versionadded:: next
.. function:: sendfile(out_fd, in_fd, offset, count)
sendfile(out_fd, in_fd, offset, count, headers=(), trailers=(), flags=0)

View file

@ -561,6 +561,10 @@ os
to the :mod:`os` module.
(Contributed by James Roy in :gh:`127688`.)
* Add the :func:`os.readinto` function to read into a
:ref:`buffer object <bufferobjects>` from a file descriptor.
(Contributed by Cody Maloney in :gh:`129205`.)
pathlib
-------

View file

@ -152,6 +152,37 @@ class OSEINTRTest(EINTRBaseTest):
self.assertEqual(data, os.read(rd, len(data)))
self.assertEqual(proc.wait(), 0)
def test_readinto(self):
    # Read, through os.readinto(), the payloads that a child process writes
    # to a pipe.  The child sleeps before every write so that the parent is
    # already waiting in the read when the data arrives.
    rd, wr = os.pipe()
    self.addCleanup(os.close, rd)
    # The write end is closed explicitly by the parent further down.

    # Every payload is smaller than PIPE_BUF, hence each write is atomic.
    datas = [b"hello", b"world", b"spam"]

    child_lines = (
        'import os, sys, time',
        '',
        'wr = int(sys.argv[1])',
        'datas = %r' % datas,
        'sleep_time = %r' % self.sleep_time,
        '',
        'for data in datas:',
        ' # let the parent block on read()',
        ' time.sleep(sleep_time)',
        ' os.write(wr, data)',
    )
    code = '\n'.join(child_lines)

    proc = self.subprocess(code, str(wr), pass_fds=[wr])
    with kill_on_error(proc):
        # The child holds its own copy of the write end (pass_fds).
        os.close(wr)
        for expected in datas:
            buffer = bytearray(len(expected))
            nread = os.readinto(rd, buffer)
            self.assertEqual(nread, len(expected))
            self.assertEqual(buffer, expected)
        self.assertEqual(proc.wait(), 0)
def test_write(self):
rd, wr = os.pipe()
self.addCleanup(os.close, wr)

View file

@ -230,6 +230,93 @@ class FileTests(unittest.TestCase):
self.assertEqual(type(s), bytes)
self.assertEqual(s, b"spam")
def test_readinto(self):
    # Exercise os.readinto() on a regular file: a short read into an
    # oversized buffer, reads at EOF, and zero-length buffers.
    with open(os_helper.TESTFN, "w+b") as fobj:
        fobj.write(b"spam")
        fobj.flush()
        fd = fobj.fileno()
        os.lseek(fd, 0, os.SEEK_SET)

        # The buffer is larger than the file content, so the read ends
        # before the buffer is full.
        buffer = bytearray(7)
        nread = os.readinto(fd, buffer)
        self.assertEqual(type(nread), int)
        self.assertEqual(nread, 4)
        # The first 4 bytes of the buffer should have been overwritten.
        self.assertEqual(buffer[:4], b"spam")

        # At EOF readinto should return 0 and leave the buffer alone.
        buffer[:] = b"notspam"
        nread = os.readinto(fd, buffer)
        self.assertEqual(type(nread), int)
        self.assertEqual(nread, 0)
        self.assertEqual(bytes(buffer), b"notspam")
        # A second call at EOF gives the same result.
        nread = os.readinto(fd, buffer)
        self.assertEqual(nread, 0)
        self.assertEqual(bytes(buffer), b"notspam")

        # A zero-length bytearray at EOF should return 0.
        empty = bytearray()
        self.assertEqual(os.readinto(fd, empty), 0)

        # A zero-length bytearray with data available should return 0 too.
        os.lseek(fd, 0, os.SEEK_SET)
        empty = bytearray()
        self.assertEqual(os.readinto(fd, empty), 0)
@unittest.skipUnless(hasattr(os, 'get_blocking'),
                     'needs os.get_blocking() and os.set_blocking()')
@unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()")
def test_readinto_non_blocking(self):
    # Check how os.readinto() behaves on a non-blocking pipe, both when a
    # read would block and once the write end has been closed.
    rd, wr = os.pipe()
    try:
        os.set_blocking(rd, False)

        # Nothing written yet: the read would block.
        with self.assertRaises(BlockingIOError):
            os.readinto(rd, bytearray(5))

        # Pass some data through.
        os.write(wr, b"spam")
        nread = os.readinto(rd, bytearray(4))
        self.assertEqual(nread, 4)

        # The pipe is drained again: still must not block or return 0.
        with self.assertRaises(BlockingIOError):
            os.readinto(rd, bytearray(5))

        # With the write end closed the reader is at EOF: size 0, and
        # the second pass checks that it stays at EOF.
        os.close(wr)
        wr = None
        for _ in range(2):
            self.assertEqual(os.readinto(rd, bytearray(5)), 0)
    finally:
        os.close(rd)
        # wr is None once the test body has already closed it.
        if wr is not None:
            os.close(wr)
def test_readinto_badarg(self):
    # os.readinto() must reject unsuitable arguments with TypeError and
    # must not consume any data from the descriptor while doing so.
    with open(os_helper.TESTFN, "w+b") as fobj:
        fobj.write(b"spam")
        fobj.flush()
        fd = fobj.fileno()
        os.lseek(fd, 0, os.SEEK_SET)

        # A str, an (immutable) bytes object and an int are not
        # acceptable buffers.
        for bad_arg in ("test", b"", 14):
            with self.subTest(f"bad buffer {type(bad_arg)}"), \
                 self.assertRaises(TypeError):
                os.readinto(fd, bad_arg)

        # The first argument has to be a descriptor, not a file object.
        with self.subTest("doesn't work on file objects"), \
             self.assertRaises(TypeError):
            os.readinto(fobj, bytearray(5))

        # Both arguments are required.
        with self.assertRaises(TypeError):
            os.readinto(fd)

        # None of the rejected calls should have read data, so the whole
        # content is still available.
        buffer = bytearray(4)
        nread = os.readinto(fd, buffer)
        self.assertEqual(nread, 4)
        self.assertEqual(buffer, b"spam")
@support.cpython_only
# Skip the test on 32-bit platforms: the number of bytes must fit in a
# Py_ssize_t type
@ -249,6 +336,29 @@ class FileTests(unittest.TestCase):
# operating system is free to return less bytes than requested.
self.assertEqual(data, b'test')
@support.cpython_only
# Skip the test on 32-bit platforms: the number of bytes must fit in a
# Py_ssize_t type
@unittest.skipUnless(INT_MAX < PY_SSIZE_T_MAX,
                     "needs INT_MAX < PY_SSIZE_T_MAX")
@support.bigmemtest(size=INT_MAX + 10, memuse=1, dry_run=False)
def test_large_readinto(self, size):
    self.addCleanup(os_helper.unlink, os_helper.TESTFN)
    create_file(os_helper.TESTFN, b'test')

    # Issue #21932: os.readinto() takes the length to read from the buffer
    # itself instead of from an explicit argument.  A buffer larger than
    # INT_MAX must still be accepted, with the read capped to a valid
    # size, rather than raising OverflowError.
    #
    # The buffer is sized from *size*, the value supplied by bigmemtest, so
    # that it cannot drift from the size declared in the decorator.
    buffer = bytearray(size)
    with open(os_helper.TESTFN, "rb") as fp:
        length = os.readinto(fp.fileno(), buffer)

    # The file holds only 4 bytes: the test does not try to read more than
    # 2 GiB at once because the operating system is free to return fewer
    # bytes than requested.
    self.assertEqual(length, 4)
    self.assertEqual(buffer[:4], b'test')
def test_write(self):
# os.write() accepts bytes- and buffer-like objects but not strings
fd = os.open(os_helper.TESTFN, os.O_CREAT | os.O_WRONLY)
@ -2467,6 +2577,10 @@ class TestInvalidFD(unittest.TestCase):
def test_read(self):
    # Hand os.read and its extra argument to this class's check() helper.
    # NOTE(review): check() is defined outside this view; it presumably
    # calls the function on an invalid file descriptor -- confirm.
    extra_arg = 1
    self.check(os.read, extra_arg)
@unittest.skipUnless(hasattr(os, 'readinto'), 'test needs os.readinto()')
def test_readinto(self):
    # Hand os.readinto and a small writable buffer to this class's check()
    # helper.
    # NOTE(review): check() is defined outside this view; it presumably
    # calls the function on an invalid file descriptor -- confirm.
    scratch = bytearray(5)
    self.check(os.readinto, scratch)
@unittest.skipUnless(hasattr(os, 'readv'), 'test needs os.readv()')
def test_readv(self):
buf = bytearray(10)

View file

@ -0,0 +1 @@
Add :func:`os.readinto` to read into a :ref:`buffer object <bufferobjects>` from a file descriptor.

View file

@ -7577,6 +7577,62 @@ exit:
return return_value;
}
/* Docstring text for os.readinto(), stored as os_readinto__doc__ and
   referenced by the method-table entry defined right after it. */
PyDoc_STRVAR(os_readinto__doc__,
"readinto($module, fd, buffer, /)\n"
"--\n"
"\n"
"Read into a buffer object from a file descriptor.\n"
"\n"
"The buffer should be mutable and bytes-like. On success, returns the number of\n"
"bytes read. Less bytes may be read than the size of the buffer. The underlying\n"
"system call will be retried when interrupted by a signal, unless the signal\n"
"handler raises an exception. Other errors will not be retried and an error will\n"
"be raised.\n"
"\n"
"Returns 0 if *fd* is at end of file or if the provided *buffer* has length 0\n"
"(which can be used to check for errors without reading data). Never returns\n"
"negative.");

/* Method-table entry that binds the Python-level name "readinto" to the
   os_readinto() wrapper (METH_FASTCALL) together with the docstring above. */
#define OS_READINTO_METHODDEF    \
    {"readinto", _PyCFunction_CAST(os_readinto), METH_FASTCALL, os_readinto__doc__},
/* Forward declaration of the hand-written implementation that the wrapper
   below calls. */
static Py_ssize_t
os_readinto_impl(PyObject *module, int fd, Py_buffer *buffer);

/* Argument-parsing wrapper for os.readinto(fd, buffer).
 *
 * Converts the two positional arguments, calls os_readinto_impl() and turns
 * its Py_ssize_t result into a Python int.  Returns NULL when argument
 * conversion fails or when the implementation returns -1 with an exception
 * set.  A buffer view that was acquired is released on every path.
 */
static PyObject *
os_readinto(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
{
    PyObject *return_value = NULL;
    int fd;
    /* buffer.obj starts out NULL so the cleanup code can tell whether a
       view was actually acquired. */
    Py_buffer buffer = {NULL, NULL};
    Py_ssize_t _return_value;

    /* Exactly two positional arguments are accepted. */
    if (!_PyArg_CheckPositional("readinto", nargs, 2, 2)) {
        goto exit;
    }
    /* Argument 1: the file descriptor, converted to a C int.  -1 is only an
       error when an exception is pending. */
    fd = PyLong_AsInt(args[0]);
    if (fd == -1 && PyErr_Occurred()) {
        goto exit;
    }
    /* Argument 2: a view requested with PyBUF_WRITABLE; on failure the
       argument is reported as not being a read-write bytes-like object. */
    if (PyObject_GetBuffer(args[1], &buffer, PyBUF_WRITABLE) < 0) {
        _PyArg_BadArgument("readinto", "argument 2", "read-write bytes-like object", args[1]);
        goto exit;
    }
    _return_value = os_readinto_impl(module, fd, &buffer);
    /* -1 signals failure only when an exception is set. */
    if ((_return_value == -1) && PyErr_Occurred()) {
        goto exit;
    }
    return_value = PyLong_FromSsize_t(_return_value);

exit:
    /* Cleanup for buffer */
    if (buffer.obj) {
       PyBuffer_Release(&buffer);
    }

    return return_value;
}
#if defined(HAVE_READV)
PyDoc_STRVAR(os_readv__doc__,
@ -13140,4 +13196,4 @@ os__emscripten_debugger(PyObject *module, PyObject *Py_UNUSED(ignored))
#ifndef OS__EMSCRIPTEN_DEBUGGER_METHODDEF
#define OS__EMSCRIPTEN_DEBUGGER_METHODDEF
#endif /* !defined(OS__EMSCRIPTEN_DEBUGGER_METHODDEF) */
/*[clinic end generated code: output=34cb96bd07bcef90 input=a9049054013a1b77]*/
/*[clinic end generated code: output=8318c26fc2cd236c input=a9049054013a1b77]*/

View file

@ -11433,6 +11433,38 @@ os_read_impl(PyObject *module, int fd, Py_ssize_t length)
return buffer;
}
/*[clinic input]
os.readinto -> Py_ssize_t
fd: int
buffer: Py_buffer(accept={rwbuffer})
/
Read into a buffer object from a file descriptor.
The buffer should be mutable and bytes-like. On success, returns the number of
bytes read. Less bytes may be read than the size of the buffer. The underlying
system call will be retried when interrupted by a signal, unless the signal
handler raises an exception. Other errors will not be retried and an error will
be raised.
Returns 0 if *fd* is at end of file or if the provided *buffer* has length 0
(which can be used to check for errors without reading data). Never returns
negative.
[clinic start generated code]*/
static Py_ssize_t
os_readinto_impl(PyObject *module, int fd, Py_buffer *buffer)
/*[clinic end generated code: output=8091a3513c683a80 input=d40074d0a68de575]*/
{
    Py_ssize_t nread;

    /* The number of bytes to request is the length of the caller's buffer. */
    assert(buffer->len >= 0);
    nread = _Py_read(fd, buffer->buf, buffer->len);

    /* A negative value must never escape without an exception being set;
       callers rely on that.  _Py_read is expected either to succeed
       (possibly reading 0 bytes) or to set an error. */
    assert(nread >= 0 || (nread == -1 && PyErr_Occurred()));
    return nread;
}
#if (defined(HAVE_SENDFILE) && (defined(__FreeBSD__) || defined(__DragonFly__) \
|| defined(__APPLE__))) \
|| defined(HAVE_READV) || defined(HAVE_PREADV) || defined (HAVE_PREADV2) \
@ -16973,6 +17005,7 @@ static PyMethodDef posix_methods[] = {
OS_LOCKF_METHODDEF
OS_LSEEK_METHODDEF
OS_READ_METHODDEF
OS_READINTO_METHODDEF
OS_READV_METHODDEF
OS_PREAD_METHODDEF
OS_PREADV_METHODDEF