mirror of
https://github.com/python/cpython.git
synced 2025-07-07 19:35:27 +00:00
gh-117151: IO performance improvement, increase io.DEFAULT_BUFFER_SIZE to 128k (GH-118144)
Co-authored-by: rmorotti <romain.morotti@man.com>
This commit is contained in:
parent
4bf25a0dc8
commit
b1b4f9625c
7 changed files with 38 additions and 22 deletions
|
@ -1405,10 +1405,10 @@ are always available. They are listed here in alphabetical order.
|
|||
:func:`io.TextIOWrapper.reconfigure`. When no *buffering* argument is
|
||||
given, the default buffering policy works as follows:
|
||||
|
||||
* Binary files are buffered in fixed-size chunks; the size of the buffer is
|
||||
chosen using a heuristic trying to determine the underlying device's "block
|
||||
size" and falling back on :const:`io.DEFAULT_BUFFER_SIZE`. On many systems,
|
||||
the buffer will typically be 4096 or 8192 bytes long.
|
||||
* Binary files are buffered in fixed-size chunks; the size of the buffer
|
||||
is ``max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE)``
|
||||
when the device block size is available.
|
||||
On most systems, the buffer will be 128 KiB long.
|
||||
|
||||
* "Interactive" text files (files for which :meth:`~io.IOBase.isatty`
|
||||
returns ``True``) use line buffering. Other text files use the policy
|
||||
|
|
15
Lib/_pyio.py
15
Lib/_pyio.py
|
@ -23,8 +23,9 @@ if hasattr(os, 'SEEK_HOLE') :
|
|||
valid_seek_flags.add(os.SEEK_HOLE)
|
||||
valid_seek_flags.add(os.SEEK_DATA)
|
||||
|
||||
# open() uses st_blksize whenever we can
|
||||
DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
|
||||
# open() uses max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE)
|
||||
# when the device block size is available.
|
||||
DEFAULT_BUFFER_SIZE = 128 * 1024 # bytes
|
||||
|
||||
# NOTE: Base classes defined here are registered with the "official" ABCs
|
||||
# defined in io.py. We don't use real inheritance though, because we don't want
|
||||
|
@ -123,10 +124,10 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
|
|||
the size of a fixed-size chunk buffer. When no buffering argument is
|
||||
given, the default buffering policy works as follows:
|
||||
|
||||
* Binary files are buffered in fixed-size chunks; the size of the buffer
|
||||
is chosen using a heuristic trying to determine the underlying device's
|
||||
"block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
|
||||
On many systems, the buffer will typically be 4096 or 8192 bytes long.
|
||||
* Binary files are buffered in fixed-size chunks; the size of the buffer
|
||||
is max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE)
|
||||
when the device block size is available.
|
||||
On most systems, the buffer will typically be 128 kilobytes long.
|
||||
|
||||
* "Interactive" text files (files for which isatty() returns True)
|
||||
use line buffering. Other text files use the policy described above
|
||||
|
@ -242,7 +243,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
|
|||
buffering = -1
|
||||
line_buffering = True
|
||||
if buffering < 0:
|
||||
buffering = raw._blksize
|
||||
buffering = max(min(raw._blksize, 8192 * 1024), DEFAULT_BUFFER_SIZE)
|
||||
if buffering < 0:
|
||||
raise ValueError("invalid buffering size")
|
||||
if buffering == 0:
|
||||
|
|
|
@ -216,6 +216,16 @@ class OtherFileTests:
|
|||
with self.assertWarnsRegex(RuntimeWarning, 'line buffering'):
|
||||
self._checkBufferSize(1)
|
||||
|
||||
def testDefaultBufferSize(self):
|
||||
with self.open(TESTFN, 'wb') as f:
|
||||
blksize = f.raw._blksize
|
||||
f.write(b"\0" * 5_000_000)
|
||||
|
||||
with self.open(TESTFN, 'rb') as f:
|
||||
data = f.read1()
|
||||
expected_size = max(min(blksize, 8192 * 1024), io.DEFAULT_BUFFER_SIZE)
|
||||
self.assertEqual(len(data), expected_size)
|
||||
|
||||
def testTruncateOnWindows(self):
|
||||
# SF bug <https://bugs.python.org/issue801631>
|
||||
# "file.truncate fault on windows"
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
Increase ``io.DEFAULT_BUFFER_SIZE`` from 8 KiB to 128 KiB and adjust :func:`open` on
|
||||
platforms where :func:`os.fstat` provides a ``st_blksize`` field (such as Linux)
|
||||
to use ``max(min(blocksize, 8 MiB), io.DEFAULT_BUFFER_SIZE)`` rather
|
||||
than always using the device block size. This should improve I/O performance.
|
||||
Patch by Romain Morotti.
|
|
@ -60,8 +60,7 @@ PyDoc_STRVAR(module_doc,
|
|||
"DEFAULT_BUFFER_SIZE\n"
|
||||
"\n"
|
||||
" An int containing the default buffer size used by the module's buffered\n"
|
||||
" I/O classes. open() uses the file's blksize (as obtained by os.stat) if\n"
|
||||
" possible.\n"
|
||||
" I/O classes.\n"
|
||||
);
|
||||
|
||||
|
||||
|
@ -132,9 +131,9 @@ the size of a fixed-size chunk buffer. When no buffering argument is
|
|||
given, the default buffering policy works as follows:
|
||||
|
||||
* Binary files are buffered in fixed-size chunks; the size of the buffer
|
||||
is chosen using a heuristic trying to determine the underlying device's
|
||||
"block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
|
||||
On many systems, the buffer will typically be 4096 or 8192 bytes long.
|
||||
is max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE)
|
||||
when the device block size is available.
|
||||
On most systems, the buffer will typically be 128 kilobytes long.
|
||||
|
||||
* "Interactive" text files (files for which isatty() returns True)
|
||||
use line buffering. Other text files use the policy described above
|
||||
|
@ -200,7 +199,7 @@ static PyObject *
|
|||
_io_open_impl(PyObject *module, PyObject *file, const char *mode,
|
||||
int buffering, const char *encoding, const char *errors,
|
||||
const char *newline, int closefd, PyObject *opener)
|
||||
/*[clinic end generated code: output=aefafc4ce2b46dc0 input=cd034e7cdfbf4e78]*/
|
||||
/*[clinic end generated code: output=aefafc4ce2b46dc0 input=28027fdaabb8d744]*/
|
||||
{
|
||||
size_t i;
|
||||
|
||||
|
@ -371,6 +370,7 @@ _io_open_impl(PyObject *module, PyObject *file, const char *mode,
|
|||
Py_DECREF(blksize_obj);
|
||||
if (buffering == -1 && PyErr_Occurred())
|
||||
goto error;
|
||||
buffering = Py_MAX(Py_MIN(buffering, 8192 * 1024), DEFAULT_BUFFER_SIZE);
|
||||
}
|
||||
if (buffering < 0) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
|
|
|
@ -78,7 +78,7 @@ extern Py_ssize_t _PyIO_find_line_ending(
|
|||
*/
|
||||
extern int _PyIO_trap_eintr(void);
|
||||
|
||||
#define DEFAULT_BUFFER_SIZE (8 * 1024) /* bytes */
|
||||
#define DEFAULT_BUFFER_SIZE (128 * 1024) /* bytes */
|
||||
|
||||
/*
|
||||
* Offset type for positioning.
|
||||
|
|
8
Modules/_io/clinic/_iomodule.c.h
generated
8
Modules/_io/clinic/_iomodule.c.h
generated
|
@ -64,9 +64,9 @@ PyDoc_STRVAR(_io_open__doc__,
|
|||
"given, the default buffering policy works as follows:\n"
|
||||
"\n"
|
||||
"* Binary files are buffered in fixed-size chunks; the size of the buffer\n"
|
||||
" is chosen using a heuristic trying to determine the underlying device\'s\n"
|
||||
" \"block size\" and falling back on `io.DEFAULT_BUFFER_SIZE`.\n"
|
||||
" On many systems, the buffer will typically be 4096 or 8192 bytes long.\n"
|
||||
" is max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE)\n"
|
||||
" when the device block size is available.\n"
|
||||
" On most systems, the buffer will typically be 128 kilobytes long.\n"
|
||||
"\n"
|
||||
"* \"Interactive\" text files (files for which isatty() returns True)\n"
|
||||
" use line buffering. Other text files use the policy described above\n"
|
||||
|
@ -406,4 +406,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec
|
|||
exit:
|
||||
return return_value;
|
||||
}
|
||||
/*[clinic end generated code: output=ec1df2ff5265ab16 input=a9049054013a1b77]*/
|
||||
/*[clinic end generated code: output=2eaf6e914503bcfd input=a9049054013a1b77]*/
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue