mirror of
https://github.com/python/cpython.git
synced 2025-09-26 10:19:53 +00:00
gh-129005: Align FileIO.readall() allocation (#129458)
Both `_io.FileIO.readall()` and `_pyio.FileIO.readall()` now use a pre-allocated buffer of length `bufsize`, fill it using `readinto()`, and have matching "expand buffer" logic. On my machine this reduces the runtime of `./python -m test -M8g -uall test_largefile -m test_large_read -v` from ~3.7 seconds to ~3.4 seconds.
This commit is contained in:
parent
6c63afc3be
commit
f927204f64
2 changed files with 20 additions and 9 deletions
27
Lib/_pyio.py
27
Lib/_pyio.py
|
@ -1674,22 +1674,31 @@ class FileIO(RawIOBase):
|
||||||
except OSError:
|
except OSError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
result = bytearray()
|
result = bytearray(bufsize)
|
||||||
|
bytes_read = 0
|
||||||
while True:
|
while True:
|
||||||
if len(result) >= bufsize:
|
if bytes_read >= bufsize:
|
||||||
bufsize = len(result)
|
# Parallels _io/fileio.c new_buffersize
|
||||||
bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
|
if bufsize > 65536:
|
||||||
n = bufsize - len(result)
|
addend = bufsize >> 3
|
||||||
|
else:
|
||||||
|
addend = bufsize + 256
|
||||||
|
if addend < DEFAULT_BUFFER_SIZE:
|
||||||
|
addend = DEFAULT_BUFFER_SIZE
|
||||||
|
bufsize += addend
|
||||||
|
result[bytes_read:bufsize] = b'\0'
|
||||||
|
assert bufsize - bytes_read > 0, "Should always try and read at least one byte"
|
||||||
try:
|
try:
|
||||||
chunk = os.read(self._fd, n)
|
n = os.readinto(self._fd, memoryview(result)[bytes_read:])
|
||||||
except BlockingIOError:
|
except BlockingIOError:
|
||||||
if result:
|
if bytes_read > 0:
|
||||||
break
|
break
|
||||||
return None
|
return None
|
||||||
if not chunk: # reached the end of the file
|
if n == 0: # reached the end of the file
|
||||||
break
|
break
|
||||||
result += chunk
|
bytes_read += n
|
||||||
|
|
||||||
|
del result[bytes_read:]
|
||||||
return bytes(result)
|
return bytes(result)
|
||||||
|
|
||||||
def readinto(self, buffer):
|
def readinto(self, buffer):
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
``_pyio.FileIO.readall()`` now allocates, resizes, and fills a data buffer using
|
||||||
|
the same algorithm ``_io.FileIO.readall()`` uses.
|
Loading…
Add table
Add a link
Reference in a new issue