gh-129005: Align FileIO.readall() allocation (#129458)

Both the C (`_io`) and Python (`_pyio`) implementations of FileIO.readall() now use a pre-allocated buffer of length `bufsize`, fill it using a readinto(), and share matching "expand buffer" logic.

On my machine this reduces the runtime of:

`./python -m test -M8g -uall test_largefile -m test_large_read -v`

from ~3.7 seconds to ~3.4 seconds.
This commit is contained in:
Cody Maloney 2025-01-30 03:14:23 -08:00 committed by GitHub
parent 6c63afc3be
commit f927204f64
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 20 additions and 9 deletions

View file

@ -1674,22 +1674,31 @@ class FileIO(RawIOBase):
except OSError:
pass
result = bytearray()
result = bytearray(bufsize)
bytes_read = 0
while True:
if len(result) >= bufsize:
bufsize = len(result)
bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
n = bufsize - len(result)
if bytes_read >= bufsize:
# Parallels _io/fileio.c new_buffersize
if bufsize > 65536:
addend = bufsize >> 3
else:
addend = bufsize + 256
if addend < DEFAULT_BUFFER_SIZE:
addend = DEFAULT_BUFFER_SIZE
bufsize += addend
result[bytes_read:bufsize] = b'\0'
assert bufsize - bytes_read > 0, "Should always try and read at least one byte"
try:
chunk = os.read(self._fd, n)
n = os.readinto(self._fd, memoryview(result)[bytes_read:])
except BlockingIOError:
if result:
if bytes_read > 0:
break
return None
if not chunk: # reached the end of the file
if n == 0: # reached the end of the file
break
result += chunk
bytes_read += n
del result[bytes_read:]
return bytes(result)
def readinto(self, buffer):