gh-129005: Remove copies from _pyio using take_bytes (#141539)

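The _pyio read paths now return bytearray.take_bytes() instead of
bytes(bytearray), avoiding a second copy of the data that was read.
Memory usage now matches that of _io for large files.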
2025-12-23 09:19:18 +00:00 · 2025-11-18 01:10:32 -08:00 · 2025-11-18 01:10:32 -08:00 · 58f3fe0d9b
commit 58f3fe0d9b
parent 4867f717e2
3 changed files with 8 additions and 9 deletions
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@ -546,7 +546,7 @@ class IOBase(metaclass=abc.ABCMeta):
            res += b
            if res.endswith(b"\n"):
                break
-        return bytes(res)
+        return res.take_bytes()

    def __iter__(self):
        self._checkClosed()
@ -620,7 +620,7 @@ class RawIOBase(IOBase):
        if n < 0 or n > len(b):
            raise ValueError(f"readinto returned {n} outside buffer size {len(b)}")
        del b[n:]
-        return bytes(b)
+        return b.take_bytes()

    def readall(self):
        """Read until EOF, using multiple read() call."""
@ -628,7 +628,7 @@ class RawIOBase(IOBase):
        while data := self.read(DEFAULT_BUFFER_SIZE):
            res += data
        if res:
-            return bytes(res)
+            return res.take_bytes()
        else:
            # b'' or None
            return data
@ -1738,7 +1738,7 @@ class FileIO(RawIOBase):
        assert len(result) - bytes_read >= 1, \
            "os.readinto buffer size 0 will result in erroneous EOF / returns 0"
        result.resize(bytes_read)
-        return bytes(result)
+        return result.take_bytes()

    def readinto(self, buffer):
        """Same as RawIOBase.readinto()."""
--- a/Lib/test/test_io/test_bufferedio.py
+++ b/Lib/test/test_io/test_bufferedio.py
@ -1277,7 +1277,8 @@ class BufferedRandomTest(BufferedReaderTest, BufferedWriterTest):
        def _readinto(bufio, n=-1):
            b = bytearray(n if n >= 0 else 9999)
            n = bufio.readinto(b)
-            return bytes(b[:n])
+            b.resize(n)
+            return b.take_bytes()
        self.check_flush_and_read(_readinto)

    def test_flush_and_peek(self):
--- a/Lib/test/test_io/test_largefile.py
+++ b/Lib/test/test_io/test_largefile.py
@ -56,9 +56,7 @@ class TestFileMethods(LargeFileTest):
    (i.e. > 2 GiB) files.
    """

-    # _pyio.FileIO.readall() uses a temporary bytearray then casted to bytes,
-    # so memuse=2 is needed
-    @bigmemtest(size=size, memuse=2, dry_run=False)
+    @bigmemtest(size=size, memuse=1, dry_run=False)
    def test_large_read(self, _size):
        # bpo-24658: Test that a read greater than 2GB does not fail.
        with self.open(TESTFN, "rb") as f:
@ -154,7 +152,7 @@ class TestFileMethods(LargeFileTest):
                f.seek(pos)
                self.assertTrue(f.seekable())

-    @bigmemtest(size=size, memuse=2, dry_run=False)
+    @bigmemtest(size=size, memuse=1, dry_run=False)
    def test_seek_readall(self, _size):
        # Seek which doesn't change position should readall successfully.
        with self.open(TESTFN, 'rb') as f: