diff --git a/Doc/library/io.rst b/Doc/library/io.rst index de5cab5aee6..dfebccb5a9c 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -719,6 +719,9 @@ than raw I/O does. The optional argument *initial_bytes* is a :term:`bytes-like object` that contains initial data. + Methods may be used from multiple threads without external locking in + :term:`free threading` builds. + :class:`BytesIO` provides or overrides these methods in addition to those from :class:`BufferedIOBase` and :class:`IOBase`: diff --git a/Lib/_pyio.py b/Lib/_pyio.py index fb2a6d049ca..5db8ce9244b 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -876,16 +876,28 @@ class BytesIO(BufferedIOBase): _buffer = None def __init__(self, initial_bytes=None): + # Lock used to keep self._buffer and self._pos consistent. + self._lock = Lock() + buf = bytearray() if initial_bytes is not None: buf += initial_bytes - self._buffer = buf - self._pos = 0 + + with self._lock: + self._buffer = buf + self._pos = 0 def __getstate__(self): if self.closed: raise ValueError("__getstate__ on closed file") - return self.__dict__.copy() + with self._lock: + state = self.__dict__.copy() + del state['_lock'] + return state + + def __setstate__(self, state): + self.__dict__.update(state) + self._lock = Lock() def getvalue(self): """Return the bytes value (contents) of the buffer @@ -918,14 +930,16 @@ class BytesIO(BufferedIOBase): raise TypeError(f"{size!r} is not an integer") else: size = size_index() - if size < 0: - size = len(self._buffer) - if len(self._buffer) <= self._pos: - return b"" - newpos = min(len(self._buffer), self._pos + size) - b = self._buffer[self._pos : newpos] - self._pos = newpos - return bytes(b) + + with self._lock: + if size < 0: + size = len(self._buffer) + if len(self._buffer) <= self._pos: + return b"" + newpos = min(len(self._buffer), self._pos + size) + b = self._buffer[self._pos : newpos] + self._pos = newpos + return bytes(b) def read1(self, size=-1): """This is the same as read. 
@@ -941,12 +955,14 @@ class BytesIO(BufferedIOBase): n = view.nbytes # Size of any bytes-like object if n == 0: return 0 - pos = self._pos - if pos > len(self._buffer): - # Pad buffer to pos with null bytes. - self._buffer.resize(pos) - self._buffer[pos:pos + n] = b - self._pos += n + + with self._lock: + pos = self._pos + if pos > len(self._buffer): + # Pad buffer to pos with null bytes. + self._buffer.resize(pos) + self._buffer[pos:pos + n] = b + self._pos += n return n def seek(self, pos, whence=0): @@ -963,9 +979,11 @@ class BytesIO(BufferedIOBase): raise ValueError("negative seek position %r" % (pos,)) self._pos = pos elif whence == 1: - self._pos = max(0, self._pos + pos) + with self._lock: + self._pos = max(0, self._pos + pos) elif whence == 2: - self._pos = max(0, len(self._buffer) + pos) + with self._lock: + self._pos = max(0, len(self._buffer) + pos) else: raise ValueError("unsupported whence value") return self._pos @@ -978,18 +996,20 @@ class BytesIO(BufferedIOBase): def truncate(self, pos=None): if self.closed: raise ValueError("truncate on closed file") - if pos is None: - pos = self._pos - else: - try: - pos_index = pos.__index__ - except AttributeError: - raise TypeError(f"{pos!r} is not an integer") + + with self._lock: + if pos is None: + pos = self._pos else: - pos = pos_index() - if pos < 0: - raise ValueError("negative truncate position %r" % (pos,)) - del self._buffer[pos:] + try: + pos_index = pos.__index__ + except AttributeError: + raise TypeError(f"{pos!r} is not an integer") + else: + pos = pos_index() + if pos < 0: + raise ValueError("negative truncate position %r" % (pos,)) + del self._buffer[pos:] return pos def readable(self): diff --git a/Lib/test/test_free_threading/test_io.py b/Lib/test/test_free_threading/test_io.py index f9bec740ddf..41d89e04da8 100644 --- a/Lib/test/test_free_threading/test_io.py +++ b/Lib/test/test_free_threading/test_io.py @@ -1,12 +1,13 @@ +import io +import _pyio as pyio import threading from unittest import 
TestCase from test.support import threading_helper from random import randint -from io import BytesIO from sys import getsizeof -class TestBytesIO(TestCase): +class ThreadSafetyMixin: # Test pretty much everything that can break under free-threading. # Non-deterministic, but at least one of these things will fail if # BytesIO object is not free-thread safe. @@ -90,20 +91,27 @@ class TestBytesIO(TestCase): barrier.wait() getsizeof(b) - self.check([write] * 10, BytesIO()) - self.check([writelines] * 10, BytesIO()) - self.check([write] * 10 + [truncate] * 10, BytesIO()) - self.check([truncate] + [read] * 10, BytesIO(b'0\n'*204800)) - self.check([truncate] + [read1] * 10, BytesIO(b'0\n'*204800)) - self.check([truncate] + [readline] * 10, BytesIO(b'0\n'*20480)) - self.check([truncate] + [readlines] * 10, BytesIO(b'0\n'*20480)) - self.check([truncate] + [readinto] * 10, BytesIO(b'0\n'*204800), bytearray(b'0\n'*204800)) - self.check([close] + [write] * 10, BytesIO()) - self.check([truncate] + [getvalue] * 10, BytesIO(b'0\n'*204800)) - self.check([truncate] + [getbuffer] * 10, BytesIO(b'0\n'*204800)) - self.check([truncate] + [iter] * 10, BytesIO(b'0\n'*20480)) - self.check([truncate] + [getstate] * 10, BytesIO(b'0\n'*204800)) - self.check([truncate] + [setstate] * 10, BytesIO(b'0\n'*204800), (b'123', 0, None)) - self.check([truncate] + [sizeof] * 10, BytesIO(b'0\n'*204800)) + self.check([write] * 10, self.ioclass()) + self.check([writelines] * 10, self.ioclass()) + self.check([write] * 10 + [truncate] * 10, self.ioclass()) + self.check([truncate] + [read] * 10, self.ioclass(b'0\n'*204800)) + self.check([truncate] + [read1] * 10, self.ioclass(b'0\n'*204800)) + self.check([truncate] + [readline] * 10, self.ioclass(b'0\n'*20480)) + self.check([truncate] + [readlines] * 10, self.ioclass(b'0\n'*20480)) + self.check([truncate] + [readinto] * 10, self.ioclass(b'0\n'*204800), bytearray(b'0\n'*204800)) + self.check([close] + [write] * 10, self.ioclass()) + self.check([truncate] + 
[getvalue] * 10, self.ioclass(b'0\n'*204800)) + self.check([truncate] + [getbuffer] * 10, self.ioclass(b'0\n'*204800)) + self.check([truncate] + [iter] * 10, self.ioclass(b'0\n'*20480)) + self.check([truncate] + [getstate] * 10, self.ioclass(b'0\n'*204800)) + state = self.ioclass(b'123').__getstate__() + self.check([truncate] + [setstate] * 10, self.ioclass(b'0\n'*204800), state) + self.check([truncate] + [sizeof] * 10, self.ioclass(b'0\n'*204800)) # no tests for seek or tell because they don't break anything + +class CBytesIOTest(ThreadSafetyMixin, TestCase): + ioclass = io.BytesIO + +class PyBytesIOTest(ThreadSafetyMixin, TestCase): + ioclass = pyio.BytesIO diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 0c921ffbc25..b487bcabf01 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -9,6 +9,7 @@ # * test_univnewlines - tests universal newline support # * test_largefile - tests operations on a file greater than 2**32 bytes # (only enabled with -ulargefile) +# * test_free_threading/test_io - tests thread safety of io objects ################################################################################ # ATTENTION TEST WRITERS!!! diff --git a/Misc/NEWS.d/next/Library/2025-07-02-18-41-45.gh-issue-133982.7qqAn6.rst b/Misc/NEWS.d/next/Library/2025-07-02-18-41-45.gh-issue-133982.7qqAn6.rst new file mode 100644 index 00000000000..a2d0810cebe --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-02-18-41-45.gh-issue-133982.7qqAn6.rst @@ -0,0 +1 @@ +Update the pure-Python implementation of :class:`io.BytesIO` to be thread-safe.