mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 11:49:12 +00:00 
			
		
		
		
	#2523: binary buffered reading is quadratic
This commit is contained in:
		
							parent
							
								
									711419388c
								
							
						
					
					
						commit
						c66f909f43
					
				
					 2 changed files with 64 additions and 30 deletions
				
			
		
							
								
								
									
										91
									
								
								Lib/io.py
									
										
									
									
									
								
							
							
						
						
									
										91
									
								
								Lib/io.py
									
										
									
									
									
								
							| 
						 | 
					@ -893,8 +893,12 @@ class BufferedReader(_BufferedIOMixin):
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        raw._checkReadable()
 | 
					        raw._checkReadable()
 | 
				
			||||||
        _BufferedIOMixin.__init__(self, raw)
 | 
					        _BufferedIOMixin.__init__(self, raw)
 | 
				
			||||||
        self._read_buf = b""
 | 
					 | 
				
			||||||
        self.buffer_size = buffer_size
 | 
					        self.buffer_size = buffer_size
 | 
				
			||||||
 | 
					        self._reset_read_buf()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _reset_read_buf(self):
 | 
				
			||||||
 | 
					        self._read_buf = b""
 | 
				
			||||||
 | 
					        self._read_pos = 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def read(self, n=None):
 | 
					    def read(self, n=None):
 | 
				
			||||||
        """Read n bytes.
 | 
					        """Read n bytes.
 | 
				
			||||||
| 
						 | 
					@ -904,25 +908,50 @@ class BufferedReader(_BufferedIOMixin):
 | 
				
			||||||
        mode. If n is negative, read until EOF or until read() would
 | 
					        mode. If n is negative, read until EOF or until read() would
 | 
				
			||||||
        block.
 | 
					        block.
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        if n is None:
 | 
					 | 
				
			||||||
            n = -1
 | 
					 | 
				
			||||||
        nodata_val = b""
 | 
					        nodata_val = b""
 | 
				
			||||||
        while n < 0 or len(self._read_buf) < n:
 | 
					        empty_values = (b"", None)
 | 
				
			||||||
            to_read = max(self.buffer_size,
 | 
					        buf = self._read_buf
 | 
				
			||||||
                          n if n is not None else 2*len(self._read_buf))
 | 
					        pos = self._read_pos
 | 
				
			||||||
            current = self.raw.read(to_read)
 | 
					
 | 
				
			||||||
            if current in (b"", None):
 | 
					        # Special case for when the number of bytes to read is unspecified.
 | 
				
			||||||
                nodata_val = current
 | 
					        if n is None or n == -1:
 | 
				
			||||||
 | 
					            self._reset_read_buf()
 | 
				
			||||||
 | 
					            chunks = [buf[pos:]]  # Strip the consumed bytes.
 | 
				
			||||||
 | 
					            current_size = 0
 | 
				
			||||||
 | 
					            while True:
 | 
				
			||||||
 | 
					                # Read until EOF or until read() would block.
 | 
				
			||||||
 | 
					                chunk = self.raw.read()
 | 
				
			||||||
 | 
					                if chunk in empty_values:
 | 
				
			||||||
 | 
					                    nodata_val = chunk
 | 
				
			||||||
                    break
 | 
					                    break
 | 
				
			||||||
            self._read_buf += current
 | 
					                current_size += len(chunk)
 | 
				
			||||||
        if self._read_buf:
 | 
					                chunks.append(chunk)
 | 
				
			||||||
            if n < 0:
 | 
					            return b"".join(chunks) or nodata_val
 | 
				
			||||||
                n = len(self._read_buf)
 | 
					
 | 
				
			||||||
            out = self._read_buf[:n]
 | 
					        # The number of bytes to read is specified, return at most n bytes.
 | 
				
			||||||
            self._read_buf = self._read_buf[n:]
 | 
					        avail = len(buf) - pos  # Length of the available buffered data.
 | 
				
			||||||
        else:
 | 
					        if n <= avail:
 | 
				
			||||||
            out = nodata_val
 | 
					            # Fast path: the data to read is fully buffered.
 | 
				
			||||||
        return out
 | 
					            self._read_pos += n
 | 
				
			||||||
 | 
					            return buf[pos:pos+n]
 | 
				
			||||||
 | 
					        # Slow path: read from the stream until enough bytes are read,
 | 
				
			||||||
 | 
					        # or until an EOF occurs or until read() would block.
 | 
				
			||||||
 | 
					        chunks = [buf[pos:]]
 | 
				
			||||||
 | 
					        wanted = max(self.buffer_size, n)
 | 
				
			||||||
 | 
					        while avail < n:
 | 
				
			||||||
 | 
					            chunk = self.raw.read(wanted)
 | 
				
			||||||
 | 
					            if chunk in empty_values:
 | 
				
			||||||
 | 
					                nodata_val = chunk
 | 
				
			||||||
 | 
					                break
 | 
				
			||||||
 | 
					            avail += len(chunk)
 | 
				
			||||||
 | 
					            chunks.append(chunk)
 | 
				
			||||||
 | 
					        # n is more than avail only when an EOF occurred or when
 | 
				
			||||||
 | 
					        # read() would have blocked.
 | 
				
			||||||
 | 
					        n = min(n, avail)
 | 
				
			||||||
 | 
					        out = b"".join(chunks)
 | 
				
			||||||
 | 
					        self._read_buf = out[n:]  # Save the extra data in the buffer.
 | 
				
			||||||
 | 
					        self._read_pos = 0
 | 
				
			||||||
 | 
					        return out[:n] if out else nodata_val
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def peek(self, n=0):
 | 
					    def peek(self, n=0):
 | 
				
			||||||
        """Returns buffered bytes without advancing the position.
 | 
					        """Returns buffered bytes without advancing the position.
 | 
				
			||||||
| 
						 | 
					@ -932,13 +961,14 @@ class BufferedReader(_BufferedIOMixin):
 | 
				
			||||||
        than self.buffer_size.
 | 
					        than self.buffer_size.
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        want = min(n, self.buffer_size)
 | 
					        want = min(n, self.buffer_size)
 | 
				
			||||||
        have = len(self._read_buf)
 | 
					        have = len(self._read_buf) - self._read_pos
 | 
				
			||||||
        if have < want:
 | 
					        if have < want:
 | 
				
			||||||
            to_read = self.buffer_size - have
 | 
					            to_read = self.buffer_size - have
 | 
				
			||||||
            current = self.raw.read(to_read)
 | 
					            current = self.raw.read(to_read)
 | 
				
			||||||
            if current:
 | 
					            if current:
 | 
				
			||||||
                self._read_buf += current
 | 
					                self._read_buf = self._read_buf[self._read_pos:] + current
 | 
				
			||||||
        return self._read_buf
 | 
					                self._read_pos = 0
 | 
				
			||||||
 | 
					        return self._read_buf[self._read_pos:]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def read1(self, n):
 | 
					    def read1(self, n):
 | 
				
			||||||
        """Reads up to n bytes, with at most one read() system call."""
 | 
					        """Reads up to n bytes, with at most one read() system call."""
 | 
				
			||||||
| 
						 | 
					@ -947,16 +977,16 @@ class BufferedReader(_BufferedIOMixin):
 | 
				
			||||||
        if n <= 0:
 | 
					        if n <= 0:
 | 
				
			||||||
            return b""
 | 
					            return b""
 | 
				
			||||||
        self.peek(1)
 | 
					        self.peek(1)
 | 
				
			||||||
        return self.read(min(n, len(self._read_buf)))
 | 
					        return self.read(min(n, len(self._read_buf) - self._read_pos))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def tell(self):
 | 
					    def tell(self):
 | 
				
			||||||
        return self.raw.tell() - len(self._read_buf)
 | 
					        return self.raw.tell() - len(self._read_buf) + self._read_pos
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def seek(self, pos, whence=0):
 | 
					    def seek(self, pos, whence=0):
 | 
				
			||||||
        if whence == 1:
 | 
					        if whence == 1:
 | 
				
			||||||
            pos -= len(self._read_buf)
 | 
					            pos -= len(self._read_buf) - self._read_pos
 | 
				
			||||||
        pos = self.raw.seek(pos, whence)
 | 
					        pos = self.raw.seek(pos, whence)
 | 
				
			||||||
        self._read_buf = b""
 | 
					        self._reset_read_buf()
 | 
				
			||||||
        return pos
 | 
					        return pos
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1125,14 +1155,14 @@ class BufferedRandom(BufferedWriter, BufferedReader):
 | 
				
			||||||
        # First do the raw seek, then empty the read buffer, so that
 | 
					        # First do the raw seek, then empty the read buffer, so that
 | 
				
			||||||
        # if the raw seek fails, we don't lose buffered data forever.
 | 
					        # if the raw seek fails, we don't lose buffered data forever.
 | 
				
			||||||
        pos = self.raw.seek(pos, whence)
 | 
					        pos = self.raw.seek(pos, whence)
 | 
				
			||||||
        self._read_buf = b""
 | 
					        self._reset_read_buf()
 | 
				
			||||||
        return pos
 | 
					        return pos
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def tell(self):
 | 
					    def tell(self):
 | 
				
			||||||
        if (self._write_buf):
 | 
					        if self._write_buf:
 | 
				
			||||||
            return self.raw.tell() + len(self._write_buf)
 | 
					            return self.raw.tell() + len(self._write_buf)
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            return self.raw.tell() - len(self._read_buf)
 | 
					            return BufferedReader.tell(self)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def truncate(self, pos=None):
 | 
					    def truncate(self, pos=None):
 | 
				
			||||||
        if pos is None:
 | 
					        if pos is None:
 | 
				
			||||||
| 
						 | 
					@ -1161,8 +1191,9 @@ class BufferedRandom(BufferedWriter, BufferedReader):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def write(self, b):
 | 
					    def write(self, b):
 | 
				
			||||||
        if self._read_buf:
 | 
					        if self._read_buf:
 | 
				
			||||||
            self.raw.seek(-len(self._read_buf), 1) # Undo readahead
 | 
					            # Undo readahead
 | 
				
			||||||
            self._read_buf = b""
 | 
					            self.raw.seek(self._read_pos - len(self._read_buf), 1)
 | 
				
			||||||
 | 
					            self._reset_read_buf()
 | 
				
			||||||
        return BufferedWriter.write(self, b)
 | 
					        return BufferedWriter.write(self, b)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -22,6 +22,9 @@ Library
 | 
				
			||||||
  file name rather than a ZipInfo instance, so files are extracted with
 | 
					  file name rather than a ZipInfo instance, so files are extracted with
 | 
				
			||||||
  mode 0600 rather than 000 under Unix.
 | 
					  mode 0600 rather than 000 under Unix.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- Issue #2523: Fix quadratic behaviour when read()ing a binary file without
 | 
				
			||||||
 | 
					  asking for a specific length.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
What's new in Python 3.0b2?
 | 
					What's new in Python 3.0b2?
 | 
				
			||||||
===========================
 | 
					===========================
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue