mirror of
https://github.com/python/cpython.git
synced 2025-09-27 18:59:43 +00:00
Issue #15068: Got rid of excessive buffering in the fileinput module.
The bufsize parameter is no longer used.
This commit is contained in:
parent
55e3218eee
commit
cc2dbc5844
4 changed files with 153 additions and 91 deletions
|
@ -71,6 +71,9 @@ The following function is the primary interface of this module:
|
||||||
.. versionchanged:: 3.2
|
.. versionchanged:: 3.2
|
||||||
Can be used as a context manager.
|
Can be used as a context manager.
|
||||||
|
|
||||||
|
.. versionchanged:: 3.5.2
|
||||||
|
The *bufsize* parameter is no longer used.
|
||||||
|
|
||||||
|
|
||||||
The following functions use the global state created by :func:`fileinput.input`;
|
The following functions use the global state created by :func:`fileinput.input`;
|
||||||
if there is no active state, :exc:`RuntimeError` is raised.
|
if there is no active state, :exc:`RuntimeError` is raised.
|
||||||
|
@ -163,6 +166,9 @@ available for subclassing as well:
|
||||||
.. deprecated:: 3.4
|
.. deprecated:: 3.4
|
||||||
The ``'rU'`` and ``'U'`` modes.
|
The ``'rU'`` and ``'U'`` modes.
|
||||||
|
|
||||||
|
.. versionchanged:: 3.5.2
|
||||||
|
The *bufsize* parameter is no longer used.
|
||||||
|
|
||||||
|
|
||||||
**Optional in-place filtering:** if the keyword argument ``inplace=True`` is
|
**Optional in-place filtering:** if the keyword argument ``inplace=True`` is
|
||||||
passed to :func:`fileinput.input` or to the :class:`FileInput` constructor, the
|
passed to :func:`fileinput.input` or to the :class:`FileInput` constructor, the
|
||||||
|
|
|
@ -64,13 +64,6 @@ deleted when the output file is closed. In-place filtering is
|
||||||
disabled when standard input is read. XXX The current implementation
|
disabled when standard input is read. XXX The current implementation
|
||||||
does not work for MS-DOS 8+3 filesystems.
|
does not work for MS-DOS 8+3 filesystems.
|
||||||
|
|
||||||
Performance: this module is unfortunately one of the slower ways of
|
|
||||||
processing large numbers of input lines. Nevertheless, a significant
|
|
||||||
speed-up has been obtained by using readlines(bufsize) instead of
|
|
||||||
readline(). A new keyword argument, bufsize=N, is present on the
|
|
||||||
input() function and the FileInput() class to override the default
|
|
||||||
buffer size.
|
|
||||||
|
|
||||||
XXX Possible additions:
|
XXX Possible additions:
|
||||||
|
|
||||||
- optional getopt argument processing
|
- optional getopt argument processing
|
||||||
|
@ -86,6 +79,7 @@ __all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
|
||||||
|
|
||||||
_state = None
|
_state = None
|
||||||
|
|
||||||
|
# No longer used
|
||||||
DEFAULT_BUFSIZE = 8*1024
|
DEFAULT_BUFSIZE = 8*1024
|
||||||
|
|
||||||
def input(files=None, inplace=False, backup="", bufsize=0,
|
def input(files=None, inplace=False, backup="", bufsize=0,
|
||||||
|
@ -207,17 +201,15 @@ class FileInput:
|
||||||
self._files = files
|
self._files = files
|
||||||
self._inplace = inplace
|
self._inplace = inplace
|
||||||
self._backup = backup
|
self._backup = backup
|
||||||
self._bufsize = bufsize or DEFAULT_BUFSIZE
|
|
||||||
self._savestdout = None
|
self._savestdout = None
|
||||||
self._output = None
|
self._output = None
|
||||||
self._filename = None
|
self._filename = None
|
||||||
self._lineno = 0
|
self._startlineno = 0
|
||||||
self._filelineno = 0
|
self._filelineno = 0
|
||||||
self._file = None
|
self._file = None
|
||||||
|
self._readline = self._start_readline
|
||||||
self._isstdin = False
|
self._isstdin = False
|
||||||
self._backupfilename = None
|
self._backupfilename = None
|
||||||
self._buffer = []
|
|
||||||
self._bufindex = 0
|
|
||||||
# restrict mode argument to reading modes
|
# restrict mode argument to reading modes
|
||||||
if mode not in ('r', 'rU', 'U', 'rb'):
|
if mode not in ('r', 'rU', 'U', 'rb'):
|
||||||
raise ValueError("FileInput opening mode must be one of "
|
raise ValueError("FileInput opening mode must be one of "
|
||||||
|
@ -253,22 +245,18 @@ class FileInput:
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def __next__(self):
|
def __next__(self):
|
||||||
try:
|
line = self._readline()
|
||||||
line = self._buffer[self._bufindex]
|
if line:
|
||||||
except IndexError:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
self._bufindex += 1
|
|
||||||
self._lineno += 1
|
|
||||||
self._filelineno += 1
|
self._filelineno += 1
|
||||||
return line
|
return line
|
||||||
line = self.readline()
|
if not self._file:
|
||||||
if not line:
|
|
||||||
raise StopIteration
|
raise StopIteration
|
||||||
return line
|
self.nextfile()
|
||||||
|
# Recursive call
|
||||||
|
return self.__next__()
|
||||||
|
|
||||||
def __getitem__(self, i):
|
def __getitem__(self, i):
|
||||||
if i != self._lineno:
|
if i != self.lineno():
|
||||||
raise RuntimeError("accessing lines out of order")
|
raise RuntimeError("accessing lines out of order")
|
||||||
try:
|
try:
|
||||||
return self.__next__()
|
return self.__next__()
|
||||||
|
@ -289,6 +277,7 @@ class FileInput:
|
||||||
finally:
|
finally:
|
||||||
file = self._file
|
file = self._file
|
||||||
self._file = None
|
self._file = None
|
||||||
|
self._readline = self._start_readline
|
||||||
try:
|
try:
|
||||||
if file and not self._isstdin:
|
if file and not self._isstdin:
|
||||||
file.close()
|
file.close()
|
||||||
|
@ -300,20 +289,19 @@ class FileInput:
|
||||||
except OSError: pass
|
except OSError: pass
|
||||||
|
|
||||||
self._isstdin = False
|
self._isstdin = False
|
||||||
self._buffer = []
|
|
||||||
self._bufindex = 0
|
|
||||||
|
|
||||||
def readline(self):
|
def readline(self):
|
||||||
try:
|
while True:
|
||||||
line = self._buffer[self._bufindex]
|
line = self._readline()
|
||||||
except IndexError:
|
if line:
|
||||||
pass
|
|
||||||
else:
|
|
||||||
self._bufindex += 1
|
|
||||||
self._lineno += 1
|
|
||||||
self._filelineno += 1
|
self._filelineno += 1
|
||||||
return line
|
return line
|
||||||
if not self._file:
|
if not self._file:
|
||||||
|
return line
|
||||||
|
self.nextfile()
|
||||||
|
# repeat with next file
|
||||||
|
|
||||||
|
def _start_readline(self):
|
||||||
if not self._files:
|
if not self._files:
|
||||||
if 'b' in self._mode:
|
if 'b' in self._mode:
|
||||||
return b''
|
return b''
|
||||||
|
@ -321,6 +309,7 @@ class FileInput:
|
||||||
return ''
|
return ''
|
||||||
self._filename = self._files[0]
|
self._filename = self._files[0]
|
||||||
self._files = self._files[1:]
|
self._files = self._files[1:]
|
||||||
|
self._startlineno = self.lineno()
|
||||||
self._filelineno = 0
|
self._filelineno = 0
|
||||||
self._file = None
|
self._file = None
|
||||||
self._isstdin = False
|
self._isstdin = False
|
||||||
|
@ -367,18 +356,14 @@ class FileInput:
|
||||||
self._file = self._openhook(self._filename, self._mode)
|
self._file = self._openhook(self._filename, self._mode)
|
||||||
else:
|
else:
|
||||||
self._file = open(self._filename, self._mode)
|
self._file = open(self._filename, self._mode)
|
||||||
self._buffer = self._file.readlines(self._bufsize)
|
self._readline = self._file.readline
|
||||||
self._bufindex = 0
|
return self._readline()
|
||||||
if not self._buffer:
|
|
||||||
self.nextfile()
|
|
||||||
# Recursive call
|
|
||||||
return self.readline()
|
|
||||||
|
|
||||||
def filename(self):
|
def filename(self):
|
||||||
return self._filename
|
return self._filename
|
||||||
|
|
||||||
def lineno(self):
|
def lineno(self):
|
||||||
return self._lineno
|
return self._startlineno + self._filelineno
|
||||||
|
|
||||||
def filelineno(self):
|
def filelineno(self):
|
||||||
return self._filelineno
|
return self._filelineno
|
||||||
|
|
|
@ -46,6 +46,42 @@ def remove_tempfiles(*names):
|
||||||
if name:
|
if name:
|
||||||
safe_unlink(name)
|
safe_unlink(name)
|
||||||
|
|
||||||
|
class LineReader:
    # Minimal file-like test double.  openhook() turns the "filename"
    # string itself into the file contents, and every line handed out by
    # readline() (including the '' returned at end of data) is recorded so
    # a test can see exactly how much the consumer has read so far.

    def __init__(self):
        self._linesread = []
        # Start with an exhausted source so readline() before openhook()
        # reports end-of-file instead of raising AttributeError.
        self.it = iter(())

    @property
    def linesread(self):
        # Return the lines recorded since the previous access and reset
        # the record, so each access reports only the new reads.
        recorded, self._linesread = self._linesread, []
        return recorded

    def openhook(self, filename, mode):
        # *filename* is treated as the text to serve, split with line
        # endings kept; *mode* is accepted for signature compatibility
        # and ignored.
        self.it = iter(filename.splitlines(True))
        return self

    def readline(self, size=None):
        # *size* is accepted for signature compatibility and ignored.
        # Returns '' once the data is exhausted; that '' is recorded too.
        line = next(self.it, '')
        self._linesread.append(line)
        return line

    def readlines(self, hint=-1):
        # Collect lines until end of data, or until the total length of
        # the collected lines reaches *hint*.  As with io.IOBase.readlines,
        # a hint of None, 0 or a negative number means "no limit"; the
        # previous ``size >= hint`` check stopped after a single line for
        # the default hint of -1.
        limited = hint is not None and hint > 0
        lines = []
        size = 0
        while True:
            line = self.readline()
            if not line:
                return lines
            lines.append(line)
            size += len(line)
            if limited and size >= hint:
                return lines

    def close(self):
        # Nothing to release; present only to satisfy the file interface.
        pass
|
||||||
|
|
||||||
class BufferSizesTests(unittest.TestCase):
|
class BufferSizesTests(unittest.TestCase):
|
||||||
def test_buffer_sizes(self):
|
def test_buffer_sizes(self):
|
||||||
# First, run the tests with default and teeny buffer size.
|
# First, run the tests with default and teeny buffer size.
|
||||||
|
@ -289,7 +325,7 @@ class FileInputTests(unittest.TestCase):
|
||||||
self.addCleanup(safe_unlink, TESTFN)
|
self.addCleanup(safe_unlink, TESTFN)
|
||||||
|
|
||||||
with FileInput(files=TESTFN,
|
with FileInput(files=TESTFN,
|
||||||
openhook=hook_encoded('ascii'), bufsize=8) as fi:
|
openhook=hook_encoded('ascii')) as fi:
|
||||||
try:
|
try:
|
||||||
self.assertEqual(fi.readline(), 'A\n')
|
self.assertEqual(fi.readline(), 'A\n')
|
||||||
self.assertEqual(fi.readline(), 'B\n')
|
self.assertEqual(fi.readline(), 'B\n')
|
||||||
|
@ -457,6 +493,38 @@ class FileInputTests(unittest.TestCase):
|
||||||
|
|
||||||
self.assertEqual(result, -1, "fileno() should return -1")
|
self.assertEqual(result, -1, "fileno() should return -1")
|
||||||
|
|
||||||
|
def test_readline_buffering(self):
    # FileInput.readline() must read from the underlying file one line
    # at a time.  Each step pairs the line FileInput returns with the
    # raw reads the LineReader recorded while producing it; the extra ''
    # entries are the end-of-data reads of the file being finished.
    reader = LineReader()
    steps = (
        ('line1\n', ['line1\n']),
        ('line2', ['line2']),
        ('line3\n', ['', 'line3\n']),
        ('', ['']),
        ('', []),
    )
    with FileInput(files=['line1\nline2', 'line3\n'],
                   openhook=reader.openhook) as fi:
        # Nothing may be read before the first line is requested.
        self.assertEqual(reader.linesread, [])
        for returned, consumed in steps:
            self.assertEqual(fi.readline(), returned)
            self.assertEqual(reader.linesread, consumed)
|
||||||
|
|
||||||
|
def test_iteration_buffering(self):
    # Iterating a FileInput must read from the underlying file one line
    # at a time.  Each step pairs the line yielded by next() with the raw
    # reads the LineReader recorded while producing it.
    reader = LineReader()
    yielded_steps = (
        ('line1\n', ['line1\n']),
        ('line2', ['line2']),
        ('line3\n', ['', 'line3\n']),
    )
    # Raw reads recorded by each next() call made after the data ran out.
    exhausted_steps = ([''], [])
    with FileInput(files=['line1\nline2', 'line3\n'],
                   openhook=reader.openhook) as fi:
        # Nothing may be read before the first line is requested.
        self.assertEqual(reader.linesread, [])
        for returned, consumed in yielded_steps:
            self.assertEqual(next(fi), returned)
            self.assertEqual(reader.linesread, consumed)
        for consumed in exhausted_steps:
            with self.assertRaises(StopIteration):
                next(fi)
            self.assertEqual(reader.linesread, consumed)
|
||||||
|
|
||||||
class MockFileInput:
|
class MockFileInput:
|
||||||
"""A class that mocks out fileinput.FileInput for use during unit tests"""
|
"""A class that mocks out fileinput.FileInput for use during unit tests"""
|
||||||
|
|
||||||
|
|
|
@ -91,6 +91,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #15068: Got rid of excessive buffering in the fileinput module.
|
||||||
|
The bufsize parameter is no longer used.
|
||||||
|
|
||||||
- Issue #2202: Fix UnboundLocalError in
|
- Issue #2202: Fix UnboundLocalError in
|
||||||
AbstractDigestAuthHandler.get_algorithm_impls. Initial patch by Mathieu Dupuy.
|
AbstractDigestAuthHandler.get_algorithm_impls. Initial patch by Mathieu Dupuy.
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue