mirror of
https://github.com/python/cpython.git
synced 2025-08-04 08:59:19 +00:00
Return complete lines from codec stream readers
Complete lines are now returned even if a later line raises a decoding exception, so that decoding errors in source code are reported with the correct line numbers. Fixes #1178484. Will backport to 2.4.
This commit is contained in:
parent
6d2b346140
commit
56066d2e55
3 changed files with 26 additions and 4 deletions
|
@ -236,7 +236,7 @@ class StreamReader(Codec):
|
|||
def decode(self, input, errors='strict'):
|
||||
raise NotImplementedError
|
||||
|
||||
def read(self, size=-1, chars=-1):
|
||||
def read(self, size=-1, chars=-1, firstline=False):
|
||||
|
||||
""" Decodes data from the stream self.stream and returns the
|
||||
resulting object.
|
||||
|
@ -253,6 +253,11 @@ class StreamReader(Codec):
|
|||
is intended to prevent having to decode huge files in one
|
||||
step.
|
||||
|
||||
If firstline is true, and a UnicodeDecodeError happens
|
||||
after the first line terminator in the input, only the first line
|
||||
will be returned; the rest of the input will be kept until the
|
||||
next call to read().
|
||||
|
||||
The method should use a greedy read strategy meaning that
|
||||
it should read as much data as is allowed within the
|
||||
definition of the encoding and the given size, e.g. if
|
||||
|
@ -275,7 +280,16 @@ class StreamReader(Codec):
|
|||
newdata = self.stream.read(size)
|
||||
# decode bytes (those remaining from the last call included)
|
||||
data = self.bytebuffer + newdata
|
||||
newchars, decodedbytes = self.decode(data, self.errors)
|
||||
try:
|
||||
newchars, decodedbytes = self.decode(data, self.errors)
|
||||
except UnicodeDecodeError, exc:
|
||||
if firstline:
|
||||
newchars, decodedbytes = self.decode(data[:exc.start], self.errors)
|
||||
lines = newchars.splitlines(True)
|
||||
if len(lines)<=1:
|
||||
raise
|
||||
else:
|
||||
raise
|
||||
# keep undecoded bytes until the next call
|
||||
self.bytebuffer = data[decodedbytes:]
|
||||
# put new characters in the character buffer
|
||||
|
@ -306,7 +320,7 @@ class StreamReader(Codec):
|
|||
line = ""
|
||||
# If size is given, we call read() only once
|
||||
while True:
|
||||
data = self.read(readsize)
|
||||
data = self.read(readsize, firstline=True)
|
||||
if data:
|
||||
# If we're at a "\r" read one extra character (which might
|
||||
# be a "\n") to get a proper line ending. If the stream is
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue