mirror of
https://github.com/python/cpython.git
synced 2025-10-09 16:34:44 +00:00
Issue #4574: reading a UTF-16-encoded text file crashes if a \r falls on a 64-char boundary.
This commit is contained in:
parent
ff94552763
commit
180a336f1a
3 changed files with 88 additions and 56 deletions
27
Lib/io.py
27
Lib/io.py
|
@ -1282,25 +1282,23 @@ class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
|
|||
"""
|
||||
def __init__(self, decoder, translate, errors='strict'):
|
||||
codecs.IncrementalDecoder.__init__(self, errors=errors)
|
||||
self.buffer = b''
|
||||
self.translate = translate
|
||||
self.decoder = decoder
|
||||
self.seennl = 0
|
||||
self.pendingcr = False
|
||||
|
||||
def decode(self, input, final=False):
|
||||
# decode input (with the eventual \r from a previous pass)
|
||||
if self.buffer:
|
||||
input = self.buffer + input
|
||||
|
||||
output = self.decoder.decode(input, final=final)
|
||||
if self.pendingcr and (output or final):
|
||||
output = "\r" + output
|
||||
self.pendingcr = False
|
||||
|
||||
# retain last \r even when not translating data:
|
||||
# then readline() is sure to get \r\n in one pass
|
||||
if output.endswith("\r") and not final:
|
||||
output = output[:-1]
|
||||
self.buffer = b'\r'
|
||||
else:
|
||||
self.buffer = b''
|
||||
self.pendingcr = True
|
||||
|
||||
# Record which newlines are read
|
||||
crlf = output.count('\r\n')
|
||||
|
@ -1319,20 +1317,19 @@ class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
|
|||
|
||||
def getstate(self):
|
||||
buf, flag = self.decoder.getstate()
|
||||
return buf + self.buffer, flag
|
||||
flag <<= 1
|
||||
if self.pendingcr:
|
||||
flag |= 1
|
||||
return buf, flag
|
||||
|
||||
def setstate(self, state):
|
||||
buf, flag = state
|
||||
if buf.endswith(b'\r'):
|
||||
self.buffer = b'\r'
|
||||
buf = buf[:-1]
|
||||
else:
|
||||
self.buffer = b''
|
||||
self.decoder.setstate((buf, flag))
|
||||
self.pendingcr = bool(flag & 1)
|
||||
self.decoder.setstate((buf, flag >> 1))
|
||||
|
||||
def reset(self):
|
||||
self.seennl = 0
|
||||
self.buffer = b''
|
||||
self.pendingcr = False
|
||||
self.decoder.reset()
|
||||
|
||||
_LF = 1
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue