mirror of
https://github.com/python/cpython.git
synced 2025-10-04 06:06:44 +00:00
Merged revisions 67762 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r67762 | antoine.pitrou | 2008-12-14 18:40:51 +0100 (dim., 14 déc. 2008) | 3 lines

  Backport r67759 (fix io.IncrementalNewlineDecoder for UTF-16 et al.).
........
This commit is contained in:
parent
30327242b3
commit
f8638a8d21
3 changed files with 89 additions and 56 deletions
27
Lib/io.py
27
Lib/io.py
|
@ -1292,25 +1292,23 @@ class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
|
||||||
"""
|
"""
|
||||||
def __init__(self, decoder, translate, errors='strict'):
|
def __init__(self, decoder, translate, errors='strict'):
|
||||||
codecs.IncrementalDecoder.__init__(self, errors=errors)
|
codecs.IncrementalDecoder.__init__(self, errors=errors)
|
||||||
self.buffer = b''
|
|
||||||
self.translate = translate
|
self.translate = translate
|
||||||
self.decoder = decoder
|
self.decoder = decoder
|
||||||
self.seennl = 0
|
self.seennl = 0
|
||||||
|
self.pendingcr = False
|
||||||
|
|
||||||
def decode(self, input, final=False):
|
def decode(self, input, final=False):
|
||||||
# decode input (with the eventual \r from a previous pass)
|
# decode input (with the eventual \r from a previous pass)
|
||||||
if self.buffer:
|
|
||||||
input = self.buffer + input
|
|
||||||
|
|
||||||
output = self.decoder.decode(input, final=final)
|
output = self.decoder.decode(input, final=final)
|
||||||
|
if self.pendingcr and (output or final):
|
||||||
|
output = "\r" + output
|
||||||
|
self.pendingcr = False
|
||||||
|
|
||||||
# retain last \r even when not translating data:
|
# retain last \r even when not translating data:
|
||||||
# then readline() is sure to get \r\n in one pass
|
# then readline() is sure to get \r\n in one pass
|
||||||
if output.endswith("\r") and not final:
|
if output.endswith("\r") and not final:
|
||||||
output = output[:-1]
|
output = output[:-1]
|
||||||
self.buffer = b'\r'
|
self.pendingcr = True
|
||||||
else:
|
|
||||||
self.buffer = b''
|
|
||||||
|
|
||||||
# Record which newlines are read
|
# Record which newlines are read
|
||||||
crlf = output.count('\r\n')
|
crlf = output.count('\r\n')
|
||||||
|
@ -1329,20 +1327,19 @@ class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
|
||||||
|
|
||||||
def getstate(self):
|
def getstate(self):
|
||||||
buf, flag = self.decoder.getstate()
|
buf, flag = self.decoder.getstate()
|
||||||
return buf + self.buffer, flag
|
flag <<= 1
|
||||||
|
if self.pendingcr:
|
||||||
|
flag |= 1
|
||||||
|
return buf, flag
|
||||||
|
|
||||||
def setstate(self, state):
|
def setstate(self, state):
|
||||||
buf, flag = state
|
buf, flag = state
|
||||||
if buf.endswith(b'\r'):
|
self.pendingcr = bool(flag & 1)
|
||||||
self.buffer = b'\r'
|
self.decoder.setstate((buf, flag >> 1))
|
||||||
buf = buf[:-1]
|
|
||||||
else:
|
|
||||||
self.buffer = b''
|
|
||||||
self.decoder.setstate((buf, flag))
|
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
self.seennl = 0
|
self.seennl = 0
|
||||||
self.buffer = b''
|
self.pendingcr = False
|
||||||
self.decoder.reset()
|
self.decoder.reset()
|
||||||
|
|
||||||
_LF = 1
|
_LF = 1
|
||||||
|
|
|
@ -680,8 +680,9 @@ class StatefulIncrementalDecoder(codecs.IncrementalDecoder):
|
||||||
@classmethod
|
@classmethod
|
||||||
def lookupTestDecoder(cls, name):
|
def lookupTestDecoder(cls, name):
|
||||||
if cls.codecEnabled and name == 'test_decoder':
|
if cls.codecEnabled and name == 'test_decoder':
|
||||||
|
latin1 = codecs.lookup('latin-1')
|
||||||
return codecs.CodecInfo(
|
return codecs.CodecInfo(
|
||||||
name='test_decoder', encode=None, decode=None,
|
name='test_decoder', encode=latin1.encode, decode=None,
|
||||||
incrementalencoder=None,
|
incrementalencoder=None,
|
||||||
streamreader=None, streamwriter=None,
|
streamreader=None, streamwriter=None,
|
||||||
incrementaldecoder=cls)
|
incrementaldecoder=cls)
|
||||||
|
@ -840,8 +841,11 @@ class TextIOWrapperTest(unittest.TestCase):
|
||||||
[ '\r\n', [ "unix\nwindows\r\n", "os9\rlast\nnonl" ] ],
|
[ '\r\n', [ "unix\nwindows\r\n", "os9\rlast\nnonl" ] ],
|
||||||
[ '\r', [ "unix\nwindows\r", "\nos9\r", "last\nnonl" ] ],
|
[ '\r', [ "unix\nwindows\r", "\nos9\r", "last\nnonl" ] ],
|
||||||
]
|
]
|
||||||
|
encodings = (
|
||||||
encodings = ('utf-8', 'latin-1')
|
'utf-8', 'latin-1',
|
||||||
|
'utf-16', 'utf-16-le', 'utf-16-be',
|
||||||
|
'utf-32', 'utf-32-le', 'utf-32-be',
|
||||||
|
)
|
||||||
|
|
||||||
# Try a range of buffer sizes to test the case where \r is the last
|
# Try a range of buffer sizes to test the case where \r is the last
|
||||||
# character in TextIOWrapper._pending_line.
|
# character in TextIOWrapper._pending_line.
|
||||||
|
@ -1195,55 +1199,83 @@ class TextIOWrapperTest(unittest.TestCase):
|
||||||
|
|
||||||
self.assertEqual(buffer.seekable(), txt.seekable())
|
self.assertEqual(buffer.seekable(), txt.seekable())
|
||||||
|
|
||||||
def test_newline_decoder(self):
|
def check_newline_decoder_utf8(self, decoder):
|
||||||
import codecs
|
# UTF-8 specific tests for a newline decoder
|
||||||
decoder = codecs.getincrementaldecoder("utf-8")()
|
def _check_decode(b, s, **kwargs):
|
||||||
decoder = io.IncrementalNewlineDecoder(decoder, translate=True)
|
# We exercise getstate() / setstate() as well as decode()
|
||||||
|
state = decoder.getstate()
|
||||||
|
self.assertEquals(decoder.decode(b, **kwargs), s)
|
||||||
|
decoder.setstate(state)
|
||||||
|
self.assertEquals(decoder.decode(b, **kwargs), s)
|
||||||
|
|
||||||
self.assertEquals(decoder.decode(b'\xe8\xa2\x88'), u"\u8888")
|
_check_decode(b'\xe8\xa2\x88', "\u8888")
|
||||||
|
|
||||||
self.assertEquals(decoder.decode(b'\xe8'), u"")
|
_check_decode(b'\xe8', "")
|
||||||
self.assertEquals(decoder.decode(b'\xa2'), u"")
|
_check_decode(b'\xa2', "")
|
||||||
self.assertEquals(decoder.decode(b'\x88'), u"\u8888")
|
_check_decode(b'\x88', "\u8888")
|
||||||
|
|
||||||
self.assertEquals(decoder.decode(b'\xe8'), u"")
|
_check_decode(b'\xe8', "")
|
||||||
|
_check_decode(b'\xa2', "")
|
||||||
|
_check_decode(b'\x88', "\u8888")
|
||||||
|
|
||||||
|
_check_decode(b'\xe8', "")
|
||||||
self.assertRaises(UnicodeDecodeError, decoder.decode, b'', final=True)
|
self.assertRaises(UnicodeDecodeError, decoder.decode, b'', final=True)
|
||||||
|
|
||||||
decoder.setstate((b'', 0))
|
decoder.reset()
|
||||||
self.assertEquals(decoder.decode(b'\n'), u"\n")
|
_check_decode(b'\n', "\n")
|
||||||
self.assertEquals(decoder.decode(b'\r'), u"")
|
_check_decode(b'\r', "")
|
||||||
self.assertEquals(decoder.decode(b'', final=True), u"\n")
|
_check_decode(b'', "\n", final=True)
|
||||||
self.assertEquals(decoder.decode(b'\r', final=True), u"\n")
|
_check_decode(b'\r', "\n", final=True)
|
||||||
|
|
||||||
self.assertEquals(decoder.decode(b'\r'), u"")
|
_check_decode(b'\r', "")
|
||||||
self.assertEquals(decoder.decode(b'a'), u"\na")
|
_check_decode(b'a', "\na")
|
||||||
|
|
||||||
self.assertEquals(decoder.decode(b'\r\r\n'), u"\n\n")
|
_check_decode(b'\r\r\n', "\n\n")
|
||||||
self.assertEquals(decoder.decode(b'\r'), u"")
|
_check_decode(b'\r', "")
|
||||||
self.assertEquals(decoder.decode(b'\r'), u"\n")
|
_check_decode(b'\r', "\n")
|
||||||
self.assertEquals(decoder.decode(b'\na'), u"\na")
|
_check_decode(b'\na', "\na")
|
||||||
|
|
||||||
self.assertEquals(decoder.decode(b'\xe8\xa2\x88\r\n'), u"\u8888\n")
|
_check_decode(b'\xe8\xa2\x88\r\n', "\u8888\n")
|
||||||
self.assertEquals(decoder.decode(b'\xe8\xa2\x88'), u"\u8888")
|
_check_decode(b'\xe8\xa2\x88', "\u8888")
|
||||||
self.assertEquals(decoder.decode(b'\n'), u"\n")
|
_check_decode(b'\n', "\n")
|
||||||
self.assertEquals(decoder.decode(b'\xe8\xa2\x88\r'), u"\u8888")
|
_check_decode(b'\xe8\xa2\x88\r', "\u8888")
|
||||||
self.assertEquals(decoder.decode(b'\n'), u"\n")
|
_check_decode(b'\n', "\n")
|
||||||
|
|
||||||
|
def check_newline_decoder(self, decoder, encoding):
|
||||||
|
result = []
|
||||||
|
encoder = codecs.getincrementalencoder(encoding)()
|
||||||
|
def _decode_bytewise(s):
|
||||||
|
for b in encoder.encode(s):
|
||||||
|
result.append(decoder.decode(b))
|
||||||
|
self.assertEquals(decoder.newlines, None)
|
||||||
|
_decode_bytewise("abc\n\r")
|
||||||
|
self.assertEquals(decoder.newlines, '\n')
|
||||||
|
_decode_bytewise("\nabc")
|
||||||
|
self.assertEquals(decoder.newlines, ('\n', '\r\n'))
|
||||||
|
_decode_bytewise("abc\r")
|
||||||
|
self.assertEquals(decoder.newlines, ('\n', '\r\n'))
|
||||||
|
_decode_bytewise("abc")
|
||||||
|
self.assertEquals(decoder.newlines, ('\r', '\n', '\r\n'))
|
||||||
|
_decode_bytewise("abc\r")
|
||||||
|
self.assertEquals("".join(result), "abc\n\nabcabc\nabcabc")
|
||||||
|
decoder.reset()
|
||||||
|
self.assertEquals(decoder.decode("abc".encode(encoding)), "abc")
|
||||||
|
self.assertEquals(decoder.newlines, None)
|
||||||
|
|
||||||
|
def test_newline_decoder(self):
|
||||||
|
encodings = (
|
||||||
|
'utf-8', 'latin-1',
|
||||||
|
'utf-16', 'utf-16-le', 'utf-16-be',
|
||||||
|
'utf-32', 'utf-32-le', 'utf-32-be',
|
||||||
|
)
|
||||||
|
for enc in encodings:
|
||||||
|
decoder = codecs.getincrementaldecoder(enc)()
|
||||||
|
decoder = io.IncrementalNewlineDecoder(decoder, translate=True)
|
||||||
|
self.check_newline_decoder(decoder, enc)
|
||||||
decoder = codecs.getincrementaldecoder("utf-8")()
|
decoder = codecs.getincrementaldecoder("utf-8")()
|
||||||
decoder = io.IncrementalNewlineDecoder(decoder, translate=True)
|
decoder = io.IncrementalNewlineDecoder(decoder, translate=True)
|
||||||
self.assertEquals(decoder.newlines, None)
|
self.check_newline_decoder_utf8(decoder)
|
||||||
decoder.decode(b"abc\n\r")
|
|
||||||
self.assertEquals(decoder.newlines, u'\n')
|
|
||||||
decoder.decode(b"\nabc")
|
|
||||||
self.assertEquals(decoder.newlines, ('\n', '\r\n'))
|
|
||||||
decoder.decode(b"abc\r")
|
|
||||||
self.assertEquals(decoder.newlines, ('\n', '\r\n'))
|
|
||||||
decoder.decode(b"abc")
|
|
||||||
self.assertEquals(decoder.newlines, ('\r', '\n', '\r\n'))
|
|
||||||
decoder.decode(b"abc\r")
|
|
||||||
decoder.reset()
|
|
||||||
self.assertEquals(decoder.decode(b"abc"), "abc")
|
|
||||||
self.assertEquals(decoder.newlines, None)
|
|
||||||
|
|
||||||
# XXX Tests for open()
|
# XXX Tests for open()
|
||||||
|
|
||||||
|
|
|
@ -32,6 +32,10 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #4574: fix a crash in io.IncrementalNewlineDecoder when a carriage
|
||||||
|
return encodes to more than one byte in the source encoding (e.g. UTF-16)
|
||||||
|
and gets split on a chunk boundary.
|
||||||
|
|
||||||
- Issue #4223: inspect.getsource() will now correctly display source code
|
- Issue #4223: inspect.getsource() will now correctly display source code
|
||||||
for packages loaded via zipimport (or any other conformant PEP 302
|
for packages loaded via zipimport (or any other conformant PEP 302
|
||||||
loader). Original patch by Alexander Belopolsky.
|
loader). Original patch by Alexander Belopolsky.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue