mirror of
https://github.com/python/cpython.git
synced 2025-09-27 10:50:04 +00:00
Make the StreamReader's attributes and local variables str objects instead
of unicode objects, so that codecs that do a str->str decoding won't have their result promoted to unicode. This fixes SF bug #1241507.
This commit is contained in:
parent
21c825417f
commit
c9878e1b22
2 changed files with 24 additions and 5 deletions
|
@ -229,7 +229,9 @@ class StreamReader(Codec):
|
||||||
self.stream = stream
|
self.stream = stream
|
||||||
self.errors = errors
|
self.errors = errors
|
||||||
self.bytebuffer = ""
|
self.bytebuffer = ""
|
||||||
self.charbuffer = u""
|
# For str->str decoding this will stay a str
|
||||||
|
# For str->unicode decoding the first read will promote it to unicode
|
||||||
|
self.charbuffer = ""
|
||||||
|
|
||||||
def decode(self, input, errors='strict'):
|
def decode(self, input, errors='strict'):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
@ -284,7 +286,7 @@ class StreamReader(Codec):
|
||||||
if chars < 0:
|
if chars < 0:
|
||||||
# Return everything we've got
|
# Return everything we've got
|
||||||
result = self.charbuffer
|
result = self.charbuffer
|
||||||
self.charbuffer = u""
|
self.charbuffer = ""
|
||||||
else:
|
else:
|
||||||
# Return the first chars characters
|
# Return the first chars characters
|
||||||
result = self.charbuffer[:chars]
|
result = self.charbuffer[:chars]
|
||||||
|
@ -301,7 +303,7 @@ class StreamReader(Codec):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
readsize = size or 72
|
readsize = size or 72
|
||||||
line = u""
|
line = ""
|
||||||
# If size is given, we call read() only once
|
# If size is given, we call read() only once
|
||||||
while True:
|
while True:
|
||||||
data = self.read(readsize)
|
data = self.read(readsize)
|
||||||
|
@ -309,7 +311,7 @@ class StreamReader(Codec):
|
||||||
# If we're at a "\r" read one extra character (which might
|
# If we're at a "\r" read one extra character (which might
|
||||||
# be a "\n") to get a proper line ending. If the stream is
|
# be a "\n") to get a proper line ending. If the stream is
|
||||||
# temporarily exhausted we return the wrong line ending.
|
# temporarily exhausted we return the wrong line ending.
|
||||||
if data.endswith(u"\r"):
|
if data.endswith("\r"):
|
||||||
data += self.read(size=1, chars=1)
|
data += self.read(size=1, chars=1)
|
||||||
|
|
||||||
line += data
|
line += data
|
||||||
|
@ -319,7 +321,7 @@ class StreamReader(Codec):
|
||||||
line0withoutend = lines[0].splitlines(False)[0]
|
line0withoutend = lines[0].splitlines(False)[0]
|
||||||
if line0withend != line0withoutend: # We really have a line end
|
if line0withend != line0withoutend: # We really have a line end
|
||||||
# Put the rest back together and keep it until the next call
|
# Put the rest back together and keep it until the next call
|
||||||
self.charbuffer = u"".join(lines[1:]) + self.charbuffer
|
self.charbuffer = "".join(lines[1:]) + self.charbuffer
|
||||||
if keepends:
|
if keepends:
|
||||||
line = line0withend
|
line = line0withend
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -663,6 +663,22 @@ class StreamReaderTest(unittest.TestCase):
|
||||||
f = self.reader(self.stream)
|
f = self.reader(self.stream)
|
||||||
self.assertEquals(f.readlines(), [u'\ud55c\n', u'\uae00'])
|
self.assertEquals(f.readlines(), [u'\ud55c\n', u'\uae00'])
|
||||||
|
|
||||||
|
class Str2StrTest(unittest.TestCase):
|
||||||
|
|
||||||
|
def test_read(self):
|
||||||
|
sin = "\x80".encode("base64_codec")
|
||||||
|
reader = codecs.getreader("base64_codec")(StringIO.StringIO(sin))
|
||||||
|
sout = reader.read()
|
||||||
|
self.assertEqual(sout, "\x80")
|
||||||
|
self.assert_(isinstance(sout, str))
|
||||||
|
|
||||||
|
def test_readline(self):
|
||||||
|
sin = "\x80".encode("base64_codec")
|
||||||
|
reader = codecs.getreader("base64_codec")(StringIO.StringIO(sin))
|
||||||
|
sout = reader.readline()
|
||||||
|
self.assertEqual(sout, "\x80")
|
||||||
|
self.assert_(isinstance(sout, str))
|
||||||
|
|
||||||
all_unicode_encodings = [
|
all_unicode_encodings = [
|
||||||
"ascii",
|
"ascii",
|
||||||
"base64_codec",
|
"base64_codec",
|
||||||
|
@ -867,6 +883,7 @@ def test_main():
|
||||||
CodecTest,
|
CodecTest,
|
||||||
CodecsModuleTest,
|
CodecsModuleTest,
|
||||||
StreamReaderTest,
|
StreamReaderTest,
|
||||||
|
Str2StrTest,
|
||||||
BasicUnicodeTest,
|
BasicUnicodeTest,
|
||||||
BasicStrTest
|
BasicStrTest
|
||||||
)
|
)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue