mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
Merged revisions 59041-59055 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk ........ r59044 | neal.norwitz | 2007-11-18 17:46:20 -0800 (Sun, 18 Nov 2007) | 1 line Use a slightly more recent version than 1.5.2b2. ........ r59047 | walter.doerwald | 2007-11-19 04:14:05 -0800 (Mon, 19 Nov 2007) | 2 lines Fix typo in comment. ........ r59049 | walter.doerwald | 2007-11-19 04:41:10 -0800 (Mon, 19 Nov 2007) | 4 lines Fix for #1444: utf_8_sig.StreamReader was (indirectly through decode()) calling codecs.utf_8_decode() with final==True, which failed with incomplete byte sequences. Fix and test by James G. Sack. ........ r59051 | nick.coghlan | 2007-11-19 05:56:27 -0800 (Mon, 19 Nov 2007) | 1 line Enable some test_cmd_line_script debugging output to investigate failure on Mac OSX buildbot ........ r59053 | facundo.batista | 2007-11-19 08:30:24 -0800 (Mon, 19 Nov 2007) | 3 lines Fixed detail in add_type() explanation (issue 1463). ........ r59054 | guido.van.rossum | 2007-11-19 09:35:24 -0800 (Mon, 19 Nov 2007) | 2 lines Make this work stand-alone, too. ........ r59055 | guido.van.rossum | 2007-11-19 09:50:22 -0800 (Mon, 19 Nov 2007) | 3 lines Fix the OSX failures in this test -- they were due to /tmp being a symlink to /private/tmp. Adding a call to os.path.realpath() to temp_dir() fixed it. ........
This commit is contained in:
parent
87afcbfe54
commit
87c0f1d1c9
4 changed files with 66 additions and 16 deletions
|
@ -96,8 +96,8 @@ behavior of the module.
|
||||||
extension is already known, the new type will replace the old one. When the type
|
extension is already known, the new type will replace the old one. When the type
|
||||||
is already known the extension will be added to the list of known extensions.
|
is already known the extension will be added to the list of known extensions.
|
||||||
|
|
||||||
When *strict* is the mapping will added to the official MIME types, otherwise to
|
When *strict* is True (the default), the mapping will be added to the official MIME
|
||||||
the non-standard ones.
|
types, otherwise to the non-standard ones.
|
||||||
|
|
||||||
|
|
||||||
.. data:: inited
|
.. data:: inited
|
||||||
|
|
|
@ -103,12 +103,18 @@ class StreamReader(codecs.StreamReader):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def decode(self, input, errors='strict'):
|
def decode(self, input, errors='strict'):
|
||||||
if len(input) < 3 and codecs.BOM_UTF8.startswith(input):
|
if len(input) < 3:
|
||||||
|
if codecs.BOM_UTF8.startswith(input):
|
||||||
# not enough data to decide if this is a BOM
|
# not enough data to decide if this is a BOM
|
||||||
# => try again on the next call
|
# => try again on the next call
|
||||||
return ("", 0)
|
return ("", 0)
|
||||||
|
elif input[:3] == codecs.BOM_UTF8:
|
||||||
self.decode = codecs.utf_8_decode
|
self.decode = codecs.utf_8_decode
|
||||||
return decode(input, errors)
|
(output, consumed) = codecs.utf_8_decode(input[3:],errors)
|
||||||
|
return (output, consumed+3)
|
||||||
|
# (else) no BOM present
|
||||||
|
self.decode = codecs.utf_8_decode
|
||||||
|
return codecs.utf_8_decode(input, errors)
|
||||||
|
|
||||||
### encodings module API
|
### encodings module API
|
||||||
|
|
||||||
|
|
|
@ -29,6 +29,7 @@ def _run_python(*args):
|
||||||
@contextlib.contextmanager
|
@contextlib.contextmanager
|
||||||
def temp_dir():
|
def temp_dir():
|
||||||
dirname = tempfile.mkdtemp()
|
dirname = tempfile.mkdtemp()
|
||||||
|
dirname = os.path.realpath(dirname)
|
||||||
try:
|
try:
|
||||||
yield dirname
|
yield dirname
|
||||||
finally:
|
finally:
|
||||||
|
@ -82,7 +83,7 @@ def _make_test_zip(zip_dir, zip_basename, script_name):
|
||||||
zip_file.close()
|
zip_file.close()
|
||||||
# if verbose:
|
# if verbose:
|
||||||
# zip_file = zipfile.ZipFile(zip_name, 'r')
|
# zip_file = zipfile.ZipFile(zip_name, 'r')
|
||||||
# print "Contents of %r:" % zip_name
|
# print("Contents of %r:" % zip_name)
|
||||||
# zip_file.printdir()
|
# zip_file.printdir()
|
||||||
# zip_file.close()
|
# zip_file.close()
|
||||||
return zip_name
|
return zip_name
|
||||||
|
@ -90,9 +91,9 @@ def _make_test_zip(zip_dir, zip_basename, script_name):
|
||||||
class CmdLineTest(unittest.TestCase):
|
class CmdLineTest(unittest.TestCase):
|
||||||
def _check_script(self, script_name, expected_file, expected_argv0):
|
def _check_script(self, script_name, expected_file, expected_argv0):
|
||||||
exit_code, data = _run_python(script_name)
|
exit_code, data = _run_python(script_name)
|
||||||
# if verbose:
|
if verbose:
|
||||||
# print "Output from test script %r:" % script_name
|
print("Output from test script %r:" % script_name)
|
||||||
# print data
|
print(data)
|
||||||
self.assertEqual(exit_code, 0, data)
|
self.assertEqual(exit_code, 0, data)
|
||||||
printed_file = '__file__==%r' % expected_file
|
printed_file = '__file__==%r' % expected_file
|
||||||
printed_argv0 = 'sys.argv[0]==%r' % expected_argv0
|
printed_argv0 = 'sys.argv[0]==%r' % expected_argv0
|
||||||
|
|
|
@ -59,7 +59,7 @@ class MixInCheckStateHandling:
|
||||||
class ReadTest(unittest.TestCase, MixInCheckStateHandling):
|
class ReadTest(unittest.TestCase, MixInCheckStateHandling):
|
||||||
def check_partial(self, input, partialresults):
|
def check_partial(self, input, partialresults):
|
||||||
# get a StreamReader for the encoding and feed the bytestring version
|
# get a StreamReader for the encoding and feed the bytestring version
|
||||||
# of input to the reader byte by byte. Read every available from
|
# of input to the reader byte by byte. Read everything available from
|
||||||
# the StreamReader and check that the results equal the appropriate
|
# the StreamReader and check that the results equal the appropriate
|
||||||
# entries from partialresults.
|
# entries from partialresults.
|
||||||
q = Queue(b"")
|
q = Queue(b"")
|
||||||
|
@ -618,10 +618,53 @@ class UTF8SigTest(ReadTest):
|
||||||
s = "spam"
|
s = "spam"
|
||||||
self.assertEqual(d.decode(s.encode("utf-8-sig")), s)
|
self.assertEqual(d.decode(s.encode("utf-8-sig")), s)
|
||||||
|
|
||||||
def test_decoder_state(self):
|
def test_stream_bom(self):
|
||||||
u = "\x00\x7f\x80\xff\u0100\u07ff\u0800\uffff\U0010ffff"
|
unistring = "ABC\u00A1\u2200XYZ"
|
||||||
self.check_state_handling_decode(self.encoding,
|
bytestring = codecs.BOM_UTF8 + b"ABC\xC2\xA1\xE2\x88\x80XYZ"
|
||||||
u, u.encode(self.encoding))
|
|
||||||
|
reader = codecs.getreader("utf-8-sig")
|
||||||
|
for sizehint in [None] + list(range(1, 11)) + \
|
||||||
|
[64, 128, 256, 512, 1024]:
|
||||||
|
istream = reader(io.BytesIO(bytestring))
|
||||||
|
ostream = io.StringIO()
|
||||||
|
while 1:
|
||||||
|
if sizehint is not None:
|
||||||
|
data = istream.read(sizehint)
|
||||||
|
else:
|
||||||
|
data = istream.read()
|
||||||
|
|
||||||
|
if not data:
|
||||||
|
break
|
||||||
|
ostream.write(data)
|
||||||
|
|
||||||
|
got = ostream.getvalue()
|
||||||
|
self.assertEqual(got, unistring)
|
||||||
|
|
||||||
|
def test_stream_bare(self):
|
||||||
|
unistring = "ABC\u00A1\u2200XYZ"
|
||||||
|
bytestring = b"ABC\xC2\xA1\xE2\x88\x80XYZ"
|
||||||
|
|
||||||
|
reader = codecs.getreader("utf-8-sig")
|
||||||
|
for sizehint in [None] + list(range(1, 11)) + \
|
||||||
|
[64, 128, 256, 512, 1024]:
|
||||||
|
istream = reader(io.BytesIO(bytestring))
|
||||||
|
ostream = io.StringIO()
|
||||||
|
while 1:
|
||||||
|
if sizehint is not None:
|
||||||
|
data = istream.read(sizehint)
|
||||||
|
else:
|
||||||
|
data = istream.read()
|
||||||
|
|
||||||
|
if not data:
|
||||||
|
break
|
||||||
|
ostream.write(data)
|
||||||
|
|
||||||
|
got = ostream.getvalue()
|
||||||
|
self.assertEqual(got, unistring)
|
||||||
|
|
||||||
|
class EscapeDecodeTest(unittest.TestCase):
|
||||||
|
def test_empty(self):
|
||||||
|
self.assertEquals(codecs.escape_decode(""), ("", 0))
|
||||||
|
|
||||||
class RecodingTest(unittest.TestCase):
|
class RecodingTest(unittest.TestCase):
|
||||||
def test_recoding(self):
|
def test_recoding(self):
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue