Issue #5915: Implement PEP 383, Non-decodable Bytes in System Character Interfaces.
This commit is contained in:
Martin v. Löwis 2009-05-05 04:43:17 +00:00
parent 93f65a177b
commit 011e842033
15 changed files with 726 additions and 289 deletions

View file

@ -1516,6 +1516,34 @@ class TypesTest(unittest.TestCase):
self.assertEquals(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6))
self.assertEquals(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6))
class Utf8bTest(unittest.TestCase):
def test_utf8(self):
# Bad byte
self.assertEqual(b"foo\x80bar".decode("utf-8", "utf8b"),
"foo\udc80bar")
self.assertEqual("foo\udc80bar".encode("utf-8", "utf8b"),
b"foo\x80bar")
# bad-utf-8 encoded surrogate
self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "utf8b"),
"\udced\udcb0\udc80")
self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "utf8b"),
b"\xed\xb0\x80")
def test_ascii(self):
# bad byte
self.assertEqual(b"foo\x80bar".decode("ascii", "utf8b"),
"foo\udc80bar")
self.assertEqual("foo\udc80bar".encode("ascii", "utf8b"),
b"foo\x80bar")
def test_charmap(self):
# bad byte: \xa5 is unmapped in iso-8859-3
self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "utf8b"),
"foo\udca5bar")
self.assertEqual("foo\udca5bar".encode("iso-8859-3", "utf8b"),
b"foo\xa5bar")
def test_main():
support.run_unittest(
@ -1543,6 +1571,7 @@ def test_main():
CharmapTest,
WithStmtTest,
TypesTest,
Utf8bTest,
)

View file

@ -7,6 +7,7 @@ import errno
import unittest
import warnings
import sys
import shutil
from test import support
# Tests creating TESTFN
@ -698,9 +699,44 @@ if sys.platform != 'win32':
self.assertRaises(os.error, os.setregid, 0, 0)
self.assertRaises(OverflowError, os.setregid, 1<<32, 0)
self.assertRaises(OverflowError, os.setregid, 0, 1<<32)
class Pep383Tests(unittest.TestCase):
filenames = [b'foo\xf6bar', 'foo\xf6bar'.encode("utf-8")]
def setUp(self):
self.fsencoding = sys.getfilesystemencoding()
sys.setfilesystemencoding("utf-8")
self.dir = support.TESTFN
self.bdir = self.dir.encode("utf-8", "utf8b")
os.mkdir(self.dir)
self.unicodefn = []
for fn in self.filenames:
f = open(os.path.join(self.bdir, fn), "w")
f.close()
self.unicodefn.append(fn.decode("utf-8", "utf8b"))
def tearDown(self):
shutil.rmtree(self.dir)
sys.setfilesystemencoding(self.fsencoding)
def test_listdir(self):
expected = set(self.unicodefn)
found = set(os.listdir(support.TESTFN))
self.assertEquals(found, expected)
def test_open(self):
for fn in self.unicodefn:
f = open(os.path.join(self.dir, fn))
f.close()
def test_stat(self):
for fn in self.unicodefn:
os.stat(os.path.join(self.dir, fn))
else:
# Empty stand-in defined in the else branch (the enclosing condition appears
# to be `sys.platform != 'win32'` - confirm against the full file), so that
# test_main() can list PosixUidGidTests when the real tests are not defined.
class PosixUidGidTests(unittest.TestCase):
pass
# Empty stand-in for the Pep383Tests class of the other branch; it contributes
# no test methods and only keeps the name available for test_main().
class Pep383Tests(unittest.TestCase):
pass
def test_main():
support.run_unittest(
@ -714,7 +750,8 @@ def test_main():
ExecTests,
Win32ErrorTests,
TestInvalidFD,
PosixUidGidTests
PosixUidGidTests,
Pep383Tests
)
if __name__ == "__main__":