Issue 19548: update codecs module documentation

- clarified the distinction between text encodings and other codecs
- clarified relationship with builtin open and the io module
- consolidated documentation of error handlers into one section
- clarified type constraints of some behaviours
- added tests for some of the new statements in the docs
This commit is contained in:
Nick Coghlan 2015-01-07 00:22:00 +10:00
parent fcfed19913
commit b9fdb7a452
9 changed files with 426 additions and 372 deletions

View file

@ -1139,6 +1139,8 @@ class RecodingTest(unittest.TestCase):
# Python used to crash on this at exit because of a refcount
# bug in _codecsmodule.c
self.assertTrue(f.closed)
# From RFC 3492
punycode_testcases = [
# A Arabic (Egyptian):
@ -1591,6 +1593,16 @@ class IDNACodecTest(unittest.TestCase):
self.assertEqual(encoder.encode("ample.org."), b"xn--xample-9ta.org.")
self.assertEqual(encoder.encode("", True), b"")
def test_errors(self):
"""Only supports "strict" error handler"""
"python.org".encode("idna", "strict")
b"python.org".decode("idna", "strict")
for errors in ("ignore", "replace", "backslashreplace",
"surrogateescape"):
self.assertRaises(Exception, "python.org".encode, "idna", errors)
self.assertRaises(Exception,
b"python.org".decode, "idna", errors)
class CodecsModuleTest(unittest.TestCase):
def test_decode(self):
@ -1668,6 +1680,24 @@ class CodecsModuleTest(unittest.TestCase):
for api in codecs.__all__:
getattr(codecs, api)
def test_open(self):
self.addCleanup(support.unlink, support.TESTFN)
for mode in ('w', 'r', 'r+', 'w+', 'a', 'a+'):
with self.subTest(mode), \
codecs.open(support.TESTFN, mode, 'ascii') as file:
self.assertIsInstance(file, codecs.StreamReaderWriter)
def test_undefined(self):
self.assertRaises(UnicodeError, codecs.encode, 'abc', 'undefined')
self.assertRaises(UnicodeError, codecs.decode, b'abc', 'undefined')
self.assertRaises(UnicodeError, codecs.encode, '', 'undefined')
self.assertRaises(UnicodeError, codecs.decode, b'', 'undefined')
for errors in ('strict', 'ignore', 'replace', 'backslashreplace'):
self.assertRaises(UnicodeError,
codecs.encode, 'abc', 'undefined', errors)
self.assertRaises(UnicodeError,
codecs.decode, b'abc', 'undefined', errors)
class StreamReaderTest(unittest.TestCase):
def setUp(self):
@ -1801,13 +1831,10 @@ if hasattr(codecs, "mbcs_encode"):
# "undefined"
# The following encodings don't work in stateful mode
broken_unicode_with_streams = [
broken_unicode_with_stateful = [
"punycode",
"unicode_internal"
]
broken_incremental_coders = broken_unicode_with_streams + [
"idna",
]
class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
def test_basics(self):
@ -1827,7 +1854,7 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
(chars, size) = codecs.getdecoder(encoding)(b)
self.assertEqual(chars, s, "encoding=%r" % encoding)
if encoding not in broken_unicode_with_streams:
if encoding not in broken_unicode_with_stateful:
# check stream reader/writer
q = Queue(b"")
writer = codecs.getwriter(encoding)(q)
@ -1845,7 +1872,7 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
decodedresult += reader.read()
self.assertEqual(decodedresult, s, "encoding=%r" % encoding)
if encoding not in broken_incremental_coders:
if encoding not in broken_unicode_with_stateful:
# check incremental decoder/encoder and iterencode()/iterdecode()
try:
encoder = codecs.getincrementalencoder(encoding)()
@ -1894,7 +1921,7 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
from _testcapi import codec_incrementalencoder, codec_incrementaldecoder
s = "abc123" # all codecs should be able to encode these
for encoding in all_unicode_encodings:
if encoding not in broken_incremental_coders:
if encoding not in broken_unicode_with_stateful:
# check incremental decoder/encoder (fetched via the C API)
try:
cencoder = codec_incrementalencoder(encoding)
@ -1934,7 +1961,7 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
for encoding in all_unicode_encodings:
if encoding == "idna": # FIXME: See SF bug #1163178
continue
if encoding in broken_unicode_with_streams:
if encoding in broken_unicode_with_stateful:
continue
reader = codecs.getreader(encoding)(io.BytesIO(s.encode(encoding)))
for t in range(5):
@ -1967,7 +1994,7 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
# Check that getstate() and setstate() handle the state properly
u = "abc123"
for encoding in all_unicode_encodings:
if encoding not in broken_incremental_coders:
if encoding not in broken_unicode_with_stateful:
self.check_state_handling_decode(encoding, u, u.encode(encoding))
self.check_state_handling_encode(encoding, u, u.encode(encoding))
@ -2171,6 +2198,7 @@ class WithStmtTest(unittest.TestCase):
f = io.BytesIO(b"\xc3\xbc")
with codecs.EncodedFile(f, "latin-1", "utf-8") as ef:
self.assertEqual(ef.read(), b"\xfc")
self.assertTrue(f.closed)
def test_streamreaderwriter(self):
f = io.BytesIO(b"\xc3\xbc")