bpo-39337: encodings.normalize_encoding() now ignores non-ASCII characters (GH-22219)

This commit is contained in:
Hai Shi 2020-10-14 23:43:31 +08:00 committed by GitHub
parent b4d895336a
commit c5b049b91c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 21 additions and 2 deletions

View file

@ -3417,7 +3417,7 @@ class Rot13UtilTest(unittest.TestCase):
class CodecNameNormalizationTest(unittest.TestCase):
"""Test codec name normalization"""
def test_normalized_encoding(self):
def test_codecs_lookup(self):
FOUND = (1, 2, 3, 4)
NOT_FOUND = (None, None, None, None)
def search_function(encoding):
@ -3439,6 +3439,18 @@ class CodecNameNormalizationTest(unittest.TestCase):
self.assertEqual(NOT_FOUND, codecs.lookup('BBB.8'))
self.assertEqual(NOT_FOUND, codecs.lookup('a\xe9\u20ac-8'))
def test_encodings_normalize_encoding(self):
# encodings.normalize_encoding() ignores non-ASCII characters.
normalize = encodings.normalize_encoding
self.assertEqual(normalize('utf_8'), 'utf_8')
self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
self.assertEqual(normalize('utf 8'), 'utf_8')
# encodings.normalize_encoding() doesn't convert
# characters to lower case.
self.assertEqual(normalize('UTF 8'), 'UTF_8')
self.assertEqual(normalize('utf.8'), 'utf.8')
self.assertEqual(normalize('utf...8'), 'utf...8')
# Run this module's tests via unittest.main() when the file is executed
# directly as a script rather than imported.
if __name__ == "__main__":
unittest.main()