Change normalize_encoding() to avoid using .translate() or depending on

the string type. It will always return a Unicode string. The algorithm's specification is unchanged.
2025-11-25 21:11:09 +00:00 · 2007-06-07 21:43:46 +00:00 · 2007-06-07 21:43:46 +00:00 · ad5b9de288
commit ad5b9de288
parent c3b6ac796f
1 changed files with 11 additions and 14 deletions
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -34,12 +34,6 @@ from . import aliases
 _cache = {}
 _unknown = '--unknown--'
 _import_tail = ['*']
 _norm_encoding_map = ('                                              . '
                      '0123456789       ABCDEFGHIJKLMNOPQRSTUVWXYZ     '
                      ' abcdefghijklmnopqrstuvwxyz                     '
                      '                                                '
                      '                                                '
                      '                ')
 _aliases = aliases.aliases
 class CodecRegistryError(LookupError, SystemError):
@@ -58,14 +52,17 @@ def normalize_encoding(encoding):
        non-ASCII characters, these must be Latin-1 compatible.
    """
-    # Make sure we have an 8-bit string, because .translate() works
+    chars = []
-    # differently for Unicode strings.
+    punct = False
-    if isinstance(encoding, str):
+    for c in encoding:
-        # Note that .encode('latin-1') does *not* use the codec
+        if c.isalnum() or c == '.':
-        # registry, so this call doesn't recurse. (See unicodeobject.c
+            if punct and chars:
-        # PyUnicode_AsEncodedString() for details)
+                chars.append('_')
-        encoding = encoding.encode('latin-1')
+            chars.append(c)
-    return '_'.join(encoding.translate(_norm_encoding_map).split())
+            punct = False
        else:
            punct = True
    return ''.join(chars)
 def search_function(encoding):