mirror of
https://github.com/python/cpython.git
synced 2025-08-03 16:39:00 +00:00
Change normalize_encoding() to avoid using .translate() or depending on
the string type. It will always return a Unicode string. The algorithm's specification is unchanged.
This commit is contained in:
parent
c3b6ac796f
commit
ad5b9de288
1 changed files with 11 additions and 14 deletions
|
@ -34,12 +34,6 @@ from . import aliases
|
|||
_cache = {}
|
||||
_unknown = '--unknown--'
|
||||
_import_tail = ['*']
|
||||
_norm_encoding_map = (' . '
|
||||
'0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ '
|
||||
' abcdefghijklmnopqrstuvwxyz '
|
||||
' '
|
||||
' '
|
||||
' ')
|
||||
_aliases = aliases.aliases
|
||||
|
||||
class CodecRegistryError(LookupError, SystemError):
|
||||
|
@ -58,14 +52,17 @@ def normalize_encoding(encoding):
|
|||
non-ASCII characters, these must be Latin-1 compatible.
|
||||
|
||||
"""
|
||||
# Make sure we have an 8-bit string, because .translate() works
|
||||
# differently for Unicode strings.
|
||||
if isinstance(encoding, str):
|
||||
# Note that .encode('latin-1') does *not* use the codec
|
||||
# registry, so this call doesn't recurse. (See unicodeobject.c
|
||||
# PyUnicode_AsEncodedString() for details)
|
||||
encoding = encoding.encode('latin-1')
|
||||
return '_'.join(encoding.translate(_norm_encoding_map).split())
|
||||
chars = []
|
||||
punct = False
|
||||
for c in encoding:
|
||||
if c.isalnum() or c == '.':
|
||||
if punct and chars:
|
||||
chars.append('_')
|
||||
chars.append(c)
|
||||
punct = False
|
||||
else:
|
||||
punct = True
|
||||
return ''.join(chars)
|
||||
|
||||
def search_function(encoding):
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue