mirror of
https://github.com/python/cpython.git
synced 2025-11-25 21:11:09 +00:00
Change normalize_encoding() to avoid using .translate() or depending on
the string type. It will always return a Unicode string. The algorithm's specification is unchanged.
This commit is contained in:
parent
c3b6ac796f
commit
ad5b9de288
1 changed files with 11 additions and 14 deletions
|
|
@ -34,12 +34,6 @@ from . import aliases
|
||||||
_cache = {}
|
_cache = {}
|
||||||
_unknown = '--unknown--'
|
_unknown = '--unknown--'
|
||||||
_import_tail = ['*']
|
_import_tail = ['*']
|
||||||
_norm_encoding_map = (' . '
|
|
||||||
'0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ '
|
|
||||||
' abcdefghijklmnopqrstuvwxyz '
|
|
||||||
' '
|
|
||||||
' '
|
|
||||||
' ')
|
|
||||||
_aliases = aliases.aliases
|
_aliases = aliases.aliases
|
||||||
|
|
||||||
class CodecRegistryError(LookupError, SystemError):
|
class CodecRegistryError(LookupError, SystemError):
|
||||||
|
|
@ -58,14 +52,17 @@ def normalize_encoding(encoding):
|
||||||
non-ASCII characters, these must be Latin-1 compatible.
|
non-ASCII characters, these must be Latin-1 compatible.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
# Make sure we have an 8-bit string, because .translate() works
|
chars = []
|
||||||
# differently for Unicode strings.
|
punct = False
|
||||||
if isinstance(encoding, str):
|
for c in encoding:
|
||||||
# Note that .encode('latin-1') does *not* use the codec
|
if c.isalnum() or c == '.':
|
||||||
# registry, so this call doesn't recurse. (See unicodeobject.c
|
if punct and chars:
|
||||||
# PyUnicode_AsEncodedString() for details)
|
chars.append('_')
|
||||||
encoding = encoding.encode('latin-1')
|
chars.append(c)
|
||||||
return '_'.join(encoding.translate(_norm_encoding_map).split())
|
punct = False
|
||||||
|
else:
|
||||||
|
punct = True
|
||||||
|
return ''.join(chars)
|
||||||
|
|
||||||
def search_function(encoding):
|
def search_function(encoding):
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue