mirror of
				https://github.com/python/cpython.git
				synced 2025-10-30 01:47:38 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			152 lines
		
	
	
	
		
			4.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			152 lines
		
	
	
	
		
			4.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """ Standard "encodings" Package
 | |
| 
 | |
|     Standard Python encoding modules are stored in this package
 | |
|     directory.
 | |
| 
 | |
|     Codec modules must have names corresponding to normalized encoding
 | |
|     names as defined in the normalize_encoding() function below, e.g.
 | |
|     'utf-8' must be implemented by the module 'utf_8.py'.
 | |
| 
 | |
|     Each codec module must export the following interface:
 | |
| 
 | |
|     * getregentry() -> codecs.CodecInfo object
 | |
|     The getregentry() API must return a CodecInfo object with encoder, decoder,
 | |
|     incrementalencoder, incrementaldecoder, streamwriter and streamreader
 | |
|     attributes which adhere to the Python Codec Interface Standard.
 | |
| 
 | |
|     In addition, a module may optionally also define the following
 | |
|     APIs which are then used by the package's codec search function:
 | |
| 
 | |
|     * getaliases() -> sequence of encoding name strings to use as aliases
 | |
| 
 | |
|     Alias names returned by getaliases() must be normalized encoding
 | |
|     names as defined by normalize_encoding().
 | |
| 
 | |
| Written by Marc-Andre Lemburg (mal@lemburg.com).
 | |
| 
 | |
| (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 | |
| 
 | |
| """#"
 | |
| 
 | |
| import codecs
 | |
| from . import aliases
 | |
| 
 | |
# Maps the encoding name exactly as passed to search_function() to its
# CodecInfo entry, or to None when an earlier lookup found no codec module.
_cache = {}
# Default handed to _cache.get(); lets search_function() tell "never looked
# up" apart from a cached None (a remembered miss).
_unknown = '--unknown--'
# Non-empty fromlist passed to __import__() so that the codec submodule is
# returned rather than the top-level 'encodings' package.
_import_tail = ['*']
# Alias table taken from the sibling aliases module; search_function()
# adds the names reported by a codec module's getaliases() to it.
_aliases = aliases.aliases
 | |
| 
 | |
class CodecRegistryError(LookupError, SystemError):
    """Error raised by search_function() for a codec module whose
    getregentry() result cannot be turned into a codecs.CodecInfo.

    Derives from both LookupError and SystemError, so handlers for
    either of those also catch it.
    """
 | |
| 
 | |
def normalize_encoding(encoding):
    """Return the normalized form of an encoding name.

    Every run of characters that are neither alphanumeric nor a dot
    (the dot is kept because it is used in Python package names) is
    collapsed into a single underscore, so 'utf-8' becomes 'utf_8'.
    Such runs at the start or end of the name are dropped entirely,
    so '  -;#' normalizes to the empty string.

    A bytes argument is decoded as ASCII before it is processed.
    """
    if isinstance(encoding, bytes):
        encoding = str(encoding, "ascii")

    # Gather each maximal run of kept characters as one piece; joining
    # the pieces with '_' yields one underscore per separator run and
    # none at either end.
    pieces = []
    current = []
    for ch in encoding:
        if ch.isalnum() or ch == '.':
            current.append(ch)
        elif current:
            pieces.append(''.join(current))
            current = []
    if current:
        pieces.append(''.join(current))
    return '_'.join(pieces)
 | |
| 
 | |
def search_function(encoding):
    """Codec search function of the "encodings" package.

    Look up *encoding* and return the codecs.CodecInfo object provided
    by the matching codec module, or None if no codec module is found.
    The result (including a miss) is cached under *encoding* exactly as
    it was passed in.

    The name is normalized with normalize_encoding() and looked up in
    the alias table; an aliased module name is tried before the
    normalized name itself.  Only modules inside the 'encodings'
    package are imported.

    Raises CodecRegistryError if the module's getregentry() returns a
    value that is neither a CodecInfo nor a sequence of 4 to 7 items
    whose first two items are callable and whose items 3 to 6 are each
    callable or None.
    """
    # Cache lookup.  None is a valid cached value (a remembered miss),
    # so a separate marker object is used as the "not cached" default.
    entry = _cache.get(encoding, _unknown)
    if entry is not _unknown:
        return entry

    # Import the module:
    #
    # First try to find an alias for the normalized encoding name and
    # look up the module using the aliased name, then fall back to the
    # normalized name itself.
    norm_encoding = normalize_encoding(encoding)
    aliased_encoding = _aliases.get(norm_encoding) or \
                       _aliases.get(norm_encoding.replace('.', '_'))
    if aliased_encoding is not None:
        modnames = [aliased_encoding, norm_encoding]
    else:
        modnames = [norm_encoding]
    for modname in modnames:
        # Empty names and dotted names are never imported.
        if not modname or '.' in modname:
            continue
        try:
            # Import is absolute to prevent the possibly malicious import of a
            # module with side-effects that is not in the 'encodings' package.
            mod = __import__('encodings.' + modname, fromlist=_import_tail,
                             level=0)
        except ImportError:
            # Deliberately ignored: try the next candidate name.
            pass
        else:
            break
    else:
        # No candidate could be imported.
        mod = None

    try:
        getregentry = mod.getregentry
    except AttributeError:
        # Not a codec module (or no module at all)
        mod = None

    if mod is None:
        # Cache misses
        _cache[encoding] = None
        return None

    # Now ask the module for the registry entry
    entry = getregentry()
    if not isinstance(entry, codecs.CodecInfo):
        if not 4 <= len(entry) <= 7:
            raise CodecRegistryError('module "%s" (%s) failed to register'
                                     % (mod.__name__, mod.__file__))
        if not callable(entry[0]) or not callable(entry[1]) or \
           (entry[2] is not None and not callable(entry[2])) or \
           (entry[3] is not None and not callable(entry[3])) or \
           (len(entry) > 4 and entry[4] is not None and not callable(entry[4])) or \
           (len(entry) > 5 and entry[5] is not None and not callable(entry[5])):
            raise CodecRegistryError('incompatible codecs in module "%s" (%s)'
                                     % (mod.__name__, mod.__file__))
        if len(entry) < 7 or entry[6] is None:
            # Pad the optional slots with None and use the module name
            # (without the leading package component) as the codec name.
            # Rebuild from the first six slots instead of appending, so a
            # 7-item entry whose name is None ends up with 7 items, not 8.
            default_name = mod.__name__.split(".", 1)[1]
            entry = (tuple(entry[:6]) + (None,) * (6 - len(entry))
                     + (default_name,))
        entry = codecs.CodecInfo(*entry)

    # Cache the codec registry entry
    _cache[encoding] = entry

    # Register its aliases (without overwriting previously registered
    # aliases)
    try:
        codecaliases = mod.getaliases()
    except AttributeError:
        # getaliases() is optional for codec modules.
        pass
    else:
        for alias in codecaliases:
            if alias not in _aliases:
                # modname is the candidate whose import succeeded above.
                _aliases[alias] = modname

    # Return the registry entry
    return entry
 | |
| 
 | |
# Register search_function with the Python codec registry.  This runs as a
# module-level statement, i.e. when this package is imported.
codecs.register(search_function)
 | 
