mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 11:49:12 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			57 lines
		
	
	
	
		
			1.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			57 lines
		
	
	
	
		
			1.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
""" Python 'utf-8-sig' Codec
 | 
						|
This codec works like UTF-8, with the following changes:
 | 
						|
 | 
						|
* On encoding/writing a UTF-8 encoded BOM will be prepended/written as the
 | 
						|
  first three bytes.
 | 
						|
 | 
						|
* On decoding/reading if the first three bytes are a UTF-8 encoded BOM, these
 | 
						|
  bytes will be skipped.
 | 
						|
"""
 | 
						|
import codecs
 | 
						|
 | 
						|
### Codec APIs
 | 
						|
 | 
						|
def encode(input, errors='strict'):
    """Encode *input* as UTF-8 with a UTF-8 BOM in front of the result.

    *errors* is handed through to ``codecs.utf_8_encode``.

    Returns a ``(data, length)`` tuple: ``data`` is ``codecs.BOM_UTF8``
    followed by the UTF-8 encoding of *input*; ``length`` is
    ``len(input)`` (the BOM is not counted).
    """
    payload = codecs.utf_8_encode(input, errors)[0]
    return (codecs.BOM_UTF8 + payload, len(input))
 | 
						|
 | 
						|
def decode(input, errors='strict'):
    """Decode complete UTF-8 data, skipping a leading UTF-8 BOM if present.

    *input* may be any sliceable bytes-like object whose slices compare
    equal to a byte string (for example a memoryview), not only objects
    that provide a ``startswith`` method.
    *errors* is handed through to ``codecs.utf_8_decode``.

    The data is decoded with the ``final`` flag set, i.e. it is treated as
    complete.

    Returns ``(text, consumed)`` where ``consumed`` includes the three BOM
    bytes when a BOM was skipped.
    """
    bom = codecs.BOM_UTF8
    prefix = 0
    # Compare a slice instead of calling input.startswith(): slicing works
    # for buffer-style inputs that have no string methods.  A slice of an
    # input shorter than the BOM simply compares unequal.
    if input[:len(bom)] == bom:
        prefix = len(bom)
        input = input[prefix:]
    (output, consumed) = codecs.utf_8_decode(input, errors, True)
    return (output, consumed + prefix)
 | 
						|
 | 
						|
class StreamWriter(codecs.StreamWriter):
    """Stream writer whose first successful encode is prefixed by a BOM.

    The class-level encode() produces the BOM.  Once it has succeeded it
    stores ``codecs.utf_8_encode`` as an instance attribute named
    ``encode``, which shadows the method so that later calls through
    ``self.encode`` produce plain UTF-8.  reset() removes that instance
    attribute again, re-arming the BOM.
    """

    def reset(self):
        """Reset the writer and re-arm the BOM for the next encode()."""
        codecs.StreamWriter.reset(self)
        try:
            # Drop the instance-level shadow; the class method (which
            # prepends the BOM) becomes visible again.
            del self.encode
        except AttributeError:
            # Nothing has been encoded since the last reset.
            pass

    def encode(self, input, errors='strict'):
        """Encode *input* as UTF-8 with a leading BOM.

        Returns ``(data, length)`` where ``data`` starts with
        ``codecs.BOM_UTF8`` and ``length`` is ``len(input)``.  Exceptions
        raised by ``codecs.utf_8_encode`` propagate unchanged.
        """
        data = codecs.utf_8_encode(input, errors)[0]
        # Switch to the BOM-less encoder only after encoding succeeded.
        # Switching first would mean that a failed first call suppresses
        # the BOM for every later call.
        self.encode = codecs.utf_8_encode
        return (codecs.BOM_UTF8 + data, len(input))
 | 
						|
 | 
						|
class StreamReader(codecs.StreamReader):
    """Stream reader that skips a UTF-8 BOM at the start of the data.

    The class-level decode() handles the start of the data.  Once it has
    decided whether a BOM is present it stores ``codecs.utf_8_decode`` as
    an instance attribute named ``decode``, which shadows the method for
    later calls through ``self.decode``.  reset() removes that instance
    attribute so BOM detection happens again.
    """

    def reset(self):
        """Reset the reader and re-enable BOM detection."""
        codecs.StreamReader.reset(self)
        try:
            # Drop the instance-level shadow; the class method (which
            # looks for the BOM) becomes visible again.
            del self.decode
        except AttributeError:
            # BOM detection has not completed since the last reset.
            pass

    def decode(self, input, errors='strict'):
        """Decode the first chunk of data, skipping a leading BOM.

        Returns ``(text, consumed)``.  ``consumed`` counts the BOM bytes
        when a BOM was skipped.  When *input* is shorter than the BOM but
        could still be the beginning of one, nothing is consumed and
        ``(u"", 0)`` is returned so the caller can retry with more data.
        """
        bom = codecs.BOM_UTF8
        if len(input) < len(bom) and bom.startswith(input):
            # not enough data to decide if this is a BOM
            # => try again on the next call
            return (u"", 0)
        skip = 0
        if input[:len(bom)] == bom:
            skip = len(bom)
        # Decode incrementally (no "final" flag): this chunk may end in the
        # middle of a multi-byte character, in which case the trailing
        # bytes must stay unconsumed instead of raising a decode error.
        (output, consumed) = codecs.utf_8_decode(input[skip:], errors)
        # The BOM question is settled; later calls use the plain decoder.
        self.decode = codecs.utf_8_decode
        return (output, consumed + skip)
 | 
						|
 | 
						|
### encodings module API
 | 
						|
 | 
						|
def getregentry():
    """Return this codec's registry entry.

    The entry is the 4-tuple
    ``(encode, decode, StreamReader, StreamWriter)``.
    """
    entry = (encode, decode, StreamReader, StreamWriter)
    return entry
 |