mirror of
https://github.com/python/cpython.git
synced 2025-08-23 18:24:46 +00:00
Merge part of the trunk changes into the p3yk branch. This merges from 43030
(branch-creation time) up to 43067. 43068 and 43069 contain a little swapping action between re.py and sre.py, and this mightily confuses svn merge, so later changes are going in separately. This merge should break no additional tests. The last-merged revision is going in a 'last_merge' property on '.' (the branch directory.) Arbitrarily chosen, really; if there's a BCP for this, I couldn't find it, but we can easily change it afterwards ;)
This commit is contained in:
parent
d858f70617
commit
a977329b6f
116 changed files with 3409 additions and 709 deletions
183
Lib/codecs.py
183
Lib/codecs.py
|
@ -73,6 +73,23 @@ BOM64_BE = BOM_UTF32_BE
|
|||
|
||||
### Codec base classes (defining the API)
|
||||
|
||||
class CodecInfo(tuple):
|
||||
|
||||
def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
|
||||
incrementalencoder=None, incrementaldecoder=None, name=None):
|
||||
self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
|
||||
self.name = name
|
||||
self.encode = encode
|
||||
self.decode = decode
|
||||
self.incrementalencoder = incrementalencoder
|
||||
self.incrementaldecoder = incrementaldecoder
|
||||
self.streamwriter = streamwriter
|
||||
self.streamreader = streamreader
|
||||
return self
|
||||
|
||||
def __repr__(self):
|
||||
return "<%s.%s object for encoding %s at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, id(self))
|
||||
|
||||
class Codec:
|
||||
|
||||
""" Defines the interface for stateless encoders/decoders.
|
||||
|
@ -137,6 +154,88 @@ class Codec:
|
|||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
class IncrementalEncoder(object):
|
||||
"""
|
||||
A IncrementalEncoder encodes an input in multiple steps. The input can be
|
||||
passed piece by piece to the encode() method. The IncrementalEncoder remembers
|
||||
the state of the Encoding process between calls to encode().
|
||||
"""
|
||||
def __init__(self, errors='strict'):
|
||||
"""
|
||||
Creates a IncrementalEncoder instance.
|
||||
|
||||
The IncrementalEncoder may use different error handling schemes by
|
||||
providing the errors keyword argument. See the module docstring
|
||||
for a list of possible values.
|
||||
"""
|
||||
self.errors = errors
|
||||
self.buffer = ""
|
||||
|
||||
def encode(self, input, final=False):
|
||||
"""
|
||||
Encodes input and returns the resulting object.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def reset(self):
|
||||
"""
|
||||
Resets the encoder to the initial state.
|
||||
"""
|
||||
|
||||
class IncrementalDecoder(object):
|
||||
"""
|
||||
An IncrementalDecoder decodes an input in multiple steps. The input can be
|
||||
passed piece by piece to the decode() method. The IncrementalDecoder
|
||||
remembers the state of the decoding process between calls to decode().
|
||||
"""
|
||||
def __init__(self, errors='strict'):
|
||||
"""
|
||||
Creates a IncrementalDecoder instance.
|
||||
|
||||
The IncrementalDecoder may use different error handling schemes by
|
||||
providing the errors keyword argument. See the module docstring
|
||||
for a list of possible values.
|
||||
"""
|
||||
self.errors = errors
|
||||
|
||||
def decode(self, input, final=False):
|
||||
"""
|
||||
Decodes input and returns the resulting object.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def reset(self):
|
||||
"""
|
||||
Resets the decoder to the initial state.
|
||||
"""
|
||||
|
||||
class BufferedIncrementalDecoder(IncrementalDecoder):
|
||||
"""
|
||||
This subclass of IncrementalDecoder can be used as the baseclass for an
|
||||
incremental decoder if the decoder must be able to handle incomplete byte
|
||||
sequences.
|
||||
"""
|
||||
def __init__(self, errors='strict'):
|
||||
IncrementalDecoder.__init__(self, errors)
|
||||
self.buffer = "" # undecoded input that is kept between calls to decode()
|
||||
|
||||
def _buffer_decode(self, input, errors, final):
|
||||
# Overwrite this method in subclasses: It must decode input
|
||||
# and return an (output, length consumed) tuple
|
||||
raise NotImplementedError
|
||||
|
||||
def decode(self, input, final=False):
|
||||
# decode input (taking the buffer into account)
|
||||
data = self.buffer + input
|
||||
(result, consumed) = self._buffer_decode(data, self.errors, final)
|
||||
# keep undecoded input until the next call
|
||||
self.buffer = data[consumed:]
|
||||
return result
|
||||
|
||||
def reset(self):
|
||||
IncrementalDecoder.reset(self)
|
||||
self.bytebuffer = ""
|
||||
|
||||
#
|
||||
# The StreamWriter and StreamReader class provide generic working
|
||||
# interfaces which can be used to implement new encoding submodules
|
||||
|
@ -666,8 +765,8 @@ def open(filename, mode='rb', encoding=None, errors='strict', buffering=1):
|
|||
file = __builtin__.open(filename, mode, buffering)
|
||||
if encoding is None:
|
||||
return file
|
||||
(e, d, sr, sw) = lookup(encoding)
|
||||
srw = StreamReaderWriter(file, sr, sw, errors)
|
||||
info = lookup(encoding)
|
||||
srw = StreamReaderWriter(file, info.streamreader, info.streamwriter, errors)
|
||||
# Add attributes to simplify introspection
|
||||
srw.encoding = encoding
|
||||
return srw
|
||||
|
@ -699,11 +798,9 @@ def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):
|
|||
"""
|
||||
if file_encoding is None:
|
||||
file_encoding = data_encoding
|
||||
encode, decode = lookup(data_encoding)[:2]
|
||||
Reader, Writer = lookup(file_encoding)[2:]
|
||||
sr = StreamRecoder(file,
|
||||
encode, decode, Reader, Writer,
|
||||
errors)
|
||||
info = lookup(data_encoding)
|
||||
sr = StreamRecoder(file, info.encode, info.decode,
|
||||
info.streamreader, info.streamwriter, errors)
|
||||
# Add attributes to simplify introspection
|
||||
sr.data_encoding = data_encoding
|
||||
sr.file_encoding = file_encoding
|
||||
|
@ -719,7 +816,7 @@ def getencoder(encoding):
|
|||
Raises a LookupError in case the encoding cannot be found.
|
||||
|
||||
"""
|
||||
return lookup(encoding)[0]
|
||||
return lookup(encoding).encode
|
||||
|
||||
def getdecoder(encoding):
|
||||
|
||||
|
@ -729,7 +826,35 @@ def getdecoder(encoding):
|
|||
Raises a LookupError in case the encoding cannot be found.
|
||||
|
||||
"""
|
||||
return lookup(encoding)[1]
|
||||
return lookup(encoding).decode
|
||||
|
||||
def getincrementalencoder(encoding):
|
||||
|
||||
""" Lookup up the codec for the given encoding and return
|
||||
its IncrementalEncoder class or factory function.
|
||||
|
||||
Raises a LookupError in case the encoding cannot be found
|
||||
or the codecs doesn't provide an incremental encoder.
|
||||
|
||||
"""
|
||||
encoder = lookup(encoding).incrementalencoder
|
||||
if encoder is None:
|
||||
raise LookupError(encoding)
|
||||
return encoder
|
||||
|
||||
def getincrementaldecoder(encoding):
|
||||
|
||||
""" Lookup up the codec for the given encoding and return
|
||||
its IncrementalDecoder class or factory function.
|
||||
|
||||
Raises a LookupError in case the encoding cannot be found
|
||||
or the codecs doesn't provide an incremental decoder.
|
||||
|
||||
"""
|
||||
decoder = lookup(encoding).incrementaldecoder
|
||||
if decoder is None:
|
||||
raise LookupError(encoding)
|
||||
return decoder
|
||||
|
||||
def getreader(encoding):
|
||||
|
||||
|
@ -739,7 +864,7 @@ def getreader(encoding):
|
|||
Raises a LookupError in case the encoding cannot be found.
|
||||
|
||||
"""
|
||||
return lookup(encoding)[2]
|
||||
return lookup(encoding).streamreader
|
||||
|
||||
def getwriter(encoding):
|
||||
|
||||
|
@ -749,7 +874,43 @@ def getwriter(encoding):
|
|||
Raises a LookupError in case the encoding cannot be found.
|
||||
|
||||
"""
|
||||
return lookup(encoding)[3]
|
||||
return lookup(encoding).streamwriter
|
||||
|
||||
def iterencode(iterator, encoding, errors='strict', **kwargs):
|
||||
"""
|
||||
Encoding iterator.
|
||||
|
||||
Encodes the input strings from the iterator using a IncrementalEncoder.
|
||||
|
||||
errors and kwargs are passed through to the IncrementalEncoder
|
||||
constructor.
|
||||
"""
|
||||
encoder = getincrementalencoder(encoding)(errors, **kwargs)
|
||||
for input in iterator:
|
||||
output = encoder.encode(input)
|
||||
if output:
|
||||
yield output
|
||||
output = encoder.encode("", True)
|
||||
if output:
|
||||
yield output
|
||||
|
||||
def iterdecode(iterator, encoding, errors='strict', **kwargs):
|
||||
"""
|
||||
Decoding iterator.
|
||||
|
||||
Decodes the input strings from the iterator using a IncrementalDecoder.
|
||||
|
||||
errors and kwargs are passed through to the IncrementalDecoder
|
||||
constructor.
|
||||
"""
|
||||
decoder = getincrementaldecoder(encoding)(errors, **kwargs)
|
||||
for input in iterator:
|
||||
output = decoder.decode(input)
|
||||
if output:
|
||||
yield output
|
||||
output = decoder.decode("", True)
|
||||
if output:
|
||||
yield output
|
||||
|
||||
### Helpers for charmap-based codecs
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue