Patch #1436130: codecs.lookup() now returns a CodecInfo object (a subclass

of tuple) that provides incremental decoders and encoders (a way to use
stateful codecs without the stream API). Functions
codecs.getincrementaldecoder() and codecs.getincrementalencoder() have
been added.
This commit is contained in:
Walter Dörwald 2006-03-15 11:35:15 +00:00
parent e2ebb2d7f7
commit abb02e5994
98 changed files with 2212 additions and 420 deletions

View file

@ -73,6 +73,23 @@ BOM64_BE = BOM_UTF32_BE
### Codec base classes (defining the API)
class CodecInfo(tuple):
def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
incrementalencoder=None, incrementaldecoder=None, name=None):
self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
self.name = name
self.encode = encode
self.decode = decode
self.incrementalencoder = incrementalencoder
self.incrementaldecoder = incrementaldecoder
self.streamwriter = streamwriter
self.streamreader = streamreader
return self
def __repr__(self):
return "<%s.%s object for encoding %s at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, id(self))
class Codec:
""" Defines the interface for stateless encoders/decoders.
@ -137,6 +154,88 @@ class Codec:
"""
raise NotImplementedError
class IncrementalEncoder(object):
"""
A IncrementalEncoder encodes an input in multiple steps. The input can be
passed piece by piece to the encode() method. The IncrementalEncoder remembers
the state of the Encoding process between calls to encode().
"""
def __init__(self, errors='strict'):
"""
Creates a IncrementalEncoder instance.
The IncrementalEncoder may use different error handling schemes by
providing the errors keyword argument. See the module docstring
for a list of possible values.
"""
self.errors = errors
self.buffer = ""
def encode(self, input, final=False):
"""
Encodes input and returns the resulting object.
"""
raise NotImplementedError
def reset(self):
"""
Resets the encoder to the initial state.
"""
class IncrementalDecoder(object):
"""
An IncrementalDecoder decodes an input in multiple steps. The input can be
passed piece by piece to the decode() method. The IncrementalDecoder
remembers the state of the decoding process between calls to decode().
"""
def __init__(self, errors='strict'):
"""
Creates a IncrementalDecoder instance.
The IncrementalDecoder may use different error handling schemes by
providing the errors keyword argument. See the module docstring
for a list of possible values.
"""
self.errors = errors
def decode(self, input, final=False):
"""
Decodes input and returns the resulting object.
"""
raise NotImplementedError
def reset(self):
"""
Resets the decoder to the initial state.
"""
class BufferedIncrementalDecoder(IncrementalDecoder):
"""
This subclass of IncrementalDecoder can be used as the baseclass for an
incremental decoder if the decoder must be able to handle incomplete byte
sequences.
"""
def __init__(self, errors='strict'):
IncrementalDecoder.__init__(self, errors)
self.buffer = "" # undecoded input that is kept between calls to decode()
def _buffer_decode(self, input, errors, final):
# Overwrite this method in subclasses: It must decode input
# and return an (output, length consumed) tuple
raise NotImplementedError
def decode(self, input, final=False):
# decode input (taking the buffer into account)
data = self.buffer + input
(result, consumed) = self._buffer_decode(data, self.errors, final)
# keep undecoded input until the next call
self.buffer = data[consumed:]
return result
def reset(self):
IncrementalDecoder.reset(self)
self.bytebuffer = ""
#
# The StreamWriter and StreamReader class provide generic working
# interfaces which can be used to implement new encoding submodules
@ -666,8 +765,8 @@ def open(filename, mode='rb', encoding=None, errors='strict', buffering=1):
file = __builtin__.open(filename, mode, buffering)
if encoding is None:
return file
(e, d, sr, sw) = lookup(encoding)
srw = StreamReaderWriter(file, sr, sw, errors)
info = lookup(encoding)
srw = StreamReaderWriter(file, info.streamreader, info.streamwriter, errors)
# Add attributes to simplify introspection
srw.encoding = encoding
return srw
@ -699,11 +798,9 @@ def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):
"""
if file_encoding is None:
file_encoding = data_encoding
encode, decode = lookup(data_encoding)[:2]
Reader, Writer = lookup(file_encoding)[2:]
sr = StreamRecoder(file,
encode, decode, Reader, Writer,
errors)
info = lookup(data_encoding)
sr = StreamRecoder(file, info.encode, info.decode,
info.streamreader, info.streamwriter, errors)
# Add attributes to simplify introspection
sr.data_encoding = data_encoding
sr.file_encoding = file_encoding
@ -719,7 +816,7 @@ def getencoder(encoding):
Raises a LookupError in case the encoding cannot be found.
"""
return lookup(encoding)[0]
return lookup(encoding).encode
def getdecoder(encoding):
@ -729,7 +826,35 @@ def getdecoder(encoding):
Raises a LookupError in case the encoding cannot be found.
"""
return lookup(encoding)[1]
return lookup(encoding).decode
def getincrementalencoder(encoding):
""" Lookup up the codec for the given encoding and return
its IncrementalEncoder class or factory function.
Raises a LookupError in case the encoding cannot be found
or the codecs doesn't provide an incremental encoder.
"""
encoder = lookup(encoding).incrementalencoder
if encoder is None:
raise LookupError(encoding)
return encoder
def getincrementaldecoder(encoding):
""" Lookup up the codec for the given encoding and return
its IncrementalDecoder class or factory function.
Raises a LookupError in case the encoding cannot be found
or the codecs doesn't provide an incremental decoder.
"""
decoder = lookup(encoding).incrementaldecoder
if decoder is None:
raise LookupError(encoding)
return decoder
def getreader(encoding):
@ -739,7 +864,7 @@ def getreader(encoding):
Raises a LookupError in case the encoding cannot be found.
"""
return lookup(encoding)[2]
return lookup(encoding).streamreader
def getwriter(encoding):
@ -749,7 +874,43 @@ def getwriter(encoding):
Raises a LookupError in case the encoding cannot be found.
"""
return lookup(encoding)[3]
return lookup(encoding).streamwriter
def iterencode(iterator, encoding, errors='strict', **kwargs):
"""
Encoding iterator.
Encodes the input strings from the iterator using a IncrementalEncoder.
errors and kwargs are passed through to the IncrementalEncoder
constructor.
"""
encoder = getincrementalencoder(encoding)(errors, **kwargs)
for input in iterator:
output = encoder.encode(input)
if output:
yield output
output = encoder.encode("", True)
if output:
yield output
def iterdecode(iterator, encoding, errors='strict', **kwargs):
"""
Decoding iterator.
Decodes the input strings from the iterator using a IncrementalDecoder.
errors and kwargs are passed through to the IncrementalDecoder
constructor.
"""
decoder = getincrementaldecoder(encoding)(errors, **kwargs)
for input in iterator:
output = decoder.decode(input)
if output:
yield output
output = decoder.decode("", True)
if output:
yield output
### Helpers for charmap-based codecs