Apply SF patch #1698994: Add getstate() and setstate()

methods to incrementalcodecs. Also forward port r54786 (fix the incremental utf_8_sig decoder).
2025-12-04 00:30:19 +00:00 · 2007-04-16 22:10:50 +00:00 · 2007-04-16 22:10:50 +00:00 · 3abcb013b8
commit 3abcb013b8
parent 8981ad05c0
5 changed files with 249 additions and 31 deletions
--- a/Lib/encodings/utf_8_sig.py
+++ b/Lib/encodings/utf_8_sig.py
@ -12,7 +12,8 @@ import codecs
 ### Codec APIs

 def encode(input, errors='strict'):
-    return (codecs.BOM_UTF8 + codecs.utf_8_encode(input, errors)[0], len(input))
+    return (codecs.BOM_UTF8 + codecs.utf_8_encode(input, errors)[0],
+            len(input))

 def decode(input, errors='strict'):
    prefix = 0
@ -25,38 +26,61 @@ def decode(input, errors='strict'):
 class IncrementalEncoder(codecs.IncrementalEncoder):
    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
-        self.first = True
+        self.first = 1

    def encode(self, input, final=False):
        if self.first:
-            self.first = False
-            return codecs.BOM_UTF8 + codecs.utf_8_encode(input, self.errors)[0]
+            self.first = 0
+            return codecs.BOM_UTF8 + \
+                   codecs.utf_8_encode(input, self.errors)[0]
        else:
            return codecs.utf_8_encode(input, self.errors)[0]

    def reset(self):
        codecs.IncrementalEncoder.reset(self)
-        self.first = True
+        self.first = 1
+
+    def getstate(self):
+        return self.first
+
+    def setstate(self, state):
+        self.first = state

 class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
-        self.first = True
+        self.first = 1

    def _buffer_decode(self, input, errors, final):
-        if self.first and codecs.BOM_UTF8.startswith(input): # might be a BOM
+        if self.first:
            if len(input) < 3:
-                # not enough data to decide if this really is a BOM
-                # => try again on the next call
-                return (u"", 0)
-            (output, consumed) = codecs.utf_8_decode(input[3:], errors, final)
-            self.first = False
-            return (output, consumed+3)
+                if codecs.BOM_UTF8.startswith(input):
+                    # not enough data to decide if this really is a BOM
+                    # => try again on the next call
+                    return (u"", 0)
+                else:
+                    self.first = 0
+            else:
+                self.first = 0
+                if input[:3] == codecs.BOM_UTF8:
+                    (output, consumed) = \
+                       codecs.utf_8_decode(input[3:], errors, final)
+                    return (output, consumed+3)
        return codecs.utf_8_decode(input, errors, final)

    def reset(self):
        codecs.BufferedIncrementalDecoder.reset(self)
-        self.first = True
+        self.first = 1
+
+    def getstate(self):
+        state = codecs.BufferedIncrementalDecoder.getstate(self)
+        # state[1] must be 0 here, as it isn't passed along to the caller
+        return (state[0], self.first)
+
+    def setstate(self, state):
+        # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
+        codecs.BufferedIncrementalDecoder.setstate(self, state)
+        self.first = state[1]

 class StreamWriter(codecs.StreamWriter):
    def reset(self):