Fix utf-8-sig incremental decoder, which didn't recognise a BOM when the
first chunk fed to the decoder started with a BOM, but was longer than 3 bytes.
2025-11-25 04:34:37 +00:00 · 2007-04-12 10:35:00 +00:00 · 2007-04-12 10:35:00 +00:00 · 4234827e99
commit 4234827e99
parent 9aba6d6905
3 changed files with 19 additions and 7 deletions
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -429,6 +429,11 @@ class UTF8SigTest(ReadTest):
        # SF bug #1601501: check that the codec works with a buffer
        unicode("\xef\xbb\xbf", "utf-8-sig")

+    def test_bom(self):
+        d = codecs.getincrementaldecoder("utf-8-sig")()
+        s = u"spam"
+        self.assertEqual(d.decode(s.encode("utf-8-sig")), s)
+
 class EscapeDecodeTest(unittest.TestCase):
    def test_empty(self):
        self.assertEquals(codecs.escape_decode(""), ("", 0))