mirror of
https://github.com/python/cpython.git
synced 2025-07-30 06:34:15 +00:00
Fix utf-8-sig incremental decoder, which didn't recognise a BOM when the
first chunk fed to the decoder started with a BOM, but was longer than 3 bytes.
This commit is contained in:
parent
9aba6d6905
commit
4234827e99
3 changed files with 19 additions and 7 deletions
|
@@ -44,13 +44,18 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
|
||||||
self.first = True
|
self.first = True
|
||||||
|
|
||||||
def _buffer_decode(self, input, errors, final):
|
def _buffer_decode(self, input, errors, final):
|
||||||
if self.first and codecs.BOM_UTF8.startswith(input): # might be a BOM
|
if self.first:
|
||||||
if len(input) < 3:
|
if len(input) < 3:
|
||||||
|
if codecs.BOM_UTF8.startswith(input):
|
||||||
# not enough data to decide if this really is a BOM
|
# not enough data to decide if this really is a BOM
|
||||||
# => try again on the next call
|
# => try again on the next call
|
||||||
return (u"", 0)
|
return (u"", 0)
|
||||||
|
else:
|
||||||
|
self.first = None
|
||||||
|
else:
|
||||||
|
self.first = None
|
||||||
|
if input[:3] == codecs.BOM_UTF8:
|
||||||
(output, consumed) = codecs.utf_8_decode(input[3:], errors, final)
|
(output, consumed) = codecs.utf_8_decode(input[3:], errors, final)
|
||||||
self.first = False
|
|
||||||
return (output, consumed+3)
|
return (output, consumed+3)
|
||||||
return codecs.utf_8_decode(input, errors, final)
|
return codecs.utf_8_decode(input, errors, final)
|
||||||
|
|
||||||
|
|
|
@@ -429,6 +429,11 @@ class UTF8SigTest(ReadTest):
|
||||||
# SF bug #1601501: check that the codec works with a buffer
|
# SF bug #1601501: check that the codec works with a buffer
|
||||||
unicode("\xef\xbb\xbf", "utf-8-sig")
|
unicode("\xef\xbb\xbf", "utf-8-sig")
|
||||||
|
|
||||||
|
def test_bom(self):
|
||||||
|
d = codecs.getincrementaldecoder("utf-8-sig")()
|
||||||
|
s = u"spam"
|
||||||
|
self.assertEqual(d.decode(s.encode("utf-8-sig")), s)
|
||||||
|
|
||||||
class EscapeDecodeTest(unittest.TestCase):
|
class EscapeDecodeTest(unittest.TestCase):
|
||||||
def test_empty(self):
|
def test_empty(self):
|
||||||
self.assertEquals(codecs.escape_decode(""), ("", 0))
|
self.assertEquals(codecs.escape_decode(""), ("", 0))
|
||||||
|
|
|
@@ -591,6 +591,8 @@ Library
|
||||||
|
|
||||||
- idle: Honor the "Cancel" action in the save dialog (Debian bug #299092).
|
- idle: Honor the "Cancel" action in the save dialog (Debian bug #299092).
|
||||||
|
|
||||||
|
- Fix utf-8-sig incremental decoder, which didn't recognise a BOM when the
|
||||||
|
first chunk fed to the decoder started with a BOM, but was longer than 3 bytes.
|
||||||
|
|
||||||
Extension Modules
|
Extension Modules
|
||||||
-----------------
|
-----------------
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue