Issue #5006: Better handling of unicode byte-order marks (BOM) in the io

library. This means, for example, that opening a UTF-16 text file in append
mode doesn't add a BOM at the end of the file if the file isn't empty.
This commit is contained in:
Victor Stinner 2010-07-28 01:58:41 +00:00
parent 082a65ab1f
commit 8243ddb6ca
3 changed files with 55 additions and 0 deletions

View file

@ -799,6 +799,37 @@ class StatefulIncrementalDecoderTest(unittest.TestCase):
self.assertEquals(d.decode(b'oiabcd'), '')
self.assertEquals(d.decode(b'', 1), 'abcd.')
def test_append_bom(self):
    # The BOM is not written again when appending to a non-empty file:
    # after writing 'aaa' and then appending 'xxx', the raw file content
    # must equal a single one-shot encoding of 'aaaxxx'.
    # NOTE(review): the file at TESTFN is not removed here -- presumably
    # the enclosing test class cleans it up; confirm.
    filename = test_support.TESTFN
    for charset in ('utf-8-sig', 'utf-16', 'utf-32'):
        # Create (or truncate) the file with the initial text.
        with io.open(filename, 'w', encoding=charset) as f:
            f.write('aaa')
        with io.open(filename, 'rb') as f:
            self.assertEqual(f.read(), 'aaa'.encode(charset))

        # Reopen in append mode and add more text.
        with io.open(filename, 'a', encoding=charset) as f:
            f.write('xxx')
        with io.open(filename, 'rb') as f:
            self.assertEqual(f.read(), 'aaaxxx'.encode(charset))
def test_seek_bom(self):
    # The BOM must appear only once in the file when the write position
    # is set manually with seek(): text written after seeking to a
    # previously recorded position, and text written after seeking back
    # to 0, must together match a one-shot encoding of the final text.
    # NOTE(review): the file at TESTFN is not removed here -- presumably
    # the enclosing test class cleans it up; confirm.
    filename = test_support.TESTFN
    for charset in ('utf-8-sig', 'utf-16', 'utf-32'):
        with io.open(filename, 'w', encoding=charset) as f:
            f.write('aaa')
            # Remember the stream position just after 'aaa'.
            pos = f.tell()
        with io.open(filename, 'r+', encoding=charset) as f:
            # Write 'zzz' at the recorded position (after 'aaa') ...
            f.seek(pos)
            f.write('zzz')
            # ... then go back to the start and overwrite 'aaa'.
            f.seek(0)
            f.write('bbb')
        with io.open(filename, 'rb') as f:
            self.assertEqual(f.read(), 'bbbzzz'.encode(charset))
class TextIOWrapperTest(unittest.TestCase):
def setUp(self):