mirror of
https://github.com/python/cpython.git
synced 2025-08-31 14:07:50 +00:00
Issue #5006: Better handling of unicode byte-order marks (BOM) in the io library.
This means, for example, that opening a UTF-16 text file in append mode doesn't add a BOM at the end of the file if the file isn't empty.
This commit is contained in:
parent
b565577aa7
commit
e450185b4a
6 changed files with 168 additions and 22 deletions
|
@ -1963,6 +1963,37 @@ class TextIOWrapperTest(unittest.TestCase):
|
|||
|
||||
self.assertEqual(buffer.seekable(), txt.seekable())
|
||||
|
||||
def test_append_bom(self):
    # The BOM is not written again when appending to a non-empty file.
    #
    # For each charset: create the file with some text, check its raw
    # bytes, append more text, then check that the raw bytes equal a
    # single encode() of the combined text -- i.e. no second BOM was
    # inserted between the original and the appended data.
    filename = support.TESTFN
    for charset in ('utf-8-sig', 'utf-16', 'utf-32'):
        with self.open(filename, 'w', encoding=charset) as f:
            f.write('aaa')
            # The returned position is not needed in this test; the call
            # is kept so the sequence of operations on the file is the
            # same as before.
            f.tell()
        with self.open(filename, 'rb') as f:
            self.assertEqual(f.read(), 'aaa'.encode(charset))

        with self.open(filename, 'a', encoding=charset) as f:
            f.write('xxx')
        with self.open(filename, 'rb') as f:
            self.assertEqual(f.read(), 'aaaxxx'.encode(charset))
|
||||
|
||||
def test_seek_bom(self):
    # The BOM is not written again when the write position is set
    # manually with seek() instead of by opening in append mode.
    #
    # For each charset: create the file and remember the position after
    # the initial text, reopen it in 'r+' mode, write at the remembered
    # position, then rewind and overwrite from the start.  The raw bytes
    # must equal a single encode() of the final text.
    filename = support.TESTFN
    for charset in ('utf-8-sig', 'utf-16', 'utf-32'):
        with self.open(filename, 'w', encoding=charset) as f:
            f.write('aaa')
            pos = f.tell()
        with self.open(filename, 'r+', encoding=charset) as f:
            # Writing at the saved (non-zero) position.
            f.seek(pos)
            f.write('zzz')
            # Writing again from the very beginning of the file.
            f.seek(0)
            f.write('bbb')
        with self.open(filename, 'rb') as f:
            self.assertEqual(f.read(), 'bbbzzz'.encode(charset))
|
||||
|
||||
|
||||
class CTextIOWrapperTest(TextIOWrapperTest):
|
||||
|
||||
def test_initialization(self):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue