Issue #5006: Better handling of unicode byte-order marks (BOM) in the io library.

This means, for example, that opening an UTF-16 text file in
append mode doesn't add a BOM at the end of the file if the file isn't
empty.
This commit is contained in:
Antoine Pitrou 2009-05-14 18:55:55 +00:00
parent b565577aa7
commit e450185b4a
6 changed files with 168 additions and 22 deletions

View file

@ -1436,6 +1436,15 @@ class TextIOWrapper(TextIOBase):
self._snapshot = None # info for reconstructing decoder state
self._seekable = self._telling = self.buffer.seekable()
if self._seekable and self.writable():
position = self.buffer.tell()
if position != 0:
try:
self._get_encoder().setstate(0)
except LookupError:
# Sometimes the encoder doesn't exist
pass
# self._snapshot is either None, or a tuple (dec_flags, next_input)
# where dec_flags is the second (integer) item of the decoder state
# and next_input is the chunk of input bytes that comes next after the
@ -1741,6 +1750,17 @@ class TextIOWrapper(TextIOBase):
raise IOError("can't restore logical file position")
self._decoded_chars_used = chars_to_skip
# Finally, reset the encoder (merely useful for proper BOM handling)
try:
encoder = self._encoder or self._get_encoder()
except LookupError:
# Sometimes the encoder doesn't exist
pass
else:
if cookie != 0:
encoder.setstate(0)
else:
encoder.reset()
return cookie
def read(self, n=None):