mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
Issue #5006: Better handling of Unicode byte-order marks (BOMs) in the io library.
This means, for example, that opening a UTF-16 text file in append mode doesn't add a BOM at the end of the file if the file isn't empty.
This commit is contained in:
parent
b565577aa7
commit
e450185b4a
6 changed files with 168 additions and 22 deletions
20
Lib/_pyio.py
20
Lib/_pyio.py
|
@ -1436,6 +1436,15 @@ class TextIOWrapper(TextIOBase):
|
|||
self._snapshot = None # info for reconstructing decoder state
|
||||
self._seekable = self._telling = self.buffer.seekable()
|
||||
|
||||
if self._seekable and self.writable():
|
||||
position = self.buffer.tell()
|
||||
if position != 0:
|
||||
try:
|
||||
self._get_encoder().setstate(0)
|
||||
except LookupError:
|
||||
# Sometimes the encoder doesn't exist
|
||||
pass
|
||||
|
||||
# self._snapshot is either None, or a tuple (dec_flags, next_input)
|
||||
# where dec_flags is the second (integer) item of the decoder state
|
||||
# and next_input is the chunk of input bytes that comes next after the
|
||||
|
@ -1741,6 +1750,17 @@ class TextIOWrapper(TextIOBase):
|
|||
raise IOError("can't restore logical file position")
|
||||
self._decoded_chars_used = chars_to_skip
|
||||
|
||||
# Finally, reset the encoder (merely useful for proper BOM handling)
|
||||
try:
|
||||
encoder = self._encoder or self._get_encoder()
|
||||
except LookupError:
|
||||
# Sometimes the encoder doesn't exist
|
||||
pass
|
||||
else:
|
||||
if cookie != 0:
|
||||
encoder.setstate(0)
|
||||
else:
|
||||
encoder.reset()
|
||||
return cookie
|
||||
|
||||
def read(self, n=None):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue