in tokenize.detect_encoding(), return utf-8-sig when a BOM is found

This commit is contained in:
Benjamin Peterson 2010-03-18 22:29:52 +00:00
parent 8c8042734a
commit 689a558098
4 changed files with 22 additions and 12 deletions

View file

@@ -726,7 +726,7 @@ class TestDetectEncoding(TestCase):
b'do_something(else)\n'
)
encoding, consumed_lines = detect_encoding(self.get_readline(lines))
-self.assertEquals(encoding, 'utf-8')
+self.assertEquals(encoding, 'utf-8-sig')
self.assertEquals(consumed_lines,
[b'# something\n', b'print(something)\n'])
@@ -747,7 +747,7 @@ class TestDetectEncoding(TestCase):
b'do_something(else)\n'
)
encoding, consumed_lines = detect_encoding(self.get_readline(lines))
-self.assertEquals(encoding, 'utf-8')
+self.assertEquals(encoding, 'utf-8-sig')
self.assertEquals(consumed_lines, [b'# coding=utf-8\n'])
def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
@@ -779,7 +779,7 @@ class TestDetectEncoding(TestCase):
b'do_something(else)\n'
)
encoding, consumed_lines = detect_encoding(self.get_readline(lines))
-self.assertEquals(encoding, 'utf-8')
+self.assertEquals(encoding, 'utf-8-sig')
self.assertEquals(consumed_lines,
[b'#! something\n', b'f# coding=utf-8\n'])
@@ -833,12 +833,12 @@ class TestDetectEncoding(TestCase):
readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
encoding, consumed_lines = detect_encoding(readline)
-self.assertEquals(encoding, 'utf-8')
+self.assertEquals(encoding, 'utf-8-sig')
self.assertEquals(consumed_lines, [b'print(something)\n'])
readline = self.get_readline((b'\xef\xbb\xbf',))
encoding, consumed_lines = detect_encoding(readline)
-self.assertEquals(encoding, 'utf-8')
+self.assertEquals(encoding, 'utf-8-sig')
self.assertEquals(consumed_lines, [])
readline = self.get_readline((b'# coding: bad\n',))