gh-124008: Fix calculation of the number of written bytes for the Windows console (GH-124059)

Since MultiByteToWideChar()/WideCharToMultiByte() is not reversible if
the data contains invalid UTF-8 sequences, use binary search to
calculate the number of written bytes from the number of written
characters.

Also fix writing incomplete UTF-8 sequences.

Also fix handling of memory allocation failures.
This commit is contained in:
Serhiy Storchaka 2024-11-27 13:38:12 +02:00 committed by GitHub
parent 83926d3b4c
commit 3cf83d91a5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 115 additions and 28 deletions

View file

@ -142,6 +142,29 @@ class WindowsConsoleIOTests(unittest.TestCase):
with ConIO('CONOUT$', 'w') as f:
self.assertEqual(f.write(b''), 0)
@requires_resource('console')
def test_write(self):
    """Check the byte count returned by write() on the console output.

    Every payload is a prefix followed by a UTF-8 multibyte sequence,
    either complete or cut short:

    * a payload ending in a cut-short sequence is expected to report
      only the length of the prefix as written;
    * a complete sequence, or a cut-short one followed by b'z' or
      b'\\xff', is expected to be reported as written in full.
    """
    prefixes = (
        b'',
        b'abc',
        b'\xc2\xa7\xe2\x98\x83\xf0\x9f\x90\x8d',
        b'\xff'*10,
    )
    # Encoded sequences of two, three and four bytes.
    sequences = (b'\xc2\xa7', b'\xe2\x98\x83', b'\xf0\x9f\x90\x8d')

    # Payloads that must be written in full; checked after the
    # incomplete-sequence cases have all been exercised.
    full_writes = []
    with ConIO('CONOUT$', 'w') as f:
        for prefix in prefixes:
            for seq in sequences:
                full_writes.append(prefix + seq)
                for cut in range(1, len(seq)):
                    data = prefix + seq[:cut]
                    full_writes += [data + b'z', data + b'\xff']
                    # incomplete multibyte sequence
                    with self.subTest(data=data):
                        written = f.write(data)
                        self.assertEqual(written, len(prefix))

        for data in full_writes:
            with self.subTest(data=data):
                written = f.write(data)
                self.assertEqual(written, len(data))
def assertStdinRoundTrip(self, text):
stdin = open('CONIN$', 'r')
old_stdin = sys.stdin