gh-131878: Fix input of unicode characters with two or more code points in new pyrepl on Windows (gh-131901)

Co-authored-by: Tomas R. <tomas.roun8@gmail.com>
Co-authored-by: Chris Eibl <138194463+chris-eibl@users.noreply.github.com>
This commit is contained in:
Sergey Miryanov 2025-05-05 09:25:00 -07:00 committed by GitHub
parent d6078ed6d0
commit 0c5151bc81
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 68 additions and 27 deletions

View file

@ -69,18 +69,14 @@ class BaseEventQueue:
trace('added event {event}', event=event)
self.events.append(event)
def push(self, char: int | bytes | str) -> None:
def push(self, char: int | bytes) -> None:
"""
Processes a character by updating the buffer and handling special key mappings.
"""
assert isinstance(char, (int, bytes))
ord_char = char if isinstance(char, int) else ord(char)
if ord_char > 255:
assert isinstance(char, str)
char = bytes(char.encode(self.encoding, "replace"))
self.buf.extend(char)
else:
char = bytes(bytearray((ord_char,)))
self.buf.append(ord_char)
char = ord_char.to_bytes()
self.buf.append(ord_char)
if char in self.keymap:
if self.keymap is self.compiled_keymap:

View file

@ -485,7 +485,8 @@ class WindowsConsole(Console):
return None
elif self.__vt_support:
# If virtual terminal is enabled, scanning VT sequences
self.event_queue.push(rec.Event.KeyEvent.uChar.UnicodeChar)
for char in raw_key.encode(self.event_queue.encoding, "replace"):
self.event_queue.push(char)
continue
if key_event.dwControlKeyState & ALT_ACTIVE:

View file

@ -53,7 +53,7 @@ class EventQueueTestBase:
mock_keymap.compile_keymap.return_value = {"a": "b"}
eq = self.make_eventqueue()
eq.keymap = {b"a": "b"}
eq.push("a")
eq.push(b"a")
mock_keymap.compile_keymap.assert_called()
self.assertEqual(eq.events[0].evt, "key")
self.assertEqual(eq.events[0].data, "b")
@ -63,7 +63,7 @@ class EventQueueTestBase:
mock_keymap.compile_keymap.return_value = {"a": "b"}
eq = self.make_eventqueue()
eq.keymap = {b"c": "d"}
eq.push("a")
eq.push(b"a")
mock_keymap.compile_keymap.assert_called()
self.assertEqual(eq.events[0].evt, "key")
self.assertEqual(eq.events[0].data, "a")
@ -73,13 +73,13 @@ class EventQueueTestBase:
mock_keymap.compile_keymap.return_value = {"a": "b"}
eq = self.make_eventqueue()
eq.keymap = {b"a": {b"b": "c"}}
eq.push("a")
eq.push(b"a")
mock_keymap.compile_keymap.assert_called()
self.assertTrue(eq.empty())
eq.push("b")
eq.push(b"b")
self.assertEqual(eq.events[0].evt, "key")
self.assertEqual(eq.events[0].data, "c")
eq.push("d")
eq.push(b"d")
self.assertEqual(eq.events[1].evt, "key")
self.assertEqual(eq.events[1].data, "d")
@ -88,32 +88,32 @@ class EventQueueTestBase:
mock_keymap.compile_keymap.return_value = {"a": "b"}
eq = self.make_eventqueue()
eq.keymap = {b"a": {b"b": "c"}}
eq.push("a")
eq.push(b"a")
mock_keymap.compile_keymap.assert_called()
self.assertTrue(eq.empty())
eq.flush_buf()
eq.push("\033")
eq.push(b"\033")
self.assertEqual(eq.events[0].evt, "key")
self.assertEqual(eq.events[0].data, "\033")
eq.push("b")
eq.push(b"b")
self.assertEqual(eq.events[1].evt, "key")
self.assertEqual(eq.events[1].data, "b")
def test_push_special_key(self):
eq = self.make_eventqueue()
eq.keymap = {}
eq.push("\x1b")
eq.push("[")
eq.push("A")
eq.push(b"\x1b")
eq.push(b"[")
eq.push(b"A")
self.assertEqual(eq.events[0].evt, "key")
self.assertEqual(eq.events[0].data, "\x1b")
def test_push_unrecognized_escape_sequence(self):
eq = self.make_eventqueue()
eq.keymap = {}
eq.push("\x1b")
eq.push("[")
eq.push("Z")
eq.push(b"\x1b")
eq.push(b"[")
eq.push(b"Z")
self.assertEqual(len(eq.events), 3)
self.assertEqual(eq.events[0].evt, "key")
self.assertEqual(eq.events[0].data, "\x1b")
@ -122,12 +122,54 @@ class EventQueueTestBase:
self.assertEqual(eq.events[2].evt, "key")
self.assertEqual(eq.events[2].data, "Z")
def test_push_unicode_character(self):
def test_push_unicode_character_as_str(self):
eq = self.make_eventqueue()
eq.keymap = {}
eq.push("ч")
self.assertEqual(eq.events[0].evt, "key")
self.assertEqual(eq.events[0].data, "ч")
with self.assertRaises(AssertionError):
eq.push("ч")
with self.assertRaises(AssertionError):
eq.push("ñ")
def test_push_unicode_character_two_bytes(self):
eq = self.make_eventqueue()
eq.keymap = {}
encoded = "ч".encode(eq.encoding, "replace")
self.assertEqual(len(encoded), 2)
eq.push(encoded[0])
e = eq.get()
self.assertIsNone(e)
eq.push(encoded[1])
e = eq.get()
self.assertEqual(e.evt, "key")
self.assertEqual(e.data, "ч")
def test_push_single_chars_and_unicode_character_as_str(self):
eq = self.make_eventqueue()
eq.keymap = {}
def _event(evt, data, raw=None):
r = raw if raw is not None else data.encode(eq.encoding)
e = Event(evt, data, r)
return e
def _push(keys):
for k in keys:
eq.push(k)
self.assertIsInstance("ñ", str)
# If an exception happens during push, the existing events must be
# preserved and we can continue to push.
_push(b"b")
with self.assertRaises(AssertionError):
_push("ñ")
_push(b"a")
self.assertEqual(eq.get(), _event("key", "b"))
self.assertEqual(eq.get(), _event("key", "a"))
@unittest.skipIf(support.MS_WINDOWS, "No Unix event queue on Windows")

View file

@ -0,0 +1,2 @@
Fix support of Unicode characters with two or more code points on Windows in
the new REPL.