This commit is contained in:
Vedant Ravindra Dhoke 2025-12-23 07:25:16 +00:00 committed by GitHub
commit 8f41334810
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 9 additions and 1 deletions

View file

@ -40,6 +40,12 @@ from .types import Callback, SimpleContextManager, KeySpec, CommandName
# Syntax classes: three distinct integer codes, numbered from zero.
SYNTAX_WHITESPACE = 0
SYNTAX_WORD = 1
SYNTAX_SYMBOL = 2
def normalize_surrogates(s: str) -> str:
    """Return *s* with UTF-16 surrogate pairs merged into single code points.

    A high surrogate immediately followed by a low surrogate is replaced by
    the one non-BMP character that the pair encodes.  Every other character,
    including a lone (unpaired) surrogate, is returned unchanged.
    """
    try:
        # Round-trip through UTF-16: the decoder joins adjacent high/low
        # surrogate code units.  'surrogatepass' is required on BOTH sides;
        # without it on decode, a lone surrogate raises UnicodeDecodeError.
        # An explicit byte order avoids emitting and re-parsing a BOM.
        return s.encode('utf-16-le', 'surrogatepass').decode(
            'utf-16-le', 'surrogatepass'
        )
    except UnicodeError:
        # Best effort: hand back the input rather than crash the caller.
        return s
def make_default_syntax_table() -> dict[str, int]:
# XXX perhaps should use some unicodedata here?
@ -759,4 +765,5 @@ class Reader:
def get_unicode(self) -> str:
    """Return the current buffer as a unicode string.

    The items of ``self.buffer`` are concatenated and the result is passed
    through ``normalize_surrogates`` before being returned.
    """
    text = "".join(self.buffer)
    return normalize_surrogates(text)

View file

@ -0,0 +1 @@
Fix a crash in the REPL on Windows when typing Unicode characters outside the Basic Multilingual Plane (≥ U+10000), such as emoji. Surrogate pairs in the input buffer are now combined into the single code points they encode when the buffer is read.