mirror of
https://github.com/python/cpython.git
synced 2025-12-23 09:19:18 +00:00
Merge 7a31a1f32f into a273bc99d2
This commit is contained in:
commit
8f41334810
2 changed files with 9 additions and 1 deletions
|
|
@ -40,6 +40,12 @@ from .types import Callback, SimpleContextManager, KeySpec, CommandName
|
|||
# syntax classes (three distinct integer codes)
SYNTAX_WHITESPACE = 0
SYNTAX_WORD = 1
SYNTAX_SYMBOL = 2
|
||||
|
||||
def normalize_surrogates(s: str) -> str:
    """Return *s* with UTF-16 surrogate pairs merged into single code points.

    A high surrogate immediately followed by a low surrogate is replaced by
    the one non-BMP character that the pair encodes.  Lone (unpaired)
    surrogates are passed through unchanged instead of raising.
    """
    # Round-trip through UTF-16 with 'surrogatepass' in BOTH directions:
    # encoding lets surrogate code points through, and decoding merges
    # well-formed pairs while leaving unpaired surrogates in place.  A strict
    # decode would raise UnicodeDecodeError on a lone surrogate.
    # The explicit little-endian codec avoids writing and re-parsing a BOM.
    try:
        return s.encode('utf-16-le', 'surrogatepass').decode(
            'utf-16-le', 'surrogatepass'
        )
    except UnicodeError:
        return s  # best-effort fallback: never fail on unexpected input
|
||||
|
||||
def make_default_syntax_table() -> dict[str, int]:
|
||||
# XXX perhaps should use some unicodedata here?
|
||||
|
|
@ -759,4 +765,5 @@ class Reader:
|
|||
|
||||
def get_unicode(self) -> str:
    """Return the current buffer as a unicode string.

    The items of ``self.buffer`` are joined and the result is passed through
    ``normalize_surrogates()`` before being returned.
    """
    # No early return of the raw join here: it would make the
    # normalization step below unreachable.
    text = "".join(self.buffer)
    return normalize_surrogates(text)
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
Fix a crash in the REPL on Windows when typing Unicode characters outside the Basic Multilingual Plane (U+10000 and above), such as emoji. Surrogate pairs in the input buffer are now normalized into the single code points they represent.
|
||||
Loading…
Add table
Add a link
Reference in a new issue