Merge 260543f61b into a273bc99d2

2025-12-23 09:19:18 +00:00 · 2025-12-23 01:09:31 -05:00 · 2025-12-23 01:09:31 -05:00 · 434fead833
commit 434fead833
parent a273bc99d2 260543f61b
6 changed files with 27 additions and 45 deletions
--- a/Lib/idlelib/autocomplete.py
+++ b/Lib/idlelib/autocomplete.py
@ -28,9 +28,8 @@ TAB   = False,    True,     True,    None   # Tab.
 TRY_A = False,    False,    False,   ATTRS  # '.' for attributes.
 TRY_F = False,    False,    False,   FILES  # '/' in quotes for file name.

-# This string includes all chars that may be in an identifier.
-# TODO Update this here and elsewhere.
-ID_CHARS = string.ascii_letters + string.digits + "_"
+# all ASCII chars that may be in an identifier
+_ASCII_ID_CHARS = frozenset(string.ascii_letters + string.digits + "_")

 SEPS = f"{os.sep}{os.altsep if os.altsep else ''}"
 TRIGGERS = f".{SEPS}"
@ -134,7 +133,11 @@ class AutoComplete:
        elif hp.is_in_code() and (not mode or mode==ATTRS):
            self._remove_autocomplete_window()
            mode = ATTRS
-            while i and (curline[i-1] in ID_CHARS or ord(curline[i-1]) > 127):
+            while i:
+                c = curline[i-1]
+                if c not in _ASCII_ID_CHARS:
+                    if c <= '\x7f' or not ('a' + c).isidentifier():
+                        break
                i -= 1
            comp_start = curline[i:j]
            if i and curline[i-1] == '.':  # Need object with attributes.
--- a/Lib/idlelib/autoexpand.py
+++ b/Lib/idlelib/autoexpand.py
@ -13,12 +13,10 @@ its state.
 There is only one instance of Autoexpand.
 '''
 import re
-import string

+_LAST_WORD_RE = re.compile(r'\b\w+\Z')

 class AutoExpand:
-    wordchars = string.ascii_letters + string.digits + "_"
-
    def __init__(self, editwin):
        self.text = editwin.text
        self.bell = self.text.bell
@ -85,10 +83,8 @@ class AutoExpand:
    def getprevword(self):
        "Return the word prefix before the cursor."
        line = self.text.get("insert linestart", "insert")
-        i = len(line)
-        while i > 0 and line[i-1] in self.wordchars:
-            i = i-1
-        return line[i:]
+        m = _LAST_WORD_RE.search(line)
+        return m[0] if m else ''


 if __name__ == '__main__':
--- a/Lib/idlelib/editor.py
+++ b/Lib/idlelib/editor.py
@ -3,7 +3,6 @@ import importlib.util
 import os
 import platform
 import re
-import string
 import sys
 import tokenize
 import traceback
@ -806,14 +805,12 @@ class EditorWindow:
        if self.line_numbers is not None:
            self.line_numbers.update_colors()

-    IDENTCHARS = string.ascii_letters + string.digits + "_"
-
    def colorize_syntax_error(self, text, pos):
        text.tag_add("ERROR", pos)
        char = text.get(pos)
-        if char and char in self.IDENTCHARS:
+        if char and ('a' + char).isidentifier():
            text.tag_add("ERROR", pos + " wordstart", pos)
-        if '\n' == text.get(pos):   # error at line end
+        if char == '\n':   # error at line end
            text.mark_set("insert", pos)
        else:
            text.mark_set("insert", pos + "+1c")
--- a/Lib/idlelib/hyperparser.py
+++ b/Lib/idlelib/hyperparser.py
@ -14,13 +14,6 @@ _ASCII_ID_CHARS = frozenset(string.ascii_letters + string.digits + "_")
 # all ASCII chars that may be the first char of an identifier
 _ASCII_ID_FIRST_CHARS = frozenset(string.ascii_letters + "_")

-# lookup table for whether 7-bit ASCII chars are valid in a Python identifier
-_IS_ASCII_ID_CHAR = [(chr(x) in _ASCII_ID_CHARS) for x in range(128)]
-# lookup table for whether 7-bit ASCII chars are valid as the first
-# char in a Python identifier
-_IS_ASCII_ID_FIRST_CHAR = \
-    [(chr(x) in _ASCII_ID_FIRST_CHARS) for x in range(128)]
-

 class HyperParser:
    def __init__(self, editwin, index):
@ -166,8 +159,6 @@ class HyperParser:

        Keywords, except True, False and None, do not count as identifiers.
        """
-        is_ascii_id_char = _IS_ASCII_ID_CHAR
-
        # Start at the end (pos) and work backwards.
        i = pos

@ -175,44 +166,40 @@ class HyperParser:
        # identifier characters. This is an optimization, since it
        # is faster in the common case where most of the characters
        # are ASCII.
-        while i > limit and (
-                ord(str[i - 1]) < 128 and
-                is_ascii_id_char[ord(str[i - 1])]
-        ):
+        while i > limit and str[i - 1] in _ASCII_ID_CHARS:
            i -= 1

        # If the above loop ended due to reaching a non-ASCII
        # character, continue going backwards using the most generic
        # test for whether a string contains only valid identifier
        # characters.
-        if i > limit and ord(str[i - 1]) >= 128:
-            while i - 4 >= limit and ('a' + str[i - 4:pos]).isidentifier():
+        if i > limit and str[i - 1] > '\x7f':
+            while i - 4 >= limit and ('a' + str[i - 4:i]).isidentifier():
                i -= 4
-            if i - 2 >= limit and ('a' + str[i - 2:pos]).isidentifier():
+            if i - 2 >= limit and ('a' + str[i - 2:i]).isidentifier():
                i -= 2
-            if i - 1 >= limit and ('a' + str[i - 1:pos]).isidentifier():
+            if i - 1 >= limit and ('a' + str[i - 1]).isidentifier():
                i -= 1

            # The identifier candidate starts here. If it isn't a valid
            # identifier, don't eat anything. At this point that is only
            # possible if the first character isn't a valid first
            # character for an identifier.
-            if not str[i:pos].isidentifier():
+            if i < pos and not str[i].isidentifier():
                return 0
        elif i < pos:
            # All characters in str[i:pos] are valid ASCII identifier
            # characters, so it is enough to check that the first is
            # valid as the first character of an identifier.
-            if not _IS_ASCII_ID_FIRST_CHAR[ord(str[i])]:
+            if str[i] not in _ASCII_ID_FIRST_CHARS:
                return 0

        # All keywords are valid identifiers, but should not be
        # considered identifiers here, except for True, False and None.
-        if i < pos and (
-                iskeyword(str[i:pos]) and
-                str[i:pos] not in cls._ID_KEYWORDS
-        ):
-            return 0
+        if i < pos:
+            word = str[i:pos]
+            if iskeyword(word) and word not in cls._ID_KEYWORDS:
+                return 0

        return pos - i

--- a/Lib/idlelib/undo.py
+++ b/Lib/idlelib/undo.py
@ -1,5 +1,3 @@
-import string
-
 from idlelib.delegator import Delegator

 # tkinter import not needed because module does not create widgets,
@ -251,10 +249,8 @@ class InsertCommand(Command):
        self.chars = self.chars + cmd.chars
        return True

-    alphanumeric = string.ascii_letters + string.digits + "_"
-
    def classify(self, c):
-        if c in self.alphanumeric:
+        if ('a' + c).isidentifier():
            return "alphanumeric"
        if c == "\n":
            return "newline"
--- a/Misc/NEWS.d/next/IDLE/2021-11-03-10-37-29.bpo-45692.QSuHbM.rst
+++ b/Misc/NEWS.d/next/IDLE/2021-11-03-10-37-29.bpo-45692.QSuHbM.rst
@ -0,0 +1,3 @@
+Improve support of non-ASCII identifiers in IDLE
+(autoexpanding, autocompletion, undo, etc).
+