This commit is contained in:
Serhiy Storchaka 2025-12-23 01:09:31 -05:00 committed by GitHub
commit 434fead833
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 27 additions and 45 deletions

View file

@ -28,9 +28,8 @@ TAB = False, True, True, None # Tab.
TRY_A = False, False, False, ATTRS # '.' for attributes.
TRY_F = False, False, False, FILES # '/' in quotes for file name.
# This string includes all chars that may be in an identifier.
# TODO Update this here and elsewhere.
ID_CHARS = string.ascii_letters + string.digits + "_"
# all ASCII chars that may be in an identifier
_ASCII_ID_CHARS = frozenset(string.ascii_letters + string.digits + "_")
SEPS = f"{os.sep}{os.altsep if os.altsep else ''}"
TRIGGERS = f".{SEPS}"
@ -134,7 +133,11 @@ class AutoComplete:
elif hp.is_in_code() and (not mode or mode==ATTRS):
self._remove_autocomplete_window()
mode = ATTRS
while i and (curline[i-1] in ID_CHARS or ord(curline[i-1]) > 127):
while i:
c = curline[i-1]
if c not in _ASCII_ID_CHARS:
if c <= '\x7f' or not ('a' + c).isidentifier():
break
i -= 1
comp_start = curline[i:j]
if i and curline[i-1] == '.': # Need object with attributes.

View file

@ -13,12 +13,10 @@ its state.
There is only one instance of Autoexpand.
'''
import re
import string
_LAST_WORD_RE = re.compile(r'\b\w+\Z')
class AutoExpand:
wordchars = string.ascii_letters + string.digits + "_"
def __init__(self, editwin):
self.text = editwin.text
self.bell = self.text.bell
@ -85,10 +83,8 @@ class AutoExpand:
def getprevword(self):
"Return the word prefix before the cursor."
line = self.text.get("insert linestart", "insert")
i = len(line)
while i > 0 and line[i-1] in self.wordchars:
i = i-1
return line[i:]
m = _LAST_WORD_RE.search(line)
return m[0] if m else ''
if __name__ == '__main__':

View file

@ -3,7 +3,6 @@ import importlib.util
import os
import platform
import re
import string
import sys
import tokenize
import traceback
@ -806,14 +805,12 @@ class EditorWindow:
if self.line_numbers is not None:
self.line_numbers.update_colors()
IDENTCHARS = string.ascii_letters + string.digits + "_"
def colorize_syntax_error(self, text, pos):
text.tag_add("ERROR", pos)
char = text.get(pos)
if char and char in self.IDENTCHARS:
if char and ('a' + char).isidentifier():
text.tag_add("ERROR", pos + " wordstart", pos)
if '\n' == text.get(pos): # error at line end
if char == '\n': # error at line end
text.mark_set("insert", pos)
else:
text.mark_set("insert", pos + "+1c")

View file

@ -14,13 +14,6 @@ _ASCII_ID_CHARS = frozenset(string.ascii_letters + string.digits + "_")
# all ASCII chars that may be the first char of an identifier
_ASCII_ID_FIRST_CHARS = frozenset(string.ascii_letters + "_")
# lookup table for whether 7-bit ASCII chars are valid in a Python identifier
_IS_ASCII_ID_CHAR = [(chr(x) in _ASCII_ID_CHARS) for x in range(128)]
# lookup table for whether 7-bit ASCII chars are valid as the first
# char in a Python identifier
_IS_ASCII_ID_FIRST_CHAR = \
[(chr(x) in _ASCII_ID_FIRST_CHARS) for x in range(128)]
class HyperParser:
def __init__(self, editwin, index):
@ -166,8 +159,6 @@ class HyperParser:
Keywords are not identifiers.
"""
is_ascii_id_char = _IS_ASCII_ID_CHAR
# Start at the end (pos) and work backwards.
i = pos
@ -175,44 +166,40 @@ class HyperParser:
# identifier characters. This is an optimization, since it
# is faster in the common case where most of the characters
# are ASCII.
while i > limit and (
ord(str[i - 1]) < 128 and
is_ascii_id_char[ord(str[i - 1])]
):
while i > limit and str[i - 1] in _ASCII_ID_CHARS:
i -= 1
# If the above loop ended due to reaching a non-ASCII
# character, continue going backwards using the most generic
# test for whether a string contains only valid identifier
# characters.
if i > limit and ord(str[i - 1]) >= 128:
while i - 4 >= limit and ('a' + str[i - 4:pos]).isidentifier():
if i > limit and str[i - 1] > '\x7f':
while i - 4 >= limit and ('a' + str[i - 4:i]).isidentifier():
i -= 4
if i - 2 >= limit and ('a' + str[i - 2:pos]).isidentifier():
if i - 2 >= limit and ('a' + str[i - 2:i]).isidentifier():
i -= 2
if i - 1 >= limit and ('a' + str[i - 1:pos]).isidentifier():
if i - 1 >= limit and ('a' + str[i - 1]).isidentifier():
i -= 1
# The identifier candidate starts here. If it isn't a valid
# identifier, don't eat anything. At this point that is only
# possible if the first character isn't a valid first
# character for an identifier.
if not str[i:pos].isidentifier():
if i < pos and not str[i].isidentifier():
return 0
elif i < pos:
# All characters in str[i:pos] are valid ASCII identifier
# characters, so it is enough to check that the first is
# valid as the first character of an identifier.
if not _IS_ASCII_ID_FIRST_CHAR[ord(str[i])]:
if str[i] not in _ASCII_ID_FIRST_CHARS:
return 0
# All keywords are valid identifiers, but should not be
# considered identifiers here, except for True, False and None.
if i < pos and (
iskeyword(str[i:pos]) and
str[i:pos] not in cls._ID_KEYWORDS
):
return 0
if i < pos:
word = str[i:pos]
if iskeyword(word) and word not in cls._ID_KEYWORDS:
return 0
return pos - i

View file

@ -1,5 +1,3 @@
import string
from idlelib.delegator import Delegator
# tkinter import not needed because module does not create widgets,
@ -251,10 +249,8 @@ class InsertCommand(Command):
self.chars = self.chars + cmd.chars
return True
alphanumeric = string.ascii_letters + string.digits + "_"
def classify(self, c):
if c in self.alphanumeric:
if ('a' + c).isidentifier():
return "alphanumeric"
if c == "\n":
return "newline"

View file

@ -0,0 +1,3 @@
Improve support of non-ASCII identifiers in IDLE
(autoexpanding, autocompletion, undo, etc).