Issue #18873: The tokenize module, IDLE, 2to3, and the findnocoding.py script
now detect Python source code encoding only in comment lines.
2025-10-17 20:28:43 +00:00 · 2013-09-16 23:57:00 +03:00 · 2013-09-16 23:57:00 +03:00 · 935349406a
commit 935349406a
parent 3c41154331 dafea85190
9 changed files with 48 additions and 22 deletions
--- a/Lib/idlelib/IOBinding.py
+++ b/Lib/idlelib/IOBinding.py
@@ -63,7 +63,7 @@ locale_encoding = locale_encoding.lower()
 encoding = locale_encoding  ### KBK 07Sep07  This is used all over IDLE, check!
                            ### 'encoding' is used below in encode(), check!

-coding_re = re.compile("coding[:=]\s*([-\w_.]+)")
+coding_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)

 def coding_spec(data):
    """Return the encoding declaration according to PEP 263.
@@ -84,14 +84,16 @@ def coding_spec(data):
        lines = data
    # consider only the first two lines
    if '\n' in lines:
-        lst = lines.split('\n')[:2]
+        lst = lines.split('\n', 2)[:2]
    elif '\r' in lines:
-        lst = lines.split('\r')[:2]
+        lst = lines.split('\r', 2)[:2]
+    else:
+        lst = [lines]
+    for line in lst:
+        match = coding_re.match(line)
+        if match is not None:
+            break
    else:
-        lst = list(lines)
-    str = '\n'.join(lst)
-    match = coding_re.search(str)
-    if not match:
        return None
    name = match.group(1)
    try: