Issue #18873: The tokenize module, IDLE, 2to3, and the findnocoding.py script
now detect Python source code encoding only in comment lines.
2025-11-24 20:30:18 +00:00 · 2013-09-16 23:51:56 +03:00 · 2013-09-16 23:51:56 +03:00 · dafea85190
commit dafea85190
parent 975fce3788
9 changed files with 44 additions and 22 deletions
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -31,7 +31,7 @@ from token import *
 from codecs import lookup, BOM_UTF8
 import collections
 from io import TextIOWrapper
-cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
+cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)

 import token
 __all__ = token.__all__ + ["COMMENT", "tokenize", "detect_encoding",
@@ -372,10 +372,10 @@ def detect_encoding(readline):
                msg = '{} for {!r}'.format(msg, filename)
            raise SyntaxError(msg)

-        matches = cookie_re.findall(line_string)
-        if not matches:
+        match = cookie_re.match(line_string)
+        if not match:
            return None
-        encoding = _get_normal_name(matches[0])
+        encoding = _get_normal_name(match.group(1))
        try:
            codec = lookup(encoding)
        except LookupError: