Mirror of https://github.com/python/cpython.git
reuse tokenize.detect_encoding in linecache instead of a custom solution
patch by Victor Stinner #4016
parent a8abe86331
commit 9b8d24b17d

2 changed files with 8 additions and 23 deletions
Lib/linecache.py

@@ -7,7 +7,7 @@ that name.
 
 import sys
 import os
-import re
+import tokenize
 
 __all__ = ["getline", "clearcache", "checkcache"]
 
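Note: the re import existed only for the hand-rolled coding-cookie scan deleted in the next hunk; tokenize.detect_encoding() performs that scan (plus BOM detection) itself. A minimal sketch of its return value, using io.BytesIO as a stand-in byte stream:

    import io
    import tokenize

    source = b'# -*- coding: utf-8 -*-\nspam = 1\n'
    encoding, consumed = tokenize.detect_encoding(io.BytesIO(source).readline)
    # encoding == 'utf-8'
    # consumed == [b'# -*- coding: utf-8 -*-\n'] -- the raw line(s) read while sniffing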
@@ -120,27 +120,11 @@ def updatecache(filename, module_globals=None):
                 pass
         else:
             # No luck
-##          print '*** Cannot stat', filename, ':', msg
             return []
-##  print("Refreshing cache for %s..." % fullname)
-    try:
-        fp = open(fullname, 'rU')
+    with open(fullname, 'rb') as fp:
+        coding, line = tokenize.detect_encoding(fp.readline)
+    with open(fullname, 'r', encoding=coding) as fp:
         lines = fp.readlines()
-        fp.close()
-    except Exception as msg:
-##      print '*** Cannot open', fullname, ':', msg
-        return []
-    coding = "utf-8"
-    for line in lines[:2]:
-        m = re.search(r"coding[:=]\s*([-\w.]+)", line)
-        if m:
-            coding = m.group(1)
-            break
-    try:
-        lines = [line if isinstance(line, str) else str(line, coding)
-                 for line in lines]
-    except:
-        pass  # Hope for the best
     size, mtime = stat.st_size, stat.st_mtime
     cache[filename] = size, mtime, lines, fullname
     return lines
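The replacement read path, as a standalone sketch; read_source_lines is a hypothetical helper, not part of linecache:

    import tokenize

    def read_source_lines(fullname):
        # Sniff the encoding from the raw bytes, then reopen the file in
        # text mode with that encoding to get correctly decoded lines.
        with open(fullname, 'rb') as fp:
            coding, line = tokenize.detect_encoding(fp.readline)
        with open(fullname, 'r', encoding=coding) as fp:
            return fp.readlines()

One behavioral consequence visible in the diff: the old except Exception fallback is gone, so a file that exists but cannot be opened or decoded now raises instead of returning [].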
Lib/tokenize.py

@@ -27,7 +27,6 @@ __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
 import re, string, sys
 from token import *
 from codecs import lookup, BOM_UTF8
-from itertools import chain, repeat
 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
 
 import token
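The itertools import can be dropped because the hunk below removes the module's only use of chain: the replay of the consumed lines moves into readline_generator() itself. A quick equivalence sketch, with hypothetical stand-in names:

    from itertools import chain

    consumed = [b'line 1\n', b'line 2\n']   # lines detect_encoding() already read

    def rest():                             # stand-in for the readline() loop
        yield b'line 3\n'

    # Old shape: prepend the consumed lines with itertools.chain.
    old_stream = list(chain(consumed, rest()))

    # New shape: the generator replays the consumed lines itself.
    def new_stream():
        for line in consumed:
            yield line
        for line in rest():
            yield line

    assert old_stream == list(new_stream())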
@@ -327,13 +326,15 @@ def tokenize(readline):
     which tells you which encoding was used to decode the bytes stream.
     """
     encoding, consumed = detect_encoding(readline)
-    def readline_generator():
+    def readline_generator(consumed):
+        for line in consumed:
+            yield line
         while True:
             try:
                 yield readline()
             except StopIteration:
                 return
-    chained = chain(consumed, readline_generator())
+    chained = readline_generator(consumed)
     return _tokenize(chained.__next__, encoding)
 
 
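The replay matters because detect_encoding() consumes up to two lines before _tokenize() ever runs; without re-feeding them, the tokens for the cookie line would be lost. A small usage check, with io.BytesIO standing in for a real file:

    import io
    import tokenize

    source = b'# -*- coding: utf-8 -*-\nspam = 1\n'
    for tok in tokenize.tokenize(io.BytesIO(source).readline):
        print(tok)

The first token reports the detected encoding ('utf-8'), and the COMMENT token for the cookie line still appears because readline_generator() replays the consumed lines.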