Mirror of https://github.com/python/cpython.git
reuse tokenize.detect_encoding in linecache instead of a custom solution
patch by Victor Stinner #4016
parent a8abe86331
commit 9b8d24b17d

2 changed files with 8 additions and 23 deletions
Lib/linecache.py

@@ -7,7 +7,7 @@ that name.
 
 import sys
 import os
-import re
+import tokenize
 
 __all__ = ["getline", "clearcache", "checkcache"]
 
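Note: the re import existed only for the hand-rolled coding-cookie scan deleted in the next hunk; tokenize.detect_encoding() performs that scan (plus BOM detection) itself. A minimal sketch of its return value, using io.BytesIO as a stand-in byte stream:

    import io
    import tokenize

    source = b'# -*- coding: utf-8 -*-\nspam = 1\n'
    encoding, consumed = tokenize.detect_encoding(io.BytesIO(source).readline)
    # encoding == 'utf-8'
    # consumed == [b'# -*- coding: utf-8 -*-\n'] -- the raw line(s) read while sniffing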
@@ -120,27 +120,11 @@ def updatecache(filename, module_globals=None):
                 pass
         else:
             # No luck
-##          print '*** Cannot stat', filename, ':', msg
             return []
-##  print("Refreshing cache for %s..." % fullname)
-    try:
-        fp = open(fullname, 'rU')
+    with open(fullname, 'rb') as fp:
+        coding, line = tokenize.detect_encoding(fp.readline)
+    with open(fullname, 'r', encoding=coding) as fp:
         lines = fp.readlines()
-        fp.close()
-    except Exception as msg:
-##      print '*** Cannot open', fullname, ':', msg
-        return []
-    coding = "utf-8"
-    for line in lines[:2]:
-        m = re.search(r"coding[:=]\s*([-\w.]+)", line)
-        if m:
-            coding = m.group(1)
-            break
-    try:
-        lines = [line if isinstance(line, str) else str(line, coding)
-                 for line in lines]
-    except:
-        pass  # Hope for the best
     size, mtime = stat.st_size, stat.st_mtime
     cache[filename] = size, mtime, lines, fullname
     return lines
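The replacement read path, as a standalone sketch; read_source_lines is a hypothetical helper, not part of linecache:

    import tokenize

    def read_source_lines(fullname):
        # Sniff the encoding from the raw bytes, then reopen the file in
        # text mode with that encoding to get correctly decoded lines.
        with open(fullname, 'rb') as fp:
            coding, line = tokenize.detect_encoding(fp.readline)
        with open(fullname, 'r', encoding=coding) as fp:
            return fp.readlines()

One behavioral consequence visible in the diff: the old except Exception fallback is gone, so a file that exists but cannot be opened or decoded now raises instead of returning [].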
Lib/tokenize.py

@@ -27,7 +27,6 @@ __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
 import re, string, sys
 from token import *
 from codecs import lookup, BOM_UTF8
-from itertools import chain, repeat
 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
 
 import token
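The itertools import can be dropped because the hunk below removes the module's only use of chain: the replay of the consumed lines moves into readline_generator() itself. A quick equivalence sketch, with hypothetical stand-in names:

    from itertools import chain

    consumed = [b'line 1\n', b'line 2\n']   # lines detect_encoding() already read

    def rest():                             # stand-in for the readline() loop
        yield b'line 3\n'

    # Old shape: prepend the consumed lines with itertools.chain.
    old_stream = list(chain(consumed, rest()))

    # New shape: the generator replays the consumed lines itself.
    def new_stream():
        for line in consumed:
            yield line
        for line in rest():
            yield line

    assert old_stream == list(new_stream())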
@@ -327,13 +326,15 @@ def tokenize(readline):
     which tells you which encoding was used to decode the bytes stream.
     """
     encoding, consumed = detect_encoding(readline)
-    def readline_generator():
+    def readline_generator(consumed):
+        for line in consumed:
+            yield line
         while True:
             try:
                 yield readline()
             except StopIteration:
                 return
-    chained = chain(consumed, readline_generator())
+    chained = readline_generator(consumed)
     return _tokenize(chained.__next__, encoding)
 
 
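The replay matters because detect_encoding() consumes up to two lines before _tokenize() ever runs; without re-feeding them, the tokens for the cookie line would be lost. A small usage check, with io.BytesIO standing in for a real file:

    import io
    import tokenize

    source = b'# -*- coding: utf-8 -*-\nspam = 1\n'
    for tok in tokenize.tokenize(io.BytesIO(source).readline):
        print(tok)

The first token reports the detected encoding ('utf-8'), and the COMMENT token for the cookie line still appears because readline_generator() replays the consumed lines.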