Issue #18873: The tokenize module, IDLE, 2to3, and the findnocoding.py script now detect Python source code encoding only in comment lines.
This commit is contained in:
Serhiy Storchaka 2013-09-16 23:57:00 +03:00
commit 935349406a
9 changed files with 48 additions and 22 deletions

View file

@ -63,7 +63,7 @@ locale_encoding = locale_encoding.lower()
encoding = locale_encoding ### KBK 07Sep07 This is used all over IDLE, check!
### 'encoding' is used below in encode(), check!
coding_re = re.compile("coding[:=]\s*([-\w_.]+)")
coding_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
def coding_spec(data):
"""Return the encoding declaration according to PEP 263.
@ -84,14 +84,16 @@ def coding_spec(data):
lines = data
# consider only the first two lines
if '\n' in lines:
lst = lines.split('\n')[:2]
lst = lines.split('\n', 2)[:2]
elif '\r' in lines:
lst = lines.split('\r')[:2]
lst = lines.split('\r', 2)[:2]
else:
lst = [lines]
for line in lst:
match = coding_re.match(line)
if match is not None:
break
else:
lst = list(lines)
str = '\n'.join(lst)
match = coding_re.search(str)
if not match:
return None
name = match.group(1)
try: