Issue #14629: Raise SyntaxError in tokenize.detect_encoding

if the first two lines have non-UTF-8 characters without an encoding declaration.
This commit is contained in:
Martin v. Löwis 2012-04-20 14:36:47 +02:00
parent 8e6e0fdb7f
commit 63674f4b52
3 changed files with 18 additions and 2 deletions

View file

@@ -292,9 +292,12 @@ def detect_encoding(readline):
def find_cookie(line):
try:
line_string = line.decode('ascii')
# Decode as UTF-8. Either the line is an encoding declaration,
# in which case it should be pure ASCII, or it must be UTF-8
# per default encoding.
line_string = line.decode('utf-8')
except UnicodeDecodeError:
return None
raise SyntaxError("invalid or missing encoding declaration")
matches = cookie_re.findall(line_string)
if not matches: