mirror of
https://github.com/python/cpython.git
synced 2025-10-10 00:43:41 +00:00
bpo-46054: Fix parsing error when parsing non-utf8 characters in source files (GH-30068)
This commit is contained in:
parent
59435eea08
commit
4325a766f5
3 changed files with 19 additions and 8 deletions
|
@ -2387,6 +2387,18 @@ class SyntaxErrorTests(unittest.TestCase):
|
||||||
finally:
|
finally:
|
||||||
unlink(TESTFN)
|
unlink(TESTFN)
|
||||||
|
|
||||||
|
def test_non_utf8(self):
|
||||||
|
# Check that a file starting with binary, non-printable bytes (an ELF header) is reported as a SyntaxError
|
||||||
|
try:
|
||||||
|
with open(TESTFN, 'bw') as testfile:
|
||||||
|
testfile.write(b'\x7fELF\x02\x01\x01\x00\x00\x00')
|
||||||
|
rc, out, err = script_helper.assert_python_failure('-Wd', '-X', 'utf8', TESTFN)
|
||||||
|
err = err.decode('utf-8').splitlines()
|
||||||
|
|
||||||
|
self.assertEqual(err[-1], "SyntaxError: invalid non-printable character U+007F")
|
||||||
|
finally:
|
||||||
|
unlink(TESTFN)
|
||||||
|
|
||||||
def test_attributes_new_constructor(self):
|
def test_attributes_new_constructor(self):
|
||||||
args = ("bad.py", 1, 2, "abcdefg", 1, 100)
|
args = ("bad.py", 1, 2, "abcdefg", 1, 100)
|
||||||
the_exception = SyntaxError("bad bad", args)
|
the_exception = SyntaxError("bad bad", args)
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Fix parser error when parsing non-UTF-8 characters in source files. Patch by
|
||||||
|
Pablo Galindo.
|
|
@ -819,10 +819,10 @@ tok_readline_raw(struct tok_state *tok)
|
||||||
tok_concatenate_interactive_new_line(tok, line) == -1) {
|
tok_concatenate_interactive_new_line(tok, line) == -1) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if (*tok->inp == '\0') {
|
tok->inp = strchr(tok->inp, '\0');
|
||||||
|
if (tok->inp == tok->buf) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
tok->inp = strchr(tok->inp, '\0');
|
|
||||||
} while (tok->inp[-1] != '\n');
|
} while (tok->inp[-1] != '\n');
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -984,13 +984,10 @@ tok_underflow_file(struct tok_state *tok) {
|
||||||
}
|
}
|
||||||
/* The default encoding is UTF-8, so make sure we don't have any
|
/* The default encoding is UTF-8, so make sure we don't have any
|
||||||
non-UTF-8 sequences in it. */
|
non-UTF-8 sequences in it. */
|
||||||
if (!tok->encoding
|
if (!tok->encoding && !ensure_utf8(tok->cur, tok)) {
|
||||||
&& (tok->decoding_state != STATE_NORMAL || tok->lineno >= 2)) {
|
|
||||||
if (!ensure_utf8(tok->cur, tok)) {
|
|
||||||
error_ret(tok);
|
error_ret(tok);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
assert(tok->done == E_OK);
|
assert(tok->done == E_OK);
|
||||||
return tok->done == E_OK;
|
return tok->done == E_OK;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue