mirror of
https://github.com/python/cpython.git
synced 2025-08-03 00:23:06 +00:00
bpo-46054: Fix parsing error when parsing non-utf8 characters in source files (GH-30068) (GH-30069)
(cherry picked from commit 4325a766f5
)
Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
This commit is contained in:
parent
438817fdd5
commit
94483f1e3c
3 changed files with 19 additions and 8 deletions
|
@ -2368,6 +2368,18 @@ class SyntaxErrorTests(unittest.TestCase):
|
|||
finally:
|
||||
unlink(TESTFN)
|
||||
|
||||
def test_non_utf8(self):
    """Running a script that holds a non-UTF-8 byte must fail with SyntaxError.

    The script is written to TESTFN, executed in a child interpreter in
    UTF-8 mode, and the last line of the child's stderr is checked for
    the expected message. TESTFN is removed whether or not the check passes.
    """
    expected_message = "SyntaxError: Non-UTF-8 code starting with '\\x89' in file"
    try:
        # 0x89 is not a valid first byte of any UTF-8 sequence.
        with open(TESTFN, 'bw') as script:
            script.write(b"\x89")

        # Only stderr matters here; the return code and stdout are ignored.
        _, _, raw_stderr = script_helper.assert_python_failure(
            '-Wd', '-X', 'utf8', TESTFN)
        stderr_lines = raw_stderr.decode('utf-8').splitlines()

        # The error summary is the final line of the traceback output.
        self.assertIn(expected_message, stderr_lines[-1])
    finally:
        unlink(TESTFN)
|
||||
|
||||
def test_attributes_new_constructor(self):
|
||||
args = ("bad.py", 1, 2, "abcdefg", 1, 100)
|
||||
the_exception = SyntaxError("bad bad", args)
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Fix a parser error raised when a source file contains non-UTF-8 characters. Patch by
|
||||
Pablo Galindo.
|
|
@ -818,10 +818,10 @@ tok_readline_raw(struct tok_state *tok)
|
|||
tok_concatenate_interactive_new_line(tok, line) == -1) {
|
||||
return 0;
|
||||
}
|
||||
if (*tok->inp == '\0') {
|
||||
tok->inp = strchr(tok->inp, '\0');
|
||||
if (tok->inp == tok->buf) {
|
||||
return 0;
|
||||
}
|
||||
tok->inp = strchr(tok->inp, '\0');
|
||||
} while (tok->inp[-1] != '\n');
|
||||
return 1;
|
||||
}
|
||||
|
@ -983,13 +983,10 @@ tok_underflow_file(struct tok_state *tok) {
|
|||
}
|
||||
/* The default encoding is UTF-8, so make sure we don't have any
|
||||
non-UTF-8 sequences in it. */
|
||||
if (!tok->encoding
|
||||
&& (tok->decoding_state != STATE_NORMAL || tok->lineno >= 2)) {
|
||||
if (!ensure_utf8(tok->cur, tok)) {
|
||||
if (!tok->encoding && !ensure_utf8(tok->cur, tok)) {
|
||||
error_ret(tok);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
assert(tok->done == E_OK);
|
||||
return tok->done == E_OK;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue