mirror of
https://github.com/python/cpython.git
synced 2025-12-08 18:32:16 +00:00
Bug #2301: Don't try decoding the source code into the original
encoding for syntax errors.
This commit is contained in:
parent
ddaa7064ee
commit
2593146227
4 changed files with 18 additions and 74 deletions
|
|
@ -23,6 +23,13 @@ class PEP263Test(unittest.TestCase):
|
||||||
exec(c, d)
|
exec(c, d)
|
||||||
self.assertEqual(d['u'], '\xf3')
|
self.assertEqual(d['u'], '\xf3')
|
||||||
|
|
||||||
|
def test_issue2301(self):
|
||||||
|
try:
|
||||||
|
compile(b"# coding: cp932\nprint '\x94\x4e'", "dummy", "exec")
|
||||||
|
except SyntaxError as v:
|
||||||
|
self.assertEquals(v.text, "print '\u5e74'")
|
||||||
|
else:
|
||||||
|
self.fail()
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
test_support.run_unittest(PEP263Test)
|
test_support.run_unittest(PEP263Test)
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,12 @@ What's New in Python 3.0a4?
|
||||||
|
|
||||||
*Release date: XX-XXX-2008*
|
*Release date: XX-XXX-2008*
|
||||||
|
|
||||||
|
Core and Builtins
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
- Bug #2301: Don't try decoding the source code into the original
|
||||||
|
encoding for syntax errors.
|
||||||
|
|
||||||
Extension Modules
|
Extension Modules
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -213,22 +213,17 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
|
||||||
err_ret->error = E_EOF;
|
err_ret->error = E_EOF;
|
||||||
err_ret->lineno = tok->lineno;
|
err_ret->lineno = tok->lineno;
|
||||||
if (tok->buf != NULL) {
|
if (tok->buf != NULL) {
|
||||||
char *text = NULL;
|
|
||||||
size_t len;
|
size_t len;
|
||||||
assert(tok->cur - tok->buf < INT_MAX);
|
assert(tok->cur - tok->buf < INT_MAX);
|
||||||
err_ret->offset = (int)(tok->cur - tok->buf);
|
err_ret->offset = (int)(tok->cur - tok->buf);
|
||||||
len = tok->inp - tok->buf;
|
len = tok->inp - tok->buf;
|
||||||
text = PyTokenizer_RestoreEncoding(tok, len, &err_ret->offset);
|
err_ret->text = (char *) PyObject_MALLOC(len + 1);
|
||||||
if (text == NULL) {
|
if (err_ret->text != NULL) {
|
||||||
text = (char *) PyObject_MALLOC(len + 1);
|
|
||||||
if (text != NULL) {
|
|
||||||
if (len > 0)
|
if (len > 0)
|
||||||
strncpy(text, tok->buf, len);
|
strncpy(err_ret->text, tok->buf, len);
|
||||||
text[len] = '\0';
|
err_ret->text[len] = '\0';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
err_ret->text = text;
|
|
||||||
}
|
|
||||||
} else if (tok->encoding != NULL) {
|
} else if (tok->encoding != NULL) {
|
||||||
node* r = PyNode_New(encoding_decl);
|
node* r = PyNode_New(encoding_decl);
|
||||||
if (!r) {
|
if (!r) {
|
||||||
|
|
|
||||||
|
|
@ -1579,70 +1579,6 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This function is only called from parsetok. However, it cannot live
|
|
||||||
there, as it must be empty for PGEN, and we can check for PGEN only
|
|
||||||
in this file. */
|
|
||||||
|
|
||||||
#ifdef PGEN
|
|
||||||
char*
|
|
||||||
PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int* offset)
|
|
||||||
{
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
static PyObject *
|
|
||||||
dec_utf8(const char *enc, const char *text, size_t len) {
|
|
||||||
PyObject *ret = NULL;
|
|
||||||
PyObject *unicode_text = PyUnicode_DecodeUTF8(text, len, "replace");
|
|
||||||
if (unicode_text) {
|
|
||||||
ret = PyUnicode_AsEncodedString(unicode_text, enc, "replace");
|
|
||||||
Py_DECREF(unicode_text);
|
|
||||||
}
|
|
||||||
if (!ret) {
|
|
||||||
PyErr_Clear();
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
assert(PyString_Check(ret));
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
char *
|
|
||||||
PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
|
|
||||||
{
|
|
||||||
char *text = NULL;
|
|
||||||
if (tok->encoding) {
|
|
||||||
/* convert source to original encoding */
|
|
||||||
PyObject *lineobj = dec_utf8(tok->encoding, tok->buf, len);
|
|
||||||
if (lineobj != NULL) {
|
|
||||||
int linelen = PyString_GET_SIZE(lineobj);
|
|
||||||
const char *line = PyString_AS_STRING(lineobj);
|
|
||||||
text = PyObject_MALLOC(linelen + 1);
|
|
||||||
if (text != NULL && line != NULL) {
|
|
||||||
if (linelen)
|
|
||||||
strncpy(text, line, linelen);
|
|
||||||
text[linelen] = '\0';
|
|
||||||
}
|
|
||||||
Py_DECREF(lineobj);
|
|
||||||
|
|
||||||
/* adjust error offset */
|
|
||||||
if (*offset > 1) {
|
|
||||||
PyObject *offsetobj = dec_utf8(tok->encoding,
|
|
||||||
tok->buf,
|
|
||||||
*offset-1);
|
|
||||||
if (offsetobj) {
|
|
||||||
*offset = 1 + Py_SIZE(offsetobj);
|
|
||||||
Py_DECREF(offsetobj);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return text;
|
|
||||||
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Get -*- encoding -*- from a Python file.
|
/* Get -*- encoding -*- from a Python file.
|
||||||
|
|
||||||
PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
|
PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue