mirror of
https://github.com/python/cpython.git
synced 2025-09-17 14:16:02 +00:00
Patch #1031213: Decode source line in SyntaxErrors back to its original
source encoding. Will backport to 2.5.
This commit is contained in:
parent
58bd49f5fe
commit
a5136196bc
6 changed files with 107 additions and 5 deletions
|
@ -155,6 +155,32 @@ class CompilerTest(unittest.TestCase):
|
||||||
self.assertEquals(dct.get('result'), 1)
|
self.assertEquals(dct.get('result'), 1)
|
||||||
|
|
||||||
|
|
||||||
|
def _testErrEnc(self, src, text, offset):
|
||||||
|
try:
|
||||||
|
compile(src, "", "exec")
|
||||||
|
except SyntaxError, e:
|
||||||
|
self.assertEquals(e.offset, offset)
|
||||||
|
self.assertEquals(e.text, text)
|
||||||
|
|
||||||
|
def testSourceCodeEncodingsError(self):
|
||||||
|
# Test SyntaxError with encoding definition
|
||||||
|
sjis = "print '\x83\x70\x83\x43\x83\x5c\x83\x93', '\n"
|
||||||
|
ascii = "print '12345678', '\n"
|
||||||
|
encdef = "#! -*- coding: ShiftJIS -*-\n"
|
||||||
|
|
||||||
|
# ascii source without encdef
|
||||||
|
self._testErrEnc(ascii, ascii, 19)
|
||||||
|
|
||||||
|
# ascii source with encdef
|
||||||
|
self._testErrEnc(encdef+ascii, ascii, 19)
|
||||||
|
|
||||||
|
# non-ascii source with encdef
|
||||||
|
self._testErrEnc(encdef+sjis, sjis, 19)
|
||||||
|
|
||||||
|
# ShiftJIS source without encdef
|
||||||
|
self._testErrEnc(sjis, sjis, 19)
|
||||||
|
|
||||||
|
|
||||||
NOLINENO = (compiler.ast.Module, compiler.ast.Stmt, compiler.ast.Discard)
|
NOLINENO = (compiler.ast.Module, compiler.ast.Stmt, compiler.ast.Discard)
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
|
@ -320,6 +320,7 @@ Lars Immisch
|
||||||
Tony Ingraldi
|
Tony Ingraldi
|
||||||
John Interrante
|
John Interrante
|
||||||
Bob Ippolito
|
Bob Ippolito
|
||||||
|
Atsuo Ishimoto
|
||||||
Ben Jackson
|
Ben Jackson
|
||||||
Paul Jackson
|
Paul Jackson
|
||||||
David Jacobs
|
David Jacobs
|
||||||
|
|
|
@ -12,6 +12,9 @@ What's New in Python 2.6 alpha 1?
|
||||||
Core and builtins
|
Core and builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Patch #1031213: Decode source line in SyntaxErrors back to its original source
|
||||||
|
encoding.
|
||||||
|
|
||||||
- Py_ssize_t fields work in structmember when HAVE_LONG_LONG is not defined.
|
- Py_ssize_t fields work in structmember when HAVE_LONG_LONG is not defined.
|
||||||
|
|
||||||
- PEP 3123: Provide forward compatibility with Python 3.0, while keeping
|
- PEP 3123: Provide forward compatibility with Python 3.0, while keeping
|
||||||
|
|
|
@ -218,16 +218,24 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
|
||||||
err_ret->error = E_EOF;
|
err_ret->error = E_EOF;
|
||||||
err_ret->lineno = tok->lineno;
|
err_ret->lineno = tok->lineno;
|
||||||
if (tok->buf != NULL) {
|
if (tok->buf != NULL) {
|
||||||
|
char *text = NULL;
|
||||||
size_t len;
|
size_t len;
|
||||||
assert(tok->cur - tok->buf < INT_MAX);
|
assert(tok->cur - tok->buf < INT_MAX);
|
||||||
err_ret->offset = (int)(tok->cur - tok->buf);
|
err_ret->offset = (int)(tok->cur - tok->buf);
|
||||||
len = tok->inp - tok->buf;
|
len = tok->inp - tok->buf;
|
||||||
err_ret->text = (char *) PyObject_MALLOC(len + 1);
|
#ifdef Py_USING_UNICODE
|
||||||
if (err_ret->text != NULL) {
|
text = PyTokenizer_RestoreEncoding(tok, len, &err_ret->offset);
|
||||||
if (len > 0)
|
|
||||||
strncpy(err_ret->text, tok->buf, len);
|
#endif
|
||||||
err_ret->text[len] = '\0';
|
if (text == NULL) {
|
||||||
|
text = (char *) PyObject_MALLOC(len + 1);
|
||||||
|
if (text != NULL) {
|
||||||
|
if (len > 0)
|
||||||
|
strncpy(text, tok->buf, len);
|
||||||
|
text[len] = '\0';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
err_ret->text = text;
|
||||||
}
|
}
|
||||||
} else if (tok->encoding != NULL) {
|
} else if (tok->encoding != NULL) {
|
||||||
node* r = PyNode_New(encoding_decl);
|
node* r = PyNode_New(encoding_decl);
|
||||||
|
|
|
@ -1522,6 +1522,68 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* This function is only called from parsetok. However, it cannot live
|
||||||
|
there, as it must be empty for PGEN, and we can check for PGEN only
|
||||||
|
in this file. */
|
||||||
|
|
||||||
|
#ifdef PGEN
|
||||||
|
/* PGEN variant: performs no re-encoding.  Always returns NULL and leaves
   *offset untouched. */
char *
PyTokenizer_RestoreEncoding(struct tok_state *tok, int len, int *offset)
{
    /* The parameters exist only to match the non-PGEN signature. */
    (void)tok;
    (void)len;
    (void)offset;
    return NULL;
}
|
||||||
|
#else
|
||||||
|
/* Re-encode `len` bytes of UTF-8 data at `text` into the codec named `enc`.

   Decoding and encoding both use the "replace" error handler.  Returns a
   new reference to the encoded string object, or NULL if either step
   failed.  On failure the pending exception is discarded, so NULL simply
   means "no re-encoded form available". */
static PyObject *
dec_utf8(const char *enc, const char *text, size_t len) {
    PyObject *ret = NULL;
    PyObject *unicode_text = PyUnicode_DecodeUTF8(text, len, "replace");
    if (unicode_text) {
        ret = PyUnicode_AsEncodedString(unicode_text, enc, "replace");
        Py_DECREF(unicode_text);
    }
    if (!ret) {
        /* This is a best-effort conversion done while another error is
           being reported.  PyErr_Print() would write an unrelated
           traceback to stderr and exits the process on SystemExit, so
           just drop the exception. */
        PyErr_Clear();
    }
    return ret;
}
|
||||||
|
|
||||||
|
char *
|
||||||
|
PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
|
||||||
|
{
|
||||||
|
char *text = NULL;
|
||||||
|
if (tok->encoding) {
|
||||||
|
/* convert source to original encondig */
|
||||||
|
PyObject *lineobj = dec_utf8(tok->encoding, tok->buf, len);
|
||||||
|
if (lineobj != NULL) {
|
||||||
|
int linelen = PyString_Size(lineobj);
|
||||||
|
const char *line = PyString_AsString(lineobj);
|
||||||
|
text = PyObject_MALLOC(linelen + 1);
|
||||||
|
if (text != NULL && line != NULL) {
|
||||||
|
if (linelen)
|
||||||
|
strncpy(text, line, linelen);
|
||||||
|
text[linelen] = '\0';
|
||||||
|
}
|
||||||
|
Py_DECREF(lineobj);
|
||||||
|
|
||||||
|
/* adjust error offset */
|
||||||
|
if (*offset > 1) {
|
||||||
|
PyObject *offsetobj = dec_utf8(tok->encoding,
|
||||||
|
tok->buf, *offset-1);
|
||||||
|
if (offsetobj) {
|
||||||
|
*offset = PyString_Size(offsetobj) + 1;
|
||||||
|
Py_DECREF(offsetobj);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return text;
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef Py_DEBUG
|
#ifdef Py_DEBUG
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
@ -58,6 +58,8 @@ extern struct tok_state *PyTokenizer_FromString(const char *);
|
||||||
extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *);
|
extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *);
|
||||||
extern void PyTokenizer_Free(struct tok_state *);
|
extern void PyTokenizer_Free(struct tok_state *);
|
||||||
extern int PyTokenizer_Get(struct tok_state *, char **, char **);
|
extern int PyTokenizer_Get(struct tok_state *, char **, char **);
|
||||||
|
extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok,
|
||||||
|
int len, int *offset);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue