Bug #2301: Don't try decoding the source code into the original encoding for syntax errors.
This commit is contained in:
Martin v. Löwis 2008-03-17 20:43:42 +00:00
parent ddaa7064ee
commit 2593146227
4 changed files with 18 additions and 74 deletions

View file

@ -1579,70 +1579,6 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
return result;
}
/* This function is only called from parsetok. However, it cannot live
there, as it must be empty for PGEN, and we can check for PGEN only
in this file. */
#ifdef PGEN
/* PGEN build variant: deliberately does nothing.  Every argument is
   ignored, *offset is left untouched, and NULL is always returned. */
char *
PyTokenizer_RestoreEncoding(struct tok_state *tok, int len, int *offset)
{
    (void)tok;
    (void)len;
    (void)offset;
    return NULL;
}
#else
/* Convert `len` bytes of UTF-8 `text` into the encoding named by `enc`.

   The bytes are first decoded as UTF-8 and the resulting unicode object is
   then encoded with `enc`; both steps use the "replace" error handler.

   Returns the encoded string object (which the caller must Py_DECREF), or
   NULL if either step fails.  On failure the pending Python error is
   cleared, so NULL is the only failure signal. */
static PyObject *
dec_utf8(const char *enc, const char *text, size_t len) {
    PyObject *decoded = PyUnicode_DecodeUTF8(text, len, "replace");
    PyObject *encoded = NULL;

    if (decoded != NULL) {
        encoded = PyUnicode_AsEncodedString(decoded, enc, "replace");
        /* The intermediate unicode object is no longer needed. */
        Py_DECREF(decoded);
    }
    if (encoded == NULL) {
        /* Swallow whatever exception the codec machinery raised. */
        PyErr_Clear();
        return NULL;
    }
    assert(PyString_Check(encoded));
    return encoded;
}
/* Re-encode the start of the tokenizer buffer from UTF-8 into the encoding
   recorded in tok->encoding.

   tok     tokenizer state; tok->encoding names the target encoding and
           tok->buf holds the UTF-8 text to convert.
   len     number of bytes of tok->buf to convert.
   offset  in/out error position.  It is treated as 1-based: values greater
           than 1 are rewritten to 1 + the re-encoded size of the first
           (*offset - 1) bytes of tok->buf.

   Returns a NUL-terminated copy obtained from PyObject_MALLOC (the caller
   owns it -- presumably released with PyObject_FREE), or NULL when no
   encoding is recorded, the conversion fails, or memory is exhausted.

   *offset is only modified when a non-NULL string is returned, so a caller
   that falls back to the raw buffer on NULL still holds an offset that
   matches the text it ends up using. */
char *
PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
{
    PyObject *lineobj;
    size_t linelen;
    char *text;

    if (!tok->encoding) {
        return NULL;
    }

    /* convert source to original encoding */
    lineobj = dec_utf8(tok->encoding, tok->buf, len);
    if (lineobj == NULL) {
        return NULL;
    }

    /* Keep the full object size instead of narrowing it to int. */
    linelen = (size_t)PyString_GET_SIZE(lineobj);
    text = PyObject_MALLOC(linelen + 1);
    if (text == NULL) {
        /* Out of memory: bail out before touching *offset. */
        Py_DECREF(lineobj);
        return NULL;
    }
    /* The length is known, so copy exactly that many bytes and terminate. */
    memcpy(text, PyString_AS_STRING(lineobj), linelen);
    text[linelen] = '\0';
    Py_DECREF(lineobj);

    /* adjust error offset */
    if (*offset > 1) {
        PyObject *offsetobj = dec_utf8(tok->encoding,
                                       tok->buf,
                                       *offset - 1);
        /* If the prefix cannot be converted, the offset is left as-is. */
        if (offsetobj) {
            *offset = 1 + (int)Py_SIZE(offsetobj);
            Py_DECREF(offsetobj);
        }
    }
    return text;
}
#endif
/* Get -*- encoding -*- from a Python file.
PyTokenizer_FindEncoding returns NULL when it can't find the encoding in