mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
Issue 28128: Print out better error/warning messages for invalid string escapes.
This commit is contained in:
parent
a99cdb21a7
commit
42454af094
8 changed files with 173 additions and 22 deletions
|
@ -74,6 +74,11 @@ PyAPI_FUNC(PyObject*) _PyBytes_FromHex(
|
||||||
PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t,
|
PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t,
|
||||||
const char *, Py_ssize_t,
|
const char *, Py_ssize_t,
|
||||||
const char *);
|
const char *);
|
||||||
|
/* Helper for PyBytes_DecodeEscape that detects invalid escape chars.
   Takes the same five arguments as PyBytes_DecodeEscape, followed by an
   output pointer.  On entry to the decoding loop the output is set to NULL;
   when the first unrecognized escape is met it is set to point at the
   character that follows the backslash.  Later invalid escapes do not
   overwrite it. */
PyAPI_FUNC(PyObject *) _PyBytes_DecodeEscape(const char *, Py_ssize_t,
                                             const char *, Py_ssize_t,
                                             const char *,
                                             const char **);
|
||||||
|
|
||||||
/* Macro, trading safety for speed */
|
/* Macro, trading safety for speed */
|
||||||
#ifndef Py_LIMITED_API
|
#ifndef Py_LIMITED_API
|
||||||
|
|
|
@ -1486,6 +1486,17 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
|
||||||
const char *errors /* error handling */
|
const char *errors /* error handling */
|
||||||
);
|
);
|
||||||
|
|
||||||
|
/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
   chars.  The first invalid escape is recorded through the last argument
   so that the caller can decide how to report it. */
PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscape(
    const char *string,         /* Unicode-Escape encoded string */
    Py_ssize_t length,          /* size of string */
    const char *errors,         /* error handling */
    const char **first_invalid_escape  /* on return, points to first
                                          invalid escaped char in
                                          string (the char after the
                                          backslash), or is NULL if no
                                          invalid escape was seen. */
);
|
||||||
|
|
||||||
PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
|
PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
|
||||||
PyObject *unicode /* Unicode object */
|
PyObject *unicode /* Unicode object */
|
||||||
);
|
);
|
||||||
|
|
|
@ -31,6 +31,7 @@ import os
|
||||||
import sys
|
import sys
|
||||||
import shutil
|
import shutil
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import warnings
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
|
|
||||||
|
@ -104,6 +105,19 @@ class TestLiterals(unittest.TestCase):
|
||||||
self.assertRaises(SyntaxError, eval, r""" '\U000000' """)
|
self.assertRaises(SyntaxError, eval, r""" '\U000000' """)
|
||||||
self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)
|
self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)
|
||||||
|
|
||||||
|
def test_eval_str_invalid_escape(self):
    """Check that unrecognized str escapes warn and keep the backslash.

    Every character code from 1 to 127 that is not in the skip set below
    must trigger a DeprecationWarning when it follows a backslash in a str
    literal, and the literal must evaluate to the backslash plus that
    character.  A second check verifies the filename and line number
    attached to the warning for a multi-line literal.
    """
    # Characters that are not expected to warn after a backslash.
    skipped = b"""\n\r"'01234567NU\\abfnrtuvx"""
    for code in range(1, 128):
        if code not in skipped:
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"'\%c'" % code), '\\' + chr(code))

    # The bad escape sits on the second line of the evaluated source.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always', category=DeprecationWarning)
        eval("'''\n\\z'''")
    self.assertEqual(len(caught), 1)
    entry = caught[0]
    self.assertEqual(entry.filename, '<string>')
    self.assertEqual(entry.lineno, 2)
|
||||||
|
|
||||||
def test_eval_str_raw(self):
|
def test_eval_str_raw(self):
|
||||||
self.assertEqual(eval(""" r'x' """), 'x')
|
self.assertEqual(eval(""" r'x' """), 'x')
|
||||||
self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
|
self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
|
||||||
|
@ -130,6 +144,19 @@ class TestLiterals(unittest.TestCase):
|
||||||
self.assertRaises(SyntaxError, eval, r""" b'\x' """)
|
self.assertRaises(SyntaxError, eval, r""" b'\x' """)
|
||||||
self.assertRaises(SyntaxError, eval, r""" b'\x0' """)
|
self.assertRaises(SyntaxError, eval, r""" b'\x0' """)
|
||||||
|
|
||||||
|
def test_eval_bytes_invalid_escape(self):
    """Check that unrecognized bytes escapes warn and keep the backslash.

    Every character code from 1 to 127 that is not in the skip set below
    must trigger a DeprecationWarning when it follows a backslash in a
    bytes literal, and the literal must evaluate to the backslash plus that
    byte.  A second check verifies the filename and line number attached to
    the warning for a multi-line literal.
    """
    # Characters that are not expected to warn after a backslash.
    skipped = b"""\n\r"'01234567\\abfnrtvx"""
    for code in range(1, 128):
        if code not in skipped:
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"b'\%c'" % code),
                                 b'\\' + bytes([code]))

    # The bad escape sits on the second line of the evaluated source.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always', category=DeprecationWarning)
        eval("b'''\n\\z'''")
    self.assertEqual(len(caught), 1)
    entry = caught[0]
    self.assertEqual(entry.filename, '<string>')
    self.assertEqual(entry.lineno, 2)
|
||||||
|
|
||||||
def test_eval_bytes_raw(self):
|
def test_eval_bytes_raw(self):
|
||||||
self.assertEqual(eval(""" br'x' """), b'x')
|
self.assertEqual(eval(""" br'x' """), b'x')
|
||||||
self.assertEqual(eval(""" rb'x' """), b'x')
|
self.assertEqual(eval(""" rb'x' """), b'x')
|
||||||
|
|
|
@ -2413,13 +2413,6 @@ class UnicodeTest(string_tests.CommonTest,
|
||||||
support.check_free_after_iterating(self, iter, str)
|
support.check_free_after_iterating(self, iter, str)
|
||||||
support.check_free_after_iterating(self, reversed, str)
|
support.check_free_after_iterating(self, reversed, str)
|
||||||
|
|
||||||
def test_invalid_sequences(self):
|
|
||||||
for letter in string.ascii_letters + "89": # 0-7 are octal escapes
|
|
||||||
if letter in "abfnrtuvxNU":
|
|
||||||
continue
|
|
||||||
with self.assertWarns(DeprecationWarning):
|
|
||||||
eval(r"'\%s'" % letter)
|
|
||||||
|
|
||||||
|
|
||||||
class CAPITest(unittest.TestCase):
|
class CAPITest(unittest.TestCase):
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,10 @@ What's New in Python 3.7.0 alpha 1
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #28128: Deprecation warning for invalid str and byte escape
|
||||||
|
sequences now prints better information about where the error
|
||||||
|
occurs. Patch by Serhiy Storchaka and Eric Smith.
|
||||||
|
|
||||||
- Issue #28509: dict.update() no longer allocates an unnecessarily large amount of memory.
|
- Issue #28509: dict.update() no longer allocates an unnecessarily large amount of memory.
|
||||||
|
|
||||||
- Issue #28426: Fixed potential crash in PyUnicode_AsDecodedObject() in debug
|
- Issue #28426: Fixed potential crash in PyUnicode_AsDecodedObject() in debug
|
||||||
|
|
|
@ -1105,11 +1105,12 @@ _PyBytes_DecodeEscapeRecode(const char **s, const char *end,
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *PyBytes_DecodeEscape(const char *s,
|
PyObject *_PyBytes_DecodeEscape(const char *s,
|
||||||
Py_ssize_t len,
|
Py_ssize_t len,
|
||||||
const char *errors,
|
const char *errors,
|
||||||
Py_ssize_t unicode,
|
Py_ssize_t unicode,
|
||||||
const char *recode_encoding)
|
const char *recode_encoding,
|
||||||
|
const char **first_invalid_escape)
|
||||||
{
|
{
|
||||||
int c;
|
int c;
|
||||||
char *p;
|
char *p;
|
||||||
|
@ -1123,6 +1124,8 @@ PyObject *PyBytes_DecodeEscape(const char *s,
|
||||||
return NULL;
|
return NULL;
|
||||||
writer.overallocate = 1;
|
writer.overallocate = 1;
|
||||||
|
|
||||||
|
*first_invalid_escape = NULL;
|
||||||
|
|
||||||
end = s + len;
|
end = s + len;
|
||||||
while (s < end) {
|
while (s < end) {
|
||||||
if (*s != '\\') {
|
if (*s != '\\') {
|
||||||
|
@ -1207,9 +1210,12 @@ PyObject *PyBytes_DecodeEscape(const char *s,
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "invalid escape sequence '\\%c'", *(--s)) < 0)
|
if (*first_invalid_escape == NULL) {
|
||||||
goto failed;
|
*first_invalid_escape = s-1; /* Back up one char, since we've
|
||||||
|
already incremented s. */
|
||||||
|
}
|
||||||
*p++ = '\\';
|
*p++ = '\\';
|
||||||
|
s--;
|
||||||
goto non_esc; /* an arbitrary number of unescaped
|
goto non_esc; /* an arbitrary number of unescaped
|
||||||
UTF-8 bytes may follow. */
|
UTF-8 bytes may follow. */
|
||||||
}
|
}
|
||||||
|
@ -1222,6 +1228,29 @@ PyObject *PyBytes_DecodeEscape(const char *s,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PyObject *PyBytes_DecodeEscape(const char *s,
|
||||||
|
Py_ssize_t len,
|
||||||
|
const char *errors,
|
||||||
|
Py_ssize_t unicode,
|
||||||
|
const char *recode_encoding)
|
||||||
|
{
|
||||||
|
const char* first_invalid_escape;
|
||||||
|
PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
|
||||||
|
recode_encoding,
|
||||||
|
&first_invalid_escape);
|
||||||
|
if (result == NULL)
|
||||||
|
return NULL;
|
||||||
|
if (first_invalid_escape != NULL) {
|
||||||
|
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
|
||||||
|
"invalid escape sequence '\\%c'",
|
||||||
|
*first_invalid_escape) < 0) {
|
||||||
|
Py_DECREF(result);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
|
||||||
|
}
|
||||||
/* -------------------------------------------------------------------- */
|
/* -------------------------------------------------------------------- */
|
||||||
/* object api */
|
/* object api */
|
||||||
|
|
||||||
|
|
|
@ -5877,9 +5877,10 @@ PyUnicode_AsUTF16String(PyObject *unicode)
|
||||||
static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
|
static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
|
||||||
|
|
||||||
PyObject *
|
PyObject *
|
||||||
PyUnicode_DecodeUnicodeEscape(const char *s,
|
_PyUnicode_DecodeUnicodeEscape(const char *s,
|
||||||
Py_ssize_t size,
|
Py_ssize_t size,
|
||||||
const char *errors)
|
const char *errors,
|
||||||
|
const char **first_invalid_escape)
|
||||||
{
|
{
|
||||||
const char *starts = s;
|
const char *starts = s;
|
||||||
_PyUnicodeWriter writer;
|
_PyUnicodeWriter writer;
|
||||||
|
@ -5887,6 +5888,9 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
|
||||||
PyObject *errorHandler = NULL;
|
PyObject *errorHandler = NULL;
|
||||||
PyObject *exc = NULL;
|
PyObject *exc = NULL;
|
||||||
|
|
||||||
|
// so we can remember if we've seen an invalid escape char or not
|
||||||
|
*first_invalid_escape = NULL;
|
||||||
|
|
||||||
if (size == 0) {
|
if (size == 0) {
|
||||||
_Py_RETURN_UNICODE_EMPTY();
|
_Py_RETURN_UNICODE_EMPTY();
|
||||||
}
|
}
|
||||||
|
@ -6061,9 +6065,10 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
|
if (*first_invalid_escape == NULL) {
|
||||||
"invalid escape sequence '\\%c'", c) < 0)
|
*first_invalid_escape = s-1; /* Back up one char, since we've
|
||||||
goto onError;
|
already incremented s. */
|
||||||
|
}
|
||||||
WRITE_ASCII_CHAR('\\');
|
WRITE_ASCII_CHAR('\\');
|
||||||
WRITE_CHAR(c);
|
WRITE_CHAR(c);
|
||||||
continue;
|
continue;
|
||||||
|
@ -6098,6 +6103,27 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PyObject *
|
||||||
|
PyUnicode_DecodeUnicodeEscape(const char *s,
|
||||||
|
Py_ssize_t size,
|
||||||
|
const char *errors)
|
||||||
|
{
|
||||||
|
const char *first_invalid_escape;
|
||||||
|
PyObject *result = _PyUnicode_DecodeUnicodeEscape(s, size, errors,
|
||||||
|
&first_invalid_escape);
|
||||||
|
if (result == NULL)
|
||||||
|
return NULL;
|
||||||
|
if (first_invalid_escape != NULL) {
|
||||||
|
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
|
||||||
|
"invalid escape sequence '\\%c'",
|
||||||
|
*first_invalid_escape) < 0) {
|
||||||
|
Py_DECREF(result);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
/* Return a Unicode-Escape string version of the Unicode object.
|
/* Return a Unicode-Escape string version of the Unicode object.
|
||||||
|
|
||||||
If quotes is true, the string is enclosed in u"" or u'' quotes as
|
If quotes is true, the string is enclosed in u"" or u'' quotes as
|
||||||
|
|
66
Python/ast.c
66
Python/ast.c
|
@ -4113,8 +4113,34 @@ decode_utf8(struct compiling *c, const char **sPtr, const char *end)
|
||||||
return PyUnicode_DecodeUTF8(t, s - t, NULL);
|
return PyUnicode_DecodeUTF8(t, s - t, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
warn_invalid_escape_sequence(struct compiling *c, const node *n,
|
||||||
|
char first_invalid_escape_char)
|
||||||
|
{
|
||||||
|
PyObject *msg = PyUnicode_FromFormat("invalid escape sequence \\%c",
|
||||||
|
first_invalid_escape_char);
|
||||||
|
if (msg == NULL) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg,
|
||||||
|
c->c_filename, LINENO(n),
|
||||||
|
NULL, NULL) < 0 &&
|
||||||
|
PyErr_ExceptionMatches(PyExc_DeprecationWarning))
|
||||||
|
{
|
||||||
|
const char *s = PyUnicode_AsUTF8(msg);
|
||||||
|
if (s != NULL) {
|
||||||
|
ast_error(c, n, s);
|
||||||
|
}
|
||||||
|
Py_DECREF(msg);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
Py_DECREF(msg);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
decode_unicode_with_escapes(struct compiling *c, const char *s, size_t len)
|
decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
|
||||||
|
size_t len)
|
||||||
{
|
{
|
||||||
PyObject *v, *u;
|
PyObject *v, *u;
|
||||||
char *buf;
|
char *buf;
|
||||||
|
@ -4167,11 +4193,41 @@ decode_unicode_with_escapes(struct compiling *c, const char *s, size_t len)
|
||||||
len = p - buf;
|
len = p - buf;
|
||||||
s = buf;
|
s = buf;
|
||||||
|
|
||||||
v = PyUnicode_DecodeUnicodeEscape(s, len, NULL);
|
const char *first_invalid_escape;
|
||||||
|
v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape);
|
||||||
|
|
||||||
|
if (v != NULL && first_invalid_escape != NULL) {
|
||||||
|
if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
|
||||||
|
/* We have not decref u before because first_invalid_escape points
|
||||||
|
inside u. */
|
||||||
|
Py_XDECREF(u);
|
||||||
|
Py_DECREF(v);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
Py_XDECREF(u);
|
Py_XDECREF(u);
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
|
||||||
|
size_t len)
|
||||||
|
{
|
||||||
|
const char *first_invalid_escape;
|
||||||
|
PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, 0, NULL,
|
||||||
|
&first_invalid_escape);
|
||||||
|
if (result == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (first_invalid_escape != NULL) {
|
||||||
|
if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
|
||||||
|
Py_DECREF(result);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
/* Compile this expression in to an expr_ty. Add parens around the
|
/* Compile this expression in to an expr_ty. Add parens around the
|
||||||
expression, in order to allow leading spaces in the expression. */
|
expression, in order to allow leading spaces in the expression. */
|
||||||
static expr_ty
|
static expr_ty
|
||||||
|
@ -4310,7 +4366,7 @@ done:
|
||||||
literal_end-literal_start,
|
literal_end-literal_start,
|
||||||
NULL, NULL);
|
NULL, NULL);
|
||||||
else
|
else
|
||||||
*literal = decode_unicode_with_escapes(c, literal_start,
|
*literal = decode_unicode_with_escapes(c, n, literal_start,
|
||||||
literal_end-literal_start);
|
literal_end-literal_start);
|
||||||
if (!*literal)
|
if (!*literal)
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -5048,12 +5104,12 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *rawmode,
|
||||||
if (*rawmode)
|
if (*rawmode)
|
||||||
*result = PyBytes_FromStringAndSize(s, len);
|
*result = PyBytes_FromStringAndSize(s, len);
|
||||||
else
|
else
|
||||||
*result = PyBytes_DecodeEscape(s, len, NULL, /* ignored */ 0, NULL);
|
*result = decode_bytes_with_escapes(c, n, s, len);
|
||||||
} else {
|
} else {
|
||||||
if (*rawmode)
|
if (*rawmode)
|
||||||
*result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
|
*result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
|
||||||
else
|
else
|
||||||
*result = decode_unicode_with_escapes(c, s, len);
|
*result = decode_unicode_with_escapes(c, n, s, len);
|
||||||
}
|
}
|
||||||
return *result == NULL ? -1 : 0;
|
return *result == NULL ? -1 : 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue