mirror of
https://github.com/python/cpython.git
synced 2025-09-26 10:19:53 +00:00
gh-81548: Deprecate octal escape sequences with value larger than 0o377 (GH-91668)
This commit is contained in:
parent
a055dac0b4
commit
3483299a24
8 changed files with 139 additions and 18 deletions
|
@ -596,6 +596,11 @@ Notes:
|
||||||
(1)
|
(1)
|
||||||
As in Standard C, up to three octal digits are accepted.
|
As in Standard C, up to three octal digits are accepted.
|
||||||
|
|
||||||
|
.. versionchanged:: 3.11
|
||||||
|
Octal escapes with value larger than ``0o377`` produce a :exc:`DeprecationWarning`.
|
||||||
|
In a future Python version they will be a :exc:`SyntaxWarning` and
|
||||||
|
eventually a :exc:`SyntaxError`.
|
||||||
|
|
||||||
(2)
|
(2)
|
||||||
Unlike in Standard C, exactly two hex digits are required.
|
Unlike in Standard C, exactly two hex digits are required.
|
||||||
|
|
||||||
|
|
|
@ -1055,6 +1055,12 @@ CPython bytecode changes
|
||||||
Deprecated
|
Deprecated
|
||||||
==========
|
==========
|
||||||
|
|
||||||
|
* Octal escapes with value larger than ``0o377`` now produce
|
||||||
|
a :exc:`DeprecationWarning`.
|
||||||
|
In a future Python version they will be a :exc:`SyntaxWarning` and
|
||||||
|
eventually a :exc:`SyntaxError`.
|
||||||
|
(Contributed by Serhiy Storchaka in :issue:`81548`.)
|
||||||
|
|
||||||
* The :mod:`lib2to3` package and ``2to3`` tool are now deprecated and may not
|
* The :mod:`lib2to3` package and ``2to3`` tool are now deprecated and may not
|
||||||
be able to parse Python 3.10 or newer. See the :pep:`617` (New PEG parser for
|
be able to parse Python 3.10 or newer. See the :pep:`617` (New PEG parser for
|
||||||
CPython). (Contributed by Victor Stinner in :issue:`40360`.)
|
CPython). (Contributed by Victor Stinner in :issue:`40360`.)
|
||||||
|
|
|
@ -1193,7 +1193,6 @@ class EscapeDecodeTest(unittest.TestCase):
|
||||||
check(br"[\418]", b"[!8]")
|
check(br"[\418]", b"[!8]")
|
||||||
check(br"[\101]", b"[A]")
|
check(br"[\101]", b"[A]")
|
||||||
check(br"[\1010]", b"[A0]")
|
check(br"[\1010]", b"[A0]")
|
||||||
check(br"[\501]", b"[A]")
|
|
||||||
check(br"[\x41]", b"[A]")
|
check(br"[\x41]", b"[A]")
|
||||||
check(br"[\x410]", b"[A0]")
|
check(br"[\x410]", b"[A0]")
|
||||||
for i in range(97, 123):
|
for i in range(97, 123):
|
||||||
|
@ -1209,6 +1208,9 @@ class EscapeDecodeTest(unittest.TestCase):
|
||||||
check(br"\9", b"\\9")
|
check(br"\9", b"\\9")
|
||||||
with self.assertWarns(DeprecationWarning):
|
with self.assertWarns(DeprecationWarning):
|
||||||
check(b"\\\xfa", b"\\\xfa")
|
check(b"\\\xfa", b"\\\xfa")
|
||||||
|
for i in range(0o400, 0o1000):
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
check(rb'\%o' % i, bytes([i & 0o377]))
|
||||||
|
|
||||||
def test_errors(self):
|
def test_errors(self):
|
||||||
decode = codecs.escape_decode
|
decode = codecs.escape_decode
|
||||||
|
@ -2435,6 +2437,9 @@ class UnicodeEscapeTest(ReadTest, unittest.TestCase):
|
||||||
check(br"\9", "\\9")
|
check(br"\9", "\\9")
|
||||||
with self.assertWarns(DeprecationWarning):
|
with self.assertWarns(DeprecationWarning):
|
||||||
check(b"\\\xfa", "\\\xfa")
|
check(b"\\\xfa", "\\\xfa")
|
||||||
|
for i in range(0o400, 0o1000):
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
check(rb'\%o' % i, chr(i))
|
||||||
|
|
||||||
def test_decode_errors(self):
|
def test_decode_errors(self):
|
||||||
decode = codecs.unicode_escape_decode
|
decode = codecs.unicode_escape_decode
|
||||||
|
|
|
@ -116,6 +116,7 @@ class TestLiterals(unittest.TestCase):
|
||||||
warnings.simplefilter('always', category=DeprecationWarning)
|
warnings.simplefilter('always', category=DeprecationWarning)
|
||||||
eval("'''\n\\z'''")
|
eval("'''\n\\z'''")
|
||||||
self.assertEqual(len(w), 1)
|
self.assertEqual(len(w), 1)
|
||||||
|
self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
|
||||||
self.assertEqual(w[0].filename, '<string>')
|
self.assertEqual(w[0].filename, '<string>')
|
||||||
self.assertEqual(w[0].lineno, 1)
|
self.assertEqual(w[0].lineno, 1)
|
||||||
|
|
||||||
|
@ -125,6 +126,32 @@ class TestLiterals(unittest.TestCase):
|
||||||
eval("'''\n\\z'''")
|
eval("'''\n\\z'''")
|
||||||
exc = cm.exception
|
exc = cm.exception
|
||||||
self.assertEqual(w, [])
|
self.assertEqual(w, [])
|
||||||
|
self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
|
||||||
|
self.assertEqual(exc.filename, '<string>')
|
||||||
|
self.assertEqual(exc.lineno, 1)
|
||||||
|
self.assertEqual(exc.offset, 1)
|
||||||
|
|
||||||
|
def test_eval_str_invalid_octal_escape(self):
|
||||||
|
for i in range(0o400, 0o1000):
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
self.assertEqual(eval(r"'\%o'" % i), chr(i))
|
||||||
|
|
||||||
|
with warnings.catch_warnings(record=True) as w:
|
||||||
|
warnings.simplefilter('always', category=DeprecationWarning)
|
||||||
|
eval("'''\n\\407'''")
|
||||||
|
self.assertEqual(len(w), 1)
|
||||||
|
self.assertEqual(str(w[0].message),
|
||||||
|
r"invalid octal escape sequence '\407'")
|
||||||
|
self.assertEqual(w[0].filename, '<string>')
|
||||||
|
self.assertEqual(w[0].lineno, 1)
|
||||||
|
|
||||||
|
with warnings.catch_warnings(record=True) as w:
|
||||||
|
warnings.simplefilter('error', category=DeprecationWarning)
|
||||||
|
with self.assertRaises(SyntaxError) as cm:
|
||||||
|
eval("'''\n\\407'''")
|
||||||
|
exc = cm.exception
|
||||||
|
self.assertEqual(w, [])
|
||||||
|
self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
|
||||||
self.assertEqual(exc.filename, '<string>')
|
self.assertEqual(exc.filename, '<string>')
|
||||||
self.assertEqual(exc.lineno, 1)
|
self.assertEqual(exc.lineno, 1)
|
||||||
self.assertEqual(exc.offset, 1)
|
self.assertEqual(exc.offset, 1)
|
||||||
|
@ -166,6 +193,7 @@ class TestLiterals(unittest.TestCase):
|
||||||
warnings.simplefilter('always', category=DeprecationWarning)
|
warnings.simplefilter('always', category=DeprecationWarning)
|
||||||
eval("b'''\n\\z'''")
|
eval("b'''\n\\z'''")
|
||||||
self.assertEqual(len(w), 1)
|
self.assertEqual(len(w), 1)
|
||||||
|
self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
|
||||||
self.assertEqual(w[0].filename, '<string>')
|
self.assertEqual(w[0].filename, '<string>')
|
||||||
self.assertEqual(w[0].lineno, 1)
|
self.assertEqual(w[0].lineno, 1)
|
||||||
|
|
||||||
|
@ -175,6 +203,31 @@ class TestLiterals(unittest.TestCase):
|
||||||
eval("b'''\n\\z'''")
|
eval("b'''\n\\z'''")
|
||||||
exc = cm.exception
|
exc = cm.exception
|
||||||
self.assertEqual(w, [])
|
self.assertEqual(w, [])
|
||||||
|
self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
|
||||||
|
self.assertEqual(exc.filename, '<string>')
|
||||||
|
self.assertEqual(exc.lineno, 1)
|
||||||
|
|
||||||
|
def test_eval_bytes_invalid_octal_escape(self):
|
||||||
|
for i in range(0o400, 0o1000):
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
self.assertEqual(eval(r"b'\%o'" % i), bytes([i & 0o377]))
|
||||||
|
|
||||||
|
with warnings.catch_warnings(record=True) as w:
|
||||||
|
warnings.simplefilter('always', category=DeprecationWarning)
|
||||||
|
eval("b'''\n\\407'''")
|
||||||
|
self.assertEqual(len(w), 1)
|
||||||
|
self.assertEqual(str(w[0].message),
|
||||||
|
r"invalid octal escape sequence '\407'")
|
||||||
|
self.assertEqual(w[0].filename, '<string>')
|
||||||
|
self.assertEqual(w[0].lineno, 1)
|
||||||
|
|
||||||
|
with warnings.catch_warnings(record=True) as w:
|
||||||
|
warnings.simplefilter('error', category=DeprecationWarning)
|
||||||
|
with self.assertRaises(SyntaxError) as cm:
|
||||||
|
eval("b'''\n\\407'''")
|
||||||
|
exc = cm.exception
|
||||||
|
self.assertEqual(w, [])
|
||||||
|
self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
|
||||||
self.assertEqual(exc.filename, '<string>')
|
self.assertEqual(exc.filename, '<string>')
|
||||||
self.assertEqual(exc.lineno, 1)
|
self.assertEqual(exc.lineno, 1)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
Octal escapes with value larger than ``0o377`` now produce a
|
||||||
|
:exc:`DeprecationWarning`. In a future Python version they will be a
|
||||||
|
:exc:`SyntaxWarning` and eventually a :exc:`SyntaxError`.
|
|
@ -1113,6 +1113,12 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
|
||||||
if (s < end && '0' <= *s && *s <= '7')
|
if (s < end && '0' <= *s && *s <= '7')
|
||||||
c = (c<<3) + *s++ - '0';
|
c = (c<<3) + *s++ - '0';
|
||||||
}
|
}
|
||||||
|
if (c > 0377) {
|
||||||
|
if (*first_invalid_escape == NULL) {
|
||||||
|
*first_invalid_escape = s-3; /* Back up 3 chars, since we've
|
||||||
|
already incremented s. */
|
||||||
|
}
|
||||||
|
}
|
||||||
*p++ = c;
|
*p++ = c;
|
||||||
break;
|
break;
|
||||||
case 'x':
|
case 'x':
|
||||||
|
@ -1179,13 +1185,26 @@ PyObject *PyBytes_DecodeEscape(const char *s,
|
||||||
if (result == NULL)
|
if (result == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
if (first_invalid_escape != NULL) {
|
if (first_invalid_escape != NULL) {
|
||||||
|
unsigned char c = *first_invalid_escape;
|
||||||
|
if ('4' <= c && c <= '7') {
|
||||||
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
|
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
|
||||||
"invalid escape sequence '\\%c'",
|
"invalid octal escape sequence '\\%.3s'",
|
||||||
(unsigned char)*first_invalid_escape) < 0) {
|
first_invalid_escape) < 0)
|
||||||
|
{
|
||||||
Py_DECREF(result);
|
Py_DECREF(result);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
|
||||||
|
"invalid escape sequence '\\%c'",
|
||||||
|
c) < 0)
|
||||||
|
{
|
||||||
|
Py_DECREF(result);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
return result;
|
return result;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -6404,6 +6404,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
|
||||||
ch = (ch<<3) + *s++ - '0';
|
ch = (ch<<3) + *s++ - '0';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (ch > 0377) {
|
||||||
|
if (*first_invalid_escape == NULL) {
|
||||||
|
*first_invalid_escape = s-3; /* Back up 3 chars, since we've
|
||||||
|
already incremented s. */
|
||||||
|
}
|
||||||
|
}
|
||||||
WRITE_CHAR(ch);
|
WRITE_CHAR(ch);
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
@ -6554,13 +6560,26 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
|
||||||
if (result == NULL)
|
if (result == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
if (first_invalid_escape != NULL) {
|
if (first_invalid_escape != NULL) {
|
||||||
|
unsigned char c = *first_invalid_escape;
|
||||||
|
if ('4' <= c && c <= '7') {
|
||||||
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
|
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
|
||||||
"invalid escape sequence '\\%c'",
|
"invalid octal escape sequence '\\%.3s'",
|
||||||
(unsigned char)*first_invalid_escape) < 0) {
|
first_invalid_escape) < 0)
|
||||||
|
{
|
||||||
Py_DECREF(result);
|
Py_DECREF(result);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
|
||||||
|
"invalid escape sequence '\\%c'",
|
||||||
|
c) < 0)
|
||||||
|
{
|
||||||
|
Py_DECREF(result);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -9,10 +9,15 @@
|
||||||
//// STRING HANDLING FUNCTIONS ////
|
//// STRING HANDLING FUNCTIONS ////
|
||||||
|
|
||||||
static int
|
static int
|
||||||
warn_invalid_escape_sequence(Parser *p, unsigned char first_invalid_escape_char, Token *t)
|
warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token *t)
|
||||||
{
|
{
|
||||||
|
unsigned char c = *first_invalid_escape;
|
||||||
|
int octal = ('4' <= c && c <= '7');
|
||||||
PyObject *msg =
|
PyObject *msg =
|
||||||
PyUnicode_FromFormat("invalid escape sequence '\\%c'", first_invalid_escape_char);
|
octal
|
||||||
|
? PyUnicode_FromFormat("invalid octal escape sequence '\\%.3s'",
|
||||||
|
first_invalid_escape)
|
||||||
|
: PyUnicode_FromFormat("invalid escape sequence '\\%c'", c);
|
||||||
if (msg == NULL) {
|
if (msg == NULL) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
@ -27,7 +32,13 @@ warn_invalid_escape_sequence(Parser *p, unsigned char first_invalid_escape_char,
|
||||||
since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
|
since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
|
||||||
error location, if p->known_err_token is not set. */
|
error location, if p->known_err_token is not set. */
|
||||||
p->known_err_token = t;
|
p->known_err_token = t;
|
||||||
RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", first_invalid_escape_char);
|
if (octal) {
|
||||||
|
RAISE_SYNTAX_ERROR("invalid octal escape sequence '\\%.3s'",
|
||||||
|
first_invalid_escape);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Py_DECREF(msg);
|
Py_DECREF(msg);
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -118,7 +129,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
|
||||||
v = _PyUnicode_DecodeUnicodeEscapeInternal(s, len, NULL, NULL, &first_invalid_escape);
|
v = _PyUnicode_DecodeUnicodeEscapeInternal(s, len, NULL, NULL, &first_invalid_escape);
|
||||||
|
|
||||||
if (v != NULL && first_invalid_escape != NULL) {
|
if (v != NULL && first_invalid_escape != NULL) {
|
||||||
if (warn_invalid_escape_sequence(parser, *first_invalid_escape, t) < 0) {
|
if (warn_invalid_escape_sequence(parser, first_invalid_escape, t) < 0) {
|
||||||
/* We have not decref u before because first_invalid_escape points
|
/* We have not decref u before because first_invalid_escape points
|
||||||
inside u. */
|
inside u. */
|
||||||
Py_XDECREF(u);
|
Py_XDECREF(u);
|
||||||
|
@ -140,7 +151,7 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (first_invalid_escape != NULL) {
|
if (first_invalid_escape != NULL) {
|
||||||
if (warn_invalid_escape_sequence(p, *first_invalid_escape, t) < 0) {
|
if (warn_invalid_escape_sequence(p, first_invalid_escape, t) < 0) {
|
||||||
Py_DECREF(result);
|
Py_DECREF(result);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -465,7 +476,7 @@ fstring_find_literal(Parser *p, const char **str, const char *end, int raw,
|
||||||
decode_unicode_with_escapes(). */
|
decode_unicode_with_escapes(). */
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (ch == '{' && warn_invalid_escape_sequence(p, ch, t) < 0) {
|
if (ch == '{' && warn_invalid_escape_sequence(p, s-1, t) < 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue