bpo-34013: Move the Python 2 hints from the exception constructor to the parser (GH-27392)

This commit is contained in:
Pablo Galindo Salgado 2021-07-27 21:30:32 +01:00 committed by GitHub
parent 6948964ecf
commit ecc3c8e421
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 28 additions and 224 deletions

View file

@@ -1475,9 +1475,6 @@ ComplexExtendsException(PyExc_Exception, AttributeError,
* SyntaxError extends Exception
*/
/* Helper function to customize error message for some syntax errors */
static int _report_missing_parentheses(PySyntaxErrorObject *self);
static int
SyntaxError_init(PySyntaxErrorObject *self, PyObject *args, PyObject *kwds)
{
@@ -1520,18 +1517,6 @@ SyntaxError_init(PySyntaxErrorObject *self, PyObject *args, PyObject *kwds)
PyErr_SetString(PyExc_TypeError, "end_offset must be provided when end_lineno is provided");
return -1;
}
/*
* Issue #21669: Custom error for 'print' & 'exec' as statements
*
* Only applies to SyntaxError instances, not to subclasses such
* as TabError or IndentationError (see issue #31161)
*/
if (Py_IS_TYPE(self, (PyTypeObject *)PyExc_SyntaxError) &&
self->text && PyUnicode_Check(self->text) &&
_report_missing_parentheses(self) < 0) {
return -1;
}
}
return 0;
}
@@ -3033,189 +3018,3 @@ _PyErr_TrySetFromCause(const char *format, ...)
PyErr_Restore(new_exc, new_val, new_tb);
return new_val;
}
/* To help with migration from Python 2, SyntaxError.__init__ applies some
* heuristics to try to report a more meaningful exception when print and
* exec are used like statements.
*
* The heuristics are currently expected to detect the following cases:
* - top level statement
* - statement in a nested suite
* - trailing section of a one line complex statement
*
* They're currently known not to trigger:
* - after a semi-colon
*
* The error message can be a bit odd in cases where the "arguments" are
* completely illegal syntactically, but that isn't worth the hassle of
* fixing.
*
* We also can't do anything about cases that are legal Python 3 syntax
* but mean something entirely different from what they did in Python 2
* (omitting the arguments entirely, printing items preceded by a unary plus
* or minus, using the stream redirection syntax).
*/
/* Replace self->msg with a hint for a Python 2 style `print` statement.
 *
 * `start` is the index in self->text at which the "print " prefix begins.
 * The arguments are taken from just after that prefix up to the first ';'
 * (or the end of the text when there is none), stripped of surrounding
 * whitespace, and echoed back inside a suggested print(...) call.  When the
 * stripped arguments end with a comma, ` end=" "` is appended to the
 * suggestion.
 *
 * Returns 1 after the message has been replaced, -1 on failure.
 */
static int
_set_legacy_print_statement_msg(PySyntaxErrorObject *self, Py_ssize_t start)
{
    /* Number of characters in the "print " prefix that is skipped. */
    const Py_ssize_t prefix_len = 6;
    /* Strip-type code handed to _PyUnicode_XStrip (strip both ends). */
    const int strip_both_ends = 2;

    Py_ssize_t line_len = PyUnicode_GET_LENGTH(self->text);
    Py_ssize_t args_begin = start + prefix_len;

    /* The statement's arguments stop at the first semicolon, if any. */
    Py_ssize_t args_end = PyUnicode_FindChar(self->text, (Py_UCS4)';',
                                             args_begin, line_len, 1);
    if (args_end < -1) {
        return -1;
    }
    if (args_end == -1) {
        args_end = line_len;
    }

    PyObject *raw_args = PyUnicode_Substring(self->text, args_begin, args_end);
    if (raw_args == NULL) {
        return -1;
    }

    /* Trim whitespace around the arguments; `args` stays NULL if either
     * the separator string or the stripped copy cannot be created. */
    PyObject *args = NULL;
    PyObject *whitespace = PyUnicode_FromString(" \t\r\n");
    if (whitespace != NULL) {
        args = _PyUnicode_XStrip(raw_args, strip_both_ends, whitespace);
        Py_DECREF(whitespace);
    }
    Py_DECREF(raw_args);
    if (args == NULL) {
        return -1;
    }

    /* A trailing comma gets an explicit end=" " in the suggestion. */
    Py_ssize_t args_len = PyUnicode_GET_LENGTH(args);
    const char *end_hint = "";
    if (args_len > 0) {
        if (PyUnicode_READ_CHAR(args, args_len - 1) == ',') {
            end_hint = " end=\" \"";
        }
    }

    PyObject *hint = PyUnicode_FromFormat(
        "Missing parentheses in call to 'print'. Did you mean print(%U%s)?",
        args, end_hint);
    Py_DECREF(args);
    if (hint == NULL) {
        return -1;
    }

    /* self->msg takes over the new reference; the old value is released. */
    Py_XSETREF(self->msg, hint);
    return 1;
}
/* Look for a legacy `print ` or `exec ` statement in self->text.
 *
 * Scanning begins at index `start`; leading whitespace from that point is
 * skipped before the prefixes are compared.
 *
 * Return values:
 *   -1: an error occurred
 *    0: nothing happened
 *    1: the check triggered & the error message was changed
 */
static int
_check_for_legacy_statements(PySyntaxErrorObject *self, Py_ssize_t start)
{
    /* Interned prefix strings, created on first use and then kept. */
    static PyObject *print_prefix = NULL;
    static PyObject *exec_prefix = NULL;

    Py_ssize_t line_len = PyUnicode_GET_LENGTH(self->text);
    int text_kind = PyUnicode_KIND(self->text);
    const void *text_data = PyUnicode_DATA(self->text);

    /* Advance past any whitespace that precedes the statement. */
    for (; start < line_len; start++) {
        Py_UCS4 current = PyUnicode_READ(text_kind, text_data, start);
        if (!Py_UNICODE_ISSPACE(current)) {
            break;
        }
    }

    /* Nothing but whitespace (or nothing at all) is left to inspect. */
    if (start == line_len) {
        return 0;
    }

    /* Legacy print statement? */
    if (print_prefix == NULL) {
        print_prefix = PyUnicode_InternFromString("print ");
        if (print_prefix == NULL) {
            return -1;
        }
    }
    Py_ssize_t found = PyUnicode_Tailmatch(self->text, print_prefix,
                                           start, line_len, -1);
    if (found == -1) {
        return -1;
    }
    if (found) {
        return _set_legacy_print_statement_msg(self, start);
    }

    /* Legacy exec statement? */
    if (exec_prefix == NULL) {
        exec_prefix = PyUnicode_InternFromString("exec ");
        if (exec_prefix == NULL) {
            return -1;
        }
    }
    found = PyUnicode_Tailmatch(self->text, exec_prefix, start, line_len, -1);
    if (found == -1) {
        return -1;
    }
    if (!found) {
        /* Neither prefix matched: keep the default error message. */
        return 0;
    }

    PyObject *exec_msg = PyUnicode_FromString(
        "Missing parentheses in call to 'exec'");
    if (exec_msg == NULL) {
        return -1;
    }
    Py_XSETREF(self->msg, exec_msg);
    return 1;
}
/* Customize self->msg when self->text looks like a Python 2 `print` or
 * `exec` statement written without parentheses.
 *
 * Any text containing an opening parenthesis is left untouched.  Otherwise
 * the text is checked from its beginning and, if that check does not
 * trigger, once more from just after the first colon (the one-line complex
 * statement case).
 *
 * Returns -1 on error and 0 otherwise, whether or not the message was
 * changed.
 */
static int
_report_missing_parentheses(PySyntaxErrorObject *self)
{
    Py_ssize_t line_len = PyUnicode_GET_LENGTH(self->text);

    /* Use the default error message for any line with an opening paren. */
    Py_ssize_t paren_at = PyUnicode_FindChar(self->text, (Py_UCS4)'(',
                                             0, line_len, 1);
    if (paren_at < -1) {
        return -1;
    }
    if (paren_at != -1) {
        return 0;
    }

    /* Simple statement case: check from the start of the text. */
    int outcome = _check_for_legacy_statements(self, 0);
    if (outcome < 0) {
        return -1;
    }
    if (outcome != 0) {
        /* The message has already been replaced; nothing more to do. */
        return 0;
    }

    /* One-line complex statement case: retry just after the first colon. */
    Py_ssize_t colon_at = PyUnicode_FindChar(self->text, (Py_UCS4)':',
                                             0, line_len, 1);
    if (colon_at < -1) {
        return -1;
    }
    if (colon_at < 0 || colon_at >= line_len) {
        return 0;
    }
    return (_check_for_legacy_statements(self, colon_at + 1) < 0) ? -1 : 0;
}