bpo-40593: Improve syntax errors for invalid characters in source code. (GH-20033)

This commit is contained in:
Serhiy Storchaka 2020-05-12 12:42:04 +03:00 committed by GitHub
parent f3a5b7ada0
commit 74ea6b5a75
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 92 additions and 45 deletions

View file

@ -12309,31 +12309,22 @@ unicode_isnumeric_impl(PyObject *self)
Py_RETURN_TRUE;
}
int
PyUnicode_IsIdentifier(PyObject *self)
Py_ssize_t
_PyUnicode_ScanIdentifier(PyObject *self)
{
Py_ssize_t i;
int ready = PyUnicode_IS_READY(self);
if (PyUnicode_READY(self) == -1)
return -1;
Py_ssize_t len = ready ? PyUnicode_GET_LENGTH(self) : PyUnicode_GET_SIZE(self);
Py_ssize_t len = PyUnicode_GET_LENGTH(self);
if (len == 0) {
/* an empty string is not a valid identifier */
return 0;
}
int kind = 0;
const void *data = NULL;
const wchar_t *wstr = NULL;
Py_UCS4 ch;
if (ready) {
kind = PyUnicode_KIND(self);
data = PyUnicode_DATA(self);
ch = PyUnicode_READ(kind, data, 0);
}
else {
wstr = _PyUnicode_WSTR(self);
ch = wstr[0];
}
int kind = PyUnicode_KIND(self);
const void *data = PyUnicode_DATA(self);
Py_UCS4 ch = PyUnicode_READ(kind, data, 0);
/* PEP 3131 says that the first character must be in
XID_Start and subsequent characters in XID_Continue,
and for the ASCII range, the 2.x rules apply (i.e
@ -12347,17 +12338,44 @@ PyUnicode_IsIdentifier(PyObject *self)
}
for (i = 1; i < len; i++) {
if (ready) {
ch = PyUnicode_READ(kind, data, i);
}
else {
ch = wstr[i];
}
ch = PyUnicode_READ(kind, data, i);
if (!_PyUnicode_IsXidContinue(ch)) {
return 0;
return i;
}
}
return 1;
return i;
}
/* Return 1 if the string `self` is a valid identifier, 0 otherwise.

   An empty string is never a valid identifier.  The first character must
   satisfy _PyUnicode_IsXidStart() or be U+005F LOW LINE; every following
   character must satisfy _PyUnicode_IsXidContinue().

   Two representations are handled:
   - ready strings are delegated to _PyUnicode_ScanIdentifier(), and the
     string is accepted only when the scan result equals the full length;
   - non-ready strings are scanned directly from their wchar_t buffer.

   This function does not report errors through its return value. */
int
PyUnicode_IsIdentifier(PyObject *self)
{
    if (PyUnicode_IS_READY(self)) {
        Py_ssize_t i = _PyUnicode_ScanIdentifier(self);
        Py_ssize_t len = PyUnicode_GET_LENGTH(self);
        /* an empty string is not a valid identifier */
        return len && i == len;
    }
    else {
        Py_ssize_t i = 0, len = PyUnicode_GET_SIZE(self);
        if (len == 0) {
            /* an empty string is not a valid identifier */
            return 0;
        }

        const wchar_t *wstr = _PyUnicode_WSTR(self);
        Py_UCS4 ch = wstr[i++];
#if SIZEOF_WCHAR_T == 2
        /* With a 16-bit wchar_t, a non-BMP character is stored as a
           surrogate pair.  Join the pair so that the complete code point
           is classified, as it is for ready strings; a lone surrogate is
           never an identifier character. */
        if (Py_UNICODE_IS_HIGH_SURROGATE(ch)
            && i < len
            && Py_UNICODE_IS_LOW_SURROGATE(wstr[i]))
        {
            ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]);
            i++;
        }
#endif
        if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) {
            return 0;
        }

        while (i < len) {
            ch = wstr[i++];
#if SIZEOF_WCHAR_T == 2
            /* Same surrogate-pair joining as for the first character. */
            if (Py_UNICODE_IS_HIGH_SURROGATE(ch)
                && i < len
                && Py_UNICODE_IS_LOW_SURROGATE(wstr[i]))
            {
                ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]);
                i++;
            }
#endif
            if (!_PyUnicode_IsXidContinue(ch)) {
                return 0;
            }
        }
        return 1;
    }
}
/*[clinic input]