mirror of
https://github.com/python/cpython.git
synced 2025-07-12 22:05:16 +00:00
bpo-40593: Improve syntax errors for invalid characters in source code. (GH-20033)
This commit is contained in:
parent
f3a5b7ada0
commit
74ea6b5a75
10 changed files with 92 additions and 45 deletions
|
@ -12309,31 +12309,22 @@ unicode_isnumeric_impl(PyObject *self)
|
|||
Py_RETURN_TRUE;
|
||||
}
|
||||
|
||||
int
|
||||
PyUnicode_IsIdentifier(PyObject *self)
|
||||
Py_ssize_t
|
||||
_PyUnicode_ScanIdentifier(PyObject *self)
|
||||
{
|
||||
Py_ssize_t i;
|
||||
int ready = PyUnicode_IS_READY(self);
|
||||
if (PyUnicode_READY(self) == -1)
|
||||
return -1;
|
||||
|
||||
Py_ssize_t len = ready ? PyUnicode_GET_LENGTH(self) : PyUnicode_GET_SIZE(self);
|
||||
Py_ssize_t len = PyUnicode_GET_LENGTH(self);
|
||||
if (len == 0) {
|
||||
/* an empty string is not a valid identifier */
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kind = 0;
|
||||
const void *data = NULL;
|
||||
const wchar_t *wstr = NULL;
|
||||
Py_UCS4 ch;
|
||||
if (ready) {
|
||||
kind = PyUnicode_KIND(self);
|
||||
data = PyUnicode_DATA(self);
|
||||
ch = PyUnicode_READ(kind, data, 0);
|
||||
}
|
||||
else {
|
||||
wstr = _PyUnicode_WSTR(self);
|
||||
ch = wstr[0];
|
||||
}
|
||||
int kind = PyUnicode_KIND(self);
|
||||
const void *data = PyUnicode_DATA(self);
|
||||
Py_UCS4 ch = PyUnicode_READ(kind, data, 0);
|
||||
/* PEP 3131 says that the first character must be in
|
||||
XID_Start and subsequent characters in XID_Continue,
|
||||
and for the ASCII range, the 2.x rules apply (i.e
|
||||
|
@ -12347,17 +12338,44 @@ PyUnicode_IsIdentifier(PyObject *self)
|
|||
}
|
||||
|
||||
for (i = 1; i < len; i++) {
|
||||
if (ready) {
|
||||
ch = PyUnicode_READ(kind, data, i);
|
||||
}
|
||||
else {
|
||||
ch = wstr[i];
|
||||
}
|
||||
ch = PyUnicode_READ(kind, data, i);
|
||||
if (!_PyUnicode_IsXidContinue(ch)) {
|
||||
return 0;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
return i;
|
||||
}
|
||||
|
||||
int
|
||||
PyUnicode_IsIdentifier(PyObject *self)
|
||||
{
|
||||
if (PyUnicode_IS_READY(self)) {
|
||||
Py_ssize_t i = _PyUnicode_ScanIdentifier(self);
|
||||
Py_ssize_t len = PyUnicode_GET_LENGTH(self);
|
||||
/* an empty string is not a valid identifier */
|
||||
return len && i == len;
|
||||
}
|
||||
else {
|
||||
Py_ssize_t i, len = PyUnicode_GET_SIZE(self);
|
||||
if (len == 0) {
|
||||
/* an empty string is not a valid identifier */
|
||||
return 0;
|
||||
}
|
||||
|
||||
const wchar_t *wstr = _PyUnicode_WSTR(self);
|
||||
Py_UCS4 ch = wstr[0];
|
||||
if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (i = 1; i < len; i++) {
|
||||
ch = wstr[i];
|
||||
if (!_PyUnicode_IsXidContinue(ch)) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue