mirror of
https://github.com/python/cpython.git
synced 2025-08-30 21:48:47 +00:00
Make identifiers str (not str8) objects throughout.
This affects the parser, various object implementations, and all places that put identifiers into C string literals. In testing, a number of crashes occurred as code would fail when the recursion limit was reached (such as the Unicode interning dictionary having key/value pairs where key is not value). To solve these, I added an overflowed flag, which allows for 50 more recursions after the limit was reached and the exception was raised, and a recursion_critical flag, which indicates that recursion absolutely must be allowed, i.e. that a certain call must not cause a stack overflow exception. There are still some places where both str and str8 are accepted as identifiers; these should eventually be removed.
This commit is contained in:
parent
38e43c25ee
commit
5b222135f8
40 changed files with 462 additions and 289 deletions
|
@ -458,8 +458,10 @@ PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
|
|||
/* Copy the Unicode data into the new object */
|
||||
if (u != NULL) {
|
||||
Py_UNICODE *p = unicode->str;
|
||||
while ((*p++ = *u++))
|
||||
;
|
||||
while (size--)
|
||||
*p++ = *u++;
|
||||
/* Don't need to write trailing 0 because
|
||||
that's already done by _PyUnicode_New */
|
||||
}
|
||||
|
||||
return (PyObject *)unicode;
|
||||
|
@ -1184,6 +1186,16 @@ PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
|
|||
return v;
|
||||
}
|
||||
|
||||
char*
|
||||
PyUnicode_AsString(PyObject *unicode)
|
||||
{
|
||||
assert(PyUnicode_Check(unicode));
|
||||
unicode = _PyUnicode_AsDefaultEncodedString(unicode, NULL);
|
||||
if (!unicode)
|
||||
return NULL;
|
||||
return PyString_AsString(unicode);
|
||||
}
|
||||
|
||||
Py_UNICODE *PyUnicode_AsUnicode(PyObject *unicode)
|
||||
{
|
||||
if (!PyUnicode_Check(unicode)) {
|
||||
|
@ -3247,7 +3259,7 @@ PyObject *PyUnicode_DecodeASCII(const char *s,
|
|||
goto onError;
|
||||
}
|
||||
}
|
||||
if (p - PyUnicode_AS_UNICODE(v) < PyString_GET_SIZE(v))
|
||||
if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
|
||||
if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
|
||||
goto onError;
|
||||
Py_XDECREF(errorHandler);
|
||||
|
@ -5861,6 +5873,24 @@ int PyUnicode_Compare(PyObject *left,
|
|||
return -1;
|
||||
}
|
||||
|
||||
int
|
||||
PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
|
||||
{
|
||||
int i;
|
||||
Py_UNICODE *id;
|
||||
assert(PyUnicode_Check(uni));
|
||||
id = PyUnicode_AS_UNICODE(uni);
|
||||
/* Compare Unicode string and source character set string */
|
||||
for (i = 0; id[i] && str[i]; i++)
|
||||
if (id[i] != str[i])
|
||||
return ((int)id[i] < (int)str[i]) ? -1 : 1;
|
||||
if (id[i])
|
||||
return 1; /* uni is longer */
|
||||
if (str[i])
|
||||
return -1; /* str is longer */
|
||||
return 0;
|
||||
}
|
||||
|
||||
PyObject *PyUnicode_RichCompare(PyObject *left,
|
||||
PyObject *right,
|
||||
int op)
|
||||
|
@ -8671,7 +8701,13 @@ PyUnicode_InternInPlace(PyObject **p)
|
|||
return;
|
||||
}
|
||||
}
|
||||
/* It might be that the GetItem call fails even
|
||||
though the key is present in the dictionary,
|
||||
namely when this happens during a stack overflow. */
|
||||
Py_ALLOW_RECURSION
|
||||
t = PyDict_GetItem(interned, (PyObject *)s);
|
||||
Py_END_ALLOW_RECURSION
|
||||
|
||||
if (t) {
|
||||
Py_INCREF(t);
|
||||
Py_DECREF(*p);
|
||||
|
@ -8679,10 +8715,13 @@ PyUnicode_InternInPlace(PyObject **p)
|
|||
return;
|
||||
}
|
||||
|
||||
PyThreadState_GET()->recursion_critical = 1;
|
||||
if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
|
||||
PyErr_Clear();
|
||||
PyThreadState_GET()->recursion_critical = 0;
|
||||
return;
|
||||
}
|
||||
PyThreadState_GET()->recursion_critical = 0;
|
||||
/* The two references in interned are not counted by refcnt.
|
||||
The deallocator will take care of this */
|
||||
s->ob_refcnt -= 2;
|
||||
|
@ -8879,6 +8918,58 @@ unicode_iter(PyObject *seq)
|
|||
return (PyObject *)it;
|
||||
}
|
||||
|
||||
size_t
|
||||
Py_UNICODE_strlen(const Py_UNICODE *u)
|
||||
{
|
||||
int res = 0;
|
||||
while(*u++)
|
||||
res++;
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Copy the 0-terminated Py_UNICODE string s2, including its terminator,
   into the buffer at s1 and return s1 (the Py_UNICODE analogue of
   strcpy()).

   No bounds are checked: s1 must be large enough for s2 plus the
   terminating 0. */
Py_UNICODE*
Py_UNICODE_strcpy(Py_UNICODE *s1, const Py_UNICODE *s2)
{
    Py_UNICODE *dst = s1;

    for (;;) {
        Py_UNICODE ch = *s2++;
        *dst++ = ch;
        if (ch == 0)
            break;      /* terminator has just been copied */
    }
    return s1;
}
|
||||
|
||||
/* Copy at most n Py_UNICODE elements from the 0-terminated string s2 into
   s1 and return s1.

   Copying stops after the terminating 0 has been copied or after n
   elements have been written, whichever comes first.  As with strncpy(),
   the result is NOT 0-terminated when s2 holds n or more elements before
   its terminator.  Unlike strncpy(), the rest of s1 is not padded with
   zeros.  Nothing is written when n is 0. */
Py_UNICODE*
Py_UNICODE_strncpy(Py_UNICODE *s1, const Py_UNICODE *s2, size_t n)
{
    Py_UNICODE *u = s1;
    /* Test the budget BEFORE storing: checking it after the store would
       write n + 1 elements and overrun a buffer of exactly n elements. */
    while (n > 0) {
        n--;
        if ((*u++ = *s2++) == 0)
            break;
    }
    return s1;
}
|
||||
|
||||
int
|
||||
Py_UNICODE_strcmp(const Py_UNICODE *s1, const Py_UNICODE *s2)
|
||||
{
|
||||
while (*s1 && *s2 && *s1 == *s2)
|
||||
s1++, s2++;
|
||||
if (*s1 && *s2)
|
||||
return (*s1 < *s2) ? -1 : +1;
|
||||
if (*s1)
|
||||
return 1;
|
||||
if (*s2)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Return a pointer to the first element equal to c in the 0-terminated
   Py_UNICODE string s, or NULL if there is none.

   The terminating 0 is never examined, so searching for c == 0 returns
   NULL (this differs from the C library strchr()).  The const qualifier
   of s is cast away in the returned pointer. */
Py_UNICODE*
Py_UNICODE_strchr(const Py_UNICODE *s, Py_UNICODE c)
{
    while (*s) {
        if (*s == c)
            return (Py_UNICODE*)s;
        s++;
    }
    return NULL;
}
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue