Make identifiers str (not str8) objects throughout.

This affects the parser, various object implementations, and all places that put identifiers into C string literals. In testing, a number of crashes occurred as code would fail when the recursion limit was reached (such as the Unicode interning dictionary having key/value pairs where key is not value). To solve these, I added an overflowed flag, which allows for 50 more recursions after the limit was reached and the exception was raised, and a recursion_critical flag, which indicates that recursion absolutely must be allowed, i.e. that a certain call must not cause a stack overflow exception. There are still some places where both str and str8 are accepted as identifiers; these should eventually be removed.
2025-10-15 19:29:46 +00:00 · 2007-06-10 09:51:05 +00:00 · 2007-06-10 09:51:05 +00:00 · 5b222135f8
commit 5b222135f8
parent 38e43c25ee
40 changed files with 462 additions and 289 deletions
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -458,8 +458,10 @@ PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
    /* Copy the Unicode data into the new object */
    if (u != NULL) {
        Py_UNICODE *p = unicode->str;
-        while ((*p++ = *u++))
-            ;
+        while (size--)
+            *p++ = *u++;
+        /* Don't need to write trailing 0 because
+           that's already done by _PyUnicode_New */
    }

    return (PyObject *)unicode;
@ -1184,6 +1186,16 @@ PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
    return v;
 }

+char*
+PyUnicode_AsString(PyObject *unicode)
+{
+    assert(PyUnicode_Check(unicode));
+    unicode = _PyUnicode_AsDefaultEncodedString(unicode, NULL);
+    if (!unicode)
+        return NULL;
+    return PyString_AsString(unicode);
+}
+
 Py_UNICODE *PyUnicode_AsUnicode(PyObject *unicode)
 {
    if (!PyUnicode_Check(unicode)) {
@ -3247,7 +3259,7 @@ PyObject *PyUnicode_DecodeASCII(const char *s,
 		goto onError;
 	}
    }
-    if (p - PyUnicode_AS_UNICODE(v) < PyString_GET_SIZE(v))
+    if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
 	if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
 	    goto onError;
    Py_XDECREF(errorHandler);
@ -5861,6 +5873,24 @@ int PyUnicode_Compare(PyObject *left,
    return -1;
 }

+int
+PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
+{
+    int i;
+    Py_UNICODE *id;
+    assert(PyUnicode_Check(uni));
+    id = PyUnicode_AS_UNICODE(uni);
+    /* Compare Unicode string and source character set string */
+    for (i = 0; id[i] && str[i]; i++)
+	if (id[i] != str[i])
+	    return ((int)id[i] < (int)str[i]) ? -1 : 1;
+    if (id[i])
+	return 1; /* uni is longer */
+    if (str[i])
+	return -1; /* str is longer */
+    return 0;
+}
+
 PyObject *PyUnicode_RichCompare(PyObject *left,
                                PyObject *right,
                                int op)
@ -8671,7 +8701,13 @@ PyUnicode_InternInPlace(PyObject **p)
 			return;
 		}
 	}
+	/* It might be that the GetItem call fails even
+	   though the key is present in the dictionary,
+	   namely when this happens during a stack overflow. */
+	Py_ALLOW_RECURSION
 	t = PyDict_GetItem(interned, (PyObject *)s);
+	Py_END_ALLOW_RECURSION
+
 	if (t) {
 		Py_INCREF(t);
 		Py_DECREF(*p);
@ -8679,10 +8715,13 @@ PyUnicode_InternInPlace(PyObject **p)
 		return;
 	}

+	PyThreadState_GET()->recursion_critical = 1;
 	if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
 		PyErr_Clear();
+		PyThreadState_GET()->recursion_critical = 0;
 		return;
 	}
+	PyThreadState_GET()->recursion_critical = 0;
 	/* The two references in interned are not counted by refcnt.
 	   The deallocator will take care of this */
 	s->ob_refcnt -= 2;
@ -8879,6 +8918,58 @@ unicode_iter(PyObject *seq)
 	return (PyObject *)it;
 }

+size_t
+Py_UNICODE_strlen(const Py_UNICODE *u)
+{
+    int res = 0;
+    while(*u++)
+        res++;
+    return res;
+}
+
+Py_UNICODE*
+Py_UNICODE_strcpy(Py_UNICODE *s1, const Py_UNICODE *s2)
+{
+    Py_UNICODE *u = s1;
+    while ((*u++ = *s2++));
+    return s1;
+}
+
+Py_UNICODE*
+Py_UNICODE_strncpy(Py_UNICODE *s1, const Py_UNICODE *s2, size_t n)
+{
+    Py_UNICODE *u = s1;
+    while ((*u++ = *s2++))
+        if (n-- == 0)
+            break;
+    return s1;
+}
+
+int
+Py_UNICODE_strcmp(const Py_UNICODE *s1, const Py_UNICODE *s2)
+{
+    while (*s1 && *s2 && *s1 == *s2)
+        s1++, s2++;
+    if (*s1 && *s2)
+        return (*s1 < *s2) ? -1 : +1;
+    if (*s1)
+        return 1;
+    if (*s2)
+        return -1;
+    return 0;
+}
+
+Py_UNICODE*
+Py_UNICODE_strchr(const Py_UNICODE *s, Py_UNICODE c)
+{
+    const Py_UNICODE *p;
+    for (p = s; *p; p++)
+        if (*p == c)
+            return (Py_UNICODE*)p;
+    return NULL;
+}
+
+
 #ifdef __cplusplus
 }
 #endif