[3.10] gh-99240: Fix double-free bug in Argument Clinic str_converter generated code (GH-99241) (#100353)

(cherry picked from commit 8dbe08eb7c) Fix double-free bug mentioned at GH-99240, by moving memory clean up out of "exit" label.
2025-08-04 17:08:35 +00:00 · 2022-12-20 18:20:42 +08:00 · 2022-12-20 18:20:42 +08:00 · 53063b7ffa
commit 53063b7ffa
parent b81d1c3be3
6 changed files with 202 additions and 25 deletions
--- a/Lib/test/clinic.test
+++ b/Lib/test/clinic.test
@ -1740,29 +1740,18 @@ test_str_converter_encoding(PyObject *module, PyObject *const *args, Py_ssize_t
        goto exit;
    }
    return_value = test_str_converter_encoding_impl(module, a, b, c, d, d_length, e, e_length);
+    /* Post parse cleanup for a */
+    PyMem_FREE(a);
+    /* Post parse cleanup for b */
+    PyMem_FREE(b);
+    /* Post parse cleanup for c */
+    PyMem_FREE(c);
+    /* Post parse cleanup for d */
+    PyMem_FREE(d);
+    /* Post parse cleanup for e */
+    PyMem_FREE(e);

 exit:
-    /* Cleanup for a */
-    if (a) {
-       PyMem_FREE(a);
-    }
-    /* Cleanup for b */
-    if (b) {
-       PyMem_FREE(b);
-    }
-    /* Cleanup for c */
-    if (c) {
-       PyMem_FREE(c);
-    }
-    /* Cleanup for d */
-    if (d) {
-       PyMem_FREE(d);
-    }
-    /* Cleanup for e */
-    if (e) {
-       PyMem_FREE(e);
-    }
-
    return return_value;
 }

@ -1770,7 +1759,7 @@ static PyObject *
 test_str_converter_encoding_impl(PyObject *module, char *a, char *b, char *c,
                                 char *d, Py_ssize_clean_t d_length, char *e,
                                 Py_ssize_clean_t e_length)
-/*[clinic end generated code: output=f579dd9e795a364e input=eb4c38e1f898f402]*/
+/*[clinic end generated code: output=2d7e8b6203db31aa input=eb4c38e1f898f402]*/


 /*[clinic input]
--- a/Lib/test/test_clinic.py
+++ b/Lib/test/test_clinic.py
@ -1045,6 +1045,17 @@ class ClinicFunctionalTest(unittest.TestCase):
        self.assertEqual(ac_tester.str_converter('a', b'b', b'c'), ('a', 'b', 'c'))
        self.assertEqual(ac_tester.str_converter('a', b'b', 'c\0c'), ('a', 'b', 'c\0c'))

+    def test_str_converter_encoding(self):
+        with self.assertRaises(TypeError):
+            ac_tester.str_converter_encoding(1)
+        self.assertEqual(ac_tester.str_converter_encoding('a', 'b', 'c'), ('a', 'b', 'c'))
+        with self.assertRaises(TypeError):
+            ac_tester.str_converter_encoding('a', b'b\0b', 'c')
+        self.assertEqual(ac_tester.str_converter_encoding('a', b'b', bytearray([ord('c')])), ('a', 'b', 'c'))
+        self.assertEqual(ac_tester.str_converter_encoding('a', b'b', bytearray([ord('c'), 0, ord('c')])),
+                         ('a', 'b', 'c\x00c'))
+        self.assertEqual(ac_tester.str_converter_encoding('a', b'b', b'c\x00c'), ('a', 'b', 'c\x00c'))
+
    def test_py_buffer_converter(self):
        with self.assertRaises(TypeError):
            ac_tester.py_buffer_converter('a', 'b')
@ -1211,6 +1222,11 @@ class ClinicFunctionalTest(unittest.TestCase):
            ac_tester.keyword_only_parameter(1)
        self.assertEqual(ac_tester.keyword_only_parameter(a=1), (1,))

+    def test_gh_99240_double_free(self):
+        expected_error = r'gh_99240_double_free\(\) argument 2 must be encoded string without null bytes, not str'
+        with self.assertRaisesRegex(TypeError, expected_error):
+            ac_tester.gh_99240_double_free('a', '\0b')
+

 if __name__ == "__main__":
    unittest.main()
--- a/Misc/NEWS.d/next/Library/2022-11-08-15-54-43.gh-issue-99240.MhYwcz.rst
+++ b/Misc/NEWS.d/next/Library/2022-11-08-15-54-43.gh-issue-99240.MhYwcz.rst
@ -0,0 +1,2 @@
+Fix double-free bug in Argument Clinic ``str_converter`` by
+extracting memory clean up to a new ``post_parsing`` section.
--- a/Modules/_testclinic.c
+++ b/Modules/_testclinic.c
@ -551,6 +551,64 @@ error:
 }


+/*[clinic input]
+str_converter_encoding
+
+    a: str(encoding="idna")
+    b: str(encoding="idna", accept={bytes, bytearray, str})
+    c: str(encoding="idna", accept={bytes, bytearray, str}, zeroes=True)
+    /
+
+[clinic start generated code]*/
+
+static PyObject *
+str_converter_encoding_impl(PyObject *module, char *a, char *b, char *c,
+                            Py_ssize_clean_t c_length)
+/*[clinic end generated code: output=ebef1a3f9b730a24 input=0c5cf5159d0e870d]*/
+{
+    assert(!PyErr_Occurred());
+    PyObject *out[3] = {NULL,};
+    int i = 0;
+    PyObject *arg;
+
+    arg = PyUnicode_FromString(a);
+    assert(arg || PyErr_Occurred());
+    if (!arg) {
+        goto error;
+    }
+    out[i++] = arg;
+
+    arg = PyUnicode_FromString(b);
+    assert(arg || PyErr_Occurred());
+    if (!arg) {
+        goto error;
+    }
+    out[i++] = arg;
+
+    arg = PyUnicode_FromStringAndSize(c, c_length);
+    assert(arg || PyErr_Occurred());
+    if (!arg) {
+        goto error;
+    }
+    out[i++] = arg;
+
+    PyObject *tuple = PyTuple_New(3);
+    if (!tuple) {
+        goto error;
+    }
+    for (int j = 0; j < 3; j++) {
+        PyTuple_SET_ITEM(tuple, j, out[j]);
+    }
+    return tuple;
+
+error:
+    for (int j = 0; j < i; j++) {
+        Py_DECREF(out[j]);
+    }
+    return NULL;
+}
+
+
 static PyObject *
 bytes_from_buffer(Py_buffer *buf)
 {
@ -892,6 +950,25 @@ keyword_only_parameter_impl(PyObject *module, PyObject *a)
 }


+/*[clinic input]
+gh_99240_double_free
+
+    a: str(encoding="idna")
+    b: str(encoding="idna")
+    /
+
+Proof-of-concept of GH-99240 double-free bug.
+
+[clinic start generated code]*/
+
+static PyObject *
+gh_99240_double_free_impl(PyObject *module, char *a, char *b)
+/*[clinic end generated code: output=586dc714992fe2ed input=23db44aa91870fc7]*/
+{
+    Py_RETURN_NONE;
+}
+
+
 static PyMethodDef tester_methods[] = {
    TEST_EMPTY_FUNCTION_METHODDEF
    OBJECTS_CONVERTER_METHODDEF
@ -916,6 +993,7 @@ static PyMethodDef tester_methods[] = {
    DOUBLE_CONVERTER_METHODDEF
    PY_COMPLEX_CONVERTER_METHODDEF
    STR_CONVERTER_METHODDEF
+    STR_CONVERTER_ENCODING_METHODDEF
    PY_BUFFER_CONVERTER_METHODDEF
    KEYWORDS_METHODDEF
    KEYWORDS_KWONLY_METHODDEF
@ -933,6 +1011,7 @@ static PyMethodDef tester_methods[] = {
    POSONLY_KEYWORDS_OPT_KWONLY_OPT_METHODDEF
    POSONLY_OPT_KEYWORDS_OPT_KWONLY_OPT_METHODDEF
    KEYWORD_ONLY_PARAMETER_METHODDEF
+    GH_99240_DOUBLE_FREE_METHODDEF
    {NULL, NULL}
 };

--- a/Modules/clinic/_testclinic.c.h
+++ b/Modules/clinic/_testclinic.c.h
@ -1159,6 +1159,43 @@ exit:
    return return_value;
 }

+PyDoc_STRVAR(str_converter_encoding__doc__,
+"str_converter_encoding($module, a, b, c, /)\n"
+"--\n"
+"\n");
+
+#define STR_CONVERTER_ENCODING_METHODDEF    \
+    {"str_converter_encoding", (PyCFunction)(void(*)(void))str_converter_encoding, METH_FASTCALL, str_converter_encoding__doc__},
+
+static PyObject *
+str_converter_encoding_impl(PyObject *module, char *a, char *b, char *c,
+                            Py_ssize_clean_t c_length);
+
+static PyObject *
+str_converter_encoding(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
+{
+    PyObject *return_value = NULL;
+    char *a = NULL;
+    char *b = NULL;
+    char *c = NULL;
+    Py_ssize_clean_t c_length;
+
+    if (!_PyArg_ParseStack(args, nargs, "esetet#:str_converter_encoding",
+        "idna", &a, "idna", &b, "idna", &c, &c_length)) {
+        goto exit;
+    }
+    return_value = str_converter_encoding_impl(module, a, b, c, c_length);
+    /* Post parse cleanup for a */
+    PyMem_FREE(a);
+    /* Post parse cleanup for b */
+    PyMem_FREE(b);
+    /* Post parse cleanup for c */
+    PyMem_FREE(c);
+
+exit:
+    return return_value;
+}
+
 PyDoc_STRVAR(py_buffer_converter__doc__,
 "py_buffer_converter($module, a, b, /)\n"
 "--\n"
@ -1914,4 +1951,37 @@ keyword_only_parameter(PyObject *module, PyObject *const *args, Py_ssize_t nargs
 exit:
    return return_value;
 }
-/*[clinic end generated code: output=2d5ac6b41daadf6f input=a9049054013a1b77]*/
+
+PyDoc_STRVAR(gh_99240_double_free__doc__,
+"gh_99240_double_free($module, a, b, /)\n"
+"--\n"
+"\n"
+"Proof-of-concept of GH-99240 double-free bug.");
+
+#define GH_99240_DOUBLE_FREE_METHODDEF    \
+    {"gh_99240_double_free", (PyCFunction)(void(*)(void))gh_99240_double_free, METH_FASTCALL, gh_99240_double_free__doc__},
+
+static PyObject *
+gh_99240_double_free_impl(PyObject *module, char *a, char *b);
+
+static PyObject *
+gh_99240_double_free(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
+{
+    PyObject *return_value = NULL;
+    char *a = NULL;
+    char *b = NULL;
+
+    if (!_PyArg_ParseStack(args, nargs, "eses:gh_99240_double_free",
+        "idna", &a, "idna", &b)) {
+        goto exit;
+    }
+    return_value = gh_99240_double_free_impl(module, a, b);
+    /* Post parse cleanup for a */
+    PyMem_FREE(a);
+    /* Post parse cleanup for b */
+    PyMem_FREE(b);
+
+exit:
+    return return_value;
+}
+/*[clinic end generated code: output=ecc55cbeac4adae6 input=a9049054013a1b77]*/
--- a/Tools/clinic/clinic.py
+++ b/Tools/clinic/clinic.py
@ -346,6 +346,12 @@ class CRenderData:
        # "goto exit" if there are any.
        self.return_conversion = []

+        # The C statements required to do some operations
+        # after the end of parsing but before cleaning up.
+        # These operations may be, for example, memory deallocations which
+        # can only be done without any error happening during argument parsing.
+        self.post_parsing = []
+
        # The C statements required to clean up after the impl call.
        self.cleanup = []

@ -751,6 +757,7 @@ class CLanguage(Language):
                    {modifications}
                    {return_value} = {c_basename}_impl({impl_arguments});
                    {return_conversion}
+                    {post_parsing}

                {exit_label}
                    {cleanup}
@ -1343,6 +1350,7 @@ class CLanguage(Language):
        template_dict['impl_parameters'] = ", ".join(data.impl_parameters)
        template_dict['impl_arguments'] = ", ".join(data.impl_arguments)
        template_dict['return_conversion'] = format_escape("".join(data.return_conversion).rstrip())
+        template_dict['post_parsing'] = format_escape("".join(data.post_parsing).rstrip())
        template_dict['cleanup'] = format_escape("".join(data.cleanup))
        template_dict['return_value'] = data.return_value

@ -1367,6 +1375,7 @@ class CLanguage(Language):
                return_conversion=template_dict['return_conversion'],
                initializers=template_dict['initializers'],
                modifications=template_dict['modifications'],
+                post_parsing=template_dict['post_parsing'],
                cleanup=template_dict['cleanup'],
                )

@ -2595,6 +2604,10 @@ class CConverter(metaclass=CConverterAutoRegister):
        # parse_arguments
        self.parse_argument(data.parse_arguments)

+        # post_parsing
+        if post_parsing := self.post_parsing():
+            data.post_parsing.append('/* Post parse cleanup for ' + name + ' */\n' + post_parsing.rstrip() + '\n')
+
        # cleanup
        cleanup = self.cleanup()
        if cleanup:
@ -2686,6 +2699,14 @@ class CConverter(metaclass=CConverterAutoRegister):
        """
        return ""

+    def post_parsing(self):
+        """
+        The C statements required to do some operations after the end of parsing but before cleaning up.
+        Return a string containing this code indented at column 0.
+        If no operation is necessary, return an empty string.
+        """
+        return ""
+
    def cleanup(self):
        """
        The C statements required to clean up after this variable.
@ -3278,10 +3299,10 @@ class str_converter(CConverter):
        if NoneType in accept and self.c_default == "Py_None":
            self.c_default = "NULL"

-    def cleanup(self):
+    def post_parsing(self):
        if self.encoding:
            name = self.name
-            return "".join(["if (", name, ") {\n   PyMem_FREE(", name, ");\n}\n"])
+            return f"PyMem_FREE({name});\n"

    def parse_arg(self, argname, displayname):
        if self.format_unit == 's':