[3.10] gh-99240: Fix double-free bug in Argument Clinic str_converter generated code (GH-99241) (#100353)

(cherry picked from commit 8dbe08eb7c)

Fix double-free bug mentioned at GH-99240, by moving memory clean up out of "exit" label.
This commit is contained in:
colorfulappl 2022-12-20 18:20:42 +08:00 committed by GitHub
parent b81d1c3be3
commit 53063b7ffa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 202 additions and 25 deletions

View file

@ -1740,29 +1740,18 @@ test_str_converter_encoding(PyObject *module, PyObject *const *args, Py_ssize_t
goto exit;
}
return_value = test_str_converter_encoding_impl(module, a, b, c, d, d_length, e, e_length);
/* Post parse cleanup for a */
PyMem_FREE(a);
/* Post parse cleanup for b */
PyMem_FREE(b);
/* Post parse cleanup for c */
PyMem_FREE(c);
/* Post parse cleanup for d */
PyMem_FREE(d);
/* Post parse cleanup for e */
PyMem_FREE(e);
exit:
/* Cleanup for a */
if (a) {
PyMem_FREE(a);
}
/* Cleanup for b */
if (b) {
PyMem_FREE(b);
}
/* Cleanup for c */
if (c) {
PyMem_FREE(c);
}
/* Cleanup for d */
if (d) {
PyMem_FREE(d);
}
/* Cleanup for e */
if (e) {
PyMem_FREE(e);
}
return return_value;
}
@ -1770,7 +1759,7 @@ static PyObject *
test_str_converter_encoding_impl(PyObject *module, char *a, char *b, char *c,
char *d, Py_ssize_clean_t d_length, char *e,
Py_ssize_clean_t e_length)
/*[clinic end generated code: output=f579dd9e795a364e input=eb4c38e1f898f402]*/
/*[clinic end generated code: output=2d7e8b6203db31aa input=eb4c38e1f898f402]*/
/*[clinic input]

View file

@ -1045,6 +1045,17 @@ class ClinicFunctionalTest(unittest.TestCase):
self.assertEqual(ac_tester.str_converter('a', b'b', b'c'), ('a', 'b', 'c'))
self.assertEqual(ac_tester.str_converter('a', b'b', 'c\0c'), ('a', 'b', 'c\0c'))
def test_str_converter_encoding(self):
with self.assertRaises(TypeError):
ac_tester.str_converter_encoding(1)
self.assertEqual(ac_tester.str_converter_encoding('a', 'b', 'c'), ('a', 'b', 'c'))
with self.assertRaises(TypeError):
ac_tester.str_converter_encoding('a', b'b\0b', 'c')
self.assertEqual(ac_tester.str_converter_encoding('a', b'b', bytearray([ord('c')])), ('a', 'b', 'c'))
self.assertEqual(ac_tester.str_converter_encoding('a', b'b', bytearray([ord('c'), 0, ord('c')])),
('a', 'b', 'c\x00c'))
self.assertEqual(ac_tester.str_converter_encoding('a', b'b', b'c\x00c'), ('a', 'b', 'c\x00c'))
def test_py_buffer_converter(self):
with self.assertRaises(TypeError):
ac_tester.py_buffer_converter('a', 'b')
@ -1211,6 +1222,11 @@ class ClinicFunctionalTest(unittest.TestCase):
ac_tester.keyword_only_parameter(1)
self.assertEqual(ac_tester.keyword_only_parameter(a=1), (1,))
def test_gh_99240_double_free(self):
expected_error = r'gh_99240_double_free\(\) argument 2 must be encoded string without null bytes, not str'
with self.assertRaisesRegex(TypeError, expected_error):
ac_tester.gh_99240_double_free('a', '\0b')
if __name__ == "__main__":
unittest.main()

View file

@ -0,0 +1,2 @@
Fix double-free bug in Argument Clinic ``str_converter`` by
extracting memory clean up to a new ``post_parsing`` section.

View file

@ -551,6 +551,64 @@ error:
}
/*[clinic input]
str_converter_encoding
a: str(encoding="idna")
b: str(encoding="idna", accept={bytes, bytearray, str})
c: str(encoding="idna", accept={bytes, bytearray, str}, zeroes=True)
/
[clinic start generated code]*/
static PyObject *
str_converter_encoding_impl(PyObject *module, char *a, char *b, char *c,
Py_ssize_clean_t c_length)
/*[clinic end generated code: output=ebef1a3f9b730a24 input=0c5cf5159d0e870d]*/
{
assert(!PyErr_Occurred());
PyObject *out[3] = {NULL,};
int i = 0;
PyObject *arg;
arg = PyUnicode_FromString(a);
assert(arg || PyErr_Occurred());
if (!arg) {
goto error;
}
out[i++] = arg;
arg = PyUnicode_FromString(b);
assert(arg || PyErr_Occurred());
if (!arg) {
goto error;
}
out[i++] = arg;
arg = PyUnicode_FromStringAndSize(c, c_length);
assert(arg || PyErr_Occurred());
if (!arg) {
goto error;
}
out[i++] = arg;
PyObject *tuple = PyTuple_New(3);
if (!tuple) {
goto error;
}
for (int j = 0; j < 3; j++) {
PyTuple_SET_ITEM(tuple, j, out[j]);
}
return tuple;
error:
for (int j = 0; j < i; j++) {
Py_DECREF(out[j]);
}
return NULL;
}
static PyObject *
bytes_from_buffer(Py_buffer *buf)
{
@ -892,6 +950,25 @@ keyword_only_parameter_impl(PyObject *module, PyObject *a)
}
/*[clinic input]
gh_99240_double_free
a: str(encoding="idna")
b: str(encoding="idna")
/
Proof-of-concept of GH-99240 double-free bug.
[clinic start generated code]*/
static PyObject *
gh_99240_double_free_impl(PyObject *module, char *a, char *b)
/*[clinic end generated code: output=586dc714992fe2ed input=23db44aa91870fc7]*/
{
Py_RETURN_NONE;
}
static PyMethodDef tester_methods[] = {
TEST_EMPTY_FUNCTION_METHODDEF
OBJECTS_CONVERTER_METHODDEF
@ -916,6 +993,7 @@ static PyMethodDef tester_methods[] = {
DOUBLE_CONVERTER_METHODDEF
PY_COMPLEX_CONVERTER_METHODDEF
STR_CONVERTER_METHODDEF
STR_CONVERTER_ENCODING_METHODDEF
PY_BUFFER_CONVERTER_METHODDEF
KEYWORDS_METHODDEF
KEYWORDS_KWONLY_METHODDEF
@ -933,6 +1011,7 @@ static PyMethodDef tester_methods[] = {
POSONLY_KEYWORDS_OPT_KWONLY_OPT_METHODDEF
POSONLY_OPT_KEYWORDS_OPT_KWONLY_OPT_METHODDEF
KEYWORD_ONLY_PARAMETER_METHODDEF
GH_99240_DOUBLE_FREE_METHODDEF
{NULL, NULL}
};

View file

@ -1159,6 +1159,43 @@ exit:
return return_value;
}
PyDoc_STRVAR(str_converter_encoding__doc__,
"str_converter_encoding($module, a, b, c, /)\n"
"--\n"
"\n");
#define STR_CONVERTER_ENCODING_METHODDEF \
{"str_converter_encoding", (PyCFunction)(void(*)(void))str_converter_encoding, METH_FASTCALL, str_converter_encoding__doc__},
static PyObject *
str_converter_encoding_impl(PyObject *module, char *a, char *b, char *c,
Py_ssize_clean_t c_length);
static PyObject *
str_converter_encoding(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
{
PyObject *return_value = NULL;
char *a = NULL;
char *b = NULL;
char *c = NULL;
Py_ssize_clean_t c_length;
if (!_PyArg_ParseStack(args, nargs, "esetet#:str_converter_encoding",
"idna", &a, "idna", &b, "idna", &c, &c_length)) {
goto exit;
}
return_value = str_converter_encoding_impl(module, a, b, c, c_length);
/* Post parse cleanup for a */
PyMem_FREE(a);
/* Post parse cleanup for b */
PyMem_FREE(b);
/* Post parse cleanup for c */
PyMem_FREE(c);
exit:
return return_value;
}
PyDoc_STRVAR(py_buffer_converter__doc__,
"py_buffer_converter($module, a, b, /)\n"
"--\n"
@ -1914,4 +1951,37 @@ keyword_only_parameter(PyObject *module, PyObject *const *args, Py_ssize_t nargs
exit:
return return_value;
}
/*[clinic end generated code: output=2d5ac6b41daadf6f input=a9049054013a1b77]*/
PyDoc_STRVAR(gh_99240_double_free__doc__,
"gh_99240_double_free($module, a, b, /)\n"
"--\n"
"\n"
"Proof-of-concept of GH-99240 double-free bug.");
#define GH_99240_DOUBLE_FREE_METHODDEF \
{"gh_99240_double_free", (PyCFunction)(void(*)(void))gh_99240_double_free, METH_FASTCALL, gh_99240_double_free__doc__},
static PyObject *
gh_99240_double_free_impl(PyObject *module, char *a, char *b);
static PyObject *
gh_99240_double_free(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
{
PyObject *return_value = NULL;
char *a = NULL;
char *b = NULL;
if (!_PyArg_ParseStack(args, nargs, "eses:gh_99240_double_free",
"idna", &a, "idna", &b)) {
goto exit;
}
return_value = gh_99240_double_free_impl(module, a, b);
/* Post parse cleanup for a */
PyMem_FREE(a);
/* Post parse cleanup for b */
PyMem_FREE(b);
exit:
return return_value;
}
/*[clinic end generated code: output=ecc55cbeac4adae6 input=a9049054013a1b77]*/

View file

@ -346,6 +346,12 @@ class CRenderData:
# "goto exit" if there are any.
self.return_conversion = []
# The C statements required to do some operations
# after the end of parsing but before cleaning up.
# These operations may be, for example, memory deallocations which
# can only be done without any error happening during argument parsing.
self.post_parsing = []
# The C statements required to clean up after the impl call.
self.cleanup = []
@ -751,6 +757,7 @@ class CLanguage(Language):
{modifications}
{return_value} = {c_basename}_impl({impl_arguments});
{return_conversion}
{post_parsing}
{exit_label}
{cleanup}
@ -1343,6 +1350,7 @@ class CLanguage(Language):
template_dict['impl_parameters'] = ", ".join(data.impl_parameters)
template_dict['impl_arguments'] = ", ".join(data.impl_arguments)
template_dict['return_conversion'] = format_escape("".join(data.return_conversion).rstrip())
template_dict['post_parsing'] = format_escape("".join(data.post_parsing).rstrip())
template_dict['cleanup'] = format_escape("".join(data.cleanup))
template_dict['return_value'] = data.return_value
@ -1367,6 +1375,7 @@ class CLanguage(Language):
return_conversion=template_dict['return_conversion'],
initializers=template_dict['initializers'],
modifications=template_dict['modifications'],
post_parsing=template_dict['post_parsing'],
cleanup=template_dict['cleanup'],
)
@ -2595,6 +2604,10 @@ class CConverter(metaclass=CConverterAutoRegister):
# parse_arguments
self.parse_argument(data.parse_arguments)
# post_parsing
if post_parsing := self.post_parsing():
data.post_parsing.append('/* Post parse cleanup for ' + name + ' */\n' + post_parsing.rstrip() + '\n')
# cleanup
cleanup = self.cleanup()
if cleanup:
@ -2686,6 +2699,14 @@ class CConverter(metaclass=CConverterAutoRegister):
"""
return ""
def post_parsing(self):
"""
The C statements required to do some operations after the end of parsing but before cleaning up.
Return a string containing this code indented at column 0.
If no operation is necessary, return an empty string.
"""
return ""
def cleanup(self):
"""
The C statements required to clean up after this variable.
@ -3278,10 +3299,10 @@ class str_converter(CConverter):
if NoneType in accept and self.c_default == "Py_None":
self.c_default = "NULL"
def cleanup(self):
def post_parsing(self):
if self.encoding:
name = self.name
return "".join(["if (", name, ") {\n PyMem_FREE(", name, ");\n}\n"])
return f"PyMem_FREE({name});\n"
def parse_arg(self, argname, displayname):
if self.format_unit == 's':