gh-111089: PyUnicode_AsUTF8() now raises on embedded NUL (#111091)

* PyUnicode_AsUTF8() now raises an exception if the string contains
  embedded null characters.
* Update related C API tests (test_capi.test_unicode).
* type_new_set_doc() uses PyUnicode_AsUTF8AndSize() to silently
  truncate doc containing null bytes.

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
Victor Stinner 2023-10-20 17:59:29 +02:00 committed by GitHub
parent 59ea0f523e
commit d731579bfb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 49 additions and 25 deletions

View file

@ -3837,7 +3837,13 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
const char *
PyUnicode_AsUTF8(PyObject *unicode)
{
return PyUnicode_AsUTF8AndSize(unicode, NULL);
Py_ssize_t size;
const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, &size);
if (utf8 != NULL && strlen(utf8) != (size_t)size) {
PyErr_SetString(PyExc_ValueError, "embedded null character");
return NULL;
}
return utf8;
}
/*