bpo-45061: Detect refcount bug on empty string singleton (GH-28504)

Detect refcount bugs in C extensions when the empty Unicode string
singleton is destroyed by mistake.

* Move forward declarations to the top of unicodeobject.c.
* Simplify unicode_is_singleton().
This commit is contained in:
Victor Stinner 2021-09-21 23:43:09 +02:00 committed by GitHub
parent 06e1773c8d
commit 86f28372b1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 40 additions and 20 deletions

View file

@ -1,4 +1,5 @@
Add a deallocator to the bool type to detect refcount bugs in C extensions
which call Py_DECREF(Py_True) or Py_DECREF(Py_False) by mistake. Detect also
refcount bugs when the empty tuple singleton is destroyed by mistake. Patch
by Victor Stinner.
refcount bugs when the empty tuple singleton or the Unicode empty string
singleton is destroyed by mistake.
Patch by Victor Stinner.

View file

@ -48,6 +48,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include "pycore_interp.h" // PyInterpreterState.fs_codec
#include "pycore_object.h" // _PyObject_GC_TRACK()
#include "pycore_pathconfig.h" // _Py_DumpPathConfig()
#include "pycore_pyerrors.h" // _Py_FatalRefcountError()
#include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding()
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
@ -212,6 +213,24 @@ extern "C" {
#endif
/* Forward declarations: prototypes for static functions so that they can be
   called ahead of their definitions. */
static inline int
_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch);
static inline void
_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer);
static PyObject *
unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
const char *errors);
static PyObject *
unicode_decode_utf8(const char *s, Py_ssize_t size,
_Py_error_handler error_handler, const char *errors,
Py_ssize_t *consumed);
#ifdef Py_DEBUG
/* Debug-build only helpers; unicode_dealloc() calls both of them to detect
   the deallocation of a singleton. */
static inline int unicode_is_finalizing(void);
static int unicode_is_singleton(PyObject *unicode);
#endif
static struct _Py_unicode_state*
get_unicode_state(void)
{
@ -279,19 +298,6 @@ unicode_fill(enum PyUnicode_Kind kind, void *data, Py_UCS4 value,
}
/* Forward declarations: prototypes for static functions so that they can be
   called ahead of their definitions. */
static inline int
_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch);
static inline void
_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer);
static PyObject *
unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
const char *errors);
static PyObject *
unicode_decode_utf8(const char *s, Py_ssize_t size,
_Py_error_handler error_handler, const char *errors,
Py_ssize_t *consumed);
/* Fast detection of the most frequent whitespace characters */
const unsigned char _Py_ascii_whitespace[] = {
0, 0, 0, 0, 0, 0, 0, 0,
@ -1930,6 +1936,12 @@ _PyUnicode_Ready(PyObject *unicode)
static void
unicode_dealloc(PyObject *unicode)
{
#ifdef Py_DEBUG
if (!unicode_is_finalizing() && unicode_is_singleton(unicode)) {
_Py_FatalRefcountError("deallocating an Unicode singleton");
}
#endif
switch (PyUnicode_CHECK_INTERNED(unicode)) {
case SSTATE_NOT_INTERNED:
break;
@ -1982,11 +1994,8 @@ unicode_is_singleton(PyObject *unicode)
if (unicode == state->empty_string) {
return 1;
}
PyASCIIObject *ascii = (PyASCIIObject *)unicode;
if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1)
{
Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
if (ch < 256 && state->latin1[ch] == unicode) {
for (Py_ssize_t i = 0; i < 256; i++) {
if (unicode == state->latin1[i]) {
return 1;
}
}
@ -15984,6 +15993,16 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
#endif
#ifdef Py_DEBUG
static inline int
unicode_is_finalizing(void)
{
struct _Py_unicode_state *state = get_unicode_state();
return (state->interned == NULL);
}
#endif
void
_PyUnicode_Fini(PyInterpreterState *interp)
{