bpo-1635741: Release Unicode interned strings at exit (GH-21269)

* PyUnicode_InternInPlace() now ensures that interned strings are
  ready.
* Add _PyUnicode_ClearInterned().
* Py_Finalize() now releases Unicode interned strings by calling
  _PyUnicode_ClearInterned().
This commit is contained in:
Victor Stinner 2020-07-02 01:19:57 +02:00 committed by GitHub
parent 90db4653ae
commit 666ecfb095
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 34 additions and 36 deletions

View file

@ -78,6 +78,7 @@ extern void _PyGC_Fini(PyThreadState *tstate);
extern void _PyType_Fini(void); extern void _PyType_Fini(void);
extern void _Py_HashRandomization_Fini(void); extern void _Py_HashRandomization_Fini(void);
extern void _PyUnicode_Fini(PyThreadState *tstate); extern void _PyUnicode_Fini(PyThreadState *tstate);
extern void _PyUnicode_ClearInterned(PyThreadState *tstate);
extern void _PyLong_Fini(PyThreadState *tstate); extern void _PyLong_Fini(PyThreadState *tstate);
extern void _PyFaulthandler_Fini(void); extern void _PyFaulthandler_Fini(void);
extern void _PyHash_Fini(void); extern void _PyHash_Fini(void);

View file

@ -55,8 +55,8 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include <windows.h> #include <windows.h>
#endif #endif
/* Uncomment to display statistics on interned strings at exit when /* Uncomment to display statistics on interned strings at exit
using Valgrind or Insecure++. */ in _PyUnicode_ClearInterned(). */
/* #define INTERNED_STATS 1 */ /* #define INTERNED_STATS 1 */
@ -15681,6 +15681,11 @@ PyUnicode_InternInPlace(PyObject **p)
} }
#ifdef INTERNED_STRINGS #ifdef INTERNED_STRINGS
if (PyUnicode_READY(s) == -1) {
PyErr_Clear();
return;
}
if (interned == NULL) { if (interned == NULL) {
interned = PyDict_New(); interned = PyDict_New();
if (interned == NULL) { if (interned == NULL) {
@ -15733,23 +15738,29 @@ PyUnicode_InternFromString(const char *cp)
} }
#if defined(WITH_VALGRIND) || defined(__INSURE__) void
static void _PyUnicode_ClearInterned(PyThreadState *tstate)
unicode_release_interned(void)
{ {
if (interned == NULL || !PyDict_Check(interned)) { if (!_Py_IsMainInterpreter(tstate)) {
return; // interned dict is shared by all interpreters
}
PyObject *keys = PyDict_Keys(interned);
if (keys == NULL || !PyList_Check(keys)) {
PyErr_Clear();
return; return;
} }
/* Since unicode_release_interned() is intended to help a leak if (interned == NULL) {
detector, interned unicode strings are not forcibly deallocated; return;
rather, we give them their stolen references back, and then clear }
and DECREF the interned dict. */ assert(PyDict_CheckExact(interned));
PyObject *keys = PyDict_Keys(interned);
if (keys == NULL) {
PyErr_Clear();
return;
}
assert(PyList_CheckExact(keys));
/* Interned unicode strings are not forcibly deallocated; rather, we give
them their stolen references back, and then clear and DECREF the
interned dict. */
Py_ssize_t n = PyList_GET_SIZE(keys); Py_ssize_t n = PyList_GET_SIZE(keys);
#ifdef INTERNED_STATS #ifdef INTERNED_STATS
@ -15759,9 +15770,8 @@ unicode_release_interned(void)
#endif #endif
for (Py_ssize_t i = 0; i < n; i++) { for (Py_ssize_t i = 0; i < n; i++) {
PyObject *s = PyList_GET_ITEM(keys, i); PyObject *s = PyList_GET_ITEM(keys, i);
if (PyUnicode_READY(s) == -1) { assert(PyUnicode_IS_READY(s));
Py_UNREACHABLE();
}
switch (PyUnicode_CHECK_INTERNED(s)) { switch (PyUnicode_CHECK_INTERNED(s)) {
case SSTATE_INTERNED_IMMORTAL: case SSTATE_INTERNED_IMMORTAL:
Py_SET_REFCNT(s, Py_REFCNT(s) + 1); Py_SET_REFCNT(s, Py_REFCNT(s) + 1);
@ -15788,10 +15798,10 @@ unicode_release_interned(void)
mortal_size, immortal_size); mortal_size, immortal_size);
#endif #endif
Py_DECREF(keys); Py_DECREF(keys);
PyDict_Clear(interned); PyDict_Clear(interned);
Py_CLEAR(interned); Py_CLEAR(interned);
} }
#endif
/********************* Unicode Iterator **************************/ /********************* Unicode Iterator **************************/
@ -16160,23 +16170,9 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
void void
_PyUnicode_Fini(PyThreadState *tstate) _PyUnicode_Fini(PyThreadState *tstate)
{ {
struct _Py_unicode_state *state = &tstate->interp->unicode; // _PyUnicode_ClearInterned() must be called before
int is_main_interp = _Py_IsMainInterpreter(tstate); struct _Py_unicode_state *state = &tstate->interp->unicode;
if (is_main_interp) {
#if defined(WITH_VALGRIND) || defined(__INSURE__)
/* Insure++ is a memory analysis tool that aids in discovering
* memory leaks and other memory problems. On Python exit, the
* interned string dictionaries are flagged as being in use at exit
* (which it is). Under normal circumstances, this is fine because
* the memory will be automatically reclaimed by the system. Under
* memory debugging, it's a huge source of useless noise, so we
* trade off slower shutdown for less distraction in the memory
* reports. -baw
*/
unicode_release_interned();
#endif /* __INSURE__ */
}
Py_CLEAR(state->empty_string); Py_CLEAR(state->empty_string);
@ -16184,7 +16180,7 @@ _PyUnicode_Fini(PyThreadState *tstate)
Py_CLEAR(state->latin1[i]); Py_CLEAR(state->latin1[i]);
} }
if (is_main_interp) { if (_Py_IsMainInterpreter(tstate)) {
unicode_clear_static_strings(); unicode_clear_static_strings();
} }

View file

@ -1263,6 +1263,7 @@ finalize_interp_types(PyThreadState *tstate)
_PyFrame_Fini(tstate); _PyFrame_Fini(tstate);
_PyAsyncGen_Fini(tstate); _PyAsyncGen_Fini(tstate);
_PyContext_Fini(tstate); _PyContext_Fini(tstate);
_PyUnicode_ClearInterned(tstate);
_PyDict_Fini(tstate); _PyDict_Fini(tstate);
_PyList_Fini(tstate); _PyList_Fini(tstate);