gh-116738: Make _codecs module thread-safe (#117530)

The module itself is a thin wrapper around calls to functions in
`Python/codecs.c`, so that's where the meaningful changes happened:

- Move codecs-related state that lives on `PyInterpreterState` to a
  struct declared in `pycore_codecs.h`.

- In free-threaded builds, add a mutex to `codecs_state` to synchronize
  operations on `search_path`. Because `search_path_mutex` is used as a
  normal mutex and not a critical section, we must be extremely careful
  with operations called while holding it.

- The codec registry is explicitly initialized as part of
  `_PyUnicode_InitEncodings` to simplify thread-safety.
This commit is contained in:
Brett Simmers 2024-05-02 15:25:36 -07:00 committed by GitHub
parent 4e2caf2aa0
commit f8290df63f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 120 additions and 79 deletions

View file

@ -14,6 +14,7 @@ extern "C" {
#include "pycore_atexit.h" // struct atexit_state
#include "pycore_ceval_state.h" // struct _ceval_state
#include "pycore_code.h" // struct callable_cache
#include "pycore_codecs.h" // struct codecs_state
#include "pycore_context.h" // struct _Py_context_state
#include "pycore_crossinterp.h" // struct _xidregistry
#include "pycore_dict_state.h" // struct _Py_dict_state
@ -182,10 +183,7 @@ struct _is {
possible to facilitate out-of-process observability
tools. */
PyObject *codec_search_path;
PyObject *codec_search_cache;
PyObject *codec_error_registry;
int codecs_initialized;
struct codecs_state codecs;
PyConfig config;
unsigned long feature_flags;