gh-116738: Make _codecs module thread-safe (#117530)

The module itself is a thin wrapper around calls to functions in
`Python/codecs.c`, so that's where the meaningful changes happened:

- Move codecs-related state that lives on `PyInterpreterState` to a
  struct declared in `pycore_codecs.h`.

- In free-threaded builds, add a mutex to `codecs_state` to synchronize
  operations on `search_path`. Because `search_path_mutex` is used as a
  normal mutex and not a critical section, we must be extremely careful
  with operations called while holding it.

- The codec registry is explicitly initialized as part of
  `_PyUnicode_InitEncodings` to simplify thread-safety.
This commit is contained in:
Brett Simmers 2024-05-02 15:25:36 -07:00 committed by GitHub
parent 4e2caf2aa0
commit f8290df63f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 120 additions and 79 deletions

View file

@ -8,6 +8,17 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define"
#endif
#include "pycore_lock.h" // PyMutex
/* Initialize codecs-related state for the given interpreter, including
registering the first codec search function. Must be called before any other
PyCodec-related functions, and while only one thread is active.

`interp` is the interpreter whose codecs state is set up. The outcome is
reported through the returned PyStatus.

NOTE(review): the change description says the registry is initialized
explicitly as part of _PyUnicode_InitEncodings -- confirm against the caller. */
extern PyStatus _PyCodec_InitRegistry(PyInterpreterState *interp);
/* Finalize codecs-related state for the given interpreter. No PyCodec-related
functions other than PyCodec_Unregister() may be called after this.

`interp` is the interpreter whose codecs state is being finalized. Nothing is
returned, so no failure can be reported to the caller. */
extern void _PyCodec_Fini(PyInterpreterState *interp);
/* NOTE(review): presumably looks up the codec registered for the
NUL-terminated name `encoding` and returns it as a PyObject*. The ownership of
the returned reference and the failure convention (NULL with an exception set?)
are not visible from this header -- confirm in Python/codecs.c. */
extern PyObject* _PyCodec_Lookup(const char *encoding);
/* Text codec specific encoding and decoding API.
@ -48,6 +59,26 @@ extern PyObject* _PyCodecInfo_GetIncrementalEncoder(
PyObject *codec_info,
const char *errors);
// Per-interpreter state used by codecs.c.
// Holds the codec search functions, the cache of codecs they have produced,
// and the registry of error handlers, plus a flag saying whether those have
// been set up yet.
struct codecs_state {
// A list of callable objects used to search for codecs.
// In free-threaded builds (Py_GIL_DISABLED), see search_path_mutex below.
PyObject *search_path;
// A dict mapping codec names to codecs returned from a callable in
// search_path.
PyObject *search_cache;
// A dict mapping error handling strategies to functions to implement them.
PyObject *error_registry;
#ifdef Py_GIL_DISABLED
// Used to safely delete a specific item from search_path.
// This is a plain PyMutex, not a critical section, so be extremely careful
// about which operations are called while it is held.
PyMutex search_path_mutex;
#endif
// Whether or not the rest of the state is initialized.
// Stored as a plain int flag.
int initialized;
};
#ifdef __cplusplus
}