bpo-39465: Fix _PyUnicode_FromId() for subinterpreters (GH-20058)

Make _PyUnicode_FromId() function compatible with subinterpreters.
Each interpreter now has an array of identifier objects (interned
strings decoded from UTF-8).

* Add PyInterpreterState.unicode.identifiers: array of identifiers
  objects.
* Add _PyRuntimeState.unicode_ids used to allocate unique indexes
  to _Py_Identifier.
* Rewrite the _Py_Identifier structure.

Microbenchmark on _PyUnicode_FromId(&PyId_a) with _Py_IDENTIFIER(a):

[ref] 2.42 ns +- 0.00 ns -> [atomic] 3.39 ns +- 0.00 ns: 1.40x slower

This change adds 1 ns per _PyUnicode_FromId() call in average.
This commit is contained in:
Victor Stinner 2020-12-26 00:41:46 +01:00 committed by GitHub
parent f0853bcedf
commit ba3d67c2fb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 102 additions and 37 deletions

View file

@ -35,12 +35,13 @@ PyAPI_FUNC(Py_ssize_t) _Py_GetRefTotal(void);
_PyObject_{Get,Set,Has}AttrId are __getattr__ versions using _Py_Identifier*.
*/
typedef struct _Py_Identifier {
struct _Py_Identifier *next;
const char* string;
PyObject *object;
// Index in PyInterpreterState.unicode.ids.array. It is process-wide
// unique and must be initialized to -1.
Py_ssize_t index;
} _Py_Identifier;
#define _Py_static_string_init(value) { .next = NULL, .string = value, .object = NULL }
#define _Py_static_string_init(value) { .string = value, .index = -1 }
#define _Py_static_string(varname, value) static _Py_Identifier varname = _Py_static_string_init(value)
#define _Py_IDENTIFIER(varname) _Py_static_string(PyId_##varname, #varname)

View file

@ -64,6 +64,11 @@ struct _Py_bytes_state {
PyBytesObject *characters[256];
};
struct _Py_unicode_ids {
Py_ssize_t size;
PyObject **array;
};
struct _Py_unicode_state {
// The empty Unicode object is a singleton to improve performance.
PyObject *empty_string;
@ -71,6 +76,8 @@ struct _Py_unicode_state {
shared as well. */
PyObject *latin1[256];
struct _Py_unicode_fs_codec fs_codec;
// Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
struct _Py_unicode_ids ids;
};
struct _Py_float_state {

View file

@ -49,6 +49,11 @@ typedef struct _Py_AuditHookEntry {
void *userData;
} _Py_AuditHookEntry;
struct _Py_unicode_runtime_ids {
PyThread_type_lock lock;
Py_ssize_t next_index;
};
/* Full Python runtime state */
typedef struct pyruntimestate {
@ -106,6 +111,8 @@ typedef struct pyruntimestate {
void *open_code_userdata;
_Py_AuditHookEntry *audit_hook_head;
struct _Py_unicode_runtime_ids unicode_ids;
// XXX Consolidate globals found via the check-c-globals script.
} _PyRuntimeState;