bpo-46541: Replace core use of _Py_IDENTIFIER() with statically initialized global objects. (gh-30928)

We're no longer using _Py_IDENTIFIER() (or _Py_static_string()) in any core CPython code.  It is still used in a number of non-builtin stdlib modules.

The replacement is: PyUnicodeObject (not pointer) fields under _PyRuntimeState, statically initialized as part of _PyRuntime.  A new _Py_GET_GLOBAL_IDENTIFIER() macro facilitates lookup of the fields (along with _Py_GET_GLOBAL_STRING() for non-identifier strings).

https://bugs.python.org/issue46541#msg411799 explains the rationale for this change.

The core of the change is in:

* (new) Include/internal/pycore_global_strings.h - the declarations for the global strings, along with the macros
* Include/internal/pycore_runtime_init.h - added the static initializers for the global strings
* Include/internal/pycore_global_objects.h - where the struct in pycore_global_strings.h is hooked into _PyRuntimeState
* Tools/scripts/generate_global_objects.py - added generation of the global string declarations and static initializers

I've also added a --check flag to generate_global_objects.py (along with make check-global-objects) to check for unused global strings.  That check is added to the PR CI config.

The remainder of this change updates the core code to use _Py_GET_GLOBAL_IDENTIFIER() instead of _Py_IDENTIFIER() and the related _Py*Id functions (likewise for _Py_GET_GLOBAL_STRING() instead of _Py_static_string()).  This includes adding a few functions where there wasn't already an alternative to _Py*Id(), replacing the _Py_Identifier * parameter with PyObject *.

The following are not changed (yet):

* stop using _Py_IDENTIFIER() in the stdlib modules
* (maybe) get rid of _Py_IDENTIFIER(), etc. entirely -- this may not be doable as at least one package on PyPI using this (private) API
* (maybe) intern the strings during runtime init

https://bugs.python.org/issue46541
This commit is contained in:
Eric Snow 2022-02-08 13:39:07 -07:00 committed by GitHub
parent c018d3037b
commit 81c72044a1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
108 changed files with 2282 additions and 1573 deletions

View file

@ -23,26 +23,6 @@ class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(strict);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
/* TextIOBase */
PyDoc_STRVAR(textiobase_doc,
@ -255,9 +235,7 @@ _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
Py_INCREF(decoder);
if (errors == NULL) {
self->errors = _PyUnicode_FromId(&PyId_strict);
if (self->errors == NULL)
return -1;
self->errors = &_Py_ID(strict);
}
else {
self->errors = errors;
@ -586,11 +564,13 @@ _io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
self->pendingcr = (int) (flag & 1);
flag >>= 1;
if (self->decoder != Py_None)
return _PyObject_CallMethodId(self->decoder,
&PyId_setstate, "((OK))", buffer, flag);
else
if (self->decoder != Py_None) {
return _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
"((OK))", buffer, flag);
}
else {
Py_RETURN_NONE;
}
}
/*[clinic input]
@ -865,7 +845,7 @@ _textiowrapper_set_decoder(textio *self, PyObject *codec_info,
PyObject *res;
int r;
res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
if (res == NULL)
return -1;
@ -920,7 +900,7 @@ _textiowrapper_set_encoder(textio *self, PyObject *codec_info,
PyObject *res;
int r;
res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
if (res == NULL)
return -1;
@ -939,7 +919,7 @@ _textiowrapper_set_encoder(textio *self, PyObject *codec_info,
return -1;
/* Get the normalized named of the codec */
if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
if (_PyObject_LookupAttr(codec_info, &_Py_ID(name), &res) < 0) {
return -1;
}
if (res != NULL && PyUnicode_Check(res)) {
@ -1099,10 +1079,7 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
}
if (errors == Py_None) {
errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
if (errors == NULL) {
return -1;
}
errors = &_Py_ID(strict);
}
else if (!PyUnicode_Check(errors)) {
// Check 'errors' argument here because Argument Clinic doesn't support
@ -1142,7 +1119,7 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
state = IO_STATE();
if (state == NULL)
goto error;
fileno = _PyObject_CallMethodIdNoArgs(buffer, &PyId_fileno);
fileno = PyObject_CallMethodNoArgs(buffer, &_Py_ID(fileno));
/* Ignore only AttributeError and UnsupportedOperation */
if (fileno == NULL) {
if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
@ -1228,7 +1205,7 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
Py_IS_TYPE(buffer, &PyBufferedWriter_Type) ||
Py_IS_TYPE(buffer, &PyBufferedRandom_Type))
{
if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
if (_PyObject_LookupAttr(buffer, &_Py_ID(raw), &raw) < 0)
goto error;
/* Cache the raw FileIO object to speed up 'closed' checks */
if (raw != NULL) {
@ -1239,7 +1216,7 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
}
}
res = _PyObject_CallMethodIdNoArgs(buffer, &PyId_seekable);
res = PyObject_CallMethodNoArgs(buffer, &_Py_ID(seekable));
if (res == NULL)
goto error;
r = PyObject_IsTrue(res);
@ -1302,10 +1279,7 @@ textiowrapper_change_encoding(textio *self, PyObject *encoding,
}
}
else if (errors == Py_None) {
errors = _PyUnicode_FromId(&PyId_strict);
if (errors == NULL) {
return -1;
}
errors = &_Py_ID(strict);
}
const char *c_errors = PyUnicode_AsUTF8(errors);
@ -1640,8 +1614,8 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
haslf = 1;
if (haslf && self->writetranslate && self->writenl != NULL) {
PyObject *newtext = _PyObject_CallMethodId(
text, &PyId_replace, "ss", "\n", self->writenl);
PyObject *newtext = _PyObject_CallMethod(text, &_Py_ID(replace),
"ss", "\n", self->writenl);
Py_DECREF(text);
if (newtext == NULL)
return NULL;
@ -1740,7 +1714,7 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
Py_CLEAR(self->snapshot);
if (self->decoder) {
ret = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
ret = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
if (ret == NULL)
return NULL;
Py_DECREF(ret);
@ -1944,7 +1918,7 @@ _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
if (n < 0) {
/* Read everything */
PyObject *bytes = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_read);
PyObject *bytes = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(read));
PyObject *decoded;
if (bytes == NULL)
goto fail;
@ -2404,13 +2378,16 @@ _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
utf-16, that we are expecting a BOM).
*/
if (cookie->start_pos == 0 && cookie->dec_flags == 0)
if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
res = PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset);
else
res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
"((yi))", "", cookie->dec_flags);
if (res == NULL)
}
else {
res = _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
"((yi))", "", cookie->dec_flags);
}
if (res == NULL) {
return -1;
}
Py_DECREF(res);
return 0;
}
@ -2487,7 +2464,7 @@ _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
* sync the underlying buffer with the current position.
*/
Py_DECREF(cookieObj);
cookieObj = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_tell);
cookieObj = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(tell));
if (cookieObj == NULL)
goto fail;
break;
@ -2503,7 +2480,7 @@ _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
goto fail;
}
res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
if (res == NULL)
goto fail;
Py_DECREF(res);
@ -2511,13 +2488,13 @@ _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
textiowrapper_set_decoded_chars(self, NULL);
Py_CLEAR(self->snapshot);
if (self->decoder) {
res = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
if (res == NULL)
goto fail;
Py_DECREF(res);
}
res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
res = _PyObject_CallMethod(self->buffer, &_Py_ID(seek), "ii", 0, 2);
Py_CLEAR(cookieObj);
if (res == NULL)
goto fail;
@ -2583,8 +2560,8 @@ _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
if (cookie.chars_to_skip) {
/* Just like _read_chunk, feed the decoder and save a snapshot. */
PyObject *input_chunk = _PyObject_CallMethodId(
self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
PyObject *input_chunk = _PyObject_CallMethod(self->buffer, &_Py_ID(read),
"i", cookie.bytes_to_feed);
PyObject *decoded;
if (input_chunk == NULL)
@ -2605,7 +2582,7 @@ _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
}
Py_XSETREF(self->snapshot, snapshot);
decoded = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_decode,
decoded = PyObject_CallMethodObjArgs(self->decoder, &_Py_ID(decode),
input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);
if (check_decoded(decoded) < 0)
@ -2673,12 +2650,12 @@ _io_TextIOWrapper_tell_impl(textio *self)
if (_textiowrapper_writeflush(self) < 0)
return NULL;
res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
if (res == NULL)
goto fail;
Py_DECREF(res);
posobj = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_tell);
posobj = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(tell));
if (posobj == NULL)
goto fail;
@ -2750,8 +2727,8 @@ _io_TextIOWrapper_tell_impl(textio *self)
} while (0)
#define DECODER_DECODE(start, len, res) do { \
PyObject *_decoded = _PyObject_CallMethodId( \
self->decoder, &PyId_decode, "y#", start, len); \
PyObject *_decoded = _PyObject_CallMethod( \
self->decoder, &_Py_ID(decode), "y#", start, len); \
if (check_decoded(_decoded) < 0) \
goto fail; \
res = PyUnicode_GET_LENGTH(_decoded); \
@ -2832,8 +2809,8 @@ _io_TextIOWrapper_tell_impl(textio *self)
}
if (input == input_end) {
/* We didn't get enough decoded data; signal EOF to get more. */
PyObject *decoded = _PyObject_CallMethodId(
self->decoder, &PyId_decode, "yO", "", /* final = */ Py_True);
PyObject *decoded = _PyObject_CallMethod(
self->decoder, &_Py_ID(decode), "yO", "", /* final = */ Py_True);
if (check_decoded(decoded) < 0)
goto fail;
chars_decoded += PyUnicode_GET_LENGTH(decoded);
@ -2848,7 +2825,8 @@ _io_TextIOWrapper_tell_impl(textio *self)
}
finally:
res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
res = PyObject_CallMethodOneArg(
self->decoder, &_Py_ID(setstate), saved_state);
Py_DECREF(saved_state);
if (res == NULL)
return NULL;
@ -2862,7 +2840,8 @@ fail:
if (saved_state) {
PyObject *type, *value, *traceback;
PyErr_Fetch(&type, &value, &traceback);
res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
res = PyObject_CallMethodOneArg(
self->decoder, &_Py_ID(setstate), saved_state);
_PyErr_ChainExceptions(type, value, traceback);
Py_DECREF(saved_state);
Py_XDECREF(res);
@ -2913,7 +2892,7 @@ textiowrapper_repr(textio *self)
}
goto error;
}
if (_PyObject_LookupAttrId((PyObject *) self, &PyId_name, &nameobj) < 0) {
if (_PyObject_LookupAttr((PyObject *) self, &_Py_ID(name), &nameobj) < 0) {
if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
goto error;
}
@ -2929,7 +2908,7 @@ textiowrapper_repr(textio *self)
if (res == NULL)
goto error;
}
if (_PyObject_LookupAttrId((PyObject *) self, &PyId_mode, &modeobj) < 0) {
if (_PyObject_LookupAttr((PyObject *) self, &_Py_ID(mode), &modeobj) < 0) {
goto error;
}
if (modeobj != NULL) {
@ -2969,7 +2948,7 @@ _io_TextIOWrapper_fileno_impl(textio *self)
/*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
{
CHECK_ATTACHED(self);
return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_fileno);
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(fileno));
}
/*[clinic input]
@ -2981,7 +2960,7 @@ _io_TextIOWrapper_seekable_impl(textio *self)
/*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
{
CHECK_ATTACHED(self);
return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_seekable);
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(seekable));
}
/*[clinic input]
@ -2993,7 +2972,7 @@ _io_TextIOWrapper_readable_impl(textio *self)
/*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
{
CHECK_ATTACHED(self);
return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
}
/*[clinic input]
@ -3005,7 +2984,7 @@ _io_TextIOWrapper_writable_impl(textio *self)
/*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
{
CHECK_ATTACHED(self);
return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
}
/*[clinic input]
@ -3017,7 +2996,7 @@ _io_TextIOWrapper_isatty_impl(textio *self)
/*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
{
CHECK_ATTACHED(self);
return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_isatty);
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(isatty));
}
/*[clinic input]
@ -3033,7 +3012,7 @@ _io_TextIOWrapper_flush_impl(textio *self)
self->telling = self->seekable;
if (_textiowrapper_writeflush(self) < 0)
return NULL;
return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_flush);
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush));
}
/*[clinic input]
@ -3062,21 +3041,20 @@ _io_TextIOWrapper_close_impl(textio *self)
else {
PyObject *exc = NULL, *val, *tb;
if (self->finalizing) {
res = _PyObject_CallMethodIdOneArg(self->buffer,
&PyId__dealloc_warn,
(PyObject *)self);
res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(_dealloc_warn),
(PyObject *)self);
if (res)
Py_DECREF(res);
else
PyErr_Clear();
}
res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
if (res == NULL)
PyErr_Fetch(&exc, &val, &tb);
else
Py_DECREF(res);
res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_close);
res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(close));
if (exc != NULL) {
_PyErr_ChainExceptions(exc, val, tb);
Py_CLEAR(res);
@ -3127,7 +3105,7 @@ static PyObject *
textiowrapper_name_get(textio *self, void *context)
{
CHECK_ATTACHED(self);
return _PyObject_GetAttrId(self->buffer, &PyId_name);
return PyObject_GetAttr(self->buffer, &_Py_ID(name));
}
static PyObject *