mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 11:49:12 +00:00 
			
		
		
		
	We're no longer using _Py_IDENTIFIER() (or _Py_static_string()) in any core CPython code. It is still used in a number of non-builtin stdlib modules. The replacement is: PyUnicodeObject (not pointer) fields under _PyRuntimeState, statically initialized as part of _PyRuntime. A new _Py_GET_GLOBAL_IDENTIFIER() macro facilitates lookup of the fields (along with _Py_GET_GLOBAL_STRING() for non-identifier strings). https://bugs.python.org/issue46541#msg411799 explains the rationale for this change. The core of the change is in: * (new) Include/internal/pycore_global_strings.h - the declarations for the global strings, along with the macros * Include/internal/pycore_runtime_init.h - added the static initializers for the global strings * Include/internal/pycore_global_objects.h - where the struct in pycore_global_strings.h is hooked into _PyRuntimeState * Tools/scripts/generate_global_objects.py - added generation of the global string declarations and static initializers I've also added a --check flag to generate_global_objects.py (along with make check-global-objects) to check for unused global strings. That check is added to the PR CI config. The remainder of this change updates the core code to use _Py_GET_GLOBAL_IDENTIFIER() instead of _Py_IDENTIFIER() and the related _Py*Id functions (likewise for _Py_GET_GLOBAL_STRING() instead of _Py_static_string()). This includes adding a few functions where there wasn't already an alternative to _Py*Id(), replacing the _Py_Identifier * parameter with PyObject *. The following are not changed (yet): * stop using _Py_IDENTIFIER() in the stdlib modules * (maybe) get rid of _Py_IDENTIFIER(), etc. entirely -- this may not be doable as at least one package on PyPI is using this (private) API * (maybe) intern the strings during runtime init https://bugs.python.org/issue46541
		
			
				
	
	
		
			62 lines
		
	
	
	
		
			1.7 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			62 lines
		
	
	
	
		
			1.7 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
#ifndef Py_INTERNAL_UNICODEOBJECT_H
 | 
						|
#define Py_INTERNAL_UNICODEOBJECT_H
 | 
						|
#ifdef __cplusplus
 | 
						|
extern "C" {
 | 
						|
#endif
 | 
						|
 | 
						|
#ifndef Py_BUILD_CORE
 | 
						|
#  error "this header requires Py_BUILD_CORE define"
 | 
						|
#endif
 | 
						|
 | 
						|
#include "pycore_fileutils.h"     // _Py_error_handler
 | 
						|
 | 
						|
 | 
						|
/* runtime lifecycle */
 | 
						|
 | 
						|
extern void _PyUnicode_InitState(PyInterpreterState *);
 | 
						|
extern PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *);
 | 
						|
extern PyStatus _PyUnicode_InitTypes(PyInterpreterState *);
 | 
						|
extern void _PyUnicode_Fini(PyInterpreterState *);
 | 
						|
extern void _PyUnicode_FiniTypes(PyInterpreterState *);
 | 
						|
 | 
						|
 | 
						|
/* other API */
 | 
						|
 | 
						|
struct _Py_unicode_runtime_ids {
 | 
						|
    PyThread_type_lock lock;
 | 
						|
    // next_index value must be preserved when Py_Initialize()/Py_Finalize()
 | 
						|
    // is called multiple times: see _PyUnicode_FromId() implementation.
 | 
						|
    Py_ssize_t next_index;
 | 
						|
};
 | 
						|
 | 
						|
/* fs_codec.encoding is initialized to NULL.
 | 
						|
   Later, it is set to a non-NULL string by _PyUnicode_InitEncodings(). */
 | 
						|
struct _Py_unicode_fs_codec {
 | 
						|
    char *encoding;   // Filesystem encoding (encoded to UTF-8)
 | 
						|
    int utf8;         // encoding=="utf-8"?
 | 
						|
    char *errors;     // Filesystem errors (encoded to UTF-8)
 | 
						|
    _Py_error_handler error_handler;
 | 
						|
};
 | 
						|
 | 
						|
struct _Py_unicode_ids {
 | 
						|
    Py_ssize_t size;
 | 
						|
    PyObject **array;
 | 
						|
};
 | 
						|
 | 
						|
struct _Py_unicode_state {
 | 
						|
    /* Single character Unicode strings in the Latin-1 range are being
 | 
						|
       shared as well. */
 | 
						|
    PyObject *latin1[256];
 | 
						|
    struct _Py_unicode_fs_codec fs_codec;
 | 
						|
 | 
						|
    // Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
 | 
						|
    struct _Py_unicode_ids ids;
 | 
						|
};
 | 
						|
 | 
						|
/* Prototype only; the definition is not part of this header.
   NOTE(review): presumably releases the interned strings associated with
   `interp` -- confirm against the implementation. */
extern void _PyUnicode_ClearInterned(PyInterpreterState *interp);
 | 
						|
 | 
						|
 | 
						|
#ifdef __cplusplus
 | 
						|
}
 | 
						|
#endif
 | 
						|
#endif /* !Py_INTERNAL_UNICODEOBJECT_H */
 |