mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 03:44:55 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			434 lines
		
	
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			434 lines
		
	
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import contextlib
 | 
						|
import io
 | 
						|
import os.path
 | 
						|
import re
 | 
						|
 | 
						|
# Repo-relative path of this script; it is interpolated into the START
# marker that introduces each auto-generated section.
SCRIPT_NAME = 'Tools/build/generate_global_objects.py'
# Rebind __file__ to an absolute path before doing directory arithmetic on it.
__file__ = os.path.abspath(__file__)
# Three directory levels above this file (presumably the checkout root,
# given the location recorded in SCRIPT_NAME).
ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
# Directory containing the headers that the generate_*() functions rewrite.
INTERNAL = os.path.join(ROOT, 'Include', 'internal')
 | 
						|
 | 
						|
 | 
						|
# Names that get_identifiers_and_strings() drops when they show up as the
# argument of a _Py_ID(...) match.  The trailing comments record the C files
# where each one occurs (presumably as a macro parameter rather than a real
# identifier -- confirm against those files).
IGNORED = {
    'ACTION',  # Python/_warnings.c
    'ATTR',  # Python/_warnings.c and Objects/funcobject.c
    'DUNDER',  # Objects/typeobject.c
    'RDUNDER',  # Objects/typeobject.c
    'SPECIAL',  # Objects/weakrefobject.c
    'NAME',  # Objects/typeobject.c
}
 | 
						|
# Identifier names that are always included: get_identifiers_and_strings()
# seeds its result set with this list before adding the names found by
# scanning the C sources.  The group comments record which C macro each
# batch of names is associated with.
IDENTIFIERS = [
    # from ADD() Python/_warnings.c
    'default',
    'ignore',

    # from GET_WARNINGS_ATTR() in Python/_warnings.c
    'WarningMessage',
    '_showwarnmsg',
    '_warn_unawaited_coroutine',
    'defaultaction',
    'filters',
    'onceregistry',

    # from WRAP_METHOD() in Objects/weakrefobject.c
    '__bytes__',
    '__reversed__',

    # from COPY_ATTR() in Objects/funcobject.c
    '__module__',
    '__name__',
    '__qualname__',
    '__doc__',
    '__annotations__',

    # from SLOT* in Objects/typeobject.c
    '__abs__',
    '__add__',
    '__aiter__',
    '__and__',
    '__anext__',
    '__await__',
    '__bool__',
    '__call__',
    '__contains__',
    '__del__',
    '__delattr__',
    '__delete__',
    '__delitem__',
    '__eq__',
    '__float__',
    '__floordiv__',
    '__ge__',
    '__get__',
    '__getattr__',
    '__getattribute__',
    '__getitem__',
    '__gt__',
    '__hash__',
    '__iadd__',
    '__iand__',
    '__ifloordiv__',
    '__ilshift__',
    '__imatmul__',
    '__imod__',
    '__imul__',
    '__index__',
    '__init__',
    '__int__',
    '__invert__',
    '__ior__',
    '__ipow__',
    '__irshift__',
    '__isub__',
    '__iter__',
    '__itruediv__',
    '__ixor__',
    '__le__',
    '__len__',
    '__lshift__',
    '__lt__',
    '__matmul__',
    '__mod__',
    '__mul__',
    '__ne__',
    '__neg__',
    '__new__',
    '__next__',
    '__or__',
    '__pos__',
    '__pow__',
    '__radd__',
    '__rand__',
    '__repr__',
    '__rfloordiv__',
    '__rlshift__',
    '__rmatmul__',
    '__rmod__',
    '__rmul__',
    '__ror__',
    '__rpow__',
    '__rrshift__',
    '__rshift__',
    '__rsub__',
    '__rtruediv__',
    '__rxor__',
    '__set__',
    '__setattr__',
    '__setitem__',
    '__str__',
    '__sub__',
    '__truediv__',
    '__xor__',
    '__divmod__',
    '__rdivmod__',
    '__buffer__',
    '__release_buffer__',
]
 | 
						|
 | 
						|
# C expressions that generate_global_object_finalizers() wraps in
# _PyStaticObject_CheckRefcnt(...) after the generated entries.
NON_GENERATED_IMMORTAL_OBJECTS = [
    # The generated ones come from generate_runtime_init().
    '(PyObject *)&_Py_SINGLETON(bytes_empty)',
    '(PyObject *)&_Py_SINGLETON(tuple_empty)',
    '(PyObject *)&_Py_SINGLETON(hamt_bitmap_node_empty)',
    '(PyObject *)&_Py_INTERP_SINGLETON(interp, hamt_empty)',
    '(PyObject *)&_Py_SINGLETON(context_token_missing)',
]
 | 
						|
 | 
						|
 | 
						|
#######################################
 | 
						|
# helpers
 | 
						|
 | 
						|
def iter_files():
    """Yield the path of every .c and .h file under the scanned source dirs.

    The directories searched (recursively, relative to ROOT) are Modules,
    Objects, Parser, PC, Programs and Python.
    """
    for topdir in ('Modules', 'Objects', 'Parser', 'PC', 'Programs', 'Python'):
        for dirname, _, files in os.walk(os.path.join(ROOT, topdir)):
            for basename in files:
                if basename.endswith(('.c', '.h')):
                    yield os.path.join(dirname, basename)
 | 
						|
 | 
						|
 | 
						|
def iter_global_strings():
    """Scan the C sources for _Py_ID() and _Py_DECLARE_STR() uses.

    Yields 5-tuples (name, string, filename, lineno, line).  For a
    _Py_ID(name) match, string is None; for a
    _Py_DECLARE_STR(name, "string") match, string is the quoted text.
    Within one line, all _Py_ID matches are yielded before any
    _Py_DECLARE_STR match.
    """
    id_pattern = re.compile(r'\b_Py_ID\((\w+)\)')
    str_pattern = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)')
    for path in iter_files():
        try:
            source = open(path, encoding='utf-8')
        except FileNotFoundError:
            # The file vanished after being listed; it must have been a
            # temporary file.
            continue
        with source:
            for lineno, text in enumerate(source, start=1):
                for match in id_pattern.finditer(text):
                    yield match.group(1), None, path, lineno, text
                for match in str_pattern.finditer(text):
                    yield (*match.groups(), path, lineno, text)
 | 
						|
 | 
						|
 | 
						|
def iter_to_marker(lines, marker):
    """Yield items from lines until one equals marker (ignoring trailing
    whitespace on the item).

    The marker line itself is consumed from the underlying iterable but is
    not yielded, so a caller sharing the same iterator resumes just after it.
    If the marker never appears, every line is yielded.
    """
    for text in lines:
        if text.rstrip() != marker:
            yield text
            continue
        # Reached the marker: stop without yielding it.
        return
 | 
						|
 | 
						|
 | 
						|
class Printer:
    """Line-oriented writer that tracks indentation and line continuations.

    Every write() emits one line, prefixed with four spaces per indent
    level.  While the top of the continuation stack is true, each line is
    terminated with a backslash before the newline.
    """

    def __init__(self, file):
        self.file = file
        self.level = 0
        # Stack of continuation flags; block() pushes on entry and pops on
        # exit, and write() consults the top entry.
        self.continuation = [False]

    @contextlib.contextmanager
    def indent(self):
        """Context manager: indent one level deeper for the enclosed code."""
        previous = self.level
        self.level = previous + 1
        try:
            yield
        finally:
            self.level = previous

    def write(self, arg):
        """Write arg as a single line at the current indent level."""
        if not self.continuation[-1]:
            terminator = '\n'
        elif arg:
            terminator = ' \\\n'
        else:
            # No separating space before the backslash on an empty line.
            terminator = '\\\n'
        self.file.writelines(("    " * self.level, arg, terminator))

    @contextlib.contextmanager
    def block(self, prefix, suffix="", *, continuation=None):
        """Context manager: wrap the enclosed writes in "prefix {" ... "}suffix".

        The enclosed lines are indented one level.  If continuation is None
        the enclosing continuation setting is inherited.  The opening line
        is written under the block's continuation setting; the closing line
        is written under the enclosing one.
        """
        inherited = self.continuation[-1]
        self.continuation.append(
            inherited if continuation is None else continuation)

        self.write(prefix + " {")
        with self.indent():
            yield
        self.continuation.pop()
        self.write("}" + suffix)
 | 
						|
 | 
						|
 | 
						|
@contextlib.contextmanager
def open_for_changes(filename, orig):
    """Like open() for writing, but leave the file alone if nothing changed.

    Yields an in-memory text buffer.  On normal exit from the with-block,
    the buffer's contents are compared with orig: if they differ, filename
    is overwritten (UTF-8) with the new text; otherwise a "not changed"
    note is printed.
    """
    buffer = io.StringIO()
    yield buffer
    generated = buffer.getvalue()
    if generated == orig:
        print(f'# not changed: {filename}')
        return
    with open(filename, 'w', encoding='utf-8') as target:
        target.write(generated)
 | 
						|
 | 
						|
 | 
						|
#######################################
 | 
						|
# the global objects
 | 
						|
 | 
						|
# Marker lines delimiting the auto-generated region of each target header.
# The generate_*() functions keep the text before START and after END and
# rewrite everything in between.
START = f'/* The following is auto-generated by {SCRIPT_NAME}. */'
END = '/* End auto-generated code */'
 | 
						|
 | 
						|
 | 
						|
def generate_global_strings(identifiers, strings):
    """Regenerate the auto-generated section of pycore_global_strings.h.

    identifiers -- iterable of identifier names; each one gets a
                   STRUCT_FOR_ID() entry (emitted in sorted order).
    strings     -- dict mapping a string literal to its C variable name;
                   each one gets a STRUCT_FOR_STR() entry (sorted by name).

    The text outside the START/END markers is preserved, and the file is
    only rewritten if the result differs from what is on disk.
    """
    filename = os.path.join(INTERNAL, 'pycore_global_strings.h')

    # Read the non-generated part of the file.
    # Decode explicitly as UTF-8: open_for_changes() writes UTF-8, so
    # reading with the locale default could make the comparison (and the
    # preserved text) wrong on platforms with a different default.
    with open(filename, encoding='utf-8') as infile:
        orig = infile.read()
    lines = iter(orig.rstrip().splitlines())
    before = '\n'.join(iter_to_marker(lines, START))
    # Discard the old generated section (everything up to END).
    for _ in iter_to_marker(lines, END):
        pass
    after = '\n'.join(lines)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        with printer.block('struct _Py_global_strings', ';'):
            with printer.block('struct', ' literals;'):
                for literal, name in sorted(strings.items(), key=lambda x: x[1]):
                    printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
            # A truly empty separator line (no indentation).
            outfile.write('\n')
            with printer.block('struct', ' identifiers;'):
                for name in sorted(identifiers):
                    assert name.isidentifier(), name
                    printer.write(f'STRUCT_FOR_ID({name})')
            with printer.block('struct', ' ascii[128];'):
                printer.write("PyASCIIObject _ascii;")
                printer.write("uint8_t _data[2];")
            with printer.block('struct', ' latin1[128];'):
                printer.write("PyCompactUnicodeObject _latin1;")
                printer.write("uint8_t _data[2];")
        printer.write(END)
        printer.write(after)
 | 
						|
 | 
						|
 | 
						|
def generate_runtime_init(identifiers, strings):
    """Regenerate the auto-generated section of pycore_runtime_init_generated.h.

    identifiers -- iterable of identifier names (one INIT_ID() each).
    strings     -- dict mapping a string literal to its C variable name
                   (one INIT_STR() each).

    Returns a list of C expressions (as strings), one per object whose
    initializer was emitted, in emission order.

    Raises NotImplementedError if pycore_global_objects.h has no
    "#define _PY_NSMALLNEGINTS" line.
    """
    # First get some info from the declarations.
    nsmallposints = None
    nsmallnegints = None
    # Headers are read as UTF-8 to match how open_for_changes() writes them.
    with open(os.path.join(INTERNAL, 'pycore_global_objects.h'),
              encoding='utf-8') as infile:
        for line in infile:
            if line.startswith('#define _PY_NSMALLPOSINTS'):
                nsmallposints = int(line.split()[-1])
            elif line.startswith('#define _PY_NSMALLNEGINTS'):
                nsmallnegints = int(line.split()[-1])
                # Scanning stops here, so the POSINTS define must come first.
                break
        else:
            raise NotImplementedError
    assert nsmallposints and nsmallnegints

    # Then target the runtime initializer.
    filename = os.path.join(INTERNAL, 'pycore_runtime_init_generated.h')

    # Read the non-generated part of the file.
    with open(filename, encoding='utf-8') as infile:
        orig = infile.read()
    lines = iter(orig.rstrip().splitlines())
    before = '\n'.join(iter_to_marker(lines, START))
    # Discard the old generated section (everything up to END).
    for _ in iter_to_marker(lines, END):
        pass
    after = '\n'.join(lines)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        immortal_objects = []
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        with printer.block('#define _Py_small_ints_INIT', continuation=True):
            for i in range(-nsmallnegints, nsmallposints):
                printer.write(f'_PyLong_DIGIT_INIT({i}),')
                immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + {i}]')
        printer.write('')
        with printer.block('#define _Py_bytes_characters_INIT', continuation=True):
            for i in range(256):
                printer.write(f'_PyBytes_CHAR_INIT({i}),')
                immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(bytes_characters)[{i}]')
        printer.write('')
        with printer.block('#define _Py_str_literals_INIT', continuation=True):
            for literal, name in sorted(strings.items(), key=lambda x: x[1]):
                printer.write(f'INIT_STR({name}, "{literal}"),')
                immortal_objects.append(f'(PyObject *)&_Py_STR({name})')
        printer.write('')
        with printer.block('#define _Py_str_identifiers_INIT', continuation=True):
            for name in sorted(identifiers):
                assert name.isidentifier(), name
                printer.write(f'INIT_ID({name}),')
                immortal_objects.append(f'(PyObject *)&_Py_ID({name})')
        printer.write('')
        with printer.block('#define _Py_str_ascii_INIT', continuation=True):
            for i in range(128):
                printer.write(f'_PyASCIIObject_INIT("\\x{i:02x}"),')
                immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).ascii[{i}]')
        printer.write('')
        with printer.block('#define _Py_str_latin1_INIT', continuation=True):
            for i in range(128, 256):
                # C hex escapes for the UTF-8 encoding of code point i.
                utf8 = ''.join(f'\\x{c:02x}' for c in chr(i).encode('utf-8'))
                printer.write(f'_PyUnicode_LATIN1_INIT("\\x{i:02x}", "{utf8}"),')
                immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).latin1[{i} - 128]')
        printer.write(END)
        printer.write(after)
        # Returning from inside the with-block still lets
        # open_for_changes() finish and write the file.
        return immortal_objects
 | 
						|
 | 
						|
 | 
						|
def generate_static_strings_initializer(identifiers, strings):
    """Regenerate the auto-generated section of pycore_unicodeobject_generated.h.

    Emits the C function _PyUnicode_InitStaticStrings(), containing one
    consistency check and one _PyUnicode_InternInPlace() call per identifier
    (in sorted order).

    identifiers -- iterable of identifier names.
    strings     -- currently unused (see the XXX note below).
    """
    # Target the runtime initializer.
    filename = os.path.join(INTERNAL, 'pycore_unicodeobject_generated.h')

    # Read the non-generated part of the file.
    # Decode explicitly as UTF-8 to match how open_for_changes() writes it.
    with open(filename, encoding='utf-8') as infile:
        orig = infile.read()
    lines = iter(orig.rstrip().splitlines())
    before = '\n'.join(iter_to_marker(lines, START))
    # Discard the old generated section (everything up to END).
    for _ in iter_to_marker(lines, END):
        pass
    after = '\n'.join(lines)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        printer.write("static inline void")
        with printer.block("_PyUnicode_InitStaticStrings(PyInterpreterState *interp)"):
            printer.write('PyObject *string;')
            for name in sorted(identifiers):
                # This use of _Py_ID() is not picked up by
                # iter_global_strings(): the generated header lives under
                # Include/, which iter_files() does not walk.
                printer.write(f'string = &_Py_ID({name});')
                printer.write('assert(_PyUnicode_CheckConsistency(string, 1));')
                printer.write('_PyUnicode_InternInPlace(interp, &string);')
            # XXX What about "strings"?
        printer.write(END)
        printer.write(after)
 | 
						|
 | 
						|
 | 
						|
def generate_global_object_finalizers(generated_immortal_objects):
    """Regenerate the auto-generated section of
    pycore_global_objects_fini_generated.h.

    Emits the C function _PyStaticObjects_CheckRefcnt() (inside
    "#ifdef Py_DEBUG"), containing one _PyStaticObject_CheckRefcnt() call
    for each entry of generated_immortal_objects, followed by one for each
    entry of NON_GENERATED_IMMORTAL_OBJECTS.

    generated_immortal_objects -- iterable of C expressions (as strings),
                                  emitted in the order given.
    """
    # Target the runtime initializer.
    filename = os.path.join(INTERNAL, 'pycore_global_objects_fini_generated.h')

    # Read the non-generated part of the file.
    # Decode explicitly as UTF-8 to match how open_for_changes() writes it.
    with open(filename, encoding='utf-8') as infile:
        orig = infile.read()
    lines = iter(orig.rstrip().splitlines())
    before = '\n'.join(iter_to_marker(lines, START))
    # Discard the old generated section (everything up to END).
    for _ in iter_to_marker(lines, END):
        pass
    after = '\n'.join(lines)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        printer.write('#ifdef Py_DEBUG')
        printer.write("static inline void")
        with printer.block(
                "_PyStaticObjects_CheckRefcnt(PyInterpreterState *interp)"):
            printer.write('/* generated runtime-global */')
            printer.write('// (see pycore_runtime_init_generated.h)')
            for ref in generated_immortal_objects:
                printer.write(f'_PyStaticObject_CheckRefcnt({ref});')
            printer.write('/* non-generated */')
            for ref in NON_GENERATED_IMMORTAL_OBJECTS:
                printer.write(f'_PyStaticObject_CheckRefcnt({ref});')
        printer.write('#endif  // Py_DEBUG')
        printer.write(END)
        printer.write(after)
 | 
						|
 | 
						|
 | 
						|
def get_identifiers_and_strings() -> 'tuple[set[str], dict[str, str]]':
    """Collect the global identifiers and string literals used by the C code.

    Returns (identifiers, strings):

    identifiers -- set seeded from IDENTIFIERS, plus every name reported by
                   iter_global_strings() without a string, except those
                   listed in IGNORED.
    strings     -- dict mapping each declared string literal to the C
                   variable name it was declared with.

    Raises ValueError if the same literal is declared under two different
    variable names.
    """
    identifiers = set(IDENTIFIERS)
    strings = {}
    for name, string, *_ in iter_global_strings():
        if string is None:
            if name not in IGNORED:
                identifiers.add(name)
            continue
        # Record the first name seen for this literal; later declarations
        # of the same literal must use that same name.  (The dict is keyed
        # by the literal, so it must be looked up by `string`, not `name`.)
        known = strings.setdefault(string, name)
        if known != name:
            raise ValueError(
                f'string mismatch for {name!r} '
                f'({string!r} is already declared as {known!r})')
    return identifiers, strings
 | 
						|
 | 
						|
 | 
						|
#######################################
 | 
						|
# the script
 | 
						|
 | 
						|
def main() -> None:
    """Scan the C sources, then regenerate each generated header section."""
    ids, strs = get_identifiers_and_strings()

    generate_global_strings(ids, strs)
    # The runtime-init generator reports which objects it emitted; the
    # finalizer header is built from that list, so it has to run afterwards.
    immortal_refs = generate_runtime_init(ids, strs)
    generate_static_strings_initializer(ids, strs)
    generate_global_object_finalizers(immortal_refs)
 | 
						|
 | 
						|
 | 
						|
# Run the generators only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
 |