mirror of
https://github.com/python/cpython.git
synced 2025-07-24 11:44:31 +00:00
bpo-46608: exclude marshalled-frozen data if deep-freezing to save 300 KB space (GH-31074)
This reduces the size of the data segment by **300 KB** of the executable because if the modules are deep-frozen then the marshalled frozen data just wastes space. This was inspired by comment by @gvanrossum in https://github.com/python/cpython/pull/29118#issuecomment-958521863. Note: There is a new option `--deepfreeze-only` in `freeze_modules.py` to change this behavior, it is on be default to save disk space. ```console # du -s ./python before 27892 ./python # du -s ./python after 27524 ./python ``` Automerge-Triggered-By: GH:ericsnowcurrently
This commit is contained in:
parent
9b4e3d94a5
commit
bf95ff91f2
9 changed files with 87 additions and 84 deletions
|
@ -256,8 +256,12 @@ Importing Modules
|
||||||
const char *name;
|
const char *name;
|
||||||
const unsigned char *code;
|
const unsigned char *code;
|
||||||
int size;
|
int size;
|
||||||
|
bool is_package;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
.. versionchanged:: 3.11
|
||||||
|
The new ``is_package`` field indicates whether the module is a package or not.
|
||||||
|
This replaces setting the ``size`` field to a negative value.
|
||||||
|
|
||||||
.. c:var:: const struct _frozen* PyImport_FrozenModules
|
.. c:var:: const struct _frozen* PyImport_FrozenModules
|
||||||
|
|
||||||
|
|
|
@ -650,6 +650,11 @@ C API Changes
|
||||||
fields of the result from the exception instance (the ``value`` field).
|
fields of the result from the exception instance (the ``value`` field).
|
||||||
(Contributed by Irit Katriel in :issue:`45711`.)
|
(Contributed by Irit Katriel in :issue:`45711`.)
|
||||||
|
|
||||||
|
* :c:struct:`_frozen` has a new ``is_package`` field to indicate whether
|
||||||
|
or not the frozen module is a package. Previously, a negative value
|
||||||
|
in the ``size`` field was the indicator. Now only non-negative values
|
||||||
|
be used for ``size``.
|
||||||
|
(Contributed by Kumar Aditya in :issue:`46608`.)
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
------------
|
------------
|
||||||
|
|
|
@ -32,6 +32,7 @@ struct _frozen {
|
||||||
const char *name; /* ASCII encoded string */
|
const char *name; /* ASCII encoded string */
|
||||||
const unsigned char *code;
|
const unsigned char *code;
|
||||||
int size;
|
int size;
|
||||||
|
bool is_package;
|
||||||
PyObject *(*get_code)(void);
|
PyObject *(*get_code)(void);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -54,6 +54,7 @@ class PythonValuesTestCase(unittest.TestCase):
|
||||||
_fields_ = [("name", c_char_p),
|
_fields_ = [("name", c_char_p),
|
||||||
("code", POINTER(c_ubyte)),
|
("code", POINTER(c_ubyte)),
|
||||||
("size", c_int),
|
("size", c_int),
|
||||||
|
("is_package", c_bool),
|
||||||
("get_code", POINTER(c_ubyte)), # Function ptr
|
("get_code", POINTER(c_ubyte)), # Function ptr
|
||||||
]
|
]
|
||||||
FrozenTable = POINTER(struct_frozen)
|
FrozenTable = POINTER(struct_frozen)
|
||||||
|
@ -71,13 +72,14 @@ class PythonValuesTestCase(unittest.TestCase):
|
||||||
modname = entry.name.decode("ascii")
|
modname = entry.name.decode("ascii")
|
||||||
modules.append(modname)
|
modules.append(modname)
|
||||||
with self.subTest(modname):
|
with self.subTest(modname):
|
||||||
# Do a sanity check on entry.size and entry.code.
|
if entry.size != 0:
|
||||||
self.assertGreater(abs(entry.size), 10)
|
# Do a sanity check on entry.size and entry.code.
|
||||||
self.assertTrue([entry.code[i] for i in range(abs(entry.size))])
|
self.assertGreater(abs(entry.size), 10)
|
||||||
|
self.assertTrue([entry.code[i] for i in range(abs(entry.size))])
|
||||||
# Check the module's package-ness.
|
# Check the module's package-ness.
|
||||||
with import_helper.frozen_modules():
|
with import_helper.frozen_modules():
|
||||||
spec = importlib.util.find_spec(modname)
|
spec = importlib.util.find_spec(modname)
|
||||||
if entry.size < 0:
|
if entry.is_package:
|
||||||
# It's a package.
|
# It's a package.
|
||||||
self.assertIsNotNone(spec.submodule_search_locations)
|
self.assertIsNotNone(spec.submodule_search_locations)
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Exclude marshalled-frozen data if deep-freezing to save 300 KB disk space. This includes adding
|
||||||
|
a new ``is_package`` field to :c:struct:`_frozen`. Patch by Kumar Aditya.
|
|
@ -39,29 +39,6 @@
|
||||||
#include "pycore_import.h"
|
#include "pycore_import.h"
|
||||||
|
|
||||||
/* Includes for frozen modules: */
|
/* Includes for frozen modules: */
|
||||||
#include "frozen_modules/importlib._bootstrap.h"
|
|
||||||
#include "frozen_modules/importlib._bootstrap_external.h"
|
|
||||||
#include "frozen_modules/zipimport.h"
|
|
||||||
#include "frozen_modules/abc.h"
|
|
||||||
#include "frozen_modules/codecs.h"
|
|
||||||
#include "frozen_modules/io.h"
|
|
||||||
#include "frozen_modules/_collections_abc.h"
|
|
||||||
#include "frozen_modules/_sitebuiltins.h"
|
|
||||||
#include "frozen_modules/genericpath.h"
|
|
||||||
#include "frozen_modules/ntpath.h"
|
|
||||||
#include "frozen_modules/posixpath.h"
|
|
||||||
#include "frozen_modules/os.h"
|
|
||||||
#include "frozen_modules/site.h"
|
|
||||||
#include "frozen_modules/stat.h"
|
|
||||||
#include "frozen_modules/importlib.util.h"
|
|
||||||
#include "frozen_modules/importlib.machinery.h"
|
|
||||||
#include "frozen_modules/runpy.h"
|
|
||||||
#include "frozen_modules/__hello__.h"
|
|
||||||
#include "frozen_modules/__phello__.h"
|
|
||||||
#include "frozen_modules/__phello__.ham.h"
|
|
||||||
#include "frozen_modules/__phello__.ham.eggs.h"
|
|
||||||
#include "frozen_modules/__phello__.spam.h"
|
|
||||||
#include "frozen_modules/frozen_only.h"
|
|
||||||
/* End includes */
|
/* End includes */
|
||||||
|
|
||||||
#define GET_CODE(name) _Py_get_##name##_toplevel
|
#define GET_CODE(name) _Py_get_##name##_toplevel
|
||||||
|
@ -98,49 +75,47 @@ extern PyObject *_Py_get___phello___spam_toplevel(void);
|
||||||
extern PyObject *_Py_get_frozen_only_toplevel(void);
|
extern PyObject *_Py_get_frozen_only_toplevel(void);
|
||||||
/* End extern declarations */
|
/* End extern declarations */
|
||||||
|
|
||||||
/* Note that a negative size indicates a package. */
|
|
||||||
|
|
||||||
static const struct _frozen bootstrap_modules[] = {
|
static const struct _frozen bootstrap_modules[] = {
|
||||||
{"_frozen_importlib", _Py_M__importlib__bootstrap, (int)sizeof(_Py_M__importlib__bootstrap), GET_CODE(importlib__bootstrap)},
|
{"_frozen_importlib", NULL, 0, false, GET_CODE(importlib__bootstrap)},
|
||||||
{"_frozen_importlib_external", _Py_M__importlib__bootstrap_external, (int)sizeof(_Py_M__importlib__bootstrap_external), GET_CODE(importlib__bootstrap_external)},
|
{"_frozen_importlib_external", NULL, 0, false, GET_CODE(importlib__bootstrap_external)},
|
||||||
{"zipimport", _Py_M__zipimport, (int)sizeof(_Py_M__zipimport), GET_CODE(zipimport)},
|
{"zipimport", NULL, 0, false, GET_CODE(zipimport)},
|
||||||
{0, 0, 0} /* bootstrap sentinel */
|
{0, 0, 0} /* bootstrap sentinel */
|
||||||
};
|
};
|
||||||
static const struct _frozen stdlib_modules[] = {
|
static const struct _frozen stdlib_modules[] = {
|
||||||
/* stdlib - startup, without site (python -S) */
|
/* stdlib - startup, without site (python -S) */
|
||||||
{"abc", _Py_M__abc, (int)sizeof(_Py_M__abc), GET_CODE(abc)},
|
{"abc", NULL, 0, false, GET_CODE(abc)},
|
||||||
{"codecs", _Py_M__codecs, (int)sizeof(_Py_M__codecs), GET_CODE(codecs)},
|
{"codecs", NULL, 0, false, GET_CODE(codecs)},
|
||||||
{"io", _Py_M__io, (int)sizeof(_Py_M__io), GET_CODE(io)},
|
{"io", NULL, 0, false, GET_CODE(io)},
|
||||||
|
|
||||||
/* stdlib - startup, with site */
|
/* stdlib - startup, with site */
|
||||||
{"_collections_abc", _Py_M___collections_abc, (int)sizeof(_Py_M___collections_abc), GET_CODE(_collections_abc)},
|
{"_collections_abc", NULL, 0, false, GET_CODE(_collections_abc)},
|
||||||
{"_sitebuiltins", _Py_M___sitebuiltins, (int)sizeof(_Py_M___sitebuiltins), GET_CODE(_sitebuiltins)},
|
{"_sitebuiltins", NULL, 0, false, GET_CODE(_sitebuiltins)},
|
||||||
{"genericpath", _Py_M__genericpath, (int)sizeof(_Py_M__genericpath), GET_CODE(genericpath)},
|
{"genericpath", NULL, 0, false, GET_CODE(genericpath)},
|
||||||
{"ntpath", _Py_M__ntpath, (int)sizeof(_Py_M__ntpath), GET_CODE(ntpath)},
|
{"ntpath", NULL, 0, false, GET_CODE(ntpath)},
|
||||||
{"posixpath", _Py_M__posixpath, (int)sizeof(_Py_M__posixpath), GET_CODE(posixpath)},
|
{"posixpath", NULL, 0, false, GET_CODE(posixpath)},
|
||||||
{"os.path", _Py_M__posixpath, (int)sizeof(_Py_M__posixpath), GET_CODE(posixpath)},
|
{"os.path", NULL, 0, false, GET_CODE(posixpath)},
|
||||||
{"os", _Py_M__os, (int)sizeof(_Py_M__os), GET_CODE(os)},
|
{"os", NULL, 0, false, GET_CODE(os)},
|
||||||
{"site", _Py_M__site, (int)sizeof(_Py_M__site), GET_CODE(site)},
|
{"site", NULL, 0, false, GET_CODE(site)},
|
||||||
{"stat", _Py_M__stat, (int)sizeof(_Py_M__stat), GET_CODE(stat)},
|
{"stat", NULL, 0, false, GET_CODE(stat)},
|
||||||
|
|
||||||
/* runpy - run module with -m */
|
/* runpy - run module with -m */
|
||||||
{"importlib.util", _Py_M__importlib_util, (int)sizeof(_Py_M__importlib_util), GET_CODE(importlib_util)},
|
{"importlib.util", NULL, 0, false, GET_CODE(importlib_util)},
|
||||||
{"importlib.machinery", _Py_M__importlib_machinery, (int)sizeof(_Py_M__importlib_machinery), GET_CODE(importlib_machinery)},
|
{"importlib.machinery", NULL, 0, false, GET_CODE(importlib_machinery)},
|
||||||
{"runpy", _Py_M__runpy, (int)sizeof(_Py_M__runpy), GET_CODE(runpy)},
|
{"runpy", NULL, 0, false, GET_CODE(runpy)},
|
||||||
{0, 0, 0} /* stdlib sentinel */
|
{0, 0, 0} /* stdlib sentinel */
|
||||||
};
|
};
|
||||||
static const struct _frozen test_modules[] = {
|
static const struct _frozen test_modules[] = {
|
||||||
{"__hello__", _Py_M____hello__, (int)sizeof(_Py_M____hello__), GET_CODE(__hello__)},
|
{"__hello__", NULL, 0, false, GET_CODE(__hello__)},
|
||||||
{"__hello_alias__", _Py_M____hello__, (int)sizeof(_Py_M____hello__), GET_CODE(__hello__)},
|
{"__hello_alias__", NULL, 0, false, GET_CODE(__hello__)},
|
||||||
{"__phello_alias__", _Py_M____hello__, -(int)sizeof(_Py_M____hello__), GET_CODE(__hello__)},
|
{"__phello_alias__", NULL, 0, true, GET_CODE(__hello__)},
|
||||||
{"__phello_alias__.spam", _Py_M____hello__, (int)sizeof(_Py_M____hello__), GET_CODE(__hello__)},
|
{"__phello_alias__.spam", NULL, 0, false, GET_CODE(__hello__)},
|
||||||
{"__phello__", _Py_M____phello__, -(int)sizeof(_Py_M____phello__), GET_CODE(__phello__)},
|
{"__phello__", NULL, 0, true, GET_CODE(__phello__)},
|
||||||
{"__phello__.__init__", _Py_M____phello__, (int)sizeof(_Py_M____phello__), GET_CODE(__phello__)},
|
{"__phello__.__init__", NULL, 0, false, GET_CODE(__phello__)},
|
||||||
{"__phello__.ham", _Py_M____phello___ham, -(int)sizeof(_Py_M____phello___ham), GET_CODE(__phello___ham)},
|
{"__phello__.ham", NULL, 0, true, GET_CODE(__phello___ham)},
|
||||||
{"__phello__.ham.__init__", _Py_M____phello___ham, (int)sizeof(_Py_M____phello___ham), GET_CODE(__phello___ham)},
|
{"__phello__.ham.__init__", NULL, 0, false, GET_CODE(__phello___ham)},
|
||||||
{"__phello__.ham.eggs", _Py_M____phello___ham_eggs, (int)sizeof(_Py_M____phello___ham_eggs), GET_CODE(__phello___ham_eggs)},
|
{"__phello__.ham.eggs", NULL, 0, false, GET_CODE(__phello___ham_eggs)},
|
||||||
{"__phello__.spam", _Py_M____phello___spam, (int)sizeof(_Py_M____phello___spam), GET_CODE(__phello___spam)},
|
{"__phello__.spam", NULL, 0, false, GET_CODE(__phello___spam)},
|
||||||
{"__hello_only__", _Py_M__frozen_only, (int)sizeof(_Py_M__frozen_only), GET_CODE(frozen_only)},
|
{"__hello_only__", NULL, 0, false, GET_CODE(frozen_only)},
|
||||||
{0, 0, 0} /* test sentinel */
|
{0, 0, 0} /* test sentinel */
|
||||||
};
|
};
|
||||||
const struct _frozen *_PyImport_FrozenBootstrap = bootstrap_modules;
|
const struct _frozen *_PyImport_FrozenBootstrap = bootstrap_modules;
|
||||||
|
|
|
@ -1297,13 +1297,21 @@ find_frozen(PyObject *nameobj, struct frozen_info *info)
|
||||||
info->nameobj = nameobj; // borrowed
|
info->nameobj = nameobj; // borrowed
|
||||||
info->data = (const char *)p->code;
|
info->data = (const char *)p->code;
|
||||||
info->get_code = p->get_code;
|
info->get_code = p->get_code;
|
||||||
info->size = p->size < 0 ? -(p->size) : p->size;
|
info->size = p->size;
|
||||||
info->is_package = p->size < 0 ? true : false;
|
info->is_package = p->is_package;
|
||||||
|
if (p->size < 0) {
|
||||||
|
// backward compatibility with negative size values
|
||||||
|
info->size = -(p->size);
|
||||||
|
info->is_package = true;
|
||||||
|
}
|
||||||
info->origname = name;
|
info->origname = name;
|
||||||
info->is_alias = resolve_module_alias(name, _PyImport_FrozenAliases,
|
info->is_alias = resolve_module_alias(name, _PyImport_FrozenAliases,
|
||||||
&info->origname);
|
&info->origname);
|
||||||
}
|
}
|
||||||
|
if (p->code == NULL && p->size == 0 && p->get_code != NULL) {
|
||||||
|
/* It is only deepfrozen. */
|
||||||
|
return FROZEN_OKAY;
|
||||||
|
}
|
||||||
if (p->code == NULL) {
|
if (p->code == NULL) {
|
||||||
/* It is frozen but marked as un-importable. */
|
/* It is frozen but marked as un-importable. */
|
||||||
return FROZEN_EXCLUDED;
|
return FROZEN_EXCLUDED;
|
||||||
|
@ -2224,7 +2232,7 @@ _imp_get_frozen_object_impl(PyObject *module, PyObject *name,
|
||||||
if (info.nameobj == NULL) {
|
if (info.nameobj == NULL) {
|
||||||
info.nameobj = name;
|
info.nameobj = name;
|
||||||
}
|
}
|
||||||
if (info.size == 0) {
|
if (info.size == 0 && info.get_code == NULL) {
|
||||||
/* Does not contain executable code. */
|
/* Does not contain executable code. */
|
||||||
set_frozen_error(FROZEN_INVALID, name);
|
set_frozen_error(FROZEN_INVALID, name);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
|
@ -45,19 +45,19 @@ def makefreeze(base, dict, debug=0, entry_point=None, fail_import=()):
|
||||||
print("freezing", mod, "...")
|
print("freezing", mod, "...")
|
||||||
str = marshal.dumps(m.__code__)
|
str = marshal.dumps(m.__code__)
|
||||||
size = len(str)
|
size = len(str)
|
||||||
|
is_package = 'false'
|
||||||
if m.__path__:
|
if m.__path__:
|
||||||
# Indicate package by negative size
|
is_package = 'true'
|
||||||
size = -size
|
done.append((mod, mangled, size, is_package))
|
||||||
done.append((mod, mangled, size))
|
|
||||||
writecode(outfp, mangled, str)
|
writecode(outfp, mangled, str)
|
||||||
if debug:
|
if debug:
|
||||||
print("generating table of frozen modules")
|
print("generating table of frozen modules")
|
||||||
with bkfile.open(base + 'frozen.c', 'w') as outfp:
|
with bkfile.open(base + 'frozen.c', 'w') as outfp:
|
||||||
for mod, mangled, size in done:
|
for mod, mangled, size, _ in done:
|
||||||
outfp.write('extern unsigned char M_%s[];\n' % mangled)
|
outfp.write('extern unsigned char M_%s[];\n' % mangled)
|
||||||
outfp.write(header)
|
outfp.write(header)
|
||||||
for mod, mangled, size in done:
|
for mod, mangled, size, is_package in done:
|
||||||
outfp.write('\t{"%s", M_%s, %d},\n' % (mod, mangled, size))
|
outfp.write('\t{"%s", M_%s, %d, %s},\n' % (mod, mangled, size, is_package))
|
||||||
outfp.write('\n')
|
outfp.write('\n')
|
||||||
# The following modules have a NULL code pointer, indicating
|
# The following modules have a NULL code pointer, indicating
|
||||||
# that the frozen program should not search for them on the host
|
# that the frozen program should not search for them on the host
|
||||||
|
|
|
@ -9,7 +9,7 @@ import os
|
||||||
import ntpath
|
import ntpath
|
||||||
import posixpath
|
import posixpath
|
||||||
import sys
|
import sys
|
||||||
|
import argparse
|
||||||
from update_file import updating_file_with_tmpfile
|
from update_file import updating_file_with_tmpfile
|
||||||
|
|
||||||
|
|
||||||
|
@ -463,14 +463,15 @@ def replace_block(lines, start_marker, end_marker, replacements, file):
|
||||||
return lines[:start_pos + 1] + replacements + lines[end_pos:]
|
return lines[:start_pos + 1] + replacements + lines[end_pos:]
|
||||||
|
|
||||||
|
|
||||||
def regen_frozen(modules):
|
def regen_frozen(modules, deepfreeze_only: bool):
|
||||||
headerlines = []
|
headerlines = []
|
||||||
parentdir = os.path.dirname(FROZEN_FILE)
|
parentdir = os.path.dirname(FROZEN_FILE)
|
||||||
for src in _iter_sources(modules):
|
if not deepfreeze_only:
|
||||||
# Adding a comment to separate sections here doesn't add much,
|
for src in _iter_sources(modules):
|
||||||
# so we don't.
|
# Adding a comment to separate sections here doesn't add much,
|
||||||
header = relpath_for_posix_display(src.frozenfile, parentdir)
|
# so we don't.
|
||||||
headerlines.append(f'#include "{header}"')
|
header = relpath_for_posix_display(src.frozenfile, parentdir)
|
||||||
|
headerlines.append(f'#include "{header}"')
|
||||||
|
|
||||||
externlines = []
|
externlines = []
|
||||||
bootstraplines = []
|
bootstraplines = []
|
||||||
|
@ -500,9 +501,13 @@ def regen_frozen(modules):
|
||||||
externlines.append("extern PyObject *%s(void);" % get_code_name)
|
externlines.append("extern PyObject *%s(void);" % get_code_name)
|
||||||
|
|
||||||
symbol = mod.symbol
|
symbol = mod.symbol
|
||||||
pkg = '-' if mod.ispkg else ''
|
pkg = 'true' if mod.ispkg else 'false'
|
||||||
line = ('{"%s", %s, %s(int)sizeof(%s), GET_CODE(%s)},'
|
if deepfreeze_only:
|
||||||
) % (mod.name, symbol, pkg, symbol, code_name)
|
line = ('{"%s", NULL, 0, %s, GET_CODE(%s)},'
|
||||||
|
) % (mod.name, pkg, code_name)
|
||||||
|
else:
|
||||||
|
line = ('{"%s", %s, (int)sizeof(%s), %s, GET_CODE(%s)},'
|
||||||
|
) % (mod.name, symbol, symbol, pkg, code_name)
|
||||||
lines.append(line)
|
lines.append(line)
|
||||||
|
|
||||||
if mod.isalias:
|
if mod.isalias:
|
||||||
|
@ -710,18 +715,19 @@ def regen_pcbuild(modules):
|
||||||
#######################################
|
#######################################
|
||||||
# the script
|
# the script
|
||||||
|
|
||||||
def main():
|
def main(deepfreeze_only: bool):
|
||||||
# Expand the raw specs, preserving order.
|
# Expand the raw specs, preserving order.
|
||||||
modules = list(parse_frozen_specs())
|
modules = list(parse_frozen_specs())
|
||||||
|
|
||||||
# Regen build-related files.
|
# Regen build-related files.
|
||||||
regen_makefile(modules)
|
regen_makefile(modules)
|
||||||
regen_pcbuild(modules)
|
regen_pcbuild(modules)
|
||||||
regen_frozen(modules)
|
regen_frozen(modules, deepfreeze_only)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
argv = sys.argv[1:]
|
parser = argparse.ArgumentParser()
|
||||||
if argv:
|
parser.add_argument("--deepfreeze-only", action="store_true",
|
||||||
sys.exit(f'ERROR: got unexpected args {argv}')
|
help="Only use deepfrozen modules", default=True)
|
||||||
main()
|
args = parser.parse_args()
|
||||||
|
main(args.deepfreeze_only)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue