mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 11:49:12 +00:00 
			
		
		
		
	bpo-40495: compileall option to hardlink duplicate pyc files (GH-19901)
compileall is now able to use hardlinks to prevent duplicates in a case when .pyc files for different optimization levels have the same content. Co-authored-by: Miro Hrončok <miro@hroncok.cz> Co-authored-by: Victor Stinner <vstinner@python.org>
This commit is contained in:
		
							parent
							
								
									7443d42021
								
							
						
					
					
						commit
						e77d428856
					
				
					 6 changed files with 285 additions and 15 deletions
				
			
		| 
						 | 
					@ -113,6 +113,11 @@ compile Python sources.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
   Ignore symlinks pointing outside the given directory.
 | 
					   Ignore symlinks pointing outside the given directory.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.. cmdoption:: --hardlink-dupes
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   If two ``.pyc`` files with different optimization levels have
 | 
				
			||||||
 | 
					   the same content, use hard links to consolidate duplicate files.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.. versionchanged:: 3.2
 | 
					.. versionchanged:: 3.2
 | 
				
			||||||
   Added the ``-i``, ``-b`` and ``-h`` options.
 | 
					   Added the ``-i``, ``-b`` and ``-h`` options.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -125,7 +130,7 @@ compile Python sources.
 | 
				
			||||||
   Added the ``--invalidation-mode`` option.
 | 
					   Added the ``--invalidation-mode`` option.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.. versionchanged:: 3.9
 | 
					.. versionchanged:: 3.9
 | 
				
			||||||
   Added the ``-s``, ``-p``, ``-e`` options.
 | 
					   Added the ``-s``, ``-p``, ``-e`` and ``--hardlink-dupes`` options.
 | 
				
			||||||
   Raised the default recursion limit from 10 to
 | 
					   Raised the default recursion limit from 10 to
 | 
				
			||||||
   :py:func:`sys.getrecursionlimit()`.
 | 
					   :py:func:`sys.getrecursionlimit()`.
 | 
				
			||||||
   Added the possibility to specify the ``-o`` option multiple times.
 | 
					   Added the possibility to specify the ``-o`` option multiple times.
 | 
				
			||||||
| 
						 | 
					@ -143,7 +148,7 @@ runtime.
 | 
				
			||||||
Public functions
 | 
					Public functions
 | 
				
			||||||
----------------
 | 
					----------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.. function:: compile_dir(dir, maxlevels=sys.getrecursionlimit(), ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, workers=1, invalidation_mode=None, \*, stripdir=None, prependdir=None, limit_sl_dest=None)
 | 
					.. function:: compile_dir(dir, maxlevels=sys.getrecursionlimit(), ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, workers=1, invalidation_mode=None, \*, stripdir=None, prependdir=None, limit_sl_dest=None, hardlink_dupes=False)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
   Recursively descend the directory tree named by *dir*, compiling all :file:`.py`
 | 
					   Recursively descend the directory tree named by *dir*, compiling all :file:`.py`
 | 
				
			||||||
   files along the way. Return a true value if all the files compiled successfully,
 | 
					   files along the way. Return a true value if all the files compiled successfully,
 | 
				
			||||||
| 
						 | 
					@ -193,6 +198,9 @@ Public functions
 | 
				
			||||||
   the ``-s``, ``-p`` and ``-e`` options described above.
 | 
					   the ``-s``, ``-p`` and ``-e`` options described above.
 | 
				
			||||||
   They may be specified as ``str``, ``bytes`` or :py:class:`os.PathLike`.
 | 
					   They may be specified as ``str``, ``bytes`` or :py:class:`os.PathLike`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   If *hardlink_dupes* is true and two ``.pyc`` files with different optimization
 | 
				
			||||||
 | 
					   levels have the same content, use hard links to consolidate duplicate files.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
   .. versionchanged:: 3.2
 | 
					   .. versionchanged:: 3.2
 | 
				
			||||||
      Added the *legacy* and *optimize* parameter.
 | 
					      Added the *legacy* and *optimize* parameter.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -219,9 +227,9 @@ Public functions
 | 
				
			||||||
      Setting *workers* to 0 now chooses the optimal number of cores.
 | 
					      Setting *workers* to 0 now chooses the optimal number of cores.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
   .. versionchanged:: 3.9
 | 
					   .. versionchanged:: 3.9
 | 
				
			||||||
      Added *stripdir*, *prependdir* and *limit_sl_dest* arguments.
 | 
					      Added *stripdir*, *prependdir*, *limit_sl_dest* and *hardlink_dupes* arguments.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.. function:: compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, invalidation_mode=None, \*, stripdir=None, prependdir=None, limit_sl_dest=None)
 | 
					.. function:: compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, invalidation_mode=None, \*, stripdir=None, prependdir=None, limit_sl_dest=None, hardlink_dupes=False)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
   Compile the file with path *fullname*. Return a true value if the file
 | 
					   Compile the file with path *fullname*. Return a true value if the file
 | 
				
			||||||
   compiled successfully, and a false value otherwise.
 | 
					   compiled successfully, and a false value otherwise.
 | 
				
			||||||
| 
						 | 
					@ -257,6 +265,9 @@ Public functions
 | 
				
			||||||
   the ``-s``, ``-p`` and ``-e`` options described above.
 | 
					   the ``-s``, ``-p`` and ``-e`` options described above.
 | 
				
			||||||
   They may be specified as ``str``, ``bytes`` or :py:class:`os.PathLike`.
 | 
					   They may be specified as ``str``, ``bytes`` or :py:class:`os.PathLike`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   If *hardlink_dupes* is true and two ``.pyc`` files with different optimization
 | 
				
			||||||
 | 
					   levels have the same content, use hard links to consolidate duplicate files.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
   .. versionadded:: 3.2
 | 
					   .. versionadded:: 3.2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
   .. versionchanged:: 3.5
 | 
					   .. versionchanged:: 3.5
 | 
				
			||||||
| 
						 | 
					@ -273,7 +284,7 @@ Public functions
 | 
				
			||||||
      The *invalidation_mode* parameter's default value is updated to None.
 | 
					      The *invalidation_mode* parameter's default value is updated to None.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
   .. versionchanged:: 3.9
 | 
					   .. versionchanged:: 3.9
 | 
				
			||||||
      Added *stripdir*, *prependdir* and *limit_sl_dest* arguments.
 | 
					      Added *stripdir*, *prependdir*, *limit_sl_dest* and *hardlink_dupes* arguments.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.. function:: compile_path(skip_curdir=True, maxlevels=0, force=False, quiet=0, legacy=False, optimize=-1, invalidation_mode=None)
 | 
					.. function:: compile_path(skip_curdir=True, maxlevels=0, force=False, quiet=0, legacy=False, optimize=-1, invalidation_mode=None)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -245,6 +245,16 @@ that schedules a shutdown for the default executor that waits on the
 | 
				
			||||||
Added :class:`asyncio.PidfdChildWatcher`, a Linux-specific child watcher
 | 
					Added :class:`asyncio.PidfdChildWatcher`, a Linux-specific child watcher
 | 
				
			||||||
implementation that polls process file descriptors. (:issue:`38692`)
 | 
					implementation that polls process file descriptors. (:issue:`38692`)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					compileall
 | 
				
			||||||
 | 
					----------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Added the possibility to use hard links for duplicated ``.pyc`` files: the *hardlink_dupes* parameter and the ``--hardlink-dupes`` command-line option.
 | 
				
			||||||
 | 
					(Contributed by Lumír 'Frenzy' Balhar in :issue:`40495`.)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Added new options for path manipulation in resulting ``.pyc`` files: the *stripdir*, *prependdir* and *limit_sl_dest* parameters and the ``-s``, ``-p`` and ``-e`` command-line options.
 | 
				
			||||||
 | 
					Added the possibility to specify the option for an optimization level multiple times.
 | 
				
			||||||
 | 
					(Contributed by Lumír 'Frenzy' Balhar in :issue:`38112`.)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
concurrent.futures
 | 
					concurrent.futures
 | 
				
			||||||
------------------
 | 
					------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -15,6 +15,7 @@ import sys
 | 
				
			||||||
import importlib.util
 | 
					import importlib.util
 | 
				
			||||||
import py_compile
 | 
					import py_compile
 | 
				
			||||||
import struct
 | 
					import struct
 | 
				
			||||||
 | 
					import filecmp
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from functools import partial
 | 
					from functools import partial
 | 
				
			||||||
from pathlib import Path
 | 
					from pathlib import Path
 | 
				
			||||||
| 
						 | 
					@ -47,7 +48,7 @@ def _walk_dir(dir, maxlevels, quiet=0):
 | 
				
			||||||
def compile_dir(dir, maxlevels=None, ddir=None, force=False,
 | 
					def compile_dir(dir, maxlevels=None, ddir=None, force=False,
 | 
				
			||||||
                rx=None, quiet=0, legacy=False, optimize=-1, workers=1,
 | 
					                rx=None, quiet=0, legacy=False, optimize=-1, workers=1,
 | 
				
			||||||
                invalidation_mode=None, *, stripdir=None,
 | 
					                invalidation_mode=None, *, stripdir=None,
 | 
				
			||||||
                prependdir=None, limit_sl_dest=None):
 | 
					                prependdir=None, limit_sl_dest=None, hardlink_dupes=False):
 | 
				
			||||||
    """Byte-compile all modules in the given directory tree.
 | 
					    """Byte-compile all modules in the given directory tree.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Arguments (only dir is required):
 | 
					    Arguments (only dir is required):
 | 
				
			||||||
| 
						 | 
					@ -70,6 +71,7 @@ def compile_dir(dir, maxlevels=None, ddir=None, force=False,
 | 
				
			||||||
               after stripdir
 | 
					               after stripdir
 | 
				
			||||||
    limit_sl_dest: ignore symlinks if they are pointing outside of
 | 
					    limit_sl_dest: ignore symlinks if they are pointing outside of
 | 
				
			||||||
                   the defined path
 | 
					                   the defined path
 | 
				
			||||||
 | 
					    hardlink_dupes: hardlink duplicated pyc files
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    ProcessPoolExecutor = None
 | 
					    ProcessPoolExecutor = None
 | 
				
			||||||
    if ddir is not None and (stripdir is not None or prependdir is not None):
 | 
					    if ddir is not None and (stripdir is not None or prependdir is not None):
 | 
				
			||||||
| 
						 | 
					@ -104,7 +106,8 @@ def compile_dir(dir, maxlevels=None, ddir=None, force=False,
 | 
				
			||||||
                                           invalidation_mode=invalidation_mode,
 | 
					                                           invalidation_mode=invalidation_mode,
 | 
				
			||||||
                                           stripdir=stripdir,
 | 
					                                           stripdir=stripdir,
 | 
				
			||||||
                                           prependdir=prependdir,
 | 
					                                           prependdir=prependdir,
 | 
				
			||||||
                                           limit_sl_dest=limit_sl_dest),
 | 
					                                           limit_sl_dest=limit_sl_dest,
 | 
				
			||||||
 | 
					                                           hardlink_dupes=hardlink_dupes),
 | 
				
			||||||
                                   files)
 | 
					                                   files)
 | 
				
			||||||
            success = min(results, default=True)
 | 
					            success = min(results, default=True)
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
| 
						 | 
					@ -112,14 +115,15 @@ def compile_dir(dir, maxlevels=None, ddir=None, force=False,
 | 
				
			||||||
            if not compile_file(file, ddir, force, rx, quiet,
 | 
					            if not compile_file(file, ddir, force, rx, quiet,
 | 
				
			||||||
                                legacy, optimize, invalidation_mode,
 | 
					                                legacy, optimize, invalidation_mode,
 | 
				
			||||||
                                stripdir=stripdir, prependdir=prependdir,
 | 
					                                stripdir=stripdir, prependdir=prependdir,
 | 
				
			||||||
                                limit_sl_dest=limit_sl_dest):
 | 
					                                limit_sl_dest=limit_sl_dest,
 | 
				
			||||||
 | 
					                                hardlink_dupes=hardlink_dupes):
 | 
				
			||||||
                success = False
 | 
					                success = False
 | 
				
			||||||
    return success
 | 
					    return success
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
 | 
					def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
 | 
				
			||||||
                 legacy=False, optimize=-1,
 | 
					                 legacy=False, optimize=-1,
 | 
				
			||||||
                 invalidation_mode=None, *, stripdir=None, prependdir=None,
 | 
					                 invalidation_mode=None, *, stripdir=None, prependdir=None,
 | 
				
			||||||
                 limit_sl_dest=None):
 | 
					                 limit_sl_dest=None, hardlink_dupes=False):
 | 
				
			||||||
    """Byte-compile one file.
 | 
					    """Byte-compile one file.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Arguments (only fullname is required):
 | 
					    Arguments (only fullname is required):
 | 
				
			||||||
| 
						 | 
					@ -140,6 +144,7 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
 | 
				
			||||||
               after stripdir
 | 
					               after stripdir
 | 
				
			||||||
    limit_sl_dest: ignore symlinks if they are pointing outside of
 | 
					    limit_sl_dest: ignore symlinks if they are pointing outside of
 | 
				
			||||||
                   the defined path.
 | 
					                   the defined path.
 | 
				
			||||||
 | 
					    hardlink_dupes: hardlink duplicated pyc files
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if ddir is not None and (stripdir is not None or prependdir is not None):
 | 
					    if ddir is not None and (stripdir is not None or prependdir is not None):
 | 
				
			||||||
| 
						 | 
					@ -176,6 +181,14 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
 | 
				
			||||||
    if isinstance(optimize, int):
 | 
					    if isinstance(optimize, int):
 | 
				
			||||||
        optimize = [optimize]
 | 
					        optimize = [optimize]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Use set() to remove duplicates.
 | 
				
			||||||
 | 
					    # Use sorted() to create pyc files in a deterministic order.
 | 
				
			||||||
 | 
					    optimize = sorted(set(optimize))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if hardlink_dupes and len(optimize) < 2:
 | 
				
			||||||
 | 
					        raise ValueError("Hardlinking of duplicated bytecode makes sense "
 | 
				
			||||||
 | 
					                          "only for more than one optimization level")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if rx is not None:
 | 
					    if rx is not None:
 | 
				
			||||||
        mo = rx.search(fullname)
 | 
					        mo = rx.search(fullname)
 | 
				
			||||||
        if mo:
 | 
					        if mo:
 | 
				
			||||||
| 
						 | 
					@ -220,10 +233,16 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
 | 
				
			||||||
            if not quiet:
 | 
					            if not quiet:
 | 
				
			||||||
                print('Compiling {!r}...'.format(fullname))
 | 
					                print('Compiling {!r}...'.format(fullname))
 | 
				
			||||||
            try:
 | 
					            try:
 | 
				
			||||||
                for opt_level, cfile in opt_cfiles.items():
 | 
					                for index, opt_level in enumerate(optimize):
 | 
				
			||||||
 | 
					                    cfile = opt_cfiles[opt_level]
 | 
				
			||||||
                    ok = py_compile.compile(fullname, cfile, dfile, True,
 | 
					                    ok = py_compile.compile(fullname, cfile, dfile, True,
 | 
				
			||||||
                                            optimize=opt_level,
 | 
					                                            optimize=opt_level,
 | 
				
			||||||
                                            invalidation_mode=invalidation_mode)
 | 
					                                            invalidation_mode=invalidation_mode)
 | 
				
			||||||
 | 
					                    if index > 0 and hardlink_dupes:
 | 
				
			||||||
 | 
					                        previous_cfile = opt_cfiles[optimize[index - 1]]
 | 
				
			||||||
 | 
					                        if filecmp.cmp(cfile, previous_cfile, shallow=False):
 | 
				
			||||||
 | 
					                            os.unlink(cfile)
 | 
				
			||||||
 | 
					                            os.link(previous_cfile, cfile)
 | 
				
			||||||
            except py_compile.PyCompileError as err:
 | 
					            except py_compile.PyCompileError as err:
 | 
				
			||||||
                success = False
 | 
					                success = False
 | 
				
			||||||
                if quiet >= 2:
 | 
					                if quiet >= 2:
 | 
				
			||||||
| 
						 | 
					@ -352,6 +371,9 @@ def main():
 | 
				
			||||||
                              'Python interpreter itself (specified by -O).'))
 | 
					                              'Python interpreter itself (specified by -O).'))
 | 
				
			||||||
    parser.add_argument('-e', metavar='DIR', dest='limit_sl_dest',
 | 
					    parser.add_argument('-e', metavar='DIR', dest='limit_sl_dest',
 | 
				
			||||||
                        help='Ignore symlinks pointing outsite of the DIR')
 | 
					                        help='Ignore symlinks pointing outsite of the DIR')
 | 
				
			||||||
 | 
					    parser.add_argument('--hardlink-dupes', action='store_true',
 | 
				
			||||||
 | 
					                        dest='hardlink_dupes',
 | 
				
			||||||
 | 
					                        help='Hardlink duplicated pyc files')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    args = parser.parse_args()
 | 
					    args = parser.parse_args()
 | 
				
			||||||
    compile_dests = args.compile_dest
 | 
					    compile_dests = args.compile_dest
 | 
				
			||||||
| 
						 | 
					@ -371,6 +393,10 @@ def main():
 | 
				
			||||||
    if args.opt_levels is None:
 | 
					    if args.opt_levels is None:
 | 
				
			||||||
        args.opt_levels = [-1]
 | 
					        args.opt_levels = [-1]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if len(args.opt_levels) == 1 and args.hardlink_dupes:
 | 
				
			||||||
 | 
					        parser.error(("Hardlinking of duplicated bytecode makes sense "
 | 
				
			||||||
 | 
					                      "only for more than one optimization level."))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if args.ddir is not None and (
 | 
					    if args.ddir is not None and (
 | 
				
			||||||
        args.stripdir is not None or args.prependdir is not None
 | 
					        args.stripdir is not None or args.prependdir is not None
 | 
				
			||||||
    ):
 | 
					    ):
 | 
				
			||||||
| 
						 | 
					@ -404,7 +430,8 @@ def main():
 | 
				
			||||||
                                        stripdir=args.stripdir,
 | 
					                                        stripdir=args.stripdir,
 | 
				
			||||||
                                        prependdir=args.prependdir,
 | 
					                                        prependdir=args.prependdir,
 | 
				
			||||||
                                        optimize=args.opt_levels,
 | 
					                                        optimize=args.opt_levels,
 | 
				
			||||||
                                        limit_sl_dest=args.limit_sl_dest):
 | 
					                                        limit_sl_dest=args.limit_sl_dest,
 | 
				
			||||||
 | 
					                                        hardlink_dupes=args.hardlink_dupes):
 | 
				
			||||||
                        success = False
 | 
					                        success = False
 | 
				
			||||||
                else:
 | 
					                else:
 | 
				
			||||||
                    if not compile_dir(dest, maxlevels, args.ddir,
 | 
					                    if not compile_dir(dest, maxlevels, args.ddir,
 | 
				
			||||||
| 
						 | 
					@ -414,7 +441,8 @@ def main():
 | 
				
			||||||
                                       stripdir=args.stripdir,
 | 
					                                       stripdir=args.stripdir,
 | 
				
			||||||
                                       prependdir=args.prependdir,
 | 
					                                       prependdir=args.prependdir,
 | 
				
			||||||
                                       optimize=args.opt_levels,
 | 
					                                       optimize=args.opt_levels,
 | 
				
			||||||
                                       limit_sl_dest=args.limit_sl_dest):
 | 
					                                       limit_sl_dest=args.limit_sl_dest,
 | 
				
			||||||
 | 
					                                       hardlink_dupes=args.hardlink_dupes):
 | 
				
			||||||
                        success = False
 | 
					                        success = False
 | 
				
			||||||
            return success
 | 
					            return success
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,16 +1,19 @@
 | 
				
			||||||
import sys
 | 
					 | 
				
			||||||
import compileall
 | 
					import compileall
 | 
				
			||||||
 | 
					import contextlib
 | 
				
			||||||
 | 
					import filecmp
 | 
				
			||||||
import importlib.util
 | 
					import importlib.util
 | 
				
			||||||
import test.test_importlib.util
 | 
					import io
 | 
				
			||||||
 | 
					import itertools
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
import pathlib
 | 
					import pathlib
 | 
				
			||||||
import py_compile
 | 
					import py_compile
 | 
				
			||||||
import shutil
 | 
					import shutil
 | 
				
			||||||
import struct
 | 
					import struct
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
import tempfile
 | 
					import tempfile
 | 
				
			||||||
 | 
					import test.test_importlib.util
 | 
				
			||||||
import time
 | 
					import time
 | 
				
			||||||
import unittest
 | 
					import unittest
 | 
				
			||||||
import io
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
from unittest import mock, skipUnless
 | 
					from unittest import mock, skipUnless
 | 
				
			||||||
try:
 | 
					try:
 | 
				
			||||||
| 
						 | 
					@ -26,6 +29,24 @@ from .test_py_compile import without_source_date_epoch
 | 
				
			||||||
from .test_py_compile import SourceDateEpochTestMeta
 | 
					from .test_py_compile import SourceDateEpochTestMeta
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_pyc(script, opt):
 | 
				
			||||||
 | 
					    if not opt:
 | 
				
			||||||
 | 
					        # Replace None and 0 with ''
 | 
				
			||||||
 | 
					        opt = ''
 | 
				
			||||||
 | 
					    return importlib.util.cache_from_source(script, optimization=opt)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_pycs(script):
 | 
				
			||||||
 | 
					    return [get_pyc(script, opt) for opt in (0, 1, 2)]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def is_hardlink(filename1, filename2):
 | 
				
			||||||
 | 
					    """Returns True if two files have the same inode (hardlink)"""
 | 
				
			||||||
 | 
					    inode1 = os.stat(filename1).st_ino
 | 
				
			||||||
 | 
					    inode2 = os.stat(filename2).st_ino
 | 
				
			||||||
 | 
					    return inode1 == inode2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class CompileallTestsBase:
 | 
					class CompileallTestsBase:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def setUp(self):
 | 
					    def setUp(self):
 | 
				
			||||||
| 
						 | 
					@ -825,6 +846,32 @@ class CommandLineTestsBase:
 | 
				
			||||||
        self.assertTrue(os.path.isfile(allowed_bc))
 | 
					        self.assertTrue(os.path.isfile(allowed_bc))
 | 
				
			||||||
        self.assertFalse(os.path.isfile(prohibited_bc))
 | 
					        self.assertFalse(os.path.isfile(prohibited_bc))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_hardlink_bad_args(self):
 | 
				
			||||||
 | 
					        # Bad arguments combination, hardlink deduplication make sense
 | 
				
			||||||
 | 
					        # only for more than one optimization level
 | 
				
			||||||
 | 
					        self.assertRunNotOK(self.directory, "-o 1", "--hardlink-dupes")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_hardlink(self):
 | 
				
			||||||
 | 
					        # 'a = 0' code produces the same bytecode for the 3 optimization
 | 
				
			||||||
 | 
					        # levels. All three .pyc files must have the same inode (hardlinks).
 | 
				
			||||||
 | 
					        #
 | 
				
			||||||
 | 
					        # If deduplication is disabled, all pyc files must have different
 | 
				
			||||||
 | 
					        # inodes.
 | 
				
			||||||
 | 
					        for dedup in (True, False):
 | 
				
			||||||
 | 
					            with tempfile.TemporaryDirectory() as path:
 | 
				
			||||||
 | 
					                with self.subTest(dedup=dedup):
 | 
				
			||||||
 | 
					                    script = script_helper.make_script(path, "script", "a = 0")
 | 
				
			||||||
 | 
					                    pycs = get_pycs(script)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    args = ["-q", "-o 0", "-o 1", "-o 2"]
 | 
				
			||||||
 | 
					                    if dedup:
 | 
				
			||||||
 | 
					                        args.append("--hardlink-dupes")
 | 
				
			||||||
 | 
					                    self.assertRunOK(path, *args)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    self.assertEqual(is_hardlink(pycs[0], pycs[1]), dedup)
 | 
				
			||||||
 | 
					                    self.assertEqual(is_hardlink(pycs[1], pycs[2]), dedup)
 | 
				
			||||||
 | 
					                    self.assertEqual(is_hardlink(pycs[0], pycs[2]), dedup)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class CommandLineTestsWithSourceEpoch(CommandLineTestsBase,
 | 
					class CommandLineTestsWithSourceEpoch(CommandLineTestsBase,
 | 
				
			||||||
                                       unittest.TestCase,
 | 
					                                       unittest.TestCase,
 | 
				
			||||||
| 
						 | 
					@ -841,5 +888,176 @@ class CommandLineTestsNoSourceEpoch(CommandLineTestsBase,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class HardlinkDedupTestsBase:
					    # Tests for the hardlink_dupes parameter of compileall.compile_dir().
					    #
					    # This class has no base class of its own: it uses the unittest
					    # assertion methods, so concrete test classes must also inherit from
					    # unittest.TestCase.

					    def setUp(self):
					        # Directory currently under test; only set while inside
					        # temporary_directory().
					        self.path = None

					    @contextlib.contextmanager
					    def temporary_directory(self):
					        """Create a temporary directory and expose it as self.path.

					        Yields the directory path.  self.path is reset to None on exit,
					        including when the body of the with statement raises.
					        """
					        with tempfile.TemporaryDirectory() as path:
					            self.path = path
					            try:
					                yield path
					            finally:
					                # Never leave self.path pointing at a removed directory.
					                self.path = None

					    def make_script(self, code, name="script"):
					        """Create a script called *name* containing *code* in self.path.

					        Returns the result of script_helper.make_script(), which the
					        tests use as the path of the script.
					        """
					        return script_helper.make_script(self.path, name, code)

					    def compile_dir(self, *, dedup=True, optimize=(0, 1, 2), force=False):
					        """Run compileall.compile_dir() quietly on self.path.

					        *dedup* is passed as hardlink_dupes; *optimize* and *force* are
					        passed through unchanged.
					        """
					        compileall.compile_dir(self.path, quiet=True, optimize=optimize,
					                               hardlink_dupes=dedup, force=force)

					    def test_bad_args(self):
					        # Bad combination of arguments: hardlink deduplication only makes
					        # sense for more than one optimization level.
					        with self.temporary_directory():
					            self.make_script("pass")
					            with self.assertRaises(ValueError):
					                self.compile_dir(dedup=True, optimize=0)
					            with self.assertRaises(ValueError):
					                # same optimization level specified twice:
					                # compile_dir() removes duplicates
					                self.compile_dir(dedup=True, optimize=[0, 0])

					    def create_code(self, docstring=False, assertion=False):
					        """Return the source code of a small test module.

					        The code always contains 'x = 1'.  A module docstring is put
					        before it if *docstring* is true, and an assert statement after
					        it if *assertion* is true.
					        """
					        lines = []
					        if docstring:
					            lines.append("'module docstring'")
					        lines.append('x = 1')
					        if assertion:
					            lines.append("assert x == 1")
					        return '\n'.join(lines)

					    def iter_codes(self):
					        """Yield (code, docstring, assertion) for all four combinations.

					        *code* is create_code(docstring=docstring, assertion=assertion).
					        """
					        for docstring in (False, True):
					            for assertion in (False, True):
					                code = self.create_code(docstring=docstring, assertion=assertion)
					                yield (code, docstring, assertion)

					    def test_disabled(self):
					        # Deduplication disabled, no hardlinks
					        for code, docstring, assertion in self.iter_codes():
					            with self.subTest(docstring=docstring, assertion=assertion):
					                with self.temporary_directory():
					                    script = self.make_script(code)
					                    pycs = get_pycs(script)
					                    self.compile_dir(dedup=False)
					                    self.assertFalse(is_hardlink(pycs[0], pycs[1]))
					                    self.assertFalse(is_hardlink(pycs[0], pycs[2]))
					                    self.assertFalse(is_hardlink(pycs[1], pycs[2]))

					    def check_hardlinks(self, script, docstring=False, assertion=False):
					        """Check which .pyc files of *script* are hardlinked together.

					        *docstring* and *assertion* tell whether the source contains a
					        module docstring and an assert statement.  Optimization level 1
					        removes assert statements and level 2 also removes docstrings,
					        so the expected result is:

					        * opt-0 and opt-1 are hardlinked unless there is an assertion;
					        * opt-1 and opt-2 are hardlinked unless there is a docstring;
					        * opt-0 and opt-2 are hardlinked only if there is neither.
					        """
					        pycs = get_pycs(script)
					        self.assertEqual(is_hardlink(pycs[0], pycs[1]),
					                         not assertion)
					        self.assertEqual(is_hardlink(pycs[0], pycs[2]),
					                         not assertion and not docstring)
					        self.assertEqual(is_hardlink(pycs[1], pycs[2]),
					                         not docstring)

					    def test_hardlink(self):
					        # Test deduplication on all combinations
					        for code, docstring, assertion in self.iter_codes():
					            with self.subTest(docstring=docstring, assertion=assertion):
					                with self.temporary_directory():
					                    script = self.make_script(code)
					                    self.compile_dir()
					                    self.check_hardlinks(script, docstring, assertion)

					    def test_only_two_levels(self):
					        # Don't build the 3 optimization levels, but only 2
					        for opts in ((0, 1), (1, 2), (0, 2)):
					            with self.subTest(opts=opts):
					                with self.temporary_directory():
					                    # code with no docstring and no assertion:
					                    # same bytecode for all optimization levels
					                    script = self.make_script(self.create_code())
					                    self.compile_dir(optimize=opts)
					                    pyc1 = get_pyc(script, opts[0])
					                    pyc2 = get_pyc(script, opts[1])
					                    self.assertTrue(is_hardlink(pyc1, pyc2))

					    def test_duplicated_levels(self):
					        # compile_dir() must not fail if optimize contains duplicated
					        # optimization levels and/or if optimization levels are not sorted.
					        with self.temporary_directory():
					            # code with no docstring and no assertion:
					            # same bytecode for all optimization levels
					            script = self.make_script(self.create_code())
					            self.compile_dir(optimize=[1, 0, 1, 0])
					            pyc1 = get_pyc(script, 0)
					            pyc2 = get_pyc(script, 1)
					            self.assertTrue(is_hardlink(pyc1, pyc2))

					    def test_recompilation(self):
					        # Test compile_dir() when pyc files already exist and the script
					        # content changed
					        with self.temporary_directory():
					            script = self.make_script("a = 0")
					            self.compile_dir()
					            # All three levels have the same inode
					            self.check_hardlinks(script)

					            pycs = get_pycs(script)
					            inode = os.stat(pycs[0]).st_ino

					            # Change of the module content
					            self.make_script("print(0)")

					            # Recompilation without -o 1
					            self.compile_dir(optimize=[0, 2], force=True)

					            # opt-1.pyc should have the same inode as before and others should not
					            self.assertEqual(inode, os.stat(pycs[1]).st_ino)
					            self.assertTrue(is_hardlink(pycs[0], pycs[2]))
					            self.assertNotEqual(inode, os.stat(pycs[2]).st_ino)
					            # opt-1.pyc and opt-2.pyc have different content.
					            # shallow=False forces a comparison of the file contents
					            # instead of trusting identical os.stat() signatures.
					            self.assertFalse(filecmp.cmp(pycs[1], pycs[2], shallow=False))

					    def test_import(self):
					        # Test that import updates a single pyc file when pyc files already
					        # exist and the script content changed
					        with self.temporary_directory():
					            script = self.make_script(self.create_code(), name="module")
					            self.compile_dir()
					            # All three levels have the same inode
					            self.check_hardlinks(script)

					            pycs = get_pycs(script)
					            inode = os.stat(pycs[0]).st_ino

					            # Change of the module content
					            self.make_script("print(0)", name="module")

					            # Import the module in Python with -O (optimization level 1)
					            script_helper.assert_python_ok(
					                "-O", "-c", "import module", __isolated=False, PYTHONPATH=self.path
					            )

					            # Only opt-1.pyc is changed
					            self.assertEqual(inode, os.stat(pycs[0]).st_ino)
					            self.assertEqual(inode, os.stat(pycs[2]).st_ino)
					            self.assertFalse(is_hardlink(pycs[1], pycs[2]))
					            # opt-1.pyc and opt-2.pyc have different content.
					            # shallow=False forces a comparison of the file contents
					            # instead of trusting identical os.stat() signatures.
					            self.assertFalse(filecmp.cmp(pycs[1], pycs[2], shallow=False))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class HardlinkDedupTestsWithSourceEpoch(
					        HardlinkDedupTestsBase,
					        unittest.TestCase,
					        metaclass=SourceDateEpochTestMeta,
					        source_date_epoch=True):
					    # Runs every HardlinkDedupTestsBase test with source_date_epoch=True
					    # handed to SourceDateEpochTestMeta.
					    # NOTE(review): the metaclass is defined outside this chunk;
					    # presumably it controls SOURCE_DATE_EPOCH for the tests -- confirm.
					    pass
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class HardlinkDedupTestsNoSourceEpoch(
					        HardlinkDedupTestsBase,
					        unittest.TestCase,
					        metaclass=SourceDateEpochTestMeta,
					        source_date_epoch=False):
					    # Runs every HardlinkDedupTestsBase test with source_date_epoch=False
					    # handed to SourceDateEpochTestMeta.
					    # NOTE(review): the metaclass is defined outside this chunk;
					    # presumably it controls SOURCE_DATE_EPOCH for the tests -- confirm.
					    pass
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if __name__ == "__main__":
 | 
					if __name__ == "__main__":
 | 
				
			||||||
    unittest.main()
 | 
					    unittest.main()
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -86,6 +86,7 @@ Marcin Bachry
 | 
				
			||||||
Alfonso Baciero
 | 
					Alfonso Baciero
 | 
				
			||||||
Dwayne Bailey
 | 
					Dwayne Bailey
 | 
				
			||||||
Stig Bakken
 | 
					Stig Bakken
 | 
				
			||||||
 | 
					Lumír Balhar
 | 
				
			||||||
Aleksandr Balezin
 | 
					Aleksandr Balezin
 | 
				
			||||||
Greg Ball
 | 
					Greg Ball
 | 
				
			||||||
Lewis Ball
 | 
					Lewis Ball
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,2 @@
 | 
				
			||||||
 | 
					:mod:`compileall` is now able to use hard links to avoid duplicate files
 | 
				
			||||||
 | 
					when ``.pyc`` files for different optimization levels have the same content.
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue