gh-97669: Create Tools/patchcheck/ directory (#98186)

Move patchcheck.py, reindent.py and untabify.py scripts to a new Tools/patchcheck/ directory.
2025-11-01 18:51:43 +00:00 · 2022-10-12 10:09:21 +02:00 · 2022-10-12 10:09:21 +02:00 · 0895c2a066
commit 0895c2a066
parent c39a0c3354
7 changed files with 5 additions and 8 deletions
--- a/Tools/scripts/README
+++ b/Tools/scripts/README
@@ -5,9 +5,6 @@ useful while building, extending or managing Python.
 combinerefs.py            A helper for analyzing PYTHONDUMPREFS output
 idle3                     Main program to start IDLE
 parse_html5_entities.py   Utility for parsing HTML5 entity definitions
-patchcheck.py             Perform common checks and cleanup before committing
 pydoc3                    Python documentation browser
-reindent.py               Change .py files to use 4-space indents
 run_tests.py              Run the test suite with more sensible default options
 stable_abi.py             Stable ABI checks and file generators.
-untabify.py               Replace tabs with spaces in argument files
--- a/Tools/scripts/patchcheck.py
+++ b/Tools/scripts/patchcheck.py
@@ -1,312 +0,0 @@
-#!/usr/bin/env python3
-"""Check proposed changes for common issues."""
-import re
-import sys
-import shutil
-import os.path
-import subprocess
-import sysconfig
-
-import reindent
-import untabify
-
-
-# Excluded directories which are copies of external libraries:
-# don't check their coding style
-EXCLUDE_DIRS = [os.path.join('Modules', '_ctypes', 'libffi_osx'),
-                os.path.join('Modules', '_ctypes', 'libffi_msvc'),
-                os.path.join('Modules', '_decimal', 'libmpdec'),
-                os.path.join('Modules', 'expat'),
-                os.path.join('Modules', 'zlib')]
-SRCDIR = sysconfig.get_config_var('srcdir')
-
-
-def n_files_str(count):
-    """Return 'N file(s)' with the proper plurality on 'file'."""
-    return "{} file{}".format(count, "s" if count != 1 else "")
-
-
-def status(message, modal=False, info=None):
-    """Decorator to output status info to stdout."""
-    def decorated_fxn(fxn):
-        def call_fxn(*args, **kwargs):
-            sys.stdout.write(message + ' ... ')
-            sys.stdout.flush()
-            result = fxn(*args, **kwargs)
-            if not modal and not info:
-                print("done")
-            elif info:
-                print(info(result))
-            else:
-                print("yes" if result else "NO")
-            return result
-        return call_fxn
-    return decorated_fxn
-
-
-def get_git_branch():
-    """Get the symbolic name for the current git branch"""
-    cmd = "git rev-parse --abbrev-ref HEAD".split()
-    try:
-        return subprocess.check_output(cmd,
-                                       stderr=subprocess.DEVNULL,
-                                       cwd=SRCDIR,
-                                       encoding='UTF-8')
-    except subprocess.CalledProcessError:
-        return None
-
-
-def get_git_upstream_remote():
-    """Get the remote name to use for upstream branches
-
-    Uses "upstream" if it exists, "origin" otherwise
-    """
-    cmd = "git remote get-url upstream".split()
-    try:
-        subprocess.check_output(cmd,
-                                stderr=subprocess.DEVNULL,
-                                cwd=SRCDIR,
-                                encoding='UTF-8')
-    except subprocess.CalledProcessError:
-        return "origin"
-    return "upstream"
-
-
-def get_git_remote_default_branch(remote_name):
-    """Get the name of the default branch for the given remote
-
-    It is typically called 'main', but may differ
-    """
-    cmd = "git remote show {}".format(remote_name).split()
-    env = os.environ.copy()
-    env['LANG'] = 'C'
-    try:
-        remote_info = subprocess.check_output(cmd,
-                                              stderr=subprocess.DEVNULL,
-                                              cwd=SRCDIR,
-                                              encoding='UTF-8',
-                                              env=env)
-    except subprocess.CalledProcessError:
-        return None
-    for line in remote_info.splitlines():
-        if "HEAD branch:" in line:
-            base_branch = line.split(":")[1].strip()
-            return base_branch
-    return None
-
-
-@status("Getting base branch for PR",
-        info=lambda x: x if x is not None else "not a PR branch")
-def get_base_branch():
-    if not os.path.exists(os.path.join(SRCDIR, '.git')):
-        # Not a git checkout, so there's no base branch
-        return None
-    upstream_remote = get_git_upstream_remote()
-    version = sys.version_info
-    if version.releaselevel == 'alpha':
-        base_branch = get_git_remote_default_branch(upstream_remote)
-    else:
-        base_branch = "{0.major}.{0.minor}".format(version)
-    this_branch = get_git_branch()
-    if this_branch is None or this_branch == base_branch:
-        # Not on a git PR branch, so there's no base branch
-        return None
-    return upstream_remote + "/" + base_branch
-
-
-@status("Getting the list of files that have been added/changed",
-        info=lambda x: n_files_str(len(x)))
-def changed_files(base_branch=None):
-    """Get the list of changed or added files from git."""
-    if os.path.exists(os.path.join(SRCDIR, '.git')):
-        # We just use an existence check here as:
-        #  directory = normal git checkout/clone
-        #  file = git worktree directory
-        if base_branch:
-            cmd = 'git diff --name-status ' + base_branch
-        else:
-            cmd = 'git status --porcelain'
-        filenames = []
-        with subprocess.Popen(cmd.split(),
-                              stdout=subprocess.PIPE,
-                              cwd=SRCDIR) as st:
-            if st.wait() != 0:
-                sys.exit(f'error running {cmd}')
-            for line in st.stdout:
-                line = line.decode().rstrip()
-                status_text, filename = line.split(maxsplit=1)
-                status = set(status_text)
-                # modified, added or unmerged files
-                if not status.intersection('MAU'):
-                    continue
-                if ' -> ' in filename:
-                    # file is renamed
-                    filename = filename.split(' -> ', 2)[1].strip()
-                filenames.append(filename)
-    else:
-        sys.exit('need a git checkout to get modified files')
-
-    filenames2 = []
-    for filename in filenames:
-        # Normalize the path to be able to match using .startswith()
-        filename = os.path.normpath(filename)
-        if any(filename.startswith(path) for path in EXCLUDE_DIRS):
-            # Exclude the file
-            continue
-        filenames2.append(filename)
-
-    return filenames2
-
-
-def report_modified_files(file_paths):
-    count = len(file_paths)
-    if count == 0:
-        return n_files_str(count)
-    else:
-        lines = ["{}:".format(n_files_str(count))]
-        for path in file_paths:
-            lines.append("  {}".format(path))
-        return "\n".join(lines)
-
-
-@status("Fixing Python file whitespace", info=report_modified_files)
-def normalize_whitespace(file_paths):
-    """Make sure that the whitespace for .py files have been normalized."""
-    reindent.makebackup = False  # No need to create backups.
-    fixed = [path for path in file_paths if path.endswith('.py') and
-             reindent.check(os.path.join(SRCDIR, path))]
-    return fixed
-
-
-@status("Fixing C file whitespace", info=report_modified_files)
-def normalize_c_whitespace(file_paths):
-    """Report if any C files """
-    fixed = []
-    for path in file_paths:
-        abspath = os.path.join(SRCDIR, path)
-        with open(abspath, 'r') as f:
-            if '\t' not in f.read():
-                continue
-        untabify.process(abspath, 8, verbose=False)
-        fixed.append(path)
-    return fixed
-
-
-ws_re = re.compile(br'\s+(\r?\n)$')
-
-@status("Fixing docs whitespace", info=report_modified_files)
-def normalize_docs_whitespace(file_paths):
-    fixed = []
-    for path in file_paths:
-        abspath = os.path.join(SRCDIR, path)
-        try:
-            with open(abspath, 'rb') as f:
-                lines = f.readlines()
-            new_lines = [ws_re.sub(br'\1', line) for line in lines]
-            if new_lines != lines:
-                shutil.copyfile(abspath, abspath + '.bak')
-                with open(abspath, 'wb') as f:
-                    f.writelines(new_lines)
-                fixed.append(path)
-        except Exception as err:
-            print('Cannot fix %s: %s' % (path, err))
-    return fixed
-
-
-@status("Docs modified", modal=True)
-def docs_modified(file_paths):
-    """Report if any file in the Doc directory has been changed."""
-    return bool(file_paths)
-
-
-@status("Misc/ACKS updated", modal=True)
-def credit_given(file_paths):
-    """Check if Misc/ACKS has been changed."""
-    return os.path.join('Misc', 'ACKS') in file_paths
-
-
-@status("Misc/NEWS.d updated with `blurb`", modal=True)
-def reported_news(file_paths):
-    """Check if Misc/NEWS.d has been changed."""
-    return any(p.startswith(os.path.join('Misc', 'NEWS.d', 'next'))
-               for p in file_paths)
-
-@status("configure regenerated", modal=True, info=str)
-def regenerated_configure(file_paths):
-    """Check if configure has been regenerated."""
-    if 'configure.ac' in file_paths:
-        return "yes" if 'configure' in file_paths else "no"
-    else:
-        return "not needed"
-
-@status("pyconfig.h.in regenerated", modal=True, info=str)
-def regenerated_pyconfig_h_in(file_paths):
-    """Check if pyconfig.h.in has been regenerated."""
-    if 'configure.ac' in file_paths:
-        return "yes" if 'pyconfig.h.in' in file_paths else "no"
-    else:
-        return "not needed"
-
-def ci(pull_request):
-    if pull_request == 'false':
-        print('Not a pull request; skipping')
-        return
-    base_branch = get_base_branch()
-    file_paths = changed_files(base_branch)
-    python_files = [fn for fn in file_paths if fn.endswith('.py')]
-    c_files = [fn for fn in file_paths if fn.endswith(('.c', '.h'))]
-    doc_files = [fn for fn in file_paths if fn.startswith('Doc') and
-                 fn.endswith(('.rst', '.inc'))]
-    fixed = []
-    fixed.extend(normalize_whitespace(python_files))
-    fixed.extend(normalize_c_whitespace(c_files))
-    fixed.extend(normalize_docs_whitespace(doc_files))
-    if not fixed:
-        print('No whitespace issues found')
-    else:
-        print(f'Please fix the {len(fixed)} file(s) with whitespace issues')
-        print('(on UNIX you can run `make patchcheck` to make the fixes)')
-        sys.exit(1)
-
-def main():
-    base_branch = get_base_branch()
-    file_paths = changed_files(base_branch)
-    python_files = [fn for fn in file_paths if fn.endswith('.py')]
-    c_files = [fn for fn in file_paths if fn.endswith(('.c', '.h'))]
-    doc_files = [fn for fn in file_paths if fn.startswith('Doc') and
-                 fn.endswith(('.rst', '.inc'))]
-    misc_files = {p for p in file_paths if p.startswith('Misc')}
-    # PEP 8 whitespace rules enforcement.
-    normalize_whitespace(python_files)
-    # C rules enforcement.
-    normalize_c_whitespace(c_files)
-    # Doc whitespace enforcement.
-    normalize_docs_whitespace(doc_files)
-    # Docs updated.
-    docs_modified(doc_files)
-    # Misc/ACKS changed.
-    credit_given(misc_files)
-    # Misc/NEWS changed.
-    reported_news(misc_files)
-    # Regenerated configure, if necessary.
-    regenerated_configure(file_paths)
-    # Regenerated pyconfig.h.in, if necessary.
-    regenerated_pyconfig_h_in(file_paths)
-
-    # Test suite run and passed.
-    if python_files or c_files:
-        end = " and check for refleaks?" if c_files else "?"
-        print()
-        print("Did you run the test suite" + end)
-
-
-if __name__ == '__main__':
-    import argparse
-    parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument('--ci',
-                        help='Perform pass/fail checks')
-    args = parser.parse_args()
-    if args.ci:
-        ci(args.ci)
-    else:
-        main()
--- a/Tools/scripts/reindent.py
+++ b/Tools/scripts/reindent.py
@@ -1,333 +0,0 @@
-#! /usr/bin/env python3
-
-# Released to the public domain, by Tim Peters, 03 October 2000.
-
-"""reindent [-d][-r][-v] [ path ... ]
-
-d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
-r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
-v (--verbose)  Verbose.   Print informative msgs; else no output.
-   (--newline)  Newline.   Specify the newline character to use (CRLF, LF).
-                           Default is the same as the original file.
-h (--help)     Help.      Print this usage information and exit.
-
-Change Python (.py) files to use 4-space indents and no hard tab characters.
-Also trim excess spaces and tabs from ends of lines, and remove empty lines
-at the end of files.  Also ensure the last line ends with a newline.
-
-If no paths are given on the command line, reindent operates as a filter,
-reading a single source file from standard input and writing the transformed
-source to standard output.  In this case, the -d, -r and -v flags are
-ignored.
-
-You can pass one or more file and/or directory paths.  When a directory
-path, all .py files within the directory will be examined, and, if the -r
-option is given, likewise recursively for subdirectories.
-
-If output is not to standard output, reindent overwrites files in place,
-renaming the originals with a .bak extension.  If it finds nothing to
-change, the file is left alone.  If reindent does change a file, the changed
-file is a fixed-point for future runs (i.e., running reindent on the
-resulting .py file won't change it again).
-
-The hard part of reindenting is figuring out what to do with comment
-lines.  So long as the input files get a clean bill of health from
-tabnanny.py, reindent should do a good job.
-
-The backup file is a copy of the one that is being reindented. The ".bak"
-file is generated with shutil.copy(), but some corner cases regarding
-user/group and permissions could leave the backup file more readable than
-you'd prefer. You can always use the --nobackup option to prevent this.
-"""
-
-__version__ = "1"
-
-import tokenize
-import os
-import shutil
-import sys
-
-verbose = False
-recurse = False
-dryrun = False
-makebackup = True
-# A specified newline to be used in the output (set by --newline option)
-spec_newline = None
-
-
-def usage(msg=None):
-    if msg is None:
-        msg = __doc__
-    print(msg, file=sys.stderr)
-
-
-def errprint(*args):
-    sys.stderr.write(" ".join(str(arg) for arg in args))
-    sys.stderr.write("\n")
-
-def main():
-    import getopt
-    global verbose, recurse, dryrun, makebackup, spec_newline
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "drnvh",
-            ["dryrun", "recurse", "nobackup", "verbose", "newline=", "help"])
-    except getopt.error as msg:
-        usage(msg)
-        return
-    for o, a in opts:
-        if o in ('-d', '--dryrun'):
-            dryrun = True
-        elif o in ('-r', '--recurse'):
-            recurse = True
-        elif o in ('-n', '--nobackup'):
-            makebackup = False
-        elif o in ('-v', '--verbose'):
-            verbose = True
-        elif o in ('--newline',):
-            if not a.upper() in ('CRLF', 'LF'):
-                usage()
-                return
-            spec_newline = dict(CRLF='\r\n', LF='\n')[a.upper()]
-        elif o in ('-h', '--help'):
-            usage()
-            return
-    if not args:
-        r = Reindenter(sys.stdin)
-        r.run()
-        r.write(sys.stdout)
-        return
-    for arg in args:
-        check(arg)
-
-
-def check(file):
-    if os.path.isdir(file) and not os.path.islink(file):
-        if verbose:
-            print("listing directory", file)
-        names = os.listdir(file)
-        for name in names:
-            fullname = os.path.join(file, name)
-            if ((recurse and os.path.isdir(fullname) and
-                 not os.path.islink(fullname) and
-                 not os.path.split(fullname)[1].startswith("."))
-                or name.lower().endswith(".py")):
-                check(fullname)
-        return
-
-    if verbose:
-        print("checking", file, "...", end=' ')
-    with open(file, 'rb') as f:
-        try:
-            encoding, _ = tokenize.detect_encoding(f.readline)
-        except SyntaxError as se:
-            errprint("%s: SyntaxError: %s" % (file, str(se)))
-            return
-    try:
-        with open(file, encoding=encoding) as f:
-            r = Reindenter(f)
-    except IOError as msg:
-        errprint("%s: I/O Error: %s" % (file, str(msg)))
-        return
-
-    newline = spec_newline if spec_newline else r.newlines
-    if isinstance(newline, tuple):
-        errprint("%s: mixed newlines detected; cannot continue without --newline" % file)
-        return
-
-    if r.run():
-        if verbose:
-            print("changed.")
-            if dryrun:
-                print("But this is a dry run, so leaving it alone.")
-        if not dryrun:
-            bak = file + ".bak"
-            if makebackup:
-                shutil.copyfile(file, bak)
-                if verbose:
-                    print("backed up", file, "to", bak)
-            with open(file, "w", encoding=encoding, newline=newline) as f:
-                r.write(f)
-            if verbose:
-                print("wrote new", file)
-        return True
-    else:
-        if verbose:
-            print("unchanged.")
-        return False
-
-
-def _rstrip(line, JUNK='\n \t'):
-    """Return line stripped of trailing spaces, tabs, newlines.
-
-    Note that line.rstrip() instead also strips sundry control characters,
-    but at least one known Emacs user expects to keep junk like that, not
-    mentioning Barry by name or anything <wink>.
-    """
-
-    i = len(line)
-    while i > 0 and line[i - 1] in JUNK:
-        i -= 1
-    return line[:i]
-
-
-class Reindenter:
-
-    def __init__(self, f):
-        self.find_stmt = 1  # next token begins a fresh stmt?
-        self.level = 0      # current indent level
-
-        # Raw file lines.
-        self.raw = f.readlines()
-
-        # File lines, rstripped & tab-expanded.  Dummy at start is so
-        # that we can use tokenize's 1-based line numbering easily.
-        # Note that a line is all-blank iff it's "\n".
-        self.lines = [_rstrip(line).expandtabs() + "\n"
-                      for line in self.raw]
-        self.lines.insert(0, None)
-        self.index = 1  # index into self.lines of next line
-
-        # List of (lineno, indentlevel) pairs, one for each stmt and
-        # comment line.  indentlevel is -1 for comment lines, as a
-        # signal that tokenize doesn't know what to do about them;
-        # indeed, they're our headache!
-        self.stats = []
-
-        # Save the newlines found in the file so they can be used to
-        #  create output without mutating the newlines.
-        self.newlines = f.newlines
-
-    def run(self):
-        tokens = tokenize.generate_tokens(self.getline)
-        for _token in tokens:
-            self.tokeneater(*_token)
-        # Remove trailing empty lines.
-        lines = self.lines
-        while lines and lines[-1] == "\n":
-            lines.pop()
-        # Sentinel.
-        stats = self.stats
-        stats.append((len(lines), 0))
-        # Map count of leading spaces to # we want.
-        have2want = {}
-        # Program after transformation.
-        after = self.after = []
-        # Copy over initial empty lines -- there's nothing to do until
-        # we see a line with *something* on it.
-        i = stats[0][0]
-        after.extend(lines[1:i])
-        for i in range(len(stats) - 1):
-            thisstmt, thislevel = stats[i]
-            nextstmt = stats[i + 1][0]
-            have = getlspace(lines[thisstmt])
-            want = thislevel * 4
-            if want < 0:
-                # A comment line.
-                if have:
-                    # An indented comment line.  If we saw the same
-                    # indentation before, reuse what it most recently
-                    # mapped to.
-                    want = have2want.get(have, -1)
-                    if want < 0:
-                        # Then it probably belongs to the next real stmt.
-                        for j in range(i + 1, len(stats) - 1):
-                            jline, jlevel = stats[j]
-                            if jlevel >= 0:
-                                if have == getlspace(lines[jline]):
-                                    want = jlevel * 4
-                                break
-                    if want < 0:           # Maybe it's a hanging
-                                           # comment like this one,
-                        # in which case we should shift it like its base
-                        # line got shifted.
-                        for j in range(i - 1, -1, -1):
-                            jline, jlevel = stats[j]
-                            if jlevel >= 0:
-                                want = have + (getlspace(after[jline - 1]) -
-                                               getlspace(lines[jline]))
-                                break
-                    if want < 0:
-                        # Still no luck -- leave it alone.
-                        want = have
-                else:
-                    want = 0
-            assert want >= 0
-            have2want[have] = want
-            diff = want - have
-            if diff == 0 or have == 0:
-                after.extend(lines[thisstmt:nextstmt])
-            else:
-                for line in lines[thisstmt:nextstmt]:
-                    if diff > 0:
-                        if line == "\n":
-                            after.append(line)
-                        else:
-                            after.append(" " * diff + line)
-                    else:
-                        remove = min(getlspace(line), -diff)
-                        after.append(line[remove:])
-        return self.raw != self.after
-
-    def write(self, f):
-        f.writelines(self.after)
-
-    # Line-getter for tokenize.
-    def getline(self):
-        if self.index >= len(self.lines):
-            line = ""
-        else:
-            line = self.lines[self.index]
-            self.index += 1
-        return line
-
-    # Line-eater for tokenize.
-    def tokeneater(self, type, token, slinecol, end, line,
-                   INDENT=tokenize.INDENT,
-                   DEDENT=tokenize.DEDENT,
-                   NEWLINE=tokenize.NEWLINE,
-                   COMMENT=tokenize.COMMENT,
-                   NL=tokenize.NL):
-
-        if type == NEWLINE:
-            # A program statement, or ENDMARKER, will eventually follow,
-            # after some (possibly empty) run of tokens of the form
-            #     (NL | COMMENT)* (INDENT | DEDENT+)?
-            self.find_stmt = 1
-
-        elif type == INDENT:
-            self.find_stmt = 1
-            self.level += 1
-
-        elif type == DEDENT:
-            self.find_stmt = 1
-            self.level -= 1
-
-        elif type == COMMENT:
-            if self.find_stmt:
-                self.stats.append((slinecol[0], -1))
-                # but we're still looking for a new stmt, so leave
-                # find_stmt alone
-
-        elif type == NL:
-            pass
-
-        elif self.find_stmt:
-            # This is the first "real token" following a NEWLINE, so it
-            # must be the first token of the next program statement, or an
-            # ENDMARKER.
-            self.find_stmt = 0
-            if line:   # not endmarker
-                self.stats.append((slinecol[0], self.level))
-
-
-# Count number of leading blanks.
-def getlspace(line):
-    i, n = 0, len(line)
-    while i < n and line[i] == " ":
-        i += 1
-    return i
-
-
-if __name__ == '__main__':
-    main()
--- a/Tools/scripts/untabify.py
+++ b/Tools/scripts/untabify.py
@@ -1,55 +0,0 @@
-#! /usr/bin/env python3
-
-"Replace tabs with spaces in argument files.  Print names of changed files."
-
-import os
-import sys
-import getopt
-import tokenize
-
-def main():
-    tabsize = 8
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "t:")
-        if not args:
-            raise getopt.error("At least one file argument required")
-    except getopt.error as msg:
-        print(msg)
-        print("usage:", sys.argv[0], "[-t tabwidth] file ...")
-        return
-    for optname, optvalue in opts:
-        if optname == '-t':
-            tabsize = int(optvalue)
-
-    for filename in args:
-        process(filename, tabsize)
-
-
-def process(filename, tabsize, verbose=True):
-    try:
-        with tokenize.open(filename) as f:
-            text = f.read()
-            encoding = f.encoding
-    except IOError as msg:
-        print("%r: I/O error: %s" % (filename, msg))
-        return
-    newtext = text.expandtabs(tabsize)
-    if newtext == text:
-        return
-    backup = filename + "~"
-    try:
-        os.unlink(backup)
-    except OSError:
-        pass
-    try:
-        os.rename(filename, backup)
-    except OSError:
-        pass
-    with open(filename, "w", encoding=encoding) as f:
-        f.write(newtext)
-    if verbose:
-        print(filename)
-
-
-if __name__ == '__main__':
-    main()