Support comparing two sets of pystats (GH-98816)

This adds support for comparing pystats collected from two different builds. - The `--json-output` option can be used to load a set of raw stats and write them out as a JSON file. - Two of these JSON files can then be provided on a later run, in which case comparative results between the two are output.
2025-12-23 09:19:18 +00:00 · 2022-11-04 06:15:54 -04:00 · 2022-11-04 06:15:54 -04:00 · 2844aa6a8e
commit 2844aa6a8e
parent 044bcc1771
2 changed files with 365 additions and 122 deletions
--- a/Doc/using/configure.rst
+++ b/Doc/using/configure.rst
@ -131,7 +131,8 @@ General Options
   Turn on internal statistics gathering.

   The statistics will be dumped to an arbitrary (probably unique) file in
-   ``/tmp/py_stats/``, or ``C:\temp\py_stats\`` on Windows.
+   ``/tmp/py_stats/``, or ``C:\temp\py_stats\`` on Windows. If that directory
+   does not exist, results will be printed on stdout.

   Use ``Tools/scripts/summarize_stats.py`` to read the stats.

--- a/Tools/scripts/summarize_stats.py
+++ b/Tools/scripts/summarize_stats.py
@ -2,7 +2,9 @@
 default stats folders.
 """

+import argparse
 import collections
+import json
 import os.path
 import opcode
 from datetime import date
@ -32,6 +34,93 @@ opmap = dict(sorted(opmap.items()))

 TOTAL = "specialization.deferred", "specialization.hit", "specialization.miss", "execution_count"

+def join_rows(a_rows, b_rows):
+    """
+    Merge two tables side by side, matching rows on their first column.
+
+    Each table is a sequence of equal-width rows.  The result has one row per
+    distinct key: the key, then the remaining cells of the matching row from
+    a_rows, then those from b_rows.  A side with no row for a key is padded
+    with empty strings.  Keys keep a_rows' order, followed by the keys that
+    only occur in b_rows.
+
+    Raises ValueError if either table is ragged, if the two tables differ in
+    width, or if a key is repeated within one table.
+    """
+    if not a_rows and not b_rows:
+        return []
+
+    # Distinct row widths per table; a well-formed, non-empty table has
+    # exactly one and an empty table has none.
+    a_widths = {len(row) for row in a_rows}
+    if len(a_widths) > 1:
+        raise ValueError("Table a is ragged")
+
+    b_widths = {len(row) for row in b_rows}
+    if len(b_widths) > 1:
+        raise ValueError("Table b is ragged")
+
+    if a_widths and b_widths and a_widths != b_widths:
+        raise ValueError("Tables have different widths")
+
+    (ncols,) = a_widths or b_widths
+    padding = [""] * (ncols - 1)
+
+    a_data = {row[0]: row[1:] for row in a_rows}
+    b_data = {row[0]: row[1:] for row in b_rows}
+    if len(a_data) != len(a_rows) or len(b_data) != len(b_rows):
+        raise ValueError("Duplicate keys")
+
+    # A's keys come first, in A's order; keys unique to B follow in B's order.
+    merged = []
+    for key in [*a_data, *(k for k in b_data if k not in a_data)]:
+        merged.append((key, *a_data.get(key, padding), *b_data.get(key, padding)))
+    return merged
+
+
+def calculate_specialization_stats(family_stats, total):
+    """
+    Build the "Kind / Count / Ratio" rows for one instruction family.
+
+    family_stats maps stat names to counts for a single family and total is
+    the denominator of the ratio column.  Failure kinds, pair counts and the
+    success/failure/specializable entries are skipped.  Rows are ordered by
+    stat name; each is (label, count, ratio) with label and count
+    right-aligned to 12 characters.
+
+    The ratio is left blank when total is zero: the comparative report only
+    checks that the base and head totals are not both zero, so one side can
+    reach this function with a zero total.
+    """
+    skipped = ("specialization.success", "specialization.failure", "specializable")
+    rows = []
+    for key in sorted(family_stats):
+        if key.startswith(("specialization.failure_kinds", "pair")) or key in skipped:
+            continue
+        if key in ("specialization.hit", "specialization.miss"):
+            label = key[len("specialization."):]
+        elif key == "execution_count":
+            label = "unquickened"
+        else:
+            label = key
+        count = family_stats[key]
+        ratio = f"{100*count/total:0.1f}%" if total else ""
+        rows.append((f"{label:>12}", f"{count:>12}", ratio))
+    return rows
+
+
+def calculate_specialization_success_failure(family_stats):
+    """
+    Build the rows of the "Specialization attempts" table for one family.
+
+    Returns a (label, count, ratio) row for successes and another for
+    failures, each ratio being that outcome's share of all attempts.
+    Returns an empty list when no attempts were recorded.
+    """
+    outcome_keys = ("specialization.success",  "specialization.failure")
+    total_attempts = sum(family_stats.get(key, 0) for key in outcome_keys)
+    if not total_attempts:
+        return []
+    rows = []
+    for key in outcome_keys:
+        outcome = key[len("specialization."):]
+        count = family_stats.get(key, 0)
+        rows.append(
+            (outcome[0].upper() + outcome[1:], count, f"{100*count/total_attempts:0.1f}%")
+        )
+    return rows
+
+
+def calculate_specialization_failure_kinds(name, family_stats, defines):
+    """
+    Build the "Failure kind" rows for one instruction family.
+
+    Reads every "specialization.failure_kind..." entry of family_stats and
+    returns one (description, count, ratio) row per kind with a non-zero
+    count, largest count first (ties: higher kind index first).  The
+    description comes from kind_to_text(index, defines, name) and the ratio
+    is relative to the family's "specialization.failure" count.
+
+    Kind indices are collected in a dict rather than a fixed-size list, so
+    stats that report a kind index of 30 or more are still summarized
+    instead of raising IndexError.
+    """
+    total_failures = family_stats.get("specialization.failure", 0)
+    failure_kinds = {}
+    for key, value in family_stats.items():
+        if not key.startswith("specialization.failure_kind"):
+            continue
+        # The key is expected to end in "[N]": drop the closing bracket and
+        # take what follows the opening one.
+        _, index = key[:-1].split("[")
+        failure_kinds[int(index)] = value
+    failures = sorted(
+        ((value, index) for index, value in failure_kinds.items() if value),
+        reverse=True,
+    )
+    return [
+        (kind_to_text(index, defines, name), value, f"{100*value/total_failures:0.1f}%")
+        for value, index in failures
+    ]
+
+
 def print_specialization_stats(name, family_stats, defines):
    if "specializable" not in family_stats:
        return
@ -39,65 +128,65 @@ def print_specialization_stats(name, family_stats, defines):
    if total == 0:
        return
    with Section(name, 3, f"specialization stats for {name} family"):
-        rows = []
-        for key in sorted(family_stats):
-            if key.startswith("specialization.failure_kinds"):
-                continue
-            if key in ("specialization.hit", "specialization.miss"):
-                label = key[len("specialization."):]
-            elif key == "execution_count":
-                label = "unquickened"
-            elif key in ("specialization.success",  "specialization.failure", "specializable"):
-                continue
-            elif key.startswith("pair"):
-                continue
-            else:
-                label = key
-            rows.append((f"{label:>12}", f"{family_stats[key]:>12}", f"{100*family_stats[key]/total:0.1f}%"))
+        rows = calculate_specialization_stats(family_stats, total)
        emit_table(("Kind", "Count", "Ratio"), rows)
-        print_title("Specialization attempts", 4)
-        total_attempts = 0
-        for key in ("specialization.success",  "specialization.failure"):
-            total_attempts += family_stats.get(key, 0)
-        rows = []
-        if total_attempts:
-            for key in ("specialization.success",  "specialization.failure"):
-                label = key[len("specialization."):]
-                label = label[0].upper() + label[1:]
-                val = family_stats.get(key, 0)
-                rows.append((label, val, f"{100*val/total_attempts:0.1f}%"))
+        rows = calculate_specialization_success_failure(family_stats)
+        if rows:
+            print_title("Specialization attempts", 4)
            emit_table(("", "Count:", "Ratio:"), rows)
-        total_failures = family_stats.get("specialization.failure", 0)
-        failure_kinds = [ 0 ] * 30
-        for key in family_stats:
-            if not key.startswith("specialization.failure_kind"):
-                continue
-            _, index = key[:-1].split("[")
-            index =  int(index)
-            failure_kinds[index] = family_stats[key]
-        failures = [(value, index) for (index, value) in enumerate(failure_kinds)]
-        failures.sort(reverse=True)
-        rows = []
-        for value, index in failures:
-            if not value:
-                continue
-            rows.append((kind_to_text(index, defines, name), value, f"{100*value/total_failures:0.1f}%"))
-        emit_table(("Failure kind", "Count:", "Ratio:"), rows)
+            rows = calculate_specialization_failure_kinds(name, family_stats, defines)
+            emit_table(("Failure kind", "Count:", "Ratio:"), rows)

-def gather_stats():
-    stats = collections.Counter()
-    for filename in os.listdir(DEFAULT_DIR):
-        with open(os.path.join(DEFAULT_DIR, filename)) as fd:
-            for line in fd:
-                try:
-                    key, value = line.split(":")
-                except ValueError:
-                    print (f"Unparsable line: '{line.strip()}' in  {filename}", file=sys.stderr)
-                    continue
-                key = key.strip()
-                value = int(value)
-                stats[key] += value
-    return stats
+def print_comparative_specialization_stats(name, base_family_stats, head_family_stats, defines):
+    """
+    Print base-vs-head specialization tables for one instruction family.
+
+    Nothing is printed when the base stats lack a "specializable" entry or
+    when the TOTAL counters sum to zero on both sides.  Otherwise a section
+    is opened containing the per-kind table and, if either side recorded
+    specialization attempts, the attempts table followed by the failure-kind
+    table.  Each table is the base and head rows joined with join_rows().
+    """
+    if "specializable" not in base_family_stats:
+        return
+
+    base_total, head_total = (
+        sum(family_stats.get(kind, 0) for kind in TOTAL)
+        for family_stats in (base_family_stats, head_family_stats)
+    )
+    if base_total + head_total == 0:
+        return
+
+    with Section(name, 3, f"specialization stats for {name} family"):
+        kind_rows = join_rows(
+            calculate_specialization_stats(base_family_stats, base_total),
+            calculate_specialization_stats(head_family_stats, head_total),
+        )
+        emit_table(
+            ("Kind", "Base Count", "Base Ratio", "Head Count", "Head Ratio"),
+            kind_rows
+        )
+
+        attempt_rows = join_rows(
+            calculate_specialization_success_failure(base_family_stats),
+            calculate_specialization_success_failure(head_family_stats),
+        )
+        if not attempt_rows:
+            return
+
+        print_title("Specialization attempts", 4)
+        emit_table(("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"), attempt_rows)
+        failure_rows = join_rows(
+            calculate_specialization_failure_kinds(name, base_family_stats, defines),
+            calculate_specialization_failure_kinds(name, head_family_stats, defines),
+        )
+        emit_table(
+            ("Failure kind", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"),
+            failure_rows
+        )
+
+
+def gather_stats(input):
+    """
+    Load raw stats from a JSON file or from a directory of stats dumps.
+
+    If input is a file, it is parsed as JSON and the result is returned as
+    is.  If it is a directory, every file in it is read as "key: value"
+    lines and the integer values are summed per key into a
+    collections.Counter.  Lines that do not split into exactly one key and
+    one value are reported on stderr and skipped.
+
+    Raises ValueError if input is neither a file nor a directory.
+    """
+    # Note the output of this function must be JSON-serializable
+
+    if os.path.isfile(input):
+        with open(input, "r") as fd:
+            return json.load(fd)
+    elif os.path.isdir(input):
+        stats = collections.Counter()
+        for filename in os.listdir(input):
+            with open(os.path.join(input, filename)) as fd:
+                for line in fd:
+                    try:
+                        key, value = line.split(":")
+                    except ValueError:
+                        print(f"Unparsable line: '{line.strip()}' in  {filename}", file=sys.stderr)
+                        continue
+                    key = key.strip()
+                    value = int(value)
+                    stats[key] += value
+        return stats
+    else:
+        # Use the "!r" conversion: ":r" would be a format spec, which str
+        # rejects with its own, unrelated ValueError.
+        raise ValueError(f"{input!r} is not a file or directory path")

 def extract_opcode_stats(stats):
    opcode_stats = [ {} for _ in range(256) ]
@ -213,50 +302,98 @@ def emit_table(header, rows):
        print("|", " | ".join(to_str(i) for i in row), "|")
    print()

+def calculate_execution_counts(opcode_stats, total):
+    """
+    Build the execution-count rows, most-executed instruction first.
+
+    opcode_stats is indexed by opcode number; entries without an
+    "execution_count" are ignored.  Each row is
+    (name, count, self ratio, cumulative ratio, miss ratio), where the self
+    and cumulative ratios are percentages of total.  The miss ratio is the
+    instruction's "specialization.miss" count as a percentage of its own
+    execution count; it is blank when that count is missing or zero, or
+    when the instruction is marked "specializable".
+    """
+    counts = []
+    for i, opcode_stat in enumerate(opcode_stats):
+        if "execution_count" in opcode_stat:
+            count = opcode_stat['execution_count']
+            miss = 0
+            if "specializable" not in opcode_stat:
+                miss = opcode_stat.get("specialization.miss")
+            counts.append((count, opname[i], miss))
+    counts.sort(reverse=True)
+    cumulative = 0
+    rows = []
+    for (count, name, miss) in counts:
+        cumulative += count
+        miss_ratio = f"{100*miss/count:0.1f}%" if miss else ""
+        # Every instruction gets a row, whether or not it has a miss ratio.
+        rows.append((name, count, f"{100*count/total:0.1f}%",
+                     f"{100*cumulative/total:0.1f}%", miss_ratio))
+    return rows
+
+
 def emit_execution_counts(opcode_stats, total):
    with Section("Execution counts", summary="execution counts for all instructions"):
-        counts = []
-        for i, opcode_stat in enumerate(opcode_stats):
-            if "execution_count" in opcode_stat:
-                count = opcode_stat['execution_count']
-                miss = 0
-                if "specializable" not in opcode_stat:
-                    miss = opcode_stat.get("specialization.miss")
-                counts.append((count, opname[i], miss))
-        counts.sort(reverse=True)
-        cumulative = 0
-        rows = []
-        for (count, name, miss) in counts:
-            cumulative += count
-            if miss:
-                miss =  f"{100*miss/count:0.1f}%"
-            else:
-                miss = ""
-            rows.append((name, count, f"{100*count/total:0.1f}%",
-                        f"{100*cumulative/total:0.1f}%", miss))
+        rows = calculate_execution_counts(opcode_stats, total)
        emit_table(
            ("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"),
            rows
        )

+def emit_comparative_execution_counts(
+    base_opcode_stats, base_total, head_opcode_stats, head_total
+):
+    """
+    Print a table comparing per-instruction execution counts.
+
+    For every instruction that has a row on either side, shows the base
+    count, the head count and the relative change from base to head as a
+    percentage.  An instruction whose base count is 0 is reported as a 100%
+    change.  Rows are ordered by the magnitude of the change, largest first.
+    """
+    with Section("Execution counts", summary="execution counts for all instructions"):
+        base_rows = calculate_execution_counts(base_opcode_stats, base_total)
+        head_rows = calculate_execution_counts(head_opcode_stats, head_total)
+        # Index each side by instruction name (the first cell of every row).
+        base_data = dict((x[0], x[1:]) for x in base_rows)
+        head_data = dict((x[0], x[1:]) for x in head_rows)
+        opcodes = set(base_data.keys()) | set(head_data.keys())

-def emit_specialization_stats(opcode_stats):
+        rows = []
+        # Stand-in for an instruction that is missing from one side; only its
+        # first element (the count) is read below.
+        default = [0, "0.0%", "0.0%", 0]
+        for opcode in opcodes:
+            base_entry = base_data.get(opcode, default)
+            head_entry = head_data.get(opcode, default)
+            if base_entry[0] == 0:
+                change = 1
+            else:
+                change = (head_entry[0] - base_entry[0]) / base_entry[0]
+            rows.append(
+                (opcode, base_entry[0], head_entry[0],
+                 f"{100*change:0.1f}%"))
+
+        # Sort on the formatted change with its trailing "%" stripped.
+        rows.sort(key=lambda x: -abs(float(x[-1][:-1])))
+
+        emit_table(
+            ("Name", "Base Count:", "Head Count:", "Change:"),
+            rows
+        )
+
+def get_defines():
+    """
+    Return the result of parse_kinds() applied to Python/specialize.c, which
+    is located relative to this script's directory.
+    """
    spec_path = os.path.join(os.path.dirname(__file__), "../../Python/specialize.c")
    with open(spec_path) as spec_src:
        defines = parse_kinds(spec_src)
+    return defines
+
+def emit_specialization_stats(opcode_stats):
+    """
+    Print the "Specialization stats" section: one
+    print_specialization_stats() call per entry of opcode_stats, named by
+    opname at the entry's index.
+    """
+    defines = get_defines()
    with Section("Specialization stats", summary="specialization stats by family"):
        for i, opcode_stat in enumerate(opcode_stats):
            name = opname[i]
            print_specialization_stats(name, opcode_stat, defines)

-def emit_specialization_overview(opcode_stats, total):
+def emit_comparative_specialization_stats(base_opcode_stats, head_opcode_stats):
+    """
+    Print the comparative "Specialization stats" section.
+
+    Walks the base and head per-opcode stats in lockstep and prints the
+    comparative tables for each opcode, named by opname at its index.
+    """
+    defines = get_defines()
+    with Section("Specialization stats", summary="specialization stats by family"):
+        paired_stats = zip(base_opcode_stats, head_opcode_stats)
+        for opcode, (base_family, head_family) in enumerate(paired_stats):
+            print_comparative_specialization_stats(
+                opname[opcode], base_family, head_family, defines
+            )
+
+
+def calculate_specialization_effectiveness(opcode_stats, total):
+    """
+    Return the "Basic", "Not specialized" and "Specialized" rows, each as
+    (label, count, ratio), using the three counts returned by
+    categorized_counts(opcode_stats) and expressing each as a percentage of
+    total.
+    """
    basic, not_specialized, specialized = categorized_counts(opcode_stats)
+    return [
+        ("Basic", basic, f"{basic*100/total:0.1f}%"),
+        ("Not specialized", not_specialized, f"{not_specialized*100/total:0.1f}%"),
+        ("Specialized", specialized, f"{specialized*100/total:0.1f}%"),
+    ]
+
+def emit_specialization_overview(opcode_stats, total):
    with Section("Specialization effectiveness"):
-        emit_table(("Instructions", "Count:", "Ratio:"), (
-            ("Basic", basic, f"{basic*100/total:0.1f}%"),
-            ("Not specialized", not_specialized, f"{not_specialized*100/total:0.1f}%"),
-            ("Specialized", specialized, f"{specialized*100/total:0.1f}%"),
-        ))
+        rows = calculate_specialization_effectiveness(opcode_stats, total)
+        emit_table(("Instructions", "Count:", "Ratio:"), rows)
        for title, field in (("Deferred", "specialization.deferred"), ("Misses", "specialization.miss")):
            total = 0
            counts = []
@ -270,53 +407,91 @@ def emit_specialization_overview(opcode_stats, total):
                    rows = [ (name, count, f"{100*count/total:0.1f}%") for (count, name) in counts[:10] ]
                    emit_table(("Name", "Count:", "Ratio:"), rows)

-def emit_call_stats(stats):
+def emit_comparative_specialization_overview(base_opcode_stats, base_total, head_opcode_stats, head_total):
+    """
+    Print the "Specialization effectiveness" section with the base and head
+    effectiveness rows joined side by side.
+    """
+    header = ("Instructions", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:")
+    with Section("Specialization effectiveness"):
+        rows = join_rows(
+            calculate_specialization_effectiveness(base_opcode_stats, base_total),
+            calculate_specialization_effectiveness(head_opcode_stats, head_total),
+        )
+        emit_table(header, rows)
+
+
+def get_stats_defines():
+    """
+    Return the result of parse_kinds(..., prefix="EVAL_CALL") applied to
+    Include/pystats.h, which is located relative to this script's directory.
+    """
    stats_path = os.path.join(os.path.dirname(__file__), "../../Include/pystats.h")
    with open(stats_path) as stats_src:
        defines = parse_kinds(stats_src, prefix="EVAL_CALL")
+    return defines
+
+def calculate_call_stats(stats):
+    """
+    Build the (label, count, ratio) rows of the "Call stats" table.
+
+    Every ratio is a percentage of the sum of all entries whose key contains
+    "Calls to".  Rows are produced for those entries, for the other entries
+    whose key starts with "Calls " (these are expected to end in "[N]" and
+    are labelled using entry N of get_stats_defines()), and finally for
+    entries whose key starts with "Frame".
+    """
+    defines = get_stats_defines()
+    total = sum(value for key, value in stats.items() if "Calls to" in key)
+    # Created unconditionally so it exists even when stats has no
+    # "Calls to" entry.
+    rows = []
+    for key, value in stats.items():
+        if "Calls to" in key:
+            rows.append((key, value, f"{100*value/total:0.1f}%"))
+        elif key.startswith("Calls "):
+            name, index = key[:-1].split("[")
+            index = int(index)
+            label = name + " (" + pretty(defines[index][0]) + ")"
+            rows.append((label, value, f"{100*value/total:0.1f}%"))
+    for key, value in stats.items():
+        if key.startswith("Frame"):
+            rows.append((key, value, f"{100*value/total:0.1f}%"))
+    return rows
+
+
+def emit_call_stats(stats):
    with Section("Call stats", summary="Inlined calls and frame stats"):
-        total = 0
-        for key, value in stats.items():
-            if "Calls to" in key:
-                total += value
-        rows = []
-        for key, value in stats.items():
-            if "Calls to" in key:
-                rows.append((key, value, f"{100*value/total:0.1f}%"))
-            elif key.startswith("Calls "):
-                name, index = key[:-1].split("[")
-                index =  int(index)
-                label = name + " (" + pretty(defines[index][0]) + ")"
-                rows.append((label, value, f"{100*value/total:0.1f}%"))
-        for key, value in stats.items():
-            if key.startswith("Frame"):
-                rows.append((key, value, f"{100*value/total:0.1f}%"))
+        rows = calculate_call_stats(stats)
        emit_table(("", "Count:", "Ratio:"), rows)

+def emit_comparative_call_stats(base_stats, head_stats):
+    """
+    Print the "Call stats" section with base and head rows side by side,
+    ordered by head ratio, largest first.
+    """
+    def head_ratio(row):
+        # join_rows() pads a key that only the base has with "", which
+        # float() rejects; rank such rows as 0%.
+        text = row[-1][:-1]
+        return float(text) if text else 0.0
+
+    with Section("Call stats", summary="Inlined calls and frame stats"):
+        base_rows = calculate_call_stats(base_stats)
+        head_rows = calculate_call_stats(head_stats)
+        rows = join_rows(base_rows, head_rows)
+        rows.sort(key=lambda row: -head_ratio(row))
+        emit_table(
+            ("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"),
+            rows
+        )
+
+
+def calculate_object_stats(stats):
+    """
+    Build the (label, count, ratio) rows of the "Object stats" table.
+
+    One row is produced per entry whose key starts with "Object"; the label
+    is the rest of the key, capitalized.  Keys containing "materialize",
+    "allocations", "increfs" or "decrefs" (checked in that order) get a
+    ratio against the matching total; all other rows have a blank ratio.
+
+    Entries missing from stats count as 0 when the totals are computed, and
+    the ratio is left blank when its total is 0, so stats without (or with
+    empty) object counters produce rows instead of an exception.
+    """
+    def total_of(*keys):
+        return sum(stats.get(key, 0) for key in keys)
+
+    totals = (
+        ("materialize", total_of("Object new values")),
+        ("allocations", total_of("Object allocations", "Object allocations from freelist")),
+        ("increfs", total_of("Object interpreter increfs", "Object increfs")),
+        ("decrefs", total_of("Object interpreter decrefs", "Object decrefs")),
+    )
+    rows = []
+    for key, value in stats.items():
+        if not key.startswith("Object"):
+            continue
+        ratio = ""
+        for marker, total in totals:
+            if marker in key:
+                if total:
+                    ratio = f"{100*value/total:0.1f}%"
+                # Only the first matching marker applies.
+                break
+        label = key[6:].strip()
+        label = label[:1].upper() + label[1:]
+        rows.append((label, value, ratio))
+    return rows
+
+
 def emit_object_stats(stats):
    with Section("Object stats", summary="allocations, frees and dict materializatons"):
-        total_materializations = stats.get("Object new values")
-        total_allocations = stats.get("Object allocations") + stats.get("Object allocations from freelist")
-        total_increfs = stats.get("Object interpreter increfs") + stats.get("Object increfs")
-        total_decrefs = stats.get("Object interpreter decrefs") + stats.get("Object decrefs")
-        rows = []
-        for key, value in stats.items():
-            if key.startswith("Object"):
-                if "materialize" in key:
-                    ratio = f"{100*value/total_materializations:0.1f}%"
-                elif "allocations" in key:
-                    ratio = f"{100*value/total_allocations:0.1f}%"
-                elif "increfs"     in key:
-                    ratio = f"{100*value/total_increfs:0.1f}%"
-                elif "decrefs"     in key:
-                    ratio = f"{100*value/total_decrefs:0.1f}%"
-                else:
-                    ratio = ""
-                label = key[6:].strip()
-                label = label[0].upper() + label[1:]
-                rows.append((label, value, ratio))
+        rows = calculate_object_stats(stats)
        emit_table(("",  "Count:", "Ratio:"), rows)

+def emit_comparative_object_stats(base_stats, head_stats):
+    """
+    Print the "Object stats" section with the base and head object rows
+    joined side by side.
+    """
+    header = ("",  "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:")
+    with Section("Object stats", summary="allocations, frees and dict materializatons"):
+        rows = join_rows(
+            calculate_object_stats(base_stats),
+            calculate_object_stats(head_stats),
+        )
+        emit_table(header, rows)
+
+
 def get_total(opcode_stats):
    total = 0
    for opcode_stat in opcode_stats:
@ -377,8 +552,7 @@ def emit_pair_counts(opcode_stats, total):
                    succ_rows
                )

-def main():
-    stats = gather_stats()
+def output_single_stats(stats):
    opcode_stats = extract_opcode_stats(stats)
    total = get_total(opcode_stats)
    emit_execution_counts(opcode_stats, total)
@ -387,8 +561,76 @@ def main():
    emit_specialization_overview(opcode_stats, total)
    emit_call_stats(stats)
    emit_object_stats(stats)
+
+def output_comparative_stats(base_stats, head_stats):
+    """
+    Print every comparative section for a base and a head set of stats:
+    execution counts, per-family specialization stats, specialization
+    effectiveness, call stats and object stats, in that order.
+    """
+    base_ops = extract_opcode_stats(base_stats)
+    base_total = get_total(base_ops)
+
+    head_ops = extract_opcode_stats(head_stats)
+    head_total = get_total(head_ops)
+
+    # The two sections that need totals take the same four arguments.
+    ops_and_totals = (base_ops, base_total, head_ops, head_total)
+
+    emit_comparative_execution_counts(*ops_and_totals)
+    emit_comparative_specialization_stats(base_ops, head_ops)
+    emit_comparative_specialization_overview(*ops_and_totals)
+    emit_comparative_call_stats(base_stats, head_stats)
+    emit_comparative_object_stats(base_stats, head_stats)
+
+
+def output_stats(inputs, json_output=None):
+    """
+    Print a report for one input, or a comparative report for two.
+
+    inputs is a list of paths accepted by gather_stats().  With a single
+    input its stats are printed and, if json_output (a writable file
+    object) is given, also dumped to it as JSON.  With two inputs the first
+    is treated as the base and the second as the head of a comparison;
+    passing json_output in that case raises ValueError.  With any other
+    number of inputs only the trailing date footer is printed.
+    """
+    if len(inputs) == 1:
+        stats = gather_stats(inputs[0])
+        if json_output is not None:
+            json.dump(stats, json_output)
+        output_single_stats(stats)
+    elif len(inputs) == 2:
+        if json_output is not None:
+            raise ValueError(
+                "Can not output to JSON when there are multiple inputs"
+            )
+
+        base_stats = gather_stats(inputs[0])
+        head_stats = gather_stats(inputs[1])
+        output_comparative_stats(base_stats, head_stats)
+
    print("---")
    print("Stats gathered on:", date.today())

+def main():
+    """
+    Parse the command line and print the requested report.
+
+    Accepts zero to two positional inputs (DEFAULT_DIR when none is given)
+    and an optional --json-output file, then hands them to output_stats().
+    Raises ValueError when more than two inputs are given.
+    """
+    inputs_help = f"""
+        Input source(s).
+        For each entry, if a .json file, the output provided by --json-output from a previous run;
+        if a directory, a directory containing raw pystats .txt files.
+        If one source is provided, its stats are printed.
+        If two sources are provided, comparative stats are printed.
+        Default is {DEFAULT_DIR}.
+        """
+
+    parser = argparse.ArgumentParser(description="Summarize pystats results")
+    parser.add_argument(
+        "inputs", nargs="*", type=str, default=[DEFAULT_DIR], help=inputs_help
+    )
+    parser.add_argument(
+        "--json-output",
+        nargs="?",
+        type=argparse.FileType("w"),
+        help="Output complete raw results to the given JSON file."
+    )
+
+    args = parser.parse_args()
+    if len(args.inputs) > 2:
+        raise ValueError("0-2 arguments may be provided.")
+
+    output_stats(args.inputs, json_output=args.json_output)
+
+
 if __name__ == "__main__":
    main()