GH-109373: Store metadata required for pystats comparison in the JSON (GH-109374)

This commit is contained in:
Michael Droettboom 2023-09-15 16:10:46 -04:00 committed by GitHub
parent 3d881453d3
commit 19f5effc27
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -2,11 +2,14 @@
default stats folders. default stats folders.
""" """
# NOTE: Bytecode introspection modules (opcode, dis, etc.) should only
# be used when loading a single dataset. When comparing datasets,
# introspection could give the wrong answer, leading to subtle errors.
import argparse import argparse
import collections import collections
import json import json
import os.path import os.path
import opcode
from datetime import date from datetime import date
import itertools import itertools
import sys import sys
@ -28,6 +31,16 @@ def format_ratio(num, den):
else: else:
return f"{num/den:.01%}" return f"{num/den:.01%}"
def percentage_to_float(s):
    """
    Converts a percentage string such as "12.5%" to a float (12.5).

    The numeric part is returned as-is; it is not divided by 100.
    The empty string is converted to 0.0.

    Raises ValueError if a non-empty string does not end in "%", or if the
    text before the "%" is not a valid float.
    """
    if s == "":
        return 0.0
    # Use an explicit check rather than `assert`: asserts are stripped under
    # `python -O`, which would silently drop the last character of a string
    # that is not actually a percentage.
    if not s.endswith("%"):
        raise ValueError(f"{s!r} is not a percentage string")
    return float(s[:-1])
def join_rows(a_rows, b_rows): def join_rows(a_rows, b_rows):
""" """
Joins two tables together, side-by-side, where the first column in each is a Joins two tables together, side-by-side, where the first column in each is a
@ -164,7 +177,12 @@ def gather_stats(input):
if os.path.isfile(input): if os.path.isfile(input):
with open(input, "r") as fd: with open(input, "r") as fd:
return json.load(fd) stats = json.load(fd)
stats["_stats_defines"] = {int(k): v for k, v in stats["_stats_defines"].items()}
stats["_defines"] = {int(k): v for k, v in stats["_defines"].items()}
return stats
elif os.path.isdir(input): elif os.path.isdir(input):
stats = collections.Counter() stats = collections.Counter()
for filename in os.listdir(input): for filename in os.listdir(input):
@ -179,6 +197,16 @@ def gather_stats(input):
value = int(value) value = int(value)
stats[key] += value stats[key] += value
stats['__nfiles__'] += 1 stats['__nfiles__'] += 1
import opcode
stats["_specialized_instructions"] = [
op for op in opcode._specialized_opmap.keys()
if "__" not in op
]
stats["_stats_defines"] = get_stats_defines()
stats["_defines"] = get_defines()
return stats return stats
else: else:
raise ValueError(f"{input:r} is not a file or directory path") raise ValueError(f"{input:r} is not a file or directory path")
@ -223,13 +251,10 @@ def kind_to_text(kind, defines, opname):
return pretty(name[len(opname)+1:]) return pretty(name[len(opname)+1:])
return "kind " + str(kind) return "kind " + str(kind)
def categorized_counts(opcode_stats): def categorized_counts(opcode_stats, specialized_instructions):
basic = 0 basic = 0
specialized = 0 specialized = 0
not_specialized = 0 not_specialized = 0
specialized_instructions = {
op for op in opcode._specialized_opmap.keys()
if "__" not in op}
for name, opcode_stat in opcode_stats.items(): for name, opcode_stat in opcode_stats.items():
if "execution_count" not in opcode_stat: if "execution_count" not in opcode_stat:
continue continue
@ -348,7 +373,7 @@ def emit_comparative_execution_counts(
(opcode, base_entry[0], head_entry[0], (opcode, base_entry[0], head_entry[0],
f"{100*change:0.1f}%")) f"{100*change:0.1f}%"))
rows.sort(key=lambda x: -abs(float(x[-1][:-1]))) rows.sort(key=lambda x: -abs(percentage_to_float(x[-1])))
emit_table( emit_table(
("Name", "Base Count:", "Head Count:", "Change:"), ("Name", "Base Count:", "Head Count:", "Change:"),
@ -361,14 +386,12 @@ def get_defines():
defines = parse_kinds(spec_src) defines = parse_kinds(spec_src)
return defines return defines
def emit_specialization_stats(opcode_stats): def emit_specialization_stats(opcode_stats, defines):
defines = get_defines()
with Section("Specialization stats", summary="specialization stats by family"): with Section("Specialization stats", summary="specialization stats by family"):
for name, opcode_stat in opcode_stats.items(): for name, opcode_stat in opcode_stats.items():
print_specialization_stats(name, opcode_stat, defines) print_specialization_stats(name, opcode_stat, defines)
def emit_comparative_specialization_stats(base_opcode_stats, head_opcode_stats): def emit_comparative_specialization_stats(base_opcode_stats, head_opcode_stats, defines):
defines = get_defines()
with Section("Specialization stats", summary="specialization stats by family"): with Section("Specialization stats", summary="specialization stats by family"):
opcodes = set(base_opcode_stats.keys()) & set(head_opcode_stats.keys()) opcodes = set(base_opcode_stats.keys()) & set(head_opcode_stats.keys())
for opcode in opcodes: for opcode in opcodes:
@ -376,17 +399,21 @@ def emit_comparative_specialization_stats(base_opcode_stats, head_opcode_stats):
opcode, base_opcode_stats[opcode], head_opcode_stats[opcode], defines opcode, base_opcode_stats[opcode], head_opcode_stats[opcode], defines
) )
def calculate_specialization_effectiveness(opcode_stats, total): def calculate_specialization_effectiveness(
basic, not_specialized, specialized = categorized_counts(opcode_stats) opcode_stats, total, specialized_instructions
):
basic, not_specialized, specialized = categorized_counts(
opcode_stats, specialized_instructions
)
return [ return [
("Basic", basic, format_ratio(basic, total)), ("Basic", basic, format_ratio(basic, total)),
("Not specialized", not_specialized, format_ratio(not_specialized, total)), ("Not specialized", not_specialized, format_ratio(not_specialized, total)),
("Specialized", specialized, format_ratio(specialized, total)), ("Specialized", specialized, format_ratio(specialized, total)),
] ]
def emit_specialization_overview(opcode_stats, total): def emit_specialization_overview(opcode_stats, total, specialized_instructions):
with Section("Specialization effectiveness"): with Section("Specialization effectiveness"):
rows = calculate_specialization_effectiveness(opcode_stats, total) rows = calculate_specialization_effectiveness(opcode_stats, total, specialized_instructions)
emit_table(("Instructions", "Count:", "Ratio:"), rows) emit_table(("Instructions", "Count:", "Ratio:"), rows)
for title, field in (("Deferred", "specialization.deferred"), ("Misses", "specialization.miss")): for title, field in (("Deferred", "specialization.deferred"), ("Misses", "specialization.miss")):
total = 0 total = 0
@ -404,10 +431,16 @@ def emit_specialization_overview(opcode_stats, total):
rows = [ (name, count, format_ratio(count, total)) for (count, name) in counts[:10] ] rows = [ (name, count, format_ratio(count, total)) for (count, name) in counts[:10] ]
emit_table(("Name", "Count:", "Ratio:"), rows) emit_table(("Name", "Count:", "Ratio:"), rows)
def emit_comparative_specialization_overview(base_opcode_stats, base_total, head_opcode_stats, head_total): def emit_comparative_specialization_overview(
base_opcode_stats, base_total, head_opcode_stats, head_total, specialized_instructions
):
with Section("Specialization effectiveness"): with Section("Specialization effectiveness"):
base_rows = calculate_specialization_effectiveness(base_opcode_stats, base_total) base_rows = calculate_specialization_effectiveness(
head_rows = calculate_specialization_effectiveness(head_opcode_stats, head_total) base_opcode_stats, base_total, specialized_instructions
)
head_rows = calculate_specialization_effectiveness(
head_opcode_stats, head_total, specialized_instructions
)
emit_table( emit_table(
("Instructions", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"), ("Instructions", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"),
join_rows(base_rows, head_rows) join_rows(base_rows, head_rows)
@ -419,8 +452,7 @@ def get_stats_defines():
defines = parse_kinds(stats_src, prefix="EVAL_CALL") defines = parse_kinds(stats_src, prefix="EVAL_CALL")
return defines return defines
def calculate_call_stats(stats): def calculate_call_stats(stats, defines):
defines = get_stats_defines()
total = 0 total = 0
for key, value in stats.items(): for key, value in stats.items():
if "Calls to" in key: if "Calls to" in key:
@ -439,17 +471,17 @@ def calculate_call_stats(stats):
rows.append((key, value, format_ratio(value, total))) rows.append((key, value, format_ratio(value, total)))
return rows return rows
def emit_call_stats(stats): def emit_call_stats(stats, defines):
with Section("Call stats", summary="Inlined calls and frame stats"): with Section("Call stats", summary="Inlined calls and frame stats"):
rows = calculate_call_stats(stats) rows = calculate_call_stats(stats, defines)
emit_table(("", "Count:", "Ratio:"), rows) emit_table(("", "Count:", "Ratio:"), rows)
def emit_comparative_call_stats(base_stats, head_stats): def emit_comparative_call_stats(base_stats, head_stats, defines):
with Section("Call stats", summary="Inlined calls and frame stats"): with Section("Call stats", summary="Inlined calls and frame stats"):
base_rows = calculate_call_stats(base_stats) base_rows = calculate_call_stats(base_stats, defines)
head_rows = calculate_call_stats(head_stats) head_rows = calculate_call_stats(head_stats, defines)
rows = join_rows(base_rows, head_rows) rows = join_rows(base_rows, head_rows)
rows.sort(key=lambda x: -float(x[-1][:-1])) rows.sort(key=lambda x: -percentage_to_float(x[-1]))
emit_table( emit_table(
("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"), ("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"),
rows rows
@ -584,9 +616,9 @@ def output_single_stats(stats):
total = get_total(opcode_stats) total = get_total(opcode_stats)
emit_execution_counts(opcode_stats, total) emit_execution_counts(opcode_stats, total)
emit_pair_counts(opcode_stats, total) emit_pair_counts(opcode_stats, total)
emit_specialization_stats(opcode_stats) emit_specialization_stats(opcode_stats, stats["_defines"])
emit_specialization_overview(opcode_stats, total) emit_specialization_overview(opcode_stats, total, stats["_specialized_instructions"])
emit_call_stats(stats) emit_call_stats(stats, stats["_stats_defines"])
emit_object_stats(stats) emit_object_stats(stats)
emit_gc_stats(stats) emit_gc_stats(stats)
with Section("Meta stats", summary="Meta statistics"): with Section("Meta stats", summary="Meta statistics"):
@ -604,12 +636,13 @@ def output_comparative_stats(base_stats, head_stats):
base_opcode_stats, base_total, head_opcode_stats, head_total base_opcode_stats, base_total, head_opcode_stats, head_total
) )
emit_comparative_specialization_stats( emit_comparative_specialization_stats(
base_opcode_stats, head_opcode_stats base_opcode_stats, head_opcode_stats, head_stats["_defines"]
) )
emit_comparative_specialization_overview( emit_comparative_specialization_overview(
base_opcode_stats, base_total, head_opcode_stats, head_total base_opcode_stats, base_total, head_opcode_stats, head_total,
head_stats["_specialized_instructions"]
) )
emit_comparative_call_stats(base_stats, head_stats) emit_comparative_call_stats(base_stats, head_stats, head_stats["_stats_defines"])
emit_comparative_object_stats(base_stats, head_stats) emit_comparative_object_stats(base_stats, head_stats)
emit_comparative_gc_stats(base_stats, head_stats) emit_comparative_gc_stats(base_stats, head_stats)