# Mirror of https://github.com/python/cpython.git (synced 2025-11-04 11:49:12 +00:00).
# 376 lines, 14 KiB, Python.
"""Print a summary of specialization stats for all files in the
default stats folders.
"""
 | 
						|
 | 
						|
import collections
 | 
						|
import os.path
 | 
						|
import opcode
 | 
						|
from datetime import date
 | 
						|
import itertools
 | 
						|
import argparse
 | 
						|
import sys
 | 
						|
 | 
						|
if os.name == "nt":
 | 
						|
    DEFAULT_DIR = "c:\\temp\\py_stats\\"
 | 
						|
else:
 | 
						|
    DEFAULT_DIR = "/tmp/py_stats/"
 | 
						|
 | 
						|
#Create list of all instruction names
 | 
						|
specialized = iter(opcode._specialized_instructions)
 | 
						|
opname = ["<0>"]
 | 
						|
for name in opcode.opname[1:]:
 | 
						|
    if name.startswith("<"):
 | 
						|
        try:
 | 
						|
            name = next(specialized)
 | 
						|
        except StopIteration:
 | 
						|
            pass
 | 
						|
    opname.append(name)
 | 
						|
 | 
						|
# opcode_name --> opcode
 | 
						|
# Sort alphabetically.
 | 
						|
opmap = {name: i for i, name in enumerate(opname)}
 | 
						|
opmap = dict(sorted(opmap.items()))
 | 
						|
 | 
						|
TOTAL = "specialization.deferred", "specialization.hit", "specialization.miss", "execution_count"
 | 
						|
 | 
						|
def print_specialization_stats(name, family_stats, defines):
    """Print the specialization report for one instruction family.

    Nothing is printed when `family_stats` has no "specializable" entry
    or when the counters listed in TOTAL add up to zero.

    name: instruction name; used as the section title.
    family_stats: dict mapping stat keys (e.g. "specialization.hit") to
        counts for this instruction.
    defines: mapping from failure-kind number to define names, handed
        to kind_to_text() to label the failure kinds.
    """
    if "specializable" not in family_stats:
        return
    total = sum(family_stats.get(kind, 0) for kind in TOTAL)
    if total == 0:
        return
    with Section(name, 3, f"specialization stats for {name} family"):
        # Table 1: every remaining counter as a share of the family total.
        rows = []
        for key in sorted(family_stats):
            if key.startswith("specialization.failure_kinds"):
                continue
            if key in ("specialization.hit", "specialization.miss"):
                label = key[len("specialization."):]
            elif key == "execution_count":
                label = "unquickened"
            elif key in ("specialization.success", "specialization.failure", "specializable"):
                continue
            elif key.startswith("pair"):
                continue
            else:
                label = key
            rows.append((f"{label:>12}", f"{family_stats[key]:>12}", f"{100*family_stats[key]/total:0.1f}%"))
        emit_table(("Kind", "Count", "Ratio"), rows)

        # Table 2: successful vs. failed specialization attempts.
        print_title("Specialization attempts", 4)
        attempt_keys = ("specialization.success", "specialization.failure")
        total_attempts = sum(family_stats.get(key, 0) for key in attempt_keys)
        rows = []
        # A family can have a non-zero total without a single attempt;
        # emit an empty table then instead of dividing by zero.
        if total_attempts:
            for key in attempt_keys:
                label = key[len("specialization."):]
                label = label[0].upper() + label[1:]
                val = family_stats.get(key, 0)
                rows.append((label, val, f"{100*val/total_attempts:0.1f}%"))
        emit_table(("", "Count:", "Ratio:"), rows)

        # Table 3: failures broken down by kind, most frequent first.
        total_failures = family_stats.get("specialization.failure", 0)
        # Keyed by kind number, so no fixed upper bound on kinds is assumed.
        failure_kinds = {}
        for key in family_stats:
            if not key.startswith("specialization.failure_kind"):
                continue
            _, index = key[:-1].split("[")
            failure_kinds[int(index)] = family_stats[key]
        failures = sorted(
            ((value, index) for index, value in failure_kinds.items() if value),
            reverse=True,
        )
        rows = []
        for value, index in failures:
            ratio = f"{100*value/total_failures:0.1f}%" if total_failures else ""
            rows.append((kind_to_text(index, defines, name), value, ratio))
        emit_table(("Failure kind", "Count:", "Ratio:"), rows)
 | 
						|
 | 
						|
def gather_stats():
    """Sum the "key: value" lines of every file in DEFAULT_DIR.

    Returns a collections.Counter mapping each stripped key to the sum
    of its integer values across all files. Lines that cannot be parsed
    (not exactly one ":" or a non-integer value) are reported on stderr
    and skipped.
    """
    stats = collections.Counter()
    for filename in os.listdir(DEFAULT_DIR):
        with open(os.path.join(DEFAULT_DIR, filename)) as fd:
            for line in fd:
                try:
                    key, value = line.split(":")
                    # Converted inside the try so a non-numeric value is
                    # reported like any other malformed line rather than
                    # aborting the whole run.
                    count = int(value)
                except ValueError:
                    print (f"Unparsable line: '{line.strip()}' in  {filename}", file=sys.stderr)
                    continue
                stats[key.strip()] += count
    return stats
 | 
						|
 | 
						|
def extract_opcode_stats(stats):
    """Regroup the "opcode[N].<field>" entries of `stats` by opcode number.

    Returns a list of 256 dicts; entry N maps each <field> (with
    surrounding dots removed) to its value. Keys that do not start with
    "opcode" are ignored.
    """
    per_opcode = [{} for _ in range(256)]
    for key, value in stats.items():
        if key.startswith("opcode"):
            # key[7:] skips the leading "opcode[" characters.
            number, _, field = key[7:].partition("]")
            per_opcode[int(number)][field.strip(".")] = value
    return per_opcode
 | 
						|
 | 
						|
def parse_kinds(spec_src, prefix="SPEC_FAIL"):
    """Collect "#define <prefix>_<NAME> <int>" lines from `spec_src`.

    spec_src: iterable of source lines.
    prefix: macro prefix to look for (without the trailing underscore).

    Returns a collections.defaultdict(list) mapping each integer value
    to the NAMEs (prefix removed) defined with that value, in the order
    they appear. Matching defines that have no value, or whose value is
    not an integer literal, are skipped; anything after the value (such
    as a trailing comment) is ignored.
    """
    defines = collections.defaultdict(list)
    start = "#define " + prefix + "_"
    for line in spec_src:
        line = line.strip()
        if not line.startswith(start):
            continue
        fields = line[len(start):].split()
        if len(fields) < 2:
            # A define with no value carries no kind number.
            continue
        name, val = fields[:2]
        try:
            kind = int(val)
        except ValueError:
            continue
        defines[kind].append(name)
    return defines
 | 
						|
 | 
						|
def pretty(defname):
    """Return `defname` lower-cased with underscores turned into spaces."""
    lowered = defname.lower()
    return lowered.replace("_", " ")
 | 
						|
 | 
						|
def kind_to_text(kind, defines, opname):
    """Return a readable label for failure kind number `kind`.

    kind: integer failure kind.
    defines: mapping from kind number to a list of define names.
    opname: name of the instruction the failure belongs to.

    Kinds below 7 use the first define registered for that number,
    whatever the instruction. For other kinds, the define whose name
    starts with the instruction name (any "...ATTR" instruction is
    treated as "ATTR" and any "...SUBSCR" one as "SUBSCR") is used,
    with that prefix removed. When nothing matches, "kind <N>" is
    returned.
    """
    fallback = "kind " + str(kind)
    # .get() rather than indexing: a missing kind then neither raises
    # nor adds an empty entry to a defaultdict.
    candidates = defines.get(kind) or ()
    if kind < 7:
        return pretty(candidates[0]) if candidates else fallback
    if opname.endswith("ATTR"):
        opname = "ATTR"
    if opname.endswith("SUBSCR"):
        opname = "SUBSCR"
    for name in candidates:
        if name.startswith(opname):
            # +1 also drops the underscore that follows the prefix.
            return pretty(name[len(opname)+1:])
    return fallback
 | 
						|
 | 
						|
def categorized_counts(opcode_stats):
    """Split the execution counts into three buckets.

    Returns the tuple (basic, not_specialized, specialized):
    - instructions with a "specializable" entry, and instructions whose
      name contains "ADAPTIVE", count entirely as not specialized;
    - for the other specialized instructions (names without "__"), the
      misses count as not specialized and the rest as specialized;
    - everything else counts as basic.
    Instructions without an "execution_count" entry are ignored.
    """
    adaptive_names = set()
    specialized_names = set()
    for op in opcode._specialized_instructions:
        if "ADAPTIVE" in op:
            adaptive_names.add(op)
        elif "__" not in op:
            specialized_names.add(op)

    basic = not_specialized = specialized = 0
    for index, stat in enumerate(opcode_stats):
        if "execution_count" not in stat:
            continue
        executed = stat["execution_count"]
        instruction = opname[index]
        if "specializable" in stat or instruction in adaptive_names:
            not_specialized += executed
        elif instruction in specialized_names:
            misses = stat.get("specialization.miss", 0)
            not_specialized += misses
            specialized += executed - misses
        else:
            basic += executed
    return basic, not_specialized, specialized
 | 
						|
 | 
						|
def print_title(name, level=2):
    """Print `name` as a Markdown heading of depth `level`, then a blank line."""
    hashes = "#" * level
    print(hashes, name, end="\n\n")
 | 
						|
 | 
						|
class Section:
    """Context manager that wraps printed output in a titled <details> block.

    Entering prints the title (via print_title), an opening <details>
    tag and a <summary> line; leaving prints the closing </details> tag.
    """

    def __init__(self, title, level=2, summary=None):
        """Store the heading text and depth; `summary` defaults to the lower-cased title."""
        self.title = title
        self.level = level
        self.summary = title.lower() if summary is None else summary

    def __enter__(self):
        print_title(self.title, self.level)
        print("<details>")
        print("<summary>", self.summary, "</summary>", end="\n\n")
        return self

    def __exit__(*args):
        # Blank line, closing tag, blank line. Nothing is returned, so
        # exceptions raised inside the block propagate.
        print("\n</details>\n")
 | 
						|
 | 
						|
def emit_table(header, rows):
    """Print `rows` as a Markdown table under `header`, then a blank line.

    header: sequence of column titles. A title ending in ":" is printed
        without the colon and its column gets a "---:" delimiter
        (right alignment in Markdown tables) instead of "---".
    rows: iterable of sequences; each cell is printed with str().

    Raises ValueError if a row does not have one cell per header
    column. The header and any earlier rows have already been printed
    by then.
    """
    width = len(header)
    header_line = "|"
    under_line = "|"
    for item in header:
        under = "---"
        if item.endswith(":"):
            item = item[:-1]
            under += ":"
        header_line += item + " | "
        under_line += under + "|"
    print(header_line)
    print(under_line)
    for row in rows:
        if len(row) != width:
            # Report the offending row only, not the whole table.
            raise ValueError("Wrong number of elements in row '" + str(row) + "'")
        print("|", " | ".join(str(i) for i in row), "|")
    print()
 | 
						|
 | 
						|
def emit_execution_counts(opcode_stats, total):
    """Print the per-instruction execution count table, largest count first.

    opcode_stats: list of per-opcode stat dicts, indexed by opcode number.
    total: denominator for the "Self" and "Cumulative" percentages.

    The miss-ratio column is only filled in for instructions without a
    "specializable" entry that have a non-zero "specialization.miss".
    """
    with Section("Execution counts", summary="execution counts for all instructions"):
        entries = []
        for index, stat in enumerate(opcode_stats):
            if "execution_count" not in stat:
                continue
            if "specializable" in stat:
                misses = 0
            else:
                misses = stat.get("specialization.miss")
            entries.append((stat["execution_count"], opname[index], misses))
        entries.sort(reverse=True)

        running = 0
        rows = []
        for executed, instruction, misses in entries:
            running += executed
            miss_ratio = f"{100*misses/executed:0.1f}%" if misses else ""
            self_ratio = f"{100*executed/total:0.1f}%"
            cumulative_ratio = f"{100*running/total:0.1f}%"
            rows.append((instruction, executed, self_ratio, cumulative_ratio, miss_ratio))
        emit_table(
            ("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"),
            rows
        )
 | 
						|
 | 
						|
 | 
						|
def emit_specialization_stats(opcode_stats):
    """Print one specialization sub-section per instruction.

    The failure-kind names are read from Python/specialize.c, located
    relative to this script's directory.
    """
    here = os.path.dirname(__file__)
    with open(os.path.join(here, "../../Python/specialize.c")) as spec_src:
        defines = parse_kinds(spec_src)
    with Section("Specialization stats", summary="specialization stats by family"):
        for index, family_stats in enumerate(opcode_stats):
            print_specialization_stats(opname[index], family_stats, defines)
 | 
						|
 | 
						|
def emit_specialization_overview(opcode_stats, total):
    """Print the basic / not specialized / specialized summary table.

    opcode_stats: list of per-opcode stat dicts, passed to
        categorized_counts().
    total: denominator for the ratio column. When it is zero (no
        instructions recorded) the ratio cells are left empty.
    """
    basic, not_specialized, specialized = categorized_counts(opcode_stats)

    def ratio(count):
        # Guard against an empty stats run, where total is 0.
        return f"{count*100/total:0.1f}%" if total else ""

    with Section("Specialization effectiveness"):
        emit_table(("Instructions", "Count:", "Ratio:"), (
            ("Basic", basic, ratio(basic)),
            ("Not specialized", not_specialized, ratio(not_specialized)),
            ("Specialized", specialized, ratio(specialized)),
        ))
 | 
						|
 | 
						|
def emit_call_stats(stats):
    """Print the call and frame counters as a table.

    stats: mapping from stat key to count.

    Every ratio is relative to the sum of the counters whose key
    contains "Calls to"; when that sum is zero the ratio cells are left
    empty. Keys of the form "Calls <...>[N]" are labelled with the
    EVAL_CALL define number N read from Include/pystats.h (located
    relative to this script's directory), or with "kind N" when there
    is no such define.
    """
    stats_path = os.path.join(os.path.dirname(__file__), "../../Include/pystats.h")
    with open(stats_path) as stats_src:
        defines = parse_kinds(stats_src, prefix="EVAL_CALL")
    with Section("Call stats", summary="Inlined calls and frame stats"):
        total = sum(value for key, value in stats.items() if "Calls to" in key)

        def ratio(value):
            # No "Calls to" counters recorded: nothing to compare against.
            return f"{100*value/total:0.1f}%" if total else ""

        rows = []
        for key, value in stats.items():
            if "Calls to" in key:
                rows.append((key, value, ratio(value)))
            elif key.startswith("Calls "):
                name, index = key[:-1].split("[")
                index = int(index)
                names = defines.get(index)
                kind = pretty(names[0]) if names else "kind " + str(index)
                rows.append((name + " (" + kind + ")", value, ratio(value)))
        # Frame counters go after all the call rows.
        for key, value in stats.items():
            if key.startswith("Frame"):
                rows.append((key, value, ratio(value)))
        emit_table(("", "Count:", "Ratio:"), rows)
 | 
						|
 | 
						|
def emit_object_stats(stats):
    """Print every counter whose key starts with "Object" as a table.

    stats: mapping from stat key to count.

    The ratio column depends on the first of these words found in the
    key: "materialize" (relative to "Object new values"), "allocations"
    (relative to "Object allocations"), "increfs" / "decrefs" (relative
    to the sum of the interpreter and non-interpreter counter). Other
    keys, and keys whose reference counter is missing or zero, get an
    empty ratio.
    """
    with Section("Object stats", summary="allocations, frees and dict materializatons"):
        # Missing counters default to 0 so absent keys cannot raise.
        totals = (
            ("materialize", stats.get("Object new values", 0)),
            ("allocations", stats.get("Object allocations", 0)),
            ("increfs", stats.get("Object interpreter increfs", 0) + stats.get("Object increfs", 0)),
            ("decrefs", stats.get("Object interpreter decrefs", 0) + stats.get("Object decrefs", 0)),
        )
        rows = []
        for key, value in stats.items():
            if not key.startswith("Object"):
                continue
            ratio = ""
            for marker, denominator in totals:
                if marker in key:
                    # Only the first matching marker applies.
                    if denominator:
                        ratio = f"{100*value/denominator:0.1f}%"
                    break
            label = key[6:].strip()
            # Slicing instead of label[0] tolerates an empty label.
            label = label[:1].upper() + label[1:]
            rows.append((label, value, ratio))
        emit_table(("",  "Count:", "Ratio:"), rows)
 | 
						|
 | 
						|
def get_total(opcode_stats):
    """Return the sum of "execution_count" over all per-opcode stat dicts.

    Dicts without an "execution_count" entry contribute nothing.
    """
    return sum(
        stat["execution_count"]
        for stat in opcode_stats
        if "execution_count" in stat
    )
 | 
						|
 | 
						|
def emit_pair_counts(opcode_stats, total):
    """Print the instruction-pair reports.

    opcode_stats: list of per-opcode stat dicts, indexed by opcode
        number; pairs come from their non-zero "pair_count[N]" entries
        (opcode 0 is skipped).
    total: denominator for the percentages of the first table.

    Two sections are printed: the 100 most frequent pairs, then, for
    each instruction that appears in any pair, its five most common
    predecessors and successors.
    """
    pair_counts = []
    for first, opcode_stat in itertools.islice(enumerate(opcode_stats), 1, None):
        for key, value in opcode_stat.items():
            if not key.startswith("pair_count"):
                continue
            # key[11:] skips the leading "pair_count[" characters.
            second_text = key[11:].partition("]")[0]
            if value:
                pair_counts.append((value, (first, int(second_text))))

    with Section("Pair counts", summary="Pair counts for top 100 pairs"):
        pair_counts.sort(reverse=True)
        running = 0
        rows = []
        for count, (i, j) in pair_counts[:100]:
            running += count
            pair_name = opname[i] + " " + opname[j]
            rows.append((pair_name, count, f"{100*count/total:0.1f}%",
                         f"{100*running/total:0.1f}%"))
        emit_table(("Pair", "Count:", "Self:", "Cumulative:"), rows)

    with Section("Predecessor/Successor Pairs", summary="Top 5 predecessors and successors of each opcode"):
        predecessors = collections.defaultdict(collections.Counter)
        successors = collections.defaultdict(collections.Counter)
        total_predecessors = collections.Counter()
        total_successors = collections.Counter()
        for count, (first, second) in pair_counts:
            if not count:
                continue
            predecessors[second][first] = count
            successors[first][second] = count
            total_predecessors[second] += count
            total_successors[first] += count

        for name, op in opmap.items():
            before_total = total_predecessors[op]
            after_total = total_successors[op]
            if before_total == 0 and after_total == 0:
                continue
            pred_rows = succ_rows = ()
            if before_total:
                pred_rows = [
                    (opname[pred], count, f"{count/before_total:.1%}")
                    for pred, count in predecessors[op].most_common(5)
                ]
            if after_total:
                succ_rows = [
                    (opname[succ], count, f"{count/after_total:.1%}")
                    for succ, count in successors[op].most_common(5)
                ]
            with Section(name, 3, f"Successors and predecessors for {name}"):
                emit_table(("Predecessors", "Count:", "Percentage:"), pred_rows)
                emit_table(("Successors", "Count:", "Percentage:"), succ_rows)
 | 
						|
 | 
						|
def main():
    """Gather the stats and print every report section, then a date footer."""
    raw_stats = gather_stats()
    per_opcode = extract_opcode_stats(raw_stats)
    executed = get_total(per_opcode)

    # Reports built from the per-opcode counters.
    emit_execution_counts(per_opcode, executed)
    emit_pair_counts(per_opcode, executed)
    emit_specialization_stats(per_opcode)
    emit_specialization_overview(per_opcode, executed)

    # Reports built from the raw counters.
    emit_call_stats(raw_stats)
    emit_object_stats(raw_stats)

    print("---")
    print("Stats gathered on:", date.today())


if __name__ == "__main__":
    main()
 |