From c6e04e3adcede2a4a584dc03b74a8b116ed3903f Mon Sep 17 00:00:00 2001 From: FilipAndersson245 <17986183+FilipAndersson245@users.noreply.github.com> Date: Sat, 12 Apr 2025 22:04:26 +0200 Subject: [PATCH 1/3] Adds profile guided optimization script. --- .gitignore | 2 ++ scripts/pgo.py | 46 ++++++++++++++++++++++++++++++++ scripts/pgo_profile.json | 57 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+) create mode 100644 scripts/pgo.py create mode 100644 scripts/pgo_profile.json diff --git a/.gitignore b/.gitignore index f75e68c745..f7d72a565c 100644 --- a/.gitignore +++ b/.gitignore @@ -227,3 +227,5 @@ cython_debug/ !crates/ruff_python_resolver/resources/test/airflow/venv/lib !crates/ruff_python_resolver/resources/test/airflow/venv/lib/python3.11/site-packages/_watchdog_fsevents.cpython-311-darwin.so !crates/ruff_python_resolver/resources/test/airflow/venv/lib/python3.11/site-packages/orjson/orjson.cpython-311-darwin.so + +clones \ No newline at end of file diff --git a/scripts/pgo.py b/scripts/pgo.py new file mode 100644 index 0000000000..f33966fd24 --- /dev/null +++ b/scripts/pgo.py @@ -0,0 +1,46 @@ +"""Creates Profile-Guided-Optimized binaries for ruff.""" + +import json +import os +import subprocess +from pathlib import Path + +PROJECTS_JSON = "./scripts/pgo_profile.json" +CLONE_DIR = Path("clones") +TRIPLE = "x86_64-unknown-linux-gnu" + +env = os.environ.copy() +env["LLVM_PROFILE_FILE"] = f"{os.getcwd()}/target/pgo-profiles/ruff_%m_%p.profraw" + +def run_command(cmd, env, cwd=None, check=False): + print(f">>> {cmd}") + subprocess.run(cmd, shell=True, check=check, cwd=cwd, env=env, stdout = subprocess.DEVNULL, stderr= subprocess.DEVNULL) + + +def main(): + CLONE_DIR.mkdir(exist_ok=True) + + with open(PROJECTS_JSON, "r") as f: + projects = json.load(f) + + run_command("cargo pgo clean", env=env) + run_command("cargo pgo instrument build --keep-profiles -- -q", env=env) + + for project in projects: + name = project["name"] + url = project["url"] + branch = project["branch"] + dest = CLONE_DIR / name + + print(f">> collecting data on {name}.") + + if not dest.exists(): + run_command(f"git clone --depth 1 --quiet --branch {branch} {url} {dest}", env=env) + + run_command(f"../../target/{TRIPLE}/release/ruff check -n -e --diff .", env=env, cwd=dest) + run_command(f"../../target/{TRIPLE}/release/ruff format -n --check .", env=env, cwd=dest) + + run_command("cargo pgo optimize", env=env) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/pgo_profile.json b/scripts/pgo_profile.json new file mode 100644 index 0000000000..3ccb298e0a --- /dev/null +++ b/scripts/pgo_profile.json @@ -0,0 +1,57 @@ +[ + { + "name": "mypy", + "url": "https://github.com/python/mypy.git", + "branch": "master" + }, + { + "name": "pandas", + "url": "https://github.com/pandas-dev/pandas.git", + "branch": "main" + }, + { + "name": "django", + "url": "https://github.com/django/django.git", + "branch": "main" + }, + { + "name": "openstack", + "url": "https://opendev.org/openstack/nova.git", + "branch": "master" + }, + { + "name": "polars", + "url": "https://github.com/pola-rs/polars.git", + "branch": "main" + }, + { + "name": "pytorch", + "url": "https://github.com/pytorch/pytorch.git", + "branch": "main" + }, + { + "name": "cpython", + "url": "https://github.com/python/cpython.git", + "branch": "main" + }, + { + "name": "zulip", + "url": "https://github.com/zulip/zulip.git", + "branch": "main" + }, + { + "name": "matplotlib", + "url": "https://github.com/matplotlib/matplotlib.git", + "branch": "main" + }, + { + "name": "core", + "url": "https://github.com/home-assistant/core.git", + "branch": "master" + }, + { + "name": "fastapi", + "url": "https://github.com/fastapi/fastapi.git", + "branch": "master" + } +] \ No newline at end of file From 998b8b57b7ae28cdd528b3832ff1bcb3dc6dcca4 Mon Sep 17 00:00:00 2001 From: FilipAndersson245 <17986183+FilipAndersson245@users.noreply.github.com> Date: Sat, 12 Apr 2025 22:14:46 +0200 Subject: [PATCH 2/3] Adds comment about having `cargo-pgo` installed. --- scripts/pgo.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/pgo.py b/scripts/pgo.py index f33966fd24..33b3e348ad 100644 --- a/scripts/pgo.py +++ b/scripts/pgo.py @@ -1,4 +1,7 @@ -"""Creates Profile-Guided-Optimized binaries for ruff.""" +""" +Creates Profile-Guided-Optimized binaries for ruff. +Ensure `cargo-pgo` is installed and configured to run this code. +""" import json import os From 9ab5e06bcc4808c8e6b8d5f2122957871f199401 Mon Sep 17 00:00:00 2001 From: FilipAndersson245 <17986183+FilipAndersson245@users.noreply.github.com> Date: Tue, 22 Apr 2025 16:41:27 +0200 Subject: [PATCH 3/3] Formated. --- scripts/pgo.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/scripts/pgo.py b/scripts/pgo.py index 33b3e348ad..c5503d5bfc 100644 --- a/scripts/pgo.py +++ b/scripts/pgo.py @@ -15,14 +15,23 @@ TRIPLE = "x86_64-unknown-linux-gnu" env = os.environ.copy() env["LLVM_PROFILE_FILE"] = f"{os.getcwd()}/target/pgo-profiles/ruff_%m_%p.profraw" + def run_command(cmd, env, cwd=None, check=False): print(f">>> {cmd}") - subprocess.run(cmd, shell=True, check=check, cwd=cwd, env=env, stdout = subprocess.DEVNULL, stderr= subprocess.DEVNULL) + subprocess.run( + cmd, + shell=True, + check=check, + cwd=cwd, + env=env, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) def main(): CLONE_DIR.mkdir(exist_ok=True) - + with open(PROJECTS_JSON, "r") as f: projects = json.load(f) @@ -38,12 +47,21 @@ def main(): print(f">> collecting data on {name}.") if not dest.exists(): - run_command(f"git clone --depth 1 --quiet --branch {branch} {url} {dest}", env=env) + run_command( + f"git clone --depth 1 --quiet --branch {branch} {url} {dest}", env=env + ) - run_command(f"../../target/{TRIPLE}/release/ruff check -n -e --diff .", env=env, cwd=dest) - run_command(f"../../target/{TRIPLE}/release/ruff format -n --check .", env=env, cwd=dest) + run_command( + f"../../target/{TRIPLE}/release/ruff check -n -e --diff .", + env=env, + cwd=dest, + ) + run_command( + f"../../target/{TRIPLE}/release/ruff format -n --check .", env=env, cwd=dest + ) run_command("cargo pgo optimize", env=env) + if __name__ == "__main__": - main() \ No newline at end of file + main()