diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 2d4ecb9b44..d79da233eb 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -556,10 +556,10 @@ examples.
 
 #### Linux
 
-Install `perf` and build `ruff_benchmark` with the `release-debug` profile and then run it with perf
+Install `perf` and build `ruff_benchmark` with the `profiling` profile and then run it with perf
 
 ```shell
-cargo bench -p ruff_benchmark --no-run --profile=release-debug && perf record --call-graph dwarf -F 9999 cargo bench -p ruff_benchmark --profile=release-debug -- --profile-time=1
+cargo bench -p ruff_benchmark --no-run --profile=profiling && perf record --call-graph dwarf -F 9999 cargo bench -p ruff_benchmark --profile=profiling -- --profile-time=1
 ```
 
 You can also use the `ruff_dev` launcher to run `ruff check` multiple times on a repository to
@@ -567,8 +567,8 @@ gather enough samples for a good flamegraph (change the 999, the sample rate, an
 of checks, to your liking)
 
 ```shell
-cargo build --bin ruff_dev --profile=release-debug
-perf record -g -F 999 target/release-debug/ruff_dev repeat --repeat 30 --exit-zero --no-cache path/to/cpython > /dev/null
+cargo build --bin ruff_dev --profile=profiling
+perf record -g -F 999 target/profiling/ruff_dev repeat --repeat 30 --exit-zero --no-cache path/to/cpython > /dev/null
 ```
 
 Then convert the recorded profile
@@ -598,7 +598,7 @@ cargo install cargo-instruments
 Then run the profiler with
 
 ```shell
-cargo instruments -t time --bench linter --profile release-debug -p ruff_benchmark -- --profile-time=1
+cargo instruments -t time --bench linter --profile profiling -p ruff_benchmark -- --profile-time=1
 ```
 
 - `-t`: Specifies what to profile. Useful options are `time` to profile the wall time and `alloc`
diff --git a/Cargo.toml b/Cargo.toml
index 304162dba6..aad3c66446 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -88,7 +88,20 @@ rc_mutex = "warn"
 rest_pat_in_fully_bound_structs = "warn"
 
 [profile.release]
-lto = "fat"
+# Note that we set these explicitly, and these values
+# were chosen based on a trade-off between compile times
+# and runtime performance[1].
+#
+# [1]: https://github.com/astral-sh/ruff/pull/9031
+lto = "thin"
+codegen-units = 16
+
+# Some crates don't change as much but benefit more from
+# more expensive optimization passes, so we selectively
+# decrease codegen-units in some cases.
+[profile.release.package.ruff_python_parser]
+codegen-units = 1
+[profile.release.package.ruff_python_ast]
 codegen-units = 1
 
 [profile.dev.package.insta]
@@ -102,8 +115,8 @@ opt-level = 3
 [profile.dev.package.ruff_python_parser]
 opt-level = 1
 
-# Use the `--profile release-debug` flag to show symbols in release mode.
-# e.g. `cargo build --profile release-debug`
-[profile.release-debug]
+# Use the `--profile profiling` flag to show symbols in release mode.
+# e.g. `cargo build --profile profiling`
+[profile.profiling]
 inherits = "release"
 debug = 1
diff --git a/crates/ruff_python_parser/src/lexer/cursor.rs b/crates/ruff_python_parser/src/lexer/cursor.rs
index 91c7d30c53..26f3bb8a5b 100644
--- a/crates/ruff_python_parser/src/lexer/cursor.rs
+++ b/crates/ruff_python_parser/src/lexer/cursor.rs
@@ -120,6 +120,7 @@ impl<'a> Cursor<'a> {
     }
 
     /// Eats symbols while predicate returns true or until the end of file is reached.
+    #[inline]
     pub(super) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
         // It was tried making optimized version of this for eg. line comments, but
         // LLVM can inline all of this and compile it down to fast iteration over bytes.
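
For context on the `cursor.rs` hunk above: `#[inline]` makes a function's body available for inlining across crate and codegen-unit boundaries, which matters more once the release profile uses `lto = "thin"` with `codegen-units = 16` instead of fat LTO. The sketch below is a minimal, self-contained cursor in the same style; its names and details are illustrative assumptions, not Ruff's actual `Cursor` implementation.

```rust
use std::str::Chars;

/// A minimal character cursor in the style of rustc-like lexers.
/// Illustrative only; not Ruff's actual `Cursor`.
struct Cursor<'a> {
    chars: Chars<'a>,
}

impl<'a> Cursor<'a> {
    fn new(source: &'a str) -> Self {
        Self {
            chars: source.chars(),
        }
    }

    /// Peeks at the next character without consuming it, or `'\0'` at EOF.
    fn first(&self) -> char {
        self.chars.clone().next().unwrap_or('\0')
    }

    fn is_eof(&self) -> bool {
        self.chars.as_str().is_empty()
    }

    fn bump(&mut self) -> Option<char> {
        self.chars.next()
    }

    /// Consumes characters while `predicate` holds. `#[inline]` exports the
    /// body so callers in other crates and codegen units can still inline
    /// this hot loop even without fat LTO.
    #[inline]
    fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
        while predicate(self.first()) && !self.is_eof() {
            self.bump();
        }
    }
}

fn main() {
    // Skip a line comment, as a lexer would.
    let mut cursor = Cursor::new("# a comment\nnext_token");
    cursor.eat_while(|c| c != '\n');
    assert_eq!(cursor.bump(), Some('\n'));
}
```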