diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 2d4ecb9b44..d79da233eb 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -556,10 +556,10 @@ examples.
 
 #### Linux
 
-Install `perf` and build `ruff_benchmark` with the `release-debug` profile and then run it with perf
+Install `perf` and build `ruff_benchmark` with the `profiling` profile and then run it with perf
 
 ```shell
-cargo bench -p ruff_benchmark --no-run --profile=release-debug && perf record --call-graph dwarf -F 9999 cargo bench -p ruff_benchmark --profile=release-debug -- --profile-time=1
+cargo bench -p ruff_benchmark --no-run --profile=profiling && perf record --call-graph dwarf -F 9999 cargo bench -p ruff_benchmark --profile=profiling -- --profile-time=1
 ```
 
 You can also use the `ruff_dev` launcher to run `ruff check` multiple times on a repository to
@@ -567,8 +567,8 @@ gather enough samples for a good flamegraph (change the 999, the sample rate, an
 of checks, to your liking)
 
 ```shell
-cargo build --bin ruff_dev --profile=release-debug
-perf record -g -F 999 target/release-debug/ruff_dev repeat --repeat 30 --exit-zero --no-cache path/to/cpython > /dev/null
+cargo build --bin ruff_dev --profile=profiling
+perf record -g -F 999 target/profiling/ruff_dev repeat --repeat 30 --exit-zero --no-cache path/to/cpython > /dev/null
 ```
 
 Then convert the recorded profile
@@ -598,7 +598,7 @@ cargo install cargo-instruments
 Then run the profiler with
 
 ```shell
-cargo instruments -t time --bench linter --profile release-debug -p ruff_benchmark -- --profile-time=1
+cargo instruments -t time --bench linter --profile profiling -p ruff_benchmark -- --profile-time=1
 ```
 
 - `-t`: Specifies what to profile. Useful options are `time` to profile the wall time and `alloc`
diff --git a/Cargo.toml b/Cargo.toml
index 304162dba6..aad3c66446 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -88,7 +88,20 @@ rc_mutex = "warn"
 rest_pat_in_fully_bound_structs = "warn"
 
 [profile.release]
-lto = "fat"
+# Note that we set these explicitly, and these values
+# were chosen based on a trade-off between compile times
+# and runtime performance[1].
+#
+# [1]: https://github.com/astral-sh/ruff/pull/9031
+lto = "thin"
+codegen-units = 16
+
+# Some crates don't change as much but benefit more from
+# more expensive optimization passes, so we selectively
+# decrease codegen-units in some cases.
+[profile.release.package.ruff_python_parser]
+codegen-units = 1
+[profile.release.package.ruff_python_ast]
 codegen-units = 1
 
 [profile.dev.package.insta]
@@ -102,8 +115,8 @@ opt-level = 3
 [profile.dev.package.ruff_python_parser]
 opt-level = 1
 
-# Use the `--profile release-debug` flag to show symbols in release mode.
-# e.g. `cargo build --profile release-debug`
-[profile.release-debug]
+# Use the `--profile profiling` flag to show symbols in release mode.
+# e.g. `cargo build --profile profiling`
+[profile.profiling]
 inherits = "release"
 debug = 1
diff --git a/crates/ruff_python_parser/src/lexer/cursor.rs b/crates/ruff_python_parser/src/lexer/cursor.rs
index 91c7d30c53..26f3bb8a5b 100644
--- a/crates/ruff_python_parser/src/lexer/cursor.rs
+++ b/crates/ruff_python_parser/src/lexer/cursor.rs
@@ -120,6 +120,7 @@ impl<'a> Cursor<'a> {
     }
 
     /// Eats symbols while predicate returns true or until the end of file is reached.
+    #[inline]
     pub(super) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
         // It was tried making optimized version of this for eg. line comments, but
         // LLVM can inline all of this and compile it down to fast iteration over bytes.
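
For context on the `cursor.rs` hunk above: `#[inline]` makes a function's body available for inlining across crate and codegen-unit boundaries, which matters more once the release profile uses `lto = "thin"` with `codegen-units = 16` instead of fat LTO. The sketch below is a minimal, self-contained cursor in the same style; its names and details are illustrative assumptions, not Ruff's actual `Cursor` implementation.

```rust
use std::str::Chars;

/// A minimal character cursor in the style of rustc-like lexers.
/// Illustrative only; not Ruff's actual `Cursor`.
struct Cursor<'a> {
    chars: Chars<'a>,
}

impl<'a> Cursor<'a> {
    fn new(source: &'a str) -> Self {
        Self {
            chars: source.chars(),
        }
    }

    /// Peeks at the next character without consuming it, or `'\0'` at EOF.
    fn first(&self) -> char {
        self.chars.clone().next().unwrap_or('\0')
    }

    fn is_eof(&self) -> bool {
        self.chars.as_str().is_empty()
    }

    fn bump(&mut self) -> Option<char> {
        self.chars.next()
    }

    /// Consumes characters while `predicate` holds. `#[inline]` exports the
    /// body so callers in other crates and codegen units can still inline
    /// this hot loop even without fat LTO.
    #[inline]
    fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
        while predicate(self.first()) && !self.is_eof() {
            self.bump();
        }
    }
}

fn main() {
    // Skip a line comment, as a lexer would.
    let mut cursor = Cursor::new("# a comment\nnext_token");
    cursor.eat_while(|c| c != '\n');
    assert_eq!(cursor.bump(), Some('\n'));
}
```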