Use Matchit to Resolve Per-File Settings (#11111)

## Summary

Continuation of https://github.com/astral-sh/ruff/pull/9444.

> When the formatter is fully cached, it turns out we actually spend
meaningful time mapping from file to `Settings` (since we use a
hierarchical approach to settings). Using `matchit` rather than
`BTreeMap` improves fully-cached performance by anywhere from 2-5%
depending on the project, and since these are all implementation details
of `Resolver`, it's minimally invasive.

`matchit` supports escaping routing characters so this change should now
be fully compatible.

## Test Plan

On my machine I'm seeing a ~3% improvement with this change.

```
hyperfine --warmup 20 -i "./target/release/main format ../airflow" "./target/release/ruff format ../airflow"
Benchmark 1: ./target/release/main format ../airflow
  Time (mean ± σ):      58.1 ms ±   1.4 ms    [User: 63.1 ms, System: 66.5 ms]
  Range (min … max):    56.1 ms …  62.9 ms    49 runs
 
Benchmark 2: ./target/release/ruff format ../airflow
  Time (mean ± σ):      56.6 ms ±   1.5 ms    [User: 57.8 ms, System: 67.7 ms]
  Range (min … max):    54.1 ms …  63.0 ms    51 runs
 
Summary
  ./target/release/ruff format ../airflow ran
    1.03 ± 0.04 times faster than ./target/release/main format ../airflow
```
This commit is contained in:
Ibraheem Ahmed 2024-04-24 10:25:46 -04:00 committed by GitHub
parent 37af6e6147
commit 1c8849f9a8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 53 additions and 19 deletions

14
Cargo.lock generated
View file

@ -1261,6 +1261,12 @@ version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5"
[[package]]
name = "matchit"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357db4d45704af452edb5861033c1c28db6f583d2e34cc6d40c6e096eb111499"
[[package]]
name = "memchr"
version = "2.7.2"
@ -1460,6 +1466,12 @@ dependencies = [
"once_cell",
]
[[package]]
name = "path-slash"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e91099d4268b0e11973f036e885d652fb0b21fedcf69738c627f94db6a44f42"
[[package]]
name = "pathdiff"
version = "0.2.1"
@ -2332,7 +2344,9 @@ dependencies = [
"is-macro",
"itertools 0.12.1",
"log",
"matchit",
"path-absolutize",
"path-slash",
"pep440_rs 0.6.0",
"regex",
"ruff_cache",

View file

@ -58,6 +58,7 @@ libcst = { version = "1.1.0", default-features = false }
log = { version = "0.4.17" }
lsp-server = { version = "0.7.6" }
lsp-types = { version = "0.95.0", features = ["proposed"] }
matchit = { version = "0.8.1" }
memchr = { version = "2.7.1" }
mimalloc = { version = "0.1.39" }
natord = { version = "1.0.9" }
@ -65,6 +66,7 @@ notify = { version = "6.1.1" }
num_cpus = { version = "1.16.0" }
once_cell = { version = "1.19.0" }
path-absolutize = { version = "3.1.1" }
path-slash = { version = "0.2.1" }
pathdiff = { version = "0.2.1" }
pep440_rs = { version = "0.6.0", features = ["serde"] }
pretty_assertions = "1.3.0"

View file

@ -28,9 +28,11 @@ ignore = { workspace = true }
is-macro = { workspace = true }
itertools = { workspace = true }
log = { workspace = true }
matchit = { workspace = true }
glob = { workspace = true }
globset = { workspace = true }
path-absolutize = { workspace = true }
path-slash = { workspace = true }
pep440_rs = { workspace = true, features = ["serde"] }
regex = { workspace = true }
rustc-hash = { workspace = true }

View file

@ -2,7 +2,6 @@
//! filesystem.
use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::ffi::OsStr;
use std::path::{Path, PathBuf};
use std::sync::RwLock;
@ -13,7 +12,9 @@ use globset::{Candidate, GlobSet};
use ignore::{WalkBuilder, WalkState};
use itertools::Itertools;
use log::debug;
use matchit::{InsertError, Match, Router};
use path_absolutize::path_dedot;
use path_slash::PathExt;
use rustc_hash::{FxHashMap, FxHashSet};
use ruff_linter::fs;
@ -86,13 +87,12 @@ pub enum Relativity {
}
impl Relativity {
pub fn resolve(self, path: &Path) -> PathBuf {
pub fn resolve(self, path: &Path) -> &Path {
match self {
Relativity::Parent => path
.parent()
.expect("Expected pyproject.toml file to be in parent directory")
.to_path_buf(),
Relativity::Cwd => path_dedot::CWD.clone(),
.expect("Expected pyproject.toml file to be in parent directory"),
Relativity::Cwd => &path_dedot::CWD,
}
}
}
@ -100,7 +100,10 @@ impl Relativity {
#[derive(Debug)]
pub struct Resolver<'a> {
pyproject_config: &'a PyprojectConfig,
settings: BTreeMap<PathBuf, Settings>,
/// All [`Settings`] that have been added to the resolver.
settings: Vec<Settings>,
/// A router from path to index into the `settings` vector.
router: Router<usize>,
}
impl<'a> Resolver<'a> {
@ -108,7 +111,8 @@ impl<'a> Resolver<'a> {
pub fn new(pyproject_config: &'a PyprojectConfig) -> Self {
Self {
pyproject_config,
settings: BTreeMap::new(),
settings: Vec::new(),
router: Router::new(),
}
}
@ -140,8 +144,21 @@ impl<'a> Resolver<'a> {
}
/// Add a resolved [`Settings`] under a given [`PathBuf`] scope.
fn add(&mut self, path: PathBuf, settings: Settings) {
self.settings.insert(path, settings);
fn add(&mut self, path: &Path, settings: Settings) {
self.settings.push(settings);
// normalize the path to use `/` separators and escape the '{' and '}' characters,
// which matchit uses for routing parameters
let path = path.to_slash_lossy().replace('{', "{{").replace('}', "}}");
match self
.router
.insert(format!("{path}/{{*filepath}}"), self.settings.len() - 1)
{
Ok(()) => {}
Err(InsertError::Conflict { .. }) => {}
Err(_) => unreachable!("file paths are escaped before being inserted in the router"),
}
}
/// Return the appropriate [`Settings`] for a given [`Path`].
@ -149,10 +166,9 @@ impl<'a> Resolver<'a> {
match self.pyproject_config.strategy {
PyprojectDiscoveryStrategy::Fixed => &self.pyproject_config.settings,
PyprojectDiscoveryStrategy::Hierarchical => self
.settings
.iter()
.rev()
.find_map(|(root, settings)| path.starts_with(root).then_some(settings))
.router
.at(path.to_slash_lossy().as_ref())
.map(|Match { value, .. }| &self.settings[*value])
.unwrap_or(&self.pyproject_config.settings),
}
}
@ -196,7 +212,7 @@ impl<'a> Resolver<'a> {
/// Return an iterator over the resolved [`Settings`] in this [`Resolver`].
pub fn settings(&self) -> impl Iterator<Item = &Settings> {
std::iter::once(&self.pyproject_config.settings).chain(self.settings.values())
std::iter::once(&self.pyproject_config.settings).chain(self.settings.iter())
}
}
@ -257,7 +273,7 @@ fn resolve_configuration(
let options = pyproject::load_options(&path)?;
let project_root = relativity.resolve(&path);
let configuration = Configuration::from_options(options, Some(&path), &project_root)?;
let configuration = Configuration::from_options(options, Some(&path), project_root)?;
// If extending, continue to collect.
next = configuration.extend.as_ref().map(|extend| {
@ -285,14 +301,14 @@ fn resolve_configuration(
/// Extract the project root (scope) and [`Settings`] from a given
/// `pyproject.toml`.
fn resolve_scoped_settings(
pyproject: &Path,
fn resolve_scoped_settings<'a>(
pyproject: &'a Path,
relativity: Relativity,
transformer: &dyn ConfigurationTransformer,
) -> Result<(PathBuf, Settings)> {
) -> Result<(&'a Path, Settings)> {
let configuration = resolve_configuration(pyproject, relativity, transformer)?;
let project_root = relativity.resolve(pyproject);
let settings = configuration.into_settings(&project_root)?;
let settings = configuration.into_settings(project_root)?;
Ok((project_root, settings))
}