Add caching to formatter (#8089)

This commit is contained in:
Micha Reiser 2023-10-23 17:43:08 +09:00 committed by GitHub
parent c0710a1dd4
commit 6fc35dd075
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 627 additions and 260 deletions

View file

@ -1,4 +1,3 @@
use std::collections::HashMap;
use std::fmt::Write;
use std::io;
use std::path::{Path, PathBuf};
@ -7,28 +6,26 @@ use std::time::Instant;
use anyhow::Result;
use colored::Colorize;
use ignore::Error;
use itertools::Itertools;
use log::{debug, error, warn};
#[cfg(not(target_family = "wasm"))]
use rayon::prelude::*;
use ruff_linter::settings::types::UnsafeFixes;
use rustc_hash::FxHashMap;
use ruff_diagnostics::Diagnostic;
use ruff_linter::message::Message;
use ruff_linter::registry::Rule;
use ruff_linter::settings::types::UnsafeFixes;
use ruff_linter::settings::{flags, LinterSettings};
use ruff_linter::{fs, warn_user_once, IOError};
use ruff_python_ast::imports::ImportMap;
use ruff_source_file::SourceFileBuilder;
use ruff_text_size::{TextRange, TextSize};
use ruff_workspace::resolver::{
match_exclusion, python_files_in_path, PyprojectConfig, PyprojectDiscoveryStrategy,
ResolvedFile,
match_exclusion, python_files_in_path, PyprojectConfig, ResolvedFile,
};
use crate::args::CliOverrides;
use crate::cache::{self, Cache};
use crate::cache::{Cache, PackageCacheMap, PackageCaches};
use crate::diagnostics::Diagnostics;
use crate::panic::catch_unwind;
@ -52,28 +49,6 @@ pub(crate) fn check(
return Ok(Diagnostics::default());
}
// Initialize the cache.
if cache.into() {
fn init_cache(path: &Path) {
if let Err(e) = cache::init(path) {
error!("Failed to initialize cache at {}: {e:?}", path.display());
}
}
match pyproject_config.strategy {
PyprojectDiscoveryStrategy::Fixed => {
init_cache(&pyproject_config.settings.cache_dir);
}
PyprojectDiscoveryStrategy::Hierarchical => {
for settings in
std::iter::once(&pyproject_config.settings).chain(resolver.settings())
{
init_cache(&settings.cache_dir);
}
}
}
};
// Discover the package root for each Python file.
let package_roots = resolver.package_roots(
&paths
@ -85,19 +60,15 @@ pub(crate) fn check(
);
// Load the caches.
let caches = bool::from(cache).then(|| {
package_roots
.iter()
.map(|(package, package_root)| package_root.unwrap_or(package))
.unique()
.par_bridge()
.map(|cache_root| {
let settings = resolver.resolve(cache_root, pyproject_config);
let cache = Cache::open(cache_root.to_path_buf(), settings);
(cache_root, cache)
})
.collect::<HashMap<&Path, Cache>>()
});
let caches = if bool::from(cache) {
Some(PackageCacheMap::init(
pyproject_config,
&package_roots,
&resolver,
))
} else {
None
};
let start = Instant::now();
let diagnostics_per_file = paths.par_iter().filter_map(|resolved_file| {
@ -122,14 +93,7 @@ pub(crate) fn check(
}
let cache_root = package.unwrap_or_else(|| path.parent().unwrap_or(path));
let cache = caches.as_ref().and_then(|caches| {
if let Some(cache) = caches.get(&cache_root) {
Some(cache)
} else {
debug!("No cache found for {}", cache_root.display());
None
}
});
let cache = caches.get(cache_root);
lint_path(
path,
@ -210,11 +174,7 @@ pub(crate) fn check(
all_diagnostics.messages.sort();
// Store the caches.
if let Some(caches) = caches {
caches
.into_par_iter()
.try_for_each(|(_, cache)| cache.store())?;
}
caches.persist()?;
let duration = start.elapsed();
debug!("Checked {:?} files in: {:?}", checked_files, duration);
@ -266,13 +226,12 @@ mod test {
use std::os::unix::fs::OpenOptionsExt;
use anyhow::Result;
use ruff_linter::settings::types::UnsafeFixes;
use rustc_hash::FxHashMap;
use tempfile::TempDir;
use ruff_linter::message::{Emitter, EmitterContext, TextEmitter};
use ruff_linter::registry::Rule;
use ruff_linter::settings::types::UnsafeFixes;
use ruff_linter::settings::{flags, LinterSettings};
use ruff_workspace::resolver::{PyprojectConfig, PyprojectDiscoveryStrategy};
use ruff_workspace::Settings;

View file

@ -10,7 +10,7 @@ use colored::Colorize;
use itertools::Itertools;
use log::error;
use rayon::iter::Either::{Left, Right};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
use similar::TextDiff;
use thiserror::Error;
use tracing::debug;
@ -23,10 +23,11 @@ use ruff_linter::warn_user_once;
use ruff_python_ast::{PySourceType, SourceType};
use ruff_python_formatter::{format_module_source, FormatModuleError};
use ruff_text_size::{TextLen, TextRange, TextSize};
use ruff_workspace::resolver::{match_exclusion, python_files_in_path};
use ruff_workspace::resolver::{match_exclusion, python_files_in_path, ResolvedFile};
use ruff_workspace::FormatterSettings;
use crate::args::{CliOverrides, FormatArguments};
use crate::cache::{Cache, FileCacheKey, PackageCacheMap, PackageCaches};
use crate::panic::{catch_unwind, PanicError};
use crate::resolve::resolve;
use crate::ExitStatus;
@ -73,9 +74,34 @@ pub(crate) fn format(
return Ok(ExitStatus::Success);
}
// Discover the package root for each Python file.
let package_roots = resolver.package_roots(
&paths
.iter()
.flatten()
.map(ResolvedFile::path)
.collect::<Vec<_>>(),
&pyproject_config,
);
let caches = if cli.no_cache {
None
} else {
// `--no-cache` doesn't respect code changes, and so is often confusing during
// development.
#[cfg(debug_assertions)]
crate::warn_user!("Detected debug build without --no-cache.");
Some(PackageCacheMap::init(
&pyproject_config,
&package_roots,
&resolver,
))
};
let start = Instant::now();
let (mut results, mut errors): (Vec<_>, Vec<_>) = paths
.into_par_iter()
.par_iter()
.filter_map(|entry| {
match entry {
Ok(resolved_file) => {
@ -110,6 +136,13 @@ pub(crate) fn format(
}
};
let package = path
.parent()
.and_then(|parent| package_roots.get(parent).copied())
.flatten();
let cache_root = package.unwrap_or_else(|| path.parent().unwrap_or(path));
let cache = caches.get(cache_root);
Some(
match catch_unwind(|| {
format_path(
@ -118,21 +151,22 @@ pub(crate) fn format(
&unformatted,
source_type,
mode,
cache,
)
}) {
Ok(inner) => inner.map(|result| FormatPathResult {
path: resolved_file.into_path(),
path: resolved_file.path().to_path_buf(),
unformatted,
result,
}),
Err(error) => Err(FormatCommandError::Panic(
Some(resolved_file.into_path()),
Some(resolved_file.path().to_path_buf()),
error,
)),
},
)
}
Err(err) => Some(Err(FormatCommandError::Ignore(err))),
Err(err) => Some(Err(FormatCommandError::Ignore(err.clone()))),
}
})
.partition_map(|result| match result {
@ -168,6 +202,8 @@ pub(crate) fn format(
duration
);
caches.persist()?;
// Report on any errors.
for error in &errors {
error!("{error}");
@ -214,13 +250,26 @@ pub(crate) fn format(
/// Format the file at the given [`Path`].
#[tracing::instrument(level="debug", skip_all, fields(path = %path.display()))]
fn format_path(
pub(crate) fn format_path(
path: &Path,
settings: &FormatterSettings,
unformatted: &SourceKind,
source_type: PySourceType,
mode: FormatMode,
cache: Option<&Cache>,
) -> Result<FormatResult, FormatCommandError> {
if let Some(cache) = cache {
let relative_path = cache
.relative_path(path)
.expect("wrong package cache for file");
if let Ok(cache_key) = FileCacheKey::from_path(path) {
if cache.is_formatted(relative_path, &cache_key) {
return Ok(FormatResult::Unchanged);
}
}
}
// Format the source.
let format_result = match format_source(unformatted, source_type, Some(path), settings)? {
FormattedSource::Formatted(formatted) => match mode {
@ -231,13 +280,35 @@ fn format_path(
formatted
.write(&mut writer)
.map_err(|err| FormatCommandError::Write(Some(path.to_path_buf()), err))?;
if let Some(cache) = cache {
if let Ok(cache_key) = FileCacheKey::from_path(path) {
let relative_path = cache
.relative_path(path)
.expect("wrong package cache for file");
cache.set_formatted(relative_path.to_path_buf(), &cache_key);
}
}
FormatResult::Formatted
}
FormatMode::Check => FormatResult::Formatted,
FormatMode::Diff => FormatResult::Diff(formatted),
},
FormattedSource::Unchanged => FormatResult::Unchanged,
FormattedSource::Unchanged => {
if let Some(cache) = cache {
if let Ok(cache_key) = FileCacheKey::from_path(path) {
let relative_path = cache
.relative_path(path)
.expect("wrong package cache for file");
cache.set_formatted(relative_path.to_path_buf(), &cache_key);
}
}
FormatResult::Unchanged
}
};
Ok(format_result)
}