Add [format|lint].exclude options (#8000)

This commit is contained in:
Micha Reiser 2023-10-18 10:15:25 +09:00 committed by GitHub
parent d685107638
commit fe485d791c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 772 additions and 294 deletions

View file

@ -1,13 +1,15 @@
//! Discover Python files, and their corresponding [`Settings`], from the
//! filesystem.
use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::ffi::OsStr;
use std::path::{Path, PathBuf};
use std::sync::RwLock;
use anyhow::Result;
use anyhow::{anyhow, bail};
use ignore::{DirEntry, WalkBuilder, WalkState};
use ignore::{WalkBuilder, WalkState};
use itertools::Itertools;
use log::debug;
use path_absolutize::path_dedot;
@ -276,7 +278,7 @@ pub fn python_files_in_path(
paths: &[PathBuf],
pyproject_config: &PyprojectConfig,
transformer: &dyn ConfigurationTransformer,
) -> Result<(Vec<Result<DirEntry, ignore::Error>>, Resolver)> {
) -> Result<(Vec<Result<ResolvedFile, ignore::Error>>, Resolver)> {
// Normalize every path (e.g., convert from relative to absolute).
let mut paths: Vec<PathBuf> = paths.iter().map(fs::normalize_path).unique().collect();
@ -305,13 +307,12 @@ pub fn python_files_in_path(
}
}
let (first_path, rest_paths) = paths
.split_first()
.ok_or_else(|| anyhow!("Expected at least one path to search for Python files"))?;
// Create the `WalkBuilder`.
let mut builder = WalkBuilder::new(
paths
.get(0)
.ok_or_else(|| anyhow!("Expected at least one path to search for Python files"))?,
);
for path in &paths[1..] {
let mut builder = WalkBuilder::new(first_path);
for path in rest_paths {
builder.add(path);
}
builder.standard_filters(pyproject_config.settings.file_resolver.respect_gitignore);
@ -321,7 +322,7 @@ pub fn python_files_in_path(
// Run the `WalkParallel` to collect all Python files.
let error: std::sync::Mutex<Result<()>> = std::sync::Mutex::new(Ok(()));
let resolver: RwLock<Resolver> = RwLock::new(resolver);
let files: std::sync::Mutex<Vec<Result<DirEntry, ignore::Error>>> =
let files: std::sync::Mutex<Vec<Result<ResolvedFile, ignore::Error>>> =
std::sync::Mutex::new(vec![]);
walker.run(|| {
Box::new(|result| {
@ -332,18 +333,14 @@ pub fn python_files_in_path(
let resolver = resolver.read().unwrap();
let settings = resolver.resolve(path, pyproject_config);
if let Some(file_name) = path.file_name() {
if !settings.file_resolver.exclude.is_empty()
&& match_exclusion(path, file_name, &settings.file_resolver.exclude)
{
if match_exclusion(path, file_name, &settings.file_resolver.exclude) {
debug!("Ignored path via `exclude`: {:?}", path);
return WalkState::Skip;
} else if !settings.file_resolver.extend_exclude.is_empty()
&& match_exclusion(
path,
file_name,
&settings.file_resolver.extend_exclude,
)
{
} else if match_exclusion(
path,
file_name,
&settings.file_resolver.extend_exclude,
) {
debug!("Ignored path via `extend-exclude`: {:?}", path);
return WalkState::Skip;
}
@ -386,30 +383,37 @@ pub fn python_files_in_path(
}
}
if result.as_ref().map_or(true, |entry| {
// Ignore directories
if entry.file_type().map_or(true, |ft| ft.is_dir()) {
false
} else if entry.depth() == 0 {
// Accept all files that are passed-in directly.
true
} else {
// Otherwise, check if the file is included.
let path = entry.path();
let resolver = resolver.read().unwrap();
let settings = resolver.resolve(path, pyproject_config);
if settings.file_resolver.include.is_match(path) {
debug!("Included path via `include`: {:?}", path);
true
} else if settings.file_resolver.extend_include.is_match(path) {
debug!("Included path via `extend-include`: {:?}", path);
true
match result {
Ok(entry) => {
// Ignore directories
let resolved = if entry.file_type().map_or(true, |ft| ft.is_dir()) {
None
} else if entry.depth() == 0 {
// Accept all files that are passed-in directly.
Some(ResolvedFile::Root(entry.into_path()))
} else {
false
// Otherwise, check if the file is included.
let path = entry.path();
let resolver = resolver.read().unwrap();
let settings = resolver.resolve(path, pyproject_config);
if settings.file_resolver.include.is_match(path) {
debug!("Included path via `include`: {:?}", path);
Some(ResolvedFile::Nested(entry.into_path()))
} else if settings.file_resolver.extend_include.is_match(path) {
debug!("Included path via `extend-include`: {:?}", path);
Some(ResolvedFile::Nested(entry.into_path()))
} else {
None
}
};
if let Some(resolved) = resolved {
files.lock().unwrap().push(Ok(resolved));
}
}
}) {
files.lock().unwrap().push(result);
Err(err) => {
files.lock().unwrap().push(Err(err));
}
}
WalkState::Continue
@ -421,6 +425,51 @@ pub fn python_files_in_path(
Ok((files.into_inner().unwrap(), resolver.into_inner().unwrap()))
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ResolvedFile {
/// File explicitly passed to the CLI
Root(PathBuf),
/// File in a sub-directory
Nested(PathBuf),
}
impl ResolvedFile {
pub fn into_path(self) -> PathBuf {
match self {
ResolvedFile::Root(path) => path,
ResolvedFile::Nested(path) => path,
}
}
pub fn path(&self) -> &Path {
match self {
ResolvedFile::Root(root) => root.as_path(),
ResolvedFile::Nested(root) => root.as_path(),
}
}
pub fn file_name(&self) -> &OsStr {
let path = self.path();
path.file_name().unwrap_or(path.as_os_str())
}
pub fn is_root(&self) -> bool {
matches!(self, ResolvedFile::Root(_))
}
}
impl PartialOrd for ResolvedFile {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for ResolvedFile {
fn cmp(&self, other: &Self) -> Ordering {
self.path().cmp(other.path())
}
}
/// Return `true` if the Python file at [`Path`] is _not_ excluded.
pub fn python_file_at_path(
path: &Path,
@ -458,25 +507,17 @@ fn is_file_excluded(
) -> bool {
// TODO(charlie): Respect gitignore.
for path in path.ancestors() {
if path.file_name().is_none() {
break;
}
let settings = resolver.resolve(path, pyproject_strategy);
if let Some(file_name) = path.file_name() {
if !settings.file_resolver.exclude.is_empty()
&& match_exclusion(path, file_name, &settings.file_resolver.exclude)
{
if match_exclusion(path, file_name, &settings.file_resolver.exclude) {
debug!("Ignored path via `exclude`: {:?}", path);
return true;
} else if !settings.file_resolver.extend_exclude.is_empty()
&& match_exclusion(path, file_name, &settings.file_resolver.extend_exclude)
{
} else if match_exclusion(path, file_name, &settings.file_resolver.extend_exclude) {
debug!("Ignored path via `extend-exclude`: {:?}", path);
return true;
}
} else {
debug!("Ignored path due to error in parsing: {:?}", path);
return true;
break;
}
if path == settings.file_resolver.project_root {
// Bail out; we'd end up past the project root on the next iteration
@ -489,7 +530,7 @@ fn is_file_excluded(
/// Return `true` if the given file should be ignored based on the exclusion
/// criteria.
fn match_exclusion<P: AsRef<Path>, R: AsRef<Path>>(
pub fn match_exclusion<P: AsRef<Path>, R: AsRef<Path>>(
file_path: P,
file_basename: R,
exclusion: &globset::GlobSet,
@ -515,7 +556,7 @@ mod tests {
use crate::resolver::{
is_file_excluded, match_exclusion, python_files_in_path, resolve_root_settings,
ConfigurationTransformer, PyprojectConfig, PyprojectDiscoveryStrategy, Relativity,
Resolver,
ResolvedFile, Resolver,
};
use crate::settings::Settings;
use crate::tests::test_resource_path;
@ -584,12 +625,12 @@ mod tests {
&NoOpTransformer,
)?;
let paths = paths
.iter()
.into_iter()
.flatten()
.map(ignore::DirEntry::path)
.map(ResolvedFile::into_path)
.sorted()
.collect::<Vec<_>>();
assert_eq!(paths, &[file2, file1]);
assert_eq!(paths, [file2, file1]);
Ok(())
}