Add TOML files to SourceType (#6929)

## Summary

This PR adds a higher-level enum (`SourceType`) around `PySourceType` to
allow us to use the same detection path to handle TOML files. Right now,
we have ad hoc `is_pyproject_toml` checks littered around, and some
codepaths are omitting that logic altogether (like `add_noqa`). Instead,
we should always be required to check the source type and handle TOML
files as appropriate.

This PR will also help with our pre-commit capabilities. If we add
`toml` to pre-commit (to support `pyproject.toml`), pre-commit will
start to pass _other_ files to Ruff (along with `poetry.lock` and
`Pipfile` -- see
[identify](b59996304f/identify/extensions.py (L355))).
By detecting those files and handling those cases, we avoid attempting
to parse them as Python files, which would lead to pre-commit errors.
(We tried to add `toml` to pre-commit here
(https://github.com/astral-sh/ruff-pre-commit/pull/44), but had to
revert here (https://github.com/astral-sh/ruff-pre-commit/pull/45) as it
led to the pre-commit hook attempting to parse `poetry.lock` files as
Python files.)
This commit is contained in:
Charlie Marsh 2023-08-28 11:01:48 -04:00 committed by GitHub
parent 2893a9f6b5
commit 58f5f27dc3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 135 additions and 136 deletions

View file

@ -268,9 +268,12 @@ pub fn check_path(
const MAX_ITERATIONS: usize = 100;
/// Add any missing `# noqa` pragmas to the source code at the given `Path`.
pub fn add_noqa_to_path(path: &Path, package: Option<&Path>, settings: &Settings) -> Result<usize> {
let source_type = PySourceType::from(path);
pub fn add_noqa_to_path(
path: &Path,
package: Option<&Path>,
source_type: PySourceType,
settings: &Settings,
) -> Result<usize> {
// Read the file from disk.
let contents = std::fs::read_to_string(path)?;

View file

@ -3,7 +3,9 @@ use crate::jupyter::Notebook;
#[derive(Clone, Debug, PartialEq, is_macro::Is)]
pub enum SourceKind {
/// The source contains Python source code.
Python(String),
/// The source contains a Jupyter notebook.
IpyNotebook(Notebook),
}

View file

@ -8,7 +8,7 @@ use rayon::prelude::*;
use ruff::linter::add_noqa_to_path;
use ruff::warn_user_once;
use ruff_python_stdlib::path::{is_jupyter_notebook, is_project_toml};
use ruff_python_ast::{PySourceType, SourceType};
use ruff_workspace::resolver::{python_files_in_path, PyprojectConfig};
use crate::args::Overrides;
@ -46,15 +46,17 @@ pub(crate) fn add_noqa(
.flatten()
.filter_map(|entry| {
let path = entry.path();
if is_project_toml(path) || is_jupyter_notebook(path) {
let SourceType::Python(source_type @ (PySourceType::Python | PySourceType::Stub)) =
SourceType::from(path)
else {
return None;
}
};
let package = path
.parent()
.and_then(|parent| package_roots.get(parent))
.and_then(|package| *package);
let settings = resolver.resolve(path, pyproject_config);
match add_noqa_to_path(path, package, settings) {
match add_noqa_to_path(path, package, source_type, settings) {
Ok(count) => Some(count),
Err(e) => {
error!("Failed to add noqa to {}: {e}", path.display());

View file

@ -12,7 +12,7 @@ use tracing::{span, Level};
use ruff::fs;
use ruff::warn_user_once;
use ruff_formatter::LineWidth;
use ruff_python_ast::PySourceType;
use ruff_python_ast::{PySourceType, SourceType};
use ruff_python_formatter::{format_module, FormatModuleError, PyFormatOptions};
use ruff_workspace::resolver::python_files_in_path;
@ -37,23 +37,21 @@ pub(crate) fn format(cli: &Arguments, overrides: &Overrides) -> Result<ExitStatu
let result = paths
.into_par_iter()
.map(|dir_entry| {
let dir_entry = dir_entry?;
let path = dir_entry.path();
let source_type = PySourceType::from(path);
if !(source_type.is_python() || source_type.is_stub())
|| path
.extension()
.is_some_and(|extension| extension == "toml")
{
return Ok(());
.map(|entry| {
let entry = entry?;
let path = entry.path();
if matches!(
SourceType::from(path),
SourceType::Python(PySourceType::Python | PySourceType::Stub)
) {
let line_length = resolver.resolve(path, &pyproject_config).line_length;
let options = PyFormatOptions::from_extension(path)
.with_line_width(LineWidth::from(NonZeroU16::from(line_length)));
format_path(path, options)
} else {
Ok(())
}
let line_length = resolver.resolve(path, &pyproject_config).line_length;
let options = PyFormatOptions::from_extension(path)
.with_line_width(LineWidth::from(NonZeroU16::from(line_length)));
format_path(path, options)
})
.map(|result| {
result.map_err(|err| {

View file

@ -27,8 +27,7 @@ use ruff::{fs, IOError};
use ruff_diagnostics::Diagnostic;
use ruff_macros::CacheKey;
use ruff_python_ast::imports::ImportMap;
use ruff_python_ast::PySourceType;
use ruff_python_stdlib::path::is_project_toml;
use ruff_python_ast::{PySourceType, SourceType, TomlSourceType};
use ruff_source_file::{LineIndex, SourceCode, SourceFileBuilder};
use ruff_text_size::{TextRange, TextSize};
@ -228,36 +227,36 @@ pub(crate) fn lint_path(
debug!("Checking: {}", path.display());
// We have to special case this here since the Python tokenizer doesn't work with TOML.
if is_project_toml(path) {
let messages = if settings
.lib
.rules
.iter_enabled()
.any(|rule_code| rule_code.lint_source().is_pyproject_toml())
{
let contents = match std::fs::read_to_string(path) {
Ok(contents) => contents,
Err(err) => {
return Ok(Diagnostics::from_io_error(&err, path, &settings.lib));
}
let source_type = match SourceType::from(path) {
SourceType::Toml(TomlSourceType::Pyproject) => {
let messages = if settings
.lib
.rules
.iter_enabled()
.any(|rule_code| rule_code.lint_source().is_pyproject_toml())
{
let contents = match std::fs::read_to_string(path) {
Ok(contents) => contents,
Err(err) => {
return Ok(Diagnostics::from_io_error(&err, path, &settings.lib));
}
};
let source_file = SourceFileBuilder::new(path.to_string_lossy(), contents).finish();
lint_pyproject_toml(source_file, &settings.lib)
} else {
vec![]
};
let source_file = SourceFileBuilder::new(path.to_string_lossy(), contents).finish();
lint_pyproject_toml(source_file, &settings.lib)
} else {
vec![]
};
return Ok(Diagnostics {
messages,
..Diagnostics::default()
});
}
return Ok(Diagnostics {
messages,
..Diagnostics::default()
});
}
SourceType::Toml(_) => return Ok(Diagnostics::default()),
SourceType::Python(source_type) => source_type,
};
// Extract the sources from the file.
let LintSources {
source_type,
source_kind,
} = match LintSources::try_from_path(path) {
let LintSource(source_kind) = match LintSource::try_from_path(path, source_type) {
Ok(sources) => sources,
Err(SourceExtractionError::Io(err)) => {
return Ok(Diagnostics::from_io_error(&err, path, &settings.lib));
@ -438,21 +437,24 @@ pub(crate) fn lint_stdin(
noqa: flags::Noqa,
autofix: flags::FixMode,
) -> Result<Diagnostics> {
// Extract the sources from the file.
let LintSources {
source_type,
source_kind,
} = match LintSources::try_from_source_code(contents, path) {
Ok(sources) => sources,
Err(SourceExtractionError::Io(err)) => {
// SAFETY: An `io::Error` can only occur if we're reading from a path.
return Ok(Diagnostics::from_io_error(&err, path.unwrap(), settings));
}
Err(SourceExtractionError::Diagnostics(diagnostics)) => {
return Ok(*diagnostics);
}
// TODO(charlie): Support `pyproject.toml`.
let SourceType::Python(source_type) = path.map(SourceType::from).unwrap_or_default() else {
return Ok(Diagnostics::default());
};
// Extract the sources from the file.
let LintSource(source_kind) =
match LintSource::try_from_source_code(contents, path, source_type) {
Ok(sources) => sources,
Err(SourceExtractionError::Io(err)) => {
// SAFETY: An `io::Error` can only occur if we're reading from a path.
return Ok(Diagnostics::from_io_error(&err, path.unwrap(), settings));
}
Err(SourceExtractionError::Diagnostics(diagnostics)) => {
return Ok(*diagnostics);
}
};
// Lint the inputs.
let (
LinterResult {
@ -554,58 +556,40 @@ pub(crate) fn lint_stdin(
}
#[derive(Debug)]
struct LintSources {
/// The "type" of source code, e.g. `.py`, `.pyi`, `.ipynb`, etc.
source_type: PySourceType,
/// The "kind" of source, e.g. Python file, Jupyter Notebook, etc.
source_kind: SourceKind,
}
struct LintSource(SourceKind);
impl LintSources {
/// Extract the lint [`LintSources`] from the given file path.
fn try_from_path(path: &Path) -> Result<LintSources, SourceExtractionError> {
let source_type = PySourceType::from(path);
// Read the file from disk.
impl LintSource {
/// Extract the lint [`LintSource`] from the given file path.
fn try_from_path(
path: &Path,
source_type: PySourceType,
) -> Result<LintSource, SourceExtractionError> {
if source_type.is_ipynb() {
let notebook = notebook_from_path(path).map_err(SourceExtractionError::Diagnostics)?;
let source_kind = SourceKind::IpyNotebook(notebook);
Ok(LintSources {
source_type,
source_kind,
})
Ok(LintSource(source_kind))
} else {
// This is tested by ruff_cli integration test `unreadable_file`
let contents = std::fs::read_to_string(path).map_err(SourceExtractionError::Io)?;
Ok(LintSources {
source_type,
source_kind: SourceKind::Python(contents),
})
Ok(LintSource(SourceKind::Python(contents)))
}
}
/// Extract the lint [`LintSources`] from the raw string contents, optionally accompanied by a
/// Extract the lint [`LintSource`] from the raw string contents, optionally accompanied by a
/// file path indicating the path to the file from which the contents were read. If provided,
/// the file path should be used for diagnostics, but not for reading the file from disk.
fn try_from_source_code(
source_code: String,
path: Option<&Path>,
) -> Result<LintSources, SourceExtractionError> {
let source_type = path.map(PySourceType::from).unwrap_or_default();
source_type: PySourceType,
) -> Result<LintSource, SourceExtractionError> {
if source_type.is_ipynb() {
let notebook = notebook_from_source_code(&source_code, path)
.map_err(SourceExtractionError::Diagnostics)?;
let source_kind = SourceKind::IpyNotebook(notebook);
Ok(LintSources {
source_type,
source_kind,
})
Ok(LintSource(source_kind))
} else {
Ok(LintSources {
source_type,
source_kind: SourceKind::Python(source_code),
})
Ok(LintSource(SourceKind::Python(source_code)))
}
}
}

View file

@ -24,32 +24,63 @@ pub mod types;
pub mod visitor;
pub mod whitespace;
#[derive(Clone, Copy, Debug, Default, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum PySourceType {
#[default]
Python,
Stub,
Ipynb,
/// The type of a source file.
#[derive(Clone, Copy, Debug, PartialEq, is_macro::Is)]
pub enum SourceType {
/// The file contains Python source code.
Python(PySourceType),
/// The file contains TOML.
Toml(TomlSourceType),
}
impl PySourceType {
pub const fn is_python(&self) -> bool {
matches!(self, PySourceType::Python)
impl Default for SourceType {
fn default() -> Self {
Self::Python(PySourceType::Python)
}
}
pub const fn is_stub(&self) -> bool {
matches!(self, PySourceType::Stub)
impl From<&Path> for SourceType {
fn from(path: &Path) -> Self {
match path.file_name() {
Some(filename) if filename == "pyproject.toml" => Self::Toml(TomlSourceType::Pyproject),
Some(filename) if filename == "Pipfile" => Self::Toml(TomlSourceType::Pipfile),
Some(filename) if filename == "poetry.lock" => Self::Toml(TomlSourceType::Poetry),
_ => match path.extension() {
Some(ext) if ext == "toml" => Self::Toml(TomlSourceType::Unrecognized),
_ => Self::Python(PySourceType::from(path)),
},
}
}
}
pub const fn is_ipynb(&self) -> bool {
matches!(self, PySourceType::Ipynb)
}
#[derive(Clone, Copy, Debug, PartialEq, is_macro::Is)]
pub enum TomlSourceType {
/// The source is a `pyproject.toml`.
Pyproject,
/// The source is a `Pipfile`.
Pipfile,
/// The source is a `poetry.lock`.
Poetry,
/// The source is an unrecognized TOML file.
Unrecognized,
}
#[derive(Clone, Copy, Debug, Default, PartialEq, is_macro::Is)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum PySourceType {
/// The source is a Python file (`.py`).
#[default]
Python,
/// The source is a Python stub file (`.pyi`).
Stub,
/// The source is a Jupyter notebook (`.ipynb`).
Ipynb,
}
impl From<&Path> for PySourceType {
fn from(path: &Path) -> Self {
match path.extension() {
Some(ext) if ext == "py" => PySourceType::Python,
Some(ext) if ext == "pyi" => PySourceType::Stub,
Some(ext) if ext == "ipynb" => PySourceType::Ipynb,
_ => PySourceType::Python,

View file

@ -1,13 +1,7 @@
use std::path::Path;
/// Return `true` if the [`Path`] appears to be that of a Python file.
pub fn is_python_file(path: &Path) -> bool {
path.extension()
.is_some_and(|ext| ext == "py" || ext == "pyi")
}
/// Return `true` if the [`Path`] is named `pyproject.toml`.
pub fn is_project_toml(path: &Path) -> bool {
pub fn is_pyproject_toml(path: &Path) -> bool {
path.file_name()
.is_some_and(|name| name == "pyproject.toml")
}
@ -26,22 +20,7 @@ pub fn is_jupyter_notebook(path: &Path) -> bool {
mod tests {
use std::path::Path;
use crate::path::{is_jupyter_notebook, is_python_file};
#[test]
fn inclusions() {
let path = Path::new("foo/bar/baz.py");
assert!(is_python_file(path));
let path = Path::new("foo/bar/baz.pyi");
assert!(is_python_file(path));
let path = Path::new("foo/bar/baz.js");
assert!(!is_python_file(path));
let path = Path::new("foo/bar/baz");
assert!(!is_python_file(path));
}
use crate::path::is_jupyter_notebook;
#[test]
fn test_is_jupyter_notebook() {