Lint pyproject.toml (#4496)

This adds a new rule `InvalidPyprojectToml` that lints pyproject.toml by checking if https://github.com/PyO3/pyproject-toml-rs can parse it. This means the linting is currently very basic, e.g. we don't check whether the name is actually a valid Python project name or appropriately normalized. It does catch errors e.g. with invalid dependency requirements or problems with the license specifications. It is open to be extended in the future (validate name, SPDX expressions, classifiers, ...), either in ruff or in pyproject-toml-rs.

Test plan:

```
scripts/ecosystem_all_check.sh check --select RUF200
```
This led to a bunch of
```
RUF200 Failed to parse pyproject.toml: missing field `name`
```
(e.g. https://github.com/amitsk/fastapi-todos/blob/main/pyproject.toml) which is indeed invalid (https://packaging.python.org/en/latest/specifications/declaring-project-metadata/#specification).

Filtering those out, the following other problems were found by `cd target/ecosystem_all_results/ && rg RUF200`:
```
UCL-ARC:rred-reports.stdout.txt
1:pyproject.toml:27:16: RUF200 Failed to parse pyproject.toml: Version specifier `>='3.9'` doesn't match PEP 440 rules
EndlessTrax:python-start-project.stdout.txt
1:pyproject.toml:14:16: RUF200 Failed to parse pyproject.toml: Expected package name starting with an alphanumeric character, found '#'
redjax:gardening-api.stdout.txt
1:pyproject.toml:7:11: RUF200 Failed to parse pyproject.toml: Version `` doesn't match PEP 440 rules
ajslater:codex.stdout.txt
2:  3:17 RUF200 Failed to parse pyproject.toml: invalid type: sequence, expected a string
LDmitriy7:404_AvatarsBot.stdout.txt
1:pyproject.toml:3:11: RUF200 Failed to parse pyproject.toml: Version `` doesn't match PEP 440 rules
ajslater:comicbox.stdout.txt
1:pyproject.toml:3:17: RUF200 Failed to parse pyproject.toml: invalid type: sequence, expected a string
manueldevillena:forecast-earnings.stdout.txt
1:pyproject.toml:24:12: RUF200 Failed to parse pyproject.toml: Expected one of `@`, `(`, `<`, `=`, `>`, `~`, `!`, `;`, found `^`
redjax:ohio_utility_scraper.stdout.txt
1:pyproject.toml:11:11: RUF200 Failed to parse pyproject.toml: Version `` doesn't match PEP 440 rules
agronholm:typeguard.stdout.txt
1:pyproject.toml:40:8: RUF200 Failed to parse pyproject.toml: Expected a valid marker name, found 'python_implementation'
cyuss:decathlon-turnover.stdout.txt
1:pyproject.toml:7:12: RUF200 Failed to parse pyproject.toml: invalid type: string "Youcef", expected a table with 'name' and 'email' keys
ajslater:boilerplate.stdout.txt
1:pyproject.toml:3:17: RUF200 Failed to parse pyproject.toml: invalid type: sequence, expected a string
kaparoo:lightning-project-template.stdout.txt
1:pyproject.toml:56:16: RUF200 Failed to parse pyproject.toml: You can't mix a >= operator with a local version (`+cu117`)
dijital20:pytexas2023-decorators.stdout.txt
1:pyproject.toml:5:11: RUF200 Failed to parse pyproject.toml: Version `` doesn't match PEP 440 rules
pfouque:django-anymail-history.stdout.txt
1:pyproject.toml:137:12: RUF200 Failed to parse pyproject.toml: Version specifier `> = 1.2.0` doesn't match PEP 440 rules
pfouque:django-fakemessages.stdout.txt
1:pyproject.toml:130:12: RUF200 Failed to parse pyproject.toml: Version specifier `> = 1.2.0` doesn't match PEP 440 rules
pypa:build.stdout.txt
1:tests/packages/test-invalid-requirements/pyproject.toml:2:12: RUF200 Failed to parse pyproject.toml: Expected one of `@`, `(`, `<`, `=`, `>`, `~`, `!`, `;`, found `i`
4:tests/packages/test-no-requires/pyproject.toml:1:1: RUF200 Failed to parse pyproject.toml: missing field `requires`
UnoYakshi:FRAAND.stdout.txt
2:  3:11 RUF200 Failed to parse pyproject.toml: Version `` doesn't match PEP 440 rules
DHolmanCoding:python-template.stdout.txt
1:pyproject.toml:22:1: RUF200 Failed to parse pyproject.toml: missing field `requires`
```
Overall, this emitted errors in 43 out of 3408 projects (`rg -c RUF200 target/ecosystem_all_results/ | wc -l`)


Co-authored-by: Micha Reiser <micha@reiser.io>
This commit is contained in:
konstin 2023-05-25 14:05:28 +02:00 committed by GitHub
parent 050350527c
commit b6a382eeaf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
21 changed files with 356 additions and 3 deletions

33
Cargo.lock generated
View file

@ -913,6 +913,7 @@ checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
dependencies = [
"autocfg",
"hashbrown",
"serde",
]
[[package]]
@ -1401,6 +1402,22 @@ dependencies = [
"unicode-width",
]
[[package]]
name = "pep508_rs"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "969679a29dfdc8278a449f75b3dd45edf57e649bd59f7502429c2840751c46d8"
dependencies = [
"once_cell",
"pep440_rs",
"regex",
"serde",
"thiserror",
"tracing",
"unicode-width",
"url",
]
[[package]]
name = "percent-encoding"
version = "2.2.0"
@ -1563,6 +1580,19 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "pyproject-toml"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f04dbbb336bd88583943c7cd973a32fed323578243a7569f40cb0c7da673321b"
dependencies = [
"indexmap",
"pep440_rs",
"pep508_rs",
"serde",
"toml",
]
[[package]]
name = "quick-junit"
version = "0.3.2"
@ -1751,6 +1781,7 @@ dependencies = [
"pathdiff",
"pep440_rs",
"pretty_assertions",
"pyproject-toml",
"quick-junit",
"regex",
"result-like",
@ -2601,6 +2632,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8"
dependencies = [
"cfg-if",
"log",
"pin-project-lite",
"tracing-attributes",
"tracing-core",
@ -2772,6 +2804,7 @@ dependencies = [
"form_urlencoded",
"idna",
"percent-encoding",
"serde",
]
[[package]]

View file

@ -50,6 +50,7 @@ path-absolutize = { workspace = true, features = [
] }
pathdiff = { version = "0.2.1" }
pep440_rs = { version = "0.3.1", features = ["serde"] }
pyproject-toml = { version = "0.6.0" }
quick-junit = { version = "0.3.2" }
regex = { workspace = true }
result-like = { version = "0.4.6" }

View file

@ -0,0 +1,7 @@
[project]
name = "hello-world"
version = "0.1.0"
# There's a comma missing here
dependencies = [
"tinycss2>=1.1.0<1.2",
]

View file

@ -0,0 +1,7 @@
[project]
name = "hello-world"
version = "0.1.0"
# Ensure that the spans from toml handle utf-8 correctly
authors = [
{ name = "Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯̞̠͍A̴̵̜̰͔ͫ͗͢L̠ͨͧͩ͘G̴̻͈͍͔̹̑͗̎̅͛́Ǫ̵̹̻̝̳͂̌̌͘", email = 1 }
]

View file

@ -0,0 +1,57 @@
# This is a valid pyproject.toml
# https://github.com/PyO3/maturin/blob/87ac3d9f74dd79ef2df9a20880b9f1fa23f9a437/pyproject.toml
[build-system]
requires = ["setuptools", "wheel>=0.36.2", "tomli>=1.1.0 ; python_version<'3.11'", "setuptools-rust>=1.4.0"]
build-backend = "setuptools.build_meta"
[project]
name = "maturin"
requires-python = ">=3.7"
classifiers = [
"Topic :: Software Development :: Build Tools",
"Programming Language :: Rust",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = ["tomli>=1.1.0 ; python_version<'3.11'"]
dynamic = [
"authors",
"description",
"license",
"readme",
"version"
]
[project.optional-dependencies]
zig = [
"ziglang~=0.10.0",
]
patchelf = [
"patchelf",
]
[project.urls]
"Source Code" = "https://github.com/PyO3/maturin"
Issues = "https://github.com/PyO3/maturin/issues"
Documentation = "https://maturin.rs"
Changelog = "https://maturin.rs/changelog.html"
[tool.maturin]
bindings = "bin"
[tool.black]
target_version = ['py37']
extend-exclude = '''
# Ignore cargo-generate templates
^/src/templates
'''
[tool.ruff]
line-length = 120
target-version = "py37"
[tool.mypy]
disallow_untyped_defs = true
disallow_incomplete_defs = true
warn_no_return = true
ignore_missing_imports = true

View file

@ -0,0 +1,39 @@
# license-files is wrong here
# https://github.com/PyO3/maturin/issues/1615
[build-system]
requires = [ "maturin>=0.14", "numpy", "wheel", "patchelf",]
build-backend = "maturin"
[project]
name = "..."
license-files = [ "license.txt",]
requires-python = ">=3.8"
requires-dist = [ "maturin>=0.14", "...",]
dependencies = [ "packaging", "...",]
zip-safe = false
version = "..."
readme = "..."
description = "..."
classifiers = [ "...",]
[[project.authors]]
name = "..."
email = "..."
[project.urls]
homepage = "..."
documentation = "..."
repository = "..."
[project.optional-dependencies]
test = [ "coverage", "...",]
docs = [ "sphinx", "sphinx-rtd-theme",]
devel = []
[tool.maturin]
include = [ "...",]
bindings = "pyo3"
compatibility = "manylinux2014"
[tool.pytest.ini_options]
testpaths = [ "...",]
addopts = "--color=yes --tb=native --cov-report term --cov-report html:docs/dist_coverage --cov=aisdb --doctest-modules --envfile .env"

View file

@ -722,6 +722,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Ruff, "009") => (RuleGroup::Unspecified, Rule::FunctionCallInDataclassDefaultArgument),
(Ruff, "010") => (RuleGroup::Unspecified, Rule::ExplicitFStringTypeConversion),
(Ruff, "100") => (RuleGroup::Unspecified, Rule::UnusedNOQA),
(Ruff, "200") => (RuleGroup::Unspecified, Rule::InvalidPyprojectToml),
// flake8-django
(Flake8Django, "001") => (RuleGroup::Unspecified, Rule::DjangoNullableModelStringField),

View file

@ -27,6 +27,7 @@ pub mod logging;
pub mod message;
mod noqa;
pub mod packaging;
pub mod pyproject_toml;
pub mod registry;
pub mod resolver;
mod rule_redirects;

View file

@ -0,0 +1,62 @@
use anyhow::Result;
use pyproject_toml::{BuildSystem, Project};
use ruff_text_size::{TextRange, TextSize};
use serde::{Deserialize, Serialize};
use ruff_diagnostics::Diagnostic;
use ruff_python_ast::source_code::SourceFile;
use crate::message::Message;
use crate::rules::ruff::rules::InvalidPyprojectToml;
use crate::IOError;
/// Minimal model of a `pyproject.toml`, used only to check that the file deserializes.
///
/// Unlike [`pyproject_toml::PyProjectToml`], in our case `build_system` is also optional,
/// so a file that declares neither `[build-system]` nor `[project]` is accepted.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
// Field names are mapped to their kebab-case TOML keys (`build_system` <-> `build-system`).
#[serde(rename_all = "kebab-case")]
struct PyProjectToml {
/// Build-related data (the `[build-system]` table), if present
build_system: Option<BuildSystem>,
/// Project metadata (the `[project]` table), if present
project: Option<Project>,
}
/// Lints the text of a `pyproject.toml` by trying to deserialize it into [`PyProjectToml`].
///
/// Returns an empty list when deserialization succeeds. Otherwise exactly one [`Message`]
/// is returned:
///
/// * an [`InvalidPyprojectToml`] diagnostic carrying the parser's error message, placed at
///   the span reported by `toml` (or at an empty default range when no span is available);
/// * an [`IOError`] diagnostic when the end of the reported span does not fit into a
///   [`TextSize`].
///
/// Every path through this function returns `Ok`.
pub fn lint_pyproject_toml(source_file: SourceFile) -> Result<Vec<Message>> {
    let Err(parse_error) = toml::from_str::<PyProjectToml>(source_file.source_text()) else {
        return Ok(Vec::default());
    };

    // Only the end offset is converted eagerly; the start offset is converted once we know
    // that the end offset fits.
    let span_offsets = parse_error
        .span()
        .map(|span| (span.start, TextSize::try_from(span.end)));

    let diagnostic = match span_offsets {
        // This is bad but sometimes toml and/or serde just don't give us spans
        // TODO(konstin,micha): https://github.com/charliermarsh/ruff/issues/4571
        None => Diagnostic::new(
            InvalidPyprojectToml {
                message: parse_error.message().to_string(),
            },
            TextRange::default(),
        ),
        Some((_, Err(_))) => Diagnostic::new(
            IOError {
                message: "pyproject.toml is larger than 4GB".to_string(),
            },
            TextRange::default(),
        ),
        Some((start, Ok(end))) => Diagnostic::new(
            InvalidPyprojectToml {
                message: parse_error.message().to_string(),
            },
            TextRange::new(
                // start <= end, so if end < 4GB follows start < 4GB
                TextSize::try_from(start).unwrap(),
                end,
            ),
        ),
    };

    Ok(vec![Message::from_diagnostic(
        diagnostic,
        source_file,
        TextSize::default(),
    )])
}

View file

@ -646,6 +646,7 @@ ruff_macros::register_rules!(
rules::ruff::rules::MutableDataclassDefault,
rules::ruff::rules::FunctionCallInDataclassDefaultArgument,
rules::ruff::rules::ExplicitFStringTypeConversion,
rules::ruff::rules::InvalidPyprojectToml,
// flake8-django
rules::flake8_django::rules::DjangoNullableModelStringField,
rules::flake8_django::rules::DjangoLocalsInRenderFunction,

View file

@ -4,16 +4,20 @@ pub(crate) mod rules;
#[cfg(test)]
mod tests {
use std::fs;
use std::path::Path;
use anyhow::Result;
use rustc_hash::FxHashSet;
use test_case::test_case;
use ruff_python_ast::source_code::SourceFileBuilder;
use crate::pyproject_toml::lint_pyproject_toml;
use crate::registry::Rule;
use crate::settings::resolve_per_file_ignores;
use crate::settings::types::PerFileIgnore;
use crate::test::test_path;
use crate::test::{test_path, test_resource_path};
use crate::{assert_messages, settings};
#[test_case(Rule::ExplicitFStringTypeConversion, Path::new("RUF010.py"); "RUF010")]
@ -174,4 +178,22 @@ mod tests {
assert_messages!(snapshot, diagnostics);
Ok(())
}
// Each case names a fixture directory under `fixtures/ruff/pyproject_toml/` that contains a
// `pyproject.toml`; the resulting messages are compared against a snapshot named after the
// rule's noqa code and the fixture directory.
#[test_case(Rule::InvalidPyprojectToml, Path::new("bleach"))]
#[test_case(Rule::InvalidPyprojectToml, Path::new("invalid_author"))]
#[test_case(Rule::InvalidPyprojectToml, Path::new("maturin"))]
#[test_case(Rule::InvalidPyprojectToml, Path::new("maturin_gh_1615"))]
fn invalid_pyproject_toml(rule_code: Rule, path: &Path) -> Result<()> {
    let noqa_code = rule_code.noqa_code();
    let fixture_name = path.to_string_lossy();
    let snapshot = format!("{}_{}", noqa_code, fixture_name);

    let fixture_dir = test_resource_path("fixtures")
        .join("ruff")
        .join("pyproject_toml")
        .join(path);
    let contents = fs::read_to_string(fixture_dir.join("pyproject.toml"))?;

    // The file is always presented to the linter under the bare name `pyproject.toml`.
    let builder = SourceFileBuilder::new("pyproject.toml", contents);
    let messages = lint_pyproject_toml(builder.finish())?;

    assert_messages!(snapshot, messages);
    Ok(())
}
}

View file

@ -0,0 +1,45 @@
use ruff_diagnostics::{AutofixKind, Violation};
use ruff_macros::{derive_message_formats, violation};
/// ## What it does
/// Checks for any pyproject.toml that does not conform to the schema from the relevant PEPs.
///
/// ## Why is this bad?
/// Your project may contain invalid metadata or configuration without you noticing.
///
/// ## Example
/// ```toml
/// [project]
/// name = "crab"
/// version = "1.0.0"
/// authors = ["Ferris the Crab <ferris@example.org>"]
/// ```
///
/// Use instead:
/// ```toml
/// [project]
/// name = "crab"
/// version = "1.0.0"
/// authors = [
///   { name = "Ferris the Crab", email = "ferris@example.org" }
/// ]
/// ```
///
/// ## References
/// - [Specification of `[project]` in pyproject.toml](https://packaging.python.org/en/latest/specifications/declaring-project-metadata/)
/// - [Specification of `[build-system]` in pyproject.toml](https://peps.python.org/pep-0518/)
/// - [Draft but implemented license declaration extensions](https://peps.python.org/pep-0639)
#[violation]
pub struct InvalidPyprojectToml {
// Error text describing why the file was rejected; appended to the rule's message.
pub message: String,
}
impl Violation for InvalidPyprojectToml {
// This rule only reports the problem; it declares no autofix.
const AUTOFIX: AutofixKind = AutofixKind::None;
// The user-facing text is a fixed prefix followed by the stored error text.
#[derive_message_formats]
fn message(&self) -> String {
format!("Failed to parse pyproject.toml: {}", self.message)
}
}

View file

@ -9,6 +9,7 @@ pub(crate) use collection_literal_concatenation::{
pub(crate) use explicit_f_string_type_conversion::{
explicit_f_string_type_conversion, ExplicitFStringTypeConversion,
};
pub(crate) use invalid_pyproject_toml::InvalidPyprojectToml;
pub(crate) use mutable_defaults_in_dataclass_fields::{
function_call_in_dataclass_defaults, is_dataclass, mutable_dataclass_default,
FunctionCallInDataclassDefaultArgument, MutableDataclassDefault,
@ -21,6 +22,7 @@ mod asyncio_dangling_task;
mod collection_literal_concatenation;
mod confusables;
mod explicit_f_string_type_conversion;
mod invalid_pyproject_toml;
mod mutable_defaults_in_dataclass_fields;
mod pairwise_over_zipped;
mod unused_noqa;

View file

@ -0,0 +1,17 @@
---
source: crates/ruff/src/rules/ruff/mod.rs
---
pyproject.toml:5:16: RUF200 Failed to parse pyproject.toml: Version specifier `>=1.1.0<1.2` doesn't match PEP 440 rules
tinycss2>=1.1.0<1.2
^^^^^^^^^^^
|
5 | version = "0.1.0"
6 | # There's a comma missing here
7 | dependencies = [
| ________________^
8 | | "tinycss2>=1.1.0<1.2",
9 | | ]
| |_^ RUF200
|

View file

@ -0,0 +1,13 @@
---
source: crates/ruff/src/rules/ruff/mod.rs
---
pyproject.toml:6:84: RUF200 Failed to parse pyproject.toml: invalid type: integer `1`, expected a string
|
6 | # Ensure that the spans from toml handle utf-8 correctly
7 | authors = [
8 | { name = "Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯̞̠͍A̴̵̜̰͔ͫ͗͢L̠ͨͧͩ͘G̴̻͈͍͔̹̑͗̎̅͛́Ǫ̵̹̻̝̳͂̌̌͘", email = 1 }
| ^ RUF200
9 | ]
|

View file

@ -0,0 +1,4 @@
---
source: crates/ruff/src/rules/ruff/mod.rs
---

View file

@ -0,0 +1,14 @@
---
source: crates/ruff/src/rules/ruff/mod.rs
---
pyproject.toml:9:17: RUF200 Failed to parse pyproject.toml: wanted string or table
|
9 | [project]
10 | name = "..."
11 | license-files = [ "license.txt",]
| ^^^^^^^^^^^^^^^^^ RUF200
12 | requires-python = ">=3.8"
13 | requires-dist = [ "maturin>=0.14", "...",]
|

View file

@ -58,8 +58,13 @@ pub static EXCLUDE: Lazy<Vec<FilePattern>> = Lazy::new(|| {
]
});
pub static INCLUDE: Lazy<Vec<FilePattern>> =
Lazy::new(|| vec![FilePattern::Builtin("*.py"), FilePattern::Builtin("*.pyi")]);
pub static INCLUDE: Lazy<Vec<FilePattern>> = Lazy::new(|| {
vec![
FilePattern::Builtin("*.py"),
FilePattern::Builtin("*.pyi"),
FilePattern::Builtin("**/pyproject.toml"),
]
});
impl Default for Settings {
fn default() -> Self {

View file

@ -18,9 +18,11 @@ use ruff::jupyter::{is_jupyter_notebook, JupyterIndex, JupyterNotebook};
use ruff::linter::{lint_fix, lint_only, FixTable, FixerResult, LinterResult};
use ruff::logging::DisplayParseError;
use ruff::message::Message;
use ruff::pyproject_toml::lint_pyproject_toml;
use ruff::settings::{flags, AllSettings, Settings};
use ruff_python_ast::imports::ImportMap;
use ruff_python_ast::source_code::{LineIndex, SourceCode, SourceFileBuilder};
use ruff_python_stdlib::path::is_project_toml;
use crate::cache;
@ -130,6 +132,17 @@ pub(crate) fn lint_path(
debug!("Checking: {}", path.display());
// We have to special case this here since the python tokenizer doesn't work with toml
if is_project_toml(path) {
let contents = std::fs::read_to_string(path)?;
let source_file = SourceFileBuilder::new(path.to_string_lossy(), contents).finish();
let messages = lint_pyproject_toml(source_file)?;
return Ok(Diagnostics {
messages,
..Diagnostics::default()
});
}
// Read the file from disk
let (contents, jupyter_index) = if is_jupyter_notebook(path) {
match load_jupyter_notebook(path) {

View file

@ -6,6 +6,12 @@ pub fn is_python_file(path: &Path) -> bool {
.map_or(false, |ext| ext == "py" || ext == "pyi")
}
/// Return `true` if the final component of the [`Path`] is exactly `pyproject.toml`.
///
/// Paths without a final file name component (such as an empty path or `..`) yield `false`.
pub fn is_project_toml(path: &Path) -> bool {
    matches!(path.file_name(), Some(name) if name == "pyproject.toml")
}
/// Return `true` if the [`Path`] appears to be that of a Python interface definition file (`.pyi`).
pub fn is_python_stub_file(path: &Path) -> bool {
path.extension().map_or(false, |ext| ext == "pyi")

3
ruff.schema.json generated
View file

@ -2246,6 +2246,9 @@
"RUF1",
"RUF10",
"RUF100",
"RUF2",
"RUF20",
"RUF200",
"S",
"S1",
"S10",