From f450e2e79d9f7923a0b3b5caa1a5a643c75457f0 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Wed, 11 Jan 2023 22:32:14 -0500 Subject: [PATCH] Implement doc line length enforcement (#1804) This PR implements `W505` (`DocLineTooLong`), which is similar to `E501` (`LineTooLong`) but confined to doc lines. I based the "doc line" definition on pycodestyle, which defines a doc line as a standalone comment or string statement. Our definition is a bit more liberal, since we consider any string statement a doc line (even if it's part of a multi-line statement) -- but that seems fine to me. Note that, unusually, this rule requires custom extraction from both the token stream (to find standalone comments) and the AST (to find string statements). Closes #1784. --- README.md | 19 +++++ resources/test/fixtures/pycodestyle/W505.py | 15 ++++ ruff.schema.json | 12 +++ src/checkers/lines.rs | 35 +++++--- src/doc_lines.rs | 58 +++++++++++++ src/lib.rs | 1 + src/linter.rs | 21 ++++- src/pycodestyle/mod.rs | 17 ++++ src/pycodestyle/rules.rs | 82 +++++++++++++++---- src/pycodestyle/settings.rs | 13 +++ ...f__pycodestyle__tests__max_doc_length.snap | 53 ++++++++++++ src/registry.rs | 2 + src/settings/mod.rs | 6 +- src/violations.rs | 14 ++++ 14 files changed, 314 insertions(+), 34 deletions(-) create mode 100644 resources/test/fixtures/pycodestyle/W505.py create mode 100644 src/doc_lines.rs create mode 100644 src/pycodestyle/snapshots/ruff__pycodestyle__tests__max_doc_length.snap diff --git a/README.md b/README.md index 2de452e189..8320d806c2 100644 --- a/README.md +++ b/README.md @@ -597,6 +597,7 @@ For more, see [pycodestyle](https://pypi.org/project/pycodestyle/2.9.1/) on PyPI | E902 | IOError | IOError: `...` | | | E999 | SyntaxError | SyntaxError: `...` | | | W292 | NoNewLineAtEndOfFile | No newline at end of file | 🛠 | +| W505 | DocLineTooLong | Doc line too long (89 > 88 characters) | | | W605 | InvalidEscapeSequence | Invalid escape sequence: '\c' | 🛠 | ### mccabe (C90) @@ -3161,6 +3162,24 @@ ignore-overlong-task-comments = true --- +#### [`max-doc-length`](#max-doc-length) + +The maximum line length to allow for line-length violations within +documentation (`W505`), including standalone comments. + +**Default value**: `None` + +**Type**: `usize` + +**Example usage**: + +```toml +[tool.ruff.pycodestyle] +max-doc-length = 88 +``` + +--- + ### `pydocstyle` #### [`convention`](#convention) diff --git a/resources/test/fixtures/pycodestyle/W505.py b/resources/test/fixtures/pycodestyle/W505.py new file mode 100644 index 0000000000..62f5faae72 --- /dev/null +++ b/resources/test/fixtures/pycodestyle/W505.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +"""Here's a top-level docstring that's over the limit.""" + + +def f(): + """Here's a docstring that's also over the limit.""" + + x = 1 # Here's a comment that's over the limit, but it's not standalone. + + # Here's a standalone comment that's over the limit. + + print("Here's a string that's over the limit, but it's not a docstring.") + + +"This is also considered a docstring, and is over the limit." diff --git a/ruff.schema.json b/ruff.schema.json index 86a101e3c4..bfaa9af202 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -945,6 +945,15 @@ "boolean", "null" ] + }, + "max-doc-length": { + "description": "The maximum line length to allow for line-length violations within documentation (`W505`), including standalone comments.", + "type": [ + "integer", + "null" + ], + "format": "uint", + "minimum": 0.0 } }, "additionalProperties": false @@ -1616,6 +1625,9 @@ "W2", "W29", "W292", + "W5", + "W50", + "W505", "W6", "W60", "W605", diff --git a/src/checkers/lines.rs b/src/checkers/lines.rs index 4895cbec87..53e2411e78 100644 --- a/src/checkers/lines.rs +++ b/src/checkers/lines.rs @@ -1,6 +1,6 @@ //! Lint rules based on checking raw physical lines. -use crate::pycodestyle::rules::{line_too_long, no_newline_at_end_of_file}; +use crate::pycodestyle::rules::{doc_line_too_long, line_too_long, no_newline_at_end_of_file}; use crate::pygrep_hooks::rules::{blanket_noqa, blanket_type_ignore}; use crate::pyupgrade::rules::unnecessary_coding_comment; use crate::registry::{Diagnostic, RuleCode}; @@ -9,18 +9,21 @@ use crate::settings::{flags, Settings}; pub fn check_lines( contents: &str, commented_lines: &[usize], + doc_lines: &[usize], settings: &Settings, autofix: flags::Autofix, ) -> Vec { let mut diagnostics: Vec = vec![]; - let enforce_unnecessary_coding_comment = settings.enabled.contains(&RuleCode::UP009); + let enforce_blanket_noqa = settings.enabled.contains(&RuleCode::PGH004); + let enforce_blanket_type_ignore = settings.enabled.contains(&RuleCode::PGH003); + let enforce_doc_line_too_long = settings.enabled.contains(&RuleCode::W505); let enforce_line_too_long = settings.enabled.contains(&RuleCode::E501); let enforce_no_newline_at_end_of_file = settings.enabled.contains(&RuleCode::W292); - let enforce_blanket_type_ignore = settings.enabled.contains(&RuleCode::PGH003); - let enforce_blanket_noqa = settings.enabled.contains(&RuleCode::PGH004); + let enforce_unnecessary_coding_comment = settings.enabled.contains(&RuleCode::UP009); let mut commented_lines_iter = commented_lines.iter().peekable(); + let mut doc_lines_iter = doc_lines.iter().peekable(); for (index, line) in contents.lines().enumerate() { while commented_lines_iter .next_if(|lineno| &(index + 1) == *lineno) @@ -40,18 +43,25 @@ pub fn check_lines( } if enforce_blanket_type_ignore { - if commented_lines.contains(&(index + 1)) { - if let Some(diagnostic) = blanket_type_ignore(index, line) { - diagnostics.push(diagnostic); - } + if let Some(diagnostic) = blanket_type_ignore(index, line) { + diagnostics.push(diagnostic); } } if enforce_blanket_noqa { - if commented_lines.contains(&(index + 1)) { - if let Some(diagnostic) = blanket_noqa(index, line) { - diagnostics.push(diagnostic); - } + if let Some(diagnostic) = blanket_noqa(index, line) { + diagnostics.push(diagnostic); + } + } + } + + while doc_lines_iter + .next_if(|lineno| &(index + 1) == *lineno) + .is_some() + { + if enforce_doc_line_too_long { + if let Some(diagnostic) = doc_line_too_long(index, line, settings) { + diagnostics.push(diagnostic); } } } @@ -90,6 +100,7 @@ mod tests { check_lines( line, &[], + &[], &Settings { line_length, ..Settings::for_rule(RuleCode::E501) diff --git a/src/doc_lines.rs b/src/doc_lines.rs new file mode 100644 index 0000000000..a691a459fc --- /dev/null +++ b/src/doc_lines.rs @@ -0,0 +1,58 @@ +//! Doc line extraction. In this context, a doc line is a line consisting of a +//! standalone comment or a constant string statement. + +use rustpython_ast::{Constant, ExprKind, Stmt, StmtKind, Suite}; +use rustpython_parser::lexer::{LexResult, Tok}; + +use crate::ast::visitor; +use crate::ast::visitor::Visitor; + +/// Extract doc lines (standalone comments) from a token sequence. +pub fn doc_lines_from_tokens(lxr: &[LexResult]) -> Vec { + let mut doc_lines: Vec = Vec::default(); + let mut prev: Option = None; + for (start, tok, end) in lxr.iter().flatten() { + if matches!(tok, Tok::Indent | Tok::Dedent | Tok::Newline) { + continue; + } + if matches!(tok, Tok::Comment(..)) { + if let Some(prev) = prev { + if start.row() > prev { + doc_lines.push(start.row()); + } + } else { + doc_lines.push(start.row()); + } + } + prev = Some(end.row()); + } + doc_lines +} + +#[derive(Default)] +struct StringLinesVisitor { + string_lines: Vec, +} + +impl Visitor<'_> for StringLinesVisitor { + fn visit_stmt(&mut self, stmt: &Stmt) { + if let StmtKind::Expr { value } = &stmt.node { + if let ExprKind::Constant { + value: Constant::Str(..), + .. + } = &value.node + { + self.string_lines + .extend(value.location.row()..=value.end_location.unwrap().row()); + } + } + visitor::walk_stmt(self, stmt); + } +} + +/// Extract doc lines (standalone strings) from an AST. +pub fn doc_lines_from_ast(python_ast: &Suite) -> Vec { + let mut visitor = StringLinesVisitor::default(); + visitor.visit_body(python_ast); + visitor.string_lines +} diff --git a/src/lib.rs b/src/lib.rs index 4aa916d315..a6bdc5abf4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -93,4 +93,5 @@ cfg_if! { pub use lib_wasm::check; } } +pub mod doc_lines; pub mod flake8_pie; diff --git a/src/linter.rs b/src/linter.rs index 252555bbc7..1697741d46 100644 --- a/src/linter.rs +++ b/src/linter.rs @@ -19,6 +19,7 @@ use crate::checkers::lines::check_lines; use crate::checkers::noqa::check_noqa; use crate::checkers::tokens::check_tokens; use crate::directives::Directives; +use crate::doc_lines::{doc_lines_from_ast, doc_lines_from_tokens}; use crate::message::{Message, Source}; use crate::noqa::add_noqa; use crate::registry::{Diagnostic, LintSource, RuleCode}; @@ -70,6 +71,14 @@ pub(crate) fn check_path( // Aggregate all diagnostics. let mut diagnostics: Vec = vec![]; + // Collect doc lines. This requires a rare mix of tokens (for comments) and AST + // (for docstrings), which demands special-casing at this level. + let use_doc_lines = settings.enabled.contains(&RuleCode::W505); + let mut doc_lines = vec![]; + if use_doc_lines { + doc_lines.extend(doc_lines_from_tokens(&tokens)); + } + // Run the token-based rules. if settings .enabled @@ -89,7 +98,7 @@ pub(crate) fn check_path( .enabled .iter() .any(|rule_code| matches!(rule_code.lint_source(), LintSource::Imports)); - if use_ast || use_imports { + if use_ast || use_imports || use_doc_lines { match rustpython_helpers::parse_program_tokens(tokens, "") { Ok(python_ast) => { if use_ast { @@ -116,6 +125,9 @@ pub(crate) fn check_path( package, )); } + if use_doc_lines { + doc_lines.extend(doc_lines_from_ast(&python_ast)); + } } Err(parse_error) => { if settings.enabled.contains(&RuleCode::E999) { @@ -128,6 +140,12 @@ pub(crate) fn check_path( } } + // Deduplicate and reorder any doc lines. + if use_doc_lines { + doc_lines.sort_unstable(); + doc_lines.dedup(); + } + // Run the lines-based rules. if settings .enabled @@ -137,6 +155,7 @@ pub(crate) fn check_path( diagnostics.extend(check_lines( contents, &directives.commented_lines, + &doc_lines, settings, autofix, )); diff --git a/src/pycodestyle/mod.rs b/src/pycodestyle/mod.rs index 3d0128f841..1c78298742 100644 --- a/src/pycodestyle/mod.rs +++ b/src/pycodestyle/mod.rs @@ -67,6 +67,7 @@ mod tests { &settings::Settings { pycodestyle: Settings { ignore_overlong_task_comments, + ..Settings::default() }, ..settings::Settings::for_rule(RuleCode::E501) }, @@ -74,4 +75,20 @@ mod tests { insta::assert_yaml_snapshot!(snapshot, diagnostics); Ok(()) } + + #[test] + fn max_doc_length() -> Result<()> { + let diagnostics = test_path( + Path::new("./resources/test/fixtures/pycodestyle/W505.py"), + &settings::Settings { + pycodestyle: Settings { + max_doc_length: Some(50), + ..Settings::default() + }, + ..settings::Settings::for_rule(RuleCode::W505) + }, + )?; + insta::assert_yaml_snapshot!(diagnostics); + Ok(()) + } } diff --git a/src/pycodestyle/rules.rs b/src/pycodestyle/rules.rs index 53deb15012..2aec73fd98 100644 --- a/src/pycodestyle/rules.rs +++ b/src/pycodestyle/rules.rs @@ -22,42 +22,88 @@ use crate::violations; static URL_REGEX: Lazy = Lazy::new(|| Regex::new(r"^https?://\S+$").unwrap()); -/// E501 -pub fn line_too_long(lineno: usize, line: &str, settings: &Settings) -> Option { - let line_length = line.chars().count(); - - if line_length <= settings.line_length { - return None; +fn is_overlong( + line: &str, + line_length: usize, + limit: usize, + ignore_overlong_task_comments: bool, + task_tags: &[String], +) -> bool { + if line_length <= limit { + return false; } let mut chunks = line.split_whitespace(); let (Some(first), Some(second)) = (chunks.next(), chunks.next()) else { // Single word / no printable chars - no way to make the line shorter - return None; + return false; }; if first == "#" { - if settings.pycodestyle.ignore_overlong_task_comments { + if ignore_overlong_task_comments { let second = second.trim_end_matches(':'); - if settings.task_tags.iter().any(|tag| tag == second) { - return None; + if task_tags.iter().any(|tag| tag == second) { + return false; } } // Do not enforce the line length for commented lines that end with a URL // or contain only a single word. if chunks.last().map_or(true, |c| URL_REGEX.is_match(c)) { - return None; + return false; } } - Some(Diagnostic::new( - violations::LineTooLong(line_length, settings.line_length), - Range::new( - Location::new(lineno + 1, settings.line_length), - Location::new(lineno + 1, line_length), - ), - )) + true +} + +/// E501 +pub fn line_too_long(lineno: usize, line: &str, settings: &Settings) -> Option { + let line_length = line.chars().count(); + let limit = settings.line_length; + if is_overlong( + line, + line_length, + limit, + settings.pycodestyle.ignore_overlong_task_comments, + &settings.task_tags, + ) { + Some(Diagnostic::new( + violations::LineTooLong(line_length, limit), + Range::new( + Location::new(lineno + 1, limit), + Location::new(lineno + 1, line_length), + ), + )) + } else { + None + } +} + +/// W505 +pub fn doc_line_too_long(lineno: usize, line: &str, settings: &Settings) -> Option { + let Some(limit) = settings.pycodestyle.max_doc_length else { + return None; + }; + + let line_length = line.chars().count(); + if is_overlong( + line, + line_length, + limit, + settings.pycodestyle.ignore_overlong_task_comments, + &settings.task_tags, + ) { + Some(Diagnostic::new( + violations::DocLineTooLong(line_length, limit), + Range::new( + Location::new(lineno + 1, limit), + Location::new(lineno + 1, line_length), + ), + )) + } else { + None + } } fn compare( diff --git a/src/pycodestyle/settings.rs b/src/pycodestyle/settings.rs index dab05086fc..36e96c8a10 100644 --- a/src/pycodestyle/settings.rs +++ b/src/pycodestyle/settings.rs @@ -9,6 +9,16 @@ use serde::{Deserialize, Serialize}; )] #[serde(deny_unknown_fields, rename_all = "kebab-case", rename = "Pycodestyle")] pub struct Options { + #[option( + default = "None", + value_type = "usize", + example = r#" + max-doc-length = 88 + "# + )] + /// The maximum line length to allow for line-length violations within + /// documentation (`W505`), including standalone comments. + pub max_doc_length: Option, #[option( default = "false", value_type = "bool", @@ -24,12 +34,14 @@ pub struct Options { #[derive(Debug, Default, Hash)] pub struct Settings { + pub max_doc_length: Option, pub ignore_overlong_task_comments: bool, } impl From for Settings { fn from(options: Options) -> Self { Self { + max_doc_length: options.max_doc_length, ignore_overlong_task_comments: options .ignore_overlong_task_comments .unwrap_or_default(), @@ -40,6 +52,7 @@ impl From for Settings { impl From for Options { fn from(settings: Settings) -> Self { Self { + max_doc_length: settings.max_doc_length, ignore_overlong_task_comments: Some(settings.ignore_overlong_task_comments), } } diff --git a/src/pycodestyle/snapshots/ruff__pycodestyle__tests__max_doc_length.snap b/src/pycodestyle/snapshots/ruff__pycodestyle__tests__max_doc_length.snap new file mode 100644 index 0000000000..452bda8595 --- /dev/null +++ b/src/pycodestyle/snapshots/ruff__pycodestyle__tests__max_doc_length.snap @@ -0,0 +1,53 @@ +--- +source: src/pycodestyle/mod.rs +expression: diagnostics +--- +- kind: + DocLineTooLong: + - 57 + - 50 + location: + row: 2 + column: 50 + end_location: + row: 2 + column: 57 + fix: ~ + parent: ~ +- kind: + DocLineTooLong: + - 56 + - 50 + location: + row: 6 + column: 50 + end_location: + row: 6 + column: 56 + fix: ~ + parent: ~ +- kind: + DocLineTooLong: + - 56 + - 50 + location: + row: 10 + column: 50 + end_location: + row: 10 + column: 56 + fix: ~ + parent: ~ +- kind: + DocLineTooLong: + - 61 + - 50 + location: + row: 15 + column: 50 + end_location: + row: 15 + column: 61 + fix: ~ + parent: ~ + diff --git a/src/registry.rs b/src/registry.rs index c264eaa72b..fedc846abf 100644 --- a/src/registry.rs +++ b/src/registry.rs @@ -132,6 +132,7 @@ define_rule_mapping!( E999 => violations::SyntaxError, // pycodestyle warnings W292 => violations::NoNewLineAtEndOfFile, + W505 => violations::DocLineTooLong, W605 => violations::InvalidEscapeSequence, // pyflakes F401 => violations::UnusedImport, @@ -786,6 +787,7 @@ impl RuleCode { RuleCode::RUF100 => &LintSource::NoQA, RuleCode::E501 | RuleCode::W292 + | RuleCode::W505 | RuleCode::UP009 | RuleCode::PGH003 | RuleCode::PGH004 => &LintSource::Lines, diff --git a/src/settings/mod.rs b/src/settings/mod.rs index e462714d68..b3450b3d6a 100644 --- a/src/settings/mod.rs +++ b/src/settings/mod.rs @@ -458,7 +458,7 @@ mod tests { }] .into_iter(), ); - let expected = FxHashSet::from_iter([RuleCode::W292, RuleCode::W605]); + let expected = FxHashSet::from_iter([RuleCode::W292, RuleCode::W505, RuleCode::W605]); assert_eq!(actual, expected); let actual = resolve_codes( @@ -478,7 +478,7 @@ mod tests { }] .into_iter(), ); - let expected = FxHashSet::from_iter([RuleCode::W605]); + let expected = FxHashSet::from_iter([RuleCode::W505, RuleCode::W605]); assert_eq!(actual, expected); let actual = resolve_codes( @@ -504,7 +504,7 @@ mod tests { ] .into_iter(), ); - let expected = FxHashSet::from_iter([RuleCode::W292, RuleCode::W605]); + let expected = FxHashSet::from_iter([RuleCode::W292, RuleCode::W505, RuleCode::W605]); assert_eq!(actual, expected); let actual = resolve_codes( diff --git a/src/violations.rs b/src/violations.rs index ab1d22fcfe..c74db90fb8 100644 --- a/src/violations.rs +++ b/src/violations.rs @@ -315,6 +315,20 @@ impl AlwaysAutofixableViolation for InvalidEscapeSequence { } } +define_violation!( + pub struct DocLineTooLong(pub usize, pub usize); +); +impl Violation for DocLineTooLong { + fn message(&self) -> String { + let DocLineTooLong(length, limit) = self; + format!("Doc line too long ({length} > {limit} characters)") + } + + fn placeholder() -> Self { + DocLineTooLong(89, 88) + } +} + // pyflakes define_violation!(