diff --git a/README.md b/README.md index 2de452e189..8320d806c2 100644 --- a/README.md +++ b/README.md @@ -597,6 +597,7 @@ For more, see [pycodestyle](https://pypi.org/project/pycodestyle/2.9.1/) on PyPI | E902 | IOError | IOError: `...` | | | E999 | SyntaxError | SyntaxError: `...` | | | W292 | NoNewLineAtEndOfFile | No newline at end of file | 🛠 | +| W505 | DocLineTooLong | Doc line too long (89 > 88 characters) | | | W605 | InvalidEscapeSequence | Invalid escape sequence: '\c' | 🛠 | ### mccabe (C90) @@ -3161,6 +3162,24 @@ ignore-overlong-task-comments = true --- +#### [`max-doc-length`](#max-doc-length) + +The maximum line length to allow for line-length violations within +documentation (`W505`), including standalone comments. + +**Default value**: `None` + +**Type**: `usize` + +**Example usage**: + +```toml +[tool.ruff.pycodestyle] +max-doc-length = 88 +``` + +--- + ### `pydocstyle` #### [`convention`](#convention) diff --git a/resources/test/fixtures/pycodestyle/W505.py b/resources/test/fixtures/pycodestyle/W505.py new file mode 100644 index 0000000000..62f5faae72 --- /dev/null +++ b/resources/test/fixtures/pycodestyle/W505.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +"""Here's a top-level docstring that's over the limit.""" + + +def f(): + """Here's a docstring that's also over the limit.""" + + x = 1 # Here's a comment that's over the limit, but it's not standalone. + + # Here's a standalone comment that's over the limit. + + print("Here's a string that's over the limit, but it's not a docstring.") + + +"This is also considered a docstring, and is over the limit." diff --git a/ruff.schema.json b/ruff.schema.json index 86a101e3c4..bfaa9af202 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -945,6 +945,15 @@ "boolean", "null" ] + }, + "max-doc-length": { + "description": "The maximum line length to allow for line-length violations within documentation (`W505`), including standalone comments.", + "type": [ + "integer", + "null" + ], + "format": "uint", + "minimum": 0.0 } }, "additionalProperties": false @@ -1616,6 +1625,9 @@ "W2", "W29", "W292", + "W5", + "W50", + "W505", "W6", "W60", "W605", diff --git a/src/checkers/lines.rs b/src/checkers/lines.rs index 4895cbec87..53e2411e78 100644 --- a/src/checkers/lines.rs +++ b/src/checkers/lines.rs @@ -1,6 +1,6 @@ //! Lint rules based on checking raw physical lines. -use crate::pycodestyle::rules::{line_too_long, no_newline_at_end_of_file}; +use crate::pycodestyle::rules::{doc_line_too_long, line_too_long, no_newline_at_end_of_file}; use crate::pygrep_hooks::rules::{blanket_noqa, blanket_type_ignore}; use crate::pyupgrade::rules::unnecessary_coding_comment; use crate::registry::{Diagnostic, RuleCode}; @@ -9,18 +9,21 @@ use crate::settings::{flags, Settings}; pub fn check_lines( contents: &str, commented_lines: &[usize], + doc_lines: &[usize], settings: &Settings, autofix: flags::Autofix, ) -> Vec { let mut diagnostics: Vec = vec![]; - let enforce_unnecessary_coding_comment = settings.enabled.contains(&RuleCode::UP009); + let enforce_blanket_noqa = settings.enabled.contains(&RuleCode::PGH004); + let enforce_blanket_type_ignore = settings.enabled.contains(&RuleCode::PGH003); + let enforce_doc_line_too_long = settings.enabled.contains(&RuleCode::W505); let enforce_line_too_long = settings.enabled.contains(&RuleCode::E501); let enforce_no_newline_at_end_of_file = settings.enabled.contains(&RuleCode::W292); - let enforce_blanket_type_ignore = settings.enabled.contains(&RuleCode::PGH003); - let enforce_blanket_noqa = settings.enabled.contains(&RuleCode::PGH004); + let enforce_unnecessary_coding_comment = settings.enabled.contains(&RuleCode::UP009); let mut commented_lines_iter = commented_lines.iter().peekable(); + let mut doc_lines_iter = doc_lines.iter().peekable(); for (index, line) in contents.lines().enumerate() { while commented_lines_iter .next_if(|lineno| &(index + 1) == *lineno) @@ -40,18 +43,25 @@ pub fn check_lines( } if enforce_blanket_type_ignore { - if commented_lines.contains(&(index + 1)) { - if let Some(diagnostic) = blanket_type_ignore(index, line) { - diagnostics.push(diagnostic); - } + if let Some(diagnostic) = blanket_type_ignore(index, line) { + diagnostics.push(diagnostic); } } if enforce_blanket_noqa { - if commented_lines.contains(&(index + 1)) { - if let Some(diagnostic) = blanket_noqa(index, line) { - diagnostics.push(diagnostic); - } + if let Some(diagnostic) = blanket_noqa(index, line) { + diagnostics.push(diagnostic); + } + } + } + + while doc_lines_iter + .next_if(|lineno| &(index + 1) == *lineno) + .is_some() + { + if enforce_doc_line_too_long { + if let Some(diagnostic) = doc_line_too_long(index, line, settings) { + diagnostics.push(diagnostic); } } } @@ -90,6 +100,7 @@ mod tests { check_lines( line, &[], + &[], &Settings { line_length, ..Settings::for_rule(RuleCode::E501) diff --git a/src/doc_lines.rs b/src/doc_lines.rs new file mode 100644 index 0000000000..a691a459fc --- /dev/null +++ b/src/doc_lines.rs @@ -0,0 +1,58 @@ +//! Doc line extraction. In this context, a doc line is a line consisting of a +//! standalone comment or a constant string statement. + +use rustpython_ast::{Constant, ExprKind, Stmt, StmtKind, Suite}; +use rustpython_parser::lexer::{LexResult, Tok}; + +use crate::ast::visitor; +use crate::ast::visitor::Visitor; + +/// Extract doc lines (standalone comments) from a token sequence. +pub fn doc_lines_from_tokens(lxr: &[LexResult]) -> Vec { + let mut doc_lines: Vec = Vec::default(); + let mut prev: Option = None; + for (start, tok, end) in lxr.iter().flatten() { + if matches!(tok, Tok::Indent | Tok::Dedent | Tok::Newline) { + continue; + } + if matches!(tok, Tok::Comment(..)) { + if let Some(prev) = prev { + if start.row() > prev { + doc_lines.push(start.row()); + } + } else { + doc_lines.push(start.row()); + } + } + prev = Some(end.row()); + } + doc_lines +} + +#[derive(Default)] +struct StringLinesVisitor { + string_lines: Vec, +} + +impl Visitor<'_> for StringLinesVisitor { + fn visit_stmt(&mut self, stmt: &Stmt) { + if let StmtKind::Expr { value } = &stmt.node { + if let ExprKind::Constant { + value: Constant::Str(..), + .. + } = &value.node + { + self.string_lines + .extend(value.location.row()..=value.end_location.unwrap().row()); + } + } + visitor::walk_stmt(self, stmt); + } +} + +/// Extract doc lines (standalone strings) from an AST. +pub fn doc_lines_from_ast(python_ast: &Suite) -> Vec { + let mut visitor = StringLinesVisitor::default(); + visitor.visit_body(python_ast); + visitor.string_lines +} diff --git a/src/lib.rs b/src/lib.rs index 4aa916d315..a6bdc5abf4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -93,4 +93,5 @@ cfg_if! { pub use lib_wasm::check; } } +pub mod doc_lines; pub mod flake8_pie; diff --git a/src/linter.rs b/src/linter.rs index 252555bbc7..1697741d46 100644 --- a/src/linter.rs +++ b/src/linter.rs @@ -19,6 +19,7 @@ use crate::checkers::lines::check_lines; use crate::checkers::noqa::check_noqa; use crate::checkers::tokens::check_tokens; use crate::directives::Directives; +use crate::doc_lines::{doc_lines_from_ast, doc_lines_from_tokens}; use crate::message::{Message, Source}; use crate::noqa::add_noqa; use crate::registry::{Diagnostic, LintSource, RuleCode}; @@ -70,6 +71,14 @@ pub(crate) fn check_path( // Aggregate all diagnostics. let mut diagnostics: Vec = vec![]; + // Collect doc lines. This requires a rare mix of tokens (for comments) and AST + // (for docstrings), which demands special-casing at this level. + let use_doc_lines = settings.enabled.contains(&RuleCode::W505); + let mut doc_lines = vec![]; + if use_doc_lines { + doc_lines.extend(doc_lines_from_tokens(&tokens)); + } + // Run the token-based rules. if settings .enabled @@ -89,7 +98,7 @@ pub(crate) fn check_path( .enabled .iter() .any(|rule_code| matches!(rule_code.lint_source(), LintSource::Imports)); - if use_ast || use_imports { + if use_ast || use_imports || use_doc_lines { match rustpython_helpers::parse_program_tokens(tokens, "") { Ok(python_ast) => { if use_ast { @@ -116,6 +125,9 @@ pub(crate) fn check_path( package, )); } + if use_doc_lines { + doc_lines.extend(doc_lines_from_ast(&python_ast)); + } } Err(parse_error) => { if settings.enabled.contains(&RuleCode::E999) { @@ -128,6 +140,12 @@ pub(crate) fn check_path( } } + // Deduplicate and reorder any doc lines. + if use_doc_lines { + doc_lines.sort_unstable(); + doc_lines.dedup(); + } + // Run the lines-based rules. if settings .enabled @@ -137,6 +155,7 @@ pub(crate) fn check_path( diagnostics.extend(check_lines( contents, &directives.commented_lines, + &doc_lines, settings, autofix, )); diff --git a/src/pycodestyle/mod.rs b/src/pycodestyle/mod.rs index 3d0128f841..1c78298742 100644 --- a/src/pycodestyle/mod.rs +++ b/src/pycodestyle/mod.rs @@ -67,6 +67,7 @@ mod tests { &settings::Settings { pycodestyle: Settings { ignore_overlong_task_comments, + ..Settings::default() }, ..settings::Settings::for_rule(RuleCode::E501) }, @@ -74,4 +75,20 @@ mod tests { insta::assert_yaml_snapshot!(snapshot, diagnostics); Ok(()) } + + #[test] + fn max_doc_length() -> Result<()> { + let diagnostics = test_path( + Path::new("./resources/test/fixtures/pycodestyle/W505.py"), + &settings::Settings { + pycodestyle: Settings { + max_doc_length: Some(50), + ..Settings::default() + }, + ..settings::Settings::for_rule(RuleCode::W505) + }, + )?; + insta::assert_yaml_snapshot!(diagnostics); + Ok(()) + } } diff --git a/src/pycodestyle/rules.rs b/src/pycodestyle/rules.rs index 53deb15012..2aec73fd98 100644 --- a/src/pycodestyle/rules.rs +++ b/src/pycodestyle/rules.rs @@ -22,42 +22,88 @@ use crate::violations; static URL_REGEX: Lazy = Lazy::new(|| Regex::new(r"^https?://\S+$").unwrap()); -/// E501 -pub fn line_too_long(lineno: usize, line: &str, settings: &Settings) -> Option { - let line_length = line.chars().count(); - - if line_length <= settings.line_length { - return None; +fn is_overlong( + line: &str, + line_length: usize, + limit: usize, + ignore_overlong_task_comments: bool, + task_tags: &[String], +) -> bool { + if line_length <= limit { + return false; } let mut chunks = line.split_whitespace(); let (Some(first), Some(second)) = (chunks.next(), chunks.next()) else { // Single word / no printable chars - no way to make the line shorter - return None; + return false; }; if first == "#" { - if settings.pycodestyle.ignore_overlong_task_comments { + if ignore_overlong_task_comments { let second = second.trim_end_matches(':'); - if settings.task_tags.iter().any(|tag| tag == second) { - return None; + if task_tags.iter().any(|tag| tag == second) { + return false; } } // Do not enforce the line length for commented lines that end with a URL // or contain only a single word. if chunks.last().map_or(true, |c| URL_REGEX.is_match(c)) { - return None; + return false; } } - Some(Diagnostic::new( - violations::LineTooLong(line_length, settings.line_length), - Range::new( - Location::new(lineno + 1, settings.line_length), - Location::new(lineno + 1, line_length), - ), - )) + true +} + +/// E501 +pub fn line_too_long(lineno: usize, line: &str, settings: &Settings) -> Option { + let line_length = line.chars().count(); + let limit = settings.line_length; + if is_overlong( + line, + line_length, + limit, + settings.pycodestyle.ignore_overlong_task_comments, + &settings.task_tags, + ) { + Some(Diagnostic::new( + violations::LineTooLong(line_length, limit), + Range::new( + Location::new(lineno + 1, limit), + Location::new(lineno + 1, line_length), + ), + )) + } else { + None + } +} + +/// W505 +pub fn doc_line_too_long(lineno: usize, line: &str, settings: &Settings) -> Option { + let Some(limit) = settings.pycodestyle.max_doc_length else { + return None; + }; + + let line_length = line.chars().count(); + if is_overlong( + line, + line_length, + limit, + settings.pycodestyle.ignore_overlong_task_comments, + &settings.task_tags, + ) { + Some(Diagnostic::new( + violations::DocLineTooLong(line_length, limit), + Range::new( + Location::new(lineno + 1, limit), + Location::new(lineno + 1, line_length), + ), + )) + } else { + None + } } fn compare( diff --git a/src/pycodestyle/settings.rs b/src/pycodestyle/settings.rs index dab05086fc..36e96c8a10 100644 --- a/src/pycodestyle/settings.rs +++ b/src/pycodestyle/settings.rs @@ -9,6 +9,16 @@ use serde::{Deserialize, Serialize}; )] #[serde(deny_unknown_fields, rename_all = "kebab-case", rename = "Pycodestyle")] pub struct Options { + #[option( + default = "None", + value_type = "usize", + example = r#" + max-doc-length = 88 + "# + )] + /// The maximum line length to allow for line-length violations within + /// documentation (`W505`), including standalone comments. + pub max_doc_length: Option, #[option( default = "false", value_type = "bool", @@ -24,12 +34,14 @@ pub struct Options { #[derive(Debug, Default, Hash)] pub struct Settings { + pub max_doc_length: Option, pub ignore_overlong_task_comments: bool, } impl From for Settings { fn from(options: Options) -> Self { Self { + max_doc_length: options.max_doc_length, ignore_overlong_task_comments: options .ignore_overlong_task_comments .unwrap_or_default(), @@ -40,6 +52,7 @@ impl From for Settings { impl From for Options { fn from(settings: Settings) -> Self { Self { + max_doc_length: settings.max_doc_length, ignore_overlong_task_comments: Some(settings.ignore_overlong_task_comments), } } diff --git a/src/pycodestyle/snapshots/ruff__pycodestyle__tests__max_doc_length.snap b/src/pycodestyle/snapshots/ruff__pycodestyle__tests__max_doc_length.snap new file mode 100644 index 0000000000..452bda8595 --- /dev/null +++ b/src/pycodestyle/snapshots/ruff__pycodestyle__tests__max_doc_length.snap @@ -0,0 +1,53 @@ +--- +source: src/pycodestyle/mod.rs +expression: diagnostics +--- +- kind: + DocLineTooLong: + - 57 + - 50 + location: + row: 2 + column: 50 + end_location: + row: 2 + column: 57 + fix: ~ + parent: ~ +- kind: + DocLineTooLong: + - 56 + - 50 + location: + row: 6 + column: 50 + end_location: + row: 6 + column: 56 + fix: ~ + parent: ~ +- kind: + DocLineTooLong: + - 56 + - 50 + location: + row: 10 + column: 50 + end_location: + row: 10 + column: 56 + fix: ~ + parent: ~ +- kind: + DocLineTooLong: + - 61 + - 50 + location: + row: 15 + column: 50 + end_location: + row: 15 + column: 61 + fix: ~ + parent: ~ + diff --git a/src/registry.rs b/src/registry.rs index c264eaa72b..fedc846abf 100644 --- a/src/registry.rs +++ b/src/registry.rs @@ -132,6 +132,7 @@ define_rule_mapping!( E999 => violations::SyntaxError, // pycodestyle warnings W292 => violations::NoNewLineAtEndOfFile, + W505 => violations::DocLineTooLong, W605 => violations::InvalidEscapeSequence, // pyflakes F401 => violations::UnusedImport, @@ -786,6 +787,7 @@ impl RuleCode { RuleCode::RUF100 => &LintSource::NoQA, RuleCode::E501 | RuleCode::W292 + | RuleCode::W505 | RuleCode::UP009 | RuleCode::PGH003 | RuleCode::PGH004 => &LintSource::Lines, diff --git a/src/settings/mod.rs b/src/settings/mod.rs index e462714d68..b3450b3d6a 100644 --- a/src/settings/mod.rs +++ b/src/settings/mod.rs @@ -458,7 +458,7 @@ mod tests { }] .into_iter(), ); - let expected = FxHashSet::from_iter([RuleCode::W292, RuleCode::W605]); + let expected = FxHashSet::from_iter([RuleCode::W292, RuleCode::W505, RuleCode::W605]); assert_eq!(actual, expected); let actual = resolve_codes( @@ -478,7 +478,7 @@ mod tests { }] .into_iter(), ); - let expected = FxHashSet::from_iter([RuleCode::W605]); + let expected = FxHashSet::from_iter([RuleCode::W505, RuleCode::W605]); assert_eq!(actual, expected); let actual = resolve_codes( @@ -504,7 +504,7 @@ mod tests { ] .into_iter(), ); - let expected = FxHashSet::from_iter([RuleCode::W292, RuleCode::W605]); + let expected = FxHashSet::from_iter([RuleCode::W292, RuleCode::W505, RuleCode::W605]); assert_eq!(actual, expected); let actual = resolve_codes( diff --git a/src/violations.rs b/src/violations.rs index ab1d22fcfe..c74db90fb8 100644 --- a/src/violations.rs +++ b/src/violations.rs @@ -315,6 +315,20 @@ impl AlwaysAutofixableViolation for InvalidEscapeSequence { } } +define_violation!( + pub struct DocLineTooLong(pub usize, pub usize); +); +impl Violation for DocLineTooLong { + fn message(&self) -> String { + let DocLineTooLong(length, limit) = self; + format!("Doc line too long ({length} > {limit} characters)") + } + + fn placeholder() -> Self { + DocLineTooLong(89, 88) + } +} + // pyflakes define_violation!(