mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-31 15:48:22 +00:00
Implement doc line length enforcement (#1804)
This PR implements `W505` (`DocLineTooLong`), which is similar to `E501` (`LineTooLong`) but confined to doc lines. I based the "doc line" definition on pycodestyle, which defines a doc line as a standalone comment or string statement. Our definition is a bit more liberal, since we consider any string statement a doc line (even if it's part of a multi-line statement) -- but that seems fine to me. Note that, unusually, this rule requires custom extraction from both the token stream (to find standalone comments) and the AST (to find string statements). Closes #1784.
This commit is contained in:
parent
329946f162
commit
f450e2e79d
14 changed files with 314 additions and 34 deletions
19
README.md
19
README.md
|
@ -597,6 +597,7 @@ For more, see [pycodestyle](https://pypi.org/project/pycodestyle/2.9.1/) on PyPI
|
|||
| E902 | IOError | IOError: `...` | |
|
||||
| E999 | SyntaxError | SyntaxError: `...` | |
|
||||
| W292 | NoNewLineAtEndOfFile | No newline at end of file | 🛠 |
|
||||
| W505 | DocLineTooLong | Doc line too long (89 > 88 characters) | |
|
||||
| W605 | InvalidEscapeSequence | Invalid escape sequence: '\c' | 🛠 |
|
||||
|
||||
### mccabe (C90)
|
||||
|
@ -3161,6 +3162,24 @@ ignore-overlong-task-comments = true
|
|||
|
||||
---
|
||||
|
||||
#### [`max-doc-length`](#max-doc-length)
|
||||
|
||||
The maximum line length to allow for line-length violations within
|
||||
documentation (`W505`), including standalone comments.
|
||||
|
||||
**Default value**: `None`
|
||||
|
||||
**Type**: `usize`
|
||||
|
||||
**Example usage**:
|
||||
|
||||
```toml
|
||||
[tool.ruff.pycodestyle]
|
||||
max-doc-length = 88
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `pydocstyle`
|
||||
|
||||
#### [`convention`](#convention)
|
||||
|
|
15
resources/test/fixtures/pycodestyle/W505.py
vendored
Normal file
15
resources/test/fixtures/pycodestyle/W505.py
vendored
Normal file
|
@ -0,0 +1,15 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Here's a top-level docstring that's over the limit."""
|
||||
|
||||
|
||||
def f():
|
||||
"""Here's a docstring that's also over the limit."""
|
||||
|
||||
x = 1 # Here's a comment that's over the limit, but it's not standalone.
|
||||
|
||||
# Here's a standalone comment that's over the limit.
|
||||
|
||||
print("Here's a string that's over the limit, but it's not a docstring.")
|
||||
|
||||
|
||||
"This is also considered a docstring, and is over the limit."
|
|
@ -945,6 +945,15 @@
|
|||
"boolean",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"max-doc-length": {
|
||||
"description": "The maximum line length to allow for line-length violations within documentation (`W505`), including standalone comments.",
|
||||
"type": [
|
||||
"integer",
|
||||
"null"
|
||||
],
|
||||
"format": "uint",
|
||||
"minimum": 0.0
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
|
@ -1616,6 +1625,9 @@
|
|||
"W2",
|
||||
"W29",
|
||||
"W292",
|
||||
"W5",
|
||||
"W50",
|
||||
"W505",
|
||||
"W6",
|
||||
"W60",
|
||||
"W605",
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
//! Lint rules based on checking raw physical lines.
|
||||
|
||||
use crate::pycodestyle::rules::{line_too_long, no_newline_at_end_of_file};
|
||||
use crate::pycodestyle::rules::{doc_line_too_long, line_too_long, no_newline_at_end_of_file};
|
||||
use crate::pygrep_hooks::rules::{blanket_noqa, blanket_type_ignore};
|
||||
use crate::pyupgrade::rules::unnecessary_coding_comment;
|
||||
use crate::registry::{Diagnostic, RuleCode};
|
||||
|
@ -9,18 +9,21 @@ use crate::settings::{flags, Settings};
|
|||
pub fn check_lines(
|
||||
contents: &str,
|
||||
commented_lines: &[usize],
|
||||
doc_lines: &[usize],
|
||||
settings: &Settings,
|
||||
autofix: flags::Autofix,
|
||||
) -> Vec<Diagnostic> {
|
||||
let mut diagnostics: Vec<Diagnostic> = vec![];
|
||||
|
||||
let enforce_unnecessary_coding_comment = settings.enabled.contains(&RuleCode::UP009);
|
||||
let enforce_blanket_noqa = settings.enabled.contains(&RuleCode::PGH004);
|
||||
let enforce_blanket_type_ignore = settings.enabled.contains(&RuleCode::PGH003);
|
||||
let enforce_doc_line_too_long = settings.enabled.contains(&RuleCode::W505);
|
||||
let enforce_line_too_long = settings.enabled.contains(&RuleCode::E501);
|
||||
let enforce_no_newline_at_end_of_file = settings.enabled.contains(&RuleCode::W292);
|
||||
let enforce_blanket_type_ignore = settings.enabled.contains(&RuleCode::PGH003);
|
||||
let enforce_blanket_noqa = settings.enabled.contains(&RuleCode::PGH004);
|
||||
let enforce_unnecessary_coding_comment = settings.enabled.contains(&RuleCode::UP009);
|
||||
|
||||
let mut commented_lines_iter = commented_lines.iter().peekable();
|
||||
let mut doc_lines_iter = doc_lines.iter().peekable();
|
||||
for (index, line) in contents.lines().enumerate() {
|
||||
while commented_lines_iter
|
||||
.next_if(|lineno| &(index + 1) == *lineno)
|
||||
|
@ -40,18 +43,25 @@ pub fn check_lines(
|
|||
}
|
||||
|
||||
if enforce_blanket_type_ignore {
|
||||
if commented_lines.contains(&(index + 1)) {
|
||||
if let Some(diagnostic) = blanket_type_ignore(index, line) {
|
||||
diagnostics.push(diagnostic);
|
||||
}
|
||||
if let Some(diagnostic) = blanket_type_ignore(index, line) {
|
||||
diagnostics.push(diagnostic);
|
||||
}
|
||||
}
|
||||
|
||||
if enforce_blanket_noqa {
|
||||
if commented_lines.contains(&(index + 1)) {
|
||||
if let Some(diagnostic) = blanket_noqa(index, line) {
|
||||
diagnostics.push(diagnostic);
|
||||
}
|
||||
if let Some(diagnostic) = blanket_noqa(index, line) {
|
||||
diagnostics.push(diagnostic);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while doc_lines_iter
|
||||
.next_if(|lineno| &(index + 1) == *lineno)
|
||||
.is_some()
|
||||
{
|
||||
if enforce_doc_line_too_long {
|
||||
if let Some(diagnostic) = doc_line_too_long(index, line, settings) {
|
||||
diagnostics.push(diagnostic);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -90,6 +100,7 @@ mod tests {
|
|||
check_lines(
|
||||
line,
|
||||
&[],
|
||||
&[],
|
||||
&Settings {
|
||||
line_length,
|
||||
..Settings::for_rule(RuleCode::E501)
|
||||
|
|
58
src/doc_lines.rs
Normal file
58
src/doc_lines.rs
Normal file
|
@ -0,0 +1,58 @@
|
|||
//! Doc line extraction. In this context, a doc line is a line consisting of a
|
||||
//! standalone comment or a constant string statement.
|
||||
|
||||
use rustpython_ast::{Constant, ExprKind, Stmt, StmtKind, Suite};
|
||||
use rustpython_parser::lexer::{LexResult, Tok};
|
||||
|
||||
use crate::ast::visitor;
|
||||
use crate::ast::visitor::Visitor;
|
||||
|
||||
/// Extract doc lines (standalone comments) from a token sequence.
|
||||
pub fn doc_lines_from_tokens(lxr: &[LexResult]) -> Vec<usize> {
|
||||
let mut doc_lines: Vec<usize> = Vec::default();
|
||||
let mut prev: Option<usize> = None;
|
||||
for (start, tok, end) in lxr.iter().flatten() {
|
||||
if matches!(tok, Tok::Indent | Tok::Dedent | Tok::Newline) {
|
||||
continue;
|
||||
}
|
||||
if matches!(tok, Tok::Comment(..)) {
|
||||
if let Some(prev) = prev {
|
||||
if start.row() > prev {
|
||||
doc_lines.push(start.row());
|
||||
}
|
||||
} else {
|
||||
doc_lines.push(start.row());
|
||||
}
|
||||
}
|
||||
prev = Some(end.row());
|
||||
}
|
||||
doc_lines
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
struct StringLinesVisitor {
|
||||
string_lines: Vec<usize>,
|
||||
}
|
||||
|
||||
impl Visitor<'_> for StringLinesVisitor {
|
||||
fn visit_stmt(&mut self, stmt: &Stmt) {
|
||||
if let StmtKind::Expr { value } = &stmt.node {
|
||||
if let ExprKind::Constant {
|
||||
value: Constant::Str(..),
|
||||
..
|
||||
} = &value.node
|
||||
{
|
||||
self.string_lines
|
||||
.extend(value.location.row()..=value.end_location.unwrap().row());
|
||||
}
|
||||
}
|
||||
visitor::walk_stmt(self, stmt);
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract doc lines (standalone strings) from an AST.
|
||||
pub fn doc_lines_from_ast(python_ast: &Suite) -> Vec<usize> {
|
||||
let mut visitor = StringLinesVisitor::default();
|
||||
visitor.visit_body(python_ast);
|
||||
visitor.string_lines
|
||||
}
|
|
@ -93,4 +93,5 @@ cfg_if! {
|
|||
pub use lib_wasm::check;
|
||||
}
|
||||
}
|
||||
pub mod doc_lines;
|
||||
pub mod flake8_pie;
|
||||
|
|
|
@ -19,6 +19,7 @@ use crate::checkers::lines::check_lines;
|
|||
use crate::checkers::noqa::check_noqa;
|
||||
use crate::checkers::tokens::check_tokens;
|
||||
use crate::directives::Directives;
|
||||
use crate::doc_lines::{doc_lines_from_ast, doc_lines_from_tokens};
|
||||
use crate::message::{Message, Source};
|
||||
use crate::noqa::add_noqa;
|
||||
use crate::registry::{Diagnostic, LintSource, RuleCode};
|
||||
|
@ -70,6 +71,14 @@ pub(crate) fn check_path(
|
|||
// Aggregate all diagnostics.
|
||||
let mut diagnostics: Vec<Diagnostic> = vec![];
|
||||
|
||||
// Collect doc lines. This requires a rare mix of tokens (for comments) and AST
|
||||
// (for docstrings), which demands special-casing at this level.
|
||||
let use_doc_lines = settings.enabled.contains(&RuleCode::W505);
|
||||
let mut doc_lines = vec![];
|
||||
if use_doc_lines {
|
||||
doc_lines.extend(doc_lines_from_tokens(&tokens));
|
||||
}
|
||||
|
||||
// Run the token-based rules.
|
||||
if settings
|
||||
.enabled
|
||||
|
@ -89,7 +98,7 @@ pub(crate) fn check_path(
|
|||
.enabled
|
||||
.iter()
|
||||
.any(|rule_code| matches!(rule_code.lint_source(), LintSource::Imports));
|
||||
if use_ast || use_imports {
|
||||
if use_ast || use_imports || use_doc_lines {
|
||||
match rustpython_helpers::parse_program_tokens(tokens, "<filename>") {
|
||||
Ok(python_ast) => {
|
||||
if use_ast {
|
||||
|
@ -116,6 +125,9 @@ pub(crate) fn check_path(
|
|||
package,
|
||||
));
|
||||
}
|
||||
if use_doc_lines {
|
||||
doc_lines.extend(doc_lines_from_ast(&python_ast));
|
||||
}
|
||||
}
|
||||
Err(parse_error) => {
|
||||
if settings.enabled.contains(&RuleCode::E999) {
|
||||
|
@ -128,6 +140,12 @@ pub(crate) fn check_path(
|
|||
}
|
||||
}
|
||||
|
||||
// Deduplicate and reorder any doc lines.
|
||||
if use_doc_lines {
|
||||
doc_lines.sort_unstable();
|
||||
doc_lines.dedup();
|
||||
}
|
||||
|
||||
// Run the lines-based rules.
|
||||
if settings
|
||||
.enabled
|
||||
|
@ -137,6 +155,7 @@ pub(crate) fn check_path(
|
|||
diagnostics.extend(check_lines(
|
||||
contents,
|
||||
&directives.commented_lines,
|
||||
&doc_lines,
|
||||
settings,
|
||||
autofix,
|
||||
));
|
||||
|
|
|
@ -67,6 +67,7 @@ mod tests {
|
|||
&settings::Settings {
|
||||
pycodestyle: Settings {
|
||||
ignore_overlong_task_comments,
|
||||
..Settings::default()
|
||||
},
|
||||
..settings::Settings::for_rule(RuleCode::E501)
|
||||
},
|
||||
|
@ -74,4 +75,20 @@ mod tests {
|
|||
insta::assert_yaml_snapshot!(snapshot, diagnostics);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn max_doc_length() -> Result<()> {
|
||||
let diagnostics = test_path(
|
||||
Path::new("./resources/test/fixtures/pycodestyle/W505.py"),
|
||||
&settings::Settings {
|
||||
pycodestyle: Settings {
|
||||
max_doc_length: Some(50),
|
||||
..Settings::default()
|
||||
},
|
||||
..settings::Settings::for_rule(RuleCode::W505)
|
||||
},
|
||||
)?;
|
||||
insta::assert_yaml_snapshot!(diagnostics);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,42 +22,88 @@ use crate::violations;
|
|||
|
||||
static URL_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^https?://\S+$").unwrap());
|
||||
|
||||
/// E501
|
||||
pub fn line_too_long(lineno: usize, line: &str, settings: &Settings) -> Option<Diagnostic> {
|
||||
let line_length = line.chars().count();
|
||||
|
||||
if line_length <= settings.line_length {
|
||||
return None;
|
||||
fn is_overlong(
|
||||
line: &str,
|
||||
line_length: usize,
|
||||
limit: usize,
|
||||
ignore_overlong_task_comments: bool,
|
||||
task_tags: &[String],
|
||||
) -> bool {
|
||||
if line_length <= limit {
|
||||
return false;
|
||||
}
|
||||
|
||||
let mut chunks = line.split_whitespace();
|
||||
let (Some(first), Some(second)) = (chunks.next(), chunks.next()) else {
|
||||
// Single word / no printable chars - no way to make the line shorter
|
||||
return None;
|
||||
return false;
|
||||
};
|
||||
|
||||
if first == "#" {
|
||||
if settings.pycodestyle.ignore_overlong_task_comments {
|
||||
if ignore_overlong_task_comments {
|
||||
let second = second.trim_end_matches(':');
|
||||
if settings.task_tags.iter().any(|tag| tag == second) {
|
||||
return None;
|
||||
if task_tags.iter().any(|tag| tag == second) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Do not enforce the line length for commented lines that end with a URL
|
||||
// or contain only a single word.
|
||||
if chunks.last().map_or(true, |c| URL_REGEX.is_match(c)) {
|
||||
return None;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
Some(Diagnostic::new(
|
||||
violations::LineTooLong(line_length, settings.line_length),
|
||||
Range::new(
|
||||
Location::new(lineno + 1, settings.line_length),
|
||||
Location::new(lineno + 1, line_length),
|
||||
),
|
||||
))
|
||||
true
|
||||
}
|
||||
|
||||
/// E501
|
||||
pub fn line_too_long(lineno: usize, line: &str, settings: &Settings) -> Option<Diagnostic> {
|
||||
let line_length = line.chars().count();
|
||||
let limit = settings.line_length;
|
||||
if is_overlong(
|
||||
line,
|
||||
line_length,
|
||||
limit,
|
||||
settings.pycodestyle.ignore_overlong_task_comments,
|
||||
&settings.task_tags,
|
||||
) {
|
||||
Some(Diagnostic::new(
|
||||
violations::LineTooLong(line_length, limit),
|
||||
Range::new(
|
||||
Location::new(lineno + 1, limit),
|
||||
Location::new(lineno + 1, line_length),
|
||||
),
|
||||
))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// W505
|
||||
pub fn doc_line_too_long(lineno: usize, line: &str, settings: &Settings) -> Option<Diagnostic> {
|
||||
let Some(limit) = settings.pycodestyle.max_doc_length else {
|
||||
return None;
|
||||
};
|
||||
|
||||
let line_length = line.chars().count();
|
||||
if is_overlong(
|
||||
line,
|
||||
line_length,
|
||||
limit,
|
||||
settings.pycodestyle.ignore_overlong_task_comments,
|
||||
&settings.task_tags,
|
||||
) {
|
||||
Some(Diagnostic::new(
|
||||
violations::DocLineTooLong(line_length, limit),
|
||||
Range::new(
|
||||
Location::new(lineno + 1, limit),
|
||||
Location::new(lineno + 1, line_length),
|
||||
),
|
||||
))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn compare(
|
||||
|
|
|
@ -9,6 +9,16 @@ use serde::{Deserialize, Serialize};
|
|||
)]
|
||||
#[serde(deny_unknown_fields, rename_all = "kebab-case", rename = "Pycodestyle")]
|
||||
pub struct Options {
|
||||
#[option(
|
||||
default = "None",
|
||||
value_type = "usize",
|
||||
example = r#"
|
||||
max-doc-length = 88
|
||||
"#
|
||||
)]
|
||||
/// The maximum line length to allow for line-length violations within
|
||||
/// documentation (`W505`), including standalone comments.
|
||||
pub max_doc_length: Option<usize>,
|
||||
#[option(
|
||||
default = "false",
|
||||
value_type = "bool",
|
||||
|
@ -24,12 +34,14 @@ pub struct Options {
|
|||
|
||||
#[derive(Debug, Default, Hash)]
|
||||
pub struct Settings {
|
||||
pub max_doc_length: Option<usize>,
|
||||
pub ignore_overlong_task_comments: bool,
|
||||
}
|
||||
|
||||
impl From<Options> for Settings {
|
||||
fn from(options: Options) -> Self {
|
||||
Self {
|
||||
max_doc_length: options.max_doc_length,
|
||||
ignore_overlong_task_comments: options
|
||||
.ignore_overlong_task_comments
|
||||
.unwrap_or_default(),
|
||||
|
@ -40,6 +52,7 @@ impl From<Options> for Settings {
|
|||
impl From<Settings> for Options {
|
||||
fn from(settings: Settings) -> Self {
|
||||
Self {
|
||||
max_doc_length: settings.max_doc_length,
|
||||
ignore_overlong_task_comments: Some(settings.ignore_overlong_task_comments),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
---
|
||||
source: src/pycodestyle/mod.rs
|
||||
expression: diagnostics
|
||||
---
|
||||
- kind:
|
||||
DocLineTooLong:
|
||||
- 57
|
||||
- 50
|
||||
location:
|
||||
row: 2
|
||||
column: 50
|
||||
end_location:
|
||||
row: 2
|
||||
column: 57
|
||||
fix: ~
|
||||
parent: ~
|
||||
- kind:
|
||||
DocLineTooLong:
|
||||
- 56
|
||||
- 50
|
||||
location:
|
||||
row: 6
|
||||
column: 50
|
||||
end_location:
|
||||
row: 6
|
||||
column: 56
|
||||
fix: ~
|
||||
parent: ~
|
||||
- kind:
|
||||
DocLineTooLong:
|
||||
- 56
|
||||
- 50
|
||||
location:
|
||||
row: 10
|
||||
column: 50
|
||||
end_location:
|
||||
row: 10
|
||||
column: 56
|
||||
fix: ~
|
||||
parent: ~
|
||||
- kind:
|
||||
DocLineTooLong:
|
||||
- 61
|
||||
- 50
|
||||
location:
|
||||
row: 15
|
||||
column: 50
|
||||
end_location:
|
||||
row: 15
|
||||
column: 61
|
||||
fix: ~
|
||||
parent: ~
|
||||
|
|
@ -132,6 +132,7 @@ define_rule_mapping!(
|
|||
E999 => violations::SyntaxError,
|
||||
// pycodestyle warnings
|
||||
W292 => violations::NoNewLineAtEndOfFile,
|
||||
W505 => violations::DocLineTooLong,
|
||||
W605 => violations::InvalidEscapeSequence,
|
||||
// pyflakes
|
||||
F401 => violations::UnusedImport,
|
||||
|
@ -786,6 +787,7 @@ impl RuleCode {
|
|||
RuleCode::RUF100 => &LintSource::NoQA,
|
||||
RuleCode::E501
|
||||
| RuleCode::W292
|
||||
| RuleCode::W505
|
||||
| RuleCode::UP009
|
||||
| RuleCode::PGH003
|
||||
| RuleCode::PGH004 => &LintSource::Lines,
|
||||
|
|
|
@ -458,7 +458,7 @@ mod tests {
|
|||
}]
|
||||
.into_iter(),
|
||||
);
|
||||
let expected = FxHashSet::from_iter([RuleCode::W292, RuleCode::W605]);
|
||||
let expected = FxHashSet::from_iter([RuleCode::W292, RuleCode::W505, RuleCode::W605]);
|
||||
assert_eq!(actual, expected);
|
||||
|
||||
let actual = resolve_codes(
|
||||
|
@ -478,7 +478,7 @@ mod tests {
|
|||
}]
|
||||
.into_iter(),
|
||||
);
|
||||
let expected = FxHashSet::from_iter([RuleCode::W605]);
|
||||
let expected = FxHashSet::from_iter([RuleCode::W505, RuleCode::W605]);
|
||||
assert_eq!(actual, expected);
|
||||
|
||||
let actual = resolve_codes(
|
||||
|
@ -504,7 +504,7 @@ mod tests {
|
|||
]
|
||||
.into_iter(),
|
||||
);
|
||||
let expected = FxHashSet::from_iter([RuleCode::W292, RuleCode::W605]);
|
||||
let expected = FxHashSet::from_iter([RuleCode::W292, RuleCode::W505, RuleCode::W605]);
|
||||
assert_eq!(actual, expected);
|
||||
|
||||
let actual = resolve_codes(
|
||||
|
|
|
@ -315,6 +315,20 @@ impl AlwaysAutofixableViolation for InvalidEscapeSequence {
|
|||
}
|
||||
}
|
||||
|
||||
define_violation!(
|
||||
pub struct DocLineTooLong(pub usize, pub usize);
|
||||
);
|
||||
impl Violation for DocLineTooLong {
|
||||
fn message(&self) -> String {
|
||||
let DocLineTooLong(length, limit) = self;
|
||||
format!("Doc line too long ({length} > {limit} characters)")
|
||||
}
|
||||
|
||||
fn placeholder() -> Self {
|
||||
DocLineTooLong(89, 88)
|
||||
}
|
||||
}
|
||||
|
||||
// pyflakes
|
||||
|
||||
define_violation!(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue