Implement doc line length enforcement (#1804)

This PR implements `W505` (`DocLineTooLong`), which is similar to `E501`
(`LineTooLong`) but confined to doc lines.

I based the "doc line" definition on pycodestyle, which defines a doc
line as a standalone comment or string statement. Our definition is a
bit more liberal, since we consider any string statement a doc line
(even if it's part of a multi-line statement) -- but that seems fine to
me.

Note that, unusually, this rule requires custom extraction from both the
token stream (to find standalone comments) and the AST (to find string
statements).

Closes #1784.
This commit is contained in:
Charlie Marsh 2023-01-11 22:32:14 -05:00 committed by GitHub
parent 329946f162
commit f450e2e79d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 314 additions and 34 deletions

View file

@ -597,6 +597,7 @@ For more, see [pycodestyle](https://pypi.org/project/pycodestyle/2.9.1/) on PyPI
| E902 | IOError | IOError: `...` | |
| E999 | SyntaxError | SyntaxError: `...` | |
| W292 | NoNewLineAtEndOfFile | No newline at end of file | 🛠 |
| W505 | DocLineTooLong | Doc line too long (89 > 88 characters) | |
| W605 | InvalidEscapeSequence | Invalid escape sequence: '\c' | 🛠 |
### mccabe (C90)
@ -3161,6 +3162,24 @@ ignore-overlong-task-comments = true
---
#### [`max-doc-length`](#max-doc-length)
The maximum line length to allow for line-length violations within
documentation (`W505`), including standalone comments.
**Default value**: `None`
**Type**: `usize`
**Example usage**:
```toml
[tool.ruff.pycodestyle]
max-doc-length = 88
```
---
### `pydocstyle`
#### [`convention`](#convention)

View file

@ -0,0 +1,15 @@
#!/usr/bin/env python3
"""Here's a top-level docstring that's over the limit."""
def f():
"""Here's a docstring that's also over the limit."""
x = 1 # Here's a comment that's over the limit, but it's not standalone.
# Here's a standalone comment that's over the limit.
print("Here's a string that's over the limit, but it's not a docstring.")
"This is also considered a docstring, and is over the limit."

View file

@ -945,6 +945,15 @@
"boolean",
"null"
]
},
"max-doc-length": {
"description": "The maximum line length to allow for line-length violations within documentation (`W505`), including standalone comments.",
"type": [
"integer",
"null"
],
"format": "uint",
"minimum": 0.0
}
},
"additionalProperties": false
@ -1616,6 +1625,9 @@
"W2",
"W29",
"W292",
"W5",
"W50",
"W505",
"W6",
"W60",
"W605",

View file

@ -1,6 +1,6 @@
//! Lint rules based on checking raw physical lines.
use crate::pycodestyle::rules::{line_too_long, no_newline_at_end_of_file};
use crate::pycodestyle::rules::{doc_line_too_long, line_too_long, no_newline_at_end_of_file};
use crate::pygrep_hooks::rules::{blanket_noqa, blanket_type_ignore};
use crate::pyupgrade::rules::unnecessary_coding_comment;
use crate::registry::{Diagnostic, RuleCode};
@ -9,18 +9,21 @@ use crate::settings::{flags, Settings};
pub fn check_lines(
contents: &str,
commented_lines: &[usize],
doc_lines: &[usize],
settings: &Settings,
autofix: flags::Autofix,
) -> Vec<Diagnostic> {
let mut diagnostics: Vec<Diagnostic> = vec![];
let enforce_unnecessary_coding_comment = settings.enabled.contains(&RuleCode::UP009);
let enforce_blanket_noqa = settings.enabled.contains(&RuleCode::PGH004);
let enforce_blanket_type_ignore = settings.enabled.contains(&RuleCode::PGH003);
let enforce_doc_line_too_long = settings.enabled.contains(&RuleCode::W505);
let enforce_line_too_long = settings.enabled.contains(&RuleCode::E501);
let enforce_no_newline_at_end_of_file = settings.enabled.contains(&RuleCode::W292);
let enforce_blanket_type_ignore = settings.enabled.contains(&RuleCode::PGH003);
let enforce_blanket_noqa = settings.enabled.contains(&RuleCode::PGH004);
let enforce_unnecessary_coding_comment = settings.enabled.contains(&RuleCode::UP009);
let mut commented_lines_iter = commented_lines.iter().peekable();
let mut doc_lines_iter = doc_lines.iter().peekable();
for (index, line) in contents.lines().enumerate() {
while commented_lines_iter
.next_if(|lineno| &(index + 1) == *lineno)
@ -40,18 +43,25 @@ pub fn check_lines(
}
if enforce_blanket_type_ignore {
if commented_lines.contains(&(index + 1)) {
if let Some(diagnostic) = blanket_type_ignore(index, line) {
diagnostics.push(diagnostic);
}
if let Some(diagnostic) = blanket_type_ignore(index, line) {
diagnostics.push(diagnostic);
}
}
if enforce_blanket_noqa {
if commented_lines.contains(&(index + 1)) {
if let Some(diagnostic) = blanket_noqa(index, line) {
diagnostics.push(diagnostic);
}
if let Some(diagnostic) = blanket_noqa(index, line) {
diagnostics.push(diagnostic);
}
}
}
while doc_lines_iter
.next_if(|lineno| &(index + 1) == *lineno)
.is_some()
{
if enforce_doc_line_too_long {
if let Some(diagnostic) = doc_line_too_long(index, line, settings) {
diagnostics.push(diagnostic);
}
}
}
@ -90,6 +100,7 @@ mod tests {
check_lines(
line,
&[],
&[],
&Settings {
line_length,
..Settings::for_rule(RuleCode::E501)

58
src/doc_lines.rs Normal file
View file

@ -0,0 +1,58 @@
//! Doc line extraction. In this context, a doc line is a line consisting of a
//! standalone comment or a constant string statement.
use rustpython_ast::{Constant, ExprKind, Stmt, StmtKind, Suite};
use rustpython_parser::lexer::{LexResult, Tok};
use crate::ast::visitor;
use crate::ast::visitor::Visitor;
/// Extract doc lines (standalone comments) from a token sequence.
pub fn doc_lines_from_tokens(lxr: &[LexResult]) -> Vec<usize> {
let mut doc_lines: Vec<usize> = Vec::default();
let mut prev: Option<usize> = None;
for (start, tok, end) in lxr.iter().flatten() {
if matches!(tok, Tok::Indent | Tok::Dedent | Tok::Newline) {
continue;
}
if matches!(tok, Tok::Comment(..)) {
if let Some(prev) = prev {
if start.row() > prev {
doc_lines.push(start.row());
}
} else {
doc_lines.push(start.row());
}
}
prev = Some(end.row());
}
doc_lines
}
#[derive(Default)]
struct StringLinesVisitor {
string_lines: Vec<usize>,
}
impl Visitor<'_> for StringLinesVisitor {
fn visit_stmt(&mut self, stmt: &Stmt) {
if let StmtKind::Expr { value } = &stmt.node {
if let ExprKind::Constant {
value: Constant::Str(..),
..
} = &value.node
{
self.string_lines
.extend(value.location.row()..=value.end_location.unwrap().row());
}
}
visitor::walk_stmt(self, stmt);
}
}
/// Extract doc lines (standalone strings) from an AST.
pub fn doc_lines_from_ast(python_ast: &Suite) -> Vec<usize> {
let mut visitor = StringLinesVisitor::default();
visitor.visit_body(python_ast);
visitor.string_lines
}

View file

@ -93,4 +93,5 @@ cfg_if! {
pub use lib_wasm::check;
}
}
pub mod doc_lines;
pub mod flake8_pie;

View file

@ -19,6 +19,7 @@ use crate::checkers::lines::check_lines;
use crate::checkers::noqa::check_noqa;
use crate::checkers::tokens::check_tokens;
use crate::directives::Directives;
use crate::doc_lines::{doc_lines_from_ast, doc_lines_from_tokens};
use crate::message::{Message, Source};
use crate::noqa::add_noqa;
use crate::registry::{Diagnostic, LintSource, RuleCode};
@ -70,6 +71,14 @@ pub(crate) fn check_path(
// Aggregate all diagnostics.
let mut diagnostics: Vec<Diagnostic> = vec![];
// Collect doc lines. This requires a rare mix of tokens (for comments) and AST
// (for docstrings), which demands special-casing at this level.
let use_doc_lines = settings.enabled.contains(&RuleCode::W505);
let mut doc_lines = vec![];
if use_doc_lines {
doc_lines.extend(doc_lines_from_tokens(&tokens));
}
// Run the token-based rules.
if settings
.enabled
@ -89,7 +98,7 @@ pub(crate) fn check_path(
.enabled
.iter()
.any(|rule_code| matches!(rule_code.lint_source(), LintSource::Imports));
if use_ast || use_imports {
if use_ast || use_imports || use_doc_lines {
match rustpython_helpers::parse_program_tokens(tokens, "<filename>") {
Ok(python_ast) => {
if use_ast {
@ -116,6 +125,9 @@ pub(crate) fn check_path(
package,
));
}
if use_doc_lines {
doc_lines.extend(doc_lines_from_ast(&python_ast));
}
}
Err(parse_error) => {
if settings.enabled.contains(&RuleCode::E999) {
@ -128,6 +140,12 @@ pub(crate) fn check_path(
}
}
// Deduplicate and reorder any doc lines.
if use_doc_lines {
doc_lines.sort_unstable();
doc_lines.dedup();
}
// Run the lines-based rules.
if settings
.enabled
@ -137,6 +155,7 @@ pub(crate) fn check_path(
diagnostics.extend(check_lines(
contents,
&directives.commented_lines,
&doc_lines,
settings,
autofix,
));

View file

@ -67,6 +67,7 @@ mod tests {
&settings::Settings {
pycodestyle: Settings {
ignore_overlong_task_comments,
..Settings::default()
},
..settings::Settings::for_rule(RuleCode::E501)
},
@ -74,4 +75,20 @@ mod tests {
insta::assert_yaml_snapshot!(snapshot, diagnostics);
Ok(())
}
#[test]
fn max_doc_length() -> Result<()> {
let diagnostics = test_path(
Path::new("./resources/test/fixtures/pycodestyle/W505.py"),
&settings::Settings {
pycodestyle: Settings {
max_doc_length: Some(50),
..Settings::default()
},
..settings::Settings::for_rule(RuleCode::W505)
},
)?;
insta::assert_yaml_snapshot!(diagnostics);
Ok(())
}
}

View file

@ -22,42 +22,88 @@ use crate::violations;
static URL_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^https?://\S+$").unwrap());
/// E501
pub fn line_too_long(lineno: usize, line: &str, settings: &Settings) -> Option<Diagnostic> {
let line_length = line.chars().count();
if line_length <= settings.line_length {
return None;
fn is_overlong(
line: &str,
line_length: usize,
limit: usize,
ignore_overlong_task_comments: bool,
task_tags: &[String],
) -> bool {
if line_length <= limit {
return false;
}
let mut chunks = line.split_whitespace();
let (Some(first), Some(second)) = (chunks.next(), chunks.next()) else {
// Single word / no printable chars - no way to make the line shorter
return None;
return false;
};
if first == "#" {
if settings.pycodestyle.ignore_overlong_task_comments {
if ignore_overlong_task_comments {
let second = second.trim_end_matches(':');
if settings.task_tags.iter().any(|tag| tag == second) {
return None;
if task_tags.iter().any(|tag| tag == second) {
return false;
}
}
// Do not enforce the line length for commented lines that end with a URL
// or contain only a single word.
if chunks.last().map_or(true, |c| URL_REGEX.is_match(c)) {
return None;
return false;
}
}
Some(Diagnostic::new(
violations::LineTooLong(line_length, settings.line_length),
Range::new(
Location::new(lineno + 1, settings.line_length),
Location::new(lineno + 1, line_length),
),
))
true
}
/// E501
pub fn line_too_long(lineno: usize, line: &str, settings: &Settings) -> Option<Diagnostic> {
let line_length = line.chars().count();
let limit = settings.line_length;
if is_overlong(
line,
line_length,
limit,
settings.pycodestyle.ignore_overlong_task_comments,
&settings.task_tags,
) {
Some(Diagnostic::new(
violations::LineTooLong(line_length, limit),
Range::new(
Location::new(lineno + 1, limit),
Location::new(lineno + 1, line_length),
),
))
} else {
None
}
}
/// W505
pub fn doc_line_too_long(lineno: usize, line: &str, settings: &Settings) -> Option<Diagnostic> {
let Some(limit) = settings.pycodestyle.max_doc_length else {
return None;
};
let line_length = line.chars().count();
if is_overlong(
line,
line_length,
limit,
settings.pycodestyle.ignore_overlong_task_comments,
&settings.task_tags,
) {
Some(Diagnostic::new(
violations::DocLineTooLong(line_length, limit),
Range::new(
Location::new(lineno + 1, limit),
Location::new(lineno + 1, line_length),
),
))
} else {
None
}
}
fn compare(

View file

@ -9,6 +9,16 @@ use serde::{Deserialize, Serialize};
)]
#[serde(deny_unknown_fields, rename_all = "kebab-case", rename = "Pycodestyle")]
pub struct Options {
#[option(
default = "None",
value_type = "usize",
example = r#"
max-doc-length = 88
"#
)]
/// The maximum line length to allow for line-length violations within
/// documentation (`W505`), including standalone comments.
pub max_doc_length: Option<usize>,
#[option(
default = "false",
value_type = "bool",
@ -24,12 +34,14 @@ pub struct Options {
#[derive(Debug, Default, Hash)]
pub struct Settings {
pub max_doc_length: Option<usize>,
pub ignore_overlong_task_comments: bool,
}
impl From<Options> for Settings {
fn from(options: Options) -> Self {
Self {
max_doc_length: options.max_doc_length,
ignore_overlong_task_comments: options
.ignore_overlong_task_comments
.unwrap_or_default(),
@ -40,6 +52,7 @@ impl From<Options> for Settings {
impl From<Settings> for Options {
fn from(settings: Settings) -> Self {
Self {
max_doc_length: settings.max_doc_length,
ignore_overlong_task_comments: Some(settings.ignore_overlong_task_comments),
}
}

View file

@ -0,0 +1,53 @@
---
source: src/pycodestyle/mod.rs
expression: diagnostics
---
- kind:
DocLineTooLong:
- 57
- 50
location:
row: 2
column: 50
end_location:
row: 2
column: 57
fix: ~
parent: ~
- kind:
DocLineTooLong:
- 56
- 50
location:
row: 6
column: 50
end_location:
row: 6
column: 56
fix: ~
parent: ~
- kind:
DocLineTooLong:
- 56
- 50
location:
row: 10
column: 50
end_location:
row: 10
column: 56
fix: ~
parent: ~
- kind:
DocLineTooLong:
- 61
- 50
location:
row: 15
column: 50
end_location:
row: 15
column: 61
fix: ~
parent: ~

View file

@ -132,6 +132,7 @@ define_rule_mapping!(
E999 => violations::SyntaxError,
// pycodestyle warnings
W292 => violations::NoNewLineAtEndOfFile,
W505 => violations::DocLineTooLong,
W605 => violations::InvalidEscapeSequence,
// pyflakes
F401 => violations::UnusedImport,
@ -786,6 +787,7 @@ impl RuleCode {
RuleCode::RUF100 => &LintSource::NoQA,
RuleCode::E501
| RuleCode::W292
| RuleCode::W505
| RuleCode::UP009
| RuleCode::PGH003
| RuleCode::PGH004 => &LintSource::Lines,

View file

@ -458,7 +458,7 @@ mod tests {
}]
.into_iter(),
);
let expected = FxHashSet::from_iter([RuleCode::W292, RuleCode::W605]);
let expected = FxHashSet::from_iter([RuleCode::W292, RuleCode::W505, RuleCode::W605]);
assert_eq!(actual, expected);
let actual = resolve_codes(
@ -478,7 +478,7 @@ mod tests {
}]
.into_iter(),
);
let expected = FxHashSet::from_iter([RuleCode::W605]);
let expected = FxHashSet::from_iter([RuleCode::W505, RuleCode::W605]);
assert_eq!(actual, expected);
let actual = resolve_codes(
@ -504,7 +504,7 @@ mod tests {
]
.into_iter(),
);
let expected = FxHashSet::from_iter([RuleCode::W292, RuleCode::W605]);
let expected = FxHashSet::from_iter([RuleCode::W292, RuleCode::W505, RuleCode::W605]);
assert_eq!(actual, expected);
let actual = resolve_codes(

View file

@ -315,6 +315,20 @@ impl AlwaysAutofixableViolation for InvalidEscapeSequence {
}
}
define_violation!(
pub struct DocLineTooLong(pub usize, pub usize);
);
impl Violation for DocLineTooLong {
fn message(&self) -> String {
let DocLineTooLong(length, limit) = self;
format!("Doc line too long ({length} > {limit} characters)")
}
fn placeholder() -> Self {
DocLineTooLong(89, 88)
}
}
// pyflakes
define_violation!(