From c2750a59ab8c92ffc8ccec1561e43f6f2f3f8859 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Mon, 13 Mar 2023 00:01:29 -0400 Subject: [PATCH] Implement an iterator for universal newlines (#3454) # Summary We need to support CR line endings (as opposed to LF and CRLF line endings, which are already supported). They're rare, but they do appear in Python code, and we tend to panic on any file that uses them. Our `Locator` abstraction now supports CR line endings. However, Rust's `str#lines` implementation does _not_. This PR adds a `UniversalNewlineIterator` implementation that respects all of CR, LF, and CRLF line endings, and plugs it into most of the `.lines()` call sites. As an alternative design, it could be nice if we could leverage `Locator` for this. We've already computed all of the line endings, so we could probably iterate much more efficiently? # Test Plan Largely relying on automated testing, however, also ran over some known failure cases, like #3404. --- Cargo.lock | 1 + crates/ruff/src/autofix/helpers.rs | 6 +- crates/ruff/src/checkers/noqa.rs | 3 +- crates/ruff/src/checkers/physical_lines.rs | 11 +- crates/ruff/src/linter.rs | 2 +- crates/ruff/src/noqa.rs | 5 +- .../src/rules/flake8_simplify/rules/ast_if.rs | 3 +- .../rules/flake8_simplify/rules/ast_with.rs | 3 +- crates/ruff/src/rules/isort/helpers.rs | 3 +- .../rules/invalid_escape_sequence.rs | 3 +- .../pycodestyle/rules/lambda_assignment.rs | 3 +- .../rules/no_newline_at_end_of_file.rs | 11 +- crates/ruff/src/rules/pydocstyle/helpers.rs | 3 +- .../pydocstyle/rules/blank_after_summary.rs | 5 +- .../rules/blank_before_after_class.rs | 7 +- .../rules/blank_before_after_function.rs | 9 +- .../pydocstyle/rules/ends_with_period.rs | 5 +- .../pydocstyle/rules/ends_with_punctuation.rs | 5 +- .../ruff/src/rules/pydocstyle/rules/indent.rs | 4 +- .../rules/multi_line_summary_start.rs | 6 +- .../rules/newline_after_last_paragraph.rs | 6 +- .../rules/pydocstyle/rules/no_signature.rs | 3 +- 
.../rules/no_surrounding_whitespace.rs | 4 +- .../pydocstyle/rules/non_imperative_mood.rs | 3 +- .../src/rules/pydocstyle/rules/one_liner.rs | 4 +- .../src/rules/pydocstyle/rules/sections.rs | 48 ++--- .../rules/pydocstyle/rules/triple_quotes.rs | 4 +- crates/ruff_python_ast/Cargo.toml | 2 + crates/ruff_python_ast/src/helpers.rs | 3 +- crates/ruff_python_ast/src/lib.rs | 1 + crates/ruff_python_ast/src/newlines.rs | 192 ++++++++++++++++++ .../src/source_code/locator.rs | 16 ++ crates/ruff_python_ast/src/str.rs | 23 +-- crates/ruff_python_ast/src/whitespace.rs | 37 ---- .../ruff_python_formatter/src/cst/helpers.rs | 7 +- 35 files changed, 325 insertions(+), 126 deletions(-) create mode 100644 crates/ruff_python_ast/src/newlines.rs diff --git a/Cargo.lock b/Cargo.lock index 21c7d6c3ef..53457f6061 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2139,6 +2139,7 @@ dependencies = [ "rustc-hash", "rustpython-common", "rustpython-parser", + "serde", "smallvec", ] diff --git a/crates/ruff/src/autofix/helpers.rs b/crates/ruff/src/autofix/helpers.rs index 8ff322b821..362362212a 100644 --- a/crates/ruff/src/autofix/helpers.rs +++ b/crates/ruff/src/autofix/helpers.rs @@ -9,8 +9,8 @@ use rustpython_parser::{lexer, Mode, Tok}; use ruff_diagnostics::Fix; use ruff_python_ast::helpers; use ruff_python_ast::helpers::to_absolute; +use ruff_python_ast::newlines::NewlineWithTrailingNewline; use ruff_python_ast::source_code::{Indexer, Locator, Stylist}; -use ruff_python_ast::whitespace::LinesWithTrailingNewline; use crate::cst::helpers::compose_module_path; use crate::cst::matchers::match_module; @@ -100,7 +100,7 @@ fn is_lone_child(child: &Stmt, parent: &Stmt, deleted: &[&Stmt]) -> Result /// of a multi-statement line. 
fn trailing_semicolon(stmt: &Stmt, locator: &Locator) -> Option { let contents = locator.skip(stmt.end_location.unwrap()); - for (row, line) in LinesWithTrailingNewline::from(contents).enumerate() { + for (row, line) in NewlineWithTrailingNewline::from(contents).enumerate() { let trimmed = line.trim(); if trimmed.starts_with(';') { let column = line @@ -123,7 +123,7 @@ fn trailing_semicolon(stmt: &Stmt, locator: &Locator) -> Option { fn next_stmt_break(semicolon: Location, locator: &Locator) -> Location { let start_location = Location::new(semicolon.row(), semicolon.column() + 1); let contents = locator.skip(start_location); - for (row, line) in LinesWithTrailingNewline::from(contents).enumerate() { + for (row, line) in NewlineWithTrailingNewline::from(contents).enumerate() { let trimmed = line.trim(); // Skip past any continuations. if trimmed.starts_with('\\') { diff --git a/crates/ruff/src/checkers/noqa.rs b/crates/ruff/src/checkers/noqa.rs index 3ec8a31b73..0d8790a230 100644 --- a/crates/ruff/src/checkers/noqa.rs +++ b/crates/ruff/src/checkers/noqa.rs @@ -5,6 +5,7 @@ use nohash_hasher::IntMap; use rustpython_parser::ast::Location; use ruff_diagnostics::{Diagnostic, Fix}; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::types::Range; use crate::codes::NoqaCode; @@ -38,7 +39,7 @@ pub fn check_noqa( // Indices of diagnostics that were ignored by a `noqa` directive. 
let mut ignored_diagnostics = vec![]; - let lines: Vec<&str> = contents.lines().collect(); + let lines: Vec<&str> = contents.universal_newlines().collect(); for lineno in commented_lines { match extract_file_exemption(lines[lineno - 1]) { Exemption::All => { diff --git a/crates/ruff/src/checkers/physical_lines.rs b/crates/ruff/src/checkers/physical_lines.rs index 442cac67f5..adb2963e40 100644 --- a/crates/ruff/src/checkers/physical_lines.rs +++ b/crates/ruff/src/checkers/physical_lines.rs @@ -3,7 +3,8 @@ use std::path::Path; use ruff_diagnostics::Diagnostic; -use ruff_python_ast::source_code::Stylist; +use ruff_python_ast::newlines::StrExt; +use ruff_python_ast::source_code::{Locator, Stylist}; use crate::registry::Rule; use crate::rules::flake8_executable::helpers::{extract_shebang, ShebangDirective}; @@ -21,8 +22,8 @@ use crate::settings::{flags, Settings}; pub fn check_physical_lines( path: &Path, + locator: &Locator, stylist: &Stylist, - contents: &str, commented_lines: &[usize], doc_lines: &[usize], settings: &Settings, @@ -56,7 +57,7 @@ pub fn check_physical_lines( let mut commented_lines_iter = commented_lines.iter().peekable(); let mut doc_lines_iter = doc_lines.iter().peekable(); - for (index, line) in contents.lines().enumerate() { + for (index, line) in locator.contents().universal_newlines().enumerate() { while commented_lines_iter .next_if(|lineno| &(index + 1) == *lineno) .is_some() @@ -162,8 +163,8 @@ pub fn check_physical_lines( if enforce_no_newline_at_end_of_file { if let Some(diagnostic) = no_newline_at_end_of_file( + locator, stylist, - contents, autofix.into() && settings.rules.should_fix(&Rule::NoNewLineAtEndOfFile), ) { diagnostics.push(diagnostic); @@ -199,8 +200,8 @@ mod tests { let check_with_max_line_length = |line_length: usize| { check_physical_lines( Path::new("foo.py"), + &locator, &stylist, - line, &[], &[], &Settings { diff --git a/crates/ruff/src/linter.rs b/crates/ruff/src/linter.rs index 7c634b4090..1ad51a0893 100644 --- 
a/crates/ruff/src/linter.rs +++ b/crates/ruff/src/linter.rs @@ -191,8 +191,8 @@ pub fn check_path( { diagnostics.extend(check_physical_lines( path, + locator, stylist, - contents, indexer.commented_lines(), &doc_lines, settings, diff --git a/crates/ruff/src/noqa.rs b/crates/ruff/src/noqa.rs index ad97360211..844a64fe5d 100644 --- a/crates/ruff/src/noqa.rs +++ b/crates/ruff/src/noqa.rs @@ -12,6 +12,7 @@ use rustc_hash::{FxHashMap, FxHashSet}; use rustpython_parser::ast::Location; use ruff_diagnostics::Diagnostic; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::source_code::{LineEnding, Locator}; use ruff_python_ast::types::Range; @@ -181,7 +182,7 @@ fn add_noqa_inner( // Codes that are globally exempted (within the current file). let mut file_exemptions: Vec = vec![]; - let lines: Vec<&str> = contents.lines().collect(); + let lines: Vec<&str> = contents.universal_newlines().collect(); for lineno in commented_lines { match extract_file_exemption(lines[lineno - 1]) { Exemption::All => { @@ -263,7 +264,7 @@ fn add_noqa_inner( let mut count: usize = 0; let mut output = String::new(); - for (lineno, line) in contents.lines().enumerate() { + for (lineno, line) in lines.into_iter().enumerate() { match matches_by_line.get(&lineno) { None => { output.push_str(line); diff --git a/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs b/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs index 38587687d7..3330d9bea7 100644 --- a/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs +++ b/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs @@ -9,6 +9,7 @@ use ruff_python_ast::helpers::{ contains_call_path, contains_effect, create_expr, create_stmt, first_colon_range, has_comments, has_comments_in, unparse_expr, unparse_stmt, }; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::types::Range; use crate::checkers::ast::Checker; @@ -283,7 +284,7 @@ pub fn nested_if_statements( Ok(fix) => { if fix .content - .lines() + .universal_newlines() .all(|line| 
line.len() <= checker.settings.line_length) { diagnostic.amend(fix); diff --git a/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs b/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs index 800a5830a3..ba1c4fcec7 100644 --- a/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs +++ b/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs @@ -5,6 +5,7 @@ use ruff_diagnostics::Diagnostic; use ruff_diagnostics::{AutofixKind, Availability, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::helpers::{first_colon_range, has_comments_in}; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::types::Range; use crate::checkers::ast::Checker; @@ -115,7 +116,7 @@ pub fn multiple_with_statements( Ok(fix) => { if fix .content - .lines() + .universal_newlines() .all(|line| line.len() <= checker.settings.line_length) { diagnostic.amend(fix); diff --git a/crates/ruff/src/rules/isort/helpers.rs b/crates/ruff/src/rules/isort/helpers.rs index f1ca5396f1..53ef62c9bc 100644 --- a/crates/ruff/src/rules/isort/helpers.rs +++ b/crates/ruff/src/rules/isort/helpers.rs @@ -2,6 +2,7 @@ use rustpython_parser::ast::{Location, Stmt}; use rustpython_parser::{lexer, Mode, Tok}; use ruff_python_ast::helpers::is_docstring_stmt; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::source_code::Locator; use super::types::TrailingComma; @@ -62,7 +63,7 @@ pub fn has_comment_break(stmt: &Stmt, locator: &Locator) -> bool { // # Direct comment. 
// def f(): pass let mut seen_blank = false; - for line in locator.take(stmt.location).lines().rev() { + for line in locator.take(stmt.location).universal_newlines().rev() { let line = line.trim(); if seen_blank { if line.starts_with('#') { diff --git a/crates/ruff/src/rules/pycodestyle/rules/invalid_escape_sequence.rs b/crates/ruff/src/rules/pycodestyle/rules/invalid_escape_sequence.rs index dd79f339d3..6eeb9b1550 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/invalid_escape_sequence.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/invalid_escape_sequence.rs @@ -4,6 +4,7 @@ use rustpython_parser::ast::Location; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::source_code::Locator; use ruff_python_ast::types::Range; @@ -76,7 +77,7 @@ pub fn invalid_escape_sequence( let body = &text[(quote_pos + quote.len())..(text.len() - quote.len())]; if !prefix.contains('r') { - for (row_offset, line) in body.lines().enumerate() { + for (row_offset, line) in body.universal_newlines().enumerate() { let chars: Vec = line.chars().collect(); for col_offset in 0..chars.len() { if chars[col_offset] != '\\' { diff --git a/crates/ruff/src/rules/pycodestyle/rules/lambda_assignment.rs b/crates/ruff/src/rules/pycodestyle/rules/lambda_assignment.rs index 335a9dba13..021b80467e 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/lambda_assignment.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/lambda_assignment.rs @@ -3,6 +3,7 @@ use rustpython_parser::ast::{Arguments, Expr, ExprKind, Location, Stmt, StmtKind use ruff_diagnostics::{AutofixKind, Availability, Diagnostic, Fix, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::helpers::{match_leading_content, match_trailing_content, unparse_stmt}; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::source_code::Stylist; use ruff_python_ast::types::{Range, 
ScopeKind}; use ruff_python_ast::whitespace::leading_space; @@ -86,7 +87,7 @@ pub fn lambda_assignment(checker: &mut Checker, target: &Expr, value: &Expr, stm let indentation = &leading_space(first_line); let mut indented = String::new(); for (idx, line) in function(id, args, body, checker.stylist) - .lines() + .universal_newlines() .enumerate() { if idx == 0 { diff --git a/crates/ruff/src/rules/pycodestyle/rules/no_newline_at_end_of_file.rs b/crates/ruff/src/rules/pycodestyle/rules/no_newline_at_end_of_file.rs index e5411cc53c..9609c37895 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/no_newline_at_end_of_file.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/no_newline_at_end_of_file.rs @@ -2,7 +2,8 @@ use rustpython_parser::ast::Location; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::source_code::Stylist; +use ruff_python_ast::newlines::StrExt; +use ruff_python_ast::source_code::{Locator, Stylist}; use ruff_python_ast::types::Range; /// ## What it does @@ -37,16 +38,16 @@ impl AlwaysAutofixableViolation for NoNewLineAtEndOfFile { /// W292 pub fn no_newline_at_end_of_file( + locator: &Locator, stylist: &Stylist, - contents: &str, autofix: bool, ) -> Option { - if !contents.ends_with('\n') { + if !locator.contents().ends_with(['\n', '\r']) { // Note: if `lines.last()` is `None`, then `contents` is empty (and so we don't // want to raise W292 anyway). - if let Some(line) = contents.lines().last() { + if let Some(line) = locator.contents().universal_newlines().last() { // Both locations are at the end of the file (and thus the same). 
- let location = Location::new(contents.lines().count(), line.len()); + let location = Location::new(locator.count_lines(), line.len()); let mut diagnostic = Diagnostic::new(NoNewLineAtEndOfFile, Range::new(location, location)); if autofix { diff --git a/crates/ruff/src/rules/pydocstyle/helpers.rs b/crates/ruff/src/rules/pydocstyle/helpers.rs index b6261b1361..5c7e5b4c9a 100644 --- a/crates/ruff/src/rules/pydocstyle/helpers.rs +++ b/crates/ruff/src/rules/pydocstyle/helpers.rs @@ -2,6 +2,7 @@ use std::collections::BTreeSet; use ruff_python_ast::cast; use ruff_python_ast::helpers::{map_callable, to_call_path}; +use ruff_python_ast::newlines::StrExt; use crate::checkers::ast::Checker; use crate::docstrings::definition::{Definition, DefinitionKind}; @@ -10,7 +11,7 @@ use crate::docstrings::definition::{Definition, DefinitionKind}; pub fn logical_line(content: &str) -> Option { // Find the first logical line. let mut logical_line = None; - for (i, line) in content.lines().enumerate() { + for (i, line) in content.universal_newlines().enumerate() { if line.trim().is_empty() { // Empty line. If this is the line _after_ the first logical line, stop. 
if logical_line.is_some() { diff --git a/crates/ruff/src/rules/pydocstyle/rules/blank_after_summary.rs b/crates/ruff/src/rules/pydocstyle/rules/blank_after_summary.rs index 2e39195bd0..3b47420e2e 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/blank_after_summary.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/blank_after_summary.rs @@ -1,5 +1,6 @@ use ruff_diagnostics::{AutofixKind, Availability, Diagnostic, Fix, Violation}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::types::Range; use crate::checkers::ast::Checker; @@ -45,7 +46,7 @@ pub fn blank_after_summary(checker: &mut Checker, docstring: &Docstring) { let mut lines_count = 1; let mut blanks_count = 0; - for line in body.trim().lines().skip(1) { + for line in body.trim().universal_newlines().skip(1) { lines_count += 1; if line.trim().is_empty() { blanks_count += 1; @@ -64,7 +65,7 @@ pub fn blank_after_summary(checker: &mut Checker, docstring: &Docstring) { if blanks_count > 1 { // Find the "summary" line (defined as the first non-blank line). 
let mut summary_line = 0; - for line in body.lines() { + for line in body.universal_newlines() { if line.trim().is_empty() { summary_line += 1; } else { diff --git a/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_class.rs b/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_class.rs index 9659fca6c1..df7ff18fdb 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_class.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_class.rs @@ -1,5 +1,6 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::types::Range; use crate::checkers::ast::Checker; @@ -75,7 +76,7 @@ pub fn blank_before_after_class(checker: &mut Checker, docstring: &Docstring) { .slice(Range::new(parent.location, docstring.expr.location)); let blank_lines_before = before - .lines() + .universal_newlines() .rev() .skip(1) .take_while(|line| line.trim().is_empty()) @@ -138,7 +139,7 @@ pub fn blank_before_after_class(checker: &mut Checker, docstring: &Docstring) { )); let all_blank_after = after - .lines() + .universal_newlines() .skip(1) .all(|line| line.trim().is_empty() || line.trim_start().starts_with('#')); if all_blank_after { @@ -146,7 +147,7 @@ pub fn blank_before_after_class(checker: &mut Checker, docstring: &Docstring) { } let blank_lines_after = after - .lines() + .universal_newlines() .skip(1) .take_while(|line| line.trim().is_empty()) .count(); diff --git a/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_function.rs b/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_function.rs index 701808c70a..2d58910fbe 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_function.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_function.rs @@ -3,6 +3,7 @@ use regex::Regex; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use 
ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::types::Range; use crate::checkers::ast::Checker; @@ -67,7 +68,7 @@ pub fn blank_before_after_function(checker: &mut Checker, docstring: &Docstring) .slice(Range::new(parent.location, docstring.expr.location)); let blank_lines_before = before - .lines() + .universal_newlines() .rev() .skip(1) .take_while(|line| line.trim().is_empty()) @@ -102,7 +103,7 @@ pub fn blank_before_after_function(checker: &mut Checker, docstring: &Docstring) // If the docstring is only followed by blank and commented lines, abort. let all_blank_after = after - .lines() + .universal_newlines() .skip(1) .all(|line| line.trim().is_empty() || line.trim_start().starts_with('#')); if all_blank_after { @@ -111,7 +112,7 @@ pub fn blank_before_after_function(checker: &mut Checker, docstring: &Docstring) // Count the number of blank lines after the docstring. let blank_lines_after = after - .lines() + .universal_newlines() .skip(1) .take_while(|line| line.trim().is_empty()) .count(); @@ -119,7 +120,7 @@ pub fn blank_before_after_function(checker: &mut Checker, docstring: &Docstring) // Avoid violations for blank lines followed by inner functions or classes. 
if blank_lines_after == 1 && after - .lines() + .universal_newlines() .skip(1 + blank_lines_after) .find(|line| !line.trim_start().starts_with('#')) .map_or(false, |line| INNER_FUNCTION_OR_CLASS_REGEX.is_match(line)) diff --git a/crates/ruff/src/rules/pydocstyle/rules/ends_with_period.rs b/crates/ruff/src/rules/pydocstyle/rules/ends_with_period.rs index e4d963feff..c97af42816 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/ends_with_period.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/ends_with_period.rs @@ -2,6 +2,7 @@ use strum::IntoEnumIterator; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::str::leading_quote; use ruff_python_ast::types::Range; @@ -31,7 +32,7 @@ pub fn ends_with_period(checker: &mut Checker, docstring: &Docstring) { let contents = docstring.contents; let body = docstring.body; - if let Some(first_line) = body.trim().lines().next() { + if let Some(first_line) = body.trim().universal_newlines().next() { let trimmed = first_line.trim(); // Avoid false-positives: `:param`, etc. 
@@ -55,7 +56,7 @@ pub fn ends_with_period(checker: &mut Checker, docstring: &Docstring) { } if let Some(index) = logical_line(body) { - let line = body.lines().nth(index).unwrap(); + let line = body.universal_newlines().nth(index).unwrap(); let trimmed = line.trim_end(); if !trimmed.ends_with('.') { diff --git a/crates/ruff/src/rules/pydocstyle/rules/ends_with_punctuation.rs b/crates/ruff/src/rules/pydocstyle/rules/ends_with_punctuation.rs index f163afc6ad..4562cc4c7b 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/ends_with_punctuation.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/ends_with_punctuation.rs @@ -2,6 +2,7 @@ use strum::IntoEnumIterator; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::str::leading_quote; use ruff_python_ast::types::Range; @@ -31,7 +32,7 @@ pub fn ends_with_punctuation(checker: &mut Checker, docstring: &Docstring) { let contents = docstring.contents; let body = docstring.body; - if let Some(first_line) = body.trim().lines().next() { + if let Some(first_line) = body.trim().universal_newlines().next() { let trimmed = first_line.trim(); // Avoid false-positives: `:param`, etc. 
@@ -55,7 +56,7 @@ pub fn ends_with_punctuation(checker: &mut Checker, docstring: &Docstring) { } if let Some(index) = logical_line(body) { - let line = body.lines().nth(index).unwrap(); + let line = body.universal_newlines().nth(index).unwrap(); let trimmed = line.trim_end(); if !(trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?')) { let mut diagnostic = Diagnostic::new(EndsInPunctuation, Range::from(docstring.expr)); diff --git a/crates/ruff/src/rules/pydocstyle/rules/indent.rs b/crates/ruff/src/rules/pydocstyle/rules/indent.rs index 475720d40e..dc1a74ec18 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/indent.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/indent.rs @@ -1,9 +1,9 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Violation}; use ruff_diagnostics::{Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::NewlineWithTrailingNewline; use ruff_python_ast::types::Range; use ruff_python_ast::whitespace; -use ruff_python_ast::whitespace::LinesWithTrailingNewline; use crate::checkers::ast::Checker; use crate::docstrings::definition::Docstring; @@ -53,7 +53,7 @@ pub fn indent(checker: &mut Checker, docstring: &Docstring) { let body = docstring.body; // Split the docstring into lines. 
- let lines: Vec<&str> = LinesWithTrailingNewline::from(body).collect(); + let lines: Vec<&str> = NewlineWithTrailingNewline::from(body).collect(); if lines.len() <= 1 { return; } diff --git a/crates/ruff/src/rules/pydocstyle/rules/multi_line_summary_start.rs b/crates/ruff/src/rules/pydocstyle/rules/multi_line_summary_start.rs index c2d2ec7ea8..30fc1d9f39 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/multi_line_summary_start.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/multi_line_summary_start.rs @@ -1,8 +1,8 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::{NewlineWithTrailingNewline, StrExt}; use ruff_python_ast::str::{is_triple_quote, leading_quote}; use ruff_python_ast::types::Range; -use ruff_python_ast::whitespace::LinesWithTrailingNewline; use crate::checkers::ast::Checker; use crate::docstrings::definition::{DefinitionKind, Docstring}; @@ -42,10 +42,10 @@ pub fn multi_line_summary_start(checker: &mut Checker, docstring: &Docstring) { let contents = docstring.contents; let body = docstring.body; - if LinesWithTrailingNewline::from(body).nth(1).is_none() { + if NewlineWithTrailingNewline::from(body).nth(1).is_none() { return; }; - let mut content_lines = contents.lines(); + let mut content_lines = contents.universal_newlines(); let Some(first_line) = content_lines .next() else diff --git a/crates/ruff/src/rules/pydocstyle/rules/newline_after_last_paragraph.rs b/crates/ruff/src/rules/pydocstyle/rules/newline_after_last_paragraph.rs index b3f49732e2..19e74ba2bf 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/newline_after_last_paragraph.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/newline_after_last_paragraph.rs @@ -1,8 +1,8 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::{NewlineWithTrailingNewline, StrExt}; use 
ruff_python_ast::types::Range; use ruff_python_ast::whitespace; -use ruff_python_ast::whitespace::LinesWithTrailingNewline; use crate::checkers::ast::Checker; use crate::docstrings::definition::Docstring; @@ -29,12 +29,12 @@ pub fn newline_after_last_paragraph(checker: &mut Checker, docstring: &Docstring let body = docstring.body; let mut line_count = 0; - for line in LinesWithTrailingNewline::from(body) { + for line in NewlineWithTrailingNewline::from(body) { if !line.trim().is_empty() { line_count += 1; } if line_count > 1 { - if let Some(last_line) = contents.lines().last().map(str::trim) { + if let Some(last_line) = contents.universal_newlines().last().map(str::trim) { if last_line != "\"\"\"" && last_line != "'''" { let mut diagnostic = Diagnostic::new(NewLineAfterLastParagraph, Range::from(docstring.expr)); diff --git a/crates/ruff/src/rules/pydocstyle/rules/no_signature.rs b/crates/ruff/src/rules/pydocstyle/rules/no_signature.rs index 6e404dc223..5546a24cf7 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/no_signature.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/no_signature.rs @@ -2,6 +2,7 @@ use rustpython_parser::ast::StmtKind; use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::types::Range; use crate::checkers::ast::Checker; @@ -32,7 +33,7 @@ pub fn no_signature(checker: &mut Checker, docstring: &Docstring) { let body = docstring.body; - let Some(first_line) = body.trim().lines().next() else { + let Some(first_line) = body.trim().universal_newlines().next() else { return; }; if !first_line.contains(&format!("{name}(")) { diff --git a/crates/ruff/src/rules/pydocstyle/rules/no_surrounding_whitespace.rs b/crates/ruff/src/rules/pydocstyle/rules/no_surrounding_whitespace.rs index 609bbe1e8b..c4743f77e3 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/no_surrounding_whitespace.rs +++ 
b/crates/ruff/src/rules/pydocstyle/rules/no_surrounding_whitespace.rs @@ -1,8 +1,8 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::NewlineWithTrailingNewline; use ruff_python_ast::str::leading_quote; use ruff_python_ast::types::Range; -use ruff_python_ast::whitespace::LinesWithTrailingNewline; use crate::checkers::ast::Checker; use crate::docstrings::definition::Docstring; @@ -28,7 +28,7 @@ pub fn no_surrounding_whitespace(checker: &mut Checker, docstring: &Docstring) { let contents = docstring.contents; let body = docstring.body; - let mut lines = LinesWithTrailingNewline::from(body); + let mut lines = NewlineWithTrailingNewline::from(body); let Some(line) = lines.next() else { return; }; diff --git a/crates/ruff/src/rules/pydocstyle/rules/non_imperative_mood.rs b/crates/ruff/src/rules/pydocstyle/rules/non_imperative_mood.rs index 9beae70e9f..df9a4c9563 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/non_imperative_mood.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/non_imperative_mood.rs @@ -7,6 +7,7 @@ use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::cast; use ruff_python_ast::helpers::to_call_path; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::types::{CallPath, Range}; use ruff_python_ast::visibility::{is_property, is_test}; @@ -48,7 +49,7 @@ pub fn non_imperative_mood( let body = docstring.body; // Find first line, disregarding whitespace. 
- let line = match body.trim().lines().next() { + let line = match body.trim().universal_newlines().next() { Some(line) => line.trim(), None => return, }; diff --git a/crates/ruff/src/rules/pydocstyle/rules/one_liner.rs b/crates/ruff/src/rules/pydocstyle/rules/one_liner.rs index 7ecb70f70d..b0dcf37ff6 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/one_liner.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/one_liner.rs @@ -1,8 +1,8 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::NewlineWithTrailingNewline; use ruff_python_ast::str::{leading_quote, trailing_quote}; use ruff_python_ast::types::Range; -use ruff_python_ast::whitespace::LinesWithTrailingNewline; use crate::checkers::ast::Checker; use crate::docstrings::definition::Docstring; @@ -26,7 +26,7 @@ impl AlwaysAutofixableViolation for FitsOnOneLine { pub fn one_liner(checker: &mut Checker, docstring: &Docstring) { let mut line_count = 0; let mut non_empty_line_count = 0; - for line in LinesWithTrailingNewline::from(docstring.body) { + for line in NewlineWithTrailingNewline::from(docstring.body) { line_count += 1; if !line.trim().is_empty() { non_empty_line_count += 1; diff --git a/crates/ruff/src/rules/pydocstyle/rules/sections.rs b/crates/ruff/src/rules/pydocstyle/rules/sections.rs index 3680dc3026..b5d51378d1 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/sections.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/sections.rs @@ -8,9 +8,9 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Violation}; use ruff_diagnostics::{Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::helpers::identifier_range; +use ruff_python_ast::newlines::NewlineWithTrailingNewline; use ruff_python_ast::types::Range; use ruff_python_ast::visibility::is_staticmethod; -use ruff_python_ast::whitespace::LinesWithTrailingNewline; use ruff_python_ast::{cast, whitespace}; use 
crate::checkers::ast::Checker; @@ -273,7 +273,7 @@ impl AlwaysAutofixableViolation for NoBlankLinesBetweenHeaderAndContent { pub fn sections(checker: &mut Checker, docstring: &Docstring, convention: Option<&Convention>) { let body = docstring.body; - let lines: Vec<&str> = LinesWithTrailingNewline::from(body).collect(); + let lines: Vec<&str> = NewlineWithTrailingNewline::from(body).collect(); if lines.len() < 2 { return; } @@ -923,30 +923,32 @@ fn parameters_section(checker: &mut Checker, docstring: &Docstring, context: &Se // Join line continuations, then resplit by line. let adjusted_following_lines = context.following_lines.join("\n").replace("\\\n", ""); - let lines: Vec<&str> = LinesWithTrailingNewline::from(&adjusted_following_lines).collect(); - - for i in 1..lines.len() { - let current_line = lines[i - 1]; - let current_leading_space = whitespace::leading_space(current_line); - let next_line = lines[i]; - if current_leading_space == section_level_indent - && (whitespace::leading_space(next_line).len() > current_leading_space.len()) - && !next_line.trim().is_empty() - { - let parameters = if let Some(semi_index) = current_line.find(':') { - // If the parameter has a type annotation, exclude it. - &current_line[..semi_index] - } else { - // Otherwise, it's just a list of parameters on the current line. - current_line.trim() - }; - // Notably, NumPy lets you put multiple parameters of the same type on the same - // line. 
- for parameter in parameters.split(',') { - docstring_args.insert(parameter.trim()); + let mut lines = NewlineWithTrailingNewline::from(&adjusted_following_lines); + if let Some(mut current_line) = lines.next() { + for next_line in lines { + let current_leading_space = whitespace::leading_space(current_line); + if current_leading_space == section_level_indent + && (whitespace::leading_space(next_line).len() > current_leading_space.len()) + && !next_line.trim().is_empty() + { + let parameters = if let Some(semi_index) = current_line.find(':') { + // If the parameter has a type annotation, exclude it. + &current_line[..semi_index] + } else { + // Otherwise, it's just a list of parameters on the current line. + current_line.trim() + }; + // Notably, NumPy lets you put multiple parameters of the same type on the same + // line. + for parameter in parameters.split(',') { + docstring_args.insert(parameter.trim()); + } } + + current_line = next_line; } } + // Validate that all arguments were documented.
missing_args(checker, docstring, &docstring_args); } diff --git a/crates/ruff/src/rules/pydocstyle/rules/triple_quotes.rs b/crates/ruff/src/rules/pydocstyle/rules/triple_quotes.rs index 720b70fe81..6e6a2d9571 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/triple_quotes.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/triple_quotes.rs @@ -1,5 +1,6 @@ use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::types::Range; use crate::checkers::ast::Checker; @@ -20,8 +21,7 @@ pub fn triple_quotes(checker: &mut Checker, docstring: &Docstring) { let contents = docstring.contents; let body = docstring.body; - let Some(first_line) = contents - .lines() + let Some(first_line) = contents.universal_newlines() .next() .map(str::to_lowercase) else { diff --git a/crates/ruff_python_ast/Cargo.toml b/crates/ruff_python_ast/Cargo.toml index 087bcaabb9..44df6c5fb9 100644 --- a/crates/ruff_python_ast/Cargo.toml +++ b/crates/ruff_python_ast/Cargo.toml @@ -24,4 +24,6 @@ regex = { workspace = true } rustc-hash = { workspace = true } rustpython-common = { workspace = true } rustpython-parser = { workspace = true } +# TODO(charlie): See https://github.com/RustPython/RustPython/pull/4684. 
+serde = { workspace = true } smallvec = { version = "1.10.0" } diff --git a/crates/ruff_python_ast/src/helpers.rs b/crates/ruff_python_ast/src/helpers.rs index 047e03c59b..039d31cd8f 100644 --- a/crates/ruff_python_ast/src/helpers.rs +++ b/crates/ruff_python_ast/src/helpers.rs @@ -14,6 +14,7 @@ use rustpython_parser::{lexer, Mode, StringKind, Tok}; use smallvec::{smallvec, SmallVec}; use crate::context::Context; +use crate::newlines::StrExt; use crate::source_code::{Generator, Indexer, Locator, Stylist}; use crate::types::{Binding, BindingKind, CallPath, Range}; use crate::visitor; @@ -1125,7 +1126,7 @@ pub fn end_of_statement(stmt: &Stmt, locator: &Locator) -> Location { } // Otherwise, find the end of the last line that's "part of" the statement. - for (lineno, line) in contents.lines().enumerate() { + for (lineno, line) in contents.universal_newlines().enumerate() { if line.ends_with('\\') { continue; } diff --git a/crates/ruff_python_ast/src/lib.rs b/crates/ruff_python_ast/src/lib.rs index 529034a98e..2856716fbb 100644 --- a/crates/ruff_python_ast/src/lib.rs +++ b/crates/ruff_python_ast/src/lib.rs @@ -6,6 +6,7 @@ pub mod function_type; pub mod hashable; pub mod helpers; pub mod logging; +pub mod newlines; pub mod operations; pub mod relocate; pub mod source_code; diff --git a/crates/ruff_python_ast/src/newlines.rs b/crates/ruff_python_ast/src/newlines.rs new file mode 100644 index 0000000000..371f80a352 --- /dev/null +++ b/crates/ruff_python_ast/src/newlines.rs @@ -0,0 +1,192 @@ +use std::iter::FusedIterator; + +/// Extension trait for [`str`] that provides a [`UniversalNewlineIterator`]. +pub trait StrExt { + fn universal_newlines(&self) -> UniversalNewlineIterator<'_>; +} + +impl StrExt for str { + fn universal_newlines(&self) -> UniversalNewlineIterator<'_> { + UniversalNewlineIterator::from(self) + } +} + +/// Like [`str#lines`], but accommodates LF, CRLF, and CR line endings, +/// the latter of which are not supported by [`str#lines`]. 
+/// +/// ## Examples +/// +/// ```rust +/// use ruff_python_ast::newlines::UniversalNewlineIterator; +/// +/// let mut lines = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop"); +/// +/// assert_eq!(lines.next_back(), Some("bop")); +/// assert_eq!(lines.next(), Some("foo")); +/// assert_eq!(lines.next_back(), Some("baz")); +/// assert_eq!(lines.next(), Some("bar")); +/// assert_eq!(lines.next_back(), Some("")); +/// assert_eq!(lines.next(), None); +/// ``` +pub struct UniversalNewlineIterator<'a> { + text: &'a str, +} + +impl<'a> UniversalNewlineIterator<'a> { + pub fn from(text: &'a str) -> UniversalNewlineIterator<'a> { + UniversalNewlineIterator { text } + } +} + +impl<'a> Iterator for UniversalNewlineIterator<'a> { + type Item = &'a str; + + #[inline] + fn next(&mut self) -> Option<&'a str> { + if self.text.is_empty() { + return None; + } + + let line = match self.text.find(['\n', '\r']) { + // Non-last line + Some(line_end) => { + let (line, remainder) = self.text.split_at(line_end); + + self.text = match remainder.as_bytes()[0] { + // Explicit branch for `\n` as this is the most likely path + b'\n' => &remainder[1..], + // '\r\n' + b'\r' if remainder.as_bytes().get(1) == Some(&b'\n') => &remainder[2..], + // '\r' + _ => &remainder[1..], + }; + + line + } + // Last line + None => std::mem::take(&mut self.text), + }; + + Some(line) + } + + fn last(mut self) -> Option<Self::Item> { + self.next_back() + } +} + +impl DoubleEndedIterator for UniversalNewlineIterator<'_> { + #[inline] + fn next_back(&mut self) -> Option<Self::Item> { + if self.text.is_empty() { + return None; + } + + let len = self.text.len(); + + // Trim any trailing newlines. + self.text = match self.text.as_bytes()[len - 1] { + b'\n' if len > 1 && self.text.as_bytes()[len - 2] == b'\r' => &self.text[..len - 2], + b'\n' | b'\r' => &self.text[..len - 1], + _ => self.text, + }; + + // Find the end of the previous line. The previous line is the text up to, but not including + // the newline character.
+ let line = match self.text.rfind(['\n', '\r']) { + // '\n' or '\r' or '\r\n' + Some(line_end) => { + let (remainder, line) = self.text.split_at(line_end + 1); + self.text = remainder; + + line + } + // Last line + None => std::mem::take(&mut self.text), + }; + + Some(line) + } +} + +impl FusedIterator for UniversalNewlineIterator<'_> {} + +/// Like [`UniversalNewlineIterator`], but includes a trailing newline as an empty line. +pub struct NewlineWithTrailingNewline<'a> { + trailing: Option<&'a str>, + underlying: UniversalNewlineIterator<'a>, +} + +impl<'a> NewlineWithTrailingNewline<'a> { + pub fn from(input: &'a str) -> NewlineWithTrailingNewline<'a> { + NewlineWithTrailingNewline { + underlying: UniversalNewlineIterator::from(input), + trailing: if input.ends_with(['\r', '\n']) { + Some("") + } else { + None + }, + } + } +} + +impl<'a> Iterator for NewlineWithTrailingNewline<'a> { + type Item = &'a str; + + #[inline] + fn next(&mut self) -> Option<&'a str> { + self.underlying.next().or_else(|| self.trailing.take()) + } +} + +#[cfg(test)] +mod tests { + use super::UniversalNewlineIterator; + + #[test] + fn universal_newlines_empty_str() { + let lines: Vec<_> = UniversalNewlineIterator::from("").collect(); + assert_eq!(lines, Vec::<&str>::default()); + + let lines: Vec<_> = UniversalNewlineIterator::from("").rev().collect(); + assert_eq!(lines, Vec::<&str>::default()); + } + + #[test] + fn universal_newlines_forward() { + let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop").collect(); + assert_eq!(lines, vec!["foo", "bar", "", "baz", "bop"]); + + let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop\n").collect(); + assert_eq!(lines, vec!["foo", "bar", "", "baz", "bop"]); + + let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop\n\n").collect(); + assert_eq!(lines, vec!["foo", "bar", "", "baz", "bop", ""]); + } + + #[test] + fn universal_newlines_backwards() { + let lines: Vec<_> = 
UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop") + .rev() + .collect(); + assert_eq!(lines, vec!["bop", "baz", "", "bar", "foo"]); + + let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\nbaz\rbop\n") + .rev() + .collect(); + + assert_eq!(lines, vec!["bop", "baz", "", "bar", "foo"]); + } + + #[test] + fn universal_newlines_mixed() { + let mut lines = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop"); + + assert_eq!(lines.next_back(), Some("bop")); + assert_eq!(lines.next(), Some("foo")); + assert_eq!(lines.next_back(), Some("baz")); + assert_eq!(lines.next(), Some("bar")); + assert_eq!(lines.next_back(), Some("")); + assert_eq!(lines.next(), None); + } +} diff --git a/crates/ruff_python_ast/src/source_code/locator.rs b/crates/ruff_python_ast/src/source_code/locator.rs index 035110f78e..9acf3a95db 100644 --- a/crates/ruff_python_ast/src/source_code/locator.rs +++ b/crates/ruff_python_ast/src/source_code/locator.rs @@ -56,10 +56,18 @@ impl<'a> Locator<'a> { self.contents } + /// Return the number of lines in the source code. + pub fn count_lines(&self) -> usize { + let index = self.get_or_init_index(); + index.count_lines() + } + + /// Return the number of bytes in the source code. pub const fn len(&self) -> usize { self.contents.len() } + /// Return `true` if the source code is empty. pub const fn is_empty(&self) -> bool { self.contents.is_empty() } @@ -83,6 +91,14 @@ impl Index { Index::Utf8(utf8) => utf8.byte_offset(location, contents), } } + + /// Return the number of lines in the source code. 
+ fn count_lines(&self) -> usize { + match self { + Index::Ascii(ascii) => ascii.line_start_byte_offsets.len(), + Index::Utf8(utf8) => utf8.line_start_byte_offsets.len(), + } + } } impl From<&str> for Index { diff --git a/crates/ruff_python_ast/src/str.rs b/crates/ruff_python_ast/src/str.rs index 1a332b474c..34651f00af 100644 --- a/crates/ruff_python_ast/src/str.rs +++ b/crates/ruff_python_ast/src/str.rs @@ -40,19 +40,18 @@ pub fn raw_contents(contents: &str) -> &str { /// Return the leading quote for a string or byte literal (e.g., `"""`). pub fn leading_quote(content: &str) -> Option<&str> { - if let Some(first_line) = content.lines().next() { - for pattern in TRIPLE_QUOTE_STR_PREFIXES - .iter() - .chain(TRIPLE_QUOTE_BYTE_PREFIXES) - .chain(SINGLE_QUOTE_STR_PREFIXES) - .chain(SINGLE_QUOTE_BYTE_PREFIXES) - { - if first_line.starts_with(pattern) { - return Some(pattern); + TRIPLE_QUOTE_STR_PREFIXES + .iter() + .chain(TRIPLE_QUOTE_BYTE_PREFIXES) + .chain(SINGLE_QUOTE_STR_PREFIXES) + .chain(SINGLE_QUOTE_BYTE_PREFIXES) + .find_map(|pattern| { + if content.starts_with(pattern) { + Some(*pattern) + } else { + None } - } - } - None + }) } /// Return the trailing quote string for a string or byte literal (e.g., `"""`). diff --git a/crates/ruff_python_ast/src/whitespace.rs b/crates/ruff_python_ast/src/whitespace.rs index 8a779421c1..64bdc35c8c 100644 --- a/crates/ruff_python_ast/src/whitespace.rs +++ b/crates/ruff_python_ast/src/whitespace.rs @@ -1,5 +1,3 @@ -use std::str::Lines; - use rustpython_parser::ast::{Located, Location}; use crate::source_code::Locator; @@ -39,38 +37,3 @@ pub fn clean(indentation: &str) -> String { .map(|char| if char.is_whitespace() { char } else { ' ' }) .collect() } - -/// Like `str#lines`, but includes a trailing newline as an empty line. 
-pub struct LinesWithTrailingNewline<'a> { - trailing: Option<&'a str>, - underlying: Lines<'a>, -} - -impl<'a> LinesWithTrailingNewline<'a> { - pub fn from(input: &'a str) -> LinesWithTrailingNewline<'a> { - LinesWithTrailingNewline { - underlying: input.lines(), - trailing: if input.ends_with('\n') { - Some("") - } else { - None - }, - } - } -} - -impl<'a> Iterator for LinesWithTrailingNewline<'a> { - type Item = &'a str; - - #[inline] - fn next(&mut self) -> Option<&'a str> { - let mut next = self.underlying.next(); - if next.is_none() { - if self.trailing.is_some() { - next = self.trailing; - self.trailing = None; - } - } - next - } -} diff --git a/crates/ruff_python_formatter/src/cst/helpers.rs b/crates/ruff_python_formatter/src/cst/helpers.rs index f25a9dd903..19b8cdb7fa 100644 --- a/crates/ruff_python_formatter/src/cst/helpers.rs +++ b/crates/ruff_python_formatter/src/cst/helpers.rs @@ -1,5 +1,6 @@ use rustpython_parser::ast::Location; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::source_code::Locator; use ruff_python_ast::types::Range; @@ -96,7 +97,11 @@ pub fn expand_indented_block( // Compound statement: from the colon to the end of the block. let mut offset = 0; - for (index, line) in contents[end_index..].lines().skip(1).enumerate() { + for (index, line) in contents[end_index..] + .universal_newlines() + .skip(1) + .enumerate() + { if line.is_empty() { continue; }