From 549cc1e437479ec7fde0dcf3e7360fb95d06076d Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Sun, 9 Jun 2024 15:25:17 +0530 Subject: [PATCH] Build `CommentRanges` outside the parser (#11792) ## Summary This PR updates the parser to remove building the `CommentRanges` and instead it'll be built by the linter and the formatter when it's required. For the linter, it'll be built and owned by the `Indexer` while for the formatter it'll be built from the `Tokens` struct and passed as an argument. ## Test Plan `cargo insta test` --- Cargo.lock | 2 ++ crates/ruff_benchmark/Cargo.toml | 1 + crates/ruff_benchmark/benches/formatter.rs | 8 +++-- crates/ruff_linter/src/checkers/ast/mod.rs | 2 +- crates/ruff_linter/src/checkers/imports.rs | 2 +- .../src/checkers/physical_lines.rs | 4 +-- crates/ruff_linter/src/checkers/tokens.rs | 10 +++---- crates/ruff_linter/src/directives.rs | 13 +++++---- crates/ruff_linter/src/linter.rs | 19 +++++------- .../src/rules/isort/rules/organize_imports.rs | 10 +++---- crates/ruff_linter/src/rules/pyflakes/mod.rs | 2 +- crates/ruff_linter/src/test.rs | 4 +-- crates/ruff_python_formatter/src/cli.rs | 8 +++-- .../ruff_python_formatter/src/comments/mod.rs | 10 +++---- .../src/expression/parentheses.rs | 3 +- crates/ruff_python_formatter/src/lib.rs | 10 +++++-- crates/ruff_python_formatter/src/range.rs | 7 +++-- .../src/statement/suite.rs | 4 ++- .../src/string/docstring.rs | 4 ++- crates/ruff_python_index/src/indexer.rs | 28 +++++++++++++++++- crates/ruff_python_parser/src/lib.rs | 20 ++++++++----- crates/ruff_python_parser/src/parser/mod.rs | 4 +-- crates/ruff_python_parser/src/token_source.rs | 17 ++--------- .../tests/block_comments.rs | 22 +++++++++----- .../tests/simple_tokenizer.rs | 5 ++-- crates/ruff_server/src/lint.rs | 4 +-- crates/ruff_wasm/Cargo.toml | 1 + crates/ruff_wasm/src/lib.rs | 29 +++++++++++++------ 28 files changed, 151 insertions(+), 102 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 888aee89d5..c20e35bd38 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1957,6 +1957,7 @@ dependencies = [ "ruff_python_ast", "ruff_python_formatter", "ruff_python_parser", + "ruff_python_trivia", "serde", "serde_json", "tikv-jemallocator", @@ -2377,6 +2378,7 @@ dependencies = [ "ruff_python_formatter", "ruff_python_index", "ruff_python_parser", + "ruff_python_trivia", "ruff_source_file", "ruff_text_size", "ruff_workspace", diff --git a/crates/ruff_benchmark/Cargo.toml b/crates/ruff_benchmark/Cargo.toml index e8ff3a09b8..c95caf0d13 100644 --- a/crates/ruff_benchmark/Cargo.toml +++ b/crates/ruff_benchmark/Cargo.toml @@ -45,6 +45,7 @@ ruff_linter = { workspace = true } ruff_python_ast = { workspace = true } ruff_python_formatter = { workspace = true } ruff_python_parser = { workspace = true } +ruff_python_trivia = { workspace = true } [lints] workspace = true diff --git a/crates/ruff_benchmark/benches/formatter.rs b/crates/ruff_benchmark/benches/formatter.rs index cb6db8608f..af2b1caa76 100644 --- a/crates/ruff_benchmark/benches/formatter.rs +++ b/crates/ruff_benchmark/benches/formatter.rs @@ -6,6 +6,7 @@ use ruff_benchmark::criterion::{ use ruff_benchmark::{TestCase, TestFile, TestFileDownloadError}; use ruff_python_formatter::{format_module_ast, PreviewMode, PyFormatOptions}; use ruff_python_parser::{parse, Mode}; +use ruff_python_trivia::CommentRanges; #[cfg(target_os = "windows")] #[global_allocator] @@ -54,11 +55,14 @@ fn benchmark_formatter(criterion: &mut Criterion) { let parsed = parse(case.code(), Mode::Module).expect("Input should be a valid Python code"); + let comment_ranges = CommentRanges::from(parsed.tokens()); + b.iter(|| { let options = PyFormatOptions::from_extension(Path::new(case.name())) .with_preview(PreviewMode::Enabled); - let formatted = format_module_ast(&parsed, case.code(), options) - .expect("Formatting to succeed"); + let formatted = + format_module_ast(&parsed, &comment_ranges, case.code(), options) + .expect("Formatting to succeed"); formatted.print().expect("Printing to succeed") }); diff --git a/crates/ruff_linter/src/checkers/ast/mod.rs b/crates/ruff_linter/src/checkers/ast/mod.rs index a964a1c01d..4c3fc040fa 100644 --- a/crates/ruff_linter/src/checkers/ast/mod.rs +++ b/crates/ruff_linter/src/checkers/ast/mod.rs @@ -329,7 +329,7 @@ impl<'a> Checker<'a> { /// Returns the [`CommentRanges`] for the parsed source code. pub(crate) fn comment_ranges(&self) -> &'a CommentRanges { - self.parsed.comment_ranges() + self.indexer.comment_ranges() } /// Returns the [`Tokens`] for the parsed type annotation if the checker is in a typing context diff --git a/crates/ruff_linter/src/checkers/imports.rs b/crates/ruff_linter/src/checkers/imports.rs index c2cc0fccb4..4fbf72626f 100644 --- a/crates/ruff_linter/src/checkers/imports.rs +++ b/crates/ruff_linter/src/checkers/imports.rs @@ -51,7 +51,7 @@ pub(crate) fn check_imports( settings, package, source_type, - parsed, + parsed.tokens(), ) { diagnostics.push(diagnostic); } diff --git a/crates/ruff_linter/src/checkers/physical_lines.rs b/crates/ruff_linter/src/checkers/physical_lines.rs index 938c6be6e4..7f1940a61b 100644 --- a/crates/ruff_linter/src/checkers/physical_lines.rs +++ b/crates/ruff_linter/src/checkers/physical_lines.rs @@ -3,7 +3,6 @@ use ruff_diagnostics::Diagnostic; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; -use ruff_python_trivia::CommentRanges; use ruff_source_file::{Locator, UniversalNewlines}; use ruff_text_size::TextSize; @@ -20,7 +19,6 @@ pub(crate) fn check_physical_lines( locator: &Locator, stylist: &Stylist, indexer: &Indexer, - comment_ranges: &CommentRanges, doc_lines: &[TextSize], settings: &LinterSettings, ) -> Vec { @@ -37,6 +35,7 @@ pub(crate) fn check_physical_lines( let enforce_copyright_notice = settings.rules.enabled(Rule::MissingCopyrightNotice); let mut doc_lines_iter = doc_lines.iter().peekable(); + let comment_ranges = indexer.comment_ranges(); for line in locator.contents().universal_newlines() { while doc_lines_iter @@ -115,7 +114,6 @@ mod tests { &locator, &stylist, &indexer, - parsed.comment_ranges(), &[], &LinterSettings { pycodestyle: pycodestyle::settings::Settings { diff --git a/crates/ruff_linter/src/checkers/tokens.rs b/crates/ruff_linter/src/checkers/tokens.rs index 0c59df7857..e90b25301b 100644 --- a/crates/ruff_linter/src/checkers/tokens.rs +++ b/crates/ruff_linter/src/checkers/tokens.rs @@ -3,12 +3,12 @@ use std::path::Path; use ruff_notebook::CellOffsets; -use ruff_python_ast::{ModModule, PySourceType}; +use ruff_python_ast::PySourceType; use ruff_python_codegen::Stylist; use ruff_diagnostics::Diagnostic; use ruff_python_index::Indexer; -use ruff_python_parser::Parsed; +use ruff_python_parser::Tokens; use ruff_source_file::Locator; use ruff_text_size::Ranged; @@ -23,7 +23,7 @@ use crate::settings::LinterSettings; #[allow(clippy::too_many_arguments)] pub(crate) fn check_tokens( - parsed: &Parsed, + tokens: &Tokens, path: &Path, locator: &Locator, indexer: &Indexer, @@ -33,9 +33,7 @@ pub(crate) fn check_tokens( cell_offsets: Option<&CellOffsets>, ) -> Vec { let mut diagnostics: Vec = vec![]; - - let tokens = parsed.tokens(); - let comment_ranges = parsed.comment_ranges(); + let comment_ranges = indexer.comment_ranges(); if settings.rules.any_enabled(&[ Rule::BlankLineBetweenMethods, diff --git a/crates/ruff_linter/src/directives.rs b/crates/ruff_linter/src/directives.rs index 398d02696a..0cf54a4d24 100644 --- a/crates/ruff_linter/src/directives.rs +++ b/crates/ruff_linter/src/directives.rs @@ -4,8 +4,7 @@ use std::iter::Peekable; use std::str::FromStr; use bitflags::bitflags; -use ruff_python_ast::ModModule; -use ruff_python_parser::{Parsed, TokenKind, Tokens}; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_python_trivia::CommentRanges; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; @@ -52,19 +51,19 @@ pub struct Directives { } pub fn extract_directives( - parsed: &Parsed, + tokens: &Tokens, flags: Flags, locator: &Locator, indexer: &Indexer, ) -> Directives { Directives { noqa_line_for: if flags.intersects(Flags::NOQA) { - extract_noqa_line_for(parsed.tokens(), locator, indexer) + extract_noqa_line_for(tokens, locator, indexer) } else { NoqaMapping::default() }, isort: if flags.intersects(Flags::ISORT) { - extract_isort_directives(locator, parsed.comment_ranges()) + extract_isort_directives(locator, indexer.comment_ranges()) } else { IsortDirectives::default() }, @@ -380,6 +379,7 @@ impl TodoDirectiveKind { #[cfg(test)] mod tests { use ruff_python_parser::parse_module; + use ruff_python_trivia::CommentRanges; use ruff_text_size::{TextLen, TextRange, TextSize}; use ruff_python_index::Indexer; @@ -570,7 +570,8 @@ assert foo, \ fn isort_directives(contents: &str) -> IsortDirectives { let parsed = parse_module(contents).unwrap(); let locator = Locator::new(contents); - extract_isort_directives(&locator, parsed.comment_ranges()) + let comment_ranges = CommentRanges::from(parsed.tokens()); + extract_isort_directives(&locator, &comment_ranges) } #[test] diff --git a/crates/ruff_linter/src/linter.rs b/crates/ruff_linter/src/linter.rs index 971644a800..fdfe380fb6 100644 --- a/crates/ruff_linter/src/linter.rs +++ b/crates/ruff_linter/src/linter.rs @@ -88,7 +88,7 @@ pub fn check_path( let mut error = None; let tokens = parsed.tokens(); - let comment_ranges = parsed.comment_ranges(); + let comment_ranges = indexer.comment_ranges(); // Collect doc lines. This requires a rare mix of tokens (for comments) and AST // (for docstrings), which demands special-casing at this level. @@ -105,7 +105,7 @@ pub fn check_path( .any(|rule_code| rule_code.lint_source().is_tokens()) { diagnostics.extend(check_tokens( - parsed, + tokens, path, locator, indexer, @@ -218,12 +218,7 @@ pub fn check_path( .any(|rule_code| rule_code.lint_source().is_physical_lines()) { diagnostics.extend(check_physical_lines( - locator, - stylist, - indexer, - comment_ranges, - &doc_lines, - settings, + locator, stylist, indexer, &doc_lines, settings, )); } @@ -385,7 +380,7 @@ pub fn add_noqa_to_path( // Extract the `# noqa` and `# isort: skip` directives from the source. let directives = directives::extract_directives( - &parsed, + parsed.tokens(), directives::Flags::from_settings(settings), &locator, &indexer, @@ -428,7 +423,7 @@ pub fn add_noqa_to_path( path, &diagnostics, &locator, - parsed.comment_ranges(), + indexer.comment_ranges(), &settings.external, &directives.noqa_line_for, stylist.line_ending(), @@ -459,7 +454,7 @@ pub fn lint_only( // Extract the `# noqa` and `# isort: skip` directives from the source. let directives = directives::extract_directives( - &parsed, + parsed.tokens(), directives::Flags::from_settings(settings), &locator, &indexer, @@ -550,7 +545,7 @@ pub fn lint_fix<'a>( // Extract the `# noqa` and `# isort: skip` directives from the source. let directives = directives::extract_directives( - &parsed, + parsed.tokens(), directives::Flags::from_settings(settings), &locator, &indexer, diff --git a/crates/ruff_linter/src/rules/isort/rules/organize_imports.rs b/crates/ruff_linter/src/rules/isort/rules/organize_imports.rs index 7e0c3be59d..88af0b9055 100644 --- a/crates/ruff_linter/src/rules/isort/rules/organize_imports.rs +++ b/crates/ruff_linter/src/rules/isort/rules/organize_imports.rs @@ -5,10 +5,10 @@ use itertools::{EitherOrBoth, Itertools}; use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::whitespace::trailing_lines_end; -use ruff_python_ast::{ModModule, PySourceType, Stmt}; +use ruff_python_ast::{PySourceType, Stmt}; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; -use ruff_python_parser::Parsed; +use ruff_python_parser::Tokens; use ruff_python_trivia::{leading_indentation, textwrap::indent, PythonWhitespace}; use ruff_source_file::{Locator, UniversalNewlines}; use ruff_text_size::{Ranged, TextRange}; @@ -89,7 +89,7 @@ pub(crate) fn organize_imports( settings: &LinterSettings, package: Option<&Path>, source_type: PySourceType, - parsed: &Parsed, + tokens: &Tokens, ) -> Option { let indentation = locator.slice(extract_indentation_range(&block.imports, locator)); let indentation = leading_indentation(indentation); @@ -108,7 +108,7 @@ pub(crate) fn organize_imports( let comments = comments::collect_comments( TextRange::new(range.start(), locator.full_line_end(range.end())), locator, - parsed.comment_ranges(), + indexer.comment_ranges(), ); let trailing_line_end = if block.trailer.is_none() { @@ -130,7 +130,7 @@ pub(crate) fn organize_imports( source_type, settings.target_version, &settings.isort, - parsed.tokens(), + tokens, ); // Expand the span the entire range, including leading and trailing space. diff --git a/crates/ruff_linter/src/rules/pyflakes/mod.rs b/crates/ruff_linter/src/rules/pyflakes/mod.rs index f88cc6f285..a700b7ed6c 100644 --- a/crates/ruff_linter/src/rules/pyflakes/mod.rs +++ b/crates/ruff_linter/src/rules/pyflakes/mod.rs @@ -644,7 +644,7 @@ mod tests { let stylist = Stylist::from_tokens(parsed.tokens(), &locator); let indexer = Indexer::from_tokens(parsed.tokens(), &locator); let directives = directives::extract_directives( - &parsed, + parsed.tokens(), directives::Flags::from_settings(&settings), &locator, &indexer, diff --git a/crates/ruff_linter/src/test.rs b/crates/ruff_linter/src/test.rs index d23406bef0..31fae2bdaa 100644 --- a/crates/ruff_linter/src/test.rs +++ b/crates/ruff_linter/src/test.rs @@ -114,7 +114,7 @@ pub(crate) fn test_contents<'a>( let stylist = Stylist::from_tokens(parsed.tokens(), &locator); let indexer = Indexer::from_tokens(parsed.tokens(), &locator); let directives = directives::extract_directives( - &parsed, + parsed.tokens(), directives::Flags::from_settings(settings), &locator, &indexer, @@ -180,7 +180,7 @@ pub(crate) fn test_contents<'a>( let stylist = Stylist::from_tokens(parsed.tokens(), &locator); let indexer = Indexer::from_tokens(parsed.tokens(), &locator); let directives = directives::extract_directives( - &parsed, + parsed.tokens(), directives::Flags::from_settings(settings), &locator, &indexer, diff --git a/crates/ruff_python_formatter/src/cli.rs b/crates/ruff_python_formatter/src/cli.rs index f2f86c7bd1..dddd0db5a3 100644 --- a/crates/ruff_python_formatter/src/cli.rs +++ b/crates/ruff_python_formatter/src/cli.rs @@ -8,6 +8,7 @@ use clap::{command, Parser, ValueEnum}; use ruff_formatter::SourceCode; use ruff_python_ast::PySourceType; use ruff_python_parser::{parse, AsMode}; +use ruff_python_trivia::CommentRanges; use ruff_text_size::Ranged; use crate::comments::collect_comments; @@ -62,14 +63,15 @@ pub fn format_and_debug_print(source: &str, cli: &Cli, source_path: &Path) -> Re }); let source_code = SourceCode::new(source); - let formatted = format_module_ast(&parsed, source, options).context("Failed to format node")?; + let comment_ranges = CommentRanges::from(parsed.tokens()); + let formatted = format_module_ast(&parsed, &comment_ranges, source, options) + .context("Failed to format node")?; if cli.print_ir { println!("{}", formatted.document().display(source_code)); } if cli.print_comments { // Print preceding, following and enclosing nodes - let decorated_comments = - collect_comments(parsed.syntax(), source_code, parsed.comment_ranges()); + let decorated_comments = collect_comments(parsed.syntax(), source_code, &comment_ranges); if !decorated_comments.is_empty() { println!("# Comment decoration: Range, Preceding, Following, Enclosing, Comment"); } diff --git a/crates/ruff_python_formatter/src/comments/mod.rs b/crates/ruff_python_formatter/src/comments/mod.rs index 9d6f1a6817..34808e7867 100644 --- a/crates/ruff_python_formatter/src/comments/mod.rs +++ b/crates/ruff_python_formatter/src/comments/mod.rs @@ -482,11 +482,13 @@ mod tests { use ruff_formatter::SourceCode; use ruff_python_ast::{Mod, PySourceType}; use ruff_python_parser::{parse, AsMode, Parsed}; + use ruff_python_trivia::CommentRanges; use crate::comments::Comments; struct CommentsTestCase<'a> { parsed: Parsed, + comment_ranges: CommentRanges, source_code: SourceCode<'a>, } @@ -496,19 +498,17 @@ mod tests { let source_type = PySourceType::Python; let parsed = parse(source, source_type.as_mode()).expect("Expect source to be valid Python"); + let comment_ranges = CommentRanges::from(parsed.tokens()); CommentsTestCase { parsed, + comment_ranges, source_code, } } fn to_comments(&self) -> Comments { - Comments::from_ast( - self.parsed.syntax(), - self.source_code, - self.parsed.comment_ranges(), - ) + Comments::from_ast(self.parsed.syntax(), self.source_code, &self.comment_ranges) } } diff --git a/crates/ruff_python_formatter/src/expression/parentheses.rs b/crates/ruff_python_formatter/src/expression/parentheses.rs index c85355922f..7e617b27b9 100644 --- a/crates/ruff_python_formatter/src/expression/parentheses.rs +++ b/crates/ruff_python_formatter/src/expression/parentheses.rs @@ -444,6 +444,7 @@ impl Format> for FormatEmptyParenthesized<'_> { mod tests { use ruff_python_ast::ExpressionRef; use ruff_python_parser::parse_expression; + use ruff_python_trivia::CommentRanges; use crate::expression::parentheses::is_expression_parenthesized; @@ -453,7 +454,7 @@ mod tests { let parsed = parse_expression(expression).unwrap(); assert!(!is_expression_parenthesized( ExpressionRef::from(parsed.expr()), - parsed.comment_ranges(), + &CommentRanges::default(), expression )); } diff --git a/crates/ruff_python_formatter/src/lib.rs b/crates/ruff_python_formatter/src/lib.rs index 283727ff76..fcb512c63c 100644 --- a/crates/ruff_python_formatter/src/lib.rs +++ b/crates/ruff_python_formatter/src/lib.rs @@ -114,17 +114,19 @@ pub fn format_module_source( ) -> Result { let source_type = options.source_type(); let parsed = parse(source, source_type.as_mode())?; - let formatted = format_module_ast(&parsed, source, options)?; + let comment_ranges = CommentRanges::from(parsed.tokens()); + let formatted = format_module_ast(&parsed, &comment_ranges, source, options)?; Ok(formatted.print()?) } pub fn format_module_ast<'a>( parsed: &'a Parsed, + comment_ranges: &'a CommentRanges, source: &'a str, options: PyFormatOptions, ) -> FormatResult>> { let source_code = SourceCode::new(source); - let comments = Comments::from_ast(parsed.syntax(), source_code, parsed.comment_ranges()); + let comments = Comments::from_ast(parsed.syntax(), source_code, comment_ranges); let locator = Locator::new(source); let formatted = format!( @@ -155,6 +157,7 @@ mod tests { use ruff_python_ast::PySourceType; use ruff_python_parser::{parse, AsMode}; + use ruff_python_trivia::CommentRanges; use ruff_text_size::{TextRange, TextSize}; use crate::{format_module_ast, format_module_source, format_range, PyFormatOptions}; @@ -199,8 +202,9 @@ def main() -> None: // Parse the AST. let source_path = "code_inline.py"; let parsed = parse(source, source_type.as_mode()).unwrap(); + let comment_ranges = CommentRanges::from(parsed.tokens()); let options = PyFormatOptions::from_extension(Path::new(source_path)); - let formatted = format_module_ast(&parsed, source, options).unwrap(); + let formatted = format_module_ast(&parsed, &comment_ranges, source, options).unwrap(); // Uncomment the `dbg` to print the IR. // Use `dbg_write!(f, []) instead of `write!(f, [])` in your formatting code to print some IR diff --git a/crates/ruff_python_formatter/src/range.rs b/crates/ruff_python_formatter/src/range.rs index a41510f1dd..72035410e7 100644 --- a/crates/ruff_python_formatter/src/range.rs +++ b/crates/ruff_python_formatter/src/range.rs @@ -7,7 +7,9 @@ use ruff_formatter::{ use ruff_python_ast::visitor::source_order::{walk_body, SourceOrderVisitor, TraversalSignal}; use ruff_python_ast::{AnyNodeRef, Stmt, StmtMatch, StmtTry}; use ruff_python_parser::{parse, AsMode}; -use ruff_python_trivia::{indentation_at_offset, BackwardsTokenizer, SimpleToken, SimpleTokenKind}; +use ruff_python_trivia::{ + indentation_at_offset, BackwardsTokenizer, CommentRanges, SimpleToken, SimpleTokenKind, +}; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; @@ -74,7 +76,8 @@ pub fn format_range( let parsed = parse(source, options.source_type().as_mode())?; let source_code = SourceCode::new(source); - let comments = Comments::from_ast(parsed.syntax(), source_code, parsed.comment_ranges()); + let comment_ranges = CommentRanges::from(parsed.tokens()); + let comments = Comments::from_ast(parsed.syntax(), source_code, &comment_ranges); let mut context = PyFormatContext::new( options.with_source_map_generation(SourceMapGeneration::Enabled), diff --git a/crates/ruff_python_formatter/src/statement/suite.rs b/crates/ruff_python_formatter/src/statement/suite.rs index 2df9bca400..f7218d4ce8 100644 --- a/crates/ruff_python_formatter/src/statement/suite.rs +++ b/crates/ruff_python_formatter/src/statement/suite.rs @@ -831,6 +831,7 @@ impl Format> for SuiteChildStatement<'_> { mod tests { use ruff_formatter::format; use ruff_python_parser::parse_module; + use ruff_python_trivia::CommentRanges; use crate::comments::Comments; use crate::prelude::*; @@ -860,11 +861,12 @@ def trailing_func(): "; let parsed = parse_module(source).unwrap(); + let comment_ranges = CommentRanges::from(parsed.tokens()); let context = PyFormatContext::new( PyFormatOptions::default(), source, - Comments::from_ranges(parsed.comment_ranges()), + Comments::from_ranges(&comment_ranges), parsed.tokens(), ); diff --git a/crates/ruff_python_formatter/src/string/docstring.rs b/crates/ruff_python_formatter/src/string/docstring.rs index 65de2979b0..f6098f3127 100644 --- a/crates/ruff_python_formatter/src/string/docstring.rs +++ b/crates/ruff_python_formatter/src/string/docstring.rs @@ -9,6 +9,7 @@ use itertools::Itertools; use ruff_formatter::printer::SourceMapGeneration; use ruff_python_ast::{str::Quote, StringFlags}; +use ruff_python_trivia::CommentRanges; use {once_cell::sync::Lazy, regex::Regex}; use { ruff_formatter::{write, FormatOptions, IndentStyle, LineWidth, Printed}, @@ -1552,8 +1553,9 @@ fn docstring_format_source( let source_type = options.source_type(); let parsed = ruff_python_parser::parse(source, source_type.as_mode())?; + let comment_ranges = CommentRanges::from(parsed.tokens()); let source_code = ruff_formatter::SourceCode::new(source); - let comments = crate::Comments::from_ast(parsed.syntax(), source_code, parsed.comment_ranges()); + let comments = crate::Comments::from_ast(parsed.syntax(), source_code, &comment_ranges); let locator = Locator::new(source); let ctx = PyFormatContext::new(options, locator.contents(), comments, parsed.tokens()) diff --git a/crates/ruff_python_index/src/indexer.rs b/crates/ruff_python_index/src/indexer.rs index fb813f9814..b63080f694 100644 --- a/crates/ruff_python_index/src/indexer.rs +++ b/crates/ruff_python_index/src/indexer.rs @@ -3,7 +3,9 @@ use ruff_python_ast::Stmt; use ruff_python_parser::{TokenKind, Tokens}; -use ruff_python_trivia::{has_leading_content, has_trailing_content, is_python_whitespace}; +use ruff_python_trivia::{ + has_leading_content, has_trailing_content, is_python_whitespace, CommentRanges, +}; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange, TextSize}; @@ -19,6 +21,9 @@ pub struct Indexer { /// The range of all multiline strings in the source document. multiline_ranges: MultilineRanges, + + /// The range of all comments in the source document. + comment_ranges: CommentRanges, } impl Indexer { @@ -28,6 +33,8 @@ impl Indexer { let mut fstring_ranges_builder = FStringRangesBuilder::default(); let mut multiline_ranges_builder = MultilineRangesBuilder::default(); let mut continuation_lines = Vec::new(); + let mut comment_ranges = Vec::new(); + // Token, end let mut prev_end = TextSize::default(); let mut line_start = TextSize::default(); @@ -64,19 +71,38 @@ impl Indexer { // the closing delimiter, since the token itself can span multiple lines. line_start = locator.line_start(token.end()); } + TokenKind::Comment => { + comment_ranges.push(token.range()); + } _ => {} } prev_end = token.end(); } + // TODO(dhruvmanila): This is temporary until Ruff becomes error resilient. To understand + // why this is required, refer to https://github.com/astral-sh/ruff/pull/11457#issuecomment-2144990269 + // which was released at the time of this writing. Now we can't just revert that behavior, + // so we need to visit the remaining tokens if there are any for the comment ranges. + for token in tokens.after(prev_end) { + if token.kind() == TokenKind::Comment { + comment_ranges.push(token.range()); + } + } + Self { continuation_lines, fstring_ranges: fstring_ranges_builder.finish(), multiline_ranges: multiline_ranges_builder.finish(), + comment_ranges: CommentRanges::new(comment_ranges), } } + /// Returns the byte offset ranges of comments. + pub const fn comment_ranges(&self) -> &CommentRanges { + &self.comment_ranges + } + /// Returns the byte offset ranges of f-strings. pub const fn fstring_ranges(&self) -> &FStringRanges { &self.fstring_ranges diff --git a/crates/ruff_python_parser/src/lib.rs b/crates/ruff_python_parser/src/lib.rs index 5025779197..5ee12ab6ae 100644 --- a/crates/ruff_python_parser/src/lib.rs +++ b/crates/ruff_python_parser/src/lib.rs @@ -239,7 +239,6 @@ pub struct Parsed { syntax: T, tokens: Tokens, errors: Vec, - comment_ranges: CommentRanges, } impl Parsed { @@ -258,11 +257,6 @@ impl Parsed { &self.errors } - /// Returns the comment ranges for the parsed output. - pub fn comment_ranges(&self) -> &CommentRanges { - &self.comment_ranges - } - /// Consumes the [`Parsed`] output and returns the contained syntax node. pub fn into_syntax(self) -> T { self.syntax @@ -313,7 +307,6 @@ impl Parsed { syntax: module, tokens: self.tokens, errors: self.errors, - comment_ranges: self.comment_ranges, }), Mod::Expression(_) => None, } @@ -333,7 +326,6 @@ impl Parsed { syntax: expression, tokens: self.tokens, errors: self.errors, - comment_ranges: self.comment_ranges, }), } } @@ -518,6 +510,18 @@ impl Deref for Tokens { } } +impl From<&Tokens> for CommentRanges { + fn from(tokens: &Tokens) -> Self { + let mut ranges = vec![]; + for token in tokens { + if token.kind() == TokenKind::Comment { + ranges.push(token.range()); + } + } + CommentRanges::new(ranges) + } +} + /// Control in the different modes by which a source file can be parsed. /// /// The mode argument specifies in what way code must be parsed. diff --git a/crates/ruff_python_parser/src/parser/mod.rs b/crates/ruff_python_parser/src/parser/mod.rs index 5d4572409d..8430407af2 100644 --- a/crates/ruff_python_parser/src/parser/mod.rs +++ b/crates/ruff_python_parser/src/parser/mod.rs @@ -147,7 +147,7 @@ impl<'src> Parser<'src> { // TODO consider re-integrating lexical error handling into the parser? let parse_errors = self.errors; - let (tokens, comment_ranges, lex_errors) = self.tokens.finish(); + let (tokens, lex_errors) = self.tokens.finish(); // Fast path for when there are no lex errors. // There's no fast path for when there are no parse errors because a lex error @@ -156,7 +156,6 @@ impl<'src> Parser<'src> { return Parsed { syntax, tokens: Tokens::new(tokens), - comment_ranges, errors: parse_errors, }; } @@ -188,7 +187,6 @@ impl<'src> Parser<'src> { Parsed { syntax, tokens: Tokens::new(tokens), - comment_ranges, errors: merged, } } diff --git a/crates/ruff_python_parser/src/token_source.rs b/crates/ruff_python_parser/src/token_source.rs index 79e8a2094f..a8a54e68f0 100644 --- a/crates/ruff_python_parser/src/token_source.rs +++ b/crates/ruff_python_parser/src/token_source.rs @@ -1,4 +1,3 @@ -use ruff_python_trivia::CommentRanges; use ruff_text_size::{TextRange, TextSize}; use crate::lexer::{Lexer, LexerCheckpoint, LexicalError, Token, TokenFlags, TokenValue}; @@ -14,9 +13,6 @@ pub(crate) struct TokenSource<'src> { /// is finished consuming all the tokens. Note that unlike the emitted tokens, this vector /// holds both the trivia and non-trivia tokens. tokens: Vec, - - /// A vector containing the range of all the comment tokens emitted by the lexer. - comments: Vec, } impl<'src> TokenSource<'src> { @@ -26,7 +22,6 @@ impl<'src> TokenSource<'src> { TokenSource { lexer, tokens: vec![], - comments: vec![], } } @@ -103,9 +98,6 @@ impl<'src> TokenSource<'src> { loop { let kind = self.lexer.next_token(); if is_trivia(kind) { - if kind == TokenKind::Comment { - self.comments.push(self.current_range()); - } self.tokens .push(Token::new(kind, self.current_range(), self.current_flags())); continue; @@ -130,7 +122,6 @@ impl<'src> TokenSource<'src> { TokenSourceCheckpoint { lexer_checkpoint: self.lexer.checkpoint(), tokens_position: self.tokens.len(), - comments_position: self.comments.len(), } } @@ -139,18 +130,16 @@ impl<'src> TokenSource<'src> { let TokenSourceCheckpoint { lexer_checkpoint, tokens_position, - comments_position, } = checkpoint; self.lexer.rewind(lexer_checkpoint); self.tokens.truncate(tokens_position); - self.comments.truncate(comments_position); } /// Consumes the token source, returning the collected tokens, comment ranges, and any errors /// encountered during lexing. The token collection includes both the trivia and non-trivia /// tokens. - pub(crate) fn finish(mut self) -> (Vec, CommentRanges, Vec) { + pub(crate) fn finish(mut self) -> (Vec, Vec) { assert_eq!( self.current_kind(), TokenKind::EndOfFile, @@ -163,15 +152,13 @@ impl<'src> TokenSource<'src> { assert_eq!(last.kind(), TokenKind::EndOfFile); } - let comment_ranges = CommentRanges::new(self.comments); - (self.tokens, comment_ranges, self.lexer.finish()) + (self.tokens, self.lexer.finish()) } } pub(crate) struct TokenSourceCheckpoint { lexer_checkpoint: LexerCheckpoint, tokens_position: usize, - comments_position: usize, } /// Allocates a [`Vec`] with an approximated capacity to fit all tokens diff --git a/crates/ruff_python_trivia_integration_tests/tests/block_comments.rs b/crates/ruff_python_trivia_integration_tests/tests/block_comments.rs index 8bc8c5eb4c..d93abf4ca4 100644 --- a/crates/ruff_python_trivia_integration_tests/tests/block_comments.rs +++ b/crates/ruff_python_trivia_integration_tests/tests/block_comments.rs @@ -1,4 +1,5 @@ use ruff_python_parser::{parse_unchecked, Mode}; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; use ruff_text_size::TextSize; @@ -8,9 +9,10 @@ fn block_comments_two_line_block_at_start() { let source = "# line 1\n# line 2\n"; let parsed = parse_unchecked(source, Mode::Module); let locator = Locator::new(source); + let comment_ranges = CommentRanges::from(parsed.tokens()); // act - let block_comments = parsed.comment_ranges().block_comments(&locator); + let block_comments = comment_ranges.block_comments(&locator); // assert assert_eq!(block_comments, vec![TextSize::new(0), TextSize::new(9)]); @@ -22,9 +24,10 @@ fn block_comments_indented_block() { let source = " # line 1\n # line 2\n"; let parsed = parse_unchecked(source, Mode::Module); let locator = Locator::new(source); + let comment_ranges = CommentRanges::from(parsed.tokens()); // act - let block_comments = parsed.comment_ranges().block_comments(&locator); + let block_comments = comment_ranges.block_comments(&locator); // assert assert_eq!(block_comments, vec![TextSize::new(4), TextSize::new(17)]); @@ -36,9 +39,10 @@ fn block_comments_single_line_is_not_a_block() { let source = "\n"; let parsed = parse_unchecked(source, Mode::Module); let locator = Locator::new(source); + let comment_ranges = CommentRanges::from(parsed.tokens()); // act - let block_comments = parsed.comment_ranges().block_comments(&locator); + let block_comments = comment_ranges.block_comments(&locator); // assert assert_eq!(block_comments, Vec::::new()); @@ -50,9 +54,10 @@ fn block_comments_lines_with_code_not_a_block() { let source = "x = 1 # line 1\ny = 2 # line 2\n"; let parsed = parse_unchecked(source, Mode::Module); let locator = Locator::new(source); + let comment_ranges = CommentRanges::from(parsed.tokens()); // act - let block_comments = parsed.comment_ranges().block_comments(&locator); + let block_comments = comment_ranges.block_comments(&locator); // assert assert_eq!(block_comments, Vec::::new()); @@ -64,9 +69,10 @@ fn block_comments_sequential_lines_not_in_block() { let source = " # line 1\n # line 2\n"; let parsed = parse_unchecked(source, Mode::Module); let locator = Locator::new(source); + let comment_ranges = CommentRanges::from(parsed.tokens()); // act - let block_comments = parsed.comment_ranges().block_comments(&locator); + let block_comments = comment_ranges.block_comments(&locator); // assert assert_eq!(block_comments, Vec::::new()); @@ -83,9 +89,10 @@ fn block_comments_lines_in_triple_quotes_not_a_block() { "#; let parsed = parse_unchecked(source, Mode::Module); let locator = Locator::new(source); + let comment_ranges = CommentRanges::from(parsed.tokens()); // act - let block_comments = parsed.comment_ranges().block_comments(&locator); + let block_comments = comment_ranges.block_comments(&locator); // assert assert_eq!(block_comments, Vec::::new()); @@ -119,9 +126,10 @@ y = 2 # do not form a block comment "#; let parsed = parse_unchecked(source, Mode::Module); let locator = Locator::new(source); + let comment_ranges = CommentRanges::from(parsed.tokens()); // act - let block_comments = parsed.comment_ranges().block_comments(&locator); + let block_comments = comment_ranges.block_comments(&locator); // assert assert_eq!( diff --git a/crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs b/crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs index 7db3766463..b609294855 100644 --- a/crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs +++ b/crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs @@ -1,7 +1,7 @@ use insta::assert_debug_snapshot; use ruff_python_parser::{parse_unchecked, Mode}; -use ruff_python_trivia::{lines_after, lines_before, SimpleToken, SimpleTokenizer}; +use ruff_python_trivia::{lines_after, lines_before, CommentRanges, SimpleToken, SimpleTokenizer}; use ruff_python_trivia::{BackwardsTokenizer, SimpleTokenKind}; use ruff_text_size::{TextLen, TextRange, TextSize}; @@ -23,7 +23,8 @@ impl TokenizationTestCase { fn tokenize_reverse(&self) -> Vec { let parsed = parse_unchecked(self.source, Mode::Module); - BackwardsTokenizer::new(self.source, self.range, parsed.comment_ranges()).collect() + let comment_ranges = CommentRanges::from(parsed.tokens()); + BackwardsTokenizer::new(self.source, self.range, &comment_ranges).collect() } fn tokens(&self) -> &[SimpleToken] { diff --git a/crates/ruff_server/src/lint.rs b/crates/ruff_server/src/lint.rs index fc6088232c..294ded142e 100644 --- a/crates/ruff_server/src/lint.rs +++ b/crates/ruff_server/src/lint.rs @@ -109,7 +109,7 @@ pub(crate) fn check(query: &DocumentQuery, encoding: PositionEncoding) -> Diagno let indexer = Indexer::from_tokens(parsed.tokens(), &locator); // Extract the `# noqa` and `# isort: skip` directives from the source. - let directives = extract_directives(&parsed, Flags::all(), &locator, &indexer); + let directives = extract_directives(parsed.tokens(), Flags::all(), &locator, &indexer); // Generate checks. let LinterResult { data, .. } = check_path( @@ -130,7 +130,7 @@ pub(crate) fn check(query: &DocumentQuery, encoding: PositionEncoding) -> Diagno &query.virtual_file_path(), data.as_slice(), &locator, - parsed.comment_ranges(), + indexer.comment_ranges(), &linter_settings.external, &directives.noqa_line_for, stylist.line_ending(), diff --git a/crates/ruff_wasm/Cargo.toml b/crates/ruff_wasm/Cargo.toml index c8cdfc9e71..3761f63e1e 100644 --- a/crates/ruff_wasm/Cargo.toml +++ b/crates/ruff_wasm/Cargo.toml @@ -26,6 +26,7 @@ ruff_formatter = { workspace = true } ruff_python_formatter = { workspace = true } ruff_python_index = { workspace = true } ruff_python_parser = { workspace = true } +ruff_python_trivia = { workspace = true } ruff_source_file = { workspace = true } ruff_text_size = { workspace = true } ruff_workspace = { workspace = true } diff --git a/crates/ruff_wasm/src/lib.rs b/crates/ruff_wasm/src/lib.rs index 068975fe83..e2b86508a2 100644 --- a/crates/ruff_wasm/src/lib.rs +++ b/crates/ruff_wasm/src/lib.rs @@ -1,6 +1,7 @@ use std::path::Path; use js_sys::Error; +use ruff_python_trivia::CommentRanges; use serde::{Deserialize, Serialize}; use wasm_bindgen::prelude::*; @@ -172,8 +173,12 @@ impl Workspace { let indexer = Indexer::from_tokens(parsed.tokens(), &locator); // Extract the `# noqa` and `# isort: skip` directives from the source. - let directives = - directives::extract_directives(&parsed, directives::Flags::empty(), &locator, &indexer); + let directives = directives::extract_directives( + parsed.tokens(), + directives::Flags::empty(), + &locator, + &indexer, + ); // Generate checks. let LinterResult { @@ -241,11 +246,8 @@ impl Workspace { pub fn comments(&self, contents: &str) -> Result { let parsed = ParsedModule::from_source(contents)?; - let comments = pretty_comments( - parsed.parsed.syntax(), - parsed.parsed.comment_ranges(), - contents, - ); + let comment_ranges = CommentRanges::from(parsed.parsed.tokens()); + let comments = pretty_comments(parsed.parsed.syntax(), &comment_ranges, contents); Ok(comments) } @@ -270,13 +272,17 @@ pub(crate) fn into_error(err: E) -> Error { struct ParsedModule<'a> { source_code: &'a str, parsed: Parsed, + comment_ranges: CommentRanges, } impl<'a> ParsedModule<'a> { fn from_source(source_code: &'a str) -> Result { + let parsed = parse(source_code, Mode::Module).map_err(into_error)?; + let comment_ranges = CommentRanges::from(parsed.tokens()); Ok(Self { source_code, - parsed: parse(source_code, Mode::Module).map_err(into_error)?, + parsed, + comment_ranges, }) } @@ -287,6 +293,11 @@ impl<'a> ParsedModule<'a> { .to_format_options(PySourceType::default(), self.source_code) .with_source_map_generation(SourceMapGeneration::Enabled); - format_module_ast(&self.parsed, self.source_code, options) + format_module_ast( + &self.parsed, + &self.comment_ranges, + self.source_code, + options, + ) } }