From 381203c0848dea6fde278ccc7eefae74779d1bc5 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 11 Apr 2023 09:57:36 +0200 Subject: [PATCH] Store source code on message (#3897) --- Cargo.lock | 1 + crates/ruff/Cargo.toml | 1 + crates/ruff/src/autofix/actions.rs | 8 +- crates/ruff/src/autofix/mod.rs | 2 +- crates/ruff/src/importer.rs | 6 +- crates/ruff/src/linter.rs | 34 +-- crates/ruff/src/message/grouped.rs | 50 +--- crates/ruff/src/message/mod.rs | 64 ++---- crates/ruff/src/message/text.rs | 137 +++++++---- .../ruff/src/rules/flake8_return/helpers.rs | 2 +- crates/ruff/src/rules/isort/helpers.rs | 2 +- .../rules/pyflakes/rules/unused_variable.rs | 4 +- crates/ruff/src/rules/pyupgrade/fixes.rs | 2 +- crates/ruff_cli/src/cache.rs | 143 ++++++++++-- crates/ruff_cli/src/diagnostics.rs | 6 +- crates/ruff_python_ast/src/helpers.rs | 4 +- .../src/source_code/line_index.rs | 23 +- .../src/source_code/locator.rs | 48 ++-- crates/ruff_python_ast/src/source_code/mod.rs | 215 ++++++++++++++++++ .../ruff_python_formatter/src/cst/helpers.rs | 2 +- 20 files changed, 537 insertions(+), 217 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 41d37d784a..2f1a9c1994 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2015,6 +2015,7 @@ dependencies = [ "ruff_python_semantic", "ruff_python_stdlib", "ruff_rustpython", + "ruff_text_size", "rustc-hash", "rustpython-common", "rustpython-parser", diff --git a/crates/ruff/Cargo.toml b/crates/ruff/Cargo.toml index 277ff06155..08ec13fec0 100644 --- a/crates/ruff/Cargo.toml +++ b/crates/ruff/Cargo.toml @@ -21,6 +21,7 @@ ruff_python_ast = { path = "../ruff_python_ast" } ruff_python_semantic = { path = "../ruff_python_semantic" } ruff_python_stdlib = { path = "../ruff_python_stdlib" } ruff_rustpython = { path = "../ruff_rustpython" } +ruff_text_size = { path = "../ruff_text_size" } annotate-snippets = { version = "0.9.1", features = ["color"] } anyhow = { workspace = true } diff --git a/crates/ruff/src/autofix/actions.rs b/crates/ruff/src/autofix/actions.rs index d370984112..64d588addf 100644 --- a/crates/ruff/src/autofix/actions.rs +++ b/crates/ruff/src/autofix/actions.rs @@ -103,7 +103,7 @@ fn is_lone_child(child: &Stmt, parent: &Stmt, deleted: &[&Stmt]) -> Result /// Return the location of a trailing semicolon following a `Stmt`, if it's part /// of a multi-statement line. fn trailing_semicolon(stmt: &Stmt, locator: &Locator) -> Option { - let contents = locator.skip(stmt.end_location.unwrap()); + let contents = locator.after(stmt.end_location.unwrap()); for (row, line) in NewlineWithTrailingNewline::from(contents).enumerate() { let trimmed = line.trim(); if trimmed.starts_with(';') { @@ -126,7 +126,7 @@ fn trailing_semicolon(stmt: &Stmt, locator: &Locator) -> Option { /// Find the next valid break for a `Stmt` after a semicolon. fn next_stmt_break(semicolon: Location, locator: &Locator) -> Location { let start_location = Location::new(semicolon.row(), semicolon.column() + 1); - let contents = locator.skip(start_location); + let contents = locator.after(start_location); for (row, line) in NewlineWithTrailingNewline::from(contents).enumerate() { let trimmed = line.trim(); // Skip past any continuations. @@ -158,7 +158,7 @@ fn next_stmt_break(semicolon: Location, locator: &Locator) -> Location { /// Return `true` if a `Stmt` occurs at the end of a file. 
fn is_end_of_file(stmt: &Stmt, locator: &Locator) -> bool { - let contents = locator.skip(stmt.end_location.unwrap()); + let contents = locator.after(stmt.end_location.unwrap()); contents.is_empty() } @@ -361,7 +361,7 @@ pub fn remove_argument( remove_parentheses: bool, ) -> Result { // TODO(sbrugman): Preserve trailing comments. - let contents = locator.skip(call_at); + let contents = locator.after(call_at); let mut fix_start = None; let mut fix_end = None; diff --git a/crates/ruff/src/autofix/mod.rs b/crates/ruff/src/autofix/mod.rs index 060963eb06..ec455d2793 100644 --- a/crates/ruff/src/autofix/mod.rs +++ b/crates/ruff/src/autofix/mod.rs @@ -80,7 +80,7 @@ fn apply_fixes<'a>( } // Add the remaining content. - let slice = locator.skip(last_pos.unwrap_or_default()); + let slice = locator.after(last_pos.unwrap_or_default()); output.push_str(slice); (output, fixed) diff --git a/crates/ruff/src/importer.rs b/crates/ruff/src/importer.rs index 19db7cf48c..1a271edf98 100644 --- a/crates/ruff/src/importer.rs +++ b/crates/ruff/src/importer.rs @@ -174,7 +174,7 @@ fn match_docstring_end(body: &[Stmt]) -> Option { /// along with a trailing newline suffix. fn end_of_statement_insertion(stmt: &Stmt, locator: &Locator, stylist: &Stylist) -> Insertion { let location = stmt.end_location.unwrap(); - let mut tokens = lexer::lex_located(locator.skip(location), Mode::Module, location).flatten(); + let mut tokens = lexer::lex_located(locator.after(location), Mode::Module, location).flatten(); if let Some((.., Tok::Semi, end)) = tokens.next() { // If the first token after the docstring is a semicolon, insert after the semicolon as an // inline statement; @@ -207,7 +207,7 @@ fn top_of_file_insertion(body: &[Stmt], locator: &Locator, stylist: &Stylist) -> let mut location = if let Some(location) = match_docstring_end(body) { // If the first token after the docstring is a semicolon, insert after the semicolon as an // inline statement; - let first_token = lexer::lex_located(locator.skip(location), Mode::Module, location) + let first_token = lexer::lex_located(locator.after(location), Mode::Module, location) .flatten() .next(); if let Some((.., Tok::Semi, end)) = first_token { @@ -222,7 +222,7 @@ fn top_of_file_insertion(body: &[Stmt], locator: &Locator, stylist: &Stylist) -> // Skip over any comments and empty lines. for (.., tok, end) in - lexer::lex_located(locator.skip(location), Mode::Module, location).flatten() + lexer::lex_located(locator.after(location), Mode::Module, location).flatten() { if matches!(tok, Tok::Comment(..) | Tok::Newline) { location = Location::new(end.row() + 1, 0); diff --git a/crates/ruff/src/linter.rs b/crates/ruff/src/linter.rs index d7c05e037f..6e487a273a 100644 --- a/crates/ruff/src/linter.rs +++ b/crates/ruff/src/linter.rs @@ -22,7 +22,7 @@ use crate::checkers::physical_lines::check_physical_lines; use crate::checkers::tokens::check_tokens; use crate::directives::Directives; use crate::doc_lines::{doc_lines_from_ast, doc_lines_from_tokens}; -use crate::message::{Message, Source}; +use crate::message::Message; use crate::noqa::add_noqa; use crate::registry::{AsRule, Rule}; use crate::rules::pycodestyle; @@ -355,19 +355,26 @@ pub fn lint_only( // Convert from diagnostics to messages. 
let path_lossy = path.to_string_lossy(); + result.map(|(messages, imports)| { + let source_code = if settings.show_source && !messages.is_empty() { + Some(locator.to_source_code_buf()) + } else { + None + }; + ( messages .into_iter() .map(|diagnostic| { - let source = if settings.show_source { - Some(Source::from_diagnostic(&diagnostic, &locator)) - } else { - None - }; let lineno = diagnostic.location.row(); let noqa_row = *directives.noqa_line_for.get(&lineno).unwrap_or(&lineno); - Message::from_diagnostic(diagnostic, path_lossy.to_string(), source, noqa_row) + Message::from_diagnostic( + diagnostic, + path_lossy.to_string(), + source_code.clone(), + noqa_row, + ) }) .collect(), imports, @@ -500,22 +507,23 @@ This indicates a bug in `{}`. If you could open an issue at: let path_lossy = path.to_string_lossy(); return Ok(FixerResult { result: result.map(|(messages, imports)| { + let source_code = if settings.show_source && !messages.is_empty() { + Some(locator.to_source_code_buf()) + } else { + None + }; + ( messages .into_iter() .map(|diagnostic| { - let source = if settings.show_source { - Some(Source::from_diagnostic(&diagnostic, &locator)) - } else { - None - }; let lineno = diagnostic.location.row(); let noqa_row = *directives.noqa_line_for.get(&lineno).unwrap_or(&lineno); Message::from_diagnostic( diagnostic, path_lossy.to_string(), - source, + source_code.clone(), noqa_row, ) }) diff --git a/crates/ruff/src/message/grouped.rs b/crates/ruff/src/message/grouped.rs index 7a58dccf8d..eb7151578a 100644 --- a/crates/ruff/src/message/grouped.rs +++ b/crates/ruff/src/message/grouped.rs @@ -1,12 +1,9 @@ use crate::fs::relativize_path; use crate::jupyter::JupyterIndex; -use crate::message::text::RuleCodeAndBody; +use crate::message::text::{MessageCodeFrame, RuleCodeAndBody}; use crate::message::{group_messages_by_filename, Emitter, EmitterContext, Message}; -use crate::registry::AsRule; -use annotate_snippets::display_list::{DisplayList, FormatOptions}; -use annotate_snippets::snippet::{Annotation, AnnotationType, Slice, Snippet, SourceAnnotation}; use colored::Colorize; -use std::fmt::{Display, Formatter, Write as fmtWrite}; +use std::fmt::{Display, Formatter}; use std::io::Write; #[derive(Default)] @@ -116,47 +113,10 @@ impl Display for DisplayGroupedMessage<'_> { }, )?; - if let Some(source) = &message.source { - let suggestion = message.kind.suggestion.clone(); - let footer = if suggestion.is_some() { - vec![Annotation { - id: None, - label: suggestion.as_deref(), - annotation_type: AnnotationType::Help, - }] - } else { - vec![] - }; - let label = message.kind.rule().noqa_code().to_string(); - let snippet = Snippet { - title: None, - footer, - slices: vec![Slice { - source: &source.contents, - line_start: message.location.row(), - annotations: vec![SourceAnnotation { - label: &label, - annotation_type: AnnotationType::Error, - range: source.range, - }], - // The origin (file name, line number, and column number) is already encoded - // in the `label`. - origin: None, - fold: false, - }], - opt: FormatOptions { - #[cfg(test)] - color: false, - #[cfg(not(test))] - color: colored::control::SHOULD_COLORIZE.should_colorize(), - ..FormatOptions::default() - }, - }; - // Skip the first line, since we format the `label` ourselves. 
-            let message = DisplayList::from(snippet);
+        {
+            use std::fmt::Write;
             let mut padded = PadAdapter::new(f);
-
-            writeln!(&mut padded, "{message}")?;
+            write!(padded, "{}", MessageCodeFrame { message })?;
         }
 
         writeln!(f)?;
diff --git a/crates/ruff/src/message/mod.rs b/crates/ruff/src/message/mod.rs
index fbd5fca88c..fa9111faf7 100644
--- a/crates/ruff/src/message/mod.rs
+++ b/crates/ruff/src/message/mod.rs
@@ -20,22 +20,20 @@ pub use json::JsonEmitter;
 pub use junit::JunitEmitter;
 pub use pylint::PylintEmitter;
 pub use rustpython_parser::ast::Location;
-use serde::{Deserialize, Serialize};
 pub use text::TextEmitter;
 
 use crate::jupyter::JupyterIndex;
 use ruff_diagnostics::{Diagnostic, DiagnosticKind, Fix};
-use ruff_python_ast::source_code::Locator;
-use ruff_python_ast::types::Range;
+use ruff_python_ast::source_code::SourceCodeBuf;
 
-#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
+#[derive(Debug, PartialEq, Eq)]
 pub struct Message {
     pub kind: DiagnosticKind,
     pub location: Location,
     pub end_location: Location,
     pub fix: Fix,
     pub filename: String,
-    pub source: Option<Source>,
+    pub source: Option<SourceCodeBuf>,
     pub noqa_row: usize,
 }
 
@@ -43,7 +41,7 @@ impl Message {
     pub fn from_diagnostic(
         diagnostic: Diagnostic,
         filename: String,
-        source: Option<Source>,
+        source: Option<SourceCodeBuf>,
         noqa_row: usize,
     ) -> Self {
         Self {
@@ -77,38 +75,6 @@ impl PartialOrd for Message {
     }
 }
 
-#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone)]
-pub struct Source {
-    pub contents: String,
-    pub range: (usize, usize),
-}
-
-impl Source {
-    pub fn from_diagnostic(diagnostic: &Diagnostic, locator: &Locator) -> Self {
-        let location = Location::new(diagnostic.location.row(), 0);
-        // Diagnostics can already extend one-past-the-end. If they do, though, then
-        // they'll end at the start of a line. We need to avoid extending by yet another
-        // line past-the-end.
- let end_location = if diagnostic.end_location.column() == 0 { - diagnostic.end_location - } else { - Location::new(diagnostic.end_location.row() + 1, 0) - }; - let source = locator.slice(Range::new(location, end_location)); - let num_chars_in_range = locator - .slice(Range::new(diagnostic.location, diagnostic.end_location)) - .chars() - .count(); - Source { - contents: source.to_string(), - range: ( - diagnostic.location.column(), - diagnostic.location.column() + num_chars_in_range, - ), - } - } -} - fn group_messages_by_filename(messages: &[Message]) -> BTreeMap<&str, Vec<&Message>> { let mut grouped_messages = BTreeMap::default(); for message in messages { @@ -156,15 +122,15 @@ impl<'a> EmitterContext<'a> { #[cfg(test)] mod tests { - use crate::message::{Emitter, EmitterContext, Location, Message, Source}; + use crate::message::{Emitter, EmitterContext, Location, Message}; use crate::rules::pyflakes::rules::{UndefinedName, UnusedImport, UnusedVariable}; use ruff_diagnostics::{Diagnostic, Edit, Fix}; - use ruff_python_ast::source_code::Locator; + use ruff_python_ast::source_code::SourceCodeBuf; use ruff_python_ast::types::Range; use rustc_hash::FxHashMap; pub(super) fn create_messages() -> Vec { - let file_1 = r#"import os + let fib = r#"import os def fibonacci(n): """Compute the nth number in the Fibonacci sequence.""" @@ -177,8 +143,6 @@ def fibonacci(n): return fibonacci(n - 1) + fibonacci(n - 2) "#; - let file_1_locator = Locator::new(file_1); - let unused_import = Diagnostic::new( UnusedImport { name: "os".to_string(), @@ -188,7 +152,7 @@ def fibonacci(n): Range::new(Location::new(1, 7), Location::new(1, 9)), ); - let unused_source = Source::from_diagnostic(&unused_import, &file_1_locator); + let fib_source = SourceCodeBuf::from_content(fib); let unused_variable = Diagnostic::new( UnusedVariable { @@ -201,8 +165,6 @@ def fibonacci(n): Location::new(5, 9), )])); - let unused_variable_source = Source::from_diagnostic(&unused_variable, &file_1_locator); - let file_2 = r#"if a == 1: pass"#; let undefined_name = Diagnostic::new( @@ -212,20 +174,20 @@ def fibonacci(n): Range::new(Location::new(1, 3), Location::new(1, 4)), ); - let undefined_source = Source::from_diagnostic(&undefined_name, &Locator::new(file_2)); + let file_2_source = SourceCodeBuf::from_content(file_2); vec![ - Message::from_diagnostic(unused_import, "fib.py".to_string(), Some(unused_source), 1), Message::from_diagnostic( - unused_variable, + unused_import, "fib.py".to_string(), - Some(unused_variable_source), + Some(fib_source.clone()), 1, ), + Message::from_diagnostic(unused_variable, "fib.py".to_string(), Some(fib_source), 1), Message::from_diagnostic( undefined_name, "undef.py".to_string(), - Some(undefined_source), + Some(file_2_source), 1, ), ] diff --git a/crates/ruff/src/message/text.rs b/crates/ruff/src/message/text.rs index 093137d419..ac791580fa 100644 --- a/crates/ruff/src/message/text.rs +++ b/crates/ruff/src/message/text.rs @@ -1,11 +1,14 @@ use crate::fs::relativize_path; -use crate::message::{Emitter, EmitterContext, Message}; +use crate::message::{Emitter, EmitterContext, Location, Message}; use crate::registry::AsRule; use annotate_snippets::display_list::{DisplayList, FormatOptions}; use annotate_snippets::snippet::{Annotation, AnnotationType, Slice, Snippet, SourceAnnotation}; use colored::Colorize; use ruff_diagnostics::DiagnosticKind; -use std::fmt::Display; +use ruff_python_ast::source_code::OneIndexed; +use ruff_python_ast::types::Range; +use ruff_text_size::TextRange; +use std::fmt::{Display, 
Formatter};
 use std::io::Write;
 
 #[derive(Default)]
@@ -63,45 +66,8 @@ impl Emitter for TextEmitter {
             }
         )?;
 
-        if let Some(source) = &message.source {
-            let suggestion = message.kind.suggestion.clone();
-            let footer = if suggestion.is_some() {
-                vec![Annotation {
-                    id: None,
-                    label: suggestion.as_deref(),
-                    annotation_type: AnnotationType::Help,
-                }]
-            } else {
-                Vec::new()
-            };
-
-            let label = message.kind.rule().noqa_code().to_string();
-            let snippet = Snippet {
-                title: None,
-                slices: vec![Slice {
-                    source: &source.contents,
-                    line_start: message.location.row(),
-                    annotations: vec![SourceAnnotation {
-                        label: &label,
-                        annotation_type: AnnotationType::Error,
-                        range: source.range,
-                    }],
-                    // The origin (file name, line number, and column number) is already encoded
-                    // in the `label`.
-                    origin: None,
-                    fold: false,
-                }],
-                footer,
-                opt: FormatOptions {
-                    #[cfg(test)]
-                    color: false,
-                    #[cfg(not(test))]
-                    color: colored::control::SHOULD_COLORIZE.should_colorize(),
-                    ..FormatOptions::default()
-                },
-            };
-
-            writeln!(writer, "{message}\n", message = DisplayList::from(snippet))?;
+        if message.source.is_some() {
+            writeln!(writer, "{}", MessageCodeFrame { message })?;
         }
     }
 
@@ -147,6 +113,95 @@ impl Display for RuleCodeAndBody<'_> {
     }
 }
 
+pub(super) struct MessageCodeFrame<'a> {
+    pub message: &'a Message,
+}
+
+impl Display for MessageCodeFrame<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        let Message {
+            kind,
+            source,
+            location,
+            end_location,
+            ..
+        } = self.message;
+
+        if let Some(source_code) = source {
+            let suggestion = kind.suggestion.as_deref();
+            let footer = if suggestion.is_some() {
+                vec![Annotation {
+                    id: None,
+                    label: suggestion,
+                    annotation_type: AnnotationType::Help,
+                }]
+            } else {
+                Vec::new()
+            };
+
+            let source_code_start =
+                source_code.line_start(OneIndexed::new(location.row()).unwrap());
+
+            let source_code_end = source_code.line_start(
+                OneIndexed::new(
+                    end_location
+                        .row()
+                        .saturating_add(1)
+                        .min(source_code.line_count() + 1),
+                )
+                .unwrap(),
+            );
+
+            let source_text =
+                &source_code.text()[TextRange::new(source_code_start, source_code_end)];
+
+            let content_range = source_code.text_range(Range::new(
+                // Subtract 1 because message column indices are 1 based but the index columns are 0 based.
+                Location::new(location.row(), location.column().saturating_sub(1)),
+                Location::new(end_location.row(), end_location.column().saturating_sub(1)),
+            ));
+
+            let annotation_length = &source_text[content_range - source_code_start]
+                .chars()
+                .count();
+
+            let label = kind.rule().noqa_code().to_string();
+
+            let snippet = Snippet {
+                title: None,
+                slices: vec![Slice {
+                    source: source_text,
+                    line_start: location.row(),
+                    annotations: vec![SourceAnnotation {
+                        label: &label,
+                        annotation_type: AnnotationType::Error,
+                        range: (
+                            location.column() - 1,
+                            location.column() + annotation_length - 1,
+                        ),
+                    }],
+                    // The origin (file name, line number, and column number) is already encoded
+                    // in the `label`.
+ origin: None, + fold: false, + }], + footer, + opt: FormatOptions { + #[cfg(test)] + color: false, + #[cfg(not(test))] + color: colored::control::SHOULD_COLORIZE.should_colorize(), + ..FormatOptions::default() + }, + }; + + writeln!(f, "{message}", message = DisplayList::from(snippet))?; + } + + Ok(()) + } +} + #[cfg(test)] mod tests { use crate::message::tests::{capture_emitter_output, create_messages}; diff --git a/crates/ruff/src/rules/flake8_return/helpers.rs b/crates/ruff/src/rules/flake8_return/helpers.rs index 1d89603076..baf29f4f30 100644 --- a/crates/ruff/src/rules/flake8_return/helpers.rs +++ b/crates/ruff/src/rules/flake8_return/helpers.rs @@ -29,7 +29,7 @@ pub fn result_exists(returns: &[(&Stmt, Option<&Expr>)]) -> bool { /// This method assumes that the statement is the last statement in its body; specifically, that /// the statement isn't followed by a semicolon, followed by a multi-line statement. pub fn end_of_last_statement(stmt: &Stmt, locator: &Locator) -> Location { - let contents = locator.skip(stmt.end_location.unwrap()); + let contents = locator.after(stmt.end_location.unwrap()); // End-of-file, so just return the end of the statement. if contents.is_empty() { diff --git a/crates/ruff/src/rules/isort/helpers.rs b/crates/ruff/src/rules/isort/helpers.rs index 510c958983..e8ec76e893 100644 --- a/crates/ruff/src/rules/isort/helpers.rs +++ b/crates/ruff/src/rules/isort/helpers.rs @@ -62,7 +62,7 @@ pub fn has_comment_break(stmt: &Stmt, locator: &Locator) -> bool { // # Direct comment. // def f(): pass let mut seen_blank = false; - for line in locator.take(stmt.location).universal_newlines().rev() { + for line in locator.up_to(stmt.location).universal_newlines().rev() { let line = line.trim(); if seen_blank { if line.starts_with('#') { diff --git a/crates/ruff/src/rules/pyflakes/rules/unused_variable.rs b/crates/ruff/src/rules/pyflakes/rules/unused_variable.rs index 47f0454b15..880d67928c 100644 --- a/crates/ruff/src/rules/pyflakes/rules/unused_variable.rs +++ b/crates/ruff/src/rules/pyflakes/rules/unused_variable.rs @@ -66,7 +66,7 @@ fn match_token_after(located: &Located, locator: &Locator, f: F) -> Ran where F: Fn(Tok) -> bool, { - let contents = locator.skip(located.location); + let contents = locator.after(located.location); // Track the bracket depth. let mut par_count = 0; @@ -129,7 +129,7 @@ fn match_token(located: &Located, locator: &Locator, f: F) -> Range where F: Fn(Tok) -> bool, { - let contents = locator.skip(located.location); + let contents = locator.after(located.location); // Track the bracket depth. let mut par_count = 0; diff --git a/crates/ruff/src/rules/pyupgrade/fixes.rs b/crates/ruff/src/rules/pyupgrade/fixes.rs index 74522d8b06..ea001bb01e 100644 --- a/crates/ruff/src/rules/pyupgrade/fixes.rs +++ b/crates/ruff/src/rules/pyupgrade/fixes.rs @@ -142,7 +142,7 @@ pub fn remove_import_members(contents: &str, members: &[&str]) -> String { } // Add the remaining content. 
-    let slice = locator.skip(last_pos);
+    let slice = locator.after(last_pos);
     output.push_str(slice);
     output
 }
diff --git a/crates/ruff_cli/src/cache.rs b/crates/ruff_cli/src/cache.rs
index 8c4bd8ade3..046117f0e4 100644
--- a/crates/ruff_cli/src/cache.rs
+++ b/crates/ruff_cli/src/cache.rs
@@ -5,13 +5,18 @@ use std::path::Path;
 
 use anyhow::Result;
 use filetime::FileTime;
+use itertools::Itertools;
 use log::error;
 use path_absolutize::Absolutize;
-use ruff::message::Message;
+use ruff::message::{Location, Message};
 use ruff::settings::{flags, AllSettings, Settings};
 use ruff_cache::{CacheKey, CacheKeyHasher};
+use ruff_diagnostics::{DiagnosticKind, Fix};
 use ruff_python_ast::imports::ImportMap;
-use serde::{Deserialize, Serialize};
+use ruff_python_ast::source_code::{LineIndex, SourceCodeBuf};
+use rustc_hash::FxHashMap;
+use serde::ser::{SerializeSeq, SerializeStruct};
+use serde::{Deserialize, Serialize, Serializer};
 
 #[cfg(unix)]
 use std::os::unix::fs::PermissionsExt;
@@ -19,33 +24,90 @@ const CARGO_PKG_VERSION: &str = env!("CARGO_PKG_VERSION");
 
 #[derive(Serialize)]
 struct CheckResultRef<'a> {
+    #[serde(serialize_with = "serialize_messages")]
     messages: &'a [Message],
     imports: &'a ImportMap,
+    sources: Vec<(&'a str, &'a str)>,
+}
+
+fn serialize_messages<S>(messages: &[Message], serializer: S) -> Result<S::Ok, S::Error>
+where
+    S: Serializer,
+{
+    let mut s = serializer.serialize_seq(Some(messages.len()))?;
+
+    for message in messages {
+        s.serialize_element(&SerializeMessage(message))?;
+    }
+
+    s.end()
+}
+
+struct SerializeMessage<'a>(&'a Message);
+
+impl Serialize for SerializeMessage<'_> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        let Message {
+            kind,
+            location,
+            end_location,
+            fix,
+            filename,
+            // Serialized manually for all files
+            source: _source,
+            noqa_row,
+        } = self.0;
+
+        let mut s = serializer.serialize_struct("Message", 6)?;
+
+        s.serialize_field("kind", &kind)?;
+        s.serialize_field("location", &location)?;
+        s.serialize_field("end_location", &end_location)?;
+        s.serialize_field("fix", &fix)?;
+        s.serialize_field("filename", &filename)?;
+        s.serialize_field("noqa_row", &noqa_row)?;
+
+        s.end()
+    }
+}
+
+#[derive(Deserialize)]
+struct MessageHeader {
+    kind: DiagnosticKind,
+    location: Location,
+    end_location: Location,
+    fix: Fix,
+    filename: String,
+    noqa_row: usize,
 }
 
 #[derive(Deserialize)]
 struct CheckResult {
-    messages: Vec<Message>,
+    messages: Vec<MessageHeader>,
     imports: ImportMap,
+    sources: Vec<(String, String)>,
 }
 
 fn content_dir() -> &'static Path {
     Path::new("content")
 }
 
-fn cache_key<P: AsRef<Path>>(
-    path: P,
-    package: Option<&P>,
+fn cache_key(
+    path: &Path,
+    package: Option<&Path>,
     metadata: &fs::Metadata,
     settings: &Settings,
     autofix: flags::Autofix,
 ) -> u64 {
     let mut hasher = CacheKeyHasher::new();
     CARGO_PKG_VERSION.cache_key(&mut hasher);
-    path.as_ref().absolutize().unwrap().cache_key(&mut hasher);
+    path.absolutize().unwrap().cache_key(&mut hasher);
     package
         .as_ref()
-        .map(|path| path.as_ref().absolutize().unwrap())
+        .map(|path| path.absolutize().unwrap())
         .cache_key(&mut hasher);
     FileTime::from_last_modification_time(metadata).cache_key(&mut hasher);
     #[cfg(unix)]
@@ -92,9 +154,9 @@ fn del_sync(cache_dir: &Path, key: u64) -> Result<(), std::io::Error> {
 }
 
 /// Get a value from the cache.
-pub fn get>( - path: P, - package: Option<&P>, +pub fn get( + path: &Path, + package: Option<&Path>, metadata: &fs::Metadata, settings: &AllSettings, autofix: flags::Autofix, @@ -105,7 +167,34 @@ pub fn get>( ) .ok()?; match bincode::deserialize::(&encoded[..]) { - Ok(CheckResult { messages, imports }) => Some((messages, imports)), + Ok(CheckResult { + messages: headers, + imports, + sources, + }) => { + let mut messages = Vec::with_capacity(headers.len()); + let sources: FxHashMap<_, _> = sources + .into_iter() + .map(|(filename, content)| { + let index = LineIndex::from_source_text(&content); + (filename, SourceCodeBuf::new(&content, index)) + }) + .collect(); + + for header in headers { + messages.push(Message { + kind: header.kind, + location: header.location, + end_location: header.end_location, + fix: header.fix, + source: sources.get(&header.filename).cloned(), + filename: header.filename, + noqa_row: header.noqa_row, + }); + } + + Some((messages, imports)) + } Err(e) => { error!("Failed to deserialize encoded cache entry: {e:?}"); None @@ -114,16 +203,32 @@ pub fn get>( } /// Set a value in the cache. -pub fn set>( - path: P, - package: Option<&P>, +pub fn set( + path: &Path, + package: Option<&Path>, metadata: &fs::Metadata, settings: &AllSettings, autofix: flags::Autofix, messages: &[Message], imports: &ImportMap, ) { - let check_result = CheckResultRef { messages, imports }; + // Store the content of the source files, assuming that all files with the same name have the same content + let sources: Vec<_> = messages + .iter() + .filter_map(|message| { + message + .source + .as_ref() + .map(|source| (&*message.filename, source.text())) + }) + .unique_by(|(filename, _)| *filename) + .collect(); + + let check_result = CheckResultRef { + messages, + imports, + sources, + }; if let Err(e) = write_sync( &settings.cli.cache_dir, cache_key(path, package, metadata, &settings.lib, autofix), @@ -134,9 +239,9 @@ pub fn set>( } /// Delete a value from the cache. -pub fn del>( - path: P, - package: Option<&P>, +pub fn del( + path: &Path, + package: Option<&Path>, metadata: &fs::Metadata, settings: &AllSettings, autofix: flags::Autofix, diff --git a/crates/ruff_cli/src/diagnostics.rs b/crates/ruff_cli/src/diagnostics.rs index 7de65cf4bf..57022aa9a9 100644 --- a/crates/ruff_cli/src/diagnostics.rs +++ b/crates/ruff_cli/src/diagnostics.rs @@ -118,7 +118,7 @@ pub fn lint_path( { let metadata = path.metadata()?; if let Some((messages, imports)) = - cache::get(path, package.as_ref(), &metadata, settings, autofix.into()) + cache::get(path, package, &metadata, settings, autofix.into()) { debug!("Cache hit for: {}", path.display()); return Ok(Diagnostics::new(messages, imports)); @@ -207,14 +207,14 @@ pub fn lint_path( // Purge the cache. if let Some(metadata) = metadata { - cache::del(path, package.as_ref(), &metadata, settings, autofix.into()); + cache::del(path, package, &metadata, settings, autofix.into()); } } else { // Re-populate the cache. if let Some(metadata) = metadata { cache::set( path, - package.as_ref(), + package, &metadata, settings, autofix.into(), diff --git a/crates/ruff_python_ast/src/helpers.rs b/crates/ruff_python_ast/src/helpers.rs index 6956899193..8eed8fc45a 100644 --- a/crates/ruff_python_ast/src/helpers.rs +++ b/crates/ruff_python_ast/src/helpers.rs @@ -897,7 +897,7 @@ pub fn match_trailing_comment(located: &Located, locator: &Locator) -> Opt /// Return the number of trailing empty lines following a statement. 
pub fn count_trailing_lines(stmt: &Stmt, locator: &Locator) -> usize { - let suffix = locator.skip(Location::new(stmt.end_location.unwrap().row() + 1, 0)); + let suffix = locator.after(Location::new(stmt.end_location.unwrap().row() + 1, 0)); suffix .lines() .take_while(|line| line.trim().is_empty()) @@ -906,7 +906,7 @@ pub fn count_trailing_lines(stmt: &Stmt, locator: &Locator) -> usize { /// Return the range of the first parenthesis pair after a given [`Location`]. pub fn match_parens(start: Location, locator: &Locator) -> Option { - let contents = locator.skip(start); + let contents = locator.after(start); let mut fix_start = None; let mut fix_end = None; let mut count: usize = 0; diff --git a/crates/ruff_python_ast/src/source_code/line_index.rs b/crates/ruff_python_ast/src/source_code/line_index.rs index a15697dd85..1258bdedf5 100644 --- a/crates/ruff_python_ast/src/source_code/line_index.rs +++ b/crates/ruff_python_ast/src/source_code/line_index.rs @@ -83,12 +83,12 @@ impl LineIndex { } /// Return the number of lines in the source code. - pub(crate) fn lines_count(&self) -> usize { + pub(crate) fn line_count(&self) -> usize { self.line_starts().len() } /// Returns the [byte offset](TextSize) for the `line` with the given index. - fn line_start(&self, line: OneIndexed, contents: &str) -> TextSize { + pub(crate) fn line_start(&self, line: OneIndexed, contents: &str) -> TextSize { let row_index = line.to_zero_indexed(); let starts = self.line_starts(); @@ -103,7 +103,7 @@ impl LineIndex { /// Returns the [`TextRange`] of the `line` with the given index. /// The start points to the first character's [byte offset](TextSize), the end up to, and including /// the newline character ending the line (if any). - fn line_range(&self, line: OneIndexed, contents: &str) -> TextRange { + pub(crate) fn line_range(&self, line: OneIndexed, contents: &str) -> TextRange { let starts = self.line_starts(); if starts.len() == line.to_zero_indexed() { @@ -175,6 +175,11 @@ impl OneIndexed { Self(ONE.saturating_add(value)) } + /// Returns the value as a primitive type. 
+ pub const fn get(self) -> usize { + self.0.get() + } + /// Return the zero-indexed primitive value for this [`OneIndexed`] pub const fn to_zero_indexed(self) -> usize { self.0.get() - 1 @@ -306,18 +311,18 @@ mod tests { #[test] fn utf8_index() { let index = LineIndex::from_source_text("x = '🫣'"); - assert_eq!(index.lines_count(), 1); + assert_eq!(index.line_count(), 1); assert_eq!(index.line_starts(), &[TextSize::from(0)]); let index = LineIndex::from_source_text("x = '🫣'\n"); - assert_eq!(index.lines_count(), 2); + assert_eq!(index.line_count(), 2); assert_eq!( index.line_starts(), &[TextSize::from(0), TextSize::from(11)] ); let index = LineIndex::from_source_text("x = '🫣'\ny = 2\nz = x + y\n"); - assert_eq!(index.lines_count(), 4); + assert_eq!(index.line_count(), 4); assert_eq!( index.line_starts(), &[ @@ -329,7 +334,7 @@ mod tests { ); let index = LineIndex::from_source_text("# 🫣\nclass Foo:\n \"\"\".\"\"\""); - assert_eq!(index.lines_count(), 3); + assert_eq!(index.line_count(), 3); assert_eq!( index.line_starts(), &[TextSize::from(0), TextSize::from(7), TextSize::from(18)] @@ -340,7 +345,7 @@ mod tests { fn utf8_carriage_return() { let contents = "x = '🫣'\ry = 3"; let index = LineIndex::from_source_text(contents); - assert_eq!(index.lines_count(), 2); + assert_eq!(index.line_count(), 2); assert_eq!( index.line_starts(), &[TextSize::from(0), TextSize::from(11)] @@ -365,7 +370,7 @@ mod tests { fn utf8_carriage_return_newline() { let contents = "x = '🫣'\r\ny = 3"; let index = LineIndex::from_source_text(contents); - assert_eq!(index.lines_count(), 2); + assert_eq!(index.line_count(), 2); assert_eq!( index.line_starts(), &[TextSize::from(0), TextSize::from(12)] diff --git a/crates/ruff_python_ast/src/source_code/locator.rs b/crates/ruff_python_ast/src/source_code/locator.rs index becfed511a..8b25eadf6b 100644 --- a/crates/ruff_python_ast/src/source_code/locator.rs +++ b/crates/ruff_python_ast/src/source_code/locator.rs @@ -1,57 +1,65 @@ //! Struct used to efficiently slice source code at (row, column) Locations. use crate::source_code::line_index::LineIndex; +use crate::source_code::{SourceCode, SourceCodeBuf}; use once_cell::unsync::OnceCell; -use ruff_text_size::{TextRange, TextSize}; +use ruff_text_size::TextSize; use rustpython_parser::ast::Location; use crate::types::Range; pub struct Locator<'a> { contents: &'a str, - index: OnceCell, + line_index: OnceCell, } impl<'a> Locator<'a> { pub const fn new(contents: &'a str) -> Self { Self { contents, - index: OnceCell::new(), + line_index: OnceCell::new(), } } fn get_or_init_index(&self) -> &LineIndex { - self.index + self.line_index .get_or_init(|| LineIndex::from_source_text(self.contents)) } + fn source_code(&self) -> SourceCode<'a, '_> { + SourceCode { + index: self.get_or_init_index(), + text: self.contents, + } + } + + #[inline] + pub fn to_source_code_buf(&self) -> SourceCodeBuf { + self.source_code().to_owned() + } + /// Take the source code up to the given [`Location`]. - pub fn take(&self, location: Location) -> &'a str { - let index = self.get_or_init_index(); - let offset = index.location_offset(location, self.contents); - &self.contents[TextRange::up_to(offset)] + #[inline] + pub fn up_to(&self, location: Location) -> &'a str { + self.source_code().up_to(location) } /// Take the source code after the given [`Location`]. - pub fn skip(&self, location: Location) -> &'a str { - let index = self.get_or_init_index(); - let offset = index.location_offset(location, self.contents); - &self.contents[usize::from(offset)..] 
+ #[inline] + pub fn after(&self, location: Location) -> &'a str { + self.source_code().after(location) } /// Take the source code between the given [`Range`]. + #[inline] pub fn slice>(&self, range: R) -> &'a str { - let index = self.get_or_init_index(); - let range = range.into(); - let start = index.location_offset(range.location, self.contents); - let end = index.location_offset(range.end_location, self.contents); - &self.contents[TextRange::new(start, end)] + self.source_code().slice(range) } /// Return the byte offset of the given [`Location`]. + #[inline] pub fn offset(&self, location: Location) -> TextSize { - let index = self.get_or_init_index(); - index.location_offset(location, self.contents) + self.source_code().offset(location) } /// Return the underlying source code. @@ -62,7 +70,7 @@ impl<'a> Locator<'a> { /// Return the number of lines in the source code. pub fn count_lines(&self) -> usize { let index = self.get_or_init_index(); - index.lines_count() + index.line_count() } /// Return the number of bytes in the source code. diff --git a/crates/ruff_python_ast/src/source_code/mod.rs b/crates/ruff_python_ast/src/source_code/mod.rs index 87eb82a592..41e025c930 100644 --- a/crates/ruff_python_ast/src/source_code/mod.rs +++ b/crates/ruff_python_ast/src/source_code/mod.rs @@ -5,12 +5,16 @@ mod locator; mod stylist; pub use crate::source_code::line_index::{LineIndex, OneIndexed}; +use crate::types::Range; pub use generator::Generator; pub use indexer::Indexer; pub use locator::Locator; +use ruff_text_size::{TextRange, TextSize}; use rustpython_parser as parser; +use rustpython_parser::ast::Location; use rustpython_parser::{lexer, Mode, ParseError}; +use std::sync::Arc; pub use stylist::{LineEnding, Stylist}; /// Run round-trip source code generation on a given Python code. @@ -23,3 +27,214 @@ pub fn round_trip(code: &str, source_path: &str) -> Result { generator.unparse_suite(&python_ast); Ok(generator.generate()) } + +/// Gives access to the source code of a file and allows mapping between [`Location`] and byte offsets. +#[derive(Debug)] +pub struct SourceCode<'src, 'index> { + text: &'src str, + index: &'index LineIndex, +} + +impl<'src, 'index> SourceCode<'src, 'index> { + pub fn new(content: &'src str, index: &'index LineIndex) -> Self { + Self { + text: content, + index, + } + } + + /// Take the source code up to the given [`Location`]. + pub fn up_to(&self, location: Location) -> &'src str { + let offset = self.index.location_offset(location, self.text); + &self.text[TextRange::up_to(offset)] + } + + /// Take the source code after the given [`Location`]. + pub fn after(&self, location: Location) -> &'src str { + let offset = self.index.location_offset(location, self.text); + &self.text[usize::from(offset)..] + } + + /// Take the source code between the given [`Range`]. + pub fn slice>(&self, range: R) -> &'src str { + let range = self.text_range(range); + &self.text[range] + } + + /// Converts a [`Location`] range to a byte offset range + pub fn text_range>(&self, range: R) -> TextRange { + let range = range.into(); + let start = self.index.location_offset(range.location, self.text); + let end = self.index.location_offset(range.end_location, self.text); + TextRange::new(start, end) + } + + /// Return the byte offset of the given [`Location`]. 
+    pub fn offset(&self, location: Location) -> TextSize {
+        self.index.location_offset(location, self.text)
+    }
+
+    pub fn line_start(&self, line: OneIndexed) -> TextSize {
+        self.index.line_start(line, self.text)
+    }
+
+    pub fn line_range(&self, line: OneIndexed) -> TextRange {
+        self.index.line_range(line, self.text)
+    }
+
+    /// Returns a string with the lines spanning between location and end location.
+    pub fn lines(&self, range: Range) -> &'src str {
+        let start_line = self
+            .index
+            .line_range(OneIndexed::new(range.location.row()).unwrap(), self.text);
+
+        let end_line = self.index.line_range(
+            OneIndexed::new(range.end_location.row()).unwrap(),
+            self.text,
+        );
+
+        &self.text[TextRange::new(start_line.start(), end_line.end())]
+    }
+
+    /// Returns the source text of the line with the given index
+    #[inline]
+    pub fn line_text(&self, index: OneIndexed) -> &'src str {
+        let range = self.index.line_range(index, self.text);
+        &self.text[range]
+    }
+
+    pub fn text(&self) -> &'src str {
+        self.text
+    }
+
+    #[inline]
+    pub fn line_count(&self) -> usize {
+        self.index.line_count()
+    }
+
+    pub fn to_source_code_buf(&self) -> SourceCodeBuf {
+        self.to_owned()
+    }
+
+    pub fn to_owned(&self) -> SourceCodeBuf {
+        SourceCodeBuf::new(self.text, self.index.clone())
+    }
+}
+
+impl PartialEq for SourceCode<'_, '_> {
+    fn eq(&self, other: &Self) -> bool {
+        self.text == other.text
+    }
+}
+
+impl Eq for SourceCode<'_, '_> {}
+
+impl PartialEq<SourceCodeBuf> for SourceCode<'_, '_> {
+    fn eq(&self, other: &SourceCodeBuf) -> bool {
+        self.text == &*other.text
+    }
+}
+
+/// Gives access to the source code of a file and allows mapping between [`Location`] and byte offsets.
+///
+/// This is the owned counterpart to [`SourceCode`]. Cloning only requires bumping reference counters.
+#[derive(Clone, Debug)]
+pub struct SourceCodeBuf {
+    text: Arc<str>,
+    index: LineIndex,
+}
+
+impl SourceCodeBuf {
+    pub fn new(content: &str, index: LineIndex) -> Self {
+        Self {
+            text: Arc::from(content),
+            index,
+        }
+    }
+
+    /// Creates the [`LineIndex`] for `text` and returns the [`SourceCodeBuf`].
+    pub fn from_content(text: &str) -> Self {
+        Self::new(text, LineIndex::from_source_text(text))
+    }
+
+    #[inline]
+    fn as_source_code(&self) -> SourceCode {
+        SourceCode {
+            text: &self.text,
+            index: &self.index,
+        }
+    }
+
+    /// Take the source code up to the given [`Location`].
+    pub fn up_to(&self, location: Location) -> &str {
+        self.as_source_code().up_to(location)
+    }
+
+    /// Take the source code after the given [`Location`].
+    pub fn after(&self, location: Location) -> &str {
+        self.as_source_code().after(location)
+    }
+
+    /// Take the source code between the given [`Range`].
+    #[inline]
+    pub fn slice<R: Into<Range>>(&self, range: R) -> &str {
+        self.as_source_code().slice(range)
+    }
+
+    /// Converts a [`Location`] range to a byte offset range
+    #[inline]
+    pub fn text_range<R: Into<Range>>(&self, range: R) -> TextRange {
+        self.as_source_code().text_range(range)
+    }
+
+    #[inline]
+    pub fn line_range(&self, line: OneIndexed) -> TextRange {
+        self.as_source_code().line_range(line)
+    }
+
+    /// Return the byte offset of the given [`Location`].
+    #[inline]
+    pub fn offset(&self, location: Location) -> TextSize {
+        self.as_source_code().offset(location)
+    }
+
+    #[inline]
+    pub fn line_start(&self, line: OneIndexed) -> TextSize {
+        self.as_source_code().line_start(line)
+    }
+
+    #[inline]
+    pub fn lines(&self, range: Range) -> &str {
+        self.as_source_code().lines(range)
+    }
+
+    /// Returns the source text of the line with the given index
+    #[inline]
+    pub fn line_text(&self, index: OneIndexed) -> &str {
+        self.as_source_code().line_text(index)
+    }
+
+    #[inline]
+    pub fn line_count(&self) -> usize {
+        self.index.line_count()
+    }
+
+    pub fn text(&self) -> &str {
+        &self.text
+    }
+}
+
+impl PartialEq for SourceCodeBuf {
+    // The same source text should have the same index
+    fn eq(&self, other: &Self) -> bool {
+        self.text == other.text
+    }
+}
+
+impl PartialEq<SourceCode<'_, '_>> for SourceCodeBuf {
+    fn eq(&self, other: &SourceCode<'_, '_>) -> bool {
+        &*self.text == other.text
+    }
+}
+
+impl Eq for SourceCodeBuf {}
diff --git a/crates/ruff_python_formatter/src/cst/helpers.rs b/crates/ruff_python_formatter/src/cst/helpers.rs
index 9aac063f99..e103e77827 100644
--- a/crates/ruff_python_formatter/src/cst/helpers.rs
+++ b/crates/ruff_python_formatter/src/cst/helpers.rs
@@ -125,7 +125,7 @@ pub fn expand_indented_block(
 /// Return true if the `orelse` block of an `if` statement is an `elif` statement.
 pub fn is_elif(orelse: &[rustpython_parser::ast::Stmt], locator: &Locator) -> bool {
     if orelse.len() == 1 && matches!(orelse[0].node, rustpython_parser::ast::StmtKind::If { .. }) {
-        let contents = locator.skip(orelse[0].location);
+        let contents = locator.after(orelse[0].location);
         if contents.starts_with("elif") {
             return true;
         }
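
The new data flow is easiest to see in miniature. The sketch below is modeled on the test setup in crates/ruff/src/message/mod.rs from this patch; the rule struct, file name, and row/column values are illustrative only, and the crate-relative paths assume the code lives inside the ruff crate (as the tests do).

use crate::message::{Location, Message};
use crate::rules::pyflakes::rules::UnusedImport;
use ruff_diagnostics::Diagnostic;
use ruff_python_ast::source_code::SourceCodeBuf;
use ruff_python_ast::types::Range;

fn message_with_source() -> Message {
    // Build the owned source (text plus line index) once per file.
    let source = SourceCodeBuf::from_content("import os\n");

    let unused_import = Diagnostic::new(
        UnusedImport {
            name: "os".to_string(),
            context: None,
            multiple: false,
        },
        Range::new(Location::new(1, 7), Location::new(1, 9)),
    );

    // Cloning a `SourceCodeBuf` only bumps an `Arc` reference count, so every
    // message reported for the same file can carry the full source cheaply.
    Message::from_diagnostic(unused_import, "fib.py".to_string(), Some(source.clone()), 1)
}

Emitters that want a code frame (see MessageCodeFrame in crates/ruff/src/message/text.rs) then read message.source directly instead of a pre-sliced snippet.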
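
The cache change in crates/ruff_cli/src/cache.rs follows the same idea: messages are serialized without their source, each file's text is persisted once, and the owned source is rebuilt and re-attached on load. A condensed sketch of that round trip, using the same itertools/rustc-hash dependencies the crate already pulls in (function names here are illustrative, not the ones in the patch):

use itertools::Itertools;
use ruff::message::Message;
use ruff_python_ast::source_code::{LineIndex, SourceCodeBuf};
use rustc_hash::FxHashMap;

/// On write: one `(filename, text)` pair per distinct file that has a source attached.
fn collect_sources(messages: &[Message]) -> Vec<(&str, &str)> {
    messages
        .iter()
        .filter_map(|message| {
            message
                .source
                .as_ref()
                .map(|source| (&*message.filename, source.text()))
        })
        .unique_by(|(filename, _)| *filename)
        .collect()
}

/// On read: rebuild one owned `SourceCodeBuf` per file; each message then gets a cheap clone.
fn rebuild_sources(sources: Vec<(String, String)>) -> FxHashMap<String, SourceCodeBuf> {
    sources
        .into_iter()
        .map(|(filename, content)| {
            let index = LineIndex::from_source_text(&content);
            (filename, SourceCodeBuf::new(&content, index))
        })
        .collect()
}

As the comment in set() notes, sources are keyed by file name, so all messages that share a filename are assumed to share identical content.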