Store source code on message (#3897)

This commit is contained in:
Micha Reiser 2023-04-11 09:57:36 +02:00 committed by GitHub
parent 76c47a9a43
commit 381203c084
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 537 additions and 217 deletions

1
Cargo.lock generated
View file

@ -2015,6 +2015,7 @@ dependencies = [
"ruff_python_semantic",
"ruff_python_stdlib",
"ruff_rustpython",
"ruff_text_size",
"rustc-hash",
"rustpython-common",
"rustpython-parser",

View file

@ -21,6 +21,7 @@ ruff_python_ast = { path = "../ruff_python_ast" }
ruff_python_semantic = { path = "../ruff_python_semantic" }
ruff_python_stdlib = { path = "../ruff_python_stdlib" }
ruff_rustpython = { path = "../ruff_rustpython" }
ruff_text_size = { path = "../ruff_text_size" }
annotate-snippets = { version = "0.9.1", features = ["color"] }
anyhow = { workspace = true }

View file

@ -103,7 +103,7 @@ fn is_lone_child(child: &Stmt, parent: &Stmt, deleted: &[&Stmt]) -> Result<bool>
/// Return the location of a trailing semicolon following a `Stmt`, if it's part
/// of a multi-statement line.
fn trailing_semicolon(stmt: &Stmt, locator: &Locator) -> Option<Location> {
let contents = locator.skip(stmt.end_location.unwrap());
let contents = locator.after(stmt.end_location.unwrap());
for (row, line) in NewlineWithTrailingNewline::from(contents).enumerate() {
let trimmed = line.trim();
if trimmed.starts_with(';') {
@ -126,7 +126,7 @@ fn trailing_semicolon(stmt: &Stmt, locator: &Locator) -> Option<Location> {
/// Find the next valid break for a `Stmt` after a semicolon.
fn next_stmt_break(semicolon: Location, locator: &Locator) -> Location {
let start_location = Location::new(semicolon.row(), semicolon.column() + 1);
let contents = locator.skip(start_location);
let contents = locator.after(start_location);
for (row, line) in NewlineWithTrailingNewline::from(contents).enumerate() {
let trimmed = line.trim();
// Skip past any continuations.
@ -158,7 +158,7 @@ fn next_stmt_break(semicolon: Location, locator: &Locator) -> Location {
/// Return `true` if a `Stmt` occurs at the end of a file.
fn is_end_of_file(stmt: &Stmt, locator: &Locator) -> bool {
let contents = locator.skip(stmt.end_location.unwrap());
let contents = locator.after(stmt.end_location.unwrap());
contents.is_empty()
}
@ -361,7 +361,7 @@ pub fn remove_argument(
remove_parentheses: bool,
) -> Result<Edit> {
// TODO(sbrugman): Preserve trailing comments.
let contents = locator.skip(call_at);
let contents = locator.after(call_at);
let mut fix_start = None;
let mut fix_end = None;

View file

@ -80,7 +80,7 @@ fn apply_fixes<'a>(
}
// Add the remaining content.
let slice = locator.skip(last_pos.unwrap_or_default());
let slice = locator.after(last_pos.unwrap_or_default());
output.push_str(slice);
(output, fixed)

View file

@ -174,7 +174,7 @@ fn match_docstring_end(body: &[Stmt]) -> Option<Location> {
/// along with a trailing newline suffix.
fn end_of_statement_insertion(stmt: &Stmt, locator: &Locator, stylist: &Stylist) -> Insertion {
let location = stmt.end_location.unwrap();
let mut tokens = lexer::lex_located(locator.skip(location), Mode::Module, location).flatten();
let mut tokens = lexer::lex_located(locator.after(location), Mode::Module, location).flatten();
if let Some((.., Tok::Semi, end)) = tokens.next() {
// If the first token after the docstring is a semicolon, insert after the semicolon as an
// inline statement;
@ -207,7 +207,7 @@ fn top_of_file_insertion(body: &[Stmt], locator: &Locator, stylist: &Stylist) ->
let mut location = if let Some(location) = match_docstring_end(body) {
// If the first token after the docstring is a semicolon, insert after the semicolon as an
// inline statement;
let first_token = lexer::lex_located(locator.skip(location), Mode::Module, location)
let first_token = lexer::lex_located(locator.after(location), Mode::Module, location)
.flatten()
.next();
if let Some((.., Tok::Semi, end)) = first_token {
@ -222,7 +222,7 @@ fn top_of_file_insertion(body: &[Stmt], locator: &Locator, stylist: &Stylist) ->
// Skip over any comments and empty lines.
for (.., tok, end) in
lexer::lex_located(locator.skip(location), Mode::Module, location).flatten()
lexer::lex_located(locator.after(location), Mode::Module, location).flatten()
{
if matches!(tok, Tok::Comment(..) | Tok::Newline) {
location = Location::new(end.row() + 1, 0);

View file

@ -22,7 +22,7 @@ use crate::checkers::physical_lines::check_physical_lines;
use crate::checkers::tokens::check_tokens;
use crate::directives::Directives;
use crate::doc_lines::{doc_lines_from_ast, doc_lines_from_tokens};
use crate::message::{Message, Source};
use crate::message::Message;
use crate::noqa::add_noqa;
use crate::registry::{AsRule, Rule};
use crate::rules::pycodestyle;
@ -355,19 +355,26 @@ pub fn lint_only(
// Convert from diagnostics to messages.
let path_lossy = path.to_string_lossy();
result.map(|(messages, imports)| {
let source_code = if settings.show_source && !messages.is_empty() {
Some(locator.to_source_code_buf())
} else {
None
};
(
messages
.into_iter()
.map(|diagnostic| {
let source = if settings.show_source {
Some(Source::from_diagnostic(&diagnostic, &locator))
} else {
None
};
let lineno = diagnostic.location.row();
let noqa_row = *directives.noqa_line_for.get(&lineno).unwrap_or(&lineno);
Message::from_diagnostic(diagnostic, path_lossy.to_string(), source, noqa_row)
Message::from_diagnostic(
diagnostic,
path_lossy.to_string(),
source_code.clone(),
noqa_row,
)
})
.collect(),
imports,
@ -500,22 +507,23 @@ This indicates a bug in `{}`. If you could open an issue at:
let path_lossy = path.to_string_lossy();
return Ok(FixerResult {
result: result.map(|(messages, imports)| {
let source_code = if settings.show_source && !messages.is_empty() {
Some(locator.to_source_code_buf())
} else {
None
};
(
messages
.into_iter()
.map(|diagnostic| {
let source = if settings.show_source {
Some(Source::from_diagnostic(&diagnostic, &locator))
} else {
None
};
let lineno = diagnostic.location.row();
let noqa_row =
*directives.noqa_line_for.get(&lineno).unwrap_or(&lineno);
Message::from_diagnostic(
diagnostic,
path_lossy.to_string(),
source,
source_code.clone(),
noqa_row,
)
})

View file

@ -1,12 +1,9 @@
use crate::fs::relativize_path;
use crate::jupyter::JupyterIndex;
use crate::message::text::RuleCodeAndBody;
use crate::message::text::{MessageCodeFrame, RuleCodeAndBody};
use crate::message::{group_messages_by_filename, Emitter, EmitterContext, Message};
use crate::registry::AsRule;
use annotate_snippets::display_list::{DisplayList, FormatOptions};
use annotate_snippets::snippet::{Annotation, AnnotationType, Slice, Snippet, SourceAnnotation};
use colored::Colorize;
use std::fmt::{Display, Formatter, Write as fmtWrite};
use std::fmt::{Display, Formatter};
use std::io::Write;
#[derive(Default)]
@ -116,47 +113,10 @@ impl Display for DisplayGroupedMessage<'_> {
},
)?;
if let Some(source) = &message.source {
let suggestion = message.kind.suggestion.clone();
let footer = if suggestion.is_some() {
vec![Annotation {
id: None,
label: suggestion.as_deref(),
annotation_type: AnnotationType::Help,
}]
} else {
vec![]
};
let label = message.kind.rule().noqa_code().to_string();
let snippet = Snippet {
title: None,
footer,
slices: vec![Slice {
source: &source.contents,
line_start: message.location.row(),
annotations: vec![SourceAnnotation {
label: &label,
annotation_type: AnnotationType::Error,
range: source.range,
}],
// The origin (file name, line number, and column number) is already encoded
// in the `label`.
origin: None,
fold: false,
}],
opt: FormatOptions {
#[cfg(test)]
color: false,
#[cfg(not(test))]
color: colored::control::SHOULD_COLORIZE.should_colorize(),
..FormatOptions::default()
},
};
// Skip the first line, since we format the `label` ourselves.
let message = DisplayList::from(snippet);
{
use std::fmt::Write;
let mut padded = PadAdapter::new(f);
writeln!(&mut padded, "{message}")?;
write!(padded, "{}", MessageCodeFrame { message })?;
}
writeln!(f)?;

View file

@ -20,22 +20,20 @@ pub use json::JsonEmitter;
pub use junit::JunitEmitter;
pub use pylint::PylintEmitter;
pub use rustpython_parser::ast::Location;
use serde::{Deserialize, Serialize};
pub use text::TextEmitter;
use crate::jupyter::JupyterIndex;
use ruff_diagnostics::{Diagnostic, DiagnosticKind, Fix};
use ruff_python_ast::source_code::Locator;
use ruff_python_ast::types::Range;
use ruff_python_ast::source_code::SourceCodeBuf;
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, PartialEq, Eq)]
pub struct Message {
pub kind: DiagnosticKind,
pub location: Location,
pub end_location: Location,
pub fix: Fix,
pub filename: String,
pub source: Option<Source>,
pub source: Option<SourceCodeBuf>,
pub noqa_row: usize,
}
@ -43,7 +41,7 @@ impl Message {
pub fn from_diagnostic(
diagnostic: Diagnostic,
filename: String,
source: Option<Source>,
source: Option<SourceCodeBuf>,
noqa_row: usize,
) -> Self {
Self {
@ -77,38 +75,6 @@ impl PartialOrd for Message {
}
}
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone)]
pub struct Source {
pub contents: String,
pub range: (usize, usize),
}
impl Source {
pub fn from_diagnostic(diagnostic: &Diagnostic, locator: &Locator) -> Self {
let location = Location::new(diagnostic.location.row(), 0);
// Diagnostics can already extend one-past-the-end. If they do, though, then
// they'll end at the start of a line. We need to avoid extending by yet another
// line past-the-end.
let end_location = if diagnostic.end_location.column() == 0 {
diagnostic.end_location
} else {
Location::new(diagnostic.end_location.row() + 1, 0)
};
let source = locator.slice(Range::new(location, end_location));
let num_chars_in_range = locator
.slice(Range::new(diagnostic.location, diagnostic.end_location))
.chars()
.count();
Source {
contents: source.to_string(),
range: (
diagnostic.location.column(),
diagnostic.location.column() + num_chars_in_range,
),
}
}
}
fn group_messages_by_filename(messages: &[Message]) -> BTreeMap<&str, Vec<&Message>> {
let mut grouped_messages = BTreeMap::default();
for message in messages {
@ -156,15 +122,15 @@ impl<'a> EmitterContext<'a> {
#[cfg(test)]
mod tests {
use crate::message::{Emitter, EmitterContext, Location, Message, Source};
use crate::message::{Emitter, EmitterContext, Location, Message};
use crate::rules::pyflakes::rules::{UndefinedName, UnusedImport, UnusedVariable};
use ruff_diagnostics::{Diagnostic, Edit, Fix};
use ruff_python_ast::source_code::Locator;
use ruff_python_ast::source_code::SourceCodeBuf;
use ruff_python_ast::types::Range;
use rustc_hash::FxHashMap;
pub(super) fn create_messages() -> Vec<Message> {
let file_1 = r#"import os
let fib = r#"import os
def fibonacci(n):
"""Compute the nth number in the Fibonacci sequence."""
@ -177,8 +143,6 @@ def fibonacci(n):
return fibonacci(n - 1) + fibonacci(n - 2)
"#;
let file_1_locator = Locator::new(file_1);
let unused_import = Diagnostic::new(
UnusedImport {
name: "os".to_string(),
@ -188,7 +152,7 @@ def fibonacci(n):
Range::new(Location::new(1, 7), Location::new(1, 9)),
);
let unused_source = Source::from_diagnostic(&unused_import, &file_1_locator);
let fib_source = SourceCodeBuf::from_content(fib);
let unused_variable = Diagnostic::new(
UnusedVariable {
@ -201,8 +165,6 @@ def fibonacci(n):
Location::new(5, 9),
)]));
let unused_variable_source = Source::from_diagnostic(&unused_variable, &file_1_locator);
let file_2 = r#"if a == 1: pass"#;
let undefined_name = Diagnostic::new(
@ -212,20 +174,20 @@ def fibonacci(n):
Range::new(Location::new(1, 3), Location::new(1, 4)),
);
let undefined_source = Source::from_diagnostic(&undefined_name, &Locator::new(file_2));
let file_2_source = SourceCodeBuf::from_content(file_2);
vec![
Message::from_diagnostic(unused_import, "fib.py".to_string(), Some(unused_source), 1),
Message::from_diagnostic(
unused_variable,
unused_import,
"fib.py".to_string(),
Some(unused_variable_source),
Some(fib_source.clone()),
1,
),
Message::from_diagnostic(unused_variable, "fib.py".to_string(), Some(fib_source), 1),
Message::from_diagnostic(
undefined_name,
"undef.py".to_string(),
Some(undefined_source),
Some(file_2_source),
1,
),
]

View file

@ -1,11 +1,14 @@
use crate::fs::relativize_path;
use crate::message::{Emitter, EmitterContext, Message};
use crate::message::{Emitter, EmitterContext, Location, Message};
use crate::registry::AsRule;
use annotate_snippets::display_list::{DisplayList, FormatOptions};
use annotate_snippets::snippet::{Annotation, AnnotationType, Slice, Snippet, SourceAnnotation};
use colored::Colorize;
use ruff_diagnostics::DiagnosticKind;
use std::fmt::Display;
use ruff_python_ast::source_code::OneIndexed;
use ruff_python_ast::types::Range;
use ruff_text_size::TextRange;
use std::fmt::{Display, Formatter};
use std::io::Write;
#[derive(Default)]
@ -63,45 +66,8 @@ impl Emitter for TextEmitter {
}
)?;
if let Some(source) = &message.source {
let suggestion = message.kind.suggestion.clone();
let footer = if suggestion.is_some() {
vec![Annotation {
id: None,
label: suggestion.as_deref(),
annotation_type: AnnotationType::Help,
}]
} else {
Vec::new()
};
let label = message.kind.rule().noqa_code().to_string();
let snippet = Snippet {
title: None,
slices: vec![Slice {
source: &source.contents,
line_start: message.location.row(),
annotations: vec![SourceAnnotation {
label: &label,
annotation_type: AnnotationType::Error,
range: source.range,
}],
// The origin (file name, line number, and column number) is already encoded
// in the `label`.
origin: None,
fold: false,
}],
footer,
opt: FormatOptions {
#[cfg(test)]
color: false,
#[cfg(not(test))]
color: colored::control::SHOULD_COLORIZE.should_colorize(),
..FormatOptions::default()
},
};
writeln!(writer, "{message}\n", message = DisplayList::from(snippet))?;
if message.source.is_some() {
writeln!(writer, "{}", MessageCodeFrame { message })?;
}
}
@ -147,6 +113,95 @@ impl Display for RuleCodeAndBody<'_> {
}
}
/// Renders the annotated source-code snippet ("code frame") for a message:
/// the lines spanned by the diagnostic, an error annotation labeled with the
/// rule's noqa code, and — when the rule carries one — the autofix suggestion
/// as a trailing "help" footer.
pub(super) struct MessageCodeFrame<'a> {
    pub message: &'a Message,
}

impl Display for MessageCodeFrame<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Message {
            kind,
            source,
            location,
            end_location,
            ..
        } = self.message;

        // Messages restored from a cache entry without stored source code
        // render nothing.
        if let Some(source_code) = source {
            // Attach the rule's suggestion (if any) as a "help" footer line.
            let footer = match kind.suggestion.as_deref() {
                Some(suggestion) => vec![Annotation {
                    id: None,
                    label: Some(suggestion),
                    annotation_type: AnnotationType::Help,
                }],
                None => Vec::new(),
            };

            // Extract the full lines covered by the diagnostic: from the start
            // of the first line up to the start of the line *after*
            // `end_location`, clamped to one-past-the-last line of the file.
            let source_code_start =
                source_code.line_start(OneIndexed::new(location.row()).unwrap());

            let source_code_end = source_code.line_start(
                OneIndexed::new(
                    end_location
                        .row()
                        .saturating_add(1)
                        .min(source_code.line_count() + 1),
                )
                .unwrap(),
            );

            let source_text =
                &source_code.text()[TextRange::new(source_code_start, source_code_end)];

            let content_range = source_code.text_range(Range::new(
                // Subtract 1 because message column indices are 1-based but the
                // column indices of the source-code index are 0-based.
                Location::new(location.row(), location.column().saturating_sub(1)),
                Location::new(end_location.row(), end_location.column().saturating_sub(1)),
            ));

            // `annotate-snippets` expects *character* offsets relative to the
            // start of the snippet, so count the characters the diagnostic
            // covers within the extracted lines.
            let annotation_length = source_text[content_range - source_code_start]
                .chars()
                .count();

            // Use `saturating_sub` (rather than a bare `- 1`) so a zero column
            // cannot underflow, mirroring the range computation above.
            let annotation_start = location.column().saturating_sub(1);

            let label = kind.rule().noqa_code().to_string();

            let snippet = Snippet {
                title: None,
                slices: vec![Slice {
                    source: source_text,
                    line_start: location.row(),
                    annotations: vec![SourceAnnotation {
                        label: &label,
                        annotation_type: AnnotationType::Error,
                        range: (annotation_start, annotation_start + annotation_length),
                    }],
                    // The origin (file name, line number, and column number) is already encoded
                    // in the `label`.
                    origin: None,
                    fold: false,
                }],
                footer,
                opt: FormatOptions {
                    #[cfg(test)]
                    color: false,
                    #[cfg(not(test))]
                    color: colored::control::SHOULD_COLORIZE.should_colorize(),
                    ..FormatOptions::default()
                },
            };

            writeln!(f, "{message}", message = DisplayList::from(snippet))?;
        }

        Ok(())
    }
}
#[cfg(test)]
mod tests {
use crate::message::tests::{capture_emitter_output, create_messages};

View file

@ -29,7 +29,7 @@ pub fn result_exists(returns: &[(&Stmt, Option<&Expr>)]) -> bool {
/// This method assumes that the statement is the last statement in its body; specifically, that
/// the statement isn't followed by a semicolon, followed by a multi-line statement.
pub fn end_of_last_statement(stmt: &Stmt, locator: &Locator) -> Location {
let contents = locator.skip(stmt.end_location.unwrap());
let contents = locator.after(stmt.end_location.unwrap());
// End-of-file, so just return the end of the statement.
if contents.is_empty() {

View file

@ -62,7 +62,7 @@ pub fn has_comment_break(stmt: &Stmt, locator: &Locator) -> bool {
// # Direct comment.
// def f(): pass
let mut seen_blank = false;
for line in locator.take(stmt.location).universal_newlines().rev() {
for line in locator.up_to(stmt.location).universal_newlines().rev() {
let line = line.trim();
if seen_blank {
if line.starts_with('#') {

View file

@ -66,7 +66,7 @@ fn match_token_after<F, T>(located: &Located<T>, locator: &Locator, f: F) -> Ran
where
F: Fn(Tok) -> bool,
{
let contents = locator.skip(located.location);
let contents = locator.after(located.location);
// Track the bracket depth.
let mut par_count = 0;
@ -129,7 +129,7 @@ fn match_token<F, T>(located: &Located<T>, locator: &Locator, f: F) -> Range
where
F: Fn(Tok) -> bool,
{
let contents = locator.skip(located.location);
let contents = locator.after(located.location);
// Track the bracket depth.
let mut par_count = 0;

View file

@ -142,7 +142,7 @@ pub fn remove_import_members(contents: &str, members: &[&str]) -> String {
}
// Add the remaining content.
let slice = locator.skip(last_pos);
let slice = locator.after(last_pos);
output.push_str(slice);
output
}

View file

@ -5,13 +5,18 @@ use std::path::Path;
use anyhow::Result;
use filetime::FileTime;
use itertools::Itertools;
use log::error;
use path_absolutize::Absolutize;
use ruff::message::Message;
use ruff::message::{Location, Message};
use ruff::settings::{flags, AllSettings, Settings};
use ruff_cache::{CacheKey, CacheKeyHasher};
use ruff_diagnostics::{DiagnosticKind, Fix};
use ruff_python_ast::imports::ImportMap;
use serde::{Deserialize, Serialize};
use ruff_python_ast::source_code::{LineIndex, SourceCodeBuf};
use rustc_hash::FxHashMap;
use serde::ser::{SerializeSeq, SerializeStruct};
use serde::{Deserialize, Serialize, Serializer};
#[cfg(unix)]
use std::os::unix::fs::PermissionsExt;
@ -19,33 +24,90 @@ const CARGO_PKG_VERSION: &str = env!("CARGO_PKG_VERSION");
/// Borrowed view of a lint result as it is written to the cache.
#[derive(Serialize)]
struct CheckResultRef<'a> {
    // Each message is serialized through `SerializeMessage`, which omits the
    // per-message `source` field.
    #[serde(serialize_with = "serialize_messages")]
    messages: &'a [Message],
    imports: &'a ImportMap,
    // One `(filename, file content)` pair per distinct file, so that the
    // source text is stored once per file rather than once per message.
    sources: Vec<(&'a str, &'a str)>,
}
/// Serializes `messages` as a sequence, wrapping each element in
/// [`SerializeMessage`] so the per-message `source` field is skipped
/// (the file contents are serialized separately, once per file).
fn serialize_messages<S>(messages: &[Message], serializer: S) -> Result<S::Ok, S::Error>
where
    S: Serializer,
{
    let mut seq = serializer.serialize_seq(Some(messages.len()))?;

    messages
        .iter()
        .try_for_each(|message| seq.serialize_element(&SerializeMessage(message)))?;

    seq.end()
}
/// Serialization wrapper over [`Message`] that omits the `source` field;
/// the file contents are serialized once per file in `CheckResultRef::sources`
/// instead of once per message.
struct SerializeMessage<'a>(&'a Message);

impl Serialize for SerializeMessage<'_> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // Exhaustive destructuring (no `..`) so that adding a field to
        // `Message` produces a compile error here, forcing this impl to be
        // kept in sync.
        let Message {
            kind,
            location,
            end_location,
            fix,
            filename,
            // Serialized manually for all files
            source: _source,
            noqa_row,
        } = self.0;

        // NOTE: the field order here must match `MessageHeader`, which is used
        // to deserialize these entries.
        let mut s = serializer.serialize_struct("Message", 6)?;

        s.serialize_field("kind", &kind)?;
        s.serialize_field("location", &location)?;
        s.serialize_field("end_location", &end_location)?;
        s.serialize_field("fix", &fix)?;
        s.serialize_field("filename", &filename)?;
        s.serialize_field("noqa_row", &noqa_row)?;

        s.end()
    }
}
/// Deserialization counterpart of [`SerializeMessage`]: every `Message` field
/// except `source`, which is restored afterwards from the per-file `sources`
/// table. Field order must match the serialization order in
/// `SerializeMessage`.
#[derive(Deserialize)]
struct MessageHeader {
    kind: DiagnosticKind,
    location: Location,
    end_location: Location,
    fix: Fix,
    filename: String,
    noqa_row: usize,
}
#[derive(Deserialize)]
struct CheckResult {
messages: Vec<Message>,
messages: Vec<MessageHeader>,
imports: ImportMap,
sources: Vec<(String, String)>,
}
fn content_dir() -> &'static Path {
Path::new("content")
}
fn cache_key<P: AsRef<Path>>(
path: P,
package: Option<&P>,
fn cache_key(
path: &Path,
package: Option<&Path>,
metadata: &fs::Metadata,
settings: &Settings,
autofix: flags::Autofix,
) -> u64 {
let mut hasher = CacheKeyHasher::new();
CARGO_PKG_VERSION.cache_key(&mut hasher);
path.as_ref().absolutize().unwrap().cache_key(&mut hasher);
path.absolutize().unwrap().cache_key(&mut hasher);
package
.as_ref()
.map(|path| path.as_ref().absolutize().unwrap())
.map(|path| path.absolutize().unwrap())
.cache_key(&mut hasher);
FileTime::from_last_modification_time(metadata).cache_key(&mut hasher);
#[cfg(unix)]
@ -92,9 +154,9 @@ fn del_sync(cache_dir: &Path, key: u64) -> Result<(), std::io::Error> {
}
/// Get a value from the cache.
pub fn get<P: AsRef<Path>>(
path: P,
package: Option<&P>,
pub fn get(
path: &Path,
package: Option<&Path>,
metadata: &fs::Metadata,
settings: &AllSettings,
autofix: flags::Autofix,
@ -105,7 +167,34 @@ pub fn get<P: AsRef<Path>>(
)
.ok()?;
match bincode::deserialize::<CheckResult>(&encoded[..]) {
Ok(CheckResult { messages, imports }) => Some((messages, imports)),
Ok(CheckResult {
messages: headers,
imports,
sources,
}) => {
let mut messages = Vec::with_capacity(headers.len());
let sources: FxHashMap<_, _> = sources
.into_iter()
.map(|(filename, content)| {
let index = LineIndex::from_source_text(&content);
(filename, SourceCodeBuf::new(&content, index))
})
.collect();
for header in headers {
messages.push(Message {
kind: header.kind,
location: header.location,
end_location: header.end_location,
fix: header.fix,
source: sources.get(&header.filename).cloned(),
filename: header.filename,
noqa_row: header.noqa_row,
});
}
Some((messages, imports))
}
Err(e) => {
error!("Failed to deserialize encoded cache entry: {e:?}");
None
@ -114,16 +203,32 @@ pub fn get<P: AsRef<Path>>(
}
/// Set a value in the cache.
pub fn set<P: AsRef<Path>>(
path: P,
package: Option<&P>,
pub fn set(
path: &Path,
package: Option<&Path>,
metadata: &fs::Metadata,
settings: &AllSettings,
autofix: flags::Autofix,
messages: &[Message],
imports: &ImportMap,
) {
let check_result = CheckResultRef { messages, imports };
// Store the content of the source files, assuming that all files with the same name have the same content
let sources: Vec<_> = messages
.iter()
.filter_map(|message| {
message
.source
.as_ref()
.map(|source| (&*message.filename, source.text()))
})
.unique_by(|(filename, _)| *filename)
.collect();
let check_result = CheckResultRef {
messages,
imports,
sources,
};
if let Err(e) = write_sync(
&settings.cli.cache_dir,
cache_key(path, package, metadata, &settings.lib, autofix),
@ -134,9 +239,9 @@ pub fn set<P: AsRef<Path>>(
}
/// Delete a value from the cache.
pub fn del<P: AsRef<Path>>(
path: P,
package: Option<&P>,
pub fn del(
path: &Path,
package: Option<&Path>,
metadata: &fs::Metadata,
settings: &AllSettings,
autofix: flags::Autofix,

View file

@ -118,7 +118,7 @@ pub fn lint_path(
{
let metadata = path.metadata()?;
if let Some((messages, imports)) =
cache::get(path, package.as_ref(), &metadata, settings, autofix.into())
cache::get(path, package, &metadata, settings, autofix.into())
{
debug!("Cache hit for: {}", path.display());
return Ok(Diagnostics::new(messages, imports));
@ -207,14 +207,14 @@ pub fn lint_path(
// Purge the cache.
if let Some(metadata) = metadata {
cache::del(path, package.as_ref(), &metadata, settings, autofix.into());
cache::del(path, package, &metadata, settings, autofix.into());
}
} else {
// Re-populate the cache.
if let Some(metadata) = metadata {
cache::set(
path,
package.as_ref(),
package,
&metadata,
settings,
autofix.into(),

View file

@ -897,7 +897,7 @@ pub fn match_trailing_comment<T>(located: &Located<T>, locator: &Locator) -> Opt
/// Return the number of trailing empty lines following a statement.
pub fn count_trailing_lines(stmt: &Stmt, locator: &Locator) -> usize {
let suffix = locator.skip(Location::new(stmt.end_location.unwrap().row() + 1, 0));
let suffix = locator.after(Location::new(stmt.end_location.unwrap().row() + 1, 0));
suffix
.lines()
.take_while(|line| line.trim().is_empty())
@ -906,7 +906,7 @@ pub fn count_trailing_lines(stmt: &Stmt, locator: &Locator) -> usize {
/// Return the range of the first parenthesis pair after a given [`Location`].
pub fn match_parens(start: Location, locator: &Locator) -> Option<Range> {
let contents = locator.skip(start);
let contents = locator.after(start);
let mut fix_start = None;
let mut fix_end = None;
let mut count: usize = 0;

View file

@ -83,12 +83,12 @@ impl LineIndex {
}
/// Return the number of lines in the source code.
pub(crate) fn lines_count(&self) -> usize {
pub(crate) fn line_count(&self) -> usize {
self.line_starts().len()
}
/// Returns the [byte offset](TextSize) for the `line` with the given index.
fn line_start(&self, line: OneIndexed, contents: &str) -> TextSize {
pub(crate) fn line_start(&self, line: OneIndexed, contents: &str) -> TextSize {
let row_index = line.to_zero_indexed();
let starts = self.line_starts();
@ -103,7 +103,7 @@ impl LineIndex {
/// Returns the [`TextRange`] of the `line` with the given index.
/// The start points to the first character's [byte offset](TextSize), the end up to, and including
/// the newline character ending the line (if any).
fn line_range(&self, line: OneIndexed, contents: &str) -> TextRange {
pub(crate) fn line_range(&self, line: OneIndexed, contents: &str) -> TextRange {
let starts = self.line_starts();
if starts.len() == line.to_zero_indexed() {
@ -175,6 +175,11 @@ impl OneIndexed {
Self(ONE.saturating_add(value))
}
/// Returns the value as a primitive type.
pub const fn get(self) -> usize {
self.0.get()
}
/// Return the zero-indexed primitive value for this [`OneIndexed`]
pub const fn to_zero_indexed(self) -> usize {
self.0.get() - 1
@ -306,18 +311,18 @@ mod tests {
#[test]
fn utf8_index() {
let index = LineIndex::from_source_text("x = '🫣'");
assert_eq!(index.lines_count(), 1);
assert_eq!(index.line_count(), 1);
assert_eq!(index.line_starts(), &[TextSize::from(0)]);
let index = LineIndex::from_source_text("x = '🫣'\n");
assert_eq!(index.lines_count(), 2);
assert_eq!(index.line_count(), 2);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(11)]
);
let index = LineIndex::from_source_text("x = '🫣'\ny = 2\nz = x + y\n");
assert_eq!(index.lines_count(), 4);
assert_eq!(index.line_count(), 4);
assert_eq!(
index.line_starts(),
&[
@ -329,7 +334,7 @@ mod tests {
);
let index = LineIndex::from_source_text("# 🫣\nclass Foo:\n \"\"\".\"\"\"");
assert_eq!(index.lines_count(), 3);
assert_eq!(index.line_count(), 3);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(7), TextSize::from(18)]
@ -340,7 +345,7 @@ mod tests {
fn utf8_carriage_return() {
let contents = "x = '🫣'\ry = 3";
let index = LineIndex::from_source_text(contents);
assert_eq!(index.lines_count(), 2);
assert_eq!(index.line_count(), 2);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(11)]
@ -365,7 +370,7 @@ mod tests {
fn utf8_carriage_return_newline() {
let contents = "x = '🫣'\r\ny = 3";
let index = LineIndex::from_source_text(contents);
assert_eq!(index.lines_count(), 2);
assert_eq!(index.line_count(), 2);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(12)]

View file

@ -1,57 +1,65 @@
//! Struct used to efficiently slice source code at (row, column) Locations.
use crate::source_code::line_index::LineIndex;
use crate::source_code::{SourceCode, SourceCodeBuf};
use once_cell::unsync::OnceCell;
use ruff_text_size::{TextRange, TextSize};
use ruff_text_size::TextSize;
use rustpython_parser::ast::Location;
use crate::types::Range;
pub struct Locator<'a> {
contents: &'a str,
index: OnceCell<LineIndex>,
line_index: OnceCell<LineIndex>,
}
impl<'a> Locator<'a> {
pub const fn new(contents: &'a str) -> Self {
Self {
contents,
index: OnceCell::new(),
line_index: OnceCell::new(),
}
}
fn get_or_init_index(&self) -> &LineIndex {
self.index
self.line_index
.get_or_init(|| LineIndex::from_source_text(self.contents))
}
fn source_code(&self) -> SourceCode<'a, '_> {
SourceCode {
index: self.get_or_init_index(),
text: self.contents,
}
}
#[inline]
pub fn to_source_code_buf(&self) -> SourceCodeBuf {
self.source_code().to_owned()
}
/// Take the source code up to the given [`Location`].
pub fn take(&self, location: Location) -> &'a str {
let index = self.get_or_init_index();
let offset = index.location_offset(location, self.contents);
&self.contents[TextRange::up_to(offset)]
#[inline]
pub fn up_to(&self, location: Location) -> &'a str {
self.source_code().up_to(location)
}
/// Take the source code after the given [`Location`].
pub fn skip(&self, location: Location) -> &'a str {
let index = self.get_or_init_index();
let offset = index.location_offset(location, self.contents);
&self.contents[usize::from(offset)..]
#[inline]
pub fn after(&self, location: Location) -> &'a str {
self.source_code().after(location)
}
/// Take the source code between the given [`Range`].
#[inline]
pub fn slice<R: Into<Range>>(&self, range: R) -> &'a str {
let index = self.get_or_init_index();
let range = range.into();
let start = index.location_offset(range.location, self.contents);
let end = index.location_offset(range.end_location, self.contents);
&self.contents[TextRange::new(start, end)]
self.source_code().slice(range)
}
/// Return the byte offset of the given [`Location`].
#[inline]
pub fn offset(&self, location: Location) -> TextSize {
let index = self.get_or_init_index();
index.location_offset(location, self.contents)
self.source_code().offset(location)
}
/// Return the underlying source code.
@ -62,7 +70,7 @@ impl<'a> Locator<'a> {
/// Return the number of lines in the source code.
pub fn count_lines(&self) -> usize {
let index = self.get_or_init_index();
index.lines_count()
index.line_count()
}
/// Return the number of bytes in the source code.

View file

@ -5,12 +5,16 @@ mod locator;
mod stylist;
pub use crate::source_code::line_index::{LineIndex, OneIndexed};
use crate::types::Range;
pub use generator::Generator;
pub use indexer::Indexer;
pub use locator::Locator;
use ruff_text_size::{TextRange, TextSize};
use rustpython_parser as parser;
use rustpython_parser::ast::Location;
use rustpython_parser::{lexer, Mode, ParseError};
use std::sync::Arc;
pub use stylist::{LineEnding, Stylist};
/// Run round-trip source code generation on a given Python code.
@ -23,3 +27,214 @@ pub fn round_trip(code: &str, source_path: &str) -> Result<String, ParseError> {
generator.unparse_suite(&python_ast);
Ok(generator.generate())
}
/// Gives access to the source code of a file and allows mapping between [`Location`] and byte offsets.
#[derive(Debug)]
pub struct SourceCode<'src, 'index> {
    // The underlying source text.
    text: &'src str,
    // Line-start offsets for `text`, used for location <-> offset conversion.
    index: &'index LineIndex,
}
impl<'src, 'index> SourceCode<'src, 'index> {
    /// Creates a new `SourceCode` over `content`, reusing a pre-computed
    /// [`LineIndex`] for `content`.
    pub fn new(content: &'src str, index: &'index LineIndex) -> Self {
        Self {
            text: content,
            index,
        }
    }

    /// Take the source code up to the given [`Location`].
    pub fn up_to(&self, location: Location) -> &'src str {
        let offset = self.index.location_offset(location, self.text);
        &self.text[TextRange::up_to(offset)]
    }

    /// Take the source code after the given [`Location`].
    pub fn after(&self, location: Location) -> &'src str {
        let offset = self.index.location_offset(location, self.text);
        &self.text[usize::from(offset)..]
    }

    /// Take the source code between the given [`Range`].
    pub fn slice<R: Into<Range>>(&self, range: R) -> &'src str {
        let range = self.text_range(range);
        &self.text[range]
    }

    /// Converts a [`Location`] range to a byte offset range
    pub fn text_range<R: Into<Range>>(&self, range: R) -> TextRange {
        let range = range.into();
        let start = self.index.location_offset(range.location, self.text);
        let end = self.index.location_offset(range.end_location, self.text);
        TextRange::new(start, end)
    }

    /// Return the byte offset of the given [`Location`].
    pub fn offset(&self, location: Location) -> TextSize {
        self.index.location_offset(location, self.text)
    }

    /// Returns the byte offset at which the line with the given one-based
    /// index starts.
    pub fn line_start(&self, line: OneIndexed) -> TextSize {
        self.index.line_start(line, self.text)
    }

    /// Returns the byte range of the line with the given one-based index,
    /// including its terminating newline (if any).
    pub fn line_range(&self, line: OneIndexed) -> TextRange {
        self.index.line_range(line, self.text)
    }

    /// Returns a string with the lines spanning between location and end location.
    ///
    /// Panics if either row is `0` (rows are one-based).
    pub fn lines(&self, range: Range) -> &'src str {
        let start_line = self
            .index
            .line_range(OneIndexed::new(range.location.row()).unwrap(), self.text);

        let end_line = self.index.line_range(
            OneIndexed::new(range.end_location.row()).unwrap(),
            self.text,
        );

        &self.text[TextRange::new(start_line.start(), end_line.end())]
    }

    /// Returns the source text of the line with the given index
    #[inline]
    pub fn line_text(&self, index: OneIndexed) -> &'src str {
        let range = self.index.line_range(index, self.text);
        &self.text[range]
    }

    /// Returns the entire source text.
    pub fn text(&self) -> &'src str {
        self.text
    }

    /// Returns the number of lines in the source text.
    #[inline]
    pub fn line_count(&self) -> usize {
        self.index.line_count()
    }

    /// Convenience alias for [`Self::to_owned`].
    pub fn to_source_code_buf(&self) -> SourceCodeBuf {
        self.to_owned()
    }

    /// Copies the source text into an owned [`SourceCodeBuf`], cloning the
    /// line index.
    pub fn to_owned(&self) -> SourceCodeBuf {
        SourceCodeBuf::new(self.text, self.index.clone())
    }
}
impl PartialEq<Self> for SourceCode<'_, '_> {
    // Two `SourceCode`s are equal when their texts match; the index is
    // derived from the text, so it carries no extra information.
    fn eq(&self, other: &Self) -> bool {
        str::eq(self.text, other.text)
    }
}

impl Eq for SourceCode<'_, '_> {}

impl PartialEq<SourceCodeBuf> for SourceCode<'_, '_> {
    // Text-only comparison against the owned variant.
    fn eq(&self, other: &SourceCodeBuf) -> bool {
        let other_text: &str = &other.text;
        self.text == other_text
    }
}
/// Gives access to the source code of a file and allows mapping between [`Location`] and byte offsets.
///
/// This is the owned pendant to [`SourceCode`]. Cloning only requires bumping reference counters.
#[derive(Clone, Debug)]
pub struct SourceCodeBuf {
    // The source text, reference-counted so clones of the buffer are cheap.
    text: Arc<str>,
    // Line-start index for `Location` <-> byte offset conversion.
    index: LineIndex,
}
impl SourceCodeBuf {
pub fn new(content: &str, index: LineIndex) -> Self {
Self {
text: Arc::from(content),
index,
}
}
/// Creates the [`LineIndex`] for `text` and returns the [`SourceCodeBuf`].
pub fn from_content(text: &str) -> Self {
Self::new(text, LineIndex::from_source_text(text))
}
#[inline]
fn as_source_code(&self) -> SourceCode {
SourceCode {
text: &self.text,
index: &self.index,
}
}
/// Take the source code up to the given [`Location`].
pub fn up_to(&self, location: Location) -> &str {
self.as_source_code().up_to(location)
}
/// Take the source code after the given [`Location`].
pub fn after(&self, location: Location) -> &str {
self.as_source_code().after(location)
}
/// Take the source code between the given [`Range`].
#[inline]
pub fn slice<R: Into<Range>>(&self, range: R) -> &str {
self.as_source_code().slice(range)
}
/// Converts a [`Location`] range to a byte offset range
#[inline]
pub fn text_range<R: Into<Range>>(&self, range: R) -> TextRange {
self.as_source_code().text_range(range)
}
#[inline]
pub fn line_range(&self, line: OneIndexed) -> TextRange {
self.as_source_code().line_range(line)
}
/// Return the byte offset of the given [`Location`].
#[inline]
pub fn offset(&self, location: Location) -> TextSize {
self.as_source_code().offset(location)
}
#[inline]
pub fn line_start(&self, line: OneIndexed) -> TextSize {
self.as_source_code().line_start(line)
}
#[inline]
pub fn lines(&self, range: Range) -> &str {
self.as_source_code().lines(range)
}
/// Returns the source text of the line with the given index
#[inline]
pub fn line_text(&self, index: OneIndexed) -> &str {
self.as_source_code().line_text(index)
}
#[inline]
pub fn line_count(&self) -> usize {
self.index.line_count()
}
pub fn text(&self) -> &str {
&self.text
}
}
impl PartialEq<Self> for SourceCodeBuf {
    // The same source text should have the same index, so comparing the
    // text alone is sufficient.
    fn eq(&self, other: &Self) -> bool {
        *self.text == *other.text
    }
}

impl PartialEq<SourceCode<'_, '_>> for SourceCodeBuf {
    // Text-only comparison against the borrowed variant.
    fn eq(&self, other: &SourceCode<'_, '_>) -> bool {
        let this: &str = &self.text;
        this == other.text
    }
}

impl Eq for SourceCodeBuf {}

View file

@ -125,7 +125,7 @@ pub fn expand_indented_block(
/// Return true if the `orelse` block of an `if` statement is an `elif` statement.
pub fn is_elif(orelse: &[rustpython_parser::ast::Stmt], locator: &Locator) -> bool {
if orelse.len() == 1 && matches!(orelse[0].node, rustpython_parser::ast::StmtKind::If { .. }) {
let contents = locator.skip(orelse[0].location);
let contents = locator.after(orelse[0].location);
if contents.starts_with("elif") {
return true;
}