Use a single SourceCodeLocator everywhere (#510)

Charlie Marsh 2022-10-29 18:23:24 -04:00 committed by GitHub
parent 2fcbf3ab62
commit db59d5b558
8 changed files with 63 additions and 66 deletions


@@ -1,3 +1,4 @@
+use once_cell::unsync::OnceCell;
 use rustpython_parser::ast::{Constant, Expr, ExprKind, Location, Stmt, StmtKind};
 
 use crate::ast::types::{BindingKind, Range, Scope};
@@ -120,15 +121,15 @@ pub fn is_unpacking_assignment(stmt: &Stmt) -> bool {
 /// Struct used to efficiently slice source code at (row, column) Locations.
 pub struct SourceCodeLocator<'a> {
-    content: &'a str,
-    offsets: Vec<Vec<usize>>,
+    contents: &'a str,
+    offsets: OnceCell<Vec<Vec<usize>>>,
 }
 
 impl<'a> SourceCodeLocator<'a> {
-    pub fn new(content: &'a str) -> Self {
+    pub fn new(contents: &'a str) -> Self {
         SourceCodeLocator {
-            content,
-            offsets: Self::compute_offsets(content),
+            contents,
+            offsets: OnceCell::new(),
         }
     }
@@ -145,15 +146,22 @@ impl<'a> SourceCodeLocator<'a> {
         offsets
     }
 
+    fn get_or_init_offsets(&self) -> &Vec<Vec<usize>> {
+        self.offsets
+            .get_or_init(|| Self::compute_offsets(self.contents))
+    }
+
     pub fn slice_source_code_at(&self, location: &Location) -> &'a str {
-        let offset = self.offsets[location.row() - 1][location.column() - 1];
-        &self.content[offset..]
+        let offsets = self.get_or_init_offsets();
+        let offset = offsets[location.row() - 1][location.column() - 1];
+        &self.contents[offset..]
     }
 
     pub fn slice_source_code_range(&self, range: &Range) -> &'a str {
-        let start = self.offsets[range.location.row() - 1][range.location.column() - 1];
-        let end = self.offsets[range.end_location.row() - 1][range.end_location.column() - 1];
-        &self.content[start..end]
+        let offsets = self.get_or_init_offsets();
+        let start = offsets[range.location.row() - 1][range.location.column() - 1];
+        let end = offsets[range.end_location.row() - 1][range.end_location.column() - 1];
+        &self.contents[start..end]
     }
 
     pub fn partition_source_code_at(
@@ -161,14 +169,15 @@ impl<'a> SourceCodeLocator<'a> {
         outer: &Range,
         inner: &Range,
     ) -> (&'a str, &'a str, &'a str) {
-        let outer_start = self.offsets[outer.location.row() - 1][outer.location.column() - 1];
-        let outer_end = self.offsets[outer.end_location.row() - 1][outer.end_location.column() - 1];
-        let inner_start = self.offsets[inner.location.row() - 1][inner.location.column() - 1];
-        let inner_end = self.offsets[inner.end_location.row() - 1][inner.end_location.column() - 1];
+        let offsets = self.get_or_init_offsets();
+        let outer_start = offsets[outer.location.row() - 1][outer.location.column() - 1];
+        let outer_end = offsets[outer.end_location.row() - 1][outer.end_location.column() - 1];
+        let inner_start = offsets[inner.location.row() - 1][inner.location.column() - 1];
+        let inner_end = offsets[inner.end_location.row() - 1][inner.end_location.column() - 1];
         (
-            &self.content[outer_start..inner_start],
-            &self.content[inner_start..inner_end],
-            &self.content[inner_end..outer_end],
+            &self.contents[outer_start..inner_start],
+            &self.contents[inner_start..inner_end],
+            &self.contents[inner_end..outer_end],
         )
     }
 }
@@ -181,20 +190,19 @@ mod tests {
     fn source_code_locator_init() {
         let content = "x = 1\ny = 2\nz = x + y\n";
         let locator = SourceCodeLocator::new(content);
-        assert_eq!(locator.offsets.len(), 4);
-        assert_eq!(locator.offsets[0], [0, 1, 2, 3, 4, 5]);
-        assert_eq!(locator.offsets[1], [6, 7, 8, 9, 10, 11]);
-        assert_eq!(locator.offsets[2], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21]);
-        assert!(locator.offsets[3].is_empty());
+        let offsets = locator.get_or_init_offsets();
+        assert_eq!(offsets.len(), 4);
+        assert_eq!(offsets[0], [0, 1, 2, 3, 4, 5]);
+        assert_eq!(offsets[1], [6, 7, 8, 9, 10, 11]);
+        assert_eq!(offsets[2], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21]);
+        assert!(offsets[3].is_empty());
 
         let content = "# \u{4e9c}\nclass Foo:\n    \"\"\".\"\"\"";
         let locator = SourceCodeLocator::new(content);
-        assert_eq!(locator.offsets.len(), 3);
-        assert_eq!(locator.offsets[0], [0, 1, 2, 5]);
-        assert_eq!(locator.offsets[1], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
-        assert_eq!(
-            locator.offsets[2],
-            [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]
-        );
+        let offsets = locator.get_or_init_offsets();
+        assert_eq!(offsets.len(), 3);
+        assert_eq!(offsets[0], [0, 1, 2, 5]);
+        assert_eq!(offsets[1], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
+        assert_eq!(offsets[2], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]);
     }
 }
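
The heart of the change is in this first file: the offset table moves out of an eagerly computed Vec<Vec<usize>> and into a OnceCell, so it is only built the first time something actually slices the source. The pattern boils down to a small standalone example. This is a minimal sketch, not ruff's code: the Locator type is invented for illustration, it tracks one offset per line rather than per column, and it assumes the once_cell crate is available.

// Minimal sketch of lazy offset computation with once_cell (illustrative only).
use once_cell::unsync::OnceCell;

struct Locator<'a> {
    contents: &'a str,
    // Byte offset of the start of each line, computed on first use.
    line_starts: OnceCell<Vec<usize>>,
}

impl<'a> Locator<'a> {
    fn new(contents: &'a str) -> Self {
        Locator { contents, line_starts: OnceCell::new() }
    }

    // `get_or_init` runs the closure at most once; later calls return the cache.
    fn line_starts(&self) -> &[usize] {
        self.line_starts.get_or_init(|| {
            let mut starts = vec![0];
            starts.extend(self.contents.match_indices('\n').map(|(i, _)| i + 1));
            starts
        })
    }

    // Slice the source from the start of a 1-based row to the end of the file.
    fn slice_from_row(&self, row: usize) -> &'a str {
        &self.contents[self.line_starts()[row - 1]..]
    }
}

fn main() {
    let locator = Locator::new("x = 1\ny = 2\nz = x + y\n");
    assert_eq!(locator.slice_from_row(3), "z = x + y\n");
}

The useful property is that a file which never triggers a slicing check never pays for the offset computation, while repeated slicing reuses the cached table through a shared reference.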


@@ -5,7 +5,6 @@ use std::ops::Deref;
 use std::path::Path;
 
 use log::error;
-use once_cell::unsync::OnceCell;
 use rustpython_parser::ast::{
     Arg, Arguments, Constant, Excepthandler, ExcepthandlerKind, Expr, ExprContext, ExprKind,
     KeywordData, Operator, Stmt, StmtKind, Suite,
@@ -39,13 +38,11 @@ pub const GLOBAL_SCOPE_INDEX: usize = 0;
 pub struct Checker<'a> {
     // Input data.
     path: &'a Path,
-    content: &'a str,
     autofix: &'a fixer::Mode,
     pub(crate) settings: &'a Settings,
+    pub(crate) locator: &'a SourceCodeLocator<'a>,
     // Computed checks.
     checks: Vec<Check>,
-    // Efficient source-code slicing.
-    locator: OnceCell<SourceCodeLocator<'a>>,
     // Docstring tracking.
     docstrings: Vec<(Definition<'a>, Visibility)>,
     // Edit tracking.
@@ -79,14 +76,13 @@ impl<'a> Checker<'a> {
         settings: &'a Settings,
         autofix: &'a fixer::Mode,
         path: &'a Path,
-        content: &'a str,
+        locator: &'a SourceCodeLocator,
     ) -> Checker<'a> {
         Checker {
             settings,
             autofix,
             path,
-            content,
-            locator: OnceCell::new(),
+            locator,
             checks: Default::default(),
             docstrings: Default::default(),
             deletions: Default::default(),
@@ -114,12 +110,6 @@ impl<'a> Checker<'a> {
         }
     }
 
-    /// Get access to a lazily-initialized `SourceCodeLocator` for the file contents.
-    pub fn get_locator(&self) -> &SourceCodeLocator {
-        self.locator
-            .get_or_init(|| SourceCodeLocator::new(self.content))
-    }
-
     /// Return `true` if a patch should be generated under the given autofix `Mode`.
     pub fn patch(&self) -> bool {
         self.autofix.patch()
@@ -2146,7 +2136,7 @@ impl<'a> Checker<'a> {
                 ImportKind::ImportFrom => pyflakes::fixes::remove_unused_import_froms,
             };
 
-            match removal_fn(self.get_locator(), &full_names, child, parent, &deleted) {
+            match removal_fn(self.locator, &full_names, child, parent, &deleted) {
                 Ok(fix) => Some(fix),
                 Err(e) => {
                     error!("Failed to fix unused imports: {}", e);
@@ -2306,12 +2296,12 @@ impl<'a> Checker<'a> {
 
 pub fn check_ast(
     python_ast: &Suite,
-    contents: &str,
+    locator: &SourceCodeLocator,
     settings: &Settings,
     autofix: &fixer::Mode,
     path: &Path,
 ) -> Vec<Check> {
-    let mut checker = Checker::new(settings, autofix, path, contents);
+    let mut checker = Checker::new(settings, autofix, path, locator);
     checker.push_scope(Scope::new(ScopeKind::Module));
     checker.bind_builtins();


@@ -9,7 +9,7 @@ use crate::{flake8_quotes, pycodestyle, Settings};
 
 pub fn check_tokens(
     checks: &mut Vec<Check>,
-    contents: &str,
+    locator: &SourceCodeLocator,
     tokens: &[LexResult],
     settings: &Settings,
 ) {
@@ -19,16 +19,13 @@ pub fn check_tokens(
         | settings.enabled.contains(&CheckCode::Q002)
         | settings.enabled.contains(&CheckCode::Q003);
 
-    // TODO(charlie): Use a shared SourceCodeLocator between this site and the AST traversal.
-    let locator = SourceCodeLocator::new(contents);
-
     let mut state_machine = StateMachine::new();
     for (start, tok, end) in tokens.iter().flatten() {
         // W605
         if enforce_invalid_escape_sequence {
             if matches!(tok, Tok::String { .. }) {
                 checks.extend(pycodestyle::checks::invalid_escape_sequence(
-                    &locator, start, end,
+                    locator, start, end,
                 ));
             }
         }
@@ -38,7 +35,7 @@ pub fn check_tokens(
             let is_docstring = state_machine.consume(tok);
             if matches!(tok, Tok::String { .. }) {
                 if let Some(check) = flake8_quotes::checks::quotes(
-                    &locator,
+                    locator,
                     start,
                     end,
                     is_docstring,


@@ -27,7 +27,7 @@ pub fn leading_space(line: &str) -> String {
 /// Extract the leading indentation from a docstring.
 pub fn indentation<'a>(checker: &'a Checker, docstring: &Expr) -> &'a str {
     let range = Range::from_located(docstring);
-    checker.get_locator().slice_source_code_range(&Range {
+    checker.locator.slice_source_code_range(&Range {
         location: Location::new(range.location.row(), 1),
         end_location: Location::new(range.location.row(), range.location.column()),
     })


@@ -9,6 +9,7 @@ use log::debug;
 use rustpython_parser::lexer::LexResult;
 use rustpython_parser::{lexer, parser};
 
+use crate::ast::operations::SourceCodeLocator;
 use crate::ast::types::Range;
 use crate::autofix::fixer;
 use crate::autofix::fixer::fix_file;
@@ -46,13 +47,16 @@ pub(crate) fn check_path(
     // Aggregate all checks.
     let mut checks: Vec<Check> = vec![];
 
+    // Initialize the SourceCodeLocator (which computes offsets lazily).
+    let locator = SourceCodeLocator::new(contents);
+
     // Run the token-based checks.
     if settings
         .enabled
        .iter()
         .any(|check_code| matches!(check_code.lint_source(), LintSource::Tokens))
     {
-        check_tokens(&mut checks, contents, &tokens, settings);
+        check_tokens(&mut checks, &locator, &tokens, settings);
     }
 
     // Run the AST-based checks.
@@ -63,7 +67,7 @@ pub(crate) fn check_path(
     {
         match parser::parse_program_tokens(tokens, "<filename>") {
             Ok(python_ast) => {
-                checks.extend(check_ast(&python_ast, contents, settings, autofix, path))
+                checks.extend(check_ast(&python_ast, &locator, settings, autofix, path))
             }
             Err(parse_error) => {
                 if settings.enabled.contains(&CheckCode::E999) {
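
This file ties the change together: check_path builds one SourceCodeLocator per file and hands &locator to the token pass and, through Checker::new, to every AST-based check. Below is a rough sketch of that wiring, reusing the Locator type from the sketch above; CheckerSketch and check_path_sketch are invented names for illustration, not ruff's API.

// Sketch of sharing one lazily-initialized locator across passes.
struct CheckerSketch<'a> {
    // Borrowed, not owned: the checker no longer builds its own locator.
    locator: &'a Locator<'a>,
}

fn check_path_sketch(contents: &str) {
    // One locator per file, shared by reference with every consumer.
    let locator = Locator::new(contents);

    // "Token pass": slices the source through the shared locator.
    let first = locator.slice_from_row(1).lines().next();

    // "AST pass": borrows the very same locator, so any offset table built
    // above is reused rather than recomputed.
    let checker = CheckerSketch { locator: &locator };
    let second = checker.locator.slice_from_row(2).lines().next();

    println!("{first:?} / {second:?}");
}

// e.g. check_path_sketch("x = 1\ny = 2\n") prints: Some("x = 1") / Some("y = 2")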


@@ -162,7 +162,7 @@ pub fn blank_before_after_function(checker: &mut Checker, definition: &Definition) {
     } = &docstring.node
     {
         if checker.settings.enabled.contains(&CheckCode::D201) {
-            let (before, _, _) = checker.get_locator().partition_source_code_at(
+            let (before, _, _) = checker.locator.partition_source_code_at(
                 &Range::from_located(parent),
                 &Range::from_located(docstring),
             );
@@ -190,7 +190,7 @@ pub fn blank_before_after_function(checker: &mut Checker, definition: &Definition) {
         }
 
         if checker.settings.enabled.contains(&CheckCode::D202) {
-            let (_, _, after) = checker.get_locator().partition_source_code_at(
+            let (_, _, after) = checker.locator.partition_source_code_at(
                 &Range::from_located(parent),
                 &Range::from_located(docstring),
             );
@@ -253,7 +253,7 @@ pub fn blank_before_after_class(checker: &mut Checker, definition: &Definition) {
     if checker.settings.enabled.contains(&CheckCode::D203)
         || checker.settings.enabled.contains(&CheckCode::D211)
     {
-        let (before, _, _) = checker.get_locator().partition_source_code_at(
+        let (before, _, _) = checker.locator.partition_source_code_at(
             &Range::from_located(parent),
             &Range::from_located(docstring),
         );
@@ -300,7 +300,7 @@ pub fn blank_before_after_class(checker: &mut Checker, definition: &Definition) {
     }
 
     if checker.settings.enabled.contains(&CheckCode::D204) {
-        let (_, _, after) = checker.get_locator().partition_source_code_at(
+        let (_, _, after) = checker.locator.partition_source_code_at(
             &Range::from_located(parent),
             &Range::from_located(docstring),
         );
@@ -530,7 +530,7 @@ pub fn newline_after_last_paragraph(checker: &mut Checker, definition: &Definition) {
         }
         if line_count > 1 {
             let content = checker
-                .get_locator()
+                .locator
                 .slice_source_code_range(&Range::from_located(docstring));
             if let Some(last_line) = content.lines().last().map(|line| line.trim()) {
                 if last_line != "\"\"\"" && last_line != "'''" {
@@ -583,7 +583,7 @@ pub fn no_surrounding_whitespace(checker: &mut Checker, definition: &Definition) {
         );
         if checker.patch() {
             if let Some(first_line) = checker
-                .get_locator()
+                .locator
                 .slice_source_code_range(&Range::from_located(docstring))
                 .lines()
                 .next()
@@ -629,7 +629,7 @@ pub fn multi_line_summary_start(checker: &mut Checker, definition: &Definition) {
     {
         if string.lines().nth(1).is_some() {
             if let Some(first_line) = checker
-                .get_locator()
+                .locator
                 .slice_source_code_range(&Range::from_located(docstring))
                 .lines()
                 .next()
@@ -665,7 +665,7 @@ pub fn triple_quotes(checker: &mut Checker, definition: &Definition) {
     } = &docstring.node
     {
         if let Some(first_line) = checker
-            .get_locator()
+            .locator
             .slice_source_code_range(&Range::from_located(docstring))
             .lines()
             .next()


@@ -17,9 +17,7 @@ pub fn super_call_with_parameters(checker: &mut Checker, expr: &Expr, func: &Expr
         .collect();
     if let Some(mut check) = checks::super_args(scope, &parents, expr, func, args) {
         if checker.patch() {
-            if let Some(fix) =
-                pyupgrade::fixes::remove_super_arguments(checker.get_locator(), expr)
-            {
+            if let Some(fix) = pyupgrade::fixes::remove_super_arguments(checker.locator, expr) {
                 check.amend(fix);
             }
         }


@@ -15,7 +15,7 @@ pub fn useless_object_inheritance(
     if let Some(mut check) = checks::useless_object_inheritance(name, bases, scope) {
         if checker.patch() {
             if let Some(fix) = pyupgrade::fixes::remove_class_def_base(
-                checker.get_locator(),
+                checker.locator,
                 &stmt.location,
                 check.location,
                 bases,