From b060ae2f223dfd292a7d2eec579f154e763ca87b Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Sun, 30 Oct 2022 15:51:59 -0400 Subject: [PATCH] Move SourceCodeLocator to its own module (#522) --- benches/source_code_locator.rs | 2 +- src/ast/operations.rs | 114 +-------------------------------- src/check_ast.rs | 3 +- src/check_tokens.rs | 2 +- src/flake8_quotes/checks.rs | 2 +- src/lib.rs | 3 +- src/linter.rs | 2 +- src/pycodestyle/checks.rs | 2 +- src/pyflakes/fixes.rs | 2 +- src/pyupgrade/fixes.rs | 2 +- src/source_code_locator.rs | 114 +++++++++++++++++++++++++++++++++ 11 files changed, 127 insertions(+), 121 deletions(-) create mode 100644 src/source_code_locator.rs diff --git a/benches/source_code_locator.rs b/benches/source_code_locator.rs index f5ce95f213..7009795464 100644 --- a/benches/source_code_locator.rs +++ b/benches/source_code_locator.rs @@ -2,8 +2,8 @@ use std::path::Path; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use ruff::ast::operations::{compute_offsets, compute_offsets_v0}; use ruff::fs; +use ruff::source_code_locator::compute_offsets; fn criterion_benchmark(c: &mut Criterion) { let contents = fs::read_file(Path::new("resources/test/fixtures/D.py")).unwrap(); diff --git a/src/ast/operations.rs b/src/ast/operations.rs index bf9bf9b044..0888e00089 100644 --- a/src/ast/operations.rs +++ b/src/ast/operations.rs @@ -1,7 +1,6 @@ -use once_cell::unsync::OnceCell; -use rustpython_parser::ast::{Constant, Expr, ExprKind, Location, Stmt, StmtKind}; +use rustpython_parser::ast::{Constant, Expr, ExprKind, Stmt, StmtKind}; -use crate::ast::types::{BindingKind, Range, Scope}; +use crate::ast::types::{BindingKind, Scope}; /// Extract the names bound to a given __all__ assignment. pub fn extract_all_names(stmt: &Stmt, scope: &Scope) -> Vec { @@ -118,112 +117,3 @@ pub fn is_unpacking_assignment(stmt: &Stmt) -> bool { } false } - -/// Struct used to efficiently slice source code at (row, column) Locations. 
-pub struct SourceCodeLocator<'a> { - contents: &'a str, - offsets: OnceCell<Vec<Vec<usize>>>, -} - -pub fn compute_offsets(contents: &str) -> Vec<Vec<usize>> { - let mut offsets = vec![vec![]]; - let mut line_index = 0; - let mut char_index = 0; - let mut newline = false; - for (i, char) in contents.char_indices() { - offsets[line_index].push(i); - - newline = char == '\n'; - if newline { - line_index += 1; - offsets.push(vec![]); - char_index = i + char.len_utf8(); - } - } - // If we end in a newline, add an extra character to indicate the start of that line. - if newline { - offsets[line_index].push(char_index); - } - offsets -} - -impl<'a> SourceCodeLocator<'a> { - pub fn new(contents: &'a str) -> Self { - SourceCodeLocator { - contents, - offsets: OnceCell::new(), - } - } - - fn get_or_init_offsets(&self) -> &Vec<Vec<usize>> { - self.offsets.get_or_init(|| compute_offsets(self.contents)) - } - - pub fn slice_source_code_at(&self, location: &Location) -> &'a str { - let offsets = self.get_or_init_offsets(); - let offset = offsets[location.row() - 1][location.column() - 1]; - &self.contents[offset..] 
- } - - pub fn slice_source_code_range(&self, range: &Range) -> &'a str { - let offsets = self.get_or_init_offsets(); - let start = offsets[range.location.row() - 1][range.location.column() - 1]; - let end = offsets[range.end_location.row() - 1][range.end_location.column() - 1]; - &self.contents[start..end] - } - - pub fn partition_source_code_at( - &self, - outer: &Range, - inner: &Range, - ) -> (&'a str, &'a str, &'a str) { - let offsets = self.get_or_init_offsets(); - let outer_start = offsets[outer.location.row() - 1][outer.location.column() - 1]; - let outer_end = offsets[outer.end_location.row() - 1][outer.end_location.column() - 1]; - let inner_start = offsets[inner.location.row() - 1][inner.location.column() - 1]; - let inner_end = offsets[inner.end_location.row() - 1][inner.end_location.column() - 1]; - ( - &self.contents[outer_start..inner_start], - &self.contents[inner_start..inner_end], - &self.contents[inner_end..outer_end], - ) - } -} - -#[cfg(test)] -mod tests { - use super::SourceCodeLocator; - - #[test] - fn source_code_locator_init() { - let content = "x = 1"; - let locator = SourceCodeLocator::new(content); - let offsets = locator.get_or_init_offsets(); - assert_eq!(offsets.len(), 1); - assert_eq!(offsets[0], [0, 1, 2, 3, 4]); - - let content = "x = 1\n"; - let locator = SourceCodeLocator::new(content); - let offsets = locator.get_or_init_offsets(); - assert_eq!(offsets.len(), 2); - assert_eq!(offsets[0], [0, 1, 2, 3, 4, 5]); - assert_eq!(offsets[1], [6]); - - let content = "x = 1\ny = 2\nz = x + y\n"; - let locator = SourceCodeLocator::new(content); - let offsets = locator.get_or_init_offsets(); - assert_eq!(offsets.len(), 4); - assert_eq!(offsets[0], [0, 1, 2, 3, 4, 5]); - assert_eq!(offsets[1], [6, 7, 8, 9, 10, 11]); - assert_eq!(offsets[2], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21]); - assert_eq!(offsets[3], [22]); - - let content = "# \u{4e9c}\nclass Foo:\n \"\"\".\"\"\""; - let locator = SourceCodeLocator::new(content); - let offsets = 
locator.get_or_init_offsets(); - assert_eq!(offsets.len(), 3); - assert_eq!(offsets[0], [0, 1, 2, 5]); - assert_eq!(offsets[1], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); - assert_eq!(offsets[2], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]); - } -} diff --git a/src/check_ast.rs b/src/check_ast.rs index f03f38e36f..419280c2fc 100644 --- a/src/check_ast.rs +++ b/src/check_ast.rs @@ -12,7 +12,7 @@ use rustpython_parser::ast::{ use rustpython_parser::parser; use crate::ast::helpers::{extract_handler_names, match_name_or_attr, SubscriptKind}; -use crate::ast::operations::{extract_all_names, SourceCodeLocator}; +use crate::ast::operations::extract_all_names; use crate::ast::relocate::relocate_expr; use crate::ast::types::{ Binding, BindingContext, BindingKind, CheckLocator, FunctionScope, ImportKind, Range, Scope, @@ -27,6 +27,7 @@ use crate::python::builtins::{BUILTINS, MAGIC_GLOBALS}; use crate::python::future::ALL_FEATURE_NAMES; use crate::settings::types::PythonVersion; use crate::settings::Settings; +use crate::source_code_locator::SourceCodeLocator; use crate::visibility::{module_visibility, transition_scope, Modifier, Visibility, VisibleScope}; use crate::{ docstrings, flake8_bugbear, flake8_builtins, flake8_comprehensions, flake8_print, pep8_naming, diff --git a/src/check_tokens.rs b/src/check_tokens.rs index 7c37098186..b648341342 100644 --- a/src/check_tokens.rs +++ b/src/check_tokens.rs @@ -2,9 +2,9 @@ use rustpython_parser::lexer::{LexResult, Tok}; -use crate::ast::operations::SourceCodeLocator; use crate::checks::{Check, CheckCode}; use crate::flake8_quotes::docstring_detection::StateMachine; +use crate::source_code_locator::SourceCodeLocator; use crate::{flake8_quotes, pycodestyle, Settings}; pub fn check_tokens( diff --git a/src/flake8_quotes/checks.rs b/src/flake8_quotes/checks.rs index e865072e9a..ecddf68916 100644 --- a/src/flake8_quotes/checks.rs +++ b/src/flake8_quotes/checks.rs @@ -1,9 +1,9 @@ use rustpython_ast::Location; -use 
crate::ast::operations::SourceCodeLocator; use crate::ast::types::Range; use crate::checks::{Check, CheckKind}; use crate::flake8_quotes::settings::{Quote, Settings}; +use crate::source_code_locator::SourceCodeLocator; fn good_single(quote: &Quote) -> char { match quote { diff --git a/src/lib.rs b/src/lib.rs index 0faf98ba5e..56e0f56ece 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,7 +14,7 @@ use crate::linter::{check_path, tokenize}; use crate::message::Message; use crate::settings::configuration::Configuration; -pub mod ast; +mod ast; mod autofix; pub mod cache; pub mod check_ast; @@ -44,6 +44,7 @@ mod pyflakes; mod python; mod pyupgrade; pub mod settings; +pub mod source_code_locator; pub mod visibility; /// Run ruff over Python source code directly. diff --git a/src/linter.rs b/src/linter.rs index 71d416166e..9abdc711d6 100644 --- a/src/linter.rs +++ b/src/linter.rs @@ -9,7 +9,6 @@ use log::debug; use rustpython_parser::lexer::LexResult; use rustpython_parser::{lexer, parser}; -use crate::ast::operations::SourceCodeLocator; use crate::ast::types::Range; use crate::autofix::fixer; use crate::autofix::fixer::fix_file; @@ -21,6 +20,7 @@ use crate::code_gen::SourceGenerator; use crate::message::Message; use crate::noqa::add_noqa; use crate::settings::Settings; +use crate::source_code_locator::SourceCodeLocator; use crate::{cache, fs, noqa}; /// Collect tokens up to and including the first error. 
diff --git a/src/pycodestyle/checks.rs b/src/pycodestyle/checks.rs index 9f18806780..4a2321af0d 100644 --- a/src/pycodestyle/checks.rs +++ b/src/pycodestyle/checks.rs @@ -2,9 +2,9 @@ use itertools::izip; use rustpython_ast::Location; use rustpython_parser::ast::{Cmpop, Constant, Expr, ExprKind, Unaryop}; -use crate::ast::operations::SourceCodeLocator; use crate::ast::types::{CheckLocator, Range}; use crate::checks::{Check, CheckKind, RejectedCmpop}; +use crate::source_code_locator::SourceCodeLocator; fn is_ambiguous_name(name: &str) -> bool { name == "l" || name == "I" || name == "O" diff --git a/src/pyflakes/fixes.rs b/src/pyflakes/fixes.rs index 2fdd75da5f..65efe557cc 100644 --- a/src/pyflakes/fixes.rs +++ b/src/pyflakes/fixes.rs @@ -2,10 +2,10 @@ use anyhow::Result; use libcst_native::{Codegen, ImportNames, NameOrAttribute, SmallStatement, Statement}; use rustpython_ast::Stmt; -use crate::ast::operations::SourceCodeLocator; use crate::ast::types::Range; use crate::autofix::{helpers, Fix}; use crate::cst::helpers::compose_module_path; +use crate::source_code_locator::SourceCodeLocator; /// Generate a Fix to remove any unused imports from an `import` statement. pub fn remove_unused_imports( diff --git a/src/pyupgrade/fixes.rs b/src/pyupgrade/fixes.rs index f8a2154127..2017c47699 100644 --- a/src/pyupgrade/fixes.rs +++ b/src/pyupgrade/fixes.rs @@ -4,9 +4,9 @@ use rustpython_parser::lexer; use rustpython_parser::lexer::Tok; use crate::ast::helpers; -use crate::ast::operations::SourceCodeLocator; use crate::ast::types::Range; use crate::autofix::Fix; +use crate::source_code_locator::SourceCodeLocator; /// Generate a fix to remove a base from a ClassDef statement. pub fn remove_class_def_base( diff --git a/src/source_code_locator.rs b/src/source_code_locator.rs new file mode 100644 index 0000000000..e6af75ced6 --- /dev/null +++ b/src/source_code_locator.rs @@ -0,0 +1,114 @@ +//! Struct used to efficiently slice source code at (row, column) Locations. 
+ +use once_cell::unsync::OnceCell; +use rustpython_ast::Location; + +use crate::ast::types::Range; + +pub struct SourceCodeLocator<'a> { + contents: &'a str, + offsets: OnceCell<Vec<Vec<usize>>>, +} + +pub fn compute_offsets(contents: &str) -> Vec<Vec<usize>> { + let mut offsets = vec![vec![]]; + let mut line_index = 0; + let mut char_index = 0; + let mut newline = false; + for (i, char) in contents.char_indices() { + offsets[line_index].push(i); + + newline = char == '\n'; + if newline { + line_index += 1; + offsets.push(vec![]); + char_index = i + char.len_utf8(); + } + } + // If we end in a newline, add an extra character to indicate the start of that line. + if newline { + offsets[line_index].push(char_index); + } + offsets +} + +impl<'a> SourceCodeLocator<'a> { + pub fn new(contents: &'a str) -> Self { + SourceCodeLocator { + contents, + offsets: OnceCell::new(), + } + } + + fn get_or_init_offsets(&self) -> &Vec<Vec<usize>> { + self.offsets.get_or_init(|| compute_offsets(self.contents)) + } + + pub fn slice_source_code_at(&self, location: &Location) -> &'a str { + let offsets = self.get_or_init_offsets(); + let offset = offsets[location.row() - 1][location.column() - 1]; + &self.contents[offset..] 
+ } + + pub fn slice_source_code_range(&self, range: &Range) -> &'a str { + let offsets = self.get_or_init_offsets(); + let start = offsets[range.location.row() - 1][range.location.column() - 1]; + let end = offsets[range.end_location.row() - 1][range.end_location.column() - 1]; + &self.contents[start..end] + } + + pub fn partition_source_code_at( + &self, + outer: &Range, + inner: &Range, + ) -> (&'a str, &'a str, &'a str) { + let offsets = self.get_or_init_offsets(); + let outer_start = offsets[outer.location.row() - 1][outer.location.column() - 1]; + let outer_end = offsets[outer.end_location.row() - 1][outer.end_location.column() - 1]; + let inner_start = offsets[inner.location.row() - 1][inner.location.column() - 1]; + let inner_end = offsets[inner.end_location.row() - 1][inner.end_location.column() - 1]; + ( + &self.contents[outer_start..inner_start], + &self.contents[inner_start..inner_end], + &self.contents[inner_end..outer_end], + ) + } +} + +#[cfg(test)] +mod tests { + use crate::source_code_locator::SourceCodeLocator; + + #[test] + fn source_code_locator_init() { + let content = "x = 1"; + let locator = SourceCodeLocator::new(content); + let offsets = locator.get_or_init_offsets(); + assert_eq!(offsets.len(), 1); + assert_eq!(offsets[0], [0, 1, 2, 3, 4]); + + let content = "x = 1\n"; + let locator = SourceCodeLocator::new(content); + let offsets = locator.get_or_init_offsets(); + assert_eq!(offsets.len(), 2); + assert_eq!(offsets[0], [0, 1, 2, 3, 4, 5]); + assert_eq!(offsets[1], [6]); + + let content = "x = 1\ny = 2\nz = x + y\n"; + let locator = SourceCodeLocator::new(content); + let offsets = locator.get_or_init_offsets(); + assert_eq!(offsets.len(), 4); + assert_eq!(offsets[0], [0, 1, 2, 3, 4, 5]); + assert_eq!(offsets[1], [6, 7, 8, 9, 10, 11]); + assert_eq!(offsets[2], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21]); + assert_eq!(offsets[3], [22]); + + let content = "# \u{4e9c}\nclass Foo:\n \"\"\".\"\"\""; + let locator = SourceCodeLocator::new(content); 
+ let offsets = locator.get_or_init_offsets(); + assert_eq!(offsets.len(), 3); + assert_eq!(offsets[0], [0, 1, 2, 5]); + assert_eq!(offsets[1], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + assert_eq!(offsets[2], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]); + } +}