Move SourceCodeLocator to its own module (#522)

This commit is contained in:
Charlie Marsh 2022-10-30 15:51:59 -04:00 committed by GitHub
parent 9aa91d3d3c
commit b060ae2f22
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 127 additions and 121 deletions

View file

@ -2,8 +2,8 @@ use std::path::Path;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use ruff::ast::operations::{compute_offsets, compute_offsets_v0};
use ruff::fs;
use ruff::source_code_locator::compute_offsets;
fn criterion_benchmark(c: &mut Criterion) {
let contents = fs::read_file(Path::new("resources/test/fixtures/D.py")).unwrap();

View file

@ -1,7 +1,6 @@
use once_cell::unsync::OnceCell;
use rustpython_parser::ast::{Constant, Expr, ExprKind, Location, Stmt, StmtKind};
use rustpython_parser::ast::{Constant, Expr, ExprKind, Stmt, StmtKind};
use crate::ast::types::{BindingKind, Range, Scope};
use crate::ast::types::{BindingKind, Scope};
/// Extract the names bound to a given __all__ assignment.
pub fn extract_all_names(stmt: &Stmt, scope: &Scope) -> Vec<String> {
@ -118,112 +117,3 @@ pub fn is_unpacking_assignment(stmt: &Stmt) -> bool {
}
false
}
/// Struct used to efficiently slice source code at (row, column) Locations.
pub struct SourceCodeLocator<'a> {
contents: &'a str,
offsets: OnceCell<Vec<Vec<usize>>>,
}
pub fn compute_offsets(contents: &str) -> Vec<Vec<usize>> {
let mut offsets = vec![vec![]];
let mut line_index = 0;
let mut char_index = 0;
let mut newline = false;
for (i, char) in contents.char_indices() {
offsets[line_index].push(i);
newline = char == '\n';
if newline {
line_index += 1;
offsets.push(vec![]);
char_index = i + char.len_utf8();
}
}
// If we end in a newline, add an extra character to indicate the start of that line.
if newline {
offsets[line_index].push(char_index);
}
offsets
}
impl<'a> SourceCodeLocator<'a> {
pub fn new(contents: &'a str) -> Self {
SourceCodeLocator {
contents,
offsets: OnceCell::new(),
}
}
fn get_or_init_offsets(&self) -> &Vec<Vec<usize>> {
self.offsets.get_or_init(|| compute_offsets(self.contents))
}
pub fn slice_source_code_at(&self, location: &Location) -> &'a str {
let offsets = self.get_or_init_offsets();
let offset = offsets[location.row() - 1][location.column() - 1];
&self.contents[offset..]
}
pub fn slice_source_code_range(&self, range: &Range) -> &'a str {
let offsets = self.get_or_init_offsets();
let start = offsets[range.location.row() - 1][range.location.column() - 1];
let end = offsets[range.end_location.row() - 1][range.end_location.column() - 1];
&self.contents[start..end]
}
pub fn partition_source_code_at(
&self,
outer: &Range,
inner: &Range,
) -> (&'a str, &'a str, &'a str) {
let offsets = self.get_or_init_offsets();
let outer_start = offsets[outer.location.row() - 1][outer.location.column() - 1];
let outer_end = offsets[outer.end_location.row() - 1][outer.end_location.column() - 1];
let inner_start = offsets[inner.location.row() - 1][inner.location.column() - 1];
let inner_end = offsets[inner.end_location.row() - 1][inner.end_location.column() - 1];
(
&self.contents[outer_start..inner_start],
&self.contents[inner_start..inner_end],
&self.contents[inner_end..outer_end],
)
}
}
#[cfg(test)]
mod tests {
use super::SourceCodeLocator;
#[test]
fn source_code_locator_init() {
let content = "x = 1";
let locator = SourceCodeLocator::new(content);
let offsets = locator.get_or_init_offsets();
assert_eq!(offsets.len(), 1);
assert_eq!(offsets[0], [0, 1, 2, 3, 4]);
let content = "x = 1\n";
let locator = SourceCodeLocator::new(content);
let offsets = locator.get_or_init_offsets();
assert_eq!(offsets.len(), 2);
assert_eq!(offsets[0], [0, 1, 2, 3, 4, 5]);
assert_eq!(offsets[1], [6]);
let content = "x = 1\ny = 2\nz = x + y\n";
let locator = SourceCodeLocator::new(content);
let offsets = locator.get_or_init_offsets();
assert_eq!(offsets.len(), 4);
assert_eq!(offsets[0], [0, 1, 2, 3, 4, 5]);
assert_eq!(offsets[1], [6, 7, 8, 9, 10, 11]);
assert_eq!(offsets[2], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21]);
assert_eq!(offsets[3], [22]);
let content = "# \u{4e9c}\nclass Foo:\n \"\"\".\"\"\"";
let locator = SourceCodeLocator::new(content);
let offsets = locator.get_or_init_offsets();
assert_eq!(offsets.len(), 3);
assert_eq!(offsets[0], [0, 1, 2, 5]);
assert_eq!(offsets[1], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
assert_eq!(offsets[2], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]);
}
}

View file

@ -12,7 +12,7 @@ use rustpython_parser::ast::{
use rustpython_parser::parser;
use crate::ast::helpers::{extract_handler_names, match_name_or_attr, SubscriptKind};
use crate::ast::operations::{extract_all_names, SourceCodeLocator};
use crate::ast::operations::extract_all_names;
use crate::ast::relocate::relocate_expr;
use crate::ast::types::{
Binding, BindingContext, BindingKind, CheckLocator, FunctionScope, ImportKind, Range, Scope,
@ -27,6 +27,7 @@ use crate::python::builtins::{BUILTINS, MAGIC_GLOBALS};
use crate::python::future::ALL_FEATURE_NAMES;
use crate::settings::types::PythonVersion;
use crate::settings::Settings;
use crate::source_code_locator::SourceCodeLocator;
use crate::visibility::{module_visibility, transition_scope, Modifier, Visibility, VisibleScope};
use crate::{
docstrings, flake8_bugbear, flake8_builtins, flake8_comprehensions, flake8_print, pep8_naming,

View file

@ -2,9 +2,9 @@
use rustpython_parser::lexer::{LexResult, Tok};
use crate::ast::operations::SourceCodeLocator;
use crate::checks::{Check, CheckCode};
use crate::flake8_quotes::docstring_detection::StateMachine;
use crate::source_code_locator::SourceCodeLocator;
use crate::{flake8_quotes, pycodestyle, Settings};
pub fn check_tokens(

View file

@ -1,9 +1,9 @@
use rustpython_ast::Location;
use crate::ast::operations::SourceCodeLocator;
use crate::ast::types::Range;
use crate::checks::{Check, CheckKind};
use crate::flake8_quotes::settings::{Quote, Settings};
use crate::source_code_locator::SourceCodeLocator;
fn good_single(quote: &Quote) -> char {
match quote {

View file

@ -14,7 +14,7 @@ use crate::linter::{check_path, tokenize};
use crate::message::Message;
use crate::settings::configuration::Configuration;
pub mod ast;
mod ast;
mod autofix;
pub mod cache;
pub mod check_ast;
@ -44,6 +44,7 @@ mod pyflakes;
mod python;
mod pyupgrade;
pub mod settings;
pub mod source_code_locator;
pub mod visibility;
/// Run ruff over Python source code directly.

View file

@ -9,7 +9,6 @@ use log::debug;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::{lexer, parser};
use crate::ast::operations::SourceCodeLocator;
use crate::ast::types::Range;
use crate::autofix::fixer;
use crate::autofix::fixer::fix_file;
@ -21,6 +20,7 @@ use crate::code_gen::SourceGenerator;
use crate::message::Message;
use crate::noqa::add_noqa;
use crate::settings::Settings;
use crate::source_code_locator::SourceCodeLocator;
use crate::{cache, fs, noqa};
/// Collect tokens up to and including the first error.

View file

@ -2,9 +2,9 @@ use itertools::izip;
use rustpython_ast::Location;
use rustpython_parser::ast::{Cmpop, Constant, Expr, ExprKind, Unaryop};
use crate::ast::operations::SourceCodeLocator;
use crate::ast::types::{CheckLocator, Range};
use crate::checks::{Check, CheckKind, RejectedCmpop};
use crate::source_code_locator::SourceCodeLocator;
fn is_ambiguous_name(name: &str) -> bool {
name == "l" || name == "I" || name == "O"

View file

@ -2,10 +2,10 @@ use anyhow::Result;
use libcst_native::{Codegen, ImportNames, NameOrAttribute, SmallStatement, Statement};
use rustpython_ast::Stmt;
use crate::ast::operations::SourceCodeLocator;
use crate::ast::types::Range;
use crate::autofix::{helpers, Fix};
use crate::cst::helpers::compose_module_path;
use crate::source_code_locator::SourceCodeLocator;
/// Generate a Fix to remove any unused imports from an `import` statement.
pub fn remove_unused_imports(

View file

@ -4,9 +4,9 @@ use rustpython_parser::lexer;
use rustpython_parser::lexer::Tok;
use crate::ast::helpers;
use crate::ast::operations::SourceCodeLocator;
use crate::ast::types::Range;
use crate::autofix::Fix;
use crate::source_code_locator::SourceCodeLocator;
/// Generate a fix to remove a base from a ClassDef statement.
pub fn remove_class_def_base(

114
src/source_code_locator.rs Normal file
View file

@ -0,0 +1,114 @@
//! Struct used to efficiently slice source code at (row, column) Locations.
use once_cell::unsync::OnceCell;
use rustpython_ast::Location;
use crate::ast::types::Range;
pub struct SourceCodeLocator<'a> {
contents: &'a str,
offsets: OnceCell<Vec<Vec<usize>>>,
}
pub fn compute_offsets(contents: &str) -> Vec<Vec<usize>> {
let mut offsets = vec![vec![]];
let mut line_index = 0;
let mut char_index = 0;
let mut newline = false;
for (i, char) in contents.char_indices() {
offsets[line_index].push(i);
newline = char == '\n';
if newline {
line_index += 1;
offsets.push(vec![]);
char_index = i + char.len_utf8();
}
}
// If we end in a newline, add an extra character to indicate the start of that line.
if newline {
offsets[line_index].push(char_index);
}
offsets
}
impl<'a> SourceCodeLocator<'a> {
pub fn new(contents: &'a str) -> Self {
SourceCodeLocator {
contents,
offsets: OnceCell::new(),
}
}
fn get_or_init_offsets(&self) -> &Vec<Vec<usize>> {
self.offsets.get_or_init(|| compute_offsets(self.contents))
}
pub fn slice_source_code_at(&self, location: &Location) -> &'a str {
let offsets = self.get_or_init_offsets();
let offset = offsets[location.row() - 1][location.column() - 1];
&self.contents[offset..]
}
pub fn slice_source_code_range(&self, range: &Range) -> &'a str {
let offsets = self.get_or_init_offsets();
let start = offsets[range.location.row() - 1][range.location.column() - 1];
let end = offsets[range.end_location.row() - 1][range.end_location.column() - 1];
&self.contents[start..end]
}
pub fn partition_source_code_at(
&self,
outer: &Range,
inner: &Range,
) -> (&'a str, &'a str, &'a str) {
let offsets = self.get_or_init_offsets();
let outer_start = offsets[outer.location.row() - 1][outer.location.column() - 1];
let outer_end = offsets[outer.end_location.row() - 1][outer.end_location.column() - 1];
let inner_start = offsets[inner.location.row() - 1][inner.location.column() - 1];
let inner_end = offsets[inner.end_location.row() - 1][inner.end_location.column() - 1];
(
&self.contents[outer_start..inner_start],
&self.contents[inner_start..inner_end],
&self.contents[inner_end..outer_end],
)
}
}
#[cfg(test)]
mod tests {
use crate::source_code_locator::SourceCodeLocator;
#[test]
fn source_code_locator_init() {
let content = "x = 1";
let locator = SourceCodeLocator::new(content);
let offsets = locator.get_or_init_offsets();
assert_eq!(offsets.len(), 1);
assert_eq!(offsets[0], [0, 1, 2, 3, 4]);
let content = "x = 1\n";
let locator = SourceCodeLocator::new(content);
let offsets = locator.get_or_init_offsets();
assert_eq!(offsets.len(), 2);
assert_eq!(offsets[0], [0, 1, 2, 3, 4, 5]);
assert_eq!(offsets[1], [6]);
let content = "x = 1\ny = 2\nz = x + y\n";
let locator = SourceCodeLocator::new(content);
let offsets = locator.get_or_init_offsets();
assert_eq!(offsets.len(), 4);
assert_eq!(offsets[0], [0, 1, 2, 3, 4, 5]);
assert_eq!(offsets[1], [6, 7, 8, 9, 10, 11]);
assert_eq!(offsets[2], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21]);
assert_eq!(offsets[3], [22]);
let content = "# \u{4e9c}\nclass Foo:\n \"\"\".\"\"\"";
let locator = SourceCodeLocator::new(content);
let offsets = locator.get_or_init_offsets();
assert_eq!(offsets.len(), 3);
assert_eq!(offsets[0], [0, 1, 2, 5]);
assert_eq!(offsets[1], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
assert_eq!(offsets[2], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]);
}
}