mirror of
https://github.com/astral-sh/ruff.git
synced 2025-10-01 14:21:53 +00:00
Move SourceCodeLocator to its own module (#522)
This commit is contained in:
parent
9aa91d3d3c
commit
b060ae2f22
11 changed files with 127 additions and 121 deletions
|
@ -2,8 +2,8 @@ use std::path::Path;
|
||||||
|
|
||||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||||
|
|
||||||
use ruff::ast::operations::{compute_offsets, compute_offsets_v0};
|
|
||||||
use ruff::fs;
|
use ruff::fs;
|
||||||
|
use ruff::source_code_locator::compute_offsets;
|
||||||
|
|
||||||
fn criterion_benchmark(c: &mut Criterion) {
|
fn criterion_benchmark(c: &mut Criterion) {
|
||||||
let contents = fs::read_file(Path::new("resources/test/fixtures/D.py")).unwrap();
|
let contents = fs::read_file(Path::new("resources/test/fixtures/D.py")).unwrap();
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
use once_cell::unsync::OnceCell;
|
use rustpython_parser::ast::{Constant, Expr, ExprKind, Stmt, StmtKind};
|
||||||
use rustpython_parser::ast::{Constant, Expr, ExprKind, Location, Stmt, StmtKind};
|
|
||||||
|
|
||||||
use crate::ast::types::{BindingKind, Range, Scope};
|
use crate::ast::types::{BindingKind, Scope};
|
||||||
|
|
||||||
/// Extract the names bound to a given __all__ assignment.
|
/// Extract the names bound to a given __all__ assignment.
|
||||||
pub fn extract_all_names(stmt: &Stmt, scope: &Scope) -> Vec<String> {
|
pub fn extract_all_names(stmt: &Stmt, scope: &Scope) -> Vec<String> {
|
||||||
|
@ -118,112 +117,3 @@ pub fn is_unpacking_assignment(stmt: &Stmt) -> bool {
|
||||||
}
|
}
|
||||||
false
|
false
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Struct used to efficiently slice source code at (row, column) Locations.
|
|
||||||
pub struct SourceCodeLocator<'a> {
|
|
||||||
contents: &'a str,
|
|
||||||
offsets: OnceCell<Vec<Vec<usize>>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn compute_offsets(contents: &str) -> Vec<Vec<usize>> {
|
|
||||||
let mut offsets = vec![vec![]];
|
|
||||||
let mut line_index = 0;
|
|
||||||
let mut char_index = 0;
|
|
||||||
let mut newline = false;
|
|
||||||
for (i, char) in contents.char_indices() {
|
|
||||||
offsets[line_index].push(i);
|
|
||||||
|
|
||||||
newline = char == '\n';
|
|
||||||
if newline {
|
|
||||||
line_index += 1;
|
|
||||||
offsets.push(vec![]);
|
|
||||||
char_index = i + char.len_utf8();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// If we end in a newline, add an extra character to indicate the start of that line.
|
|
||||||
if newline {
|
|
||||||
offsets[line_index].push(char_index);
|
|
||||||
}
|
|
||||||
offsets
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> SourceCodeLocator<'a> {
|
|
||||||
pub fn new(contents: &'a str) -> Self {
|
|
||||||
SourceCodeLocator {
|
|
||||||
contents,
|
|
||||||
offsets: OnceCell::new(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_or_init_offsets(&self) -> &Vec<Vec<usize>> {
|
|
||||||
self.offsets.get_or_init(|| compute_offsets(self.contents))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn slice_source_code_at(&self, location: &Location) -> &'a str {
|
|
||||||
let offsets = self.get_or_init_offsets();
|
|
||||||
let offset = offsets[location.row() - 1][location.column() - 1];
|
|
||||||
&self.contents[offset..]
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn slice_source_code_range(&self, range: &Range) -> &'a str {
|
|
||||||
let offsets = self.get_or_init_offsets();
|
|
||||||
let start = offsets[range.location.row() - 1][range.location.column() - 1];
|
|
||||||
let end = offsets[range.end_location.row() - 1][range.end_location.column() - 1];
|
|
||||||
&self.contents[start..end]
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn partition_source_code_at(
|
|
||||||
&self,
|
|
||||||
outer: &Range,
|
|
||||||
inner: &Range,
|
|
||||||
) -> (&'a str, &'a str, &'a str) {
|
|
||||||
let offsets = self.get_or_init_offsets();
|
|
||||||
let outer_start = offsets[outer.location.row() - 1][outer.location.column() - 1];
|
|
||||||
let outer_end = offsets[outer.end_location.row() - 1][outer.end_location.column() - 1];
|
|
||||||
let inner_start = offsets[inner.location.row() - 1][inner.location.column() - 1];
|
|
||||||
let inner_end = offsets[inner.end_location.row() - 1][inner.end_location.column() - 1];
|
|
||||||
(
|
|
||||||
&self.contents[outer_start..inner_start],
|
|
||||||
&self.contents[inner_start..inner_end],
|
|
||||||
&self.contents[inner_end..outer_end],
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::SourceCodeLocator;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn source_code_locator_init() {
|
|
||||||
let content = "x = 1";
|
|
||||||
let locator = SourceCodeLocator::new(content);
|
|
||||||
let offsets = locator.get_or_init_offsets();
|
|
||||||
assert_eq!(offsets.len(), 1);
|
|
||||||
assert_eq!(offsets[0], [0, 1, 2, 3, 4]);
|
|
||||||
|
|
||||||
let content = "x = 1\n";
|
|
||||||
let locator = SourceCodeLocator::new(content);
|
|
||||||
let offsets = locator.get_or_init_offsets();
|
|
||||||
assert_eq!(offsets.len(), 2);
|
|
||||||
assert_eq!(offsets[0], [0, 1, 2, 3, 4, 5]);
|
|
||||||
assert_eq!(offsets[1], [6]);
|
|
||||||
|
|
||||||
let content = "x = 1\ny = 2\nz = x + y\n";
|
|
||||||
let locator = SourceCodeLocator::new(content);
|
|
||||||
let offsets = locator.get_or_init_offsets();
|
|
||||||
assert_eq!(offsets.len(), 4);
|
|
||||||
assert_eq!(offsets[0], [0, 1, 2, 3, 4, 5]);
|
|
||||||
assert_eq!(offsets[1], [6, 7, 8, 9, 10, 11]);
|
|
||||||
assert_eq!(offsets[2], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21]);
|
|
||||||
assert_eq!(offsets[3], [22]);
|
|
||||||
|
|
||||||
let content = "# \u{4e9c}\nclass Foo:\n \"\"\".\"\"\"";
|
|
||||||
let locator = SourceCodeLocator::new(content);
|
|
||||||
let offsets = locator.get_or_init_offsets();
|
|
||||||
assert_eq!(offsets.len(), 3);
|
|
||||||
assert_eq!(offsets[0], [0, 1, 2, 5]);
|
|
||||||
assert_eq!(offsets[1], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
|
|
||||||
assert_eq!(offsets[2], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
@ -12,7 +12,7 @@ use rustpython_parser::ast::{
|
||||||
use rustpython_parser::parser;
|
use rustpython_parser::parser;
|
||||||
|
|
||||||
use crate::ast::helpers::{extract_handler_names, match_name_or_attr, SubscriptKind};
|
use crate::ast::helpers::{extract_handler_names, match_name_or_attr, SubscriptKind};
|
||||||
use crate::ast::operations::{extract_all_names, SourceCodeLocator};
|
use crate::ast::operations::extract_all_names;
|
||||||
use crate::ast::relocate::relocate_expr;
|
use crate::ast::relocate::relocate_expr;
|
||||||
use crate::ast::types::{
|
use crate::ast::types::{
|
||||||
Binding, BindingContext, BindingKind, CheckLocator, FunctionScope, ImportKind, Range, Scope,
|
Binding, BindingContext, BindingKind, CheckLocator, FunctionScope, ImportKind, Range, Scope,
|
||||||
|
@ -27,6 +27,7 @@ use crate::python::builtins::{BUILTINS, MAGIC_GLOBALS};
|
||||||
use crate::python::future::ALL_FEATURE_NAMES;
|
use crate::python::future::ALL_FEATURE_NAMES;
|
||||||
use crate::settings::types::PythonVersion;
|
use crate::settings::types::PythonVersion;
|
||||||
use crate::settings::Settings;
|
use crate::settings::Settings;
|
||||||
|
use crate::source_code_locator::SourceCodeLocator;
|
||||||
use crate::visibility::{module_visibility, transition_scope, Modifier, Visibility, VisibleScope};
|
use crate::visibility::{module_visibility, transition_scope, Modifier, Visibility, VisibleScope};
|
||||||
use crate::{
|
use crate::{
|
||||||
docstrings, flake8_bugbear, flake8_builtins, flake8_comprehensions, flake8_print, pep8_naming,
|
docstrings, flake8_bugbear, flake8_builtins, flake8_comprehensions, flake8_print, pep8_naming,
|
||||||
|
|
|
@ -2,9 +2,9 @@
|
||||||
|
|
||||||
use rustpython_parser::lexer::{LexResult, Tok};
|
use rustpython_parser::lexer::{LexResult, Tok};
|
||||||
|
|
||||||
use crate::ast::operations::SourceCodeLocator;
|
|
||||||
use crate::checks::{Check, CheckCode};
|
use crate::checks::{Check, CheckCode};
|
||||||
use crate::flake8_quotes::docstring_detection::StateMachine;
|
use crate::flake8_quotes::docstring_detection::StateMachine;
|
||||||
|
use crate::source_code_locator::SourceCodeLocator;
|
||||||
use crate::{flake8_quotes, pycodestyle, Settings};
|
use crate::{flake8_quotes, pycodestyle, Settings};
|
||||||
|
|
||||||
pub fn check_tokens(
|
pub fn check_tokens(
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
use rustpython_ast::Location;
|
use rustpython_ast::Location;
|
||||||
|
|
||||||
use crate::ast::operations::SourceCodeLocator;
|
|
||||||
use crate::ast::types::Range;
|
use crate::ast::types::Range;
|
||||||
use crate::checks::{Check, CheckKind};
|
use crate::checks::{Check, CheckKind};
|
||||||
use crate::flake8_quotes::settings::{Quote, Settings};
|
use crate::flake8_quotes::settings::{Quote, Settings};
|
||||||
|
use crate::source_code_locator::SourceCodeLocator;
|
||||||
|
|
||||||
fn good_single(quote: &Quote) -> char {
|
fn good_single(quote: &Quote) -> char {
|
||||||
match quote {
|
match quote {
|
||||||
|
|
|
@ -14,7 +14,7 @@ use crate::linter::{check_path, tokenize};
|
||||||
use crate::message::Message;
|
use crate::message::Message;
|
||||||
use crate::settings::configuration::Configuration;
|
use crate::settings::configuration::Configuration;
|
||||||
|
|
||||||
pub mod ast;
|
mod ast;
|
||||||
mod autofix;
|
mod autofix;
|
||||||
pub mod cache;
|
pub mod cache;
|
||||||
pub mod check_ast;
|
pub mod check_ast;
|
||||||
|
@ -44,6 +44,7 @@ mod pyflakes;
|
||||||
mod python;
|
mod python;
|
||||||
mod pyupgrade;
|
mod pyupgrade;
|
||||||
pub mod settings;
|
pub mod settings;
|
||||||
|
pub mod source_code_locator;
|
||||||
pub mod visibility;
|
pub mod visibility;
|
||||||
|
|
||||||
/// Run ruff over Python source code directly.
|
/// Run ruff over Python source code directly.
|
||||||
|
|
|
@ -9,7 +9,6 @@ use log::debug;
|
||||||
use rustpython_parser::lexer::LexResult;
|
use rustpython_parser::lexer::LexResult;
|
||||||
use rustpython_parser::{lexer, parser};
|
use rustpython_parser::{lexer, parser};
|
||||||
|
|
||||||
use crate::ast::operations::SourceCodeLocator;
|
|
||||||
use crate::ast::types::Range;
|
use crate::ast::types::Range;
|
||||||
use crate::autofix::fixer;
|
use crate::autofix::fixer;
|
||||||
use crate::autofix::fixer::fix_file;
|
use crate::autofix::fixer::fix_file;
|
||||||
|
@ -21,6 +20,7 @@ use crate::code_gen::SourceGenerator;
|
||||||
use crate::message::Message;
|
use crate::message::Message;
|
||||||
use crate::noqa::add_noqa;
|
use crate::noqa::add_noqa;
|
||||||
use crate::settings::Settings;
|
use crate::settings::Settings;
|
||||||
|
use crate::source_code_locator::SourceCodeLocator;
|
||||||
use crate::{cache, fs, noqa};
|
use crate::{cache, fs, noqa};
|
||||||
|
|
||||||
/// Collect tokens up to and including the first error.
|
/// Collect tokens up to and including the first error.
|
||||||
|
|
|
@ -2,9 +2,9 @@ use itertools::izip;
|
||||||
use rustpython_ast::Location;
|
use rustpython_ast::Location;
|
||||||
use rustpython_parser::ast::{Cmpop, Constant, Expr, ExprKind, Unaryop};
|
use rustpython_parser::ast::{Cmpop, Constant, Expr, ExprKind, Unaryop};
|
||||||
|
|
||||||
use crate::ast::operations::SourceCodeLocator;
|
|
||||||
use crate::ast::types::{CheckLocator, Range};
|
use crate::ast::types::{CheckLocator, Range};
|
||||||
use crate::checks::{Check, CheckKind, RejectedCmpop};
|
use crate::checks::{Check, CheckKind, RejectedCmpop};
|
||||||
|
use crate::source_code_locator::SourceCodeLocator;
|
||||||
|
|
||||||
fn is_ambiguous_name(name: &str) -> bool {
|
fn is_ambiguous_name(name: &str) -> bool {
|
||||||
name == "l" || name == "I" || name == "O"
|
name == "l" || name == "I" || name == "O"
|
||||||
|
|
|
@ -2,10 +2,10 @@ use anyhow::Result;
|
||||||
use libcst_native::{Codegen, ImportNames, NameOrAttribute, SmallStatement, Statement};
|
use libcst_native::{Codegen, ImportNames, NameOrAttribute, SmallStatement, Statement};
|
||||||
use rustpython_ast::Stmt;
|
use rustpython_ast::Stmt;
|
||||||
|
|
||||||
use crate::ast::operations::SourceCodeLocator;
|
|
||||||
use crate::ast::types::Range;
|
use crate::ast::types::Range;
|
||||||
use crate::autofix::{helpers, Fix};
|
use crate::autofix::{helpers, Fix};
|
||||||
use crate::cst::helpers::compose_module_path;
|
use crate::cst::helpers::compose_module_path;
|
||||||
|
use crate::source_code_locator::SourceCodeLocator;
|
||||||
|
|
||||||
/// Generate a Fix to remove any unused imports from an `import` statement.
|
/// Generate a Fix to remove any unused imports from an `import` statement.
|
||||||
pub fn remove_unused_imports(
|
pub fn remove_unused_imports(
|
||||||
|
|
|
@ -4,9 +4,9 @@ use rustpython_parser::lexer;
|
||||||
use rustpython_parser::lexer::Tok;
|
use rustpython_parser::lexer::Tok;
|
||||||
|
|
||||||
use crate::ast::helpers;
|
use crate::ast::helpers;
|
||||||
use crate::ast::operations::SourceCodeLocator;
|
|
||||||
use crate::ast::types::Range;
|
use crate::ast::types::Range;
|
||||||
use crate::autofix::Fix;
|
use crate::autofix::Fix;
|
||||||
|
use crate::source_code_locator::SourceCodeLocator;
|
||||||
|
|
||||||
/// Generate a fix to remove a base from a ClassDef statement.
|
/// Generate a fix to remove a base from a ClassDef statement.
|
||||||
pub fn remove_class_def_base(
|
pub fn remove_class_def_base(
|
||||||
|
|
114
src/source_code_locator.rs
Normal file
114
src/source_code_locator.rs
Normal file
|
@ -0,0 +1,114 @@
|
||||||
|
//! Struct used to efficiently slice source code at (row, column) Locations.
|
||||||
|
|
||||||
|
use once_cell::unsync::OnceCell;
|
||||||
|
use rustpython_ast::Location;
|
||||||
|
|
||||||
|
use crate::ast::types::Range;
|
||||||
|
|
||||||
|
pub struct SourceCodeLocator<'a> {
|
||||||
|
contents: &'a str,
|
||||||
|
offsets: OnceCell<Vec<Vec<usize>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn compute_offsets(contents: &str) -> Vec<Vec<usize>> {
|
||||||
|
let mut offsets = vec![vec![]];
|
||||||
|
let mut line_index = 0;
|
||||||
|
let mut char_index = 0;
|
||||||
|
let mut newline = false;
|
||||||
|
for (i, char) in contents.char_indices() {
|
||||||
|
offsets[line_index].push(i);
|
||||||
|
|
||||||
|
newline = char == '\n';
|
||||||
|
if newline {
|
||||||
|
line_index += 1;
|
||||||
|
offsets.push(vec![]);
|
||||||
|
char_index = i + char.len_utf8();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// If we end in a newline, add an extra character to indicate the start of that line.
|
||||||
|
if newline {
|
||||||
|
offsets[line_index].push(char_index);
|
||||||
|
}
|
||||||
|
offsets
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> SourceCodeLocator<'a> {
|
||||||
|
pub fn new(contents: &'a str) -> Self {
|
||||||
|
SourceCodeLocator {
|
||||||
|
contents,
|
||||||
|
offsets: OnceCell::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_or_init_offsets(&self) -> &Vec<Vec<usize>> {
|
||||||
|
self.offsets.get_or_init(|| compute_offsets(self.contents))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn slice_source_code_at(&self, location: &Location) -> &'a str {
|
||||||
|
let offsets = self.get_or_init_offsets();
|
||||||
|
let offset = offsets[location.row() - 1][location.column() - 1];
|
||||||
|
&self.contents[offset..]
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn slice_source_code_range(&self, range: &Range) -> &'a str {
|
||||||
|
let offsets = self.get_or_init_offsets();
|
||||||
|
let start = offsets[range.location.row() - 1][range.location.column() - 1];
|
||||||
|
let end = offsets[range.end_location.row() - 1][range.end_location.column() - 1];
|
||||||
|
&self.contents[start..end]
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn partition_source_code_at(
|
||||||
|
&self,
|
||||||
|
outer: &Range,
|
||||||
|
inner: &Range,
|
||||||
|
) -> (&'a str, &'a str, &'a str) {
|
||||||
|
let offsets = self.get_or_init_offsets();
|
||||||
|
let outer_start = offsets[outer.location.row() - 1][outer.location.column() - 1];
|
||||||
|
let outer_end = offsets[outer.end_location.row() - 1][outer.end_location.column() - 1];
|
||||||
|
let inner_start = offsets[inner.location.row() - 1][inner.location.column() - 1];
|
||||||
|
let inner_end = offsets[inner.end_location.row() - 1][inner.end_location.column() - 1];
|
||||||
|
(
|
||||||
|
&self.contents[outer_start..inner_start],
|
||||||
|
&self.contents[inner_start..inner_end],
|
||||||
|
&self.contents[inner_end..outer_end],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use crate::source_code_locator::SourceCodeLocator;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn source_code_locator_init() {
|
||||||
|
let content = "x = 1";
|
||||||
|
let locator = SourceCodeLocator::new(content);
|
||||||
|
let offsets = locator.get_or_init_offsets();
|
||||||
|
assert_eq!(offsets.len(), 1);
|
||||||
|
assert_eq!(offsets[0], [0, 1, 2, 3, 4]);
|
||||||
|
|
||||||
|
let content = "x = 1\n";
|
||||||
|
let locator = SourceCodeLocator::new(content);
|
||||||
|
let offsets = locator.get_or_init_offsets();
|
||||||
|
assert_eq!(offsets.len(), 2);
|
||||||
|
assert_eq!(offsets[0], [0, 1, 2, 3, 4, 5]);
|
||||||
|
assert_eq!(offsets[1], [6]);
|
||||||
|
|
||||||
|
let content = "x = 1\ny = 2\nz = x + y\n";
|
||||||
|
let locator = SourceCodeLocator::new(content);
|
||||||
|
let offsets = locator.get_or_init_offsets();
|
||||||
|
assert_eq!(offsets.len(), 4);
|
||||||
|
assert_eq!(offsets[0], [0, 1, 2, 3, 4, 5]);
|
||||||
|
assert_eq!(offsets[1], [6, 7, 8, 9, 10, 11]);
|
||||||
|
assert_eq!(offsets[2], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21]);
|
||||||
|
assert_eq!(offsets[3], [22]);
|
||||||
|
|
||||||
|
let content = "# \u{4e9c}\nclass Foo:\n \"\"\".\"\"\"";
|
||||||
|
let locator = SourceCodeLocator::new(content);
|
||||||
|
let offsets = locator.get_or_init_offsets();
|
||||||
|
assert_eq!(offsets.len(), 3);
|
||||||
|
assert_eq!(offsets[0], [0, 1, 2, 5]);
|
||||||
|
assert_eq!(offsets[1], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
|
||||||
|
assert_eq!(offsets[2], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue