diff --git a/crates/ruff_python_ast/src/source_code/locator.rs b/crates/ruff_python_ast/src/source_code/locator.rs index 978712abc1..ee6cf94e05 100644 --- a/crates/ruff_python_ast/src/source_code/locator.rs +++ b/crates/ruff_python_ast/src/source_code/locator.rs @@ -133,6 +133,17 @@ impl<'a> Locator<'a> { &self.contents[start..end] } + /// Return the byte offset of the given [`Location`]. + pub fn offset(&self, location: Location) -> usize { + let index = self.get_or_init_index(); + truncate(location, index, self.contents) + } + + /// Return the underlying source code. + pub fn contents(&self) -> &'a str { + self.contents + } + pub const fn len(&self) -> usize { self.contents.len() } diff --git a/crates/ruff_python_formatter/src/context.rs b/crates/ruff_python_formatter/src/context.rs index 75adddeaa5..3d516a4f74 100644 --- a/crates/ruff_python_formatter/src/context.rs +++ b/crates/ruff_python_formatter/src/context.rs @@ -1,15 +1,21 @@ -use ruff_formatter::{FormatContext, SimpleFormatOptions}; +use std::rc::Rc; -use crate::core::locator::Locator; +use ruff_formatter::{FormatContext, SimpleFormatOptions}; +use ruff_python_ast::source_code::Locator; pub struct ASTFormatContext<'a> { options: SimpleFormatOptions, + contents: Rc, locator: Locator<'a>, } impl<'a> ASTFormatContext<'a> { pub fn new(options: SimpleFormatOptions, locator: Locator<'a>) -> Self { - Self { options, locator } + Self { + options, + contents: Rc::from(locator.contents()), + locator, + } } } @@ -22,6 +28,10 @@ impl FormatContext for ASTFormatContext<'_> { } impl<'a> ASTFormatContext<'a> { + pub fn contents(&'a self) -> Rc { + self.contents.clone() + } + pub fn locator(&'a self) -> &'a Locator { &self.locator } diff --git a/crates/ruff_python_formatter/src/core/helpers.rs b/crates/ruff_python_formatter/src/core/helpers.rs index 032ba06fa8..f25a9dd903 100644 --- a/crates/ruff_python_formatter/src/core/helpers.rs +++ b/crates/ruff_python_formatter/src/core/helpers.rs @@ -1,7 +1,7 @@ use rustpython_parser::ast::Location; -use crate::core::locator::Locator; -use crate::core::types::Range; +use ruff_python_ast::source_code::Locator; +use ruff_python_ast::types::Range; /// Return `true` if the given string is a radix literal (e.g., `0b101`). pub fn is_radix_literal(content: &str) -> bool { @@ -20,9 +20,8 @@ pub fn find_tok( locator: &Locator, f: impl Fn(rustpython_parser::Tok) -> bool, ) -> (Location, Location) { - let (source, start_index, end_index) = locator.slice(Range::new(location, end_location)); for (start, tok, end) in rustpython_parser::lexer::lex_located( - &source[start_index..end_index], + locator.slice(Range::new(location, end_location)), rustpython_parser::Mode::Module, location, ) @@ -48,8 +47,8 @@ pub fn expand_indented_block( locator: &Locator, ) -> (Location, Location) { let contents = locator.contents(); - let start_index = locator.index(location); - let end_index = locator.index(end_location); + let start_index = locator.offset(location); + let end_index = locator.offset(end_location); // Find the colon, which indicates the end of the header. let mut nesting = 0; @@ -76,7 +75,7 @@ pub fn expand_indented_block( } } let colon_location = colon.unwrap(); - let colon_index = locator.index(colon_location); + let colon_index = locator.offset(colon_location); // From here, we have two options: simple statement or compound statement. let indent = rustpython_parser::lexer::lex_located( @@ -120,11 +119,8 @@ pub fn expand_indented_block( /// Return true if the `orelse` block of an `if` statement is an `elif` statement. pub fn is_elif(orelse: &[rustpython_parser::ast::Stmt], locator: &Locator) -> bool { if orelse.len() == 1 && matches!(orelse[0].node, rustpython_parser::ast::StmtKind::If { .. }) { - let (source, start, end) = locator.slice(Range::new( - orelse[0].location, - orelse[0].end_location.unwrap(), - )); - if source[start..end].starts_with("elif") { + let contents = locator.skip(orelse[0].location); + if contents.starts_with("elif") { return true; } } diff --git a/crates/ruff_python_formatter/src/core/locator.rs b/crates/ruff_python_formatter/src/core/locator.rs deleted file mode 100644 index 060b9774ed..0000000000 --- a/crates/ruff_python_formatter/src/core/locator.rs +++ /dev/null @@ -1,127 +0,0 @@ -//! Struct used to efficiently slice source code at (row, column) Locations. - -use std::rc::Rc; - -use once_cell::unsync::OnceCell; -use rustpython_parser::ast::Location; - -use crate::core::types::Range; - -pub struct Locator<'a> { - contents: &'a str, - contents_rc: Rc, - index: OnceCell, -} - -pub enum Index { - Ascii(Vec), - Utf8(Vec>), -} - -/// Compute the starting byte index of each line in ASCII source code. -fn index_ascii(contents: &str) -> Vec { - let mut index = Vec::with_capacity(48); - index.push(0); - let bytes = contents.as_bytes(); - for (i, byte) in bytes.iter().enumerate() { - if *byte == b'\n' { - index.push(i + 1); - } - } - index -} - -/// Compute the starting byte index of each character in UTF-8 source code. -fn index_utf8(contents: &str) -> Vec> { - let mut index = Vec::with_capacity(48); - let mut current_row = Vec::with_capacity(48); - let mut current_byte_offset = 0; - let mut previous_char = '\0'; - for char in contents.chars() { - current_row.push(current_byte_offset); - if char == '\n' { - if previous_char == '\r' { - current_row.pop(); - } - index.push(current_row); - current_row = Vec::with_capacity(48); - } - current_byte_offset += char.len_utf8(); - previous_char = char; - } - index.push(current_row); - index -} - -/// Compute the starting byte index of each line in source code. -pub fn index(contents: &str) -> Index { - if contents.is_ascii() { - Index::Ascii(index_ascii(contents)) - } else { - Index::Utf8(index_utf8(contents)) - } -} - -/// Truncate a [`Location`] to a byte offset in ASCII source code. -fn truncate_ascii(location: Location, index: &[usize], contents: &str) -> usize { - if location.row() - 1 == index.len() && location.column() == 0 - || (!index.is_empty() - && location.row() - 1 == index.len() - 1 - && index[location.row() - 1] + location.column() >= contents.len()) - { - contents.len() - } else { - index[location.row() - 1] + location.column() - } -} - -/// Truncate a [`Location`] to a byte offset in UTF-8 source code. -fn truncate_utf8(location: Location, index: &[Vec], contents: &str) -> usize { - if (location.row() - 1 == index.len() && location.column() == 0) - || (location.row() - 1 == index.len() - 1 - && location.column() == index[location.row() - 1].len()) - { - contents.len() - } else { - index[location.row() - 1][location.column()] - } -} - -/// Truncate a [`Location`] to a byte offset in source code. -fn truncate(location: Location, index: &Index, contents: &str) -> usize { - match index { - Index::Ascii(index) => truncate_ascii(location, index, contents), - Index::Utf8(index) => truncate_utf8(location, index, contents), - } -} - -impl<'a> Locator<'a> { - pub fn new(contents: &'a str) -> Self { - Locator { - contents, - contents_rc: Rc::from(contents), - index: OnceCell::new(), - } - } - - fn get_or_init_index(&self) -> &Index { - self.index.get_or_init(|| index(self.contents)) - } - - pub fn index(&self, location: Location) -> usize { - let index = self.get_or_init_index(); - truncate(location, index, self.contents) - } - - pub fn contents(&self) -> &str { - self.contents - } - - /// Slice the source code at a [`Range`]. - pub fn slice(&self, range: Range) -> (Rc, usize, usize) { - let index = self.get_or_init_index(); - let start = truncate(range.location, index, self.contents); - let end = truncate(range.end_location, index, self.contents); - (Rc::clone(&self.contents_rc), start, end) - } -} diff --git a/crates/ruff_python_formatter/src/core/mod.rs b/crates/ruff_python_formatter/src/core/mod.rs index 927be4f07d..8e6e8a0b73 100644 --- a/crates/ruff_python_formatter/src/core/mod.rs +++ b/crates/ruff_python_formatter/src/core/mod.rs @@ -1,4 +1,2 @@ pub mod helpers; -pub mod locator; -pub mod types; pub mod visitor; diff --git a/crates/ruff_python_formatter/src/core/types.rs b/crates/ruff_python_formatter/src/core/types.rs deleted file mode 100644 index e1f1a49041..0000000000 --- a/crates/ruff_python_formatter/src/core/types.rs +++ /dev/null @@ -1,76 +0,0 @@ -use std::ops::Deref; - -use rustpython_parser::ast::Location; - -use crate::cst::{Expr, Located, Stmt}; - -#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)] -pub struct Range { - pub location: Location, - pub end_location: Location, -} - -impl Range { - pub fn new(location: Location, end_location: Location) -> Self { - Self { - location, - end_location, - } - } - - pub fn from_located(located: &Located) -> Self { - Range::new(located.location, located.end_location.unwrap()) - } -} - -#[derive(Debug, Copy, Clone)] -pub struct RefEquality<'a, T>(pub &'a T); - -impl<'a, T> std::hash::Hash for RefEquality<'a, T> { - fn hash(&self, state: &mut H) - where - H: std::hash::Hasher, - { - (self.0 as *const T).hash(state); - } -} - -impl<'a, 'b, T> PartialEq> for RefEquality<'a, T> { - fn eq(&self, other: &RefEquality<'b, T>) -> bool { - std::ptr::eq(self.0, other.0) - } -} - -impl<'a, T> Eq for RefEquality<'a, T> {} - -impl<'a, T> Deref for RefEquality<'a, T> { - type Target = T; - - fn deref(&self) -> &T { - self.0 - } -} - -impl<'a> From<&RefEquality<'a, Stmt>> for &'a Stmt { - fn from(r: &RefEquality<'a, Stmt>) -> Self { - r.0 - } -} - -impl<'a> From<&RefEquality<'a, Expr>> for &'a Expr { - fn from(r: &RefEquality<'a, Expr>) -> Self { - r.0 - } -} - -impl<'a> From<&RefEquality<'a, rustpython_parser::ast::Stmt>> for &'a rustpython_parser::ast::Stmt { - fn from(r: &RefEquality<'a, rustpython_parser::ast::Stmt>) -> Self { - r.0 - } -} - -impl<'a> From<&RefEquality<'a, rustpython_parser::ast::Expr>> for &'a rustpython_parser::ast::Expr { - fn from(r: &RefEquality<'a, rustpython_parser::ast::Expr>) -> Self { - r.0 - } -} diff --git a/crates/ruff_python_formatter/src/cst.rs b/crates/ruff_python_formatter/src/cst.rs index 702731c0b7..c304a1799a 100644 --- a/crates/ruff_python_formatter/src/cst.rs +++ b/crates/ruff_python_formatter/src/cst.rs @@ -2,14 +2,14 @@ use std::iter; +use itertools::Itertools; use rustpython_parser::ast::{Constant, Location}; use rustpython_parser::Mode; -use itertools::Itertools; +use ruff_python_ast::source_code::Locator; +use ruff_python_ast::types::Range; use crate::core::helpers::{expand_indented_block, find_tok, is_elif}; -use crate::core::locator::Locator; -use crate::core::types::Range; use crate::trivia::{Parenthesize, Trivia}; type Ident = String; @@ -45,13 +45,13 @@ impl Located { impl From<&Located> for Range { fn from(located: &Located) -> Self { - Range::new(located.location, located.end_location.unwrap()) + Self::new(located.location, located.end_location.unwrap()) } } impl From<&Box>> for Range { fn from(located: &Box>) -> Self { - Range::new(located.location, located.end_location.unwrap()) + Self::new(located.location, located.end_location.unwrap()) } } @@ -2158,10 +2158,8 @@ impl From<(rustpython_parser::ast::Expr, &Locator<'_>)> for Expr { }, rustpython_parser::ast::ExprKind::Slice { lower, upper, step } => { // Locate the colon tokens, which indicate the number of index segments. - let (source, start, end) = - locator.slice(Range::new(expr.location, expr.end_location.unwrap())); let tokens = rustpython_parser::lexer::lex_located( - &source[start..end], + locator.slice(Range::new(expr.location, expr.end_location.unwrap())), Mode::Module, expr.location, ); diff --git a/crates/ruff_python_formatter/src/format/builders.rs b/crates/ruff_python_formatter/src/format/builders.rs index 166459c12f..67b9729ebb 100644 --- a/crates/ruff_python_formatter/src/format/builders.rs +++ b/crates/ruff_python_formatter/src/format/builders.rs @@ -1,9 +1,9 @@ use ruff_formatter::prelude::*; use ruff_formatter::{write, Format}; +use ruff_python_ast::types::Range; use ruff_text_size::{TextRange, TextSize}; use crate::context::ASTFormatContext; -use crate::core::types::Range; use crate::cst::{Body, Stmt}; use crate::shared_traits::AsFormat; use crate::trivia::{Relationship, TriviaKind}; @@ -73,10 +73,17 @@ pub struct Literal { impl Format> for Literal { fn fmt(&self, f: &mut Formatter>) -> FormatResult<()> { - let (text, start, end) = f.context().locator().slice(self.range); + let text = f.context().contents(); + let locator = f.context().locator(); + let start_index = locator.offset(self.range.location); + let end_index = locator.offset(self.range.end_location); + f.write_element(FormatElement::StaticTextSlice { text, - range: TextRange::new(start.try_into().unwrap(), end.try_into().unwrap()), + range: TextRange::new( + start_index.try_into().unwrap(), + end_index.try_into().unwrap(), + ), }) } } diff --git a/crates/ruff_python_formatter/src/format/expr.rs b/crates/ruff_python_formatter/src/format/expr.rs index aa66887abe..ef95842632 100644 --- a/crates/ruff_python_formatter/src/format/expr.rs +++ b/crates/ruff_python_formatter/src/format/expr.rs @@ -4,10 +4,10 @@ use rustpython_parser::ast::Constant; use ruff_formatter::prelude::*; use ruff_formatter::{format_args, write}; +use ruff_python_ast::types::Range; use ruff_text_size::TextSize; use crate::context::ASTFormatContext; -use crate::core::types::Range; use crate::cst::{ Arguments, BoolOp, CmpOp, Comprehension, Expr, ExprKind, Keyword, Operator, OperatorKind, SliceIndex, SliceIndexKind, UnaryOp, UnaryOpKind, diff --git a/crates/ruff_python_formatter/src/format/numbers.rs b/crates/ruff_python_formatter/src/format/numbers.rs index 11c3fb0bd9..5ef3edffaf 100644 --- a/crates/ruff_python_formatter/src/format/numbers.rs +++ b/crates/ruff_python_formatter/src/format/numbers.rs @@ -2,10 +2,10 @@ use rustpython_parser::ast::Location; use ruff_formatter::prelude::*; use ruff_formatter::{write, Format}; +use ruff_python_ast::types::Range; use ruff_text_size::TextSize; use crate::context::ASTFormatContext; -use crate::core::types::Range; use crate::format::builders::literal; #[derive(Debug, Copy, Clone, Eq, PartialEq)] @@ -15,11 +15,14 @@ struct FloatAtom { impl Format> for FloatAtom { fn fmt(&self, f: &mut Formatter>) -> FormatResult<()> { - let (source, start, end) = f.context().locator().slice(self.range); + let locator = f.context().locator(); + let contents = f.context().contents(); + let start_index = locator.offset(self.range.location); + let end_index = locator.offset(self.range.end_location); - if let Some(dot_index) = source[start..end].find('.') { - let integer = &source[start..start + dot_index]; - let fractional = &source[start + dot_index + 1..end]; + if let Some(dot_index) = contents[start_index..end_index].find('.') { + let integer = &contents[start_index..start_index + dot_index]; + let fractional = &contents[start_index + dot_index + 1..end_index]; if integer.is_empty() { write!(f, [text("0")])?; @@ -72,12 +75,15 @@ pub struct FloatLiteral { impl Format> for FloatLiteral { fn fmt(&self, f: &mut Formatter>) -> FormatResult<()> { - let (source, start, end) = f.context().locator().slice(self.range); + let locator = f.context().locator(); + let contents = f.context().contents(); + let start_index = locator.offset(self.range.location); + let end_index = locator.offset(self.range.end_location); // Scientific notation - if let Some(exponent_index) = source[start..end] + if let Some(exponent_index) = contents[start_index..end_index] .find('e') - .or_else(|| source[start..end].find('E')) + .or_else(|| contents[start_index..end_index].find('E')) { // Write the base. write!( @@ -94,7 +100,7 @@ impl Format> for FloatLiteral { write!(f, [text("e")])?; // Write the exponent, omitting the sign if it's positive. - let plus = source[start + exponent_index + 1..end].starts_with('+'); + let plus = contents[start_index + exponent_index + 1..end_index].starts_with('+'); write!( f, [literal(Range::new( @@ -125,13 +131,16 @@ pub struct IntLiteral { impl Format> for IntLiteral { fn fmt(&self, f: &mut Formatter>) -> FormatResult<()> { - let (source, start, end) = f.context().locator().slice(self.range); + let locator = f.context().locator(); + let contents = f.context().contents(); + let start_index = locator.offset(self.range.location); + let end_index = locator.offset(self.range.end_location); for prefix in ["0b", "0B", "0o", "0O", "0x", "0X"] { - if source[start..end].starts_with(prefix) { + if contents[start_index..end_index].starts_with(prefix) { // In each case, the prefix must be lowercase, while the suffix must be uppercase. - let prefix = &source[start..start + prefix.len()]; - let suffix = &source[start + prefix.len()..end]; + let prefix = &contents[start_index..start_index + prefix.len()]; + let suffix = &contents[start_index + prefix.len()..end_index]; if prefix.bytes().any(|b| b.is_ascii_uppercase()) || suffix.bytes().any(|b| b.is_ascii_lowercase()) @@ -171,11 +180,14 @@ pub struct ComplexLiteral { impl Format> for ComplexLiteral { fn fmt(&self, f: &mut Formatter>) -> FormatResult<()> { - let (source, start, end) = f.context().locator().slice(self.range); + let locator = f.context().locator(); + let contents = f.context().contents(); + let start_index = locator.offset(self.range.location); + let end_index = locator.offset(self.range.end_location); - if source[start..end].ends_with('j') { + if contents[start_index..end_index].ends_with('j') { write!(f, [literal(self.range)])?; - } else if source[start..end].ends_with('J') { + } else if contents[start_index..end_index].ends_with('J') { write!( f, [literal(Range::new( diff --git a/crates/ruff_python_formatter/src/format/pattern.rs b/crates/ruff_python_formatter/src/format/pattern.rs index 4fe8f5e396..f17921bbbf 100644 --- a/crates/ruff_python_formatter/src/format/pattern.rs +++ b/crates/ruff_python_formatter/src/format/pattern.rs @@ -1,7 +1,8 @@ +use rustpython_parser::ast::Constant; + use ruff_formatter::prelude::*; use ruff_formatter::write; use ruff_text_size::TextSize; -use rustpython_parser::ast::Constant; use crate::context::ASTFormatContext; use crate::cst::{Pattern, PatternKind}; diff --git a/crates/ruff_python_formatter/src/format/strings.rs b/crates/ruff_python_formatter/src/format/strings.rs index 1444715559..6ce5c1b30a 100644 --- a/crates/ruff_python_formatter/src/format/strings.rs +++ b/crates/ruff_python_formatter/src/format/strings.rs @@ -3,10 +3,10 @@ use rustpython_parser::{Mode, Tok}; use ruff_formatter::prelude::*; use ruff_formatter::{write, Format}; use ruff_python_ast::str::{leading_quote, trailing_quote}; +use ruff_python_ast::types::Range; use ruff_text_size::TextSize; use crate::context::ASTFormatContext; -use crate::core::types::Range; use crate::cst::Expr; #[derive(Debug, Copy, Clone, Eq, PartialEq)] @@ -16,13 +16,16 @@ pub struct StringLiteralPart { impl Format> for StringLiteralPart { fn fmt(&self, f: &mut Formatter>) -> FormatResult<()> { - let (source, start, end) = f.context().locator().slice(self.range); + let locator = f.context().locator(); + let contents = f.context().contents(); + let start_index = locator.offset(self.range.location); + let end_index = locator.offset(self.range.end_location); // Extract leading and trailing quotes. - let content = &source[start..end]; - let leading_quote = leading_quote(content).unwrap(); - let trailing_quote = trailing_quote(content).unwrap(); - let body = &content[leading_quote.len()..content.len() - trailing_quote.len()]; + let contents = &contents[start_index..end_index]; + let leading_quote = leading_quote(contents).unwrap(); + let trailing_quote = trailing_quote(contents).unwrap(); + let body = &contents[leading_quote.len()..contents.len() - trailing_quote.len()]; // Determine the correct quote style. // TODO(charlie): Make this parameterizable. @@ -126,18 +129,17 @@ impl Format> for StringLiteral<'_> { // TODO(charlie): This tokenization needs to happen earlier, so that we can attach // comments to individual string literals. - let (source, start, end) = f.context().locator().slice(Range::from(expr)); - let elts = - rustpython_parser::lexer::lex_located(&source[start..end], Mode::Module, expr.location) - .flatten() - .filter_map(|(start, tok, end)| { - if matches!(tok, Tok::String { .. }) { - Some(Range::new(start, end)) - } else { - None - } - }) - .collect::>(); + let contents = f.context().locator().slice(expr); + let elts = rustpython_parser::lexer::lex_located(contents, Mode::Module, expr.location) + .flatten() + .filter_map(|(start, tok, end)| { + if matches!(tok, Tok::String { .. }) { + Some(Range::new(start, end)) + } else { + None + } + }) + .collect::>(); write!( f, [group(&format_with(|f| { diff --git a/crates/ruff_python_formatter/src/lib.rs b/crates/ruff_python_formatter/src/lib.rs index bffc9892e5..89434db479 100644 --- a/crates/ruff_python_formatter/src/lib.rs +++ b/crates/ruff_python_formatter/src/lib.rs @@ -2,10 +2,10 @@ use anyhow::Result; use rustpython_parser::lexer::LexResult; use ruff_formatter::{format, Formatted, IndentStyle, SimpleFormatOptions}; +use ruff_python_ast::source_code::Locator; use crate::attachment::attach; use crate::context::ASTFormatContext; -use crate::core::locator::Locator; use crate::cst::Stmt; use crate::newlines::normalize_newlines; use crate::parentheses::normalize_parentheses; diff --git a/crates/ruff_python_formatter/src/parentheses.rs b/crates/ruff_python_formatter/src/parentheses.rs index 52d07c8e40..22c2432a10 100644 --- a/crates/ruff_python_formatter/src/parentheses.rs +++ b/crates/ruff_python_formatter/src/parentheses.rs @@ -1,8 +1,8 @@ use rustpython_parser::ast::Constant; +use ruff_python_ast::source_code::Locator; + use crate::core::helpers::is_radix_literal; -use crate::core::locator::Locator; -use crate::core::types::Range; use crate::core::visitor; use crate::core::visitor::Visitor; use crate::cst::{Expr, ExprKind, Stmt, StmtKind}; @@ -154,9 +154,8 @@ impl<'a> Visitor<'a> for ParenthesesNormalizer<'_> { .. }, ) { - let (source, start, end) = self.locator.slice(Range::from(&*value)); // TODO(charlie): Encode this in the AST via separate node types. - if !is_radix_literal(&source[start..end]) { + if !is_radix_literal(self.locator.slice(&**value)) { value.parentheses = Parenthesize::Always; } } diff --git a/crates/ruff_python_formatter/src/trivia.rs b/crates/ruff_python_formatter/src/trivia.rs index 75bbb2e3d3..cc1902e782 100644 --- a/crates/ruff_python_formatter/src/trivia.rs +++ b/crates/ruff_python_formatter/src/trivia.rs @@ -3,7 +3,8 @@ use rustpython_parser::ast::Location; use rustpython_parser::lexer::LexResult; use rustpython_parser::Tok; -use crate::core::types::Range; +use ruff_python_ast::types::Range; + use crate::cst::{ Alias, Arg, Body, BoolOp, CmpOp, Excepthandler, ExcepthandlerKind, Expr, ExprKind, Keyword, Operator, Pattern, PatternKind, SliceIndex, SliceIndexKind, Stmt, StmtKind, UnaryOp,