Replace row/column based Location with byte-offsets. (#3931)

This commit is contained in:
Micha Reiser 2023-04-26 20:11:02 +02:00 committed by GitHub
parent ee91598835
commit cab65b25da
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
418 changed files with 6203 additions and 7040 deletions

View file

@ -9,7 +9,7 @@ rust-version = { workspace = true }
[dependencies]
ruff_rustpython = { path = "../ruff_rustpython" }
ruff_text_size = { path = "../ruff_text_size" }
ruff_text_size = { workspace = true, features = ["serde"] }
anyhow = { workspace = true }
bitflags = { workspace = true }
@ -23,5 +23,5 @@ regex = { workspace = true }
rustc-hash = { workspace = true }
rustpython-common = { workspace = true }
rustpython-parser = { workspace = true }
serde = { workspace = true }
serde = { workspace = true, optional = true }
smallvec = { workspace = true }

View file

@ -6,28 +6,29 @@ use log::error;
use num_traits::Zero;
use once_cell::sync::Lazy;
use regex::Regex;
use ruff_text_size::{TextRange, TextSize};
use rustc_hash::{FxHashMap, FxHashSet};
use rustpython_parser::ast::{
Arguments, Cmpop, Constant, Excepthandler, ExcepthandlerKind, Expr, ExprKind, Keyword,
KeywordData, Located, Location, MatchCase, Pattern, PatternKind, Stmt, StmtKind,
KeywordData, Located, MatchCase, Pattern, PatternKind, Stmt, StmtKind,
};
use rustpython_parser::{lexer, Mode, Tok};
use smallvec::SmallVec;
use crate::call_path::CallPath;
use crate::newlines::UniversalNewlineIterator;
use crate::source_code::{Generator, Indexer, Locator, Stylist};
use crate::types::Range;
use crate::visitor;
use crate::visitor::Visitor;
/// Create an `Expr` with default location from an `ExprKind`.
pub fn create_expr(node: ExprKind) -> Expr {
Expr::new(Location::default(), Location::default(), node)
Expr::with_range(node, TextRange::default())
}
/// Create a `Stmt` with a default location from a `StmtKind`.
pub fn create_stmt(node: StmtKind) -> Stmt {
Stmt::new(Location::default(), Location::default(), node)
Stmt::with_range(node, TextRange::default())
}
/// Generate source code from an [`Expr`].
@ -617,24 +618,27 @@ pub fn map_callable(decorator: &Expr) -> &Expr {
/// Returns `true` if a statement or expression includes at least one comment.
pub fn has_comments<T>(located: &Located<T>, locator: &Locator) -> bool {
let start = if match_leading_content(located, locator) {
located.location
let start = if has_leading_content(located, locator) {
located.start()
} else {
Location::new(located.location.row(), 0)
locator.line_start(located.start())
};
let end = if match_trailing_content(located, locator) {
located.end_location.unwrap()
let end = if has_trailing_content(located, locator) {
located.end()
} else {
Location::new(located.end_location.unwrap().row() + 1, 0)
locator.line_end(located.end())
};
has_comments_in(Range::new(start, end), locator)
has_comments_in(TextRange::new(start, end), locator)
}
/// Returns `true` if a [`Range`] includes at least one comment.
pub fn has_comments_in(range: Range, locator: &Locator) -> bool {
for tok in lexer::lex_located(locator.slice(range), Mode::Module, range.location) {
/// Returns `true` if a [`TextRange`] includes at least one comment.
pub fn has_comments_in(range: TextRange, locator: &Locator) -> bool {
let source = &locator.contents()[range];
for tok in lexer::lex_located(source, Mode::Module, range.start()) {
match tok {
Ok((_, tok, _)) => {
Ok((tok, _)) => {
if matches!(tok, Tok::Comment(..)) {
return true;
}
@ -836,7 +840,7 @@ where
/// A [`Visitor`] that collects all `raise` statements in a function or method.
#[derive(Default)]
pub struct RaiseStatementVisitor<'a> {
pub raises: Vec<(Range, Option<&'a Expr>, Option<&'a Expr>)>,
pub raises: Vec<(TextRange, Option<&'a Expr>, Option<&'a Expr>)>,
}
impl<'a, 'b> Visitor<'b> for RaiseStatementVisitor<'b>
@ -847,7 +851,7 @@ where
match &stmt.node {
StmtKind::Raise { exc, cause } => {
self.raises
.push((Range::from(stmt), exc.as_deref(), cause.as_deref()));
.push((stmt.range(), exc.as_deref(), cause.as_deref()));
}
StmtKind::ClassDef { .. }
| StmtKind::FunctionDef { .. }
@ -907,45 +911,19 @@ pub fn extract_globals(body: &[Stmt]) -> FxHashMap<&str, &Stmt> {
visitor.globals
}
/// Convert a location within a file (relative to `base`) to an absolute
/// position.
pub fn to_absolute(relative: Location, base: Location) -> Location {
if relative.row() == 1 {
Location::new(
relative.row() + base.row() - 1,
relative.column() + base.column(),
)
} else {
Location::new(relative.row() + base.row() - 1, relative.column())
}
}
pub fn to_relative(absolute: Location, base: Location) -> Location {
if absolute.row() == base.row() {
Location::new(
absolute.row() - base.row() + 1,
absolute.column() - base.column(),
)
} else {
Location::new(absolute.row() - base.row() + 1, absolute.column())
}
}
/// Return `true` if a [`Located`] has leading content.
pub fn match_leading_content<T>(located: &Located<T>, locator: &Locator) -> bool {
let range = Range::new(Location::new(located.location.row(), 0), located.location);
let prefix = locator.slice(range);
prefix.chars().any(|char| !char.is_whitespace())
pub fn has_leading_content<T>(located: &Located<T>, locator: &Locator) -> bool {
let line_start = locator.line_start(located.start());
let leading = &locator.contents()[TextRange::new(line_start, located.start())];
leading.chars().any(|char| !char.is_whitespace())
}
/// Return `true` if a [`Located`] has trailing content.
pub fn match_trailing_content<T>(located: &Located<T>, locator: &Locator) -> bool {
let range = Range::new(
located.end_location.unwrap(),
Location::new(located.end_location.unwrap().row() + 1, 0),
);
let suffix = locator.slice(range);
for char in suffix.chars() {
pub fn has_trailing_content<T>(located: &Located<T>, locator: &Locator) -> bool {
let line_end = locator.line_end(located.end());
let trailing = &locator.contents()[TextRange::new(located.end(), line_end)];
for char in trailing.chars() {
if char == '#' {
return false;
}
@ -957,55 +935,66 @@ pub fn match_trailing_content<T>(located: &Located<T>, locator: &Locator) -> boo
}
/// If a [`Located`] has a trailing comment, return the index of the hash.
pub fn match_trailing_comment<T>(located: &Located<T>, locator: &Locator) -> Option<usize> {
let range = Range::new(
located.end_location.unwrap(),
Location::new(located.end_location.unwrap().row() + 1, 0),
);
let suffix = locator.slice(range);
for (i, char) in suffix.chars().enumerate() {
pub fn trailing_comment_start_offset<T>(
located: &Located<T>,
locator: &Locator,
) -> Option<TextSize> {
let line_end = locator.line_end(located.end());
let trailing = &locator.contents()[TextRange::new(located.end(), line_end)];
for (i, char) in trailing.chars().enumerate() {
if char == '#' {
return Some(i);
return TextSize::try_from(i).ok();
}
if !char.is_whitespace() {
return None;
}
}
None
}
/// Return the number of trailing empty lines following a statement.
pub fn count_trailing_lines(stmt: &Stmt, locator: &Locator) -> usize {
let suffix = locator.after(Location::new(stmt.end_location.unwrap().row() + 1, 0));
suffix
.lines()
/// Return the end offset of the empty lines following a statement.
pub fn trailing_lines_end(stmt: &Stmt, locator: &Locator) -> TextSize {
let line_end = locator.full_line_end(stmt.end());
let rest = &locator.contents()[usize::from(line_end)..];
UniversalNewlineIterator::with_offset(rest, line_end)
.take_while(|line| line.trim().is_empty())
.count()
.last()
.map_or(line_end, |l| l.full_end())
}
/// Return the range of the first parenthesis pair after a given [`Location`].
pub fn match_parens(start: Location, locator: &Locator) -> Option<Range> {
let contents = locator.after(start);
/// Return the range of the first parenthesis pair after a given [`TextSize`].
pub fn match_parens(start: TextSize, locator: &Locator) -> Option<TextRange> {
let contents = &locator.contents()[usize::from(start)..];
let mut fix_start = None;
let mut fix_end = None;
let mut count: usize = 0;
for (start, tok, end) in lexer::lex_located(contents, Mode::Module, start).flatten() {
if matches!(tok, Tok::Lpar) {
if count == 0 {
fix_start = Some(start);
for (tok, range) in lexer::lex_located(contents, Mode::Module, start).flatten() {
match tok {
Tok::Lpar => {
if count == 0 {
fix_start = Some(range.start());
}
count += 1;
}
count += 1;
}
if matches!(tok, Tok::Rpar) {
count -= 1;
if count == 0 {
fix_end = Some(end);
break;
Tok::Rpar => {
count -= 1;
if count == 0 {
fix_end = Some(range.end());
break;
}
}
_ => {}
}
}
match (fix_start, fix_end) {
(Some(start), Some(end)) => Some(Range::new(start, end)),
(Some(start), Some(end)) => Some(TextRange::new(start, end)),
_ => None,
}
}
@ -1013,182 +1002,175 @@ pub fn match_parens(start: Location, locator: &Locator) -> Option<Range> {
/// Return the appropriate visual `Range` for any message that spans a `Stmt`.
/// Specifically, this method returns the range of a function or class name,
/// rather than that of the entire function or class body.
pub fn identifier_range(stmt: &Stmt, locator: &Locator) -> Range {
pub fn identifier_range(stmt: &Stmt, locator: &Locator) -> TextRange {
if matches!(
stmt.node,
StmtKind::ClassDef { .. }
| StmtKind::FunctionDef { .. }
| StmtKind::AsyncFunctionDef { .. }
) {
let contents = locator.slice(stmt);
for (start, tok, end) in lexer::lex_located(contents, Mode::Module, stmt.location).flatten()
{
let contents = &locator.contents()[stmt.range()];
for (tok, range) in lexer::lex_located(contents, Mode::Module, stmt.start()).flatten() {
if matches!(tok, Tok::Name { .. }) {
return Range::new(start, end);
return range;
}
}
error!("Failed to find identifier for {:?}", stmt);
}
Range::from(stmt)
stmt.range()
}
/// Return the ranges of [`Tok::Name`] tokens within a specified node.
pub fn find_names<'a, T>(
located: &'a Located<T>,
locator: &'a Locator,
) -> impl Iterator<Item = Range> + 'a {
let contents = locator.slice(located);
lexer::lex_located(contents, Mode::Module, located.location)
) -> impl Iterator<Item = TextRange> + 'a {
let contents = locator.slice(located.range());
lexer::lex_located(contents, Mode::Module, located.start())
.flatten()
.filter(|(_, tok, _)| matches!(tok, Tok::Name { .. }))
.map(|(start, _, end)| Range {
location: start,
end_location: end,
})
.filter(|(tok, _)| matches!(tok, Tok::Name { .. }))
.map(|(_, range)| range)
}
/// Return the `Range` of `name` in `Excepthandler`.
pub fn excepthandler_name_range(handler: &Excepthandler, locator: &Locator) -> Option<Range> {
pub fn excepthandler_name_range(handler: &Excepthandler, locator: &Locator) -> Option<TextRange> {
let ExcepthandlerKind::ExceptHandler {
name, type_, body, ..
} = &handler.node;
match (name, type_) {
(Some(_), Some(type_)) => {
let type_end_location = type_.end_location.unwrap();
let contents = locator.slice(Range::new(type_end_location, body[0].location));
let range = lexer::lex_located(contents, Mode::Module, type_end_location)
let contents = &locator.contents()[TextRange::new(type_.end(), body[0].start())];
lexer::lex_located(contents, Mode::Module, type_.end())
.flatten()
.tuple_windows()
.find(|(tok, next_tok)| {
matches!(tok.1, Tok::As) && matches!(next_tok.1, Tok::Name { .. })
matches!(tok.0, Tok::As) && matches!(next_tok.0, Tok::Name { .. })
})
.map(|((..), (location, _, end_location))| Range::new(location, end_location));
range
.map(|((..), (_, range))| range)
}
_ => None,
}
}
/// Return the `Range` of `except` in `Excepthandler`.
pub fn except_range(handler: &Excepthandler, locator: &Locator) -> Range {
pub fn except_range(handler: &Excepthandler, locator: &Locator) -> TextRange {
let ExcepthandlerKind::ExceptHandler { body, type_, .. } = &handler.node;
let end = if let Some(type_) = type_ {
type_.location
type_.end()
} else {
body.first()
.expect("Expected body to be non-empty")
.location
body.first().expect("Expected body to be non-empty").start()
};
let contents = locator.slice(Range {
location: handler.location,
end_location: end,
});
let range = lexer::lex_located(contents, Mode::Module, handler.location)
let contents = &locator.contents()[TextRange::new(handler.start(), end)];
lexer::lex_located(contents, Mode::Module, handler.start())
.flatten()
.find(|(_, kind, _)| matches!(kind, Tok::Except { .. }))
.map(|(location, _, end_location)| Range {
location,
end_location,
})
.expect("Failed to find `except` range");
range
.find(|(kind, _)| matches!(kind, Tok::Except { .. }))
.map(|(_, range)| range)
.expect("Failed to find `except` range")
}
/// Return the `Range` of `else` in `For`, `AsyncFor`, and `While` statements.
pub fn else_range(stmt: &Stmt, locator: &Locator) -> Option<Range> {
pub fn else_range(stmt: &Stmt, locator: &Locator) -> Option<TextRange> {
match &stmt.node {
StmtKind::For { body, orelse, .. }
| StmtKind::AsyncFor { body, orelse, .. }
| StmtKind::While { body, orelse, .. }
if !orelse.is_empty() =>
{
let body_end = body
.last()
.expect("Expected body to be non-empty")
.end_location
.unwrap();
let contents = locator.slice(Range {
location: body_end,
end_location: orelse
.first()
.expect("Expected orelse to be non-empty")
.location,
});
let range = lexer::lex_located(contents, Mode::Module, body_end)
let body_end = body.last().expect("Expected body to be non-empty").end();
let or_else_start = orelse
.first()
.expect("Expected orelse to be non-empty")
.start();
let contents = &locator.contents()[TextRange::new(body_end, or_else_start)];
lexer::lex_located(contents, Mode::Module, body_end)
.flatten()
.find(|(_, kind, _)| matches!(kind, Tok::Else))
.map(|(location, _, end_location)| Range {
location,
end_location,
});
range
.find(|(kind, _)| matches!(kind, Tok::Else))
.map(|(_, range)| range)
}
_ => None,
}
}
/// Return the `Range` of the first `Tok::Colon` token in a `Range`.
pub fn first_colon_range(range: Range, locator: &Locator) -> Option<Range> {
let contents = locator.slice(range);
let range = lexer::lex_located(contents, Mode::Module, range.location)
pub fn first_colon_range(range: TextRange, locator: &Locator) -> Option<TextRange> {
let contents = &locator.contents()[range];
let range = lexer::lex_located(contents, Mode::Module, range.start())
.flatten()
.find(|(_, kind, _)| matches!(kind, Tok::Colon))
.map(|(location, _, end_location)| Range {
location,
end_location,
});
.find(|(kind, _)| matches!(kind, Tok::Colon))
.map(|(_, range)| range);
range
}
/// Return the `Range` of the first `Elif` or `Else` token in an `If` statement.
pub fn elif_else_range(stmt: &Stmt, locator: &Locator) -> Option<Range> {
pub fn elif_else_range(stmt: &Stmt, locator: &Locator) -> Option<TextRange> {
let StmtKind::If { body, orelse, .. } = &stmt.node else {
return None;
};
let start = body
.last()
.expect("Expected body to be non-empty")
.end_location
.unwrap();
let start = body.last().expect("Expected body to be non-empty").end();
let end = match &orelse[..] {
[Stmt {
node: StmtKind::If { test, .. },
..
}] => test.location,
[stmt, ..] => stmt.location,
}] => test.start(),
[stmt, ..] => stmt.start(),
_ => return None,
};
let contents = locator.slice(Range::new(start, end));
let range = lexer::lex_located(contents, Mode::Module, start)
let contents = &locator.contents()[TextRange::new(start, end)];
lexer::lex_located(contents, Mode::Module, start)
.flatten()
.find(|(_, kind, _)| matches!(kind, Tok::Elif | Tok::Else))
.map(|(location, _, end_location)| Range {
location,
end_location,
});
range
.find(|(kind, _)| matches!(kind, Tok::Elif | Tok::Else))
.map(|(_, range)| range)
}
/// Return `true` if a `Stmt` appears to be part of a multi-statement line, with
/// other statements preceding it.
pub fn preceded_by_continuation(stmt: &Stmt, indexer: &Indexer) -> bool {
stmt.location.row() > 1
&& indexer
.continuation_lines()
.contains(&(stmt.location.row() - 1))
pub fn preceded_by_continuation(stmt: &Stmt, indexer: &Indexer, locator: &Locator) -> bool {
let previous_line_end = locator.line_start(stmt.start());
let newline_pos = usize::from(previous_line_end).saturating_sub(1);
// Compute start of preceding line
let newline_len = match locator.contents().as_bytes()[newline_pos] {
b'\n' => {
if locator
.contents()
.as_bytes()
.get(newline_pos.saturating_sub(1))
== Some(&b'\r')
{
2
} else {
1
}
}
b'\r' => 1,
// No preceding line
_ => return false,
};
// Check whether the preceding newline's position is one of the recorded continuation-line starts.
indexer.is_continuation(previous_line_end - TextSize::from(newline_len), locator)
}
/// Return `true` if a `Stmt` appears to be part of a multi-statement line, with
/// other statements preceding it.
pub fn preceded_by_multi_statement_line(stmt: &Stmt, locator: &Locator, indexer: &Indexer) -> bool {
match_leading_content(stmt, locator) || preceded_by_continuation(stmt, indexer)
has_leading_content(stmt, locator) || preceded_by_continuation(stmt, indexer, locator)
}
/// Return `true` if a `Stmt` appears to be part of a multi-statement line, with
/// other statements following it.
pub fn followed_by_multi_statement_line(stmt: &Stmt, locator: &Locator) -> bool {
match_trailing_content(stmt, locator)
has_trailing_content(stmt, locator)
}
/// Return `true` if a `Stmt` is a docstring.
@ -1370,7 +1352,7 @@ pub fn locate_cmpops(contents: &str) -> Vec<LocatedCmpop> {
let mut ops: Vec<LocatedCmpop> = vec![];
let mut count: usize = 0;
loop {
let Some((start, tok, end)) = tok_iter.next() else {
let Some((tok, range)) = tok_iter.next() else {
break;
};
if matches!(tok, Tok::Lpar) {
@ -1383,42 +1365,46 @@ pub fn locate_cmpops(contents: &str) -> Vec<LocatedCmpop> {
if count == 0 {
match tok {
Tok::Not => {
if let Some((_, _, end)) =
tok_iter.next_if(|(_, tok, _)| matches!(tok, Tok::In))
if let Some((_, next_range)) =
tok_iter.next_if(|(tok, _)| matches!(tok, Tok::In))
{
ops.push(LocatedCmpop::new(start, end, Cmpop::NotIn));
ops.push(LocatedCmpop::new(
range.start(),
next_range.end(),
Cmpop::NotIn,
));
}
}
Tok::In => {
ops.push(LocatedCmpop::new(start, end, Cmpop::In));
ops.push(LocatedCmpop::with_range(Cmpop::In, range));
}
Tok::Is => {
let op = if let Some((_, _, end)) =
tok_iter.next_if(|(_, tok, _)| matches!(tok, Tok::Not))
let op = if let Some((_, next_range)) =
tok_iter.next_if(|(tok, _)| matches!(tok, Tok::Not))
{
LocatedCmpop::new(start, end, Cmpop::IsNot)
LocatedCmpop::new(range.start(), next_range.end(), Cmpop::IsNot)
} else {
LocatedCmpop::new(start, end, Cmpop::Is)
LocatedCmpop::with_range(Cmpop::Is, range)
};
ops.push(op);
}
Tok::NotEqual => {
ops.push(LocatedCmpop::new(start, end, Cmpop::NotEq));
ops.push(LocatedCmpop::with_range(Cmpop::NotEq, range));
}
Tok::EqEqual => {
ops.push(LocatedCmpop::new(start, end, Cmpop::Eq));
ops.push(LocatedCmpop::with_range(Cmpop::Eq, range));
}
Tok::GreaterEqual => {
ops.push(LocatedCmpop::new(start, end, Cmpop::GtE));
ops.push(LocatedCmpop::with_range(Cmpop::GtE, range));
}
Tok::Greater => {
ops.push(LocatedCmpop::new(start, end, Cmpop::Gt));
ops.push(LocatedCmpop::with_range(Cmpop::Gt, range));
}
Tok::LessEqual => {
ops.push(LocatedCmpop::new(start, end, Cmpop::LtE));
ops.push(LocatedCmpop::with_range(Cmpop::LtE, range));
}
Tok::Less => {
ops.push(LocatedCmpop::new(start, end, Cmpop::Lt));
ops.push(LocatedCmpop::with_range(Cmpop::Lt, range));
}
_ => {}
}
@ -1524,15 +1510,15 @@ mod tests {
use std::borrow::Cow;
use anyhow::Result;
use ruff_text_size::{TextLen, TextRange, TextSize};
use rustpython_parser as parser;
use rustpython_parser::ast::{Cmpop, Location};
use rustpython_parser::ast::Cmpop;
use crate::helpers::{
elif_else_range, else_range, first_colon_range, identifier_range, locate_cmpops,
match_trailing_content, resolve_imported_module_path, LocatedCmpop,
elif_else_range, else_range, first_colon_range, has_trailing_content, identifier_range,
locate_cmpops, resolve_imported_module_path, LocatedCmpop,
};
use crate::source_code::Locator;
use crate::types::Range;
#[test]
fn trailing_content() -> Result<()> {
@ -1540,25 +1526,25 @@ mod tests {
let program = parser::parse_program(contents, "<filename>")?;
let stmt = program.first().unwrap();
let locator = Locator::new(contents);
assert!(!match_trailing_content(stmt, &locator));
assert!(!has_trailing_content(stmt, &locator));
let contents = "x = 1; y = 2";
let program = parser::parse_program(contents, "<filename>")?;
let stmt = program.first().unwrap();
let locator = Locator::new(contents);
assert!(match_trailing_content(stmt, &locator));
assert!(has_trailing_content(stmt, &locator));
let contents = "x = 1 ";
let program = parser::parse_program(contents, "<filename>")?;
let stmt = program.first().unwrap();
let locator = Locator::new(contents);
assert!(!match_trailing_content(stmt, &locator));
assert!(!has_trailing_content(stmt, &locator));
let contents = "x = 1 # Comment";
let program = parser::parse_program(contents, "<filename>")?;
let stmt = program.first().unwrap();
let locator = Locator::new(contents);
assert!(!match_trailing_content(stmt, &locator));
assert!(!has_trailing_content(stmt, &locator));
let contents = r#"
x = 1
@ -1568,7 +1554,7 @@ y = 2
let program = parser::parse_program(contents, "<filename>")?;
let stmt = program.first().unwrap();
let locator = Locator::new(contents);
assert!(!match_trailing_content(stmt, &locator));
assert!(!has_trailing_content(stmt, &locator));
Ok(())
}
@ -1581,7 +1567,7 @@ y = 2
let locator = Locator::new(contents);
assert_eq!(
identifier_range(stmt, &locator),
Range::new(Location::new(1, 4), Location::new(1, 5),)
TextRange::new(TextSize::from(4), TextSize::from(5))
);
let contents = r#"
@ -1595,7 +1581,7 @@ def \
let locator = Locator::new(contents);
assert_eq!(
identifier_range(stmt, &locator),
Range::new(Location::new(2, 2), Location::new(2, 3),)
TextRange::new(TextSize::from(8), TextSize::from(9))
);
let contents = "class Class(): pass".trim();
@ -1604,7 +1590,7 @@ def \
let locator = Locator::new(contents);
assert_eq!(
identifier_range(stmt, &locator),
Range::new(Location::new(1, 6), Location::new(1, 11),)
TextRange::new(TextSize::from(6), TextSize::from(11))
);
let contents = "class Class: pass".trim();
@ -1613,7 +1599,7 @@ def \
let locator = Locator::new(contents);
assert_eq!(
identifier_range(stmt, &locator),
Range::new(Location::new(1, 6), Location::new(1, 11),)
TextRange::new(TextSize::from(6), TextSize::from(11))
);
let contents = r#"
@ -1627,7 +1613,7 @@ class Class():
let locator = Locator::new(contents);
assert_eq!(
identifier_range(stmt, &locator),
Range::new(Location::new(2, 6), Location::new(2, 11),)
TextRange::new(TextSize::from(19), TextSize::from(24))
);
let contents = r#"x = y + 1"#.trim();
@ -1636,7 +1622,7 @@ class Class():
let locator = Locator::new(contents);
assert_eq!(
identifier_range(stmt, &locator),
Range::new(Location::new(1, 0), Location::new(1, 9),)
TextRange::new(TextSize::from(0), TextSize::from(9))
);
Ok(())
@ -1692,10 +1678,11 @@ else:
let stmt = program.first().unwrap();
let locator = Locator::new(contents);
let range = else_range(stmt, &locator).unwrap();
assert_eq!(range.location.row(), 3);
assert_eq!(range.location.column(), 0);
assert_eq!(range.end_location.row(), 3);
assert_eq!(range.end_location.column(), 4);
assert_eq!(&contents[range], "else");
assert_eq!(
range,
TextRange::new(TextSize::from(21), TextSize::from(25))
);
Ok(())
}
@ -1704,14 +1691,12 @@ else:
let contents = "with a: pass";
let locator = Locator::new(contents);
let range = first_colon_range(
Range::new(Location::new(1, 0), Location::new(1, contents.len())),
TextRange::new(TextSize::from(0), contents.text_len()),
&locator,
)
.unwrap();
assert_eq!(range.location.row(), 1);
assert_eq!(range.location.column(), 6);
assert_eq!(range.end_location.row(), 1);
assert_eq!(range.end_location.column(), 7);
assert_eq!(&contents[range], ":");
assert_eq!(range, TextRange::new(TextSize::from(6), TextSize::from(7)));
}
#[test]
@ -1727,10 +1712,9 @@ elif b:
let stmt = program.first().unwrap();
let locator = Locator::new(contents);
let range = elif_else_range(stmt, &locator).unwrap();
assert_eq!(range.location.row(), 3);
assert_eq!(range.location.column(), 0);
assert_eq!(range.end_location.row(), 3);
assert_eq!(range.end_location.column(), 4);
assert_eq!(range.start(), TextSize::from(14));
assert_eq!(range.end(), TextSize::from(18));
let contents = "
if a:
...
@ -1742,10 +1726,9 @@ else:
let stmt = program.first().unwrap();
let locator = Locator::new(contents);
let range = elif_else_range(stmt, &locator).unwrap();
assert_eq!(range.location.row(), 3);
assert_eq!(range.location.column(), 0);
assert_eq!(range.end_location.row(), 3);
assert_eq!(range.end_location.column(), 4);
assert_eq!(range.start(), TextSize::from(14));
assert_eq!(range.end(), TextSize::from(18));
Ok(())
}
@ -1754,8 +1737,8 @@ else:
assert_eq!(
locate_cmpops("x == 1"),
vec![LocatedCmpop::new(
Location::new(1, 2),
Location::new(1, 4),
TextSize::from(2),
TextSize::from(4),
Cmpop::Eq
)]
);
@ -1763,8 +1746,8 @@ else:
assert_eq!(
locate_cmpops("x != 1"),
vec![LocatedCmpop::new(
Location::new(1, 2),
Location::new(1, 4),
TextSize::from(2),
TextSize::from(4),
Cmpop::NotEq
)]
);
@ -1772,8 +1755,8 @@ else:
assert_eq!(
locate_cmpops("x is 1"),
vec![LocatedCmpop::new(
Location::new(1, 2),
Location::new(1, 4),
TextSize::from(2),
TextSize::from(4),
Cmpop::Is
)]
);
@ -1781,8 +1764,8 @@ else:
assert_eq!(
locate_cmpops("x is not 1"),
vec![LocatedCmpop::new(
Location::new(1, 2),
Location::new(1, 8),
TextSize::from(2),
TextSize::from(8),
Cmpop::IsNot
)]
);
@ -1790,8 +1773,8 @@ else:
assert_eq!(
locate_cmpops("x in 1"),
vec![LocatedCmpop::new(
Location::new(1, 2),
Location::new(1, 4),
TextSize::from(2),
TextSize::from(4),
Cmpop::In
)]
);
@ -1799,8 +1782,8 @@ else:
assert_eq!(
locate_cmpops("x not in 1"),
vec![LocatedCmpop::new(
Location::new(1, 2),
Location::new(1, 8),
TextSize::from(2),
TextSize::from(8),
Cmpop::NotIn
)]
);
@ -1808,8 +1791,8 @@ else:
assert_eq!(
locate_cmpops("x != (1 is not 2)"),
vec![LocatedCmpop::new(
Location::new(1, 2),
Location::new(1, 4),
TextSize::from(2),
TextSize::from(4),
Cmpop::NotEq
)]
);

View file

@ -1,8 +1,8 @@
use ruff_text_size::TextRange;
use rustc_hash::FxHashMap;
use rustpython_parser::ast::Location;
use serde::{Deserialize, Serialize};
use crate::types::Range;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// A representation of an individual name imported via any import statement.
#[derive(Debug, Clone, PartialEq, Eq)]
@ -102,31 +102,28 @@ impl FutureImport for AnyImport<'_> {
}
/// A representation of a module reference in an import statement.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ModuleImport {
module: String,
location: Location,
end_location: Location,
range: TextRange,
}
impl ModuleImport {
pub fn new(module: String, location: Location, end_location: Location) -> Self {
Self {
module,
location,
end_location,
}
pub fn new(module: String, range: TextRange) -> Self {
Self { module, range }
}
}
impl From<&ModuleImport> for Range {
fn from(import: &ModuleImport) -> Range {
Range::new(import.location, import.end_location)
impl From<&ModuleImport> for TextRange {
fn from(import: &ModuleImport) -> TextRange {
import.range
}
}
/// A representation of the import dependencies between modules.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, Default, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ImportMap {
/// A map from dot-delimited module name to the list of imports in that module.
module_to_imports: FxHashMap<String, Vec<ModuleImport>>,

View file

@ -1,4 +1,6 @@
use ruff_text_size::{TextLen, TextRange, TextSize};
use std::iter::FusedIterator;
use std::ops::Deref;
/// Extension trait for [`str`] that provides a [`UniversalNewlineIterator`].
pub trait StrExt {
@ -17,32 +19,42 @@ impl StrExt for str {
/// ## Examples
///
/// ```rust
/// use ruff_python_ast::newlines::UniversalNewlineIterator;
///
/// # use ruff_text_size::TextSize;
/// # use ruff_python_ast::newlines::{Line, UniversalNewlineIterator};
/// let mut lines = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop");
///
/// assert_eq!(lines.next_back(), Some("bop"));
/// assert_eq!(lines.next(), Some("foo"));
/// assert_eq!(lines.next_back(), Some("baz"));
/// assert_eq!(lines.next(), Some("bar"));
/// assert_eq!(lines.next_back(), Some(""));
/// assert_eq!(lines.next_back(), Some(Line::new("bop", TextSize::from(14))));
/// assert_eq!(lines.next(), Some(Line::new("foo\n", TextSize::from(0))));
/// assert_eq!(lines.next_back(), Some(Line::new("baz\r", TextSize::from(10))));
/// assert_eq!(lines.next(), Some(Line::new("bar\n", TextSize::from(4))));
/// assert_eq!(lines.next_back(), Some(Line::new("\r\n", TextSize::from(8))));
/// assert_eq!(lines.next(), None);
/// ```
pub struct UniversalNewlineIterator<'a> {
text: &'a str,
offset: TextSize,
offset_back: TextSize,
}
impl<'a> UniversalNewlineIterator<'a> {
pub fn with_offset(text: &'a str, offset: TextSize) -> UniversalNewlineIterator<'a> {
UniversalNewlineIterator {
text,
offset,
offset_back: offset + text.text_len(),
}
}
pub fn from(text: &'a str) -> UniversalNewlineIterator<'a> {
UniversalNewlineIterator { text }
Self::with_offset(text, TextSize::default())
}
}
impl<'a> Iterator for UniversalNewlineIterator<'a> {
type Item = &'a str;
type Item = Line<'a>;
#[inline]
fn next(&mut self) -> Option<&'a str> {
fn next(&mut self) -> Option<Line<'a>> {
if self.text.is_empty() {
return None;
}
@ -50,21 +62,32 @@ impl<'a> Iterator for UniversalNewlineIterator<'a> {
let line = match self.text.find(['\n', '\r']) {
// Non-last line
Some(line_end) => {
let (line, remainder) = self.text.split_at(line_end);
self.text = match remainder.as_bytes()[0] {
let offset: usize = match self.text.as_bytes()[line_end] {
// Explicit branch for `\n` as this is the most likely path
b'\n' => &remainder[1..],
b'\n' => 1,
// '\r\n'
b'\r' if remainder.as_bytes().get(1) == Some(&b'\n') => &remainder[2..],
b'\r' if self.text.as_bytes().get(line_end + 1) == Some(&b'\n') => 2,
// '\r'
_ => &remainder[1..],
_ => 1,
};
let (text, remainder) = self.text.split_at(line_end + offset);
let line = Line {
offset: self.offset,
text,
};
self.text = remainder;
self.offset += text.text_len();
line
}
// Last line
None => std::mem::take(&mut self.text),
None => Line {
offset: self.offset,
text: std::mem::take(&mut self.text),
},
};
Some(line)
@ -85,7 +108,7 @@ impl DoubleEndedIterator for UniversalNewlineIterator<'_> {
let len = self.text.len();
// Trim any trailing newlines.
self.text = match self.text.as_bytes()[len - 1] {
let haystack = match self.text.as_bytes()[len - 1] {
b'\n' if len > 1 && self.text.as_bytes()[len - 2] == b'\r' => &self.text[..len - 2],
b'\n' | b'\r' => &self.text[..len - 1],
_ => self.text,
@ -93,16 +116,23 @@ impl DoubleEndedIterator for UniversalNewlineIterator<'_> {
// Find the end of the previous line. The previous line is the text up to, but not including
// the newline character.
let line = match self.text.rfind(['\n', '\r']) {
let line = if let Some(line_end) = haystack.rfind(['\n', '\r']) {
// '\n' or '\r' or '\r\n'
Some(line_end) => {
let (remainder, line) = self.text.split_at(line_end + 1);
self.text = remainder;
let (remainder, line) = self.text.split_at(line_end + 1);
self.text = remainder;
self.offset_back -= line.text_len();
line
Line {
text: line,
offset: self.offset_back,
}
} else {
// Last line
None => std::mem::take(&mut self.text),
let offset = self.offset_back - self.text.text_len();
Line {
text: std::mem::take(&mut self.text),
offset,
}
};
Some(line)
@ -113,16 +143,23 @@ impl FusedIterator for UniversalNewlineIterator<'_> {}
/// Like [`UniversalNewlineIterator`], but includes a trailing newline as an empty line.
pub struct NewlineWithTrailingNewline<'a> {
trailing: Option<&'a str>,
trailing: Option<Line<'a>>,
underlying: UniversalNewlineIterator<'a>,
}
impl<'a> NewlineWithTrailingNewline<'a> {
pub fn from(input: &'a str) -> NewlineWithTrailingNewline<'a> {
Self::with_offset(input, TextSize::default())
}
pub fn with_offset(input: &'a str, offset: TextSize) -> Self {
NewlineWithTrailingNewline {
underlying: UniversalNewlineIterator::from(input),
underlying: UniversalNewlineIterator::with_offset(input, offset),
trailing: if input.ends_with(['\r', '\n']) {
Some("")
Some(Line {
text: "",
offset: offset + input.text_len(),
})
} else {
None
},
@ -131,37 +168,159 @@ impl<'a> NewlineWithTrailingNewline<'a> {
}
impl<'a> Iterator for NewlineWithTrailingNewline<'a> {
type Item = &'a str;
type Item = Line<'a>;
#[inline]
fn next(&mut self) -> Option<&'a str> {
fn next(&mut self) -> Option<Line<'a>> {
self.underlying.next().or_else(|| self.trailing.take())
}
}
/// A single line of source text together with the absolute byte offset at
/// which it starts in the enclosing document.
///
/// The stored `text` includes the line's terminating newline character(s),
/// if any (see [`Line::as_full_str`]); use [`Line::as_str`] for the text
/// without the terminator.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Line<'a> {
    // Full line text, including a trailing `\n`, `\r`, or `\r\n` when present.
    text: &'a str,
    // Byte offset of the first character of this line within the source.
    offset: TextSize,
}
impl<'a> Line<'a> {
    /// Creates a new line covering `text`, which starts at byte `offset` of the source.
    pub fn new(text: &'a str, offset: TextSize) -> Self {
        Self { text, offset }
    }

    /// Returns the byte offset at which this line starts.
    #[inline]
    pub const fn start(&self) -> TextSize {
        self.offset
    }

    /// Returns the byte offset where the line ends, including its terminating new line character.
    #[inline]
    pub fn full_end(&self) -> TextSize {
        self.offset + self.full_text_len()
    }

    /// Returns the byte offset where the line ends, excluding its new line character.
    #[inline]
    pub fn end(&self) -> TextSize {
        self.offset + self.as_str().text_len()
    }

    /// Returns the range of the line, including its terminating new line character.
    #[inline]
    pub fn full_range(&self) -> TextRange {
        TextRange::at(self.offset, self.full_text_len())
    }

    /// Returns the range of the line, excluding its terminating new line character.
    #[inline]
    pub fn range(&self) -> TextRange {
        TextRange::new(self.start(), self.end())
    }

    /// Returns the text of the line, excluding the terminating new line character.
    #[inline]
    pub fn as_str(&self) -> &'a str {
        // Strip exactly one trailing line break. `\r\n` must be tried first so
        // it is removed as a single two-byte terminator; otherwise a lone `\n`
        // or `\r` is removed. A line with no terminator is returned unchanged.
        self.text
            .strip_suffix("\r\n")
            .or_else(|| self.text.strip_suffix('\n'))
            .or_else(|| self.text.strip_suffix('\r'))
            .unwrap_or(self.text)
    }

    /// Returns the line's text, including the terminating new line character.
    #[inline]
    pub fn as_full_str(&self) -> &'a str {
        self.text
    }

    /// Returns the length of the line in bytes, including its terminating new line character.
    #[inline]
    pub fn full_text_len(&self) -> TextSize {
        self.text.text_len()
    }
}
impl Deref for Line<'_> {
type Target = str;
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
impl PartialEq<&str> for Line<'_> {
    /// A [`Line`] compares equal to a `&str` when its newline-stripped text matches.
    fn eq(&self, other: &&str) -> bool {
        *other == self.as_str()
    }
}
impl PartialEq<Line<'_>> for &str {
    /// Mirror of `PartialEq<&str> for Line` so comparisons work in either order.
    fn eq(&self, other: &Line<'_>) -> bool {
        other.as_str() == *self
    }
}
#[cfg(test)]
mod tests {
use super::UniversalNewlineIterator;
use crate::newlines::Line;
use ruff_text_size::TextSize;
#[test]
fn universal_newlines_empty_str() {
let lines: Vec<_> = UniversalNewlineIterator::from("").collect();
assert_eq!(lines, Vec::<&str>::default());
assert_eq!(lines, Vec::<Line>::new());
let lines: Vec<_> = UniversalNewlineIterator::from("").rev().collect();
assert_eq!(lines, Vec::<&str>::default());
assert_eq!(lines, Vec::<Line>::new());
}
#[test]
fn universal_newlines_forward() {
let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop").collect();
assert_eq!(lines, vec!["foo", "bar", "", "baz", "bop"]);
assert_eq!(
lines,
vec![
Line::new("foo\n", TextSize::from(0)),
Line::new("bar\n", TextSize::from(4)),
Line::new("\r\n", TextSize::from(8)),
Line::new("baz\r", TextSize::from(10)),
Line::new("bop", TextSize::from(14)),
]
);
let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop\n").collect();
assert_eq!(lines, vec!["foo", "bar", "", "baz", "bop"]);
assert_eq!(
lines,
vec![
Line::new("foo\n", TextSize::from(0)),
Line::new("bar\n", TextSize::from(4)),
Line::new("\r\n", TextSize::from(8)),
Line::new("baz\r", TextSize::from(10)),
Line::new("bop\n", TextSize::from(14)),
]
);
let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop\n\n").collect();
assert_eq!(lines, vec!["foo", "bar", "", "baz", "bop", ""]);
assert_eq!(
lines,
vec![
Line::new("foo\n", TextSize::from(0)),
Line::new("bar\n", TextSize::from(4)),
Line::new("\r\n", TextSize::from(8)),
Line::new("baz\r", TextSize::from(10)),
Line::new("bop\n", TextSize::from(14)),
Line::new("\n", TextSize::from(18)),
]
);
}
#[test]
@ -169,24 +328,52 @@ mod tests {
let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop")
.rev()
.collect();
assert_eq!(lines, vec!["bop", "baz", "", "bar", "foo"]);
assert_eq!(
lines,
vec![
Line::new("bop", TextSize::from(14)),
Line::new("baz\r", TextSize::from(10)),
Line::new("\r\n", TextSize::from(8)),
Line::new("bar\n", TextSize::from(4)),
Line::new("foo\n", TextSize::from(0)),
]
);
let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\nbaz\rbop\n")
.rev()
.map(|line| line.as_str())
.collect();
assert_eq!(lines, vec!["bop", "baz", "", "bar", "foo"]);
assert_eq!(
lines,
vec![
Line::new("bop\n", TextSize::from(13)),
Line::new("baz\r", TextSize::from(9)),
Line::new("\n", TextSize::from(8)),
Line::new("bar\n", TextSize::from(4)),
Line::new("foo\n", TextSize::from(0)),
]
);
}
#[test]
fn universal_newlines_mixed() {
let mut lines = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop");
assert_eq!(lines.next_back(), Some("bop"));
assert_eq!(lines.next(), Some("foo"));
assert_eq!(lines.next_back(), Some("baz"));
assert_eq!(lines.next(), Some("bar"));
assert_eq!(lines.next_back(), Some(""));
assert_eq!(
lines.next_back(),
Some(Line::new("bop", TextSize::from(14)))
);
assert_eq!(lines.next(), Some(Line::new("foo\n", TextSize::from(0))));
assert_eq!(
lines.next_back(),
Some(Line::new("baz\r", TextSize::from(10)))
);
assert_eq!(lines.next(), Some(Line::new("bar\n", TextSize::from(4))));
assert_eq!(
lines.next_back(),
Some(Line::new("\r\n", TextSize::from(8)))
);
assert_eq!(lines.next(), None);
}
}

View file

@ -1,18 +1,15 @@
use ruff_text_size::TextRange;
use rustpython_parser::ast::{Expr, ExprKind, Keyword};
use crate::types::Range;
fn relocate_keyword(keyword: &mut Keyword, location: Range) {
keyword.location = location.location;
keyword.end_location = Some(location.end_location);
fn relocate_keyword(keyword: &mut Keyword, location: TextRange) {
keyword.range = location;
relocate_expr(&mut keyword.node.value, location);
}
/// Change an expression's location (recursively) to match a desired, fixed
/// location.
pub fn relocate_expr(expr: &mut Expr, location: Range) {
expr.location = location.location;
expr.end_location = Some(location.end_location);
pub fn relocate_expr(expr: &mut Expr, location: TextRange) {
expr.range = location;
match &mut expr.node {
ExprKind::BoolOp { values, .. } => {
for expr in values {

View file

@ -3,10 +3,11 @@
use std::ops::Deref;
use rustpython_parser::ast::{
Alias, Arg, Arguments, Boolop, Cmpop, Comprehension, Constant, ConversionFlag, Excepthandler,
Alias, Arg, Arguments, Boolop, Cmpop, Comprehension, Constant, Excepthandler,
ExcepthandlerKind, Expr, ExprKind, MatchCase, Operator, Pattern, PatternKind, Stmt, StmtKind,
Suite, Withitem,
};
use rustpython_parser::ConversionFlag;
use ruff_rustpython::vendor::{bytes, str};

View file

@ -1,98 +1,135 @@
//! Struct used to index source code, to enable efficient lookup of tokens that
//! are omitted from the AST (e.g., commented lines).
use rustpython_parser::ast::Location;
use crate::source_code::Locator;
use ruff_text_size::{TextRange, TextSize};
use rustpython_parser::lexer::LexResult;
use rustpython_parser::Tok;
use crate::types::Range;
pub struct Indexer {
commented_lines: Vec<usize>,
continuation_lines: Vec<usize>,
string_ranges: Vec<Range>,
/// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping.
comment_ranges: Vec<TextRange>,
/// Stores the start offset of continuation lines.
continuation_lines: Vec<TextSize>,
/// The range of all triple quoted strings in the source document. The ranges are sorted by their
/// [`TextRange::start`] position in increasing order. No two ranges are overlapping.
triple_quoted_string_ranges: Vec<TextRange>,
}
impl Indexer {
/// Return a slice of all lines that include a comment.
pub fn commented_lines(&self) -> &[usize] {
&self.commented_lines
}
pub fn from_tokens(tokens: &[LexResult], locator: &Locator) -> Self {
assert!(TextSize::try_from(locator.contents().len()).is_ok());
/// Return a slice of all lines that end with a continuation (backslash).
pub fn continuation_lines(&self) -> &[usize] {
&self.continuation_lines
}
/// Return a slice of all ranges that include a triple-quoted string.
pub fn string_ranges(&self) -> &[Range] {
&self.string_ranges
}
}
impl From<&[LexResult]> for Indexer {
fn from(lxr: &[LexResult]) -> Self {
let mut commented_lines = Vec::new();
let mut continuation_lines = Vec::new();
let mut string_ranges = Vec::new();
let mut prev: Option<(&Location, &Tok, &Location)> = None;
for (start, tok, end) in lxr.iter().flatten() {
// Token, end
let mut prev_end = TextSize::default();
let mut prev_token: Option<&Tok> = None;
let mut line_start = TextSize::default();
for (tok, range) in tokens.iter().flatten() {
let trivia = &locator.contents()[TextRange::new(prev_end, range.start())];
// Get the trivia between the previous and the current token and detect any newlines.
// This is necessary because `RustPython` doesn't emit `[Tok::Newline]` tokens
// between any two tokens that form a continuation nor multiple newlines in a row.
// That's why we have to extract the newlines "manually".
for (index, text) in trivia.match_indices(['\n', '\r']) {
if text == "\r" && trivia.as_bytes().get(index + 1) == Some(&b'\n') {
continue;
}
// Newlines after a comment or new-line never form a continuation.
if !matches!(
prev_token,
Some(Tok::Newline | Tok::NonLogicalNewline | Tok::Comment(..)) | None
) {
continuation_lines.push(line_start);
}
// SAFETY: Safe because of the len assertion at the top of the function.
#[allow(clippy::cast_possible_truncation)]
{
line_start = prev_end + TextSize::new((index + 1) as u32);
}
}
match tok {
Tok::Comment(..) => commented_lines.push(start.row()),
Tok::Comment(..) => {
commented_lines.push(*range);
}
Tok::Newline | Tok::NonLogicalNewline => {
line_start = range.end();
}
Tok::String {
triple_quoted: true,
..
} => string_ranges.push(Range::new(*start, *end)),
_ => (),
} => string_ranges.push(*range),
_ => {}
}
if let Some((.., prev_tok, prev_end)) = prev {
if !matches!(
prev_tok,
Tok::Newline | Tok::NonLogicalNewline | Tok::Comment(..)
) {
for line in prev_end.row()..start.row() {
continuation_lines.push(line);
}
}
}
prev = Some((start, tok, end));
prev_token = Some(tok);
prev_end = range.end();
}
Self {
commented_lines,
comment_ranges: commented_lines,
continuation_lines,
string_ranges,
triple_quoted_string_ranges: string_ranges,
}
}
/// Returns the byte offset ranges of comments
pub fn comment_ranges(&self) -> &[TextRange] {
&self.comment_ranges
}
/// Returns the line start positions of continuations (backslash).
pub fn continuation_line_starts(&self) -> &[TextSize] {
&self.continuation_lines
}
/// Return a slice of all ranges that include a triple-quoted string. The ranges are sorted by
/// [`TextRange::start`] in increasing order. No two ranges are overlapping.
pub fn triple_quoted_string_ranges(&self) -> &[TextRange] {
&self.triple_quoted_string_ranges
}
pub fn is_continuation(&self, offset: TextSize, locator: &Locator) -> bool {
let line_start = locator.line_start(offset);
self.continuation_lines.binary_search(&line_start).is_ok()
}
}
#[cfg(test)]
mod tests {
use rustpython_parser::ast::Location;
use ruff_text_size::{TextRange, TextSize};
use rustpython_parser::lexer::LexResult;
use rustpython_parser::{lexer, Mode};
use crate::source_code::Indexer;
use crate::types::Range;
use crate::source_code::{Indexer, Locator};
#[test]
fn continuation() {
let contents = r#"x = 1"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
assert_eq!(indexer.continuation_lines(), Vec::<usize>::new().as_slice());
let indexer = Indexer::from_tokens(&lxr, &Locator::new(contents));
assert_eq!(indexer.continuation_line_starts(), &[]);
let contents = r#"
# Hello, world!
# Hello, world!
x = 1
y = 2
"#
"#
.trim();
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
assert_eq!(indexer.continuation_lines(), Vec::<usize>::new().as_slice());
let indexer = Indexer::from_tokens(&lxr, &Locator::new(contents));
assert_eq!(indexer.continuation_line_starts(), &[]);
let contents = r#"
x = \
@ -111,8 +148,20 @@ if True:
"#
.trim();
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
assert_eq!(indexer.continuation_lines(), [1, 5, 6, 11]);
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
assert_eq!(
indexer.continuation_line_starts(),
[
// row 1
TextSize::from(0),
// row 5
TextSize::from(22),
// row 6
TextSize::from(32),
// row 11
TextSize::from(71),
]
);
let contents = r#"
x = 1; import sys
@ -131,16 +180,24 @@ import os
"#
.trim();
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
assert_eq!(indexer.continuation_lines(), [9, 12]);
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
assert_eq!(
indexer.continuation_line_starts(),
[
// row 9
TextSize::from(84),
// row 12
TextSize::from(116)
]
);
}
#[test]
fn string_ranges() {
let contents = r#""this is a single-quoted string""#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
assert_eq!(indexer.string_ranges(), &vec![]);
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
assert_eq!(indexer.triple_quoted_string_ranges(), []);
let contents = r#"
"""
@ -148,10 +205,10 @@ import os
"""
"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
assert_eq!(
indexer.string_ranges(),
&vec![Range::new(Location::new(2, 12), Location::new(4, 15))]
indexer.triple_quoted_string_ranges(),
[TextRange::new(TextSize::from(13), TextSize::from(71))]
);
let contents = r#"
@ -160,10 +217,10 @@ import os
"""
"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
assert_eq!(
indexer.string_ranges(),
&vec![Range::new(Location::new(2, 12), Location::new(4, 15))]
indexer.triple_quoted_string_ranges(),
[TextRange::new(TextSize::from(13), TextSize::from(107))]
);
let contents = r#"
@ -177,12 +234,12 @@ import os
"""
"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
assert_eq!(
indexer.string_ranges(),
&vec![
Range::new(Location::new(2, 12), Location::new(5, 15)),
Range::new(Location::new(6, 12), Location::new(9, 15))
indexer.triple_quoted_string_ranges(),
&[
TextRange::new(TextSize::from(13), TextSize::from(85)),
TextRange::new(TextSize::from(98), TextSize::from(161))
]
);
}

View file

@ -1,12 +1,14 @@
use crate::source_code::SourceLocation;
use ruff_text_size::{TextLen, TextRange, TextSize};
use rustpython_parser::ast::Location;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::fmt;
use std::fmt::{Debug, Formatter};
use std::num::NonZeroUsize;
use std::ops::Deref;
use std::sync::Arc;
/// Index for fast [`Location`] to [byte offset](TextSize) conversions.
/// Index for fast [byte offset](TextSize) to [`SourceLocation`] conversions.
///
/// Cloning a [`LineIndex`] is cheap because it only requires bumping a reference count.
#[derive(Clone)]
@ -58,28 +60,63 @@ impl LineIndex {
self.inner.kind
}
/// Converts a [`Location`] to it's [byte offset](TextSize) in the source code.
pub fn location_offset(&self, location: Location, contents: &str) -> TextSize {
let line_index = OneIndexed::new(location.row()).unwrap();
let line_range = self.line_range(line_index, contents);
/// Returns the row and column index for an offset.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::TextSize;
/// # use ruff_python_ast::source_code::{LineIndex, OneIndexed, SourceLocation};
/// let source = "def a():\n pass";
/// let index = LineIndex::from_source_text(source);
///
/// assert_eq!(
/// index.source_location(TextSize::from(0), source),
/// SourceLocation { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(0) }
/// );
///
/// assert_eq!(
/// index.source_location(TextSize::from(4), source),
/// SourceLocation { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(4) }
/// );
/// assert_eq!(
/// index.source_location(TextSize::from(13), source),
/// SourceLocation { row: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(4) }
/// );
/// ```
///
/// ## Panics
///
/// If the offset is out of bounds.
pub fn source_location(&self, offset: TextSize, content: &str) -> SourceLocation {
match self.line_starts().binary_search(&offset) {
// Offset is at the start of a line
Ok(row) => SourceLocation {
row: OneIndexed::from_zero_indexed(row),
column: OneIndexed::from_zero_indexed(0),
},
Err(next_row) => {
// SAFETY: Safe because the index always contains an entry for the offset 0
let row = next_row - 1;
let mut line_start = self.line_starts()[row];
let column_offset = match self.kind() {
IndexKind::Ascii => TextSize::try_from(location.column()).unwrap(),
IndexKind::Utf8 => {
let line = &contents[line_range];
let column = if self.kind().is_ascii() {
usize::from(offset) - usize::from(line_start)
} else {
// Don't count the BOM character as a column.
if line_start == TextSize::from(0) && content.starts_with('\u{feff}') {
line_start = '\u{feff}'.text_len();
}
// Skip the bom character
let bom_len =
usize::from(line_index.to_zero_indexed() == 0 && line.starts_with('\u{feff}'));
content[TextRange::new(line_start, offset)].chars().count()
};
match line.char_indices().nth(location.column() + bom_len) {
Some((offset, _)) => TextSize::try_from(offset).unwrap(),
None => line_range.len(),
SourceLocation {
row: OneIndexed::from_zero_indexed(row),
column: OneIndexed::from_zero_indexed(column),
}
}
};
line_range.start() + column_offset
}
}
/// Return the number of lines in the source code.
@ -87,6 +124,35 @@ impl LineIndex {
self.line_starts().len()
}
/// Returns the row number for a given offset.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::TextSize;
/// # use ruff_python_ast::source_code::{LineIndex, OneIndexed, SourceLocation};
/// let source = "def a():\n    pass";
/// let index = LineIndex::from_source_text(source);
///
/// assert_eq!(index.line_index(TextSize::from(0)), OneIndexed::from_zero_indexed(0));
/// assert_eq!(index.line_index(TextSize::from(4)), OneIndexed::from_zero_indexed(0));
/// assert_eq!(index.line_index(TextSize::from(13)), OneIndexed::from_zero_indexed(1));
/// ```
///
/// ## Panics
///
/// If the offset is out of bounds.
pub fn line_index(&self, offset: TextSize) -> OneIndexed {
    // `Ok` means `offset` is exactly at a line start; `Err` yields the
    // insertion point, i.e. the row *after* the line containing `offset`.
    // SAFETY: `next_row >= 1` because the index always contains an entry for offset 0.
    let row = self
        .line_starts()
        .binary_search(&offset)
        .unwrap_or_else(|next_row| next_row - 1);
    OneIndexed::from_zero_indexed(row)
}
/// Returns the [byte offset](TextSize) for the `line` with the given index.
pub(crate) fn line_start(&self, line: OneIndexed, contents: &str) -> TextSize {
let row_index = line.to_zero_indexed();
@ -159,12 +225,19 @@ enum IndexKind {
Utf8,
}
impl IndexKind {
    /// Returns `true` for the [`IndexKind::Ascii`] variant.
    const fn is_ascii(self) -> bool {
        match self {
            IndexKind::Ascii => true,
            IndexKind::Utf8 => false,
        }
    }
}
/// Type-safe wrapper for a value whose logical range starts at `1`, for
/// instance the line or column numbers in a file
///
/// Internally this is represented as a [`NonZeroUsize`], this enables some
/// memory optimizations
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct OneIndexed(NonZeroUsize);
impl OneIndexed {
@ -238,8 +311,8 @@ const fn unwrap<T: Copy>(option: Option<T>) -> T {
#[cfg(test)]
mod tests {
use crate::source_code::line_index::LineIndex;
use crate::source_code::{OneIndexed, SourceLocation};
use ruff_text_size::TextSize;
use rustpython_parser::ast::Location;
#[test]
fn ascii_index() {
@ -265,21 +338,38 @@ mod tests {
}
#[test]
fn ascii_byte_offset() {
fn ascii_source_location() {
let contents = "x = 1\ny = 2";
let index = LineIndex::from_source_text(contents);
// First row.
let loc = index.location_offset(Location::new(1, 0), contents);
assert_eq!(loc, TextSize::from(0));
let loc = index.source_location(TextSize::from(2), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(2)
}
);
// Second row.
let loc = index.location_offset(Location::new(2, 0), contents);
assert_eq!(loc, TextSize::from(6));
let loc = index.source_location(TextSize::from(6), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
// One-past-the-end.
let loc = index.location_offset(Location::new(3, 0), contents);
assert_eq!(loc, TextSize::from(11));
let loc = index.source_location(TextSize::from(11), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(5)
}
);
}
#[test]
@ -289,16 +379,25 @@ mod tests {
assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(6)]);
assert_eq!(
index.location_offset(Location::new(1, 4), contents),
TextSize::from(4)
index.source_location(TextSize::from(4), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(4)
}
);
assert_eq!(
index.location_offset(Location::new(2, 0), contents),
TextSize::from(6)
index.source_location(TextSize::from(6), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
assert_eq!(
index.location_offset(Location::new(2, 1), contents),
TextSize::from(7)
index.source_location(TextSize::from(7), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1)
}
);
}
@ -309,16 +408,25 @@ mod tests {
assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(7)]);
assert_eq!(
index.location_offset(Location::new(1, 4), contents),
TextSize::from(4)
index.source_location(TextSize::from(4), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(4)
}
);
assert_eq!(
index.location_offset(Location::new(2, 0), contents),
TextSize::from(7)
index.source_location(TextSize::from(7), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
assert_eq!(
index.location_offset(Location::new(2, 1), contents),
TextSize::from(8)
index.source_location(TextSize::from(8), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1)
}
);
}
@ -367,16 +475,25 @@ mod tests {
// Second '
assert_eq!(
index.location_offset(Location::new(1, 6), contents),
TextSize::from(9)
index.source_location(TextSize::from(9), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(6)
}
);
assert_eq!(
index.location_offset(Location::new(2, 0), contents),
TextSize::from(11)
index.source_location(TextSize::from(11), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
assert_eq!(
index.location_offset(Location::new(2, 1), contents),
TextSize::from(12)
index.source_location(TextSize::from(12), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1)
}
);
}
@ -392,16 +509,25 @@ mod tests {
// Second '
assert_eq!(
index.location_offset(Location::new(1, 6), contents),
TextSize::from(9)
index.source_location(TextSize::from(9), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(6)
}
);
assert_eq!(
index.location_offset(Location::new(2, 0), contents),
TextSize::from(12)
index.source_location(TextSize::from(12), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
assert_eq!(
index.location_offset(Location::new(2, 1), contents),
TextSize::from(13)
index.source_location(TextSize::from(13), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1)
}
);
}
@ -415,23 +541,51 @@ mod tests {
);
// First row.
let loc = index.location_offset(Location::new(1, 0), contents);
assert_eq!(loc, TextSize::from(0));
let loc = index.source_location(TextSize::from(0), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(0)
}
);
let loc = index.location_offset(Location::new(1, 5), contents);
assert_eq!(loc, TextSize::from(5));
assert_eq!(&"x = '☃'\ny = 2"[usize::from(loc)..], "☃'\ny = 2");
let loc = index.source_location(TextSize::from(5), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(5)
}
);
let loc = index.location_offset(Location::new(1, 6), contents);
assert_eq!(loc, TextSize::from(8));
assert_eq!(&"x = '☃'\ny = 2"[usize::from(loc)..], "'\ny = 2");
let loc = index.source_location(TextSize::from(8), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(6)
}
);
// Second row.
let loc = index.location_offset(Location::new(2, 0), contents);
assert_eq!(loc, TextSize::from(10));
let loc = index.source_location(TextSize::from(10), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
// One-past-the-end.
let loc = index.location_offset(Location::new(3, 0), contents);
assert_eq!(loc, TextSize::from(15));
let loc = index.source_location(TextSize::from(15), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(5)
}
);
}
}

View file

@ -1,61 +1,399 @@
//! Struct used to efficiently slice source code at (row, column) Locations.
use crate::source_code::line_index::LineIndex;
use crate::source_code::SourceCode;
use crate::source_code::{LineIndex, OneIndexed, SourceCode, SourceLocation};
use once_cell::unsync::OnceCell;
use ruff_text_size::TextSize;
use rustpython_parser::ast::Location;
use crate::types::Range;
use ruff_text_size::{TextLen, TextRange, TextSize};
use std::ops::Add;
pub struct Locator<'a> {
contents: &'a str,
line_index: OnceCell<LineIndex>,
index: OnceCell<LineIndex>,
}
impl<'a> Locator<'a> {
pub const fn new(contents: &'a str) -> Self {
Self {
contents,
line_index: OnceCell::new(),
index: OnceCell::new(),
}
}
fn get_or_init_index(&self) -> &LineIndex {
self.line_index
/// Computes the one-indexed row containing `offset`.
///
/// Builds the [`LineIndex`] lazily on first use, which is why this is
/// flagged as expensive outside of the diagnostic phase.
#[deprecated(
    note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead."
)]
pub fn compute_line_index(&self, offset: TextSize) -> OneIndexed {
    self.to_index().line_index(offset)
}
/// Computes the row/column [`SourceLocation`] for `offset`.
///
/// Builds the [`LineIndex`] lazily on first use, which is why this is
/// flagged as expensive outside of the diagnostic phase.
#[deprecated(
    note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead."
)]
pub fn compute_source_location(&self, offset: TextSize) -> SourceLocation {
    self.to_source_code().source_location(offset)
}
/// Returns the [`LineIndex`], lazily constructing it from the source text
/// on the first call and reusing the cached value afterwards.
fn to_index(&self) -> &LineIndex {
    self.index
        .get_or_init(|| LineIndex::from_source_text(self.contents))
}
#[inline]
pub fn to_source_code(&self) -> SourceCode<'a, '_> {
pub fn line_index(&self) -> Option<&LineIndex> {
self.index.get()
}
pub fn to_source_code(&self) -> SourceCode {
SourceCode {
index: self.get_or_init_index(),
index: self.to_index(),
text: self.contents,
}
}
/// Take the source code up to the given [`Location`].
#[inline]
pub fn up_to(&self, location: Location) -> &'a str {
self.to_source_code().up_to(location)
/// Computes the start position of the line of `offset`.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::TextSize;
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\rthird line");
///
/// assert_eq!(locator.line_start(TextSize::from(0)), TextSize::from(0));
/// assert_eq!(locator.line_start(TextSize::from(4)), TextSize::from(0));
///
/// assert_eq!(locator.line_start(TextSize::from(14)), TextSize::from(11));
/// assert_eq!(locator.line_start(TextSize::from(28)), TextSize::from(23));
/// ```
///
/// ## Panics
/// If `offset` is out of bounds.
pub fn line_start(&self, offset: TextSize) -> TextSize {
    let before = &self.contents[TextRange::up_to(offset)];
    match before.rfind(['\n', '\r']) {
        // The line begins immediately after the preceding line terminator.
        // SAFETY: Safe because `index + 1 <= offset`, which fits in `TextSize`.
        Some(index) => TextSize::try_from(index + 1).unwrap(),
        // No terminator before `offset`: it is on the first line.
        None => TextSize::default(),
    }
}
/// Take the source code after the given [`Location`].
#[inline]
pub fn after(&self, location: Location) -> &'a str {
self.to_source_code().after(location)
/// Returns `true` if `offset` is at the very beginning of the source or
/// immediately after a line terminator.
pub fn is_at_start_of_line(&self, offset: TextSize) -> bool {
    if offset == TextSize::from(0) {
        return true;
    }
    self.contents[TextRange::up_to(offset)].ends_with(['\n', '\r'])
}
/// Take the source code between the given [`Range`].
#[inline]
pub fn slice<R: Into<Range>>(&self, range: R) -> &'a str {
self.to_source_code().slice(range)
/// Computes the offset that is right after the newline character that ends `offset`'s line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.full_line_end(TextSize::from(3)), TextSize::from(11));
/// assert_eq!(locator.full_line_end(TextSize::from(14)), TextSize::from(24));
/// assert_eq!(locator.full_line_end(TextSize::from(28)), TextSize::from(34));
/// ```
///
/// ## Panics
///
/// If `offset` is past the end of the content.
pub fn full_line_end(&self, offset: TextSize) -> TextSize {
    let rest = &self.contents[usize::from(offset)..];
    match rest.find(['\n', '\r']) {
        // No terminator after `offset`: the line runs to the end of the source.
        None => self.contents.text_len(),
        Some(index) => {
            let bytes = rest.as_bytes();
            // A `\r\n` pair terminates the line as a single two-byte unit.
            let newline_len = if bytes[index] == b'\r' && bytes.get(index + 1) == Some(&b'\n') {
                2
            } else {
                // Lone `\n` or `\r`.
                1
            };
            offset + TextSize::try_from(index + newline_len).unwrap()
        }
    }
}
/// Return the byte offset of the given [`Location`].
/// Computes the offset that is right before the newline character that ends `offset`'s line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.line_end(TextSize::from(3)), TextSize::from(10));
/// assert_eq!(locator.line_end(TextSize::from(14)), TextSize::from(22));
/// assert_eq!(locator.line_end(TextSize::from(28)), TextSize::from(34));
/// ```
///
/// ## Panics
///
/// If `offset` is past the end of the content.
pub fn line_end(&self, offset: TextSize) -> TextSize {
    let rest = &self.contents[usize::from(offset)..];
    match rest.find(['\n', '\r']) {
        // Stop just before the first terminator at or after `offset`.
        Some(index) => offset + TextSize::try_from(index).unwrap(),
        // No terminator: the line ends with the source.
        None => self.contents.text_len(),
    }
}
/// Computes the range of this `offset`s line.
///
/// The range starts at the beginning of the line and goes up to, and including, the new line character
/// at the end of the line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.full_line_range(TextSize::from(3)), TextRange::new(TextSize::from(0), TextSize::from(11)));
/// assert_eq!(locator.full_line_range(TextSize::from(14)), TextRange::new(TextSize::from(11), TextSize::from(24)));
/// assert_eq!(locator.full_line_range(TextSize::from(28)), TextRange::new(TextSize::from(24), TextSize::from(34)));
/// ```
///
/// ## Panics
/// If `offset` is out of bounds.
/// Builds the range of `offset`'s line, terminator included.
pub fn full_line_range(&self, offset: TextSize) -> TextRange {
    let start = self.line_start(offset);
    let end = self.full_line_end(offset);
    TextRange::new(start, end)
}
/// Computes the range of this `offset`s line ending before the newline character.
///
/// The range starts at the beginning of the line and goes up to, but excluding, the new line character
/// at the end of the line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.line_range(TextSize::from(3)), TextRange::new(TextSize::from(0), TextSize::from(10)));
/// assert_eq!(locator.line_range(TextSize::from(14)), TextRange::new(TextSize::from(11), TextSize::from(22)));
/// assert_eq!(locator.line_range(TextSize::from(28)), TextRange::new(TextSize::from(24), TextSize::from(34)));
/// ```
///
/// ## Panics
/// If `offset` is out of bounds.
/// Builds the range of `offset`'s line, terminator excluded.
pub fn line_range(&self, offset: TextSize) -> TextRange {
    let start = self.line_start(offset);
    let end = self.line_end(offset);
    TextRange::new(start, end)
}
/// Returns the text of the `offset`'s line.
///
/// The line includes the newline characters at the end of the line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.full_line(TextSize::from(3)), "First line\n");
/// assert_eq!(locator.full_line(TextSize::from(14)), "second line\r\n");
/// assert_eq!(locator.full_line(TextSize::from(28)), "third line");
/// ```
///
/// ## Panics
/// If `offset` is out of bounds.
/// Returns the text of `offset`'s line, terminator included.
pub fn full_line(&self, offset: TextSize) -> &'a str {
    let range = self.full_line_range(offset);
    &self.contents[range]
}
/// Returns the text of the `offset`'s line.
///
/// Excludes the newline characters at the end of the line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.line(TextSize::from(3)), "First line");
/// assert_eq!(locator.line(TextSize::from(14)), "second line");
/// assert_eq!(locator.line(TextSize::from(28)), "third line");
/// ```
///
/// ## Panics
/// If `offset` is out of bounds.
/// Returns the text of `offset`'s line, terminator excluded.
pub fn line(&self, offset: TextSize) -> &'a str {
    let range = self.line_range(offset);
    &self.contents[range]
}
/// Computes the range of all lines that this `range` covers.
///
/// The range starts at the beginning of the line at `range.start()` and goes up to, and including, the new line character
/// at the end of `range.end()`'s line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(
/// locator.full_lines_range(TextRange::new(TextSize::from(3), TextSize::from(5))),
/// TextRange::new(TextSize::from(0), TextSize::from(11))
/// );
/// assert_eq!(
/// locator.full_lines_range(TextRange::new(TextSize::from(3), TextSize::from(14))),
/// TextRange::new(TextSize::from(0), TextSize::from(24))
/// );
/// ```
///
/// ## Panics
/// If the start or end of `range` is out of bounds.
/// Expands `range` to full-line boundaries: from the start of the line
/// containing `range.start()` through the terminator of the line containing
/// `range.end()`.
pub fn full_lines_range(&self, range: TextRange) -> TextRange {
    let start = self.line_start(range.start());
    let end = self.full_line_end(range.end());
    TextRange::new(start, end)
}
/// Computes the range of all lines that this `range` covers.
///
/// The range starts at the beginning of the line at `range.start()` and goes up to, but excluding, the new line character
/// at the end of `range.end()`'s line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(
/// locator.lines_range(TextRange::new(TextSize::from(3), TextSize::from(5))),
/// TextRange::new(TextSize::from(0), TextSize::from(10))
/// );
/// assert_eq!(
/// locator.lines_range(TextRange::new(TextSize::from(3), TextSize::from(14))),
/// TextRange::new(TextSize::from(0), TextSize::from(22))
/// );
/// ```
///
/// ## Panics
/// If the start or end of `range` is out of bounds.
/// Expands `range` to full-line boundaries, excluding the trailing line
/// terminator of `range.end()`'s line.
pub fn lines_range(&self, range: TextRange) -> TextRange {
    let start = self.line_start(range.start());
    let end = self.line_end(range.end());
    TextRange::new(start, end)
}
/// Returns true if the text of `range` contains any line break.
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert!(
/// !locator.contains_line_break(TextRange::new(TextSize::from(3), TextSize::from(5))),
/// );
/// assert!(
/// locator.contains_line_break(TextRange::new(TextSize::from(3), TextSize::from(14))),
/// );
/// ```
///
/// ## Panics
/// If the `range` is out of bounds.
/// Returns `true` if the text covered by `range` contains a line break.
pub fn contains_line_break(&self, range: TextRange) -> bool {
    // Either `\n` or `\r` counts, which covers LF, CRLF, and lone-CR endings.
    self.contents[range].contains(['\n', '\r'])
}
/// Returns the text of all lines that include `range`.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(
/// locator.lines(TextRange::new(TextSize::from(3), TextSize::from(5))),
/// "First line"
/// );
/// assert_eq!(
/// locator.lines(TextRange::new(TextSize::from(3), TextSize::from(14))),
/// "First line\nsecond line"
/// );
/// ```
///
/// ## Panics
/// If the start or end of `range` is out of bounds.
/// Returns the text of all lines touched by `range`, without the final
/// line's terminator.
pub fn lines(&self, range: TextRange) -> &'a str {
    let expanded = self.lines_range(range);
    &self.contents[expanded]
}
/// Returns the text of all lines that include `range`.
///
/// Includes the newline characters of the last line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(
/// locator.full_lines(TextRange::new(TextSize::from(3), TextSize::from(5))),
/// "First line\n"
/// );
/// assert_eq!(
/// locator.full_lines(TextRange::new(TextSize::from(3), TextSize::from(14))),
/// "First line\nsecond line\r\n"
/// );
/// ```
///
/// ## Panics
/// If the start or end of `range` is out of bounds.
/// Returns the text of all lines touched by `range`, including the final
/// line's terminator.
pub fn full_lines(&self, range: TextRange) -> &'a str {
    let expanded = self.full_lines_range(range);
    &self.contents[expanded]
}
/// Take the source code up to the given [`TextSize`].
#[inline]
pub fn offset(&self, location: Location) -> TextSize {
self.to_source_code().offset(location)
pub fn up_to(&self, offset: TextSize) -> &'a str {
&self.contents[TextRange::up_to(offset)]
}
/// Take the source code after the given [`TextSize`].
#[inline]
pub fn after(&self, offset: TextSize) -> &'a str {
&self.contents[usize::from(offset)..]
}
/// Take the source code between the given [`TextRange`].
#[inline]
pub fn slice(&self, range: TextRange) -> &'a str {
&self.contents[range]
}
/// Return the underlying source code.
@ -63,17 +401,15 @@ impl<'a> Locator<'a> {
self.contents
}
/// Return the number of lines in the source code.
pub fn count_lines(&self) -> usize {
let index = self.get_or_init_index();
index.line_count()
}
/// Return the number of bytes in the source code.
pub const fn len(&self) -> usize {
self.contents.len()
}
pub fn text_len(&self) -> TextSize {
self.contents.text_len()
}
/// Return `true` if the source code is empty.
pub const fn is_empty(&self) -> bool {
self.contents.is_empty()

View file

@ -5,17 +5,17 @@ mod locator;
mod stylist;
pub use crate::source_code::line_index::{LineIndex, OneIndexed};
use crate::types::Range;
pub use generator::Generator;
pub use indexer::Indexer;
pub use locator::Locator;
use ruff_text_size::{TextRange, TextSize};
use rustpython_parser as parser;
use rustpython_parser::ast::Location;
use rustpython_parser::{lexer, Mode, ParseError};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::fmt::{Debug, Formatter};
use std::sync::Arc;
pub use stylist::{LineEnding, Stylist};
/// Run round-trip source code generation on a given Python code.
@ -29,7 +29,7 @@ pub fn round_trip(code: &str, source_path: &str) -> Result<String, ParseError> {
Ok(generator.generate())
}
/// Gives access to the source code of a file and allows mapping between [`Location`] and byte offsets.
/// Gives access to the source code of a file and allows mapping between [`TextSize`] and [`SourceLocation`].
#[derive(Debug)]
pub struct SourceCode<'src, 'index> {
text: &'src str,
@ -44,37 +44,34 @@ impl<'src, 'index> SourceCode<'src, 'index> {
}
}
/// Take the source code up to the given [`Location`].
pub fn up_to(&self, location: Location) -> &'src str {
let offset = self.index.location_offset(location, self.text);
/// Computes the one indexed row and column numbers for `offset`.
#[inline]
pub fn source_location(&self, offset: TextSize) -> SourceLocation {
self.index.source_location(offset, self.text)
}
#[inline]
pub fn line_index(&self, offset: TextSize) -> OneIndexed {
self.index.line_index(offset)
}
/// Take the source code up to the given [`TextSize`].
#[inline]
pub fn up_to(&self, offset: TextSize) -> &'src str {
&self.text[TextRange::up_to(offset)]
}
/// Take the source code after the given [`Location`].
pub fn after(&self, location: Location) -> &'src str {
let offset = self.index.location_offset(location, self.text);
/// Take the source code after the given [`TextSize`].
#[inline]
pub fn after(&self, offset: TextSize) -> &'src str {
&self.text[usize::from(offset)..]
}
/// Take the source code between the given [`Range`].
pub fn slice<R: Into<Range>>(&self, range: R) -> &'src str {
let range = self.text_range(range);
/// Take the source code between the given [`TextRange`].
pub fn slice(&self, range: TextRange) -> &'src str {
&self.text[range]
}
/// Converts a [`Location`] range to a byte offset range
pub fn text_range<R: Into<Range>>(&self, range: R) -> TextRange {
let range = range.into();
let start = self.index.location_offset(range.location, self.text);
let end = self.index.location_offset(range.end_location, self.text);
TextRange::new(start, end)
}
/// Return the byte offset of the given [`Location`].
pub fn offset(&self, location: Location) -> TextSize {
self.index.location_offset(location, self.text)
}
pub fn line_start(&self, line: OneIndexed) -> TextSize {
self.index.line_start(line, self.text)
}
@ -87,20 +84,6 @@ impl<'src, 'index> SourceCode<'src, 'index> {
self.index.line_range(line, self.text)
}
/// Returns a string with the lines spawning between location and end location.
pub fn lines(&self, range: Range) -> &'src str {
let start_line = self
.index
.line_range(OneIndexed::new(range.location.row()).unwrap(), self.text);
let end_line = self.index.line_range(
OneIndexed::new(range.end_location.row()).unwrap(),
self.text,
);
&self.text[TextRange::new(start_line.start(), end_line.end())]
}
/// Returns the source text of the line with the given index
#[inline]
pub fn line_text(&self, index: OneIndexed) -> &'src str {
@ -131,69 +114,43 @@ impl Eq for SourceCode<'_, '_> {}
/// A Builder for constructing a [`SourceFile`]
pub struct SourceFileBuilder {
name: Box<str>,
code: Option<FileSourceCode>,
code: Box<str>,
index: Option<LineIndex>,
}
impl SourceFileBuilder {
/// Creates a new builder for a file named `name`.
pub fn new(name: &str) -> Self {
pub fn new<Name: Into<Box<str>>, Code: Into<Box<str>>>(name: Name, code: Code) -> Self {
Self {
name: Box::from(name),
code: None,
name: name.into(),
code: code.into(),
index: None,
}
}
/// Creates a enw builder for a file named `name`
pub fn from_string(name: String) -> Self {
Self {
name: Box::from(name),
code: None,
}
}
/// Consumes `self` and returns a builder for a file with the source text and the [`LineIndex`] copied
/// from `source`.
#[must_use]
pub fn source_code(mut self, source: &SourceCode) -> Self {
self.set_source_code(source);
pub fn line_index(mut self, index: LineIndex) -> Self {
self.index = Some(index);
self
}
/// Copies the source text and [`LineIndex`] from `source`.
pub fn set_source_code(&mut self, source: &SourceCode) {
self.code = Some(FileSourceCode {
text: Box::from(source.text()),
index: source.index.clone(),
});
}
/// Consumes `self` and returns a builder for a file with the source text `text`. Builds the [`LineIndex`] from `text`.
#[must_use]
pub fn source_text(self, text: &str) -> Self {
self.source_code(&SourceCode::new(text, &LineIndex::from_source_text(text)))
}
/// Consumes `self` and returns a builder for a file with the source text `text`. Builds the [`LineIndex`] from `text`.
#[must_use]
pub fn source_text_string(mut self, text: String) -> Self {
self.set_source_text_string(text);
self
}
/// Copies the source text `text` and builds the [`LineIndex`] from `text`.
pub fn set_source_text_string(&mut self, text: String) {
self.code = Some(FileSourceCode {
index: LineIndex::from_source_text(&text),
text: Box::from(text),
});
pub fn set_line_index(&mut self, index: LineIndex) {
self.index = Some(index);
}
/// Consumes `self` and returns the [`SourceFile`].
pub fn finish(self) -> SourceFile {
let index = if let Some(index) = self.index {
once_cell::sync::OnceCell::with_value(index)
} else {
once_cell::sync::OnceCell::new()
};
SourceFile {
inner: Arc::new(SourceFileInner {
name: self.name,
code: self.code,
line_index: index,
}),
}
}
@ -211,7 +168,7 @@ impl Debug for SourceFile {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("SourceFile")
.field("name", &self.name())
.field("code", &self.source_code())
.field("code", &self.source_text())
.finish()
}
}
@ -223,38 +180,57 @@ impl SourceFile {
&self.inner.name
}
/// Returns `Some` with the source code if set, or `None`.
#[inline]
pub fn source_code(&self) -> Option<SourceCode> {
self.inner.code.as_ref().map(|code| SourceCode {
text: &code.text,
index: &code.index,
})
pub fn slice(&self, range: TextRange) -> &str {
&self.source_text()[range]
}
pub fn to_source_code(&self) -> SourceCode {
SourceCode {
text: self.source_text(),
index: self.index(),
}
}
fn index(&self) -> &LineIndex {
self.inner
.line_index
.get_or_init(|| LineIndex::from_source_text(self.source_text()))
}
/// Returns `Some` with the source text if set, or `None`.
#[inline]
pub fn source_text(&self) -> Option<&str> {
self.inner.code.as_ref().map(|code| &*code.text)
pub fn source_text(&self) -> &str {
&self.inner.code
}
}
#[derive(Eq, PartialEq)]
struct SourceFileInner {
name: Box<str>,
code: Option<FileSourceCode>,
code: Box<str>,
line_index: once_cell::sync::OnceCell<LineIndex>,
}
struct FileSourceCode {
text: Box<str>,
index: LineIndex,
}
impl PartialEq for FileSourceCode {
impl PartialEq for SourceFileInner {
fn eq(&self, other: &Self) -> bool {
// It should be safe to assume that the index for two source files are identical
self.text == other.text
self.name == other.name && self.code == other.code
}
}
impl Eq for FileSourceCode {}
impl Eq for SourceFileInner {}
/// A one-indexed row/column position within a source document.
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct SourceLocation {
// One-indexed line number.
pub row: OneIndexed,
// One-indexed column. NOTE(review): whether this counts bytes or
// characters is determined by the index that produced it — confirm at the
// call site.
pub column: OneIndexed,
}
// Hand-written `Debug` so the output shows the plain numeric values of the
// `OneIndexed` wrappers (via `.get()`) instead of the wrapper type itself.
impl Debug for SourceLocation {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("SourceLocation")
.field("row", &self.row.get())
.field("column", &self.column.get())
.finish()
}
}

View file

@ -4,7 +4,6 @@ use std::fmt;
use std::ops::Deref;
use once_cell::unsync::OnceCell;
use rustpython_parser::ast::Location;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::Tok;
@ -12,48 +11,21 @@ use ruff_rustpython::vendor;
use crate::source_code::Locator;
use crate::str::leading_quote;
use crate::types::Range;
pub struct Stylist<'a> {
locator: &'a Locator<'a>,
indentation: OnceCell<Indentation>,
indent_end: Option<Location>,
quote: OnceCell<Quote>,
quote_range: Option<Range>,
indentation: Indentation,
quote: Quote,
line_ending: OnceCell<LineEnding>,
}
impl<'a> Stylist<'a> {
pub fn indentation(&'a self) -> &'a Indentation {
self.indentation.get_or_init(|| {
if let Some(indent_end) = self.indent_end {
let start = Location::new(indent_end.row(), 0);
let whitespace = self.locator.slice(Range::new(start, indent_end));
Indentation(whitespace.to_string())
} else {
Indentation::default()
}
})
&self.indentation
}
pub fn quote(&'a self) -> Quote {
*self.quote.get_or_init(|| {
self.quote_range
.and_then(|quote_range| {
let content = self.locator.slice(quote_range);
leading_quote(content)
})
.map(|pattern| {
if pattern.contains('\'') {
Quote::Single
} else if pattern.contains('"') {
Quote::Double
} else {
unreachable!("Expected string to start with a valid quote prefix")
}
})
.unwrap_or_default()
})
self.quote
}
pub fn line_ending(&'a self) -> LineEnding {
@ -63,33 +35,60 @@ impl<'a> Stylist<'a> {
}
pub fn from_tokens(tokens: &[LexResult], locator: &'a Locator<'a>) -> Self {
let indent_end = tokens.iter().flatten().find_map(|(_, t, end)| {
if matches!(t, Tok::Indent) {
Some(*end)
} else {
None
}
});
let quote_range = tokens.iter().flatten().find_map(|(start, t, end)| match t {
Tok::String {
triple_quoted: false,
..
} => Some(Range::new(*start, *end)),
_ => None,
});
let indentation = detect_indention(tokens, locator);
Self {
locator,
indentation: OnceCell::default(),
indent_end,
quote_range,
quote: OnceCell::default(),
indentation,
quote: detect_quote(tokens, locator),
line_ending: OnceCell::default(),
}
}
}
/// Detects the preferred quote style from the first non-triple-quoted string
/// token in the stream; defaults when no such token (or no recognizable
/// quote prefix) is found.
fn detect_quote(tokens: &[LexResult], locator: &Locator) -> Quote {
    // Only the first single-line string literal is consulted.
    let string_range = tokens.iter().flatten().find_map(|(tok, range)| {
        if matches!(
            tok,
            Tok::String {
                triple_quoted: false,
                ..
            }
        ) {
            Some(*range)
        } else {
            None
        }
    });

    string_range
        .and_then(|range| leading_quote(locator.slice(range)))
        .map_or_else(Quote::default, |prefix| {
            if prefix.contains('\'') {
                Quote::Single
            } else if prefix.contains('"') {
                Quote::Double
            } else {
                unreachable!("Expected string to start with a valid quote prefix")
            }
        })
}
/// Detects the file's indentation unit: the verbatim whitespace of the first
/// `Indent` token, or the default when the file has no indentation.
fn detect_indention(tokens: &[LexResult], locator: &Locator) -> Indentation {
    let indent_range = tokens.iter().flatten().find_map(|(tok, range)| {
        if matches!(tok, Tok::Indent) {
            Some(*range)
        } else {
            None
        }
    });

    match indent_range {
        Some(range) => Indentation(locator.slice(range).to_string()),
        None => Indentation::default(),
    }
}
/// The quotation style used in Python source code.
#[derive(Debug, Default, PartialEq, Eq, Copy, Clone)]
pub enum Quote {
@ -198,17 +197,18 @@ impl Deref for LineEnding {
/// Detect the line ending style of the given contents.
fn detect_line_ending(contents: &str) -> Option<LineEnding> {
if let Some(position) = contents.find('\n') {
let position = position.saturating_sub(1);
return if let Some('\r') = contents.chars().nth(position) {
if let Some(position) = contents.find(['\n', '\r']) {
let bytes = contents.as_bytes();
if bytes[position] == b'\n' {
Some(LineEnding::Lf)
} else if bytes.get(position.saturating_add(1)) == Some(&b'\n') {
Some(LineEnding::CrLf)
} else {
Some(LineEnding::Lf)
};
} else if contents.find('\r').is_some() {
return Some(LineEnding::Cr);
Some(LineEnding::Cr)
}
} else {
None
}
None
}
#[cfg(test)]

View file

@ -1,3 +1,5 @@
use ruff_text_size::{TextLen, TextRange};
/// See: <https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals>
const TRIPLE_QUOTE_STR_PREFIXES: &[&str] = &[
"u\"\"\"", "u'''", "r\"\"\"", "r'''", "U\"\"\"", "U'''", "R\"\"\"", "R'''", "\"\"\"", "'''",
@ -21,9 +23,19 @@ const SINGLE_QUOTE_SUFFIXES: &[&str] = &["\"", "'"];
/// Assumes that the string is a valid string literal, but does not verify that the string
/// is a "simple" string literal (i.e., that it does not contain any implicit concatenations).
pub fn raw_contents(contents: &str) -> Option<&str> {
    // Compute the body's range via `raw_contents_range`, then slice it out.
    raw_contents_range(contents).map(|range| &contents[range])
}
pub fn raw_contents_range(contents: &str) -> Option<TextRange> {
let leading_quote_str = leading_quote(contents)?;
let trailing_quote_str = trailing_quote(contents)?;
Some(&contents[leading_quote_str.len()..contents.len() - trailing_quote_str.len()])
Some(TextRange::new(
leading_quote_str.text_len(),
contents.text_len() - trailing_quote_str.text_len(),
))
}
/// Return the leading quote for a string or byte literal (e.g., `"""`).

View file

@ -1,6 +1,6 @@
use std::ops::Deref;
use rustpython_parser::ast::{Expr, Located, Location, Stmt};
use rustpython_parser::ast::{Expr, Stmt};
#[derive(Clone)]
pub enum Node<'a> {
@ -8,33 +8,6 @@ pub enum Node<'a> {
Expr(&'a Expr),
}
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub struct Range {
pub location: Location,
pub end_location: Location,
}
impl Range {
pub const fn new(location: Location, end_location: Location) -> Self {
Self {
location,
end_location,
}
}
}
impl<T> From<&Located<T>> for Range {
fn from(located: &Located<T>) -> Self {
Range::new(located.location, located.end_location.unwrap())
}
}
impl<T> From<&Box<Located<T>>> for Range {
fn from(located: &Box<Located<T>>) -> Self {
Range::new(located.location, located.end_location.unwrap())
}
}
#[derive(Debug)]
pub struct RefEquality<'a, T>(pub &'a T);

View file

@ -1,11 +1,11 @@
use anyhow::Result;
use ruff_text_size::{TextLen, TextRange};
use rustpython_parser as parser;
use rustpython_parser::ast::{Expr, Location};
use rustpython_parser::ast::Expr;
use crate::relocate::relocate_expr;
use crate::source_code::Locator;
use crate::str;
use crate::types::Range;
#[derive(is_macro::Is, Copy, Clone)]
pub enum AnnotationKind {
@ -24,10 +24,11 @@ pub enum AnnotationKind {
/// Parse a type annotation from a string.
pub fn parse_type_annotation(
value: &str,
range: Range,
range: TextRange,
locator: &Locator,
) -> Result<(Expr, AnnotationKind)> {
let expression = locator.slice(range);
let expression = &locator.contents()[range];
if str::raw_contents(expression).map_or(false, |body| body == value) {
// The annotation is considered "simple" if and only if the raw representation (e.g.,
// `List[int]` within "List[int]") exactly matches the parsed representation. This
@ -37,10 +38,7 @@ pub fn parse_type_annotation(
let expr = parser::parse_expression_located(
value,
"<filename>",
Location::new(
range.location.row(),
range.location.column() + leading_quote.len(),
),
range.start() + leading_quote.text_len(),
)?;
Ok((expr, AnnotationKind::Simple))
} else {

View file

@ -1,15 +1,13 @@
use rustpython_parser::ast::{Located, Location};
use ruff_text_size::TextRange;
use rustpython_parser::ast::Located;
use crate::source_code::Locator;
use crate::types::Range;
/// Extract the leading indentation from a line.
pub fn indentation<'a, T>(locator: &'a Locator, located: &'a Located<T>) -> Option<&'a str> {
let range = Range::from(located);
let indentation = locator.slice(Range::new(
Location::new(range.location.row(), 0),
Location::new(range.location.row(), range.location.column()),
));
pub fn indentation<'a, T>(locator: &'a Locator, located: &Located<T>) -> Option<&'a str> {
let line_start = locator.line_start(located.start());
let indentation = &locator.contents()[TextRange::new(line_start, located.start())];
if indentation.chars().all(char::is_whitespace) {
Some(indentation)
} else {