mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-03 18:28:24 +00:00
Replace row/column based Location
with byte-offsets. (#3931)
This commit is contained in:
parent
ee91598835
commit
cab65b25da
418 changed files with 6203 additions and 7040 deletions
|
@ -9,7 +9,7 @@ rust-version = { workspace = true }
|
|||
|
||||
[dependencies]
|
||||
ruff_rustpython = { path = "../ruff_rustpython" }
|
||||
ruff_text_size = { path = "../ruff_text_size" }
|
||||
ruff_text_size = { workspace = true, features = ["serde"] }
|
||||
|
||||
anyhow = { workspace = true }
|
||||
bitflags = { workspace = true }
|
||||
|
@ -23,5 +23,5 @@ regex = { workspace = true }
|
|||
rustc-hash = { workspace = true }
|
||||
rustpython-common = { workspace = true }
|
||||
rustpython-parser = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde = { workspace = true, optional = true }
|
||||
smallvec = { workspace = true }
|
||||
|
|
|
@ -6,28 +6,29 @@ use log::error;
|
|||
use num_traits::Zero;
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use ruff_text_size::{TextRange, TextSize};
|
||||
use rustc_hash::{FxHashMap, FxHashSet};
|
||||
use rustpython_parser::ast::{
|
||||
Arguments, Cmpop, Constant, Excepthandler, ExcepthandlerKind, Expr, ExprKind, Keyword,
|
||||
KeywordData, Located, Location, MatchCase, Pattern, PatternKind, Stmt, StmtKind,
|
||||
KeywordData, Located, MatchCase, Pattern, PatternKind, Stmt, StmtKind,
|
||||
};
|
||||
use rustpython_parser::{lexer, Mode, Tok};
|
||||
use smallvec::SmallVec;
|
||||
|
||||
use crate::call_path::CallPath;
|
||||
use crate::newlines::UniversalNewlineIterator;
|
||||
use crate::source_code::{Generator, Indexer, Locator, Stylist};
|
||||
use crate::types::Range;
|
||||
use crate::visitor;
|
||||
use crate::visitor::Visitor;
|
||||
|
||||
/// Create an `Expr` with default location from an `ExprKind`.
|
||||
pub fn create_expr(node: ExprKind) -> Expr {
|
||||
Expr::new(Location::default(), Location::default(), node)
|
||||
Expr::with_range(node, TextRange::default())
|
||||
}
|
||||
|
||||
/// Create a `Stmt` with a default location from a `StmtKind`.
|
||||
pub fn create_stmt(node: StmtKind) -> Stmt {
|
||||
Stmt::new(Location::default(), Location::default(), node)
|
||||
Stmt::with_range(node, TextRange::default())
|
||||
}
|
||||
|
||||
/// Generate source code from an [`Expr`].
|
||||
|
@ -617,24 +618,27 @@ pub fn map_callable(decorator: &Expr) -> &Expr {
|
|||
|
||||
/// Returns `true` if a statement or expression includes at least one comment.
|
||||
pub fn has_comments<T>(located: &Located<T>, locator: &Locator) -> bool {
|
||||
let start = if match_leading_content(located, locator) {
|
||||
located.location
|
||||
let start = if has_leading_content(located, locator) {
|
||||
located.start()
|
||||
} else {
|
||||
Location::new(located.location.row(), 0)
|
||||
locator.line_start(located.start())
|
||||
};
|
||||
let end = if match_trailing_content(located, locator) {
|
||||
located.end_location.unwrap()
|
||||
let end = if has_trailing_content(located, locator) {
|
||||
located.end()
|
||||
} else {
|
||||
Location::new(located.end_location.unwrap().row() + 1, 0)
|
||||
locator.line_end(located.end())
|
||||
};
|
||||
has_comments_in(Range::new(start, end), locator)
|
||||
|
||||
has_comments_in(TextRange::new(start, end), locator)
|
||||
}
|
||||
|
||||
/// Returns `true` if a [`Range`] includes at least one comment.
|
||||
pub fn has_comments_in(range: Range, locator: &Locator) -> bool {
|
||||
for tok in lexer::lex_located(locator.slice(range), Mode::Module, range.location) {
|
||||
/// Returns `true` if a [`TextRange`] includes at least one comment.
|
||||
pub fn has_comments_in(range: TextRange, locator: &Locator) -> bool {
|
||||
let source = &locator.contents()[range];
|
||||
|
||||
for tok in lexer::lex_located(source, Mode::Module, range.start()) {
|
||||
match tok {
|
||||
Ok((_, tok, _)) => {
|
||||
Ok((tok, _)) => {
|
||||
if matches!(tok, Tok::Comment(..)) {
|
||||
return true;
|
||||
}
|
||||
|
@ -836,7 +840,7 @@ where
|
|||
/// A [`Visitor`] that collects all `raise` statements in a function or method.
|
||||
#[derive(Default)]
|
||||
pub struct RaiseStatementVisitor<'a> {
|
||||
pub raises: Vec<(Range, Option<&'a Expr>, Option<&'a Expr>)>,
|
||||
pub raises: Vec<(TextRange, Option<&'a Expr>, Option<&'a Expr>)>,
|
||||
}
|
||||
|
||||
impl<'a, 'b> Visitor<'b> for RaiseStatementVisitor<'b>
|
||||
|
@ -847,7 +851,7 @@ where
|
|||
match &stmt.node {
|
||||
StmtKind::Raise { exc, cause } => {
|
||||
self.raises
|
||||
.push((Range::from(stmt), exc.as_deref(), cause.as_deref()));
|
||||
.push((stmt.range(), exc.as_deref(), cause.as_deref()));
|
||||
}
|
||||
StmtKind::ClassDef { .. }
|
||||
| StmtKind::FunctionDef { .. }
|
||||
|
@ -907,45 +911,19 @@ pub fn extract_globals(body: &[Stmt]) -> FxHashMap<&str, &Stmt> {
|
|||
visitor.globals
|
||||
}
|
||||
|
||||
/// Convert a location within a file (relative to `base`) to an absolute
|
||||
/// position.
|
||||
pub fn to_absolute(relative: Location, base: Location) -> Location {
|
||||
if relative.row() == 1 {
|
||||
Location::new(
|
||||
relative.row() + base.row() - 1,
|
||||
relative.column() + base.column(),
|
||||
)
|
||||
} else {
|
||||
Location::new(relative.row() + base.row() - 1, relative.column())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_relative(absolute: Location, base: Location) -> Location {
|
||||
if absolute.row() == base.row() {
|
||||
Location::new(
|
||||
absolute.row() - base.row() + 1,
|
||||
absolute.column() - base.column(),
|
||||
)
|
||||
} else {
|
||||
Location::new(absolute.row() - base.row() + 1, absolute.column())
|
||||
}
|
||||
}
|
||||
|
||||
/// Return `true` if a [`Located`] has leading content.
|
||||
pub fn match_leading_content<T>(located: &Located<T>, locator: &Locator) -> bool {
|
||||
let range = Range::new(Location::new(located.location.row(), 0), located.location);
|
||||
let prefix = locator.slice(range);
|
||||
prefix.chars().any(|char| !char.is_whitespace())
|
||||
pub fn has_leading_content<T>(located: &Located<T>, locator: &Locator) -> bool {
|
||||
let line_start = locator.line_start(located.start());
|
||||
let leading = &locator.contents()[TextRange::new(line_start, located.start())];
|
||||
leading.chars().any(|char| !char.is_whitespace())
|
||||
}
|
||||
|
||||
/// Return `true` if a [`Located`] has trailing content.
|
||||
pub fn match_trailing_content<T>(located: &Located<T>, locator: &Locator) -> bool {
|
||||
let range = Range::new(
|
||||
located.end_location.unwrap(),
|
||||
Location::new(located.end_location.unwrap().row() + 1, 0),
|
||||
);
|
||||
let suffix = locator.slice(range);
|
||||
for char in suffix.chars() {
|
||||
pub fn has_trailing_content<T>(located: &Located<T>, locator: &Locator) -> bool {
|
||||
let line_end = locator.line_end(located.end());
|
||||
let trailing = &locator.contents()[TextRange::new(located.end(), line_end)];
|
||||
|
||||
for char in trailing.chars() {
|
||||
if char == '#' {
|
||||
return false;
|
||||
}
|
||||
|
@ -957,55 +935,66 @@ pub fn match_trailing_content<T>(located: &Located<T>, locator: &Locator) -> boo
|
|||
}
|
||||
|
||||
/// If a [`Located`] has a trailing comment, return the index of the hash.
|
||||
pub fn match_trailing_comment<T>(located: &Located<T>, locator: &Locator) -> Option<usize> {
|
||||
let range = Range::new(
|
||||
located.end_location.unwrap(),
|
||||
Location::new(located.end_location.unwrap().row() + 1, 0),
|
||||
);
|
||||
let suffix = locator.slice(range);
|
||||
for (i, char) in suffix.chars().enumerate() {
|
||||
pub fn trailing_comment_start_offset<T>(
|
||||
located: &Located<T>,
|
||||
locator: &Locator,
|
||||
) -> Option<TextSize> {
|
||||
let line_end = locator.line_end(located.end());
|
||||
|
||||
let trailing = &locator.contents()[TextRange::new(located.end(), line_end)];
|
||||
|
||||
for (i, char) in trailing.chars().enumerate() {
|
||||
if char == '#' {
|
||||
return Some(i);
|
||||
return TextSize::try_from(i).ok();
|
||||
}
|
||||
if !char.is_whitespace() {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Return the number of trailing empty lines following a statement.
|
||||
pub fn count_trailing_lines(stmt: &Stmt, locator: &Locator) -> usize {
|
||||
let suffix = locator.after(Location::new(stmt.end_location.unwrap().row() + 1, 0));
|
||||
suffix
|
||||
.lines()
|
||||
/// Return the end offset of the run of empty lines that follows a statement (or the end of the statement's line if there are none).
|
||||
pub fn trailing_lines_end(stmt: &Stmt, locator: &Locator) -> TextSize {
|
||||
let line_end = locator.full_line_end(stmt.end());
|
||||
let rest = &locator.contents()[usize::from(line_end)..];
|
||||
|
||||
UniversalNewlineIterator::with_offset(rest, line_end)
|
||||
.take_while(|line| line.trim().is_empty())
|
||||
.count()
|
||||
.last()
|
||||
.map_or(line_end, |l| l.full_end())
|
||||
}
|
||||
|
||||
/// Return the range of the first parenthesis pair after a given [`Location`].
|
||||
pub fn match_parens(start: Location, locator: &Locator) -> Option<Range> {
|
||||
let contents = locator.after(start);
|
||||
/// Return the range of the first parenthesis pair after a given [`TextSize`].
|
||||
pub fn match_parens(start: TextSize, locator: &Locator) -> Option<TextRange> {
|
||||
let contents = &locator.contents()[usize::from(start)..];
|
||||
|
||||
let mut fix_start = None;
|
||||
let mut fix_end = None;
|
||||
let mut count: usize = 0;
|
||||
for (start, tok, end) in lexer::lex_located(contents, Mode::Module, start).flatten() {
|
||||
if matches!(tok, Tok::Lpar) {
|
||||
if count == 0 {
|
||||
fix_start = Some(start);
|
||||
|
||||
for (tok, range) in lexer::lex_located(contents, Mode::Module, start).flatten() {
|
||||
match tok {
|
||||
Tok::Lpar => {
|
||||
if count == 0 {
|
||||
fix_start = Some(range.start());
|
||||
}
|
||||
count += 1;
|
||||
}
|
||||
count += 1;
|
||||
}
|
||||
if matches!(tok, Tok::Rpar) {
|
||||
count -= 1;
|
||||
if count == 0 {
|
||||
fix_end = Some(end);
|
||||
break;
|
||||
Tok::Rpar => {
|
||||
count -= 1;
|
||||
if count == 0 {
|
||||
fix_end = Some(range.end());
|
||||
break;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
match (fix_start, fix_end) {
|
||||
(Some(start), Some(end)) => Some(Range::new(start, end)),
|
||||
(Some(start), Some(end)) => Some(TextRange::new(start, end)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
@ -1013,182 +1002,175 @@ pub fn match_parens(start: Location, locator: &Locator) -> Option<Range> {
|
|||
/// Return the appropriate visual `TextRange` for any message that spans a `Stmt`.
|
||||
/// Specifically, this method returns the range of a function or class name,
|
||||
/// rather than that of the entire function or class body.
|
||||
pub fn identifier_range(stmt: &Stmt, locator: &Locator) -> Range {
|
||||
pub fn identifier_range(stmt: &Stmt, locator: &Locator) -> TextRange {
|
||||
if matches!(
|
||||
stmt.node,
|
||||
StmtKind::ClassDef { .. }
|
||||
| StmtKind::FunctionDef { .. }
|
||||
| StmtKind::AsyncFunctionDef { .. }
|
||||
) {
|
||||
let contents = locator.slice(stmt);
|
||||
for (start, tok, end) in lexer::lex_located(contents, Mode::Module, stmt.location).flatten()
|
||||
{
|
||||
let contents = &locator.contents()[stmt.range()];
|
||||
|
||||
for (tok, range) in lexer::lex_located(contents, Mode::Module, stmt.start()).flatten() {
|
||||
if matches!(tok, Tok::Name { .. }) {
|
||||
return Range::new(start, end);
|
||||
return range;
|
||||
}
|
||||
}
|
||||
error!("Failed to find identifier for {:?}", stmt);
|
||||
}
|
||||
Range::from(stmt)
|
||||
|
||||
stmt.range()
|
||||
}
|
||||
|
||||
/// Return the ranges of [`Tok::Name`] tokens within a specified node.
|
||||
pub fn find_names<'a, T>(
|
||||
located: &'a Located<T>,
|
||||
locator: &'a Locator,
|
||||
) -> impl Iterator<Item = Range> + 'a {
|
||||
let contents = locator.slice(located);
|
||||
lexer::lex_located(contents, Mode::Module, located.location)
|
||||
) -> impl Iterator<Item = TextRange> + 'a {
|
||||
let contents = locator.slice(located.range());
|
||||
|
||||
lexer::lex_located(contents, Mode::Module, located.start())
|
||||
.flatten()
|
||||
.filter(|(_, tok, _)| matches!(tok, Tok::Name { .. }))
|
||||
.map(|(start, _, end)| Range {
|
||||
location: start,
|
||||
end_location: end,
|
||||
})
|
||||
.filter(|(tok, _)| matches!(tok, Tok::Name { .. }))
|
||||
.map(|(_, range)| range)
|
||||
}
|
||||
|
||||
/// Return the `TextRange` of `name` in `Excepthandler`.
|
||||
pub fn excepthandler_name_range(handler: &Excepthandler, locator: &Locator) -> Option<Range> {
|
||||
pub fn excepthandler_name_range(handler: &Excepthandler, locator: &Locator) -> Option<TextRange> {
|
||||
let ExcepthandlerKind::ExceptHandler {
|
||||
name, type_, body, ..
|
||||
} = &handler.node;
|
||||
|
||||
match (name, type_) {
|
||||
(Some(_), Some(type_)) => {
|
||||
let type_end_location = type_.end_location.unwrap();
|
||||
let contents = locator.slice(Range::new(type_end_location, body[0].location));
|
||||
let range = lexer::lex_located(contents, Mode::Module, type_end_location)
|
||||
let contents = &locator.contents()[TextRange::new(type_.end(), body[0].start())];
|
||||
|
||||
lexer::lex_located(contents, Mode::Module, type_.end())
|
||||
.flatten()
|
||||
.tuple_windows()
|
||||
.find(|(tok, next_tok)| {
|
||||
matches!(tok.1, Tok::As) && matches!(next_tok.1, Tok::Name { .. })
|
||||
matches!(tok.0, Tok::As) && matches!(next_tok.0, Tok::Name { .. })
|
||||
})
|
||||
.map(|((..), (location, _, end_location))| Range::new(location, end_location));
|
||||
range
|
||||
.map(|((..), (_, range))| range)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the `TextRange` of `except` in `Excepthandler`.
|
||||
pub fn except_range(handler: &Excepthandler, locator: &Locator) -> Range {
|
||||
pub fn except_range(handler: &Excepthandler, locator: &Locator) -> TextRange {
|
||||
let ExcepthandlerKind::ExceptHandler { body, type_, .. } = &handler.node;
|
||||
let end = if let Some(type_) = type_ {
|
||||
type_.location
|
||||
type_.end()
|
||||
} else {
|
||||
body.first()
|
||||
.expect("Expected body to be non-empty")
|
||||
.location
|
||||
body.first().expect("Expected body to be non-empty").start()
|
||||
};
|
||||
let contents = locator.slice(Range {
|
||||
location: handler.location,
|
||||
end_location: end,
|
||||
});
|
||||
let range = lexer::lex_located(contents, Mode::Module, handler.location)
|
||||
let contents = &locator.contents()[TextRange::new(handler.start(), end)];
|
||||
|
||||
lexer::lex_located(contents, Mode::Module, handler.start())
|
||||
.flatten()
|
||||
.find(|(_, kind, _)| matches!(kind, Tok::Except { .. }))
|
||||
.map(|(location, _, end_location)| Range {
|
||||
location,
|
||||
end_location,
|
||||
})
|
||||
.expect("Failed to find `except` range");
|
||||
range
|
||||
.find(|(kind, _)| matches!(kind, Tok::Except { .. }))
|
||||
.map(|(_, range)| range)
|
||||
.expect("Failed to find `except` range")
|
||||
}
|
||||
|
||||
/// Return the `TextRange` of `else` in `For`, `AsyncFor`, and `While` statements.
|
||||
pub fn else_range(stmt: &Stmt, locator: &Locator) -> Option<Range> {
|
||||
pub fn else_range(stmt: &Stmt, locator: &Locator) -> Option<TextRange> {
|
||||
match &stmt.node {
|
||||
StmtKind::For { body, orelse, .. }
|
||||
| StmtKind::AsyncFor { body, orelse, .. }
|
||||
| StmtKind::While { body, orelse, .. }
|
||||
if !orelse.is_empty() =>
|
||||
{
|
||||
let body_end = body
|
||||
.last()
|
||||
.expect("Expected body to be non-empty")
|
||||
.end_location
|
||||
.unwrap();
|
||||
let contents = locator.slice(Range {
|
||||
location: body_end,
|
||||
end_location: orelse
|
||||
.first()
|
||||
.expect("Expected orelse to be non-empty")
|
||||
.location,
|
||||
});
|
||||
let range = lexer::lex_located(contents, Mode::Module, body_end)
|
||||
let body_end = body.last().expect("Expected body to be non-empty").end();
|
||||
let or_else_start = orelse
|
||||
.first()
|
||||
.expect("Expected orelse to be non-empty")
|
||||
.start();
|
||||
let contents = &locator.contents()[TextRange::new(body_end, or_else_start)];
|
||||
|
||||
lexer::lex_located(contents, Mode::Module, body_end)
|
||||
.flatten()
|
||||
.find(|(_, kind, _)| matches!(kind, Tok::Else))
|
||||
.map(|(location, _, end_location)| Range {
|
||||
location,
|
||||
end_location,
|
||||
});
|
||||
range
|
||||
.find(|(kind, _)| matches!(kind, Tok::Else))
|
||||
.map(|(_, range)| range)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the `TextRange` of the first `Tok::Colon` token in a `TextRange`.
|
||||
pub fn first_colon_range(range: Range, locator: &Locator) -> Option<Range> {
|
||||
let contents = locator.slice(range);
|
||||
let range = lexer::lex_located(contents, Mode::Module, range.location)
|
||||
pub fn first_colon_range(range: TextRange, locator: &Locator) -> Option<TextRange> {
|
||||
let contents = &locator.contents()[range];
|
||||
let range = lexer::lex_located(contents, Mode::Module, range.start())
|
||||
.flatten()
|
||||
.find(|(_, kind, _)| matches!(kind, Tok::Colon))
|
||||
.map(|(location, _, end_location)| Range {
|
||||
location,
|
||||
end_location,
|
||||
});
|
||||
.find(|(kind, _)| matches!(kind, Tok::Colon))
|
||||
.map(|(_, range)| range);
|
||||
range
|
||||
}
|
||||
|
||||
/// Return the `TextRange` of the first `Elif` or `Else` token in an `If` statement.
|
||||
pub fn elif_else_range(stmt: &Stmt, locator: &Locator) -> Option<Range> {
|
||||
pub fn elif_else_range(stmt: &Stmt, locator: &Locator) -> Option<TextRange> {
|
||||
let StmtKind::If { body, orelse, .. } = &stmt.node else {
|
||||
return None;
|
||||
};
|
||||
|
||||
let start = body
|
||||
.last()
|
||||
.expect("Expected body to be non-empty")
|
||||
.end_location
|
||||
.unwrap();
|
||||
let start = body.last().expect("Expected body to be non-empty").end();
|
||||
|
||||
let end = match &orelse[..] {
|
||||
[Stmt {
|
||||
node: StmtKind::If { test, .. },
|
||||
..
|
||||
}] => test.location,
|
||||
[stmt, ..] => stmt.location,
|
||||
}] => test.start(),
|
||||
[stmt, ..] => stmt.start(),
|
||||
_ => return None,
|
||||
};
|
||||
let contents = locator.slice(Range::new(start, end));
|
||||
let range = lexer::lex_located(contents, Mode::Module, start)
|
||||
|
||||
let contents = &locator.contents()[TextRange::new(start, end)];
|
||||
lexer::lex_located(contents, Mode::Module, start)
|
||||
.flatten()
|
||||
.find(|(_, kind, _)| matches!(kind, Tok::Elif | Tok::Else))
|
||||
.map(|(location, _, end_location)| Range {
|
||||
location,
|
||||
end_location,
|
||||
});
|
||||
range
|
||||
.find(|(kind, _)| matches!(kind, Tok::Elif | Tok::Else))
|
||||
.map(|(_, range)| range)
|
||||
}
|
||||
|
||||
/// Return `true` if the line immediately preceding a `Stmt` is a continuation
|
||||
/// line, as tracked by the [`Indexer`].
|
||||
pub fn preceded_by_continuation(stmt: &Stmt, indexer: &Indexer) -> bool {
|
||||
stmt.location.row() > 1
|
||||
&& indexer
|
||||
.continuation_lines()
|
||||
.contains(&(stmt.location.row() - 1))
|
||||
pub fn preceded_by_continuation(stmt: &Stmt, indexer: &Indexer, locator: &Locator) -> bool {
|
||||
let previous_line_end = locator.line_start(stmt.start());
|
||||
let newline_pos = usize::from(previous_line_end).saturating_sub(1);
|
||||
|
||||
// Compute start of preceding line
|
||||
let newline_len = match locator.contents().as_bytes()[newline_pos] {
|
||||
b'\n' => {
|
||||
if locator
|
||||
.contents()
|
||||
.as_bytes()
|
||||
.get(newline_pos.saturating_sub(1))
|
||||
== Some(&b'\r')
|
||||
{
|
||||
2
|
||||
} else {
|
||||
1
|
||||
}
|
||||
}
|
||||
b'\r' => 1,
|
||||
// No preceding line
|
||||
_ => return false,
|
||||
};
|
||||
|
||||
// See if the position is in the continuation line starts
|
||||
indexer.is_continuation(previous_line_end - TextSize::from(newline_len), locator)
|
||||
}
|
||||
|
||||
/// Return `true` if a `Stmt` appears to be part of a multi-statement line, with
|
||||
/// other statements preceding it.
|
||||
pub fn preceded_by_multi_statement_line(stmt: &Stmt, locator: &Locator, indexer: &Indexer) -> bool {
|
||||
match_leading_content(stmt, locator) || preceded_by_continuation(stmt, indexer)
|
||||
has_leading_content(stmt, locator) || preceded_by_continuation(stmt, indexer, locator)
|
||||
}
|
||||
|
||||
/// Return `true` if a `Stmt` appears to be part of a multi-statement line, with
|
||||
/// other statements following it.
|
||||
pub fn followed_by_multi_statement_line(stmt: &Stmt, locator: &Locator) -> bool {
|
||||
match_trailing_content(stmt, locator)
|
||||
has_trailing_content(stmt, locator)
|
||||
}
|
||||
|
||||
/// Return `true` if a `Stmt` is a docstring.
|
||||
|
@ -1370,7 +1352,7 @@ pub fn locate_cmpops(contents: &str) -> Vec<LocatedCmpop> {
|
|||
let mut ops: Vec<LocatedCmpop> = vec![];
|
||||
let mut count: usize = 0;
|
||||
loop {
|
||||
let Some((start, tok, end)) = tok_iter.next() else {
|
||||
let Some((tok, range)) = tok_iter.next() else {
|
||||
break;
|
||||
};
|
||||
if matches!(tok, Tok::Lpar) {
|
||||
|
@ -1383,42 +1365,46 @@ pub fn locate_cmpops(contents: &str) -> Vec<LocatedCmpop> {
|
|||
if count == 0 {
|
||||
match tok {
|
||||
Tok::Not => {
|
||||
if let Some((_, _, end)) =
|
||||
tok_iter.next_if(|(_, tok, _)| matches!(tok, Tok::In))
|
||||
if let Some((_, next_range)) =
|
||||
tok_iter.next_if(|(tok, _)| matches!(tok, Tok::In))
|
||||
{
|
||||
ops.push(LocatedCmpop::new(start, end, Cmpop::NotIn));
|
||||
ops.push(LocatedCmpop::new(
|
||||
range.start(),
|
||||
next_range.end(),
|
||||
Cmpop::NotIn,
|
||||
));
|
||||
}
|
||||
}
|
||||
Tok::In => {
|
||||
ops.push(LocatedCmpop::new(start, end, Cmpop::In));
|
||||
ops.push(LocatedCmpop::with_range(Cmpop::In, range));
|
||||
}
|
||||
Tok::Is => {
|
||||
let op = if let Some((_, _, end)) =
|
||||
tok_iter.next_if(|(_, tok, _)| matches!(tok, Tok::Not))
|
||||
let op = if let Some((_, next_range)) =
|
||||
tok_iter.next_if(|(tok, _)| matches!(tok, Tok::Not))
|
||||
{
|
||||
LocatedCmpop::new(start, end, Cmpop::IsNot)
|
||||
LocatedCmpop::new(range.start(), next_range.end(), Cmpop::IsNot)
|
||||
} else {
|
||||
LocatedCmpop::new(start, end, Cmpop::Is)
|
||||
LocatedCmpop::with_range(Cmpop::Is, range)
|
||||
};
|
||||
ops.push(op);
|
||||
}
|
||||
Tok::NotEqual => {
|
||||
ops.push(LocatedCmpop::new(start, end, Cmpop::NotEq));
|
||||
ops.push(LocatedCmpop::with_range(Cmpop::NotEq, range));
|
||||
}
|
||||
Tok::EqEqual => {
|
||||
ops.push(LocatedCmpop::new(start, end, Cmpop::Eq));
|
||||
ops.push(LocatedCmpop::with_range(Cmpop::Eq, range));
|
||||
}
|
||||
Tok::GreaterEqual => {
|
||||
ops.push(LocatedCmpop::new(start, end, Cmpop::GtE));
|
||||
ops.push(LocatedCmpop::with_range(Cmpop::GtE, range));
|
||||
}
|
||||
Tok::Greater => {
|
||||
ops.push(LocatedCmpop::new(start, end, Cmpop::Gt));
|
||||
ops.push(LocatedCmpop::with_range(Cmpop::Gt, range));
|
||||
}
|
||||
Tok::LessEqual => {
|
||||
ops.push(LocatedCmpop::new(start, end, Cmpop::LtE));
|
||||
ops.push(LocatedCmpop::with_range(Cmpop::LtE, range));
|
||||
}
|
||||
Tok::Less => {
|
||||
ops.push(LocatedCmpop::new(start, end, Cmpop::Lt));
|
||||
ops.push(LocatedCmpop::with_range(Cmpop::Lt, range));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
@ -1524,15 +1510,15 @@ mod tests {
|
|||
use std::borrow::Cow;
|
||||
|
||||
use anyhow::Result;
|
||||
use ruff_text_size::{TextLen, TextRange, TextSize};
|
||||
use rustpython_parser as parser;
|
||||
use rustpython_parser::ast::{Cmpop, Location};
|
||||
use rustpython_parser::ast::Cmpop;
|
||||
|
||||
use crate::helpers::{
|
||||
elif_else_range, else_range, first_colon_range, identifier_range, locate_cmpops,
|
||||
match_trailing_content, resolve_imported_module_path, LocatedCmpop,
|
||||
elif_else_range, else_range, first_colon_range, has_trailing_content, identifier_range,
|
||||
locate_cmpops, resolve_imported_module_path, LocatedCmpop,
|
||||
};
|
||||
use crate::source_code::Locator;
|
||||
use crate::types::Range;
|
||||
|
||||
#[test]
|
||||
fn trailing_content() -> Result<()> {
|
||||
|
@ -1540,25 +1526,25 @@ mod tests {
|
|||
let program = parser::parse_program(contents, "<filename>")?;
|
||||
let stmt = program.first().unwrap();
|
||||
let locator = Locator::new(contents);
|
||||
assert!(!match_trailing_content(stmt, &locator));
|
||||
assert!(!has_trailing_content(stmt, &locator));
|
||||
|
||||
let contents = "x = 1; y = 2";
|
||||
let program = parser::parse_program(contents, "<filename>")?;
|
||||
let stmt = program.first().unwrap();
|
||||
let locator = Locator::new(contents);
|
||||
assert!(match_trailing_content(stmt, &locator));
|
||||
assert!(has_trailing_content(stmt, &locator));
|
||||
|
||||
let contents = "x = 1 ";
|
||||
let program = parser::parse_program(contents, "<filename>")?;
|
||||
let stmt = program.first().unwrap();
|
||||
let locator = Locator::new(contents);
|
||||
assert!(!match_trailing_content(stmt, &locator));
|
||||
assert!(!has_trailing_content(stmt, &locator));
|
||||
|
||||
let contents = "x = 1 # Comment";
|
||||
let program = parser::parse_program(contents, "<filename>")?;
|
||||
let stmt = program.first().unwrap();
|
||||
let locator = Locator::new(contents);
|
||||
assert!(!match_trailing_content(stmt, &locator));
|
||||
assert!(!has_trailing_content(stmt, &locator));
|
||||
|
||||
let contents = r#"
|
||||
x = 1
|
||||
|
@ -1568,7 +1554,7 @@ y = 2
|
|||
let program = parser::parse_program(contents, "<filename>")?;
|
||||
let stmt = program.first().unwrap();
|
||||
let locator = Locator::new(contents);
|
||||
assert!(!match_trailing_content(stmt, &locator));
|
||||
assert!(!has_trailing_content(stmt, &locator));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -1581,7 +1567,7 @@ y = 2
|
|||
let locator = Locator::new(contents);
|
||||
assert_eq!(
|
||||
identifier_range(stmt, &locator),
|
||||
Range::new(Location::new(1, 4), Location::new(1, 5),)
|
||||
TextRange::new(TextSize::from(4), TextSize::from(5))
|
||||
);
|
||||
|
||||
let contents = r#"
|
||||
|
@ -1595,7 +1581,7 @@ def \
|
|||
let locator = Locator::new(contents);
|
||||
assert_eq!(
|
||||
identifier_range(stmt, &locator),
|
||||
Range::new(Location::new(2, 2), Location::new(2, 3),)
|
||||
TextRange::new(TextSize::from(8), TextSize::from(9))
|
||||
);
|
||||
|
||||
let contents = "class Class(): pass".trim();
|
||||
|
@ -1604,7 +1590,7 @@ def \
|
|||
let locator = Locator::new(contents);
|
||||
assert_eq!(
|
||||
identifier_range(stmt, &locator),
|
||||
Range::new(Location::new(1, 6), Location::new(1, 11),)
|
||||
TextRange::new(TextSize::from(6), TextSize::from(11))
|
||||
);
|
||||
|
||||
let contents = "class Class: pass".trim();
|
||||
|
@ -1613,7 +1599,7 @@ def \
|
|||
let locator = Locator::new(contents);
|
||||
assert_eq!(
|
||||
identifier_range(stmt, &locator),
|
||||
Range::new(Location::new(1, 6), Location::new(1, 11),)
|
||||
TextRange::new(TextSize::from(6), TextSize::from(11))
|
||||
);
|
||||
|
||||
let contents = r#"
|
||||
|
@ -1627,7 +1613,7 @@ class Class():
|
|||
let locator = Locator::new(contents);
|
||||
assert_eq!(
|
||||
identifier_range(stmt, &locator),
|
||||
Range::new(Location::new(2, 6), Location::new(2, 11),)
|
||||
TextRange::new(TextSize::from(19), TextSize::from(24))
|
||||
);
|
||||
|
||||
let contents = r#"x = y + 1"#.trim();
|
||||
|
@ -1636,7 +1622,7 @@ class Class():
|
|||
let locator = Locator::new(contents);
|
||||
assert_eq!(
|
||||
identifier_range(stmt, &locator),
|
||||
Range::new(Location::new(1, 0), Location::new(1, 9),)
|
||||
TextRange::new(TextSize::from(0), TextSize::from(9))
|
||||
);
|
||||
|
||||
Ok(())
|
||||
|
@ -1692,10 +1678,11 @@ else:
|
|||
let stmt = program.first().unwrap();
|
||||
let locator = Locator::new(contents);
|
||||
let range = else_range(stmt, &locator).unwrap();
|
||||
assert_eq!(range.location.row(), 3);
|
||||
assert_eq!(range.location.column(), 0);
|
||||
assert_eq!(range.end_location.row(), 3);
|
||||
assert_eq!(range.end_location.column(), 4);
|
||||
assert_eq!(&contents[range], "else");
|
||||
assert_eq!(
|
||||
range,
|
||||
TextRange::new(TextSize::from(21), TextSize::from(25))
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -1704,14 +1691,12 @@ else:
|
|||
let contents = "with a: pass";
|
||||
let locator = Locator::new(contents);
|
||||
let range = first_colon_range(
|
||||
Range::new(Location::new(1, 0), Location::new(1, contents.len())),
|
||||
TextRange::new(TextSize::from(0), contents.text_len()),
|
||||
&locator,
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(range.location.row(), 1);
|
||||
assert_eq!(range.location.column(), 6);
|
||||
assert_eq!(range.end_location.row(), 1);
|
||||
assert_eq!(range.end_location.column(), 7);
|
||||
assert_eq!(&contents[range], ":");
|
||||
assert_eq!(range, TextRange::new(TextSize::from(6), TextSize::from(7)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -1727,10 +1712,9 @@ elif b:
|
|||
let stmt = program.first().unwrap();
|
||||
let locator = Locator::new(contents);
|
||||
let range = elif_else_range(stmt, &locator).unwrap();
|
||||
assert_eq!(range.location.row(), 3);
|
||||
assert_eq!(range.location.column(), 0);
|
||||
assert_eq!(range.end_location.row(), 3);
|
||||
assert_eq!(range.end_location.column(), 4);
|
||||
assert_eq!(range.start(), TextSize::from(14));
|
||||
assert_eq!(range.end(), TextSize::from(18));
|
||||
|
||||
let contents = "
|
||||
if a:
|
||||
...
|
||||
|
@ -1742,10 +1726,9 @@ else:
|
|||
let stmt = program.first().unwrap();
|
||||
let locator = Locator::new(contents);
|
||||
let range = elif_else_range(stmt, &locator).unwrap();
|
||||
assert_eq!(range.location.row(), 3);
|
||||
assert_eq!(range.location.column(), 0);
|
||||
assert_eq!(range.end_location.row(), 3);
|
||||
assert_eq!(range.end_location.column(), 4);
|
||||
assert_eq!(range.start(), TextSize::from(14));
|
||||
assert_eq!(range.end(), TextSize::from(18));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -1754,8 +1737,8 @@ else:
|
|||
assert_eq!(
|
||||
locate_cmpops("x == 1"),
|
||||
vec![LocatedCmpop::new(
|
||||
Location::new(1, 2),
|
||||
Location::new(1, 4),
|
||||
TextSize::from(2),
|
||||
TextSize::from(4),
|
||||
Cmpop::Eq
|
||||
)]
|
||||
);
|
||||
|
@ -1763,8 +1746,8 @@ else:
|
|||
assert_eq!(
|
||||
locate_cmpops("x != 1"),
|
||||
vec![LocatedCmpop::new(
|
||||
Location::new(1, 2),
|
||||
Location::new(1, 4),
|
||||
TextSize::from(2),
|
||||
TextSize::from(4),
|
||||
Cmpop::NotEq
|
||||
)]
|
||||
);
|
||||
|
@ -1772,8 +1755,8 @@ else:
|
|||
assert_eq!(
|
||||
locate_cmpops("x is 1"),
|
||||
vec![LocatedCmpop::new(
|
||||
Location::new(1, 2),
|
||||
Location::new(1, 4),
|
||||
TextSize::from(2),
|
||||
TextSize::from(4),
|
||||
Cmpop::Is
|
||||
)]
|
||||
);
|
||||
|
@ -1781,8 +1764,8 @@ else:
|
|||
assert_eq!(
|
||||
locate_cmpops("x is not 1"),
|
||||
vec![LocatedCmpop::new(
|
||||
Location::new(1, 2),
|
||||
Location::new(1, 8),
|
||||
TextSize::from(2),
|
||||
TextSize::from(8),
|
||||
Cmpop::IsNot
|
||||
)]
|
||||
);
|
||||
|
@ -1790,8 +1773,8 @@ else:
|
|||
assert_eq!(
|
||||
locate_cmpops("x in 1"),
|
||||
vec![LocatedCmpop::new(
|
||||
Location::new(1, 2),
|
||||
Location::new(1, 4),
|
||||
TextSize::from(2),
|
||||
TextSize::from(4),
|
||||
Cmpop::In
|
||||
)]
|
||||
);
|
||||
|
@ -1799,8 +1782,8 @@ else:
|
|||
assert_eq!(
|
||||
locate_cmpops("x not in 1"),
|
||||
vec![LocatedCmpop::new(
|
||||
Location::new(1, 2),
|
||||
Location::new(1, 8),
|
||||
TextSize::from(2),
|
||||
TextSize::from(8),
|
||||
Cmpop::NotIn
|
||||
)]
|
||||
);
|
||||
|
@ -1808,8 +1791,8 @@ else:
|
|||
assert_eq!(
|
||||
locate_cmpops("x != (1 is not 2)"),
|
||||
vec![LocatedCmpop::new(
|
||||
Location::new(1, 2),
|
||||
Location::new(1, 4),
|
||||
TextSize::from(2),
|
||||
TextSize::from(4),
|
||||
Cmpop::NotEq
|
||||
)]
|
||||
);
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
use ruff_text_size::TextRange;
|
||||
use rustc_hash::FxHashMap;
|
||||
use rustpython_parser::ast::Location;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::types::Range;
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// A representation of an individual name imported via any import statement.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
|
@ -102,31 +102,28 @@ impl FutureImport for AnyImport<'_> {
|
|||
}
|
||||
|
||||
/// A representation of a module reference in an import statement.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub struct ModuleImport {
|
||||
module: String,
|
||||
location: Location,
|
||||
end_location: Location,
|
||||
range: TextRange,
|
||||
}
|
||||
|
||||
impl ModuleImport {
|
||||
pub fn new(module: String, location: Location, end_location: Location) -> Self {
|
||||
Self {
|
||||
module,
|
||||
location,
|
||||
end_location,
|
||||
}
|
||||
pub fn new(module: String, range: TextRange) -> Self {
|
||||
Self { module, range }
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&ModuleImport> for Range {
|
||||
fn from(import: &ModuleImport) -> Range {
|
||||
Range::new(import.location, import.end_location)
|
||||
impl From<&ModuleImport> for TextRange {
|
||||
fn from(import: &ModuleImport) -> TextRange {
|
||||
import.range
|
||||
}
|
||||
}
|
||||
|
||||
/// A representation of the import dependencies between modules.
|
||||
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Default, PartialEq, Eq)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub struct ImportMap {
|
||||
/// A map from dot-delimited module name to the list of imports in that module.
|
||||
module_to_imports: FxHashMap<String, Vec<ModuleImport>>,
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
use ruff_text_size::{TextLen, TextRange, TextSize};
|
||||
use std::iter::FusedIterator;
|
||||
use std::ops::Deref;
|
||||
|
||||
/// Extension trait for [`str`] that provides a [`UniversalNewlineIterator`].
|
||||
pub trait StrExt {
|
||||
|
@ -17,32 +19,42 @@ impl StrExt for str {
|
|||
/// ## Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruff_python_ast::newlines::UniversalNewlineIterator;
|
||||
///
|
||||
/// # use ruff_text_size::TextSize;
|
||||
/// # use ruff_python_ast::newlines::{Line, UniversalNewlineIterator};
|
||||
/// let mut lines = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop");
|
||||
///
|
||||
/// assert_eq!(lines.next_back(), Some("bop"));
|
||||
/// assert_eq!(lines.next(), Some("foo"));
|
||||
/// assert_eq!(lines.next_back(), Some("baz"));
|
||||
/// assert_eq!(lines.next(), Some("bar"));
|
||||
/// assert_eq!(lines.next_back(), Some(""));
|
||||
/// assert_eq!(lines.next_back(), Some(Line::new("bop", TextSize::from(14))));
|
||||
/// assert_eq!(lines.next(), Some(Line::new("foo\n", TextSize::from(0))));
|
||||
/// assert_eq!(lines.next_back(), Some(Line::new("baz\r", TextSize::from(10))));
|
||||
/// assert_eq!(lines.next(), Some(Line::new("bar\n", TextSize::from(4))));
|
||||
/// assert_eq!(lines.next_back(), Some(Line::new("\r\n", TextSize::from(8))));
|
||||
/// assert_eq!(lines.next(), None);
|
||||
/// ```
|
||||
pub struct UniversalNewlineIterator<'a> {
|
||||
text: &'a str,
|
||||
offset: TextSize,
|
||||
offset_back: TextSize,
|
||||
}
|
||||
|
||||
impl<'a> UniversalNewlineIterator<'a> {
|
||||
pub fn with_offset(text: &'a str, offset: TextSize) -> UniversalNewlineIterator<'a> {
|
||||
UniversalNewlineIterator {
|
||||
text,
|
||||
offset,
|
||||
offset_back: offset + text.text_len(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from(text: &'a str) -> UniversalNewlineIterator<'a> {
|
||||
UniversalNewlineIterator { text }
|
||||
Self::with_offset(text, TextSize::default())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for UniversalNewlineIterator<'a> {
|
||||
type Item = &'a str;
|
||||
type Item = Line<'a>;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<&'a str> {
|
||||
fn next(&mut self) -> Option<Line<'a>> {
|
||||
if self.text.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
@ -50,21 +62,32 @@ impl<'a> Iterator for UniversalNewlineIterator<'a> {
|
|||
let line = match self.text.find(['\n', '\r']) {
|
||||
// Non-last line
|
||||
Some(line_end) => {
|
||||
let (line, remainder) = self.text.split_at(line_end);
|
||||
|
||||
self.text = match remainder.as_bytes()[0] {
|
||||
let offset: usize = match self.text.as_bytes()[line_end] {
|
||||
// Explicit branch for `\n` as this is the most likely path
|
||||
b'\n' => &remainder[1..],
|
||||
b'\n' => 1,
|
||||
// '\r\n'
|
||||
b'\r' if remainder.as_bytes().get(1) == Some(&b'\n') => &remainder[2..],
|
||||
b'\r' if self.text.as_bytes().get(line_end + 1) == Some(&b'\n') => 2,
|
||||
// '\r'
|
||||
_ => &remainder[1..],
|
||||
_ => 1,
|
||||
};
|
||||
|
||||
let (text, remainder) = self.text.split_at(line_end + offset);
|
||||
|
||||
let line = Line {
|
||||
offset: self.offset,
|
||||
text,
|
||||
};
|
||||
|
||||
self.text = remainder;
|
||||
self.offset += text.text_len();
|
||||
|
||||
line
|
||||
}
|
||||
// Last line
|
||||
None => std::mem::take(&mut self.text),
|
||||
None => Line {
|
||||
offset: self.offset,
|
||||
text: std::mem::take(&mut self.text),
|
||||
},
|
||||
};
|
||||
|
||||
Some(line)
|
||||
|
@ -85,7 +108,7 @@ impl DoubleEndedIterator for UniversalNewlineIterator<'_> {
|
|||
let len = self.text.len();
|
||||
|
||||
// Trim any trailing newlines.
|
||||
self.text = match self.text.as_bytes()[len - 1] {
|
||||
let haystack = match self.text.as_bytes()[len - 1] {
|
||||
b'\n' if len > 1 && self.text.as_bytes()[len - 2] == b'\r' => &self.text[..len - 2],
|
||||
b'\n' | b'\r' => &self.text[..len - 1],
|
||||
_ => self.text,
|
||||
|
@ -93,16 +116,23 @@ impl DoubleEndedIterator for UniversalNewlineIterator<'_> {
|
|||
|
||||
// Find the end of the previous line. The previous line is the text up to, but not including
|
||||
// the newline character.
|
||||
let line = match self.text.rfind(['\n', '\r']) {
|
||||
let line = if let Some(line_end) = haystack.rfind(['\n', '\r']) {
|
||||
// '\n' or '\r' or '\r\n'
|
||||
Some(line_end) => {
|
||||
let (remainder, line) = self.text.split_at(line_end + 1);
|
||||
self.text = remainder;
|
||||
let (remainder, line) = self.text.split_at(line_end + 1);
|
||||
self.text = remainder;
|
||||
self.offset_back -= line.text_len();
|
||||
|
||||
line
|
||||
Line {
|
||||
text: line,
|
||||
offset: self.offset_back,
|
||||
}
|
||||
} else {
|
||||
// Last line
|
||||
None => std::mem::take(&mut self.text),
|
||||
let offset = self.offset_back - self.text.text_len();
|
||||
Line {
|
||||
text: std::mem::take(&mut self.text),
|
||||
offset,
|
||||
}
|
||||
};
|
||||
|
||||
Some(line)
|
||||
|
@ -113,16 +143,23 @@ impl FusedIterator for UniversalNewlineIterator<'_> {}
|
|||
|
||||
/// Like [`UniversalNewlineIterator`], but includes a trailing newline as an empty line.
|
||||
pub struct NewlineWithTrailingNewline<'a> {
|
||||
trailing: Option<&'a str>,
|
||||
trailing: Option<Line<'a>>,
|
||||
underlying: UniversalNewlineIterator<'a>,
|
||||
}
|
||||
|
||||
impl<'a> NewlineWithTrailingNewline<'a> {
|
||||
pub fn from(input: &'a str) -> NewlineWithTrailingNewline<'a> {
|
||||
Self::with_offset(input, TextSize::default())
|
||||
}
|
||||
|
||||
pub fn with_offset(input: &'a str, offset: TextSize) -> Self {
|
||||
NewlineWithTrailingNewline {
|
||||
underlying: UniversalNewlineIterator::from(input),
|
||||
underlying: UniversalNewlineIterator::with_offset(input, offset),
|
||||
trailing: if input.ends_with(['\r', '\n']) {
|
||||
Some("")
|
||||
Some(Line {
|
||||
text: "",
|
||||
offset: offset + input.text_len(),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
},
|
||||
|
@ -131,37 +168,159 @@ impl<'a> NewlineWithTrailingNewline<'a> {
|
|||
}
|
||||
|
||||
impl<'a> Iterator for NewlineWithTrailingNewline<'a> {
|
||||
type Item = &'a str;
|
||||
type Item = Line<'a>;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<&'a str> {
|
||||
fn next(&mut self) -> Option<Line<'a>> {
|
||||
self.underlying.next().or_else(|| self.trailing.take())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
pub struct Line<'a> {
|
||||
text: &'a str,
|
||||
offset: TextSize,
|
||||
}
|
||||
|
||||
impl<'a> Line<'a> {
|
||||
pub fn new(text: &'a str, offset: TextSize) -> Self {
|
||||
Self { text, offset }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub const fn start(&self) -> TextSize {
|
||||
self.offset
|
||||
}
|
||||
|
||||
/// Returns the byte offset where the line ends, including its terminating new line character.
|
||||
#[inline]
|
||||
pub fn full_end(&self) -> TextSize {
|
||||
self.offset + self.full_text_len()
|
||||
}
|
||||
|
||||
/// Returns the byte offset where the line ends, excluding its new line character
|
||||
#[inline]
|
||||
pub fn end(&self) -> TextSize {
|
||||
self.offset + self.as_str().text_len()
|
||||
}
|
||||
|
||||
/// Returns the range of the line, including its terminating new line character.
|
||||
#[inline]
|
||||
pub fn full_range(&self) -> TextRange {
|
||||
TextRange::at(self.offset, self.text.text_len())
|
||||
}
|
||||
|
||||
/// Returns the range of the line, excluding its terminating new line character
|
||||
#[inline]
|
||||
pub fn range(&self) -> TextRange {
|
||||
TextRange::new(self.start(), self.end())
|
||||
}
|
||||
|
||||
/// Returns the text of the line, excluding the terminating new line character.
|
||||
#[inline]
|
||||
pub fn as_str(&self) -> &'a str {
|
||||
let mut bytes = self.text.bytes().rev();
|
||||
|
||||
let newline_len = match bytes.next() {
|
||||
Some(b'\n') => {
|
||||
if bytes.next() == Some(b'\r') {
|
||||
2
|
||||
} else {
|
||||
1
|
||||
}
|
||||
}
|
||||
Some(b'\r') => 1,
|
||||
_ => 0,
|
||||
};
|
||||
|
||||
&self.text[..self.text.len() - newline_len]
|
||||
}
|
||||
|
||||
/// Returns the line's text, including the terminating new line character.
|
||||
#[inline]
|
||||
pub fn as_full_str(&self) -> &'a str {
|
||||
self.text
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn full_text_len(&self) -> TextSize {
|
||||
self.text.text_len()
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for Line<'_> {
|
||||
type Target = str;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.as_str()
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq<&str> for Line<'_> {
|
||||
fn eq(&self, other: &&str) -> bool {
|
||||
self.as_str() == *other
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq<Line<'_>> for &str {
|
||||
fn eq(&self, other: &Line<'_>) -> bool {
|
||||
*self == other.as_str()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::UniversalNewlineIterator;
|
||||
use crate::newlines::Line;
|
||||
use ruff_text_size::TextSize;
|
||||
|
||||
#[test]
|
||||
fn universal_newlines_empty_str() {
|
||||
let lines: Vec<_> = UniversalNewlineIterator::from("").collect();
|
||||
assert_eq!(lines, Vec::<&str>::default());
|
||||
assert_eq!(lines, Vec::<Line>::new());
|
||||
|
||||
let lines: Vec<_> = UniversalNewlineIterator::from("").rev().collect();
|
||||
assert_eq!(lines, Vec::<&str>::default());
|
||||
assert_eq!(lines, Vec::<Line>::new());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn universal_newlines_forward() {
|
||||
let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop").collect();
|
||||
assert_eq!(lines, vec!["foo", "bar", "", "baz", "bop"]);
|
||||
assert_eq!(
|
||||
lines,
|
||||
vec![
|
||||
Line::new("foo\n", TextSize::from(0)),
|
||||
Line::new("bar\n", TextSize::from(4)),
|
||||
Line::new("\r\n", TextSize::from(8)),
|
||||
Line::new("baz\r", TextSize::from(10)),
|
||||
Line::new("bop", TextSize::from(14)),
|
||||
]
|
||||
);
|
||||
|
||||
let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop\n").collect();
|
||||
assert_eq!(lines, vec!["foo", "bar", "", "baz", "bop"]);
|
||||
assert_eq!(
|
||||
lines,
|
||||
vec![
|
||||
Line::new("foo\n", TextSize::from(0)),
|
||||
Line::new("bar\n", TextSize::from(4)),
|
||||
Line::new("\r\n", TextSize::from(8)),
|
||||
Line::new("baz\r", TextSize::from(10)),
|
||||
Line::new("bop\n", TextSize::from(14)),
|
||||
]
|
||||
);
|
||||
|
||||
let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop\n\n").collect();
|
||||
assert_eq!(lines, vec!["foo", "bar", "", "baz", "bop", ""]);
|
||||
assert_eq!(
|
||||
lines,
|
||||
vec![
|
||||
Line::new("foo\n", TextSize::from(0)),
|
||||
Line::new("bar\n", TextSize::from(4)),
|
||||
Line::new("\r\n", TextSize::from(8)),
|
||||
Line::new("baz\r", TextSize::from(10)),
|
||||
Line::new("bop\n", TextSize::from(14)),
|
||||
Line::new("\n", TextSize::from(18)),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -169,24 +328,52 @@ mod tests {
|
|||
let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop")
|
||||
.rev()
|
||||
.collect();
|
||||
assert_eq!(lines, vec!["bop", "baz", "", "bar", "foo"]);
|
||||
assert_eq!(
|
||||
lines,
|
||||
vec![
|
||||
Line::new("bop", TextSize::from(14)),
|
||||
Line::new("baz\r", TextSize::from(10)),
|
||||
Line::new("\r\n", TextSize::from(8)),
|
||||
Line::new("bar\n", TextSize::from(4)),
|
||||
Line::new("foo\n", TextSize::from(0)),
|
||||
]
|
||||
);
|
||||
|
||||
let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\nbaz\rbop\n")
|
||||
.rev()
|
||||
.map(|line| line.as_str())
|
||||
.collect();
|
||||
|
||||
assert_eq!(lines, vec!["bop", "baz", "", "bar", "foo"]);
|
||||
assert_eq!(
|
||||
lines,
|
||||
vec![
|
||||
Line::new("bop\n", TextSize::from(13)),
|
||||
Line::new("baz\r", TextSize::from(9)),
|
||||
Line::new("\n", TextSize::from(8)),
|
||||
Line::new("bar\n", TextSize::from(4)),
|
||||
Line::new("foo\n", TextSize::from(0)),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn universal_newlines_mixed() {
|
||||
let mut lines = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop");
|
||||
|
||||
assert_eq!(lines.next_back(), Some("bop"));
|
||||
assert_eq!(lines.next(), Some("foo"));
|
||||
assert_eq!(lines.next_back(), Some("baz"));
|
||||
assert_eq!(lines.next(), Some("bar"));
|
||||
assert_eq!(lines.next_back(), Some(""));
|
||||
assert_eq!(
|
||||
lines.next_back(),
|
||||
Some(Line::new("bop", TextSize::from(14)))
|
||||
);
|
||||
assert_eq!(lines.next(), Some(Line::new("foo\n", TextSize::from(0))));
|
||||
assert_eq!(
|
||||
lines.next_back(),
|
||||
Some(Line::new("baz\r", TextSize::from(10)))
|
||||
);
|
||||
assert_eq!(lines.next(), Some(Line::new("bar\n", TextSize::from(4))));
|
||||
assert_eq!(
|
||||
lines.next_back(),
|
||||
Some(Line::new("\r\n", TextSize::from(8)))
|
||||
);
|
||||
assert_eq!(lines.next(), None);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,18 +1,15 @@
|
|||
use ruff_text_size::TextRange;
|
||||
use rustpython_parser::ast::{Expr, ExprKind, Keyword};
|
||||
|
||||
use crate::types::Range;
|
||||
|
||||
fn relocate_keyword(keyword: &mut Keyword, location: Range) {
|
||||
keyword.location = location.location;
|
||||
keyword.end_location = Some(location.end_location);
|
||||
fn relocate_keyword(keyword: &mut Keyword, location: TextRange) {
|
||||
keyword.range = location;
|
||||
relocate_expr(&mut keyword.node.value, location);
|
||||
}
|
||||
|
||||
/// Change an expression's location (recursively) to match a desired, fixed
|
||||
/// location.
|
||||
pub fn relocate_expr(expr: &mut Expr, location: Range) {
|
||||
expr.location = location.location;
|
||||
expr.end_location = Some(location.end_location);
|
||||
pub fn relocate_expr(expr: &mut Expr, location: TextRange) {
|
||||
expr.range = location;
|
||||
match &mut expr.node {
|
||||
ExprKind::BoolOp { values, .. } => {
|
||||
for expr in values {
|
||||
|
|
|
@ -3,10 +3,11 @@
|
|||
use std::ops::Deref;
|
||||
|
||||
use rustpython_parser::ast::{
|
||||
Alias, Arg, Arguments, Boolop, Cmpop, Comprehension, Constant, ConversionFlag, Excepthandler,
|
||||
Alias, Arg, Arguments, Boolop, Cmpop, Comprehension, Constant, Excepthandler,
|
||||
ExcepthandlerKind, Expr, ExprKind, MatchCase, Operator, Pattern, PatternKind, Stmt, StmtKind,
|
||||
Suite, Withitem,
|
||||
};
|
||||
use rustpython_parser::ConversionFlag;
|
||||
|
||||
use ruff_rustpython::vendor::{bytes, str};
|
||||
|
||||
|
|
|
@ -1,98 +1,135 @@
|
|||
//! Struct used to index source code, to enable efficient lookup of tokens that
|
||||
//! are omitted from the AST (e.g., commented lines).
|
||||
|
||||
use rustpython_parser::ast::Location;
|
||||
use crate::source_code::Locator;
|
||||
use ruff_text_size::{TextRange, TextSize};
|
||||
use rustpython_parser::lexer::LexResult;
|
||||
use rustpython_parser::Tok;
|
||||
|
||||
use crate::types::Range;
|
||||
|
||||
pub struct Indexer {
|
||||
commented_lines: Vec<usize>,
|
||||
continuation_lines: Vec<usize>,
|
||||
string_ranges: Vec<Range>,
|
||||
/// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping.
|
||||
comment_ranges: Vec<TextRange>,
|
||||
|
||||
/// Stores the start offset of continuation lines.
|
||||
continuation_lines: Vec<TextSize>,
|
||||
|
||||
/// The range of all triple quoted strings in the source document. The ranges are sorted by their
|
||||
/// [`TextRange::start`] position in increasing order. No two ranges are overlapping.
|
||||
triple_quoted_string_ranges: Vec<TextRange>,
|
||||
}
|
||||
|
||||
impl Indexer {
|
||||
/// Return a slice of all lines that include a comment.
|
||||
pub fn commented_lines(&self) -> &[usize] {
|
||||
&self.commented_lines
|
||||
}
|
||||
pub fn from_tokens(tokens: &[LexResult], locator: &Locator) -> Self {
|
||||
assert!(TextSize::try_from(locator.contents().len()).is_ok());
|
||||
|
||||
/// Return a slice of all lines that end with a continuation (backslash).
|
||||
pub fn continuation_lines(&self) -> &[usize] {
|
||||
&self.continuation_lines
|
||||
}
|
||||
|
||||
/// Return a slice of all ranges that include a triple-quoted string.
|
||||
pub fn string_ranges(&self) -> &[Range] {
|
||||
&self.string_ranges
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&[LexResult]> for Indexer {
|
||||
fn from(lxr: &[LexResult]) -> Self {
|
||||
let mut commented_lines = Vec::new();
|
||||
let mut continuation_lines = Vec::new();
|
||||
let mut string_ranges = Vec::new();
|
||||
let mut prev: Option<(&Location, &Tok, &Location)> = None;
|
||||
for (start, tok, end) in lxr.iter().flatten() {
|
||||
// Token, end
|
||||
let mut prev_end = TextSize::default();
|
||||
let mut prev_token: Option<&Tok> = None;
|
||||
let mut line_start = TextSize::default();
|
||||
|
||||
for (tok, range) in tokens.iter().flatten() {
|
||||
let trivia = &locator.contents()[TextRange::new(prev_end, range.start())];
|
||||
|
||||
// Get the trivia between the previous and the current token and detect any newlines.
|
||||
// This is necessary because `RustPython` doesn't emit `[Tok::Newline]` tokens
|
||||
// between any two tokens that form a continuation nor multiple newlines in a row.
|
||||
// That's why we have to extract the newlines "manually".
|
||||
for (index, text) in trivia.match_indices(['\n', '\r']) {
|
||||
if text == "\r" && trivia.as_bytes().get(index + 1) == Some(&b'\n') {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Newlines after a comment or new-line never form a continuation.
|
||||
if !matches!(
|
||||
prev_token,
|
||||
Some(Tok::Newline | Tok::NonLogicalNewline | Tok::Comment(..)) | None
|
||||
) {
|
||||
continuation_lines.push(line_start);
|
||||
}
|
||||
|
||||
// SAFETY: Safe because of the len assertion at the top of the function.
|
||||
#[allow(clippy::cast_possible_truncation)]
|
||||
{
|
||||
line_start = prev_end + TextSize::new((index + 1) as u32);
|
||||
}
|
||||
}
|
||||
|
||||
match tok {
|
||||
Tok::Comment(..) => commented_lines.push(start.row()),
|
||||
Tok::Comment(..) => {
|
||||
commented_lines.push(*range);
|
||||
}
|
||||
Tok::Newline | Tok::NonLogicalNewline => {
|
||||
line_start = range.end();
|
||||
}
|
||||
Tok::String {
|
||||
triple_quoted: true,
|
||||
..
|
||||
} => string_ranges.push(Range::new(*start, *end)),
|
||||
_ => (),
|
||||
} => string_ranges.push(*range),
|
||||
_ => {}
|
||||
}
|
||||
|
||||
if let Some((.., prev_tok, prev_end)) = prev {
|
||||
if !matches!(
|
||||
prev_tok,
|
||||
Tok::Newline | Tok::NonLogicalNewline | Tok::Comment(..)
|
||||
) {
|
||||
for line in prev_end.row()..start.row() {
|
||||
continuation_lines.push(line);
|
||||
}
|
||||
}
|
||||
}
|
||||
prev = Some((start, tok, end));
|
||||
prev_token = Some(tok);
|
||||
prev_end = range.end();
|
||||
}
|
||||
Self {
|
||||
commented_lines,
|
||||
comment_ranges: commented_lines,
|
||||
continuation_lines,
|
||||
string_ranges,
|
||||
triple_quoted_string_ranges: string_ranges,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the byte offset ranges of comments
|
||||
pub fn comment_ranges(&self) -> &[TextRange] {
|
||||
&self.comment_ranges
|
||||
}
|
||||
|
||||
/// Returns the line start positions of continuations (backslash).
|
||||
pub fn continuation_line_starts(&self) -> &[TextSize] {
|
||||
&self.continuation_lines
|
||||
}
|
||||
|
||||
/// Return a slice of all ranges that include a triple-quoted string. The ranges are sorted by
|
||||
/// [`TextRange::start`] in increasing order. No two ranges are overlapping.
|
||||
pub fn triple_quoted_string_ranges(&self) -> &[TextRange] {
|
||||
&self.triple_quoted_string_ranges
|
||||
}
|
||||
|
||||
pub fn is_continuation(&self, offset: TextSize, locator: &Locator) -> bool {
|
||||
let line_start = locator.line_start(offset);
|
||||
self.continuation_lines.binary_search(&line_start).is_ok()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use rustpython_parser::ast::Location;
|
||||
use ruff_text_size::{TextRange, TextSize};
|
||||
use rustpython_parser::lexer::LexResult;
|
||||
use rustpython_parser::{lexer, Mode};
|
||||
|
||||
use crate::source_code::Indexer;
|
||||
use crate::types::Range;
|
||||
use crate::source_code::{Indexer, Locator};
|
||||
|
||||
#[test]
|
||||
fn continuation() {
|
||||
let contents = r#"x = 1"#;
|
||||
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
|
||||
let indexer: Indexer = lxr.as_slice().into();
|
||||
assert_eq!(indexer.continuation_lines(), Vec::<usize>::new().as_slice());
|
||||
let indexer = Indexer::from_tokens(&lxr, &Locator::new(contents));
|
||||
assert_eq!(indexer.continuation_line_starts(), &[]);
|
||||
|
||||
let contents = r#"
|
||||
# Hello, world!
|
||||
# Hello, world!
|
||||
|
||||
x = 1
|
||||
|
||||
y = 2
|
||||
"#
|
||||
"#
|
||||
.trim();
|
||||
|
||||
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
|
||||
let indexer: Indexer = lxr.as_slice().into();
|
||||
assert_eq!(indexer.continuation_lines(), Vec::<usize>::new().as_slice());
|
||||
let indexer = Indexer::from_tokens(&lxr, &Locator::new(contents));
|
||||
assert_eq!(indexer.continuation_line_starts(), &[]);
|
||||
|
||||
let contents = r#"
|
||||
x = \
|
||||
|
@ -111,8 +148,20 @@ if True:
|
|||
"#
|
||||
.trim();
|
||||
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
|
||||
let indexer: Indexer = lxr.as_slice().into();
|
||||
assert_eq!(indexer.continuation_lines(), [1, 5, 6, 11]);
|
||||
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
|
||||
assert_eq!(
|
||||
indexer.continuation_line_starts(),
|
||||
[
|
||||
// row 1
|
||||
TextSize::from(0),
|
||||
// row 5
|
||||
TextSize::from(22),
|
||||
// row 6
|
||||
TextSize::from(32),
|
||||
// row 11
|
||||
TextSize::from(71),
|
||||
]
|
||||
);
|
||||
|
||||
let contents = r#"
|
||||
x = 1; import sys
|
||||
|
@ -131,16 +180,24 @@ import os
|
|||
"#
|
||||
.trim();
|
||||
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
|
||||
let indexer: Indexer = lxr.as_slice().into();
|
||||
assert_eq!(indexer.continuation_lines(), [9, 12]);
|
||||
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
|
||||
assert_eq!(
|
||||
indexer.continuation_line_starts(),
|
||||
[
|
||||
// row 9
|
||||
TextSize::from(84),
|
||||
// row 12
|
||||
TextSize::from(116)
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn string_ranges() {
|
||||
let contents = r#""this is a single-quoted string""#;
|
||||
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
|
||||
let indexer: Indexer = lxr.as_slice().into();
|
||||
assert_eq!(indexer.string_ranges(), &vec![]);
|
||||
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
|
||||
assert_eq!(indexer.triple_quoted_string_ranges(), []);
|
||||
|
||||
let contents = r#"
|
||||
"""
|
||||
|
@ -148,10 +205,10 @@ import os
|
|||
"""
|
||||
"#;
|
||||
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
|
||||
let indexer: Indexer = lxr.as_slice().into();
|
||||
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
|
||||
assert_eq!(
|
||||
indexer.string_ranges(),
|
||||
&vec![Range::new(Location::new(2, 12), Location::new(4, 15))]
|
||||
indexer.triple_quoted_string_ranges(),
|
||||
[TextRange::new(TextSize::from(13), TextSize::from(71))]
|
||||
);
|
||||
|
||||
let contents = r#"
|
||||
|
@ -160,10 +217,10 @@ import os
|
|||
"""
|
||||
"#;
|
||||
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
|
||||
let indexer: Indexer = lxr.as_slice().into();
|
||||
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
|
||||
assert_eq!(
|
||||
indexer.string_ranges(),
|
||||
&vec![Range::new(Location::new(2, 12), Location::new(4, 15))]
|
||||
indexer.triple_quoted_string_ranges(),
|
||||
[TextRange::new(TextSize::from(13), TextSize::from(107))]
|
||||
);
|
||||
|
||||
let contents = r#"
|
||||
|
@ -177,12 +234,12 @@ import os
|
|||
"""
|
||||
"#;
|
||||
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
|
||||
let indexer: Indexer = lxr.as_slice().into();
|
||||
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
|
||||
assert_eq!(
|
||||
indexer.string_ranges(),
|
||||
&vec![
|
||||
Range::new(Location::new(2, 12), Location::new(5, 15)),
|
||||
Range::new(Location::new(6, 12), Location::new(9, 15))
|
||||
indexer.triple_quoted_string_ranges(),
|
||||
&[
|
||||
TextRange::new(TextSize::from(13), TextSize::from(85)),
|
||||
TextRange::new(TextSize::from(98), TextSize::from(161))
|
||||
]
|
||||
);
|
||||
}
|
||||
|
|
|
@ -1,12 +1,14 @@
|
|||
use crate::source_code::SourceLocation;
|
||||
use ruff_text_size::{TextLen, TextRange, TextSize};
|
||||
use rustpython_parser::ast::Location;
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt;
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use std::num::NonZeroUsize;
|
||||
use std::ops::Deref;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Index for fast [`Location`] to [byte offset](TextSize) conversions.
|
||||
/// Index for fast [byte offset](TextSize) to [`SourceLocation`] conversions.
|
||||
///
|
||||
/// Cloning a [`LineIndex`] is cheap because it only requires bumping a reference count.
|
||||
#[derive(Clone)]
|
||||
|
@ -58,28 +60,63 @@ impl LineIndex {
|
|||
self.inner.kind
|
||||
}
|
||||
|
||||
/// Converts a [`Location`] to it's [byte offset](TextSize) in the source code.
|
||||
pub fn location_offset(&self, location: Location, contents: &str) -> TextSize {
|
||||
let line_index = OneIndexed::new(location.row()).unwrap();
|
||||
let line_range = self.line_range(line_index, contents);
|
||||
/// Returns the row and column index for an offset.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::TextSize;
|
||||
/// # use ruff_python_ast::source_code::{LineIndex, OneIndexed, SourceLocation};
|
||||
/// let source = "def a():\n pass";
|
||||
/// let index = LineIndex::from_source_text(source);
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// index.source_location(TextSize::from(0), source),
|
||||
/// SourceLocation { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(0) }
|
||||
/// );
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// index.source_location(TextSize::from(4), source),
|
||||
/// SourceLocation { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(4) }
|
||||
/// );
|
||||
/// assert_eq!(
|
||||
/// index.source_location(TextSize::from(13), source),
|
||||
/// SourceLocation { row: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(4) }
|
||||
/// );
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
///
|
||||
/// If the offset is out of bounds.
|
||||
pub fn source_location(&self, offset: TextSize, content: &str) -> SourceLocation {
|
||||
match self.line_starts().binary_search(&offset) {
|
||||
// Offset is at the start of a line
|
||||
Ok(row) => SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(row),
|
||||
column: OneIndexed::from_zero_indexed(0),
|
||||
},
|
||||
Err(next_row) => {
|
||||
// SAFETY: Safe because the index always contains an entry for the offset 0
|
||||
let row = next_row - 1;
|
||||
let mut line_start = self.line_starts()[row];
|
||||
|
||||
let column_offset = match self.kind() {
|
||||
IndexKind::Ascii => TextSize::try_from(location.column()).unwrap(),
|
||||
IndexKind::Utf8 => {
|
||||
let line = &contents[line_range];
|
||||
let column = if self.kind().is_ascii() {
|
||||
usize::from(offset) - usize::from(line_start)
|
||||
} else {
|
||||
// Don't count the BOM character as a column.
|
||||
if line_start == TextSize::from(0) && content.starts_with('\u{feff}') {
|
||||
line_start = '\u{feff}'.text_len();
|
||||
}
|
||||
|
||||
// Skip the bom character
|
||||
let bom_len =
|
||||
usize::from(line_index.to_zero_indexed() == 0 && line.starts_with('\u{feff}'));
|
||||
content[TextRange::new(line_start, offset)].chars().count()
|
||||
};
|
||||
|
||||
match line.char_indices().nth(location.column() + bom_len) {
|
||||
Some((offset, _)) => TextSize::try_from(offset).unwrap(),
|
||||
None => line_range.len(),
|
||||
SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(row),
|
||||
column: OneIndexed::from_zero_indexed(column),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
line_range.start() + column_offset
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the number of lines in the source code.
|
||||
|
@ -87,6 +124,35 @@ impl LineIndex {
|
|||
self.line_starts().len()
|
||||
}
|
||||
|
||||
/// Returns the row number for a given offset.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::TextSize;
|
||||
/// # use ruff_python_ast::source_code::{LineIndex, OneIndexed, SourceLocation};
|
||||
/// let source = "def a():\n pass";
|
||||
/// let index = LineIndex::from_source_text(source);
|
||||
///
|
||||
/// assert_eq!(index.line_index(TextSize::from(0)), OneIndexed::from_zero_indexed(0));
|
||||
/// assert_eq!(index.line_index(TextSize::from(4)), OneIndexed::from_zero_indexed(0));
|
||||
/// assert_eq!(index.line_index(TextSize::from(13)), OneIndexed::from_zero_indexed(1));
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
///
|
||||
/// If the offset is out of bounds.
|
||||
pub fn line_index(&self, offset: TextSize) -> OneIndexed {
|
||||
match self.line_starts().binary_search(&offset) {
|
||||
// Offset is at the start of a line
|
||||
Ok(row) => OneIndexed::from_zero_indexed(row),
|
||||
Err(row) => {
|
||||
// SAFETY: Safe because the index always contains an entry for the offset 0
|
||||
OneIndexed::from_zero_indexed(row - 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the [byte offset](TextSize) for the `line` with the given index.
|
||||
pub(crate) fn line_start(&self, line: OneIndexed, contents: &str) -> TextSize {
|
||||
let row_index = line.to_zero_indexed();
|
||||
|
@ -159,12 +225,19 @@ enum IndexKind {
|
|||
Utf8,
|
||||
}
|
||||
|
||||
impl IndexKind {
|
||||
const fn is_ascii(self) -> bool {
|
||||
matches!(self, IndexKind::Ascii)
|
||||
}
|
||||
}
|
||||
|
||||
/// Type-safe wrapper for a value whose logical range starts at `1`, for
|
||||
/// instance the line or column numbers in a file
|
||||
///
|
||||
/// Internally this is represented as a [`NonZeroUsize`], this enables some
|
||||
/// memory optimizations
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub struct OneIndexed(NonZeroUsize);
|
||||
|
||||
impl OneIndexed {
|
||||
|
@ -238,8 +311,8 @@ const fn unwrap<T: Copy>(option: Option<T>) -> T {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::source_code::line_index::LineIndex;
|
||||
use crate::source_code::{OneIndexed, SourceLocation};
|
||||
use ruff_text_size::TextSize;
|
||||
use rustpython_parser::ast::Location;
|
||||
|
||||
#[test]
|
||||
fn ascii_index() {
|
||||
|
@ -265,21 +338,38 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn ascii_byte_offset() {
|
||||
fn ascii_source_location() {
|
||||
let contents = "x = 1\ny = 2";
|
||||
let index = LineIndex::from_source_text(contents);
|
||||
|
||||
// First row.
|
||||
let loc = index.location_offset(Location::new(1, 0), contents);
|
||||
assert_eq!(loc, TextSize::from(0));
|
||||
let loc = index.source_location(TextSize::from(2), contents);
|
||||
assert_eq!(
|
||||
loc,
|
||||
SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(0),
|
||||
column: OneIndexed::from_zero_indexed(2)
|
||||
}
|
||||
);
|
||||
|
||||
// Second row.
|
||||
let loc = index.location_offset(Location::new(2, 0), contents);
|
||||
assert_eq!(loc, TextSize::from(6));
|
||||
let loc = index.source_location(TextSize::from(6), contents);
|
||||
assert_eq!(
|
||||
loc,
|
||||
SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(1),
|
||||
column: OneIndexed::from_zero_indexed(0)
|
||||
}
|
||||
);
|
||||
|
||||
// One-past-the-end.
|
||||
let loc = index.location_offset(Location::new(3, 0), contents);
|
||||
assert_eq!(loc, TextSize::from(11));
|
||||
let loc = index.source_location(TextSize::from(11), contents);
|
||||
assert_eq!(
|
||||
loc,
|
||||
SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(1),
|
||||
column: OneIndexed::from_zero_indexed(5)
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -289,16 +379,25 @@ mod tests {
|
|||
assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(6)]);
|
||||
|
||||
assert_eq!(
|
||||
index.location_offset(Location::new(1, 4), contents),
|
||||
TextSize::from(4)
|
||||
index.source_location(TextSize::from(4), contents),
|
||||
SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(0),
|
||||
column: OneIndexed::from_zero_indexed(4)
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
index.location_offset(Location::new(2, 0), contents),
|
||||
TextSize::from(6)
|
||||
index.source_location(TextSize::from(6), contents),
|
||||
SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(1),
|
||||
column: OneIndexed::from_zero_indexed(0)
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
index.location_offset(Location::new(2, 1), contents),
|
||||
TextSize::from(7)
|
||||
index.source_location(TextSize::from(7), contents),
|
||||
SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(1),
|
||||
column: OneIndexed::from_zero_indexed(1)
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -309,16 +408,25 @@ mod tests {
|
|||
assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(7)]);
|
||||
|
||||
assert_eq!(
|
||||
index.location_offset(Location::new(1, 4), contents),
|
||||
TextSize::from(4)
|
||||
index.source_location(TextSize::from(4), contents),
|
||||
SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(0),
|
||||
column: OneIndexed::from_zero_indexed(4)
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
index.location_offset(Location::new(2, 0), contents),
|
||||
TextSize::from(7)
|
||||
index.source_location(TextSize::from(7), contents),
|
||||
SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(1),
|
||||
column: OneIndexed::from_zero_indexed(0)
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
index.location_offset(Location::new(2, 1), contents),
|
||||
TextSize::from(8)
|
||||
index.source_location(TextSize::from(8), contents),
|
||||
SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(1),
|
||||
column: OneIndexed::from_zero_indexed(1)
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -367,16 +475,25 @@ mod tests {
|
|||
|
||||
// Second '
|
||||
assert_eq!(
|
||||
index.location_offset(Location::new(1, 6), contents),
|
||||
TextSize::from(9)
|
||||
index.source_location(TextSize::from(9), contents),
|
||||
SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(0),
|
||||
column: OneIndexed::from_zero_indexed(6)
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
index.location_offset(Location::new(2, 0), contents),
|
||||
TextSize::from(11)
|
||||
index.source_location(TextSize::from(11), contents),
|
||||
SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(1),
|
||||
column: OneIndexed::from_zero_indexed(0)
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
index.location_offset(Location::new(2, 1), contents),
|
||||
TextSize::from(12)
|
||||
index.source_location(TextSize::from(12), contents),
|
||||
SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(1),
|
||||
column: OneIndexed::from_zero_indexed(1)
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -392,16 +509,25 @@ mod tests {
|
|||
|
||||
// Second '
|
||||
assert_eq!(
|
||||
index.location_offset(Location::new(1, 6), contents),
|
||||
TextSize::from(9)
|
||||
index.source_location(TextSize::from(9), contents),
|
||||
SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(0),
|
||||
column: OneIndexed::from_zero_indexed(6)
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
index.location_offset(Location::new(2, 0), contents),
|
||||
TextSize::from(12)
|
||||
index.source_location(TextSize::from(12), contents),
|
||||
SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(1),
|
||||
column: OneIndexed::from_zero_indexed(0)
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
index.location_offset(Location::new(2, 1), contents),
|
||||
TextSize::from(13)
|
||||
index.source_location(TextSize::from(13), contents),
|
||||
SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(1),
|
||||
column: OneIndexed::from_zero_indexed(1)
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -415,23 +541,51 @@ mod tests {
|
|||
);
|
||||
|
||||
// First row.
|
||||
let loc = index.location_offset(Location::new(1, 0), contents);
|
||||
assert_eq!(loc, TextSize::from(0));
|
||||
let loc = index.source_location(TextSize::from(0), contents);
|
||||
assert_eq!(
|
||||
loc,
|
||||
SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(0),
|
||||
column: OneIndexed::from_zero_indexed(0)
|
||||
}
|
||||
);
|
||||
|
||||
let loc = index.location_offset(Location::new(1, 5), contents);
|
||||
assert_eq!(loc, TextSize::from(5));
|
||||
assert_eq!(&"x = '☃'\ny = 2"[usize::from(loc)..], "☃'\ny = 2");
|
||||
let loc = index.source_location(TextSize::from(5), contents);
|
||||
assert_eq!(
|
||||
loc,
|
||||
SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(0),
|
||||
column: OneIndexed::from_zero_indexed(5)
|
||||
}
|
||||
);
|
||||
|
||||
let loc = index.location_offset(Location::new(1, 6), contents);
|
||||
assert_eq!(loc, TextSize::from(8));
|
||||
assert_eq!(&"x = '☃'\ny = 2"[usize::from(loc)..], "'\ny = 2");
|
||||
let loc = index.source_location(TextSize::from(8), contents);
|
||||
assert_eq!(
|
||||
loc,
|
||||
SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(0),
|
||||
column: OneIndexed::from_zero_indexed(6)
|
||||
}
|
||||
);
|
||||
|
||||
// Second row.
|
||||
let loc = index.location_offset(Location::new(2, 0), contents);
|
||||
assert_eq!(loc, TextSize::from(10));
|
||||
let loc = index.source_location(TextSize::from(10), contents);
|
||||
assert_eq!(
|
||||
loc,
|
||||
SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(1),
|
||||
column: OneIndexed::from_zero_indexed(0)
|
||||
}
|
||||
);
|
||||
|
||||
// One-past-the-end.
|
||||
let loc = index.location_offset(Location::new(3, 0), contents);
|
||||
assert_eq!(loc, TextSize::from(15));
|
||||
let loc = index.source_location(TextSize::from(15), contents);
|
||||
assert_eq!(
|
||||
loc,
|
||||
SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(1),
|
||||
column: OneIndexed::from_zero_indexed(5)
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,61 +1,399 @@
|
|||
//! Struct used to efficiently slice source code at byte offsets.
|
||||
|
||||
use crate::source_code::line_index::LineIndex;
|
||||
use crate::source_code::SourceCode;
|
||||
use crate::source_code::{LineIndex, OneIndexed, SourceCode, SourceLocation};
|
||||
use once_cell::unsync::OnceCell;
|
||||
use ruff_text_size::TextSize;
|
||||
use rustpython_parser::ast::Location;
|
||||
|
||||
use crate::types::Range;
|
||||
use ruff_text_size::{TextLen, TextRange, TextSize};
|
||||
use std::ops::Add;
|
||||
|
||||
pub struct Locator<'a> {
|
||||
contents: &'a str,
|
||||
line_index: OnceCell<LineIndex>,
|
||||
index: OnceCell<LineIndex>,
|
||||
}
|
||||
|
||||
impl<'a> Locator<'a> {
|
||||
pub const fn new(contents: &'a str) -> Self {
|
||||
Self {
|
||||
contents,
|
||||
line_index: OnceCell::new(),
|
||||
index: OnceCell::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_or_init_index(&self) -> &LineIndex {
|
||||
self.line_index
|
||||
#[deprecated(
|
||||
note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead."
|
||||
)]
|
||||
pub fn compute_line_index(&self, offset: TextSize) -> OneIndexed {
|
||||
self.to_index().line_index(offset)
|
||||
}
|
||||
|
||||
#[deprecated(
|
||||
note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead."
|
||||
)]
|
||||
pub fn compute_source_location(&self, offset: TextSize) -> SourceLocation {
|
||||
self.to_source_code().source_location(offset)
|
||||
}
|
||||
|
||||
fn to_index(&self) -> &LineIndex {
|
||||
self.index
|
||||
.get_or_init(|| LineIndex::from_source_text(self.contents))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn to_source_code(&self) -> SourceCode<'a, '_> {
|
||||
pub fn line_index(&self) -> Option<&LineIndex> {
|
||||
self.index.get()
|
||||
}
|
||||
|
||||
pub fn to_source_code(&self) -> SourceCode {
|
||||
SourceCode {
|
||||
index: self.get_or_init_index(),
|
||||
index: self.to_index(),
|
||||
text: self.contents,
|
||||
}
|
||||
}
|
||||
|
||||
/// Take the source code up to the given [`Location`].
|
||||
#[inline]
|
||||
pub fn up_to(&self, location: Location) -> &'a str {
|
||||
self.to_source_code().up_to(location)
|
||||
/// Computes the start position of the line of `offset`.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::TextSize;
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\rthird line");
|
||||
///
|
||||
/// assert_eq!(locator.line_start(TextSize::from(0)), TextSize::from(0));
|
||||
/// assert_eq!(locator.line_start(TextSize::from(4)), TextSize::from(0));
|
||||
///
|
||||
/// assert_eq!(locator.line_start(TextSize::from(14)), TextSize::from(11));
|
||||
/// assert_eq!(locator.line_start(TextSize::from(28)), TextSize::from(23));
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If `offset` is out of bounds.
|
||||
pub fn line_start(&self, offset: TextSize) -> TextSize {
|
||||
if let Some(index) = self.contents[TextRange::up_to(offset)].rfind(['\n', '\r']) {
|
||||
// SAFETY: Safe because `index < offset`
|
||||
TextSize::try_from(index).unwrap().add(TextSize::from(1))
|
||||
} else {
|
||||
TextSize::default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Take the source code after the given [`Location`].
|
||||
#[inline]
|
||||
pub fn after(&self, location: Location) -> &'a str {
|
||||
self.to_source_code().after(location)
|
||||
pub fn is_at_start_of_line(&self, offset: TextSize) -> bool {
|
||||
offset == TextSize::from(0)
|
||||
|| self.contents[TextRange::up_to(offset)].ends_with(['\n', '\r'])
|
||||
}
|
||||
|
||||
/// Take the source code between the given [`Range`].
|
||||
#[inline]
|
||||
pub fn slice<R: Into<Range>>(&self, range: R) -> &'a str {
|
||||
self.to_source_code().slice(range)
|
||||
/// Computes the offset that is right after the newline character that ends `offset`'s line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(locator.full_line_end(TextSize::from(3)), TextSize::from(11));
|
||||
/// assert_eq!(locator.full_line_end(TextSize::from(14)), TextSize::from(24));
|
||||
/// assert_eq!(locator.full_line_end(TextSize::from(28)), TextSize::from(34));
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
///
|
||||
/// If `offset` is past the end of the content.
|
||||
pub fn full_line_end(&self, offset: TextSize) -> TextSize {
|
||||
let slice = &self.contents[usize::from(offset)..];
|
||||
if let Some(index) = slice.find(['\n', '\r']) {
|
||||
let bytes = slice.as_bytes();
|
||||
|
||||
// `\r\n`
|
||||
let relative_offset = if bytes[index] == b'\r' && bytes.get(index + 1) == Some(&b'\n') {
|
||||
TextSize::try_from(index + 2).unwrap()
|
||||
}
|
||||
// `\r` or `\n`
|
||||
else {
|
||||
TextSize::try_from(index + 1).unwrap()
|
||||
};
|
||||
|
||||
offset.add(relative_offset)
|
||||
} else {
|
||||
self.contents.text_len()
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the byte offset of the given [`Location`].
|
||||
/// Computes the offset that is right before the newline character that ends `offset`'s line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(locator.line_end(TextSize::from(3)), TextSize::from(10));
|
||||
/// assert_eq!(locator.line_end(TextSize::from(14)), TextSize::from(22));
|
||||
/// assert_eq!(locator.line_end(TextSize::from(28)), TextSize::from(34));
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
///
|
||||
/// If `offset` is past the end of the content.
|
||||
pub fn line_end(&self, offset: TextSize) -> TextSize {
|
||||
let slice = &self.contents[usize::from(offset)..];
|
||||
if let Some(index) = slice.find(['\n', '\r']) {
|
||||
offset + TextSize::try_from(index).unwrap()
|
||||
} else {
|
||||
self.contents.text_len()
|
||||
}
|
||||
}
|
||||
|
||||
/// Computes the range of this `offset`'s line.
|
||||
///
|
||||
/// The range starts at the beginning of the line and goes up to, and including, the new line character
|
||||
/// at the end of the line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(locator.full_line_range(TextSize::from(3)), TextRange::new(TextSize::from(0), TextSize::from(11)));
|
||||
/// assert_eq!(locator.full_line_range(TextSize::from(14)), TextRange::new(TextSize::from(11), TextSize::from(24)));
|
||||
/// assert_eq!(locator.full_line_range(TextSize::from(28)), TextRange::new(TextSize::from(24), TextSize::from(34)));
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If `offset` is out of bounds.
|
||||
pub fn full_line_range(&self, offset: TextSize) -> TextRange {
|
||||
TextRange::new(self.line_start(offset), self.full_line_end(offset))
|
||||
}
|
||||
|
||||
/// Computes the range of this `offset`'s line ending before the newline character.
|
||||
///
|
||||
/// The range starts at the beginning of the line and goes up to, but excluding, the new line character
|
||||
/// at the end of the line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(locator.line_range(TextSize::from(3)), TextRange::new(TextSize::from(0), TextSize::from(10)));
|
||||
/// assert_eq!(locator.line_range(TextSize::from(14)), TextRange::new(TextSize::from(11), TextSize::from(22)));
|
||||
/// assert_eq!(locator.line_range(TextSize::from(28)), TextRange::new(TextSize::from(24), TextSize::from(34)));
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If `offset` is out of bounds.
|
||||
pub fn line_range(&self, offset: TextSize) -> TextRange {
|
||||
TextRange::new(self.line_start(offset), self.line_end(offset))
|
||||
}
|
||||
|
||||
/// Returns the text of the `offset`'s line.
|
||||
///
|
||||
/// The line includes the newline characters at the end of the line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(locator.full_line(TextSize::from(3)), "First line\n");
|
||||
/// assert_eq!(locator.full_line(TextSize::from(14)), "second line\r\n");
|
||||
/// assert_eq!(locator.full_line(TextSize::from(28)), "third line");
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If `offset` is out of bounds.
|
||||
pub fn full_line(&self, offset: TextSize) -> &'a str {
    let range = self.full_line_range(offset);
    &self.contents[range]
}
|
||||
|
||||
/// Returns the text of the `offset`'s line.
|
||||
///
|
||||
/// Excludes the newline characters at the end of the line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(locator.line(TextSize::from(3)), "First line");
|
||||
/// assert_eq!(locator.line(TextSize::from(14)), "second line");
|
||||
/// assert_eq!(locator.line(TextSize::from(28)), "third line");
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If `offset` is out of bounds.
|
||||
pub fn line(&self, offset: TextSize) -> &'a str {
    let range = self.line_range(offset);
    &self.contents[range]
}
|
||||
|
||||
/// Computes the range of all lines that this `range` covers.
|
||||
///
|
||||
/// The range starts at the beginning of the line at `range.start()` and goes up to, and including, the new line character
|
||||
/// at the end of `range.end()`'s line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// locator.full_lines_range(TextRange::new(TextSize::from(3), TextSize::from(5))),
|
||||
/// TextRange::new(TextSize::from(0), TextSize::from(11))
|
||||
/// );
|
||||
/// assert_eq!(
|
||||
/// locator.full_lines_range(TextRange::new(TextSize::from(3), TextSize::from(14))),
|
||||
/// TextRange::new(TextSize::from(0), TextSize::from(24))
|
||||
/// );
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If the start or end of `range` is out of bounds.
|
||||
pub fn full_lines_range(&self, range: TextRange) -> TextRange {
|
||||
TextRange::new(
|
||||
self.line_start(range.start()),
|
||||
self.full_line_end(range.end()),
|
||||
)
|
||||
}
|
||||
|
||||
/// Computes the range of all lines that this `range` covers.
|
||||
///
|
||||
/// The range starts at the beginning of the line at `range.start()` and goes up to, but excluding, the new line character
|
||||
/// at the end of `range.end()`'s line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// locator.lines_range(TextRange::new(TextSize::from(3), TextSize::from(5))),
|
||||
/// TextRange::new(TextSize::from(0), TextSize::from(10))
|
||||
/// );
|
||||
/// assert_eq!(
|
||||
/// locator.lines_range(TextRange::new(TextSize::from(3), TextSize::from(14))),
|
||||
/// TextRange::new(TextSize::from(0), TextSize::from(22))
|
||||
/// );
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If the start or end of `range` is out of bounds.
|
||||
pub fn lines_range(&self, range: TextRange) -> TextRange {
|
||||
TextRange::new(self.line_start(range.start()), self.line_end(range.end()))
|
||||
}
|
||||
|
||||
/// Returns true if the text of `range` contains any line break.
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert!(
|
||||
/// !locator.contains_line_break(TextRange::new(TextSize::from(3), TextSize::from(5))),
|
||||
/// );
|
||||
/// assert!(
|
||||
/// locator.contains_line_break(TextRange::new(TextSize::from(3), TextSize::from(14))),
|
||||
/// );
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If the `range` is out of bounds.
|
||||
pub fn contains_line_break(&self, range: TextRange) -> bool {
    // Both `\n` and `\r` count as line breaks.
    self.contents[range].contains(['\n', '\r'])
}
|
||||
|
||||
/// Returns the text of all lines that include `range`.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// locator.lines(TextRange::new(TextSize::from(3), TextSize::from(5))),
|
||||
/// "First line"
|
||||
/// );
|
||||
/// assert_eq!(
|
||||
/// locator.lines(TextRange::new(TextSize::from(3), TextSize::from(14))),
|
||||
/// "First line\nsecond line"
|
||||
/// );
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If the start or end of `range` is out of bounds.
|
||||
pub fn lines(&self, range: TextRange) -> &'a str {
    let covered = self.lines_range(range);
    &self.contents[covered]
}
|
||||
|
||||
/// Returns the text of all lines that include `range`.
|
||||
///
|
||||
/// Includes the newline characters of the last line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// locator.full_lines(TextRange::new(TextSize::from(3), TextSize::from(5))),
|
||||
/// "First line\n"
|
||||
/// );
|
||||
/// assert_eq!(
|
||||
/// locator.full_lines(TextRange::new(TextSize::from(3), TextSize::from(14))),
|
||||
/// "First line\nsecond line\r\n"
|
||||
/// );
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If the start or end of `range` is out of bounds.
|
||||
pub fn full_lines(&self, range: TextRange) -> &'a str {
    let covered = self.full_lines_range(range);
    &self.contents[covered]
}
|
||||
|
||||
/// Take the source code up to the given [`TextSize`].
|
||||
#[inline]
|
||||
pub fn offset(&self, location: Location) -> TextSize {
|
||||
self.to_source_code().offset(location)
|
||||
pub fn up_to(&self, offset: TextSize) -> &'a str {
|
||||
&self.contents[TextRange::up_to(offset)]
|
||||
}
|
||||
|
||||
/// Take the source code after the given [`TextSize`].
|
||||
#[inline]
|
||||
pub fn after(&self, offset: TextSize) -> &'a str {
    // Everything from `offset` (inclusive) to the end of the source.
    let start = usize::from(offset);
    &self.contents[start..]
}
|
||||
|
||||
/// Take the source code between the given [`TextRange`].
|
||||
#[inline]
|
||||
pub fn slice(&self, range: TextRange) -> &'a str {
    let contents: &'a str = self.contents;
    &contents[range]
}
|
||||
|
||||
/// Return the underlying source code.
|
||||
|
@ -63,17 +401,15 @@ impl<'a> Locator<'a> {
|
|||
self.contents
|
||||
}
|
||||
|
||||
/// Return the number of lines in the source code.
|
||||
pub fn count_lines(&self) -> usize {
|
||||
let index = self.get_or_init_index();
|
||||
index.line_count()
|
||||
}
|
||||
|
||||
/// Return the number of bytes in the source code.
|
||||
pub const fn len(&self) -> usize {
|
||||
self.contents.len()
|
||||
}
|
||||
|
||||
pub fn text_len(&self) -> TextSize {
|
||||
self.contents.text_len()
|
||||
}
|
||||
|
||||
/// Return `true` if the source code is empty.
|
||||
pub const fn is_empty(&self) -> bool {
|
||||
self.contents.is_empty()
|
||||
|
|
|
@ -5,17 +5,17 @@ mod locator;
|
|||
mod stylist;
|
||||
|
||||
pub use crate::source_code::line_index::{LineIndex, OneIndexed};
|
||||
use crate::types::Range;
|
||||
pub use generator::Generator;
|
||||
pub use indexer::Indexer;
|
||||
pub use locator::Locator;
|
||||
use ruff_text_size::{TextRange, TextSize};
|
||||
use rustpython_parser as parser;
|
||||
use rustpython_parser::ast::Location;
|
||||
use rustpython_parser::{lexer, Mode, ParseError};
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt::{Debug, Formatter};
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
pub use stylist::{LineEnding, Stylist};
|
||||
|
||||
/// Run round-trip source code generation on a given Python code.
|
||||
|
@ -29,7 +29,7 @@ pub fn round_trip(code: &str, source_path: &str) -> Result<String, ParseError> {
|
|||
Ok(generator.generate())
|
||||
}
|
||||
|
||||
/// Gives access to the source code of a file and allows mapping between [`Location`] and byte offsets.
|
||||
/// Gives access to the source code of a file and allows mapping between [`TextSize`] and [`SourceLocation`].
|
||||
#[derive(Debug)]
|
||||
pub struct SourceCode<'src, 'index> {
|
||||
text: &'src str,
|
||||
|
@ -44,37 +44,34 @@ impl<'src, 'index> SourceCode<'src, 'index> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Take the source code up to the given [`Location`].
|
||||
pub fn up_to(&self, location: Location) -> &'src str {
|
||||
let offset = self.index.location_offset(location, self.text);
|
||||
/// Computes the one indexed row and column numbers for `offset`.
|
||||
#[inline]
|
||||
pub fn source_location(&self, offset: TextSize) -> SourceLocation {
    // The index needs the text as well as the offset to compute the location.
    let (index, text) = (self.index, self.text);
    index.source_location(offset, text)
}
|
||||
|
||||
#[inline]
|
||||
pub fn line_index(&self, offset: TextSize) -> OneIndexed {
|
||||
self.index.line_index(offset)
|
||||
}
|
||||
|
||||
/// Take the source code up to the given [`TextSize`].
|
||||
#[inline]
|
||||
pub fn up_to(&self, offset: TextSize) -> &'src str {
|
||||
&self.text[TextRange::up_to(offset)]
|
||||
}
|
||||
|
||||
/// Take the source code after the given [`Location`].
|
||||
pub fn after(&self, location: Location) -> &'src str {
|
||||
let offset = self.index.location_offset(location, self.text);
|
||||
/// Take the source code after the given [`TextSize`].
|
||||
#[inline]
|
||||
pub fn after(&self, offset: TextSize) -> &'src str {
    // Everything from `offset` (inclusive) to the end of the text.
    let start = usize::from(offset);
    &self.text[start..]
}
|
||||
|
||||
/// Take the source code between the given [`Range`].
|
||||
pub fn slice<R: Into<Range>>(&self, range: R) -> &'src str {
|
||||
let range = self.text_range(range);
|
||||
/// Take the source code between the given [`TextRange`].
|
||||
pub fn slice(&self, range: TextRange) -> &'src str {
    let text: &'src str = self.text;
    &text[range]
}
|
||||
|
||||
/// Converts a [`Location`] range to a byte offset range
|
||||
pub fn text_range<R: Into<Range>>(&self, range: R) -> TextRange {
|
||||
let range = range.into();
|
||||
let start = self.index.location_offset(range.location, self.text);
|
||||
let end = self.index.location_offset(range.end_location, self.text);
|
||||
TextRange::new(start, end)
|
||||
}
|
||||
|
||||
/// Return the byte offset of the given [`Location`].
|
||||
pub fn offset(&self, location: Location) -> TextSize {
|
||||
self.index.location_offset(location, self.text)
|
||||
}
|
||||
|
||||
/// Returns the byte offset at which `line` starts.
pub fn line_start(&self, line: OneIndexed) -> TextSize {
    let (index, text) = (self.index, self.text);
    index.line_start(line, text)
}
|
||||
|
@ -87,20 +84,6 @@ impl<'src, 'index> SourceCode<'src, 'index> {
|
|||
self.index.line_range(line, self.text)
|
||||
}
|
||||
|
||||
/// Returns a string with the lines spawning between location and end location.
|
||||
pub fn lines(&self, range: Range) -> &'src str {
|
||||
let start_line = self
|
||||
.index
|
||||
.line_range(OneIndexed::new(range.location.row()).unwrap(), self.text);
|
||||
|
||||
let end_line = self.index.line_range(
|
||||
OneIndexed::new(range.end_location.row()).unwrap(),
|
||||
self.text,
|
||||
);
|
||||
|
||||
&self.text[TextRange::new(start_line.start(), end_line.end())]
|
||||
}
|
||||
|
||||
/// Returns the source text of the line with the given index
|
||||
#[inline]
|
||||
pub fn line_text(&self, index: OneIndexed) -> &'src str {
|
||||
|
@ -131,69 +114,43 @@ impl Eq for SourceCode<'_, '_> {}
|
|||
/// A Builder for constructing a [`SourceFile`]
|
||||
pub struct SourceFileBuilder {
|
||||
name: Box<str>,
|
||||
code: Option<FileSourceCode>,
|
||||
code: Box<str>,
|
||||
index: Option<LineIndex>,
|
||||
}
|
||||
|
||||
impl SourceFileBuilder {
|
||||
/// Creates a new builder for a file named `name`.
|
||||
pub fn new(name: &str) -> Self {
|
||||
pub fn new<Name: Into<Box<str>>, Code: Into<Box<str>>>(name: Name, code: Code) -> Self {
|
||||
Self {
|
||||
name: Box::from(name),
|
||||
code: None,
|
||||
name: name.into(),
|
||||
code: code.into(),
|
||||
index: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a enw builder for a file named `name`
|
||||
pub fn from_string(name: String) -> Self {
|
||||
Self {
|
||||
name: Box::from(name),
|
||||
code: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Consumes `self` and returns a builder for a file with the source text and the [`LineIndex`] copied
|
||||
/// from `source`.
|
||||
#[must_use]
|
||||
pub fn source_code(mut self, source: &SourceCode) -> Self {
|
||||
self.set_source_code(source);
|
||||
pub fn line_index(mut self, index: LineIndex) -> Self {
|
||||
self.index = Some(index);
|
||||
self
|
||||
}
|
||||
|
||||
/// Copies the source text and [`LineIndex`] from `source`.
|
||||
pub fn set_source_code(&mut self, source: &SourceCode) {
|
||||
self.code = Some(FileSourceCode {
|
||||
text: Box::from(source.text()),
|
||||
index: source.index.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
/// Consumes `self` and returns a builder for a file with the source text `text`. Builds the [`LineIndex`] from `text`.
|
||||
#[must_use]
|
||||
pub fn source_text(self, text: &str) -> Self {
|
||||
self.source_code(&SourceCode::new(text, &LineIndex::from_source_text(text)))
|
||||
}
|
||||
|
||||
/// Consumes `self` and returns a builder for a file with the source text `text`. Builds the [`LineIndex`] from `text`.
|
||||
#[must_use]
|
||||
pub fn source_text_string(mut self, text: String) -> Self {
|
||||
self.set_source_text_string(text);
|
||||
self
|
||||
}
|
||||
|
||||
/// Copies the source text `text` and builds the [`LineIndex`] from `text`.
|
||||
pub fn set_source_text_string(&mut self, text: String) {
|
||||
self.code = Some(FileSourceCode {
|
||||
index: LineIndex::from_source_text(&text),
|
||||
text: Box::from(text),
|
||||
});
|
||||
/// Stores `index` as the [`LineIndex`] to use for the file, replacing any previously set one.
pub fn set_line_index(&mut self, index: LineIndex) {
    let slot = &mut self.index;
    *slot = Some(index);
}
|
||||
|
||||
/// Consumes `self` and returns the [`SourceFile`].
|
||||
pub fn finish(self) -> SourceFile {
|
||||
let index = if let Some(index) = self.index {
|
||||
once_cell::sync::OnceCell::with_value(index)
|
||||
} else {
|
||||
once_cell::sync::OnceCell::new()
|
||||
};
|
||||
|
||||
SourceFile {
|
||||
inner: Arc::new(SourceFileInner {
|
||||
name: self.name,
|
||||
code: self.code,
|
||||
line_index: index,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
@ -211,7 +168,7 @@ impl Debug for SourceFile {
|
|||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("SourceFile")
|
||||
.field("name", &self.name())
|
||||
.field("code", &self.source_code())
|
||||
.field("code", &self.source_text())
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
@ -223,38 +180,57 @@ impl SourceFile {
|
|||
&self.inner.name
|
||||
}
|
||||
|
||||
/// Returns `Some` with the source code if set, or `None`.
|
||||
#[inline]
|
||||
pub fn source_code(&self) -> Option<SourceCode> {
|
||||
self.inner.code.as_ref().map(|code| SourceCode {
|
||||
text: &code.text,
|
||||
index: &code.index,
|
||||
})
|
||||
pub fn slice(&self, range: TextRange) -> &str {
|
||||
&self.source_text()[range]
|
||||
}
|
||||
|
||||
pub fn to_source_code(&self) -> SourceCode {
|
||||
SourceCode {
|
||||
text: self.source_text(),
|
||||
index: self.index(),
|
||||
}
|
||||
}
|
||||
|
||||
fn index(&self) -> &LineIndex {
|
||||
self.inner
|
||||
.line_index
|
||||
.get_or_init(|| LineIndex::from_source_text(self.source_text()))
|
||||
}
|
||||
|
||||
/// Returns `Some` with the source text if set, or `None`.
|
||||
#[inline]
|
||||
pub fn source_text(&self) -> Option<&str> {
|
||||
self.inner.code.as_ref().map(|code| &*code.text)
|
||||
pub fn source_text(&self) -> &str {
|
||||
&self.inner.code
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Eq, PartialEq)]
|
||||
struct SourceFileInner {
|
||||
name: Box<str>,
|
||||
code: Option<FileSourceCode>,
|
||||
code: Box<str>,
|
||||
line_index: once_cell::sync::OnceCell<LineIndex>,
|
||||
}
|
||||
|
||||
struct FileSourceCode {
|
||||
text: Box<str>,
|
||||
index: LineIndex,
|
||||
}
|
||||
|
||||
impl PartialEq for FileSourceCode {
|
||||
impl PartialEq for SourceFileInner {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
// It should be safe to assume that the index for two source files are identical
|
||||
self.text == other.text
|
||||
self.name == other.name && self.code == other.code
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for FileSourceCode {}
|
||||
impl Eq for SourceFileInner {}
|
||||
|
||||
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub struct SourceLocation {
|
||||
pub row: OneIndexed,
|
||||
pub column: OneIndexed,
|
||||
}
|
||||
|
||||
impl Debug for SourceLocation {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("SourceLocation")
|
||||
.field("row", &self.row.get())
|
||||
.field("column", &self.column.get())
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,7 +4,6 @@ use std::fmt;
|
|||
use std::ops::Deref;
|
||||
|
||||
use once_cell::unsync::OnceCell;
|
||||
use rustpython_parser::ast::Location;
|
||||
use rustpython_parser::lexer::LexResult;
|
||||
use rustpython_parser::Tok;
|
||||
|
||||
|
@ -12,48 +11,21 @@ use ruff_rustpython::vendor;
|
|||
|
||||
use crate::source_code::Locator;
|
||||
use crate::str::leading_quote;
|
||||
use crate::types::Range;
|
||||
|
||||
pub struct Stylist<'a> {
|
||||
locator: &'a Locator<'a>,
|
||||
indentation: OnceCell<Indentation>,
|
||||
indent_end: Option<Location>,
|
||||
quote: OnceCell<Quote>,
|
||||
quote_range: Option<Range>,
|
||||
indentation: Indentation,
|
||||
quote: Quote,
|
||||
line_ending: OnceCell<LineEnding>,
|
||||
}
|
||||
|
||||
impl<'a> Stylist<'a> {
|
||||
pub fn indentation(&'a self) -> &'a Indentation {
|
||||
self.indentation.get_or_init(|| {
|
||||
if let Some(indent_end) = self.indent_end {
|
||||
let start = Location::new(indent_end.row(), 0);
|
||||
let whitespace = self.locator.slice(Range::new(start, indent_end));
|
||||
Indentation(whitespace.to_string())
|
||||
} else {
|
||||
Indentation::default()
|
||||
}
|
||||
})
|
||||
&self.indentation
|
||||
}
|
||||
|
||||
pub fn quote(&'a self) -> Quote {
|
||||
*self.quote.get_or_init(|| {
|
||||
self.quote_range
|
||||
.and_then(|quote_range| {
|
||||
let content = self.locator.slice(quote_range);
|
||||
leading_quote(content)
|
||||
})
|
||||
.map(|pattern| {
|
||||
if pattern.contains('\'') {
|
||||
Quote::Single
|
||||
} else if pattern.contains('"') {
|
||||
Quote::Double
|
||||
} else {
|
||||
unreachable!("Expected string to start with a valid quote prefix")
|
||||
}
|
||||
})
|
||||
.unwrap_or_default()
|
||||
})
|
||||
self.quote
|
||||
}
|
||||
|
||||
pub fn line_ending(&'a self) -> LineEnding {
|
||||
|
@ -63,33 +35,60 @@ impl<'a> Stylist<'a> {
|
|||
}
|
||||
|
||||
pub fn from_tokens(tokens: &[LexResult], locator: &'a Locator<'a>) -> Self {
|
||||
let indent_end = tokens.iter().flatten().find_map(|(_, t, end)| {
|
||||
if matches!(t, Tok::Indent) {
|
||||
Some(*end)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
let quote_range = tokens.iter().flatten().find_map(|(start, t, end)| match t {
|
||||
Tok::String {
|
||||
triple_quoted: false,
|
||||
..
|
||||
} => Some(Range::new(*start, *end)),
|
||||
_ => None,
|
||||
});
|
||||
let indentation = detect_indention(tokens, locator);
|
||||
|
||||
Self {
|
||||
locator,
|
||||
indentation: OnceCell::default(),
|
||||
indent_end,
|
||||
quote_range,
|
||||
quote: OnceCell::default(),
|
||||
indentation,
|
||||
quote: detect_quote(tokens, locator),
|
||||
line_ending: OnceCell::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn detect_quote(tokens: &[LexResult], locator: &Locator) -> Quote {
|
||||
let quote_range = tokens.iter().flatten().find_map(|(t, range)| match t {
|
||||
Tok::String {
|
||||
triple_quoted: false,
|
||||
..
|
||||
} => Some(*range),
|
||||
_ => None,
|
||||
});
|
||||
|
||||
if let Some(quote_range) = quote_range {
|
||||
let content = &locator.slice(quote_range);
|
||||
if let Some(quotes) = leading_quote(content) {
|
||||
return if quotes.contains('\'') {
|
||||
Quote::Single
|
||||
} else if quotes.contains('"') {
|
||||
Quote::Double
|
||||
} else {
|
||||
unreachable!("Expected string to start with a valid quote prefix")
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
Quote::default()
|
||||
}
|
||||
|
||||
fn detect_indention(tokens: &[LexResult], locator: &Locator) -> Indentation {
|
||||
let indent_range = tokens.iter().flatten().find_map(|(t, range)| {
|
||||
if matches!(t, Tok::Indent) {
|
||||
Some(range)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
if let Some(indent_range) = indent_range {
|
||||
let whitespace = locator.slice(*indent_range);
|
||||
|
||||
Indentation(whitespace.to_string())
|
||||
} else {
|
||||
Indentation::default()
|
||||
}
|
||||
}
|
||||
|
||||
/// The quotation style used in Python source code.
|
||||
#[derive(Debug, Default, PartialEq, Eq, Copy, Clone)]
|
||||
pub enum Quote {
|
||||
|
@ -198,17 +197,18 @@ impl Deref for LineEnding {
|
|||
|
||||
/// Detect the line ending style of the given contents.
|
||||
fn detect_line_ending(contents: &str) -> Option<LineEnding> {
|
||||
if let Some(position) = contents.find('\n') {
|
||||
let position = position.saturating_sub(1);
|
||||
return if let Some('\r') = contents.chars().nth(position) {
|
||||
if let Some(position) = contents.find(['\n', '\r']) {
|
||||
let bytes = contents.as_bytes();
|
||||
if bytes[position] == b'\n' {
|
||||
Some(LineEnding::Lf)
|
||||
} else if bytes.get(position.saturating_add(1)) == Some(&b'\n') {
|
||||
Some(LineEnding::CrLf)
|
||||
} else {
|
||||
Some(LineEnding::Lf)
|
||||
};
|
||||
} else if contents.find('\r').is_some() {
|
||||
return Some(LineEnding::Cr);
|
||||
Some(LineEnding::Cr)
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
use ruff_text_size::{TextLen, TextRange};
|
||||
|
||||
/// See: <https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals>
|
||||
const TRIPLE_QUOTE_STR_PREFIXES: &[&str] = &[
|
||||
"u\"\"\"", "u'''", "r\"\"\"", "r'''", "U\"\"\"", "U'''", "R\"\"\"", "R'''", "\"\"\"", "'''",
|
||||
|
@ -21,9 +23,19 @@ const SINGLE_QUOTE_SUFFIXES: &[&str] = &["\"", "'"];
|
|||
/// Assumes that the string is a valid string literal, but does not verify that the string
|
||||
/// is a "simple" string literal (i.e., that it does not contain any implicit concatenations).
|
||||
pub fn raw_contents(contents: &str) -> Option<&str> {
|
||||
let range = raw_contents_range(contents)?;
|
||||
|
||||
Some(&contents[range])
|
||||
}
|
||||
|
||||
pub fn raw_contents_range(contents: &str) -> Option<TextRange> {
|
||||
let leading_quote_str = leading_quote(contents)?;
|
||||
let trailing_quote_str = trailing_quote(contents)?;
|
||||
Some(&contents[leading_quote_str.len()..contents.len() - trailing_quote_str.len()])
|
||||
|
||||
Some(TextRange::new(
|
||||
leading_quote_str.text_len(),
|
||||
contents.text_len() - trailing_quote_str.text_len(),
|
||||
))
|
||||
}
|
||||
|
||||
/// Return the leading quote for a string or byte literal (e.g., `"""`).
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use std::ops::Deref;
|
||||
|
||||
use rustpython_parser::ast::{Expr, Located, Location, Stmt};
|
||||
use rustpython_parser::ast::{Expr, Stmt};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum Node<'a> {
|
||||
|
@ -8,33 +8,6 @@ pub enum Node<'a> {
|
|||
Expr(&'a Expr),
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Range {
|
||||
pub location: Location,
|
||||
pub end_location: Location,
|
||||
}
|
||||
|
||||
impl Range {
|
||||
pub const fn new(location: Location, end_location: Location) -> Self {
|
||||
Self {
|
||||
location,
|
||||
end_location,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> From<&Located<T>> for Range {
|
||||
fn from(located: &Located<T>) -> Self {
|
||||
Range::new(located.location, located.end_location.unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> From<&Box<Located<T>>> for Range {
|
||||
fn from(located: &Box<Located<T>>) -> Self {
|
||||
Range::new(located.location, located.end_location.unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RefEquality<'a, T>(pub &'a T);
|
||||
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
use anyhow::Result;
|
||||
use ruff_text_size::{TextLen, TextRange};
|
||||
use rustpython_parser as parser;
|
||||
use rustpython_parser::ast::{Expr, Location};
|
||||
use rustpython_parser::ast::Expr;
|
||||
|
||||
use crate::relocate::relocate_expr;
|
||||
use crate::source_code::Locator;
|
||||
use crate::str;
|
||||
use crate::types::Range;
|
||||
|
||||
#[derive(is_macro::Is, Copy, Clone)]
|
||||
pub enum AnnotationKind {
|
||||
|
@ -24,10 +24,11 @@ pub enum AnnotationKind {
|
|||
/// Parse a type annotation from a string.
|
||||
pub fn parse_type_annotation(
|
||||
value: &str,
|
||||
range: Range,
|
||||
range: TextRange,
|
||||
locator: &Locator,
|
||||
) -> Result<(Expr, AnnotationKind)> {
|
||||
let expression = locator.slice(range);
|
||||
let expression = &locator.contents()[range];
|
||||
|
||||
if str::raw_contents(expression).map_or(false, |body| body == value) {
|
||||
// The annotation is considered "simple" if and only if the raw representation (e.g.,
|
||||
// `List[int]` within "List[int]") exactly matches the parsed representation. This
|
||||
|
@ -37,10 +38,7 @@ pub fn parse_type_annotation(
|
|||
let expr = parser::parse_expression_located(
|
||||
value,
|
||||
"<filename>",
|
||||
Location::new(
|
||||
range.location.row(),
|
||||
range.location.column() + leading_quote.len(),
|
||||
),
|
||||
range.start() + leading_quote.text_len(),
|
||||
)?;
|
||||
Ok((expr, AnnotationKind::Simple))
|
||||
} else {
|
||||
|
|
|
@ -1,15 +1,13 @@
|
|||
use rustpython_parser::ast::{Located, Location};
|
||||
use ruff_text_size::TextRange;
|
||||
use rustpython_parser::ast::Located;
|
||||
|
||||
use crate::source_code::Locator;
|
||||
use crate::types::Range;
|
||||
|
||||
/// Extract the leading indentation from a line.
|
||||
pub fn indentation<'a, T>(locator: &'a Locator, located: &'a Located<T>) -> Option<&'a str> {
|
||||
let range = Range::from(located);
|
||||
let indentation = locator.slice(Range::new(
|
||||
Location::new(range.location.row(), 0),
|
||||
Location::new(range.location.row(), range.location.column()),
|
||||
));
|
||||
pub fn indentation<'a, T>(locator: &'a Locator, located: &Located<T>) -> Option<&'a str> {
|
||||
let line_start = locator.line_start(located.start());
|
||||
let indentation = &locator.contents()[TextRange::new(line_start, located.start())];
|
||||
|
||||
if indentation.chars().all(char::is_whitespace) {
|
||||
Some(indentation)
|
||||
} else {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue