Mirror of https://github.com/astral-sh/ruff.git

Remove parser dependency from ruff-python-ast (#6096)

parent 99127243f4
commit 2cf00fee96

658 changed files with 1714 additions and 1546 deletions
@@ -1,5 +1,5 @@
 use bitflags::bitflags;
-use rustpython_parser::ast::{self, Constant, Expr, Stmt};
+use rustpython_ast::{self as ast, Constant, Expr, Stmt};

 bitflags! {
     #[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
@@ -1,4 +1,4 @@
-use rustpython_parser::ast::{self, Expr};
+use rustpython_ast::{self as ast, Expr};
 use smallvec::{smallvec, SmallVec};

 /// A representation of a qualified name, like `typing.List`.
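For readers following along: the qualified-name type touched above is built on the `smallvec` crate. A minimal sketch (not ruff's actual `CallPath` API) of storing a dotted path such as `typing.List` as a small, stack-allocated vector of segments:

```rust
// Requires the `smallvec` crate. `CallPath`, `from_qualified_name`, and
// `format_call_path` here are illustrative stand-ins, not ruff's real API.
use smallvec::{smallvec, SmallVec};

type CallPath<'a> = SmallVec<[&'a str; 8]>;

fn from_qualified_name(name: &str) -> CallPath<'_> {
    name.split('.').collect()
}

fn format_call_path(path: &CallPath) -> String {
    path.join(".")
}

fn main() {
    let path: CallPath = smallvec!["typing", "List"];
    assert_eq!(format_call_path(&path), "typing.List");
    assert_eq!(from_qualified_name("os.path.join").len(), 3);
}
```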
@@ -1,4 +1,4 @@
-use rustpython_parser::ast::{self, Decorator, Stmt};
+use rustpython_ast::{self as ast, Decorator, Stmt};

 pub fn name(stmt: &Stmt) -> &str {
     match stmt {
@@ -2,7 +2,7 @@
 //! ability to compare expressions for equality (via [`Eq`] and [`Hash`]).

 use num_bigint::BigInt;
-use rustpython_parser::ast;
+use rustpython_ast as ast;

 #[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
 pub enum ComparableExprContext {
@@ -1,6 +1,6 @@
 use std::hash::Hash;

-use rustpython_parser::ast::Expr;
+use rustpython_ast::Expr;

 use crate::comparable::ComparableExpr;
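For context on what the `comparable`/`hashable` modules enable: wrapping an AST node so that `Eq` and `Hash` look only at structure, ignoring source positions. A self-contained sketch of the pattern, using a toy `Expr` rather than ruff's AST:

```rust
// Toy types for illustration only; ruff derives this machinery for its
// real AST nodes.
use std::collections::HashSet;

struct Expr {
    name: String,
    #[allow(dead_code)]
    range: (u32, u32), // position info that must not affect equality
}

#[derive(PartialEq, Eq, Hash)]
struct ComparableExpr<'a> {
    name: &'a str, // only the structural part participates in Eq/Hash
}

impl<'a> From<&'a Expr> for ComparableExpr<'a> {
    fn from(expr: &'a Expr) -> Self {
        Self { name: &expr.name }
    }
}

fn main() {
    let a = Expr { name: "x".into(), range: (0, 1) };
    let b = Expr { name: "x".into(), range: (10, 11) };
    let mut seen: HashSet<ComparableExpr<'_>> = HashSet::new();
    assert!(seen.insert((&a).into()));
    // Same structure at a different location compares equal.
    assert!(!seen.insert((&b).into()));
}
```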
@@ -1,21 +1,15 @@
 use std::borrow::Cow;
-use std::ops::Sub;
 use std::path::Path;

 use num_traits::Zero;
-use ruff_text_size::{TextRange, TextSize};
-use rustpython_ast::CmpOp;
-use rustpython_parser::ast::{
-    self, Arguments, Constant, ExceptHandler, Expr, Keyword, MatchCase, Pattern, Ranged, Stmt,
-    TypeParam,
+use ruff_text_size::TextRange;
+use rustpython_ast::{
+    self as ast, Arguments, Constant, ExceptHandler, Expr, Keyword, MatchCase, Pattern, Ranged,
+    Stmt, TypeParam,
 };
-use rustpython_parser::{lexer, Mode, Tok};
 use smallvec::SmallVec;

 use ruff_python_trivia::{is_python_whitespace, PythonWhitespace, UniversalNewlineIterator};

 use crate::call_path::CallPath;
-use crate::source_code::{Indexer, Locator};
 use crate::statement_visitor::{walk_body, walk_stmt, StatementVisitor};

 /// Return `true` if the `Stmt` is a compound statement (as opposed to a simple statement).
@@ -772,27 +766,6 @@ pub fn map_subscript(expr: &Expr) -> &Expr {
     }
 }
-
-/// Returns `true` if a statement or expression includes at least one comment.
-pub fn has_comments<T>(node: &T, locator: &Locator, indexer: &Indexer) -> bool
-where
-    T: Ranged,
-{
-    let start = if has_leading_content(node.start(), locator) {
-        node.start()
-    } else {
-        locator.line_start(node.start())
-    };
-    let end = if has_trailing_content(node.end(), locator) {
-        node.end()
-    } else {
-        locator.line_end(node.end())
-    };
-
-    indexer
-        .comment_ranges()
-        .intersects(TextRange::new(start, end))
-}

 /// Return `true` if the body uses `locals()`, `globals()`, `vars()`, `eval()`.
 ///
 /// Accepts a closure that determines whether a given name (e.g., `"list"`) is a Python builtin.
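The removed `has_comments` widens a node's range out to whole-line boundaries before testing for comment intersections, so trailing and leading comments on the same lines count. A std-only sketch of that logic, with plain byte offsets in place of `TextRange` and a hypothetical `comment_starts` list in place of the `Indexer`:

```rust
// Illustrative only: `has_comments`, `node`, and `comment_starts` here are
// simplified stand-ins for ruff's Ranged/Locator/Indexer machinery.
fn has_comments(source: &str, node: (usize, usize), comment_starts: &[usize]) -> bool {
    // Expand to the start of the first line and the end of the last line.
    let start = source[..node.0].rfind('\n').map_or(0, |i| i + 1);
    let end = source[node.1..].find('\n').map_or(source.len(), |i| node.1 + i);
    // Any comment beginning within [start, end) counts.
    comment_starts
        .iter()
        .any(|&offset| (start..end).contains(&offset))
}

fn main() {
    let source = "x = 1  # trailing\ny = 2\n";
    let comment_starts = [7]; // byte offset of the `#`
    let x_stmt = (0, 5); // `x = 1`
    let y_stmt = (18, 23); // `y = 2`
    assert!(has_comments(source, x_stmt, &comment_starts));
    assert!(!has_comments(source, y_stmt, &comment_starts));
}
```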
@@ -1027,197 +1000,6 @@ where
     }
 }
-
-/// Return `true` if the node starting the given [`TextSize`] has leading content.
-pub fn has_leading_content(offset: TextSize, locator: &Locator) -> bool {
-    let line_start = locator.line_start(offset);
-    let leading = &locator.contents()[TextRange::new(line_start, offset)];
-    leading.chars().any(|char| !is_python_whitespace(char))
-}
-
-/// Return `true` if the node ending at the given [`TextSize`] has trailing content.
-pub fn has_trailing_content(offset: TextSize, locator: &Locator) -> bool {
-    let line_end = locator.line_end(offset);
-    let trailing = &locator.contents()[TextRange::new(offset, line_end)];
-
-    for char in trailing.chars() {
-        if char == '#' {
-            return false;
-        }
-        if !is_python_whitespace(char) {
-            return true;
-        }
-    }
-    false
-}
-
-/// If a [`Ranged`] has a trailing comment, return the index of the hash.
-pub fn trailing_comment_start_offset<T>(located: &T, locator: &Locator) -> Option<TextSize>
-where
-    T: Ranged,
-{
-    let line_end = locator.line_end(located.end());
-
-    let trailing = &locator.contents()[TextRange::new(located.end(), line_end)];
-
-    for (index, char) in trailing.char_indices() {
-        if char == '#' {
-            return TextSize::try_from(index).ok();
-        }
-        if !is_python_whitespace(char) {
-            return None;
-        }
-    }
-
-    None
-}
-
-/// Return the end offset at which the empty lines following a statement.
-pub fn trailing_lines_end(stmt: &Stmt, locator: &Locator) -> TextSize {
-    let line_end = locator.full_line_end(stmt.end());
-    let rest = &locator.contents()[usize::from(line_end)..];
-
-    UniversalNewlineIterator::with_offset(rest, line_end)
-        .take_while(|line| line.trim_whitespace().is_empty())
-        .last()
-        .map_or(line_end, |line| line.full_end())
-}
-
-/// Return the range of the first parenthesis pair after a given [`TextSize`].
-pub fn match_parens(start: TextSize, locator: &Locator) -> Option<TextRange> {
-    let contents = &locator.contents()[usize::from(start)..];
-
-    let mut fix_start = None;
-    let mut fix_end = None;
-    let mut count = 0u32;
-
-    for (tok, range) in lexer::lex_starts_at(contents, Mode::Module, start).flatten() {
-        match tok {
-            Tok::Lpar => {
-                if count == 0 {
-                    fix_start = Some(range.start());
-                }
-                count = count.saturating_add(1);
-            }
-            Tok::Rpar => {
-                count = count.saturating_sub(1);
-                if count == 0 {
-                    fix_end = Some(range.end());
-                    break;
-                }
-            }
-            _ => {}
-        }
-    }
-
-    match (fix_start, fix_end) {
-        (Some(start), Some(end)) => Some(TextRange::new(start, end)),
-        _ => None,
-    }
-}
-
-/// Return the `Range` of the first `Tok::Colon` token in a `Range`.
-pub fn first_colon_range(range: TextRange, locator: &Locator) -> Option<TextRange> {
-    let contents = &locator.contents()[range];
-    let range = lexer::lex_starts_at(contents, Mode::Module, range.start())
-        .flatten()
-        .find(|(tok, _)| tok.is_colon())
-        .map(|(_, range)| range);
-    range
-}
-
-/// Given an offset at the end of a line (including newlines), return the offset of the
-/// continuation at the end of that line.
-fn find_continuation(offset: TextSize, locator: &Locator, indexer: &Indexer) -> Option<TextSize> {
-    let newline_pos = usize::from(offset).saturating_sub(1);
-
-    // Skip the newline.
-    let newline_len = match locator.contents().as_bytes()[newline_pos] {
-        b'\n' => {
-            if locator
-                .contents()
-                .as_bytes()
-                .get(newline_pos.saturating_sub(1))
-                == Some(&b'\r')
-            {
-                2
-            } else {
-                1
-            }
-        }
-        b'\r' => 1,
-        // No preceding line.
-        _ => return None,
-    };
-
-    indexer
-        .is_continuation(offset - TextSize::from(newline_len), locator)
-        .then(|| offset - TextSize::from(newline_len) - TextSize::from(1))
-}
-
-/// If the node starting at the given [`TextSize`] is preceded by at least one continuation line
-/// (i.e., a line ending in a backslash), return the starting offset of the first such continuation
-/// character.
-///
-/// For example, given:
-/// ```python
-/// x = 1; \
-///    y = 2
-/// ```
-///
-/// When passed the offset of `y`, this function will return the offset of the backslash at the end
-/// of the first line.
-///
-/// Similarly, given:
-/// ```python
-/// x = 1; \
-///    \
-///   y = 2;
-/// ```
-///
-/// When passed the offset of `y`, this function will again return the offset of the backslash at
-/// the end of the first line.
-pub fn preceded_by_continuations(
-    offset: TextSize,
-    locator: &Locator,
-    indexer: &Indexer,
-) -> Option<TextSize> {
-    // Find the first preceding continuation.
-    let mut continuation = find_continuation(locator.line_start(offset), locator, indexer)?;
-
-    // Continue searching for continuations, in the unlikely event that we have multiple
-    // continuations in a row.
-    loop {
-        let previous_line_end = locator.line_start(continuation);
-        if locator
-            .slice(TextRange::new(previous_line_end, continuation))
-            .chars()
-            .all(is_python_whitespace)
-        {
-            if let Some(next_continuation) = find_continuation(previous_line_end, locator, indexer)
-            {
-                continuation = next_continuation;
-                continue;
-            }
-        }
-        break;
-    }
-
-    Some(continuation)
-}
-
-/// Return `true` if a `Stmt` appears to be part of a multi-statement line, with
-/// other statements preceding it.
-pub fn preceded_by_multi_statement_line(stmt: &Stmt, locator: &Locator, indexer: &Indexer) -> bool {
-    has_leading_content(stmt.start(), locator)
-        || preceded_by_continuations(stmt.start(), locator, indexer).is_some()
-}
-
-/// Return `true` if a `Stmt` appears to be part of a multi-statement line, with
-/// other statements following it.
-pub fn followed_by_multi_statement_line(stmt: &Stmt, locator: &Locator) -> bool {
-    has_trailing_content(stmt.end(), locator)
-}
-
 /// Return `true` if a `Stmt` is a docstring.
 pub fn is_docstring_stmt(stmt: &Stmt) -> bool {
     if let Stmt::Expr(ast::StmtExpr {
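The removed `find_continuation` steps backwards over the newline that precedes an offset, being careful that `\r\n` is a single two-byte line ending. A minimal, std-only sketch of that newline handling (`newline_len_before` is a hypothetical helper, not ruff's API):

```rust
// Given the offset just *after* a line's newline, compute how many bytes
// that newline occupied ("\r\n" vs "\n" vs "\r").
fn newline_len_before(source: &str, offset: usize) -> Option<usize> {
    let bytes = source.as_bytes();
    let newline_pos = offset.checked_sub(1)?;
    match bytes.get(newline_pos)? {
        b'\n' => {
            // "\r\n" counts as a single two-byte newline.
            if newline_pos > 0 && bytes[newline_pos - 1] == b'\r' {
                Some(2)
            } else {
                Some(1)
            }
        }
        b'\r' => Some(1),
        _ => None, // No newline ends here.
    }
}

fn main() {
    let source = "x = 1; \\\r\ny = 2";
    let y_line_start = source.find('y').unwrap();
    let len = newline_len_before(source, y_line_start).unwrap();
    assert_eq!(len, 2);
    // The byte right before the newline is the backslash continuation.
    assert_eq!(source.as_bytes()[y_line_start - len - 1], b'\\');
}
```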
@@ -1500,166 +1282,19 @@ impl Truthiness {
     }
 }
-
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct LocatedCmpOp {
-    pub range: TextRange,
-    pub op: CmpOp,
-}
-
-impl LocatedCmpOp {
-    fn new<T: Into<TextRange>>(range: T, op: CmpOp) -> Self {
-        Self {
-            range: range.into(),
-            op,
-        }
-    }
-}
-
-/// Extract all [`CmpOp`] operators from an expression snippet, with appropriate
-/// ranges.
-///
-/// `RustPython` doesn't include line and column information on [`CmpOp`] nodes.
-/// `CPython` doesn't either. This method iterates over the token stream and
-/// re-identifies [`CmpOp`] nodes, annotating them with valid ranges.
-pub fn locate_cmp_ops(expr: &Expr, locator: &Locator) -> Vec<LocatedCmpOp> {
-    // If `Expr` is a multi-line expression, we need to parenthesize it to
-    // ensure that it's lexed correctly.
-    let contents = locator.slice(expr.range());
-    let parenthesized_contents = format!("({contents})");
-    let mut tok_iter = lexer::lex(&parenthesized_contents, Mode::Expression)
-        .flatten()
-        .skip(1)
-        .map(|(tok, range)| (tok, range.sub(TextSize::from(1))))
-        .filter(|(tok, _)| !matches!(tok, Tok::NonLogicalNewline | Tok::Comment(_)))
-        .peekable();
-
-    let mut ops: Vec<LocatedCmpOp> = vec![];
-    let mut count = 0u32;
-    loop {
-        let Some((tok, range)) = tok_iter.next() else {
-            break;
-        };
-        if matches!(tok, Tok::Lpar) {
-            count = count.saturating_add(1);
-            continue;
-        } else if matches!(tok, Tok::Rpar) {
-            count = count.saturating_sub(1);
-            continue;
-        }
-        if count == 0 {
-            match tok {
-                Tok::Not => {
-                    if let Some((_, next_range)) =
-                        tok_iter.next_if(|(tok, _)| matches!(tok, Tok::In))
-                    {
-                        ops.push(LocatedCmpOp::new(
-                            TextRange::new(range.start(), next_range.end()),
-                            CmpOp::NotIn,
-                        ));
-                    }
-                }
-                Tok::In => {
-                    ops.push(LocatedCmpOp::new(range, CmpOp::In));
-                }
-                Tok::Is => {
-                    let op = if let Some((_, next_range)) =
-                        tok_iter.next_if(|(tok, _)| matches!(tok, Tok::Not))
-                    {
-                        LocatedCmpOp::new(
-                            TextRange::new(range.start(), next_range.end()),
-                            CmpOp::IsNot,
-                        )
-                    } else {
-                        LocatedCmpOp::new(range, CmpOp::Is)
-                    };
-                    ops.push(op);
-                }
-                Tok::NotEqual => {
-                    ops.push(LocatedCmpOp::new(range, CmpOp::NotEq));
-                }
-                Tok::EqEqual => {
-                    ops.push(LocatedCmpOp::new(range, CmpOp::Eq));
-                }
-                Tok::GreaterEqual => {
-                    ops.push(LocatedCmpOp::new(range, CmpOp::GtE));
-                }
-                Tok::Greater => {
-                    ops.push(LocatedCmpOp::new(range, CmpOp::Gt));
-                }
-                Tok::LessEqual => {
-                    ops.push(LocatedCmpOp::new(range, CmpOp::LtE));
-                }
-                Tok::Less => {
-                    ops.push(LocatedCmpOp::new(range, CmpOp::Lt));
-                }
-                _ => {}
-            }
-        }
-    }
-    ops
-}
 
 #[cfg(test)]
 mod tests {
     use std::borrow::Cow;

     use std::cell::RefCell;

     use std::vec;

     use anyhow::Result;
-    use ruff_text_size::{TextLen, TextRange, TextSize};
+    use ruff_text_size::TextRange;
     use rustpython_ast::{
-        self, CmpOp, Constant, Expr, ExprConstant, ExprContext, ExprName, Identifier, Ranged, Stmt,
-        StmtTypeAlias, TypeParam, TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple,
+        self, Constant, Expr, ExprConstant, ExprContext, ExprName, Identifier, Stmt, StmtTypeAlias,
+        TypeParam, TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple,
     };
-    use rustpython_parser::ast::Suite;
     use rustpython_parser::Parse;

-    use crate::helpers::{
-        any_over_stmt, any_over_type_param, first_colon_range, has_trailing_content,
-        locate_cmp_ops, resolve_imported_module_path, LocatedCmpOp,
-    };
-    use crate::source_code::Locator;
-
-    #[test]
-    fn trailing_content() -> Result<()> {
-        let contents = "x = 1";
-        let program = Suite::parse(contents, "<filename>")?;
-        let stmt = program.first().unwrap();
-        let locator = Locator::new(contents);
-        assert!(!has_trailing_content(stmt.end(), &locator));
-
-        let contents = "x = 1; y = 2";
-        let program = Suite::parse(contents, "<filename>")?;
-        let stmt = program.first().unwrap();
-        let locator = Locator::new(contents);
-        assert!(has_trailing_content(stmt.end(), &locator));
-
-        let contents = "x = 1 ";
-        let program = Suite::parse(contents, "<filename>")?;
-        let stmt = program.first().unwrap();
-        let locator = Locator::new(contents);
-        assert!(!has_trailing_content(stmt.end(), &locator));
-
-        let contents = "x = 1 # Comment";
-        let program = Suite::parse(contents, "<filename>")?;
-        let stmt = program.first().unwrap();
-        let locator = Locator::new(contents);
-        assert!(!has_trailing_content(stmt.end(), &locator));
-
-        let contents = r#"
-x = 1
-y = 2
-"#
-        .trim();
-        let program = Suite::parse(contents, "<filename>")?;
-        let stmt = program.first().unwrap();
-        let locator = Locator::new(contents);
-        assert!(!has_trailing_content(stmt.end(), &locator));
-
-        Ok(())
-    }
+    use crate::helpers::{any_over_stmt, any_over_type_param, resolve_imported_module_path};

     #[test]
     fn resolve_import() {
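The heart of the removed `locate_cmp_ops` is the two-token pairing trick: on seeing `not` or `is`, peek at the next token to decide between the fused operators (`not in`, `is not`) and the single-token ones. A std-only sketch of that pattern with `Peekable::next_if`, using a stand-in `Tok` enum rather than rustpython's:

```rust
#[derive(Debug, PartialEq)]
enum Tok { Not, In, Is, Name }

#[derive(Debug, PartialEq)]
enum CmpOp { In, NotIn, Is, IsNot }

fn locate_ops(tokens: Vec<Tok>) -> Vec<CmpOp> {
    let mut ops = Vec::new();
    let mut iter = tokens.into_iter().peekable();
    while let Some(tok) = iter.next() {
        match tok {
            // `not` followed by `in` fuses into a single `not in` operator.
            Tok::Not => {
                if iter.next_if(|next| *next == Tok::In).is_some() {
                    ops.push(CmpOp::NotIn);
                }
            }
            Tok::In => ops.push(CmpOp::In),
            // `is` optionally fuses with a trailing `not`.
            Tok::Is => {
                if iter.next_if(|next| *next == Tok::Not).is_some() {
                    ops.push(CmpOp::IsNot);
                } else {
                    ops.push(CmpOp::Is);
                }
            }
            Tok::Name => {}
        }
    }
    ops
}

fn main() {
    // x not in y; x is not y; x in y
    let toks = vec![
        Tok::Name, Tok::Not, Tok::In, Tok::Name,
        Tok::Name, Tok::Is, Tok::Not, Tok::Name,
        Tok::Name, Tok::In, Tok::Name,
    ];
    assert_eq!(locate_ops(toks), vec![CmpOp::NotIn, CmpOp::IsNot, CmpOp::In]);
}
```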
@@ -1698,101 +1333,6 @@ y = 2
         );
     }
-
-    #[test]
-    fn extract_first_colon_range() {
-        let contents = "with a: pass";
-        let locator = Locator::new(contents);
-        let range = first_colon_range(
-            TextRange::new(TextSize::from(0), contents.text_len()),
-            &locator,
-        )
-        .unwrap();
-        assert_eq!(&contents[range], ":");
-        assert_eq!(range, TextRange::new(TextSize::from(6), TextSize::from(7)));
-    }
-
-    #[test]
-    fn extract_cmp_op_location() -> Result<()> {
-        let contents = "x == 1";
-        let expr = Expr::parse(contents, "<filename>")?;
-        let locator = Locator::new(contents);
-        assert_eq!(
-            locate_cmp_ops(&expr, &locator),
-            vec![LocatedCmpOp::new(
-                TextSize::from(2)..TextSize::from(4),
-                CmpOp::Eq
-            )]
-        );
-
-        let contents = "x != 1";
-        let expr = Expr::parse(contents, "<filename>")?;
-        let locator = Locator::new(contents);
-        assert_eq!(
-            locate_cmp_ops(&expr, &locator),
-            vec![LocatedCmpOp::new(
-                TextSize::from(2)..TextSize::from(4),
-                CmpOp::NotEq
-            )]
-        );
-
-        let contents = "x is 1";
-        let expr = Expr::parse(contents, "<filename>")?;
-        let locator = Locator::new(contents);
-        assert_eq!(
-            locate_cmp_ops(&expr, &locator),
-            vec![LocatedCmpOp::new(
-                TextSize::from(2)..TextSize::from(4),
-                CmpOp::Is
-            )]
-        );
-
-        let contents = "x is not 1";
-        let expr = Expr::parse(contents, "<filename>")?;
-        let locator = Locator::new(contents);
-        assert_eq!(
-            locate_cmp_ops(&expr, &locator),
-            vec![LocatedCmpOp::new(
-                TextSize::from(2)..TextSize::from(8),
-                CmpOp::IsNot
-            )]
-        );
-
-        let contents = "x in 1";
-        let expr = Expr::parse(contents, "<filename>")?;
-        let locator = Locator::new(contents);
-        assert_eq!(
-            locate_cmp_ops(&expr, &locator),
-            vec![LocatedCmpOp::new(
-                TextSize::from(2)..TextSize::from(4),
-                CmpOp::In
-            )]
-        );
-
-        let contents = "x not in 1";
-        let expr = Expr::parse(contents, "<filename>")?;
-        let locator = Locator::new(contents);
-        assert_eq!(
-            locate_cmp_ops(&expr, &locator),
-            vec![LocatedCmpOp::new(
-                TextSize::from(2)..TextSize::from(8),
-                CmpOp::NotIn
-            )]
-        );
-
-        let contents = "x != (1 is not 2)";
-        let expr = Expr::parse(contents, "<filename>")?;
-        let locator = Locator::new(contents);
-        assert_eq!(
-            locate_cmp_ops(&expr, &locator),
-            vec![LocatedCmpOp::new(
-                TextSize::from(2)..TextSize::from(4),
-                CmpOp::NotEq
-            )]
-        );
-
-        Ok(())
-    }

     #[test]
     fn any_over_stmt_type_alias() {
         let seen = RefCell::new(Vec::new());
@@ -11,13 +11,10 @@
 //! This module can be used to identify the [`TextRange`] of the `except` token.

 use ruff_text_size::{TextLen, TextRange, TextSize};
-use rustpython_ast::{Alias, Arg, ArgWithDefault};
-use rustpython_parser::ast::{self, ExceptHandler, Ranged, Stmt};
+use rustpython_ast::{self as ast, Alias, Arg, ArgWithDefault, ExceptHandler, Ranged, Stmt};

 use ruff_python_trivia::{is_python_whitespace, Cursor};

-use crate::source_code::Locator;
-
 pub trait Identifier {
     /// Return the [`TextRange`] of the identifier in the given AST node.
     fn identifier(&self) -> TextRange;
@@ -82,14 +79,14 @@ impl Identifier for Alias {
 }

 /// Return the [`TextRange`] of the `except` token in an [`ExceptHandler`].
-pub fn except(handler: &ExceptHandler, locator: &Locator) -> TextRange {
-    IdentifierTokenizer::new(locator.contents(), handler.range())
+pub fn except(handler: &ExceptHandler, source: &str) -> TextRange {
+    IdentifierTokenizer::new(source, handler.range())
         .next()
         .expect("Failed to find `except` token in `ExceptHandler`")
 }

 /// Return the [`TextRange`] of the `else` token in a `For`, `AsyncFor`, or `While` statement.
-pub fn else_(stmt: &Stmt, locator: &Locator) -> Option<TextRange> {
+pub fn else_(stmt: &Stmt, source: &str) -> Option<TextRange> {
     let (Stmt::For(ast::StmtFor { body, orelse, .. })
     | Stmt::AsyncFor(ast::StmtAsyncFor { body, orelse, .. })
     | Stmt::While(ast::StmtWhile { body, orelse, .. })) = stmt
@@ -103,7 +100,7 @@ pub fn else_(stmt: &Stmt, locator: &Locator) -> Option<TextRange> {

     IdentifierTokenizer::starts_at(
         body.last().expect("Expected body to be non-empty").end(),
-        locator.contents(),
+        source,
     )
     .next()
 }
@@ -203,17 +200,15 @@ impl Iterator for IdentifierTokenizer<'_> {

 #[cfg(test)]
 mod tests {
-    use anyhow::Result;
     use ruff_text_size::{TextRange, TextSize};
     use rustpython_ast::{Ranged, Stmt};
-    use rustpython_parser::Parse;
+    use rustpython_parser::{Parse, ParseError};

     use crate::identifier;
     use crate::identifier::IdentifierTokenizer;
-    use crate::source_code::Locator;

     #[test]
-    fn extract_else_range() -> Result<()> {
+    fn extract_else_range() -> Result<(), ParseError> {
         let contents = r#"
 for x in y:
     pass
@@ -222,8 +217,7 @@ else:
 "#
         .trim();
         let stmt = Stmt::parse(contents, "<filename>")?;
-        let locator = Locator::new(contents);
-        let range = identifier::else_(&stmt, &locator).unwrap();
+        let range = identifier::else_(&stmt, contents).unwrap();
         assert_eq!(&contents[range], "else");
         assert_eq!(
             range,
@@ -233,12 +227,11 @@ else:
     }

     #[test]
-    fn extract_global_names() -> Result<()> {
+    fn extract_global_names() -> Result<(), ParseError> {
         let contents = r#"global X,Y, Z"#.trim();
         let stmt = Stmt::parse(contents, "<filename>")?;
-        let locator = Locator::new(contents);

-        let mut names = IdentifierTokenizer::new(locator.contents(), stmt.range());
+        let mut names = IdentifierTokenizer::new(contents, stmt.range());

         let range = names.next_token().unwrap();
         assert_eq!(&contents[range], "global");
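The `IdentifierTokenizer` used by `else_` and `except` above scans raw source text for the next identifier-shaped word after an offset, which is why a plain `&str` now suffices where a `Locator` was previously required. A std-only sketch of that scan (`next_identifier` is a hypothetical helper; the real tokenizer also handles comments and line continuations):

```rust
fn next_identifier(source: &str, start: usize) -> Option<(usize, usize)> {
    let rest = &source[start..];
    // Find the first identifier start character.
    let begin = rest.find(|c: char| c.is_ascii_alphabetic() || c == '_')?;
    // Extend to the end of the identifier.
    let after = rest[begin..]
        .find(|c: char| !(c.is_ascii_alphanumeric() || c == '_'))
        .map_or(rest.len(), |len| begin + len);
    Some((start + begin, start + after))
}

fn main() {
    let source = "for x in y:\n    pass\nelse:\n    pass";
    // Start scanning after the `for` body's `pass`.
    let body_end = source.find("pass").unwrap() + "pass".len();
    let (lo, hi) = next_identifier(source, body_end).unwrap();
    assert_eq!(&source[lo..hi], "else");
}
```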
@@ -10,13 +10,10 @@ pub mod identifier;
 pub mod imports;
 pub mod node;
 pub mod relocate;
-pub mod source_code;
 pub mod statement_visitor;
 pub mod stmt_if;
 pub mod str;
-pub mod token_kind;
 pub mod traversal;
 pub mod types;
 pub mod typing;
 pub mod visitor;
 pub mod whitespace;
@@ -1,10 +1,9 @@
-use ast::{TypeParam, TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple};
 use ruff_text_size::TextRange;
 use rustpython_ast::{
-    Alias, Arg, ArgWithDefault, Arguments, Comprehension, Decorator, ExceptHandler, Keyword,
-    MatchCase, Mod, Pattern, Stmt, TypeIgnore, WithItem,
+    self as ast, Alias, Arg, ArgWithDefault, Arguments, Comprehension, Decorator, ExceptHandler,
+    Expr, Keyword, MatchCase, Mod, Pattern, Ranged, Stmt, TypeIgnore, TypeParam,
+    TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple, WithItem,
 };
-use rustpython_parser::ast::{self, Expr, Ranged};
 use std::ptr::NonNull;

 pub trait AstNode: Ranged {
@@ -1,5 +1,5 @@
 use ruff_text_size::TextRange;
-use rustpython_parser::ast::{self, Expr, Keyword};
+use rustpython_ast::{self as ast, Expr, Keyword};

 fn relocate_keyword(keyword: &mut Keyword, location: TextRange) {
     relocate_expr(&mut keyword.value, location);
@@ -1,84 +0,0 @@
-use itertools::Itertools;
-use std::fmt::{Debug, Formatter};
-use std::ops::Deref;
-
-use ruff_text_size::TextRange;
-use rustpython_parser::Tok;
-
-/// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping.
-#[derive(Clone)]
-pub struct CommentRanges {
-    raw: Vec<TextRange>,
-}
-
-impl CommentRanges {
-    /// Returns `true` if the given range includes a comment.
-    pub fn intersects(&self, target: TextRange) -> bool {
-        self.raw
-            .binary_search_by(|range| {
-                if target.contains_range(*range) {
-                    std::cmp::Ordering::Equal
-                } else if range.end() < target.start() {
-                    std::cmp::Ordering::Less
-                } else {
-                    std::cmp::Ordering::Greater
-                }
-            })
-            .is_ok()
-    }
-
-    /// Returns the comments who are within the range
-    pub fn comments_in_range(&self, range: TextRange) -> &[TextRange] {
-        let start = self
-            .raw
-            .partition_point(|comment| comment.start() < range.start());
-        // We expect there are few comments, so switching to find should be faster
-        match self.raw[start..]
-            .iter()
-            .find_position(|comment| comment.end() > range.end())
-        {
-            Some((in_range, _element)) => &self.raw[start..start + in_range],
-            None => &self.raw[start..],
-        }
-    }
-}
-
-impl Deref for CommentRanges {
-    type Target = [TextRange];
-
-    fn deref(&self) -> &Self::Target {
-        self.raw.as_slice()
-    }
-}
-
-impl Debug for CommentRanges {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        f.debug_tuple("CommentRanges").field(&self.raw).finish()
-    }
-}
-
-impl<'a> IntoIterator for &'a CommentRanges {
-    type IntoIter = std::slice::Iter<'a, TextRange>;
-    type Item = &'a TextRange;
-
-    fn into_iter(self) -> Self::IntoIter {
-        self.raw.iter()
-    }
-}
-
-#[derive(Debug, Clone, Default)]
-pub struct CommentRangesBuilder {
-    ranges: Vec<TextRange>,
-}
-
-impl CommentRangesBuilder {
-    pub fn visit_token(&mut self, token: &Tok, range: TextRange) {
-        if token.is_comment() {
-            self.ranges.push(range);
-        }
-    }
-
-    pub fn finish(self) -> CommentRanges {
-        CommentRanges { raw: self.ranges }
-    }
-}
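The deleted `CommentRanges::intersects` exploits the fact that the stored ranges are sorted and non-overlapping, so a single binary search can answer "does `target` fully contain any comment range?". A std-only sketch with `(start, end)` byte-offset pairs standing in for `TextRange`:

```rust
fn intersects(sorted_ranges: &[(u32, u32)], target: (u32, u32)) -> bool {
    sorted_ranges
        .binary_search_by(|&(start, end)| {
            if target.0 <= start && end <= target.1 {
                std::cmp::Ordering::Equal // comment contained in target
            } else if end < target.0 {
                std::cmp::Ordering::Less // comment entirely before target
            } else {
                std::cmp::Ordering::Greater // comment entirely after target
            }
        })
        .is_ok()
}

fn main() {
    // Comment ranges for a hypothetical file, sorted and non-overlapping.
    let comments = [(10, 20), (35, 40), (80, 95)];
    assert!(intersects(&comments, (30, 50)));
    assert!(!intersects(&comments, (21, 34)));
}
```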
[diff of one file suppressed because it is too large]
@@ -1,296 +0,0 @@
-//! Struct used to index source code, to enable efficient lookup of tokens that
-//! are omitted from the AST (e.g., commented lines).
-
-use ruff_text_size::{TextRange, TextSize};
-use rustpython_parser::lexer::LexResult;
-use rustpython_parser::{StringKind, Tok};
-
-use crate::source_code::comment_ranges::{CommentRanges, CommentRangesBuilder};
-use crate::source_code::Locator;
-
-pub struct Indexer {
-    comment_ranges: CommentRanges,
-
-    /// Stores the start offset of continuation lines.
-    continuation_lines: Vec<TextSize>,
-
-    /// The range of all triple quoted strings in the source document. The ranges are sorted by their
-    /// [`TextRange::start`] position in increasing order. No two ranges are overlapping.
-    triple_quoted_string_ranges: Vec<TextRange>,
-
-    /// The range of all f-string in the source document. The ranges are sorted by their
-    /// [`TextRange::start`] position in increasing order. No two ranges are overlapping.
-    f_string_ranges: Vec<TextRange>,
-}
-
-impl Indexer {
-    pub fn from_tokens(tokens: &[LexResult], locator: &Locator) -> Self {
-        assert!(TextSize::try_from(locator.contents().len()).is_ok());
-
-        let mut comment_ranges_builder = CommentRangesBuilder::default();
-        let mut continuation_lines = Vec::new();
-        let mut triple_quoted_string_ranges = Vec::new();
-        let mut f_string_ranges = Vec::new();
-        // Token, end
-        let mut prev_end = TextSize::default();
-        let mut prev_token: Option<&Tok> = None;
-        let mut line_start = TextSize::default();
-
-        for (tok, range) in tokens.iter().flatten() {
-            let trivia = &locator.contents()[TextRange::new(prev_end, range.start())];
-
-            // Get the trivia between the previous and the current token and detect any newlines.
-            // This is necessary because `RustPython` doesn't emit `[Tok::Newline]` tokens
-            // between any two tokens that form a continuation. That's why we have to extract the
-            // newlines "manually".
-            for (index, text) in trivia.match_indices(['\n', '\r']) {
-                if text == "\r" && trivia.as_bytes().get(index + 1) == Some(&b'\n') {
-                    continue;
-                }
-
-                // Newlines after a newline never form a continuation.
-                if !matches!(prev_token, Some(Tok::Newline | Tok::NonLogicalNewline)) {
-                    continuation_lines.push(line_start);
-                }
-
-                // SAFETY: Safe because of the len assertion at the top of the function.
-                #[allow(clippy::cast_possible_truncation)]
-                {
-                    line_start = prev_end + TextSize::new((index + 1) as u32);
-                }
-            }
-
-            comment_ranges_builder.visit_token(tok, *range);
-
-            match tok {
-                Tok::Newline | Tok::NonLogicalNewline => {
-                    line_start = range.end();
-                }
-                Tok::String {
-                    triple_quoted: true,
-                    ..
-                } => {
-                    triple_quoted_string_ranges.push(*range);
-                }
-                Tok::String {
-                    kind: StringKind::FString | StringKind::RawFString,
-                    ..
-                } => {
-                    f_string_ranges.push(*range);
-                }
-                _ => {}
-            }
-
-            prev_token = Some(tok);
-            prev_end = range.end();
-        }
-        Self {
-            comment_ranges: comment_ranges_builder.finish(),
-            continuation_lines,
-            triple_quoted_string_ranges,
-            f_string_ranges,
-        }
-    }
-
-    /// Returns the byte offset ranges of comments
-    pub const fn comment_ranges(&self) -> &CommentRanges {
-        &self.comment_ranges
-    }
-
-    /// Returns the comments in the given range as source code slices
-    pub fn comments_in_range<'a>(
-        &'a self,
-        range: TextRange,
-        locator: &'a Locator,
-    ) -> impl Iterator<Item = &'a str> {
-        self.comment_ranges
-            .comments_in_range(range)
-            .iter()
-            .map(move |comment_range| locator.slice(*comment_range))
-    }
-
-    /// Returns the line start positions of continuations (backslash).
-    pub fn continuation_line_starts(&self) -> &[TextSize] {
-        &self.continuation_lines
-    }
-
-    /// Returns `true` if the given offset is part of a continuation line.
-    pub fn is_continuation(&self, offset: TextSize, locator: &Locator) -> bool {
-        let line_start = locator.line_start(offset);
-        self.continuation_lines.binary_search(&line_start).is_ok()
-    }
-
-    /// Return the [`TextRange`] of the triple-quoted-string containing a given offset.
-    pub fn triple_quoted_string_range(&self, offset: TextSize) -> Option<TextRange> {
-        let Ok(string_range_index) = self.triple_quoted_string_ranges.binary_search_by(|range| {
-            if offset < range.start() {
-                std::cmp::Ordering::Greater
-            } else if range.contains(offset) {
-                std::cmp::Ordering::Equal
-            } else {
-                std::cmp::Ordering::Less
-            }
-        }) else {
-            return None;
-        };
-        Some(self.triple_quoted_string_ranges[string_range_index])
-    }
-
-    /// Return the [`TextRange`] of the f-string containing a given offset.
-    pub fn f_string_range(&self, offset: TextSize) -> Option<TextRange> {
-        let Ok(string_range_index) = self.f_string_ranges.binary_search_by(|range| {
-            if offset < range.start() {
-                std::cmp::Ordering::Greater
-            } else if range.contains(offset) {
-                std::cmp::Ordering::Equal
-            } else {
-                std::cmp::Ordering::Less
-            }
-        }) else {
-            return None;
-        };
-        Some(self.f_string_ranges[string_range_index])
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use ruff_text_size::{TextRange, TextSize};
-    use rustpython_parser::lexer::LexResult;
-    use rustpython_parser::{lexer, Mode};
-
-    use crate::source_code::{Indexer, Locator};
-
-    #[test]
-    fn continuation() {
-        let contents = r#"x = 1"#;
-        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
-        let indexer = Indexer::from_tokens(&lxr, &Locator::new(contents));
-        assert_eq!(indexer.continuation_line_starts(), &[]);
-
-        let contents = r#"
-# Hello, world!
-
-x = 1
-
-y = 2
-"#
-        .trim();
-
-        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
-        let indexer = Indexer::from_tokens(&lxr, &Locator::new(contents));
-        assert_eq!(indexer.continuation_line_starts(), &[]);
-
-        let contents = r#"
-x = \
-    1
-
-if True:
-    z = \
-        \
-        2
-
-(
-    "abc" # Foo
-    "def" \
-    "ghi"
-)
-"#
-        .trim();
-        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
-        let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
-        assert_eq!(
-            indexer.continuation_line_starts(),
-            [
-                // row 1
-                TextSize::from(0),
-                // row 5
-                TextSize::from(22),
-                // row 6
-                TextSize::from(32),
-                // row 11
-                TextSize::from(71),
-            ]
-        );
-
-        let contents = r"
-x = 1; import sys
-import os
-
-if True:
-    x = 1; import sys
-    import os
-
-if True:
-    x = 1; \
-        import os
-
-x = 1; \
-import os
-"
-        .trim();
-        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
-        let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
-        assert_eq!(
-            indexer.continuation_line_starts(),
-            [
-                // row 9
-                TextSize::from(84),
-                // row 12
-                TextSize::from(116)
-            ]
-        );
-    }
-
-    #[test]
-    fn string_ranges() {
-        let contents = r#""this is a single-quoted string""#;
-        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
-        let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
-        assert_eq!(indexer.triple_quoted_string_ranges, []);
-
-        let contents = r#"
-            """
-            this is a multiline string
-            """
-            "#;
-        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
-        let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
-        assert_eq!(
-            indexer.triple_quoted_string_ranges,
-            [TextRange::new(TextSize::from(13), TextSize::from(71))]
-        );
-
-        let contents = r#"
-            """
-            '''this is a multiline string with multiple delimiter types'''
-            """
-            "#;
-        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
-        let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
-        assert_eq!(
-            indexer.triple_quoted_string_ranges,
-            [TextRange::new(TextSize::from(13), TextSize::from(107))]
-        );
-
-        let contents = r#"
-            """
-            this is one
-            multiline string
-            """
-            """
-            and this is
-            another
-            """
-            "#;
-        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
-        let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
-        assert_eq!(
-            indexer.triple_quoted_string_ranges,
-            &[
-                TextRange::new(TextSize::from(13), TextSize::from(85)),
-                TextRange::new(TextSize::from(98), TextSize::from(161))
-            ]
-        );
-    }
-}
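The deleted `Indexer` records the start offsets of continuation lines (lines whose newline is escaped by a backslash) while walking the token stream. A simplified, std-only sketch that scans raw text instead, and therefore ignores backslashes inside strings and comments:

```rust
// `continuation_line_starts` is a hypothetical stand-in for the real
// token-based implementation.
fn continuation_line_starts(source: &str) -> Vec<usize> {
    let mut starts = Vec::new();
    let mut line_start = 0;
    for line in source.split_inclusive('\n') {
        // Strip the line terminator, then test for a trailing backslash.
        if line
            .trim_end_matches(|c| c == '\n' || c == '\r')
            .ends_with('\\')
        {
            starts.push(line_start);
        }
        line_start += line.len();
    }
    starts
}

fn main() {
    let source = "x = \\\n    1\nif True:\n    z = \\\n        2\n";
    assert_eq!(continuation_line_starts(source), vec![0, 21]);
}
```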
@@ -1,595 +0,0 @@
-use std::fmt;
-use std::fmt::{Debug, Formatter};
-use std::num::NonZeroUsize;
-use std::ops::Deref;
-use std::sync::Arc;
-
-use ruff_text_size::{TextLen, TextRange, TextSize};
-#[cfg(feature = "serde")]
-use serde::{Deserialize, Serialize};
-
-use crate::source_code::SourceLocation;
-
-/// Index for fast [byte offset](TextSize) to [`SourceLocation`] conversions.
-///
-/// Cloning a [`LineIndex`] is cheap because it only requires bumping a reference count.
-#[derive(Clone)]
-pub struct LineIndex {
-    inner: Arc<LineIndexInner>,
-}
-
-struct LineIndexInner {
-    line_starts: Vec<TextSize>,
-    kind: IndexKind,
-}
-
-impl LineIndex {
-    /// Builds the [`LineIndex`] from the source text of a file.
-    pub fn from_source_text(text: &str) -> Self {
-        let mut line_starts: Vec<TextSize> = Vec::with_capacity(text.len() / 88);
-        line_starts.push(TextSize::default());
-
-        let bytes = text.as_bytes();
-        let mut utf8 = false;
-
-        assert!(u32::try_from(bytes.len()).is_ok());
-
-        for (i, byte) in bytes.iter().enumerate() {
-            utf8 |= !byte.is_ascii();
-
-            match byte {
-                // Only track one line break for `\r\n`.
-                b'\r' if bytes.get(i + 1) == Some(&b'\n') => continue,
-                b'\n' | b'\r' => {
-                    // SAFETY: Assertion above guarantees `i <= u32::MAX`
-                    #[allow(clippy::cast_possible_truncation)]
-                    line_starts.push(TextSize::from(i as u32) + TextSize::from(1));
-                }
-                _ => {}
-            }
-        }
-
-        let kind = if utf8 {
-            IndexKind::Utf8
-        } else {
-            IndexKind::Ascii
-        };
-
-        Self {
-            inner: Arc::new(LineIndexInner { line_starts, kind }),
-        }
-    }
-
-    fn kind(&self) -> IndexKind {
-        self.inner.kind
-    }
-
-    /// Returns the row and column index for an offset.
-    ///
-    /// ## Examples
-    ///
-    /// ```
-    /// # use ruff_text_size::TextSize;
-    /// # use ruff_python_ast::source_code::{LineIndex, OneIndexed, SourceLocation};
-    /// let source = "def a():\n    pass";
-    /// let index = LineIndex::from_source_text(source);
-    ///
-    /// assert_eq!(
-    ///     index.source_location(TextSize::from(0), source),
-    ///     SourceLocation { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(0) }
-    /// );
-    ///
-    /// assert_eq!(
-    ///     index.source_location(TextSize::from(4), source),
-    ///     SourceLocation { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(4) }
-    /// );
-    /// assert_eq!(
-    ///     index.source_location(TextSize::from(13), source),
-    ///     SourceLocation { row: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(4) }
-    /// );
-    /// ```
-    ///
-    /// ## Panics
-    ///
-    /// If the offset is out of bounds.
-    pub fn source_location(&self, offset: TextSize, content: &str) -> SourceLocation {
-        match self.line_starts().binary_search(&offset) {
-            // Offset is at the start of a line
-            Ok(row) => SourceLocation {
-                row: OneIndexed::from_zero_indexed(row),
-                column: OneIndexed::from_zero_indexed(0),
-            },
-            Err(next_row) => {
-                // SAFETY: Safe because the index always contains an entry for the offset 0
-                let row = next_row - 1;
-                let mut line_start = self.line_starts()[row];
-
-                let column = if self.kind().is_ascii() {
-                    usize::from(offset) - usize::from(line_start)
-                } else {
-                    // Don't count the BOM character as a column.
-                    if line_start == TextSize::from(0) && content.starts_with('\u{feff}') {
-                        line_start = '\u{feff}'.text_len();
-                    }
-
-                    content[TextRange::new(line_start, offset)].chars().count()
-                };
-
-                SourceLocation {
-                    row: OneIndexed::from_zero_indexed(row),
-                    column: OneIndexed::from_zero_indexed(column),
-                }
-            }
-        }
-    }
-
-    /// Return the number of lines in the source code.
-    pub(crate) fn line_count(&self) -> usize {
-        self.line_starts().len()
-    }
-
-    /// Returns the row number for a given offset.
-    ///
-    /// ## Examples
-    ///
-    /// ```
-    /// # use ruff_text_size::TextSize;
-    /// # use ruff_python_ast::source_code::{LineIndex, OneIndexed, SourceLocation};
-    /// let source = "def a():\n    pass";
-    /// let index = LineIndex::from_source_text(source);
-    ///
-    /// assert_eq!(index.line_index(TextSize::from(0)), OneIndexed::from_zero_indexed(0));
-    /// assert_eq!(index.line_index(TextSize::from(4)), OneIndexed::from_zero_indexed(0));
-    /// assert_eq!(index.line_index(TextSize::from(13)), OneIndexed::from_zero_indexed(1));
-    /// ```
-    ///
-    /// ## Panics
-    ///
-    /// If the offset is out of bounds.
-    pub fn line_index(&self, offset: TextSize) -> OneIndexed {
-        match self.line_starts().binary_search(&offset) {
-            // Offset is at the start of a line
-            Ok(row) => OneIndexed::from_zero_indexed(row),
-            Err(row) => {
-                // SAFETY: Safe because the index always contains an entry for the offset 0
-                OneIndexed::from_zero_indexed(row - 1)
-            }
-        }
-    }
-
-    /// Returns the [byte offset](TextSize) for the `line` with the given index.
-    pub(crate) fn line_start(&self, line: OneIndexed, contents: &str) -> TextSize {
-        let row_index = line.to_zero_indexed();
-        let starts = self.line_starts();
-
-        // If start-of-line position after last line
-        if row_index == starts.len() {
-            contents.text_len()
-        } else {
-            starts[row_index]
-        }
-    }
-
-    /// Returns the [byte offset](TextSize) of the `line`'s end.
-    /// The offset is the end of the line, up to and including the newline character ending the line (if any).
-    pub(crate) fn line_end(&self, line: OneIndexed, contents: &str) -> TextSize {
-        let row_index = line.to_zero_indexed();
-        let starts = self.line_starts();
-
-        // If start-of-line position after last line
-        if row_index.saturating_add(1) >= starts.len() {
-            contents.text_len()
-        } else {
-            starts[row_index + 1]
-        }
-    }
-
-    /// Returns the [`TextRange`] of the `line` with the given index.
-    /// The start points to the first character's [byte offset](TextSize), the end up to, and including
-    /// the newline character ending the line (if any).
-    pub(crate) fn line_range(&self, line: OneIndexed, contents: &str) -> TextRange {
-        let starts = self.line_starts();
-
-        if starts.len() == line.to_zero_indexed() {
-            TextRange::empty(contents.text_len())
-        } else {
-            TextRange::new(
-                self.line_start(line, contents),
-                self.line_start(line.saturating_add(1), contents),
-            )
-        }
-    }
-
-    /// Returns the [byte offsets](TextSize) for every line
-    pub fn line_starts(&self) -> &[TextSize] {
-        &self.inner.line_starts
-    }
-}
-
-impl Deref for LineIndex {
-    type Target = [TextSize];
-
-    fn deref(&self) -> &Self::Target {
-        self.line_starts()
-    }
-}
-
-impl Debug for LineIndex {
-    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
-        f.debug_list().entries(self.line_starts()).finish()
-    }
-}
-
-#[derive(Debug, Clone, Copy)]
-enum IndexKind {
-    /// Optimized index for an ASCII only document
-    Ascii,
-
-    /// Index for UTF8 documents
-    Utf8,
-}
-
-impl IndexKind {
-    const fn is_ascii(self) -> bool {
-        matches!(self, IndexKind::Ascii)
-    }
-}
-
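The core of the deleted `LineIndex`: record every line-start offset once, then convert byte offsets to (row, column) with a binary search. A std-only sketch with `usize` offsets and byte columns (the real index counts characters for non-ASCII files):

```rust
fn line_starts(text: &str) -> Vec<usize> {
    let mut starts = vec![0];
    for (i, byte) in text.bytes().enumerate() {
        if byte == b'\n' {
            starts.push(i + 1);
        }
    }
    starts
}

fn source_location(starts: &[usize], offset: usize) -> (usize, usize) {
    match starts.binary_search(&offset) {
        // Offset is exactly at a line start.
        Ok(row) => (row, 0),
        // Otherwise it belongs to the previous line; starts[0] == 0
        // guarantees `next_row >= 1`.
        Err(next_row) => (next_row - 1, offset - starts[next_row - 1]),
    }
}

fn main() {
    let source = "def a():\n    pass";
    let starts = line_starts(source);
    assert_eq!(source_location(&starts, 0), (0, 0));
    assert_eq!(source_location(&starts, 4), (0, 4));
    assert_eq!(source_location(&starts, 13), (1, 4));
}
```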
-/// Type-safe wrapper for a value whose logical range starts at `1`, for
-/// instance the line or column numbers in a file
-///
-/// Internally this is represented as a [`NonZeroUsize`], this enables some
-/// memory optimizations
-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
-pub struct OneIndexed(NonZeroUsize);
-
-impl OneIndexed {
-    /// The largest value that can be represented by this integer type
-    pub const MAX: Self = unwrap(Self::new(usize::MAX));
-    // SAFETY: These constants are being initialized with non-zero values
-    /// The smallest value that can be represented by this integer type.
-    pub const MIN: Self = unwrap(Self::new(1));
-    pub const ONE: NonZeroUsize = unwrap(NonZeroUsize::new(1));
-
-    /// Creates a non-zero if the given value is not zero.
-    pub const fn new(value: usize) -> Option<Self> {
-        match NonZeroUsize::new(value) {
-            Some(value) => Some(Self(value)),
-            None => None,
-        }
-    }
-
-    /// Construct a new [`OneIndexed`] from a zero-indexed value
-    pub const fn from_zero_indexed(value: usize) -> Self {
-        Self(Self::ONE.saturating_add(value))
-    }
-
-    /// Returns the value as a primitive type.
-    pub const fn get(self) -> usize {
-        self.0.get()
-    }
-
-    /// Return the zero-indexed primitive value for this [`OneIndexed`]
-    pub const fn to_zero_indexed(self) -> usize {
-        self.0.get() - 1
-    }
-
-    /// Saturating integer addition. Computes `self + rhs`, saturating at
-    /// the numeric bounds instead of overflowing.
-    #[must_use]
-    pub const fn saturating_add(self, rhs: usize) -> Self {
-        match NonZeroUsize::new(self.0.get().saturating_add(rhs)) {
-            Some(value) => Self(value),
-            None => Self::MAX,
-        }
-    }
-
-    /// Saturating integer subtraction. Computes `self - rhs`, saturating
-    /// at the numeric bounds instead of overflowing.
-    #[must_use]
-    pub const fn saturating_sub(self, rhs: usize) -> Self {
-        match NonZeroUsize::new(self.0.get().saturating_sub(rhs)) {
-            Some(value) => Self(value),
-            None => Self::MIN,
-        }
-    }
-}
-
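The `OneIndexed` wrapper above leans on a `NonZeroUsize` niche: because the zero bit pattern is unused, `Option<OneIndexed>` stays one word wide, and the saturating conversions preserve the "at least 1" invariant without panicking. A self-contained sketch of the idea:

```rust
use std::num::NonZeroUsize;

#[derive(Debug, Copy, Clone, PartialEq, Eq)]
struct OneIndexed(NonZeroUsize);

impl OneIndexed {
    fn from_zero_indexed(value: usize) -> Self {
        // value + 1 >= 1 always holds (saturating at usize::MAX).
        Self(NonZeroUsize::new(value.saturating_add(1)).unwrap())
    }

    fn to_zero_indexed(self) -> usize {
        self.0.get() - 1
    }
}

fn main() {
    // The niche optimization: no extra tag byte is needed for `Option`.
    assert_eq!(
        std::mem::size_of::<Option<OneIndexed>>(),
        std::mem::size_of::<usize>()
    );
    let row = OneIndexed::from_zero_indexed(0);
    assert_eq!(row.to_zero_indexed(), 0);
}
```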
-impl fmt::Display for OneIndexed {
-    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
-        std::fmt::Debug::fmt(&self.0.get(), f)
-    }
-}
-
-/// A const `Option::unwrap` without nightly features:
-/// [Tracking issue](https://github.com/rust-lang/rust/issues/67441)
-const fn unwrap<T: Copy>(option: Option<T>) -> T {
-    match option {
-        Some(value) => value,
-        None => panic!("unwrapping None"),
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use ruff_text_size::TextSize;
-
-    use crate::source_code::line_index::LineIndex;
-    use crate::source_code::{OneIndexed, SourceLocation};
-
-    #[test]
-    fn ascii_index() {
-        let index = LineIndex::from_source_text("");
-        assert_eq!(index.line_starts(), &[TextSize::from(0)]);
-
-        let index = LineIndex::from_source_text("x = 1");
-        assert_eq!(index.line_starts(), &[TextSize::from(0)]);
-
-        let index = LineIndex::from_source_text("x = 1\n");
-        assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(6)]);
-
-        let index = LineIndex::from_source_text("x = 1\ny = 2\nz = x + y\n");
-        assert_eq!(
-            index.line_starts(),
-            &[
-                TextSize::from(0),
-                TextSize::from(6),
-                TextSize::from(12),
-                TextSize::from(22)
-            ]
-        );
-    }
-
-    #[test]
-    fn ascii_source_location() {
-        let contents = "x = 1\ny = 2";
-        let index = LineIndex::from_source_text(contents);
-
-        // First row.
-        let loc = index.source_location(TextSize::from(2), contents);
-        assert_eq!(
-            loc,
-            SourceLocation {
-                row: OneIndexed::from_zero_indexed(0),
-                column: OneIndexed::from_zero_indexed(2)
-            }
-        );
-
-        // Second row.
-        let loc = index.source_location(TextSize::from(6), contents);
-        assert_eq!(
-            loc,
-            SourceLocation {
-                row: OneIndexed::from_zero_indexed(1),
-                column: OneIndexed::from_zero_indexed(0)
-            }
-        );
-
-        let loc = index.source_location(TextSize::from(11), contents);
-        assert_eq!(
-            loc,
-            SourceLocation {
-                row: OneIndexed::from_zero_indexed(1),
-                column: OneIndexed::from_zero_indexed(5)
-            }
-        );
-    }
-
-    #[test]
-    fn ascii_carriage_return() {
-        let contents = "x = 4\ry = 3";
-        let index = LineIndex::from_source_text(contents);
-        assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(6)]);
-
-        assert_eq!(
-            index.source_location(TextSize::from(4), contents),
-            SourceLocation {
-                row: OneIndexed::from_zero_indexed(0),
-                column: OneIndexed::from_zero_indexed(4)
-            }
-        );
-        assert_eq!(
-            index.source_location(TextSize::from(6), contents),
-            SourceLocation {
-                row: OneIndexed::from_zero_indexed(1),
-                column: OneIndexed::from_zero_indexed(0)
-            }
-        );
-        assert_eq!(
-            index.source_location(TextSize::from(7), contents),
-            SourceLocation {
-                row: OneIndexed::from_zero_indexed(1),
-                column: OneIndexed::from_zero_indexed(1)
-            }
-        );
-    }
-
-    #[test]
-    fn ascii_carriage_return_newline() {
-        let contents = "x = 4\r\ny = 3";
-        let index = LineIndex::from_source_text(contents);
-        assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(7)]);
-
-        assert_eq!(
-            index.source_location(TextSize::from(4), contents),
-            SourceLocation {
-                row: OneIndexed::from_zero_indexed(0),
-                column: OneIndexed::from_zero_indexed(4)
-            }
-        );
-        assert_eq!(
-            index.source_location(TextSize::from(7), contents),
-            SourceLocation {
-                row: OneIndexed::from_zero_indexed(1),
-                column: OneIndexed::from_zero_indexed(0)
-            }
-        );
-        assert_eq!(
-            index.source_location(TextSize::from(8), contents),
-            SourceLocation {
-                row: OneIndexed::from_zero_indexed(1),
-                column: OneIndexed::from_zero_indexed(1)
-            }
-        );
-    }
-
-    #[test]
-    fn utf8_index() {
-        let index = LineIndex::from_source_text("x = '🫣'");
-        assert_eq!(index.line_count(), 1);
-        assert_eq!(index.line_starts(), &[TextSize::from(0)]);
-
-        let index = LineIndex::from_source_text("x = '🫣'\n");
-        assert_eq!(index.line_count(), 2);
-        assert_eq!(
-            index.line_starts(),
-            &[TextSize::from(0), TextSize::from(11)]
-        );
-
-        let index = LineIndex::from_source_text("x = '🫣'\ny = 2\nz = x + y\n");
-        assert_eq!(index.line_count(), 4);
-        assert_eq!(
-            index.line_starts(),
-            &[
-                TextSize::from(0),
-                TextSize::from(11),
-                TextSize::from(17),
-                TextSize::from(27)
-            ]
-        );
-
-        let index = LineIndex::from_source_text("# 🫣\nclass Foo:\n \"\"\".\"\"\"");
-        assert_eq!(index.line_count(), 3);
-        assert_eq!(
-            index.line_starts(),
-            &[TextSize::from(0), TextSize::from(7), TextSize::from(18)]
-        );
-    }
-
-    #[test]
-    fn utf8_carriage_return() {
-        let contents = "x = '🫣'\ry = 3";
-        let index = LineIndex::from_source_text(contents);
-        assert_eq!(index.line_count(), 2);
-        assert_eq!(
-            index.line_starts(),
-            &[TextSize::from(0), TextSize::from(11)]
-        );
-
-        // Second '
-        assert_eq!(
-            index.source_location(TextSize::from(9), contents),
-            SourceLocation {
-                row: OneIndexed::from_zero_indexed(0),
-                column: OneIndexed::from_zero_indexed(6)
-            }
-        );
-        assert_eq!(
-            index.source_location(TextSize::from(11), contents),
-            SourceLocation {
-                row: OneIndexed::from_zero_indexed(1),
-                column: OneIndexed::from_zero_indexed(0)
-            }
-        );
-        assert_eq!(
-            index.source_location(TextSize::from(12), contents),
-            SourceLocation {
-                row: OneIndexed::from_zero_indexed(1),
-                column: OneIndexed::from_zero_indexed(1)
-            }
-        );
-    }
-
-    #[test]
-    fn utf8_carriage_return_newline() {
-        let contents = "x = '🫣'\r\ny = 3";
-        let index = LineIndex::from_source_text(contents);
-        assert_eq!(index.line_count(), 2);
-        assert_eq!(
-            index.line_starts(),
-            &[TextSize::from(0), TextSize::from(12)]
-        );
-
-        // Second '
-        assert_eq!(
-            index.source_location(TextSize::from(9), contents),
-            SourceLocation {
-                row: OneIndexed::from_zero_indexed(0),
-                column: OneIndexed::from_zero_indexed(6)
-            }
-        );
-        assert_eq!(
-            index.source_location(TextSize::from(12), contents),
-            SourceLocation {
-                row: OneIndexed::from_zero_indexed(1),
-                column: OneIndexed::from_zero_indexed(0)
-            }
-        );
-        assert_eq!(
-            index.source_location(TextSize::from(13), contents),
-            SourceLocation {
-                row: OneIndexed::from_zero_indexed(1),
-                column: OneIndexed::from_zero_indexed(1)
-            }
-        );
-    }
-
-    #[test]
-    fn utf8_byte_offset() {
-        let contents = "x = '☃'\ny = 2";
-        let index = LineIndex::from_source_text(contents);
-        assert_eq!(
-            index.line_starts(),
-            &[TextSize::from(0), TextSize::from(10)]
-        );
-
-        // First row.
-        let loc = index.source_location(TextSize::from(0), contents);
-        assert_eq!(
-            loc,
-            SourceLocation {
-                row: OneIndexed::from_zero_indexed(0),
-                column: OneIndexed::from_zero_indexed(0)
-            }
-        );
-
-        let loc = index.source_location(TextSize::from(5), contents);
-        assert_eq!(
-            loc,
-            SourceLocation {
-                row: OneIndexed::from_zero_indexed(0),
-                column: OneIndexed::from_zero_indexed(5)
-            }
-        );
-
-        let loc = index.source_location(TextSize::from(8), contents);
-        assert_eq!(
-            loc,
-            SourceLocation {
-                row: OneIndexed::from_zero_indexed(0),
-                column: OneIndexed::from_zero_indexed(6)
-            }
-        );
-
-        // Second row.
-        let loc = index.source_location(TextSize::from(10), contents);
-        assert_eq!(
-            loc,
-            SourceLocation {
-                row: OneIndexed::from_zero_indexed(1),
-                column: OneIndexed::from_zero_indexed(0)
-            }
-        );
-
-        // One-past-the-end.
-        let loc = index.source_location(TextSize::from(15), contents);
-        assert_eq!(
-            loc,
-            SourceLocation {
-                row: OneIndexed::from_zero_indexed(1),
-                column: OneIndexed::from_zero_indexed(5)
-            }
-        );
-    }
-}
@@ -1,412 +0,0 @@
-//! Struct used to efficiently slice source code at (row, column) Locations.
-
-use std::ops::Add;
-
-use memchr::{memchr2, memrchr2};
-use once_cell::unsync::OnceCell;
-use ruff_text_size::{TextLen, TextRange, TextSize};
-
-use ruff_python_trivia::find_newline;
-
-use crate::source_code::{LineIndex, OneIndexed, SourceCode, SourceLocation};
-
-pub struct Locator<'a> {
-    contents: &'a str,
-    index: OnceCell<LineIndex>,
-}
-
-impl<'a> Locator<'a> {
-    pub const fn new(contents: &'a str) -> Self {
-        Self {
-            contents,
-            index: OnceCell::new(),
-        }
-    }
-
-    #[deprecated(
-        note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead."
-    )]
-    pub fn compute_line_index(&self, offset: TextSize) -> OneIndexed {
-        self.to_index().line_index(offset)
-    }
-
-    #[deprecated(
-        note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead."
-    )]
-    pub fn compute_source_location(&self, offset: TextSize) -> SourceLocation {
-        self.to_source_code().source_location(offset)
-    }
-
-    fn to_index(&self) -> &LineIndex {
-        self.index
-            .get_or_init(|| LineIndex::from_source_text(self.contents))
-    }
-
-    pub fn line_index(&self) -> Option<&LineIndex> {
-        self.index.get()
-    }
-
-    pub fn to_source_code(&self) -> SourceCode {
-        SourceCode {
-            index: self.to_index(),
-            text: self.contents,
-        }
-    }
-
-    /// Computes the start position of the line of `offset`.
-    ///
-    /// ## Examples
-    ///
-    /// ```
-    /// # use ruff_text_size::TextSize;
-    /// # use ruff_python_ast::source_code::Locator;
-    ///
-    /// let locator = Locator::new("First line\nsecond line\rthird line");
-    ///
-    /// assert_eq!(locator.line_start(TextSize::from(0)), TextSize::from(0));
-    /// assert_eq!(locator.line_start(TextSize::from(4)), TextSize::from(0));
-    ///
-    /// assert_eq!(locator.line_start(TextSize::from(14)), TextSize::from(11));
-    /// assert_eq!(locator.line_start(TextSize::from(28)), TextSize::from(23));
-    /// ```
-    ///
-    /// ## Panics
-    /// If `offset` is out of bounds.
-    pub fn line_start(&self, offset: TextSize) -> TextSize {
-        let bytes = self.contents[TextRange::up_to(offset)].as_bytes();
-        if let Some(index) = memrchr2(b'\n', b'\r', bytes) {
-            // SAFETY: Safe because `index < offset`
-            TextSize::try_from(index).unwrap().add(TextSize::from(1))
-        } else {
-            TextSize::default()
-        }
-    }
-
-    pub fn is_at_start_of_line(&self, offset: TextSize) -> bool {
-        offset == TextSize::from(0)
-            || self.contents[TextRange::up_to(offset)].ends_with(['\n', '\r'])
-    }
-
-    /// Computes the offset that is right after the newline character that ends `offset`'s line.
-    ///
-    /// ## Examples
-    ///
-    /// ```
-    /// # use ruff_text_size::{TextRange, TextSize};
-    /// # use ruff_python_ast::source_code::Locator;
-    ///
-    /// let locator = Locator::new("First line\nsecond line\r\nthird line");
-    ///
-    /// assert_eq!(locator.full_line_end(TextSize::from(3)), TextSize::from(11));
-    /// assert_eq!(locator.full_line_end(TextSize::from(14)), TextSize::from(24));
-    /// assert_eq!(locator.full_line_end(TextSize::from(28)), TextSize::from(34));
-    /// ```
-    ///
-    /// ## Panics
-    ///
-    /// If `offset` is passed the end of the content.
-    pub fn full_line_end(&self, offset: TextSize) -> TextSize {
-        let slice = &self.contents[usize::from(offset)..];
-        if let Some((index, line_ending)) = find_newline(slice) {
-            offset + TextSize::try_from(index).unwrap() + line_ending.text_len()
-        } else {
-            self.contents.text_len()
-        }
-    }
-
-    /// Computes the offset that is right before the newline character that ends `offset`'s line.
-    ///
-    /// ## Examples
-    ///
-    /// ```
-    /// # use ruff_text_size::{TextRange, TextSize};
-    /// # use ruff_python_ast::source_code::Locator;
-    ///
-    /// let locator = Locator::new("First line\nsecond line\r\nthird line");
-    ///
-    /// assert_eq!(locator.line_end(TextSize::from(3)), TextSize::from(10));
-    /// assert_eq!(locator.line_end(TextSize::from(14)), TextSize::from(22));
-    /// assert_eq!(locator.line_end(TextSize::from(28)), TextSize::from(34));
-    /// ```
-    ///
-    /// ## Panics
-    ///
-    /// If `offset` is passed the end of the content.
-    pub fn line_end(&self, offset: TextSize) -> TextSize {
-        let slice = &self.contents[usize::from(offset)..];
-        if let Some(index) = memchr2(b'\n', b'\r', slice.as_bytes()) {
-            offset + TextSize::try_from(index).unwrap()
-        } else {
-            self.contents.text_len()
-        }
-    }
-
/// Computes the range of this `offset`s line.
|
||||
///
|
||||
/// The range starts at the beginning of the line and goes up to, and including, the new line character
|
||||
/// at the end of the line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(locator.full_line_range(TextSize::from(3)), TextRange::new(TextSize::from(0), TextSize::from(11)));
|
||||
/// assert_eq!(locator.full_line_range(TextSize::from(14)), TextRange::new(TextSize::from(11), TextSize::from(24)));
|
||||
/// assert_eq!(locator.full_line_range(TextSize::from(28)), TextRange::new(TextSize::from(24), TextSize::from(34)));
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If `offset` is out of bounds.
|
||||
pub fn full_line_range(&self, offset: TextSize) -> TextRange {
|
||||
TextRange::new(self.line_start(offset), self.full_line_end(offset))
|
||||
}
|
||||
|
||||
/// Computes the range of this `offset`s line ending before the newline character.
|
||||
///
|
||||
/// The range starts at the beginning of the line and goes up to, but excluding, the new line character
|
||||
/// at the end of the line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(locator.line_range(TextSize::from(3)), TextRange::new(TextSize::from(0), TextSize::from(10)));
|
||||
/// assert_eq!(locator.line_range(TextSize::from(14)), TextRange::new(TextSize::from(11), TextSize::from(22)));
|
||||
/// assert_eq!(locator.line_range(TextSize::from(28)), TextRange::new(TextSize::from(24), TextSize::from(34)));
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If `offset` is out of bounds.
|
||||
pub fn line_range(&self, offset: TextSize) -> TextRange {
|
||||
TextRange::new(self.line_start(offset), self.line_end(offset))
|
||||
}
|
||||
|
||||
/// Returns the text of the `offset`'s line.
|
||||
///
|
||||
/// The line includes the newline characters at the end of the line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(locator.full_line(TextSize::from(3)), "First line\n");
|
||||
/// assert_eq!(locator.full_line(TextSize::from(14)), "second line\r\n");
|
||||
/// assert_eq!(locator.full_line(TextSize::from(28)), "third line");
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If `offset` is out of bounds.
|
||||
pub fn full_line(&self, offset: TextSize) -> &'a str {
|
||||
&self.contents[self.full_line_range(offset)]
|
||||
}
|
||||
|
||||
/// Returns the text of the `offset`'s line.
|
||||
///
|
||||
/// Excludes the newline characters at the end of the line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(locator.line(TextSize::from(3)), "First line");
|
||||
/// assert_eq!(locator.line(TextSize::from(14)), "second line");
|
||||
/// assert_eq!(locator.line(TextSize::from(28)), "third line");
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If `offset` is out of bounds.
|
||||
pub fn line(&self, offset: TextSize) -> &'a str {
|
||||
&self.contents[self.line_range(offset)]
|
||||
}
|
||||
|
||||
/// Computes the range of all lines that this `range` covers.
|
||||
///
|
||||
/// The range starts at the beginning of the line at `range.start()` and goes up to, and including, the new line character
|
||||
/// at the end of `range.ends()`'s line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// locator.full_lines_range(TextRange::new(TextSize::from(3), TextSize::from(5))),
|
||||
/// TextRange::new(TextSize::from(0), TextSize::from(11))
|
||||
/// );
|
||||
/// assert_eq!(
|
||||
/// locator.full_lines_range(TextRange::new(TextSize::from(3), TextSize::from(14))),
|
||||
/// TextRange::new(TextSize::from(0), TextSize::from(24))
|
||||
/// );
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If the start or end of `range` is out of bounds.
|
||||
pub fn full_lines_range(&self, range: TextRange) -> TextRange {
|
||||
TextRange::new(
|
||||
self.line_start(range.start()),
|
||||
self.full_line_end(range.end()),
|
||||
)
|
||||
}
|
||||
|
||||
/// Computes the range of all lines that this `range` covers.
|
||||
///
|
||||
/// The range starts at the beginning of the line at `range.start()` and goes up to, but excluding, the new line character
|
||||
/// at the end of `range.end()`'s line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// locator.lines_range(TextRange::new(TextSize::from(3), TextSize::from(5))),
|
||||
/// TextRange::new(TextSize::from(0), TextSize::from(10))
|
||||
/// );
|
||||
/// assert_eq!(
|
||||
/// locator.lines_range(TextRange::new(TextSize::from(3), TextSize::from(14))),
|
||||
/// TextRange::new(TextSize::from(0), TextSize::from(22))
|
||||
/// );
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If the start or end of `range` is out of bounds.
|
||||
pub fn lines_range(&self, range: TextRange) -> TextRange {
|
||||
TextRange::new(self.line_start(range.start()), self.line_end(range.end()))
|
||||
}
|
||||
|
||||
/// Returns true if the text of `range` contains any line break.
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert!(
|
||||
/// !locator.contains_line_break(TextRange::new(TextSize::from(3), TextSize::from(5))),
|
||||
/// );
|
||||
/// assert!(
|
||||
/// locator.contains_line_break(TextRange::new(TextSize::from(3), TextSize::from(14))),
|
||||
/// );
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If the `range` is out of bounds.
|
||||
pub fn contains_line_break(&self, range: TextRange) -> bool {
|
||||
let text = &self.contents[range];
|
||||
text.contains(['\n', '\r'])
|
||||
}
|
||||
|
||||
/// Returns the text of all lines that include `range`.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// locator.lines(TextRange::new(TextSize::from(3), TextSize::from(5))),
|
||||
/// "First line"
|
||||
/// );
|
||||
/// assert_eq!(
|
||||
/// locator.lines(TextRange::new(TextSize::from(3), TextSize::from(14))),
|
||||
/// "First line\nsecond line"
|
||||
/// );
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If the start or end of `range` is out of bounds.
|
||||
pub fn lines(&self, range: TextRange) -> &'a str {
|
||||
&self.contents[self.lines_range(range)]
|
||||
}
|
||||
|
||||
/// Returns the text of all lines that include `range`.
|
||||
///
|
||||
/// Includes the newline characters of the last line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// locator.full_lines(TextRange::new(TextSize::from(3), TextSize::from(5))),
|
||||
/// "First line\n"
|
||||
/// );
|
||||
/// assert_eq!(
|
||||
/// locator.full_lines(TextRange::new(TextSize::from(3), TextSize::from(14))),
|
||||
/// "First line\nsecond line\r\n"
|
||||
/// );
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If the start or end of `range` is out of bounds.
|
||||
pub fn full_lines(&self, range: TextRange) -> &'a str {
|
||||
&self.contents[self.full_lines_range(range)]
|
||||
}
|
||||
|
||||
/// Take the source code up to the given [`TextSize`].
|
||||
#[inline]
|
||||
pub fn up_to(&self, offset: TextSize) -> &'a str {
|
||||
&self.contents[TextRange::up_to(offset)]
|
||||
}
|
||||
|
||||
/// Take the source code after the given [`TextSize`].
|
||||
#[inline]
|
||||
pub fn after(&self, offset: TextSize) -> &'a str {
|
||||
&self.contents[usize::from(offset)..]
|
||||
}
|
||||
|
||||
/// Take the source code between the given [`TextRange`].
|
||||
#[inline]
|
||||
pub fn slice(&self, range: TextRange) -> &'a str {
|
||||
&self.contents[range]
|
||||
}
|
||||
|
||||
/// Return the underlying source code.
|
||||
pub fn contents(&self) -> &'a str {
|
||||
self.contents
|
||||
}
|
||||
|
||||
/// Return the number of bytes in the source code.
|
||||
pub const fn len(&self) -> usize {
|
||||
self.contents.len()
|
||||
}
|
||||
|
||||
pub fn text_len(&self) -> TextSize {
|
||||
self.contents.text_len()
|
||||
}
|
||||
|
||||
/// Return `true` if the source code is empty.
|
||||
pub const fn is_empty(&self) -> bool {
|
||||
self.contents.is_empty()
|
||||
}
|
||||
}
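
A minimal usage sketch for the `Locator` above; the `ruff_python_ast::source_code` import path is the pre-removal location used in the doctests, and is an assumption here since this commit moves the module out of the crate.

use ruff_python_ast::source_code::Locator; // pre-removal path (assumption)
use ruff_text_size::TextSize;

fn main() {
    let locator = Locator::new("a = 1\nb = 2\n");
    // Offset 8 falls inside the second line (`b = 2`, starting at offset 6).
    assert_eq!(locator.line_start(TextSize::from(8)), TextSize::from(6));
    assert_eq!(locator.line(TextSize::from(8)), "b = 2");
    assert_eq!(locator.full_line(TextSize::from(8)), "b = 2\n");
}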
|
|
@ -1,266 +0,0 @@
|
|||
use std::cmp::Ordering;
use std::fmt::{Debug, Formatter};
use std::sync::Arc;

use ruff_text_size::{TextRange, TextSize};
use rustpython_parser::{ast, lexer, Mode, Parse, ParseError};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

pub use comment_ranges::{CommentRanges, CommentRangesBuilder};
pub use generator::Generator;
pub use indexer::Indexer;
pub use locator::Locator;
pub use stylist::{Quote, Stylist};

pub use crate::source_code::line_index::{LineIndex, OneIndexed};

mod comment_ranges;
mod generator;
mod indexer;
mod line_index;
mod locator;
mod stylist;

/// Run round-trip source code generation on given Python code.
pub fn round_trip(code: &str, source_path: &str) -> Result<String, ParseError> {
    let locator = Locator::new(code);
    let python_ast = ast::Suite::parse(code, source_path)?;
    let tokens: Vec<_> = lexer::lex(code, Mode::Module).collect();
    let stylist = Stylist::from_tokens(&tokens, &locator);
    let mut generator: Generator = (&stylist).into();
    generator.unparse_suite(&python_ast);
    Ok(generator.generate())
}
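
A minimal sketch of calling the `round_trip` helper above; the module path is the pre-removal one, an assumption since this commit relocates the module.

use ruff_python_ast::source_code::round_trip; // pre-removal path (assumption)

fn main() -> Result<(), rustpython_parser::ParseError> {
    // Generation re-emits the parsed AST using the detected code style.
    let generated = round_trip("x=1\nif x:\n    print( x )\n", "<filename>")?;
    println!("{generated}");
    Ok(())
}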

/// Gives access to the source code of a file and allows mapping between [`TextSize`] and [`SourceLocation`].
#[derive(Debug)]
pub struct SourceCode<'src, 'index> {
    text: &'src str,
    index: &'index LineIndex,
}

impl<'src, 'index> SourceCode<'src, 'index> {
    pub fn new(content: &'src str, index: &'index LineIndex) -> Self {
        Self {
            text: content,
            index,
        }
    }

    /// Computes the one-indexed row and column numbers for `offset`.
    #[inline]
    pub fn source_location(&self, offset: TextSize) -> SourceLocation {
        self.index.source_location(offset, self.text)
    }

    #[inline]
    pub fn line_index(&self, offset: TextSize) -> OneIndexed {
        self.index.line_index(offset)
    }

    /// Take the source code up to the given [`TextSize`].
    #[inline]
    pub fn up_to(&self, offset: TextSize) -> &'src str {
        &self.text[TextRange::up_to(offset)]
    }

    /// Take the source code after the given [`TextSize`].
    #[inline]
    pub fn after(&self, offset: TextSize) -> &'src str {
        &self.text[usize::from(offset)..]
    }

    /// Take the source code between the given [`TextRange`].
    pub fn slice(&self, range: TextRange) -> &'src str {
        &self.text[range]
    }

    pub fn line_start(&self, line: OneIndexed) -> TextSize {
        self.index.line_start(line, self.text)
    }

    pub fn line_end(&self, line: OneIndexed) -> TextSize {
        self.index.line_end(line, self.text)
    }

    pub fn line_range(&self, line: OneIndexed) -> TextRange {
        self.index.line_range(line, self.text)
    }

    /// Returns the source text of the line with the given index.
    #[inline]
    pub fn line_text(&self, index: OneIndexed) -> &'src str {
        let range = self.index.line_range(index, self.text);
        &self.text[range]
    }

    /// Returns the source text.
    pub fn text(&self) -> &'src str {
        self.text
    }

    /// Returns the number of lines.
    #[inline]
    pub fn line_count(&self) -> usize {
        self.index.line_count()
    }
}

impl PartialEq<Self> for SourceCode<'_, '_> {
    fn eq(&self, other: &Self) -> bool {
        self.text == other.text
    }
}

impl Eq for SourceCode<'_, '_> {}

/// A builder for constructing a [`SourceFile`].
pub struct SourceFileBuilder {
    name: Box<str>,
    code: Box<str>,
    index: Option<LineIndex>,
}

impl SourceFileBuilder {
    /// Creates a new builder for a file named `name`.
    pub fn new<Name: Into<Box<str>>, Code: Into<Box<str>>>(name: Name, code: Code) -> Self {
        Self {
            name: name.into(),
            code: code.into(),
            index: None,
        }
    }

    #[must_use]
    pub fn line_index(mut self, index: LineIndex) -> Self {
        self.index = Some(index);
        self
    }

    pub fn set_line_index(&mut self, index: LineIndex) {
        self.index = Some(index);
    }

    /// Consumes `self` and returns the [`SourceFile`].
    pub fn finish(self) -> SourceFile {
        let index = if let Some(index) = self.index {
            once_cell::sync::OnceCell::with_value(index)
        } else {
            once_cell::sync::OnceCell::new()
        };

        SourceFile {
            inner: Arc::new(SourceFileInner {
                name: self.name,
                code: self.code,
                line_index: index,
            }),
        }
    }
}

/// A source file that is identified by its name. Optionally stores the source code and [`LineIndex`].
///
/// Cloning a [`SourceFile`] is cheap, because it only requires bumping a reference count.
#[derive(Clone, Eq, PartialEq)]
pub struct SourceFile {
    inner: Arc<SourceFileInner>,
}

impl Debug for SourceFile {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SourceFile")
            .field("name", &self.name())
            .field("code", &self.source_text())
            .finish()
    }
}

impl SourceFile {
    /// Returns the name of the source file (filename).
    #[inline]
    pub fn name(&self) -> &str {
        &self.inner.name
    }

    #[inline]
    pub fn slice(&self, range: TextRange) -> &str {
        &self.source_text()[range]
    }

    pub fn to_source_code(&self) -> SourceCode {
        SourceCode {
            text: self.source_text(),
            index: self.index(),
        }
    }

    fn index(&self) -> &LineIndex {
        self.inner
            .line_index
            .get_or_init(|| LineIndex::from_source_text(self.source_text()))
    }

    /// Returns the source code.
    #[inline]
    pub fn source_text(&self) -> &str {
        &self.inner.code
    }
}

impl PartialOrd for SourceFile {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for SourceFile {
    fn cmp(&self, other: &Self) -> Ordering {
        // Short circuit if these are the same source files
        if Arc::ptr_eq(&self.inner, &other.inner) {
            Ordering::Equal
        } else {
            self.inner.name.cmp(&other.inner.name)
        }
    }
}

struct SourceFileInner {
    name: Box<str>,
    code: Box<str>,
    line_index: once_cell::sync::OnceCell<LineIndex>,
}

impl PartialEq for SourceFileInner {
    fn eq(&self, other: &Self) -> bool {
        self.name == other.name && self.code == other.code
    }
}

impl Eq for SourceFileInner {}

#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct SourceLocation {
    pub row: OneIndexed,
    pub column: OneIndexed,
}

impl Default for SourceLocation {
    fn default() -> Self {
        Self {
            row: OneIndexed::MIN,
            column: OneIndexed::MIN,
        }
    }
}

impl Debug for SourceLocation {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SourceLocation")
            .field("row", &self.row.get())
            .field("column", &self.column.get())
            .finish()
    }
}
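
A usage sketch for `SourceFileBuilder` and `SourceCode::source_location` above; the import paths are the pre-removal ones, an assumption since this commit moves the module.

use ruff_python_ast::source_code::SourceFileBuilder; // pre-removal path (assumption)
use ruff_text_size::TextSize;

fn main() {
    let file = SourceFileBuilder::new("example.py", "x = 1\ny = 2\n").finish();
    // Offset 6 is the start of the second line; rows and columns are one-indexed.
    let location = file.to_source_code().source_location(TextSize::from(6));
    assert_eq!(location.row.get(), 2);
    assert_eq!(location.column.get(), 1);
}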
|
|
@ -1,329 +0,0 @@
|
|||
//! Detect code style from Python source code.

use std::fmt;
use std::ops::Deref;

use once_cell::unsync::OnceCell;
use ruff_python_trivia::{find_newline, LineEnding};
use rustpython_literal::escape::Quote as StrQuote;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::Tok;

use crate::source_code::Locator;
use crate::str::leading_quote;

pub struct Stylist<'a> {
    locator: &'a Locator<'a>,
    indentation: Indentation,
    quote: Quote,
    line_ending: OnceCell<LineEnding>,
}

impl<'a> Stylist<'a> {
    pub fn indentation(&'a self) -> &'a Indentation {
        &self.indentation
    }

    pub fn quote(&'a self) -> Quote {
        self.quote
    }

    pub fn line_ending(&'a self) -> LineEnding {
        *self.line_ending.get_or_init(|| {
            let contents = self.locator.contents();
            find_newline(contents)
                .map(|(_, ending)| ending)
                .unwrap_or_default()
        })
    }

    pub fn from_tokens(tokens: &[LexResult], locator: &'a Locator<'a>) -> Self {
        let indentation = detect_indention(tokens, locator);

        Self {
            locator,
            indentation,
            quote: detect_quote(tokens, locator),
            line_ending: OnceCell::default(),
        }
    }
}

fn detect_quote(tokens: &[LexResult], locator: &Locator) -> Quote {
    let quote_range = tokens.iter().flatten().find_map(|(t, range)| match t {
        Tok::String {
            triple_quoted: false,
            ..
        } => Some(*range),
        _ => None,
    });

    if let Some(quote_range) = quote_range {
        let content = &locator.slice(quote_range);
        if let Some(quotes) = leading_quote(content) {
            return if quotes.contains('\'') {
                Quote::Single
            } else if quotes.contains('"') {
                Quote::Double
            } else {
                unreachable!("Expected string to start with a valid quote prefix")
            };
        }
    }

    Quote::default()
}

fn detect_indention(tokens: &[LexResult], locator: &Locator) -> Indentation {
    let indent_range = tokens.iter().flatten().find_map(|(t, range)| {
        if matches!(t, Tok::Indent) {
            Some(range)
        } else {
            None
        }
    });

    if let Some(indent_range) = indent_range {
        let whitespace = locator.slice(*indent_range);

        Indentation(whitespace.to_string())
    } else {
        Indentation::default()
    }
}

/// The quotation style used in Python source code.
#[derive(Debug, Default, PartialEq, Eq, Copy, Clone)]
pub enum Quote {
    Single,
    #[default]
    Double,
}

impl From<Quote> for char {
    fn from(val: Quote) -> Self {
        match val {
            Quote::Single => '\'',
            Quote::Double => '"',
        }
    }
}

impl From<Quote> for StrQuote {
    fn from(val: Quote) -> Self {
        match val {
            Quote::Single => StrQuote::Single,
            Quote::Double => StrQuote::Double,
        }
    }
}

impl fmt::Display for Quote {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Quote::Single => write!(f, "\'"),
            Quote::Double => write!(f, "\""),
        }
    }
}

/// The indentation style used in Python source code.
#[derive(Debug, PartialEq, Eq)]
pub struct Indentation(String);

impl Indentation {
    pub const fn new(indentation: String) -> Self {
        Self(indentation)
    }
}

impl Default for Indentation {
    fn default() -> Self {
        Indentation("    ".to_string())
    }
}

impl Indentation {
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }

    pub fn as_char(&self) -> char {
        self.0.chars().next().unwrap()
    }
}

impl Deref for Indentation {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        self.as_str()
    }
}

#[cfg(test)]
mod tests {
    use rustpython_parser::lexer::lex;
    use rustpython_parser::Mode;

    use ruff_python_trivia::{find_newline, LineEnding};

    use crate::source_code::stylist::{Indentation, Quote};
    use crate::source_code::{Locator, Stylist};

    #[test]
    fn indentation() {
        let contents = r#"x = 1"#;
        let locator = Locator::new(contents);
        let tokens: Vec<_> = lex(contents, Mode::Module).collect();
        assert_eq!(
            Stylist::from_tokens(&tokens, &locator).indentation(),
            &Indentation::default()
        );

        let contents = r#"
if True:
  pass
"#;
        let locator = Locator::new(contents);
        let tokens: Vec<_> = lex(contents, Mode::Module).collect();
        assert_eq!(
            Stylist::from_tokens(&tokens, &locator).indentation(),
            &Indentation("  ".to_string())
        );

        let contents = r#"
if True:
    pass
"#;
        let locator = Locator::new(contents);
        let tokens: Vec<_> = lex(contents, Mode::Module).collect();
        assert_eq!(
            Stylist::from_tokens(&tokens, &locator).indentation(),
            &Indentation("    ".to_string())
        );

        let contents = r#"
if True:
	pass
"#;
        let locator = Locator::new(contents);
        let tokens: Vec<_> = lex(contents, Mode::Module).collect();
        assert_eq!(
            Stylist::from_tokens(&tokens, &locator).indentation(),
            &Indentation("\t".to_string())
        );

        // TODO(charlie): Should non-significant whitespace be detected?
        let contents = r#"
x = (
  1,
  2,
  3,
)
"#;
        let locator = Locator::new(contents);
        let tokens: Vec<_> = lex(contents, Mode::Module).collect();
        assert_eq!(
            Stylist::from_tokens(&tokens, &locator).indentation(),
            &Indentation::default()
        );
    }

    #[test]
    fn quote() {
        let contents = r#"x = 1"#;
        let locator = Locator::new(contents);
        let tokens: Vec<_> = lex(contents, Mode::Module).collect();
        assert_eq!(
            Stylist::from_tokens(&tokens, &locator).quote(),
            Quote::default()
        );

        let contents = r#"x = '1'"#;
        let locator = Locator::new(contents);
        let tokens: Vec<_> = lex(contents, Mode::Module).collect();
        assert_eq!(
            Stylist::from_tokens(&tokens, &locator).quote(),
            Quote::Single
        );

        let contents = r#"x = "1""#;
        let locator = Locator::new(contents);
        let tokens: Vec<_> = lex(contents, Mode::Module).collect();
        assert_eq!(
            Stylist::from_tokens(&tokens, &locator).quote(),
            Quote::Double
        );

        let contents = r#"s = "It's done.""#;
        let locator = Locator::new(contents);
        let tokens: Vec<_> = lex(contents, Mode::Module).collect();
        assert_eq!(
            Stylist::from_tokens(&tokens, &locator).quote(),
            Quote::Double
        );

        // No style if only double quoted docstring (will take default Double)
        let contents = r#"
def f():
    """Docstring."""
    pass
"#;
        let locator = Locator::new(contents);
        let tokens: Vec<_> = lex(contents, Mode::Module).collect();
        assert_eq!(
            Stylist::from_tokens(&tokens, &locator).quote(),
            Quote::default()
        );

        // Detect from string literal appearing after docstring
        let contents = r#"
"""Module docstring."""

a = 'v'
"#;
        let locator = Locator::new(contents);
        let tokens: Vec<_> = lex(contents, Mode::Module).collect();
        assert_eq!(
            Stylist::from_tokens(&tokens, &locator).quote(),
            Quote::Single
        );

        let contents = r#"
'''Module docstring.'''

a = "v"
"#;
        let locator = Locator::new(contents);
        let tokens: Vec<_> = lex(contents, Mode::Module).collect();
        assert_eq!(
            Stylist::from_tokens(&tokens, &locator).quote(),
            Quote::Double
        );
    }

    #[test]
    fn line_ending() {
        let contents = "x = 1";
        assert_eq!(find_newline(contents).map(|(_, ending)| ending), None);

        let contents = "x = 1\n";
        assert_eq!(
            find_newline(contents).map(|(_, ending)| ending),
            Some(LineEnding::Lf)
        );

        let contents = "x = 1\r";
        assert_eq!(
            find_newline(contents).map(|(_, ending)| ending),
            Some(LineEnding::Cr)
        );

        let contents = "x = 1\r\n";
        assert_eq!(
            find_newline(contents).map(|(_, ending)| ending),
            Some(LineEnding::CrLf)
        );
    }
}
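
A sketch of line-ending detection through `Stylist` above, complementing the quote and indentation tests; the import paths are the pre-removal ones (an assumption).

use ruff_python_ast::source_code::{Locator, Stylist}; // pre-removal paths (assumption)
use ruff_python_trivia::LineEnding;
use rustpython_parser::{lexer::lex, Mode};

fn main() {
    let contents = "x = 1\r\ny = 2\r\n";
    let locator = Locator::new(contents);
    let tokens: Vec<_> = lex(contents, Mode::Module).collect();
    let stylist = Stylist::from_tokens(&tokens, &locator);
    // The first newline in the file decides the style; here it is CRLF.
    assert_eq!(stylist.line_ending(), LineEnding::CrLf);
}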
|
|
@ -1,7 +1,6 @@
|
|||
//! Specialized AST visitor trait and walk functions that only visit statements.

use rustpython_ast::ElifElseClause;
use rustpython_parser::ast::{self, ExceptHandler, MatchCase, Stmt};
use rustpython_ast::{self as ast, ElifElseClause, ExceptHandler, MatchCase, Stmt};

/// A trait for AST visitors that only need to visit statements.
pub trait StatementVisitor<'a> {
|
|
|
@ -1,20 +1,14 @@
|
|||
use crate::source_code::Locator;
use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
use ruff_text_size::TextRange;
use rustpython_ast::{ElifElseClause, Expr, Ranged, Stmt, StmtIf};
use rustpython_parser::{lexer, Mode, Tok};
use std::iter;

/// Return the `Range` of the first `Elif` or `Else` token in an `If` statement.
pub fn elif_else_range(clause: &ElifElseClause, locator: &Locator) -> Option<TextRange> {
    let contents = &locator.contents()[clause.range];
    let token = lexer::lex_starts_at(contents, Mode::Module, clause.range.start())
        .flatten()
pub fn elif_else_range(clause: &ElifElseClause, contents: &str) -> Option<TextRange> {
    let token = SimpleTokenizer::new(contents, clause.range)
        .skip_trivia()
        .next()?;
    if matches!(token.0, Tok::Elif | Tok::Else) {
        Some(token.1)
    } else {
        None
    }
    matches!(token.kind, SimpleTokenKind::Elif | SimpleTokenKind::Else).then_some(token.range())
}

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
@ -49,15 +43,13 @@ pub fn if_elif_branches(stmt_if: &StmtIf) -> impl Iterator<Item = IfElifBranch>
|
|||
|
#[cfg(test)]
mod test {
    use crate::source_code::Locator;
    use crate::stmt_if::elif_else_range;
    use anyhow::Result;
    use ruff_text_size::TextSize;
    use rustpython_ast::Stmt;
    use rustpython_parser::Parse;
    use rustpython_parser::{Parse, ParseError};

    #[test]
    fn extract_elif_else_range() -> Result<()> {
    fn extract_elif_else_range() -> Result<(), ParseError> {
        let contents = "if a:
    ...
elif b:
|
@ -65,8 +57,7 @@ elif b:
|
|||
";
        let stmt = Stmt::parse(contents, "<filename>")?;
        let stmt = Stmt::as_if_stmt(&stmt).unwrap();
        let locator = Locator::new(contents);
        let range = elif_else_range(&stmt.elif_else_clauses[0], &locator).unwrap();
        let range = elif_else_range(&stmt.elif_else_clauses[0], contents).unwrap();
        assert_eq!(range.start(), TextSize::from(14));
        assert_eq!(range.end(), TextSize::from(18));

|
@ -77,8 +68,7 @@ else:
|
|||
";
        let stmt = Stmt::parse(contents, "<filename>")?;
        let stmt = Stmt::as_if_stmt(&stmt).unwrap();
        let locator = Locator::new(contents);
        let range = elif_else_range(&stmt.elif_else_clauses[0], &locator).unwrap();
        let range = elif_else_range(&stmt.elif_else_clauses[0], contents).unwrap();
        assert_eq!(range.start(), TextSize::from(14));
        assert_eq!(range.end(), TextSize::from(18));

|
|
|
@ -1,447 +0,0 @@
|
|||
use rustpython_parser::Tok;

#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum TokenKind {
    /// Token value for a name, commonly known as an identifier.
    Name,
    /// Token value for an integer.
    Int,
    /// Token value for a floating point number.
    Float,
    /// Token value for a complex number.
    Complex,
    /// Token value for a string.
    String,
    /// Token value for a Jupyter magic command.
    MagicCommand,
    /// Token value for a comment. These are filtered out of the token stream prior to parsing.
    Comment,
    /// Token value for a newline.
    Newline,
    /// Token value for a newline that is not a logical line break. These are filtered out of
    /// the token stream prior to parsing.
    NonLogicalNewline,
    /// Token value for an indent.
    Indent,
    /// Token value for a dedent.
    Dedent,
    EndOfFile,
    /// Token value for a left parenthesis `(`.
    Lpar,
    /// Token value for a right parenthesis `)`.
    Rpar,
    /// Token value for a left square bracket `[`.
    Lsqb,
    /// Token value for a right square bracket `]`.
    Rsqb,
    /// Token value for a colon `:`.
    Colon,
    /// Token value for a comma `,`.
    Comma,
    /// Token value for a semicolon `;`.
    Semi,
    /// Token value for plus `+`.
    Plus,
    /// Token value for minus `-`.
    Minus,
    /// Token value for star `*`.
    Star,
    /// Token value for slash `/`.
    Slash,
    /// Token value for vertical bar `|`.
    Vbar,
    /// Token value for ampersand `&`.
    Amper,
    /// Token value for less than `<`.
    Less,
    /// Token value for greater than `>`.
    Greater,
    /// Token value for equal `=`.
    Equal,
    /// Token value for dot `.`.
    Dot,
    /// Token value for percent `%`.
    Percent,
    /// Token value for left bracket `{`.
    Lbrace,
    /// Token value for right bracket `}`.
    Rbrace,
    /// Token value for double equal `==`.
    EqEqual,
    /// Token value for not equal `!=`.
    NotEqual,
    /// Token value for less than or equal `<=`.
    LessEqual,
    /// Token value for greater than or equal `>=`.
    GreaterEqual,
    /// Token value for tilde `~`.
    Tilde,
    /// Token value for caret `^`.
    CircumFlex,
    /// Token value for left shift `<<`.
    LeftShift,
    /// Token value for right shift `>>`.
    RightShift,
    /// Token value for double star `**`.
    DoubleStar,
    /// Token value for double star equal `**=`.
    DoubleStarEqual,
    /// Token value for plus equal `+=`.
    PlusEqual,
    /// Token value for minus equal `-=`.
    MinusEqual,
    /// Token value for star equal `*=`.
    StarEqual,
    /// Token value for slash equal `/=`.
    SlashEqual,
    /// Token value for percent equal `%=`.
    PercentEqual,
    /// Token value for ampersand equal `&=`.
    AmperEqual,
    /// Token value for vertical bar equal `|=`.
    VbarEqual,
    /// Token value for caret equal `^=`.
    CircumflexEqual,
    /// Token value for left shift equal `<<=`.
    LeftShiftEqual,
    /// Token value for right shift equal `>>=`.
    RightShiftEqual,
    /// Token value for double slash `//`.
    DoubleSlash,
    /// Token value for double slash equal `//=`.
    DoubleSlashEqual,
    /// Token value for colon equal `:=`.
    ColonEqual,
    /// Token value for at `@`.
    At,
    /// Token value for at equal `@=`.
    AtEqual,
    /// Token value for arrow `->`.
    Rarrow,
    /// Token value for ellipsis `...`.
    Ellipsis,

    // Self documenting.
    // Keywords (alphabetically):
    False,
    None,
    True,

    And,
    As,
    Assert,
    Async,
    Await,
    Break,
    Class,
    Continue,
    Def,
    Del,
    Elif,
    Else,
    Except,
    Finally,
    For,
    From,
    Global,
    If,
    Import,
    In,
    Is,
    Lambda,
    Nonlocal,
    Not,
    Or,
    Pass,
    Raise,
    Return,
    Try,
    While,
    Match,
    Type,
    Case,
    With,
    Yield,

    // RustPython specific.
    StartModule,
    StartInteractive,
    StartExpression,
}

impl TokenKind {
    #[inline]
    pub const fn is_newline(&self) -> bool {
        matches!(self, TokenKind::Newline | TokenKind::NonLogicalNewline)
    }

    #[inline]
    pub const fn is_unary(&self) -> bool {
        matches!(self, TokenKind::Plus | TokenKind::Minus)
    }

    #[inline]
    pub const fn is_keyword(&self) -> bool {
        matches!(
            self,
            TokenKind::False
                | TokenKind::True
                | TokenKind::None
                | TokenKind::And
                | TokenKind::As
                | TokenKind::Assert
                | TokenKind::Await
                | TokenKind::Break
                | TokenKind::Class
                | TokenKind::Continue
                | TokenKind::Def
                | TokenKind::Del
                | TokenKind::Elif
                | TokenKind::Else
                | TokenKind::Except
                | TokenKind::Finally
                | TokenKind::For
                | TokenKind::From
                | TokenKind::Global
                | TokenKind::If
                | TokenKind::Import
                | TokenKind::In
                | TokenKind::Is
                | TokenKind::Lambda
                | TokenKind::Nonlocal
                | TokenKind::Not
                | TokenKind::Or
                | TokenKind::Pass
                | TokenKind::Raise
                | TokenKind::Return
                | TokenKind::Try
                | TokenKind::While
                | TokenKind::With
                | TokenKind::Yield
        )
    }

    #[inline]
    pub const fn is_operator(&self) -> bool {
        matches!(
            self,
            TokenKind::Lpar
                | TokenKind::Rpar
                | TokenKind::Lsqb
                | TokenKind::Rsqb
                | TokenKind::Comma
                | TokenKind::Semi
                | TokenKind::Plus
                | TokenKind::Minus
                | TokenKind::Star
                | TokenKind::Slash
                | TokenKind::Vbar
                | TokenKind::Amper
                | TokenKind::Less
                | TokenKind::Greater
                | TokenKind::Equal
                | TokenKind::Dot
                | TokenKind::Percent
                | TokenKind::Lbrace
                | TokenKind::Rbrace
                | TokenKind::EqEqual
                | TokenKind::NotEqual
                | TokenKind::LessEqual
                | TokenKind::GreaterEqual
                | TokenKind::Tilde
                | TokenKind::CircumFlex
                | TokenKind::LeftShift
                | TokenKind::RightShift
                | TokenKind::DoubleStar
                | TokenKind::PlusEqual
                | TokenKind::MinusEqual
                | TokenKind::StarEqual
                | TokenKind::SlashEqual
                | TokenKind::PercentEqual
                | TokenKind::AmperEqual
                | TokenKind::VbarEqual
                | TokenKind::CircumflexEqual
                | TokenKind::LeftShiftEqual
                | TokenKind::RightShiftEqual
                | TokenKind::DoubleStarEqual
                | TokenKind::DoubleSlash
                | TokenKind::DoubleSlashEqual
                | TokenKind::At
                | TokenKind::AtEqual
                | TokenKind::Rarrow
                | TokenKind::Ellipsis
                | TokenKind::ColonEqual
                | TokenKind::Colon
                | TokenKind::And
                | TokenKind::Or
                | TokenKind::Not
                | TokenKind::In
                | TokenKind::Is
        )
    }

    #[inline]
    pub const fn is_singleton(&self) -> bool {
        matches!(self, TokenKind::False | TokenKind::True | TokenKind::None)
    }

    #[inline]
    pub const fn is_trivia(&self) -> bool {
        matches!(
            self,
            TokenKind::Newline
                | TokenKind::Indent
                | TokenKind::Dedent
                | TokenKind::NonLogicalNewline
                | TokenKind::Comment
        )
    }

    #[inline]
    pub const fn is_arithmetic(&self) -> bool {
        matches!(
            self,
            TokenKind::DoubleStar
                | TokenKind::Star
                | TokenKind::Plus
                | TokenKind::Minus
                | TokenKind::Slash
                | TokenKind::DoubleSlash
                | TokenKind::At
        )
    }

    #[inline]
    pub const fn is_bitwise_or_shift(&self) -> bool {
        matches!(
            self,
            TokenKind::LeftShift
                | TokenKind::LeftShiftEqual
                | TokenKind::RightShift
                | TokenKind::RightShiftEqual
                | TokenKind::Amper
                | TokenKind::AmperEqual
                | TokenKind::Vbar
                | TokenKind::VbarEqual
                | TokenKind::CircumFlex
                | TokenKind::CircumflexEqual
                | TokenKind::Tilde
        )
    }

    #[inline]
    pub const fn is_soft_keyword(&self) -> bool {
        matches!(self, TokenKind::Match | TokenKind::Case)
    }

    pub const fn from_token(token: &Tok) -> Self {
        match token {
            Tok::Name { .. } => TokenKind::Name,
            Tok::Int { .. } => TokenKind::Int,
            Tok::Float { .. } => TokenKind::Float,
            Tok::Complex { .. } => TokenKind::Complex,
            Tok::String { .. } => TokenKind::String,
            Tok::MagicCommand { .. } => TokenKind::MagicCommand,
            Tok::Comment(_) => TokenKind::Comment,
            Tok::Newline => TokenKind::Newline,
            Tok::NonLogicalNewline => TokenKind::NonLogicalNewline,
            Tok::Indent => TokenKind::Indent,
            Tok::Dedent => TokenKind::Dedent,
            Tok::EndOfFile => TokenKind::EndOfFile,
            Tok::Lpar => TokenKind::Lpar,
            Tok::Rpar => TokenKind::Rpar,
            Tok::Lsqb => TokenKind::Lsqb,
            Tok::Rsqb => TokenKind::Rsqb,
            Tok::Colon => TokenKind::Colon,
            Tok::Comma => TokenKind::Comma,
            Tok::Semi => TokenKind::Semi,
            Tok::Plus => TokenKind::Plus,
            Tok::Minus => TokenKind::Minus,
            Tok::Star => TokenKind::Star,
            Tok::Slash => TokenKind::Slash,
            Tok::Vbar => TokenKind::Vbar,
            Tok::Amper => TokenKind::Amper,
            Tok::Less => TokenKind::Less,
            Tok::Greater => TokenKind::Greater,
            Tok::Equal => TokenKind::Equal,
            Tok::Dot => TokenKind::Dot,
            Tok::Percent => TokenKind::Percent,
            Tok::Lbrace => TokenKind::Lbrace,
            Tok::Rbrace => TokenKind::Rbrace,
            Tok::EqEqual => TokenKind::EqEqual,
            Tok::NotEqual => TokenKind::NotEqual,
            Tok::LessEqual => TokenKind::LessEqual,
            Tok::GreaterEqual => TokenKind::GreaterEqual,
            Tok::Tilde => TokenKind::Tilde,
            Tok::CircumFlex => TokenKind::CircumFlex,
            Tok::LeftShift => TokenKind::LeftShift,
            Tok::RightShift => TokenKind::RightShift,
            Tok::DoubleStar => TokenKind::DoubleStar,
            Tok::DoubleStarEqual => TokenKind::DoubleStarEqual,
            Tok::PlusEqual => TokenKind::PlusEqual,
            Tok::MinusEqual => TokenKind::MinusEqual,
            Tok::StarEqual => TokenKind::StarEqual,
            Tok::SlashEqual => TokenKind::SlashEqual,
            Tok::PercentEqual => TokenKind::PercentEqual,
            Tok::AmperEqual => TokenKind::AmperEqual,
            Tok::VbarEqual => TokenKind::VbarEqual,
            Tok::CircumflexEqual => TokenKind::CircumflexEqual,
            Tok::LeftShiftEqual => TokenKind::LeftShiftEqual,
            Tok::RightShiftEqual => TokenKind::RightShiftEqual,
            Tok::DoubleSlash => TokenKind::DoubleSlash,
            Tok::DoubleSlashEqual => TokenKind::DoubleSlashEqual,
            Tok::ColonEqual => TokenKind::ColonEqual,
            Tok::At => TokenKind::At,
            Tok::AtEqual => TokenKind::AtEqual,
            Tok::Rarrow => TokenKind::Rarrow,
            Tok::Ellipsis => TokenKind::Ellipsis,
            Tok::False => TokenKind::False,
            Tok::None => TokenKind::None,
            Tok::True => TokenKind::True,
            Tok::And => TokenKind::And,
            Tok::As => TokenKind::As,
            Tok::Assert => TokenKind::Assert,
            Tok::Async => TokenKind::Async,
            Tok::Await => TokenKind::Await,
            Tok::Break => TokenKind::Break,
            Tok::Class => TokenKind::Class,
            Tok::Continue => TokenKind::Continue,
            Tok::Def => TokenKind::Def,
            Tok::Del => TokenKind::Del,
            Tok::Elif => TokenKind::Elif,
            Tok::Else => TokenKind::Else,
            Tok::Except => TokenKind::Except,
            Tok::Finally => TokenKind::Finally,
            Tok::For => TokenKind::For,
            Tok::From => TokenKind::From,
            Tok::Global => TokenKind::Global,
            Tok::If => TokenKind::If,
            Tok::Import => TokenKind::Import,
            Tok::In => TokenKind::In,
            Tok::Is => TokenKind::Is,
            Tok::Lambda => TokenKind::Lambda,
            Tok::Nonlocal => TokenKind::Nonlocal,
            Tok::Not => TokenKind::Not,
            Tok::Or => TokenKind::Or,
            Tok::Pass => TokenKind::Pass,
            Tok::Raise => TokenKind::Raise,
            Tok::Return => TokenKind::Return,
            Tok::Try => TokenKind::Try,
            Tok::While => TokenKind::While,
            Tok::Match => TokenKind::Match,
            Tok::Case => TokenKind::Case,
            Tok::Type => TokenKind::Type,
            Tok::With => TokenKind::With,
            Tok::Yield => TokenKind::Yield,
            Tok::StartModule => TokenKind::StartModule,
            Tok::StartInteractive => TokenKind::StartInteractive,
            Tok::StartExpression => TokenKind::StartExpression,
        }
    }
}

impl From<&Tok> for TokenKind {
    fn from(value: &Tok) -> Self {
        Self::from_token(value)
    }
}
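
A hedged sketch of the `Tok` to `TokenKind` mapping above; the `ruff_python_ast::token_kind` module path is an assumption, since this commit relocates the file.

use ruff_python_ast::token_kind::TokenKind; // module path is an assumption
use rustpython_parser::Tok;

fn main() {
    let kind = TokenKind::from_token(&Tok::PlusEqual);
    assert!(kind.is_operator());
    assert!(!kind.is_keyword());
    // `From<&Tok>` provides the same conversion.
    assert_eq!(TokenKind::from(&Tok::Match), TokenKind::Match);
    assert!(TokenKind::Match.is_soft_keyword());
}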
|
|
@ -1,6 +1,5 @@
|
|||
//! Utilities for manually traversing a Python AST.
use rustpython_ast::{ExceptHandler, Stmt, Suite};
use rustpython_parser::ast;
use rustpython_ast::{self as ast, ExceptHandler, Stmt, Suite};

/// Given a [`Stmt`] and its parent, return the [`Suite`] that contains the [`Stmt`].
pub fn suite<'a>(stmt: &'a Stmt, parent: &'a Stmt) -> Option<&'a Suite> {
|
|
|
@ -1,6 +1,6 @@
|
|||
use std::ops::Deref;

use rustpython_parser::ast::{Expr, Stmt};
use rustpython_ast::{Expr, Stmt};

#[derive(Clone)]
pub enum Node<'a> {
|
|
|
@ -1,50 +0,0 @@
|
|||
use anyhow::Result;
use ruff_text_size::{TextLen, TextRange};
use rustpython_parser::ast::Expr;
use rustpython_parser::Parse;

use crate::relocate::relocate_expr;
use crate::source_code::Locator;
use crate::str;

#[derive(is_macro::Is, Copy, Clone)]
pub enum AnnotationKind {
    /// The annotation is defined as part of a simple string literal,
    /// e.g. `x: "List[int]" = []`. Annotations within simple literals
    /// can be accurately located. For example, we can underline specific
    /// expressions within the annotation and apply automatic fixes, which is
    /// not possible for complex string literals.
    Simple,
    /// The annotation is defined as part of a complex string literal, such as
    /// a literal containing an implicit concatenation or escaped characters,
    /// e.g. `x: "List" "[int]" = []`. These are comparatively rare, but valid.
    Complex,
}

/// Parse a type annotation from a string.
pub fn parse_type_annotation(
    value: &str,
    range: TextRange,
    locator: &Locator,
) -> Result<(Expr, AnnotationKind)> {
    let expression = &locator.contents()[range];

    if str::raw_contents(expression).map_or(false, |body| body == value) {
        // The annotation is considered "simple" if and only if the raw representation (e.g.,
        // `List[int]` within "List[int]") exactly matches the parsed representation. This
        // isn't the case, e.g., for implicit concatenations, or for annotations that contain
        // escaped quotes.
        let leading_quote = str::leading_quote(expression).unwrap();
        let expr = Expr::parse_starts_at(
            value,
            "<filename>",
            range.start() + leading_quote.text_len(),
        )?;
        Ok((expr, AnnotationKind::Simple))
    } else {
        // Otherwise, consider this a "complex" annotation.
        let mut expr = Expr::parse(value, "<filename>")?;
        relocate_expr(&mut expr, range);
        Ok((expr, AnnotationKind::Complex))
    }
}
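
A short sketch of the simple-versus-complex split in `parse_type_annotation` above; the `ruff_python_ast::typing` path is assumed from the crate layout, and the `source_code` path is the pre-removal one.

use ruff_python_ast::source_code::Locator; // pre-removal path (assumption)
use ruff_python_ast::typing::parse_type_annotation; // module path is an assumption
use ruff_text_size::{TextRange, TextSize};

fn main() -> anyhow::Result<()> {
    let source = r#"x: "List[int]" = []"#;
    let locator = Locator::new(source);
    // The range covers the quoted literal, quotes included (offsets 3..14).
    let range = TextRange::new(TextSize::from(3), TextSize::from(14));
    let (_expr, kind) = parse_type_annotation("List[int]", range, &locator)?;
    // The raw contents match the value exactly, so the annotation is "simple".
    assert!(kind.is_simple());
    Ok(())
}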
|
|
@ -2,11 +2,10 @@
|
|||
|
pub mod preorder;

use rustpython_ast::ElifElseClause;
use rustpython_parser::ast::{
    self, Alias, Arg, Arguments, BoolOp, CmpOp, Comprehension, Decorator, ExceptHandler, Expr,
    ExprContext, Keyword, MatchCase, Operator, Pattern, Stmt, TypeParam, TypeParamTypeVar, UnaryOp,
    WithItem,
use rustpython_ast::{
    self as ast, Alias, Arg, Arguments, BoolOp, CmpOp, Comprehension, Decorator, ElifElseClause,
    ExceptHandler, Expr, ExprContext, Keyword, MatchCase, Operator, Pattern, Stmt, TypeParam,
    TypeParamTypeVar, UnaryOp, WithItem,
};

/// A trait for AST visitors. Visits all nodes in the AST recursively in evaluation-order.
|
@ -803,8 +802,9 @@ mod tests {
|
|||
    use std::fmt::{Debug, Write};

    use insta::assert_snapshot;
    use rustpython_ast as ast;
    use rustpython_parser::lexer::lex;
    use rustpython_parser::{ast, parse_tokens, Mode};
    use rustpython_parser::{parse_tokens, Mode};

    use crate::node::AnyNodeRef;
    use crate::visitor::{
|
|
|
@ -1,8 +1,7 @@
|
|||
use rustpython_ast::{ArgWithDefault, ElifElseClause, Mod, TypeIgnore};
use rustpython_parser::ast::{
    self, Alias, Arg, Arguments, BoolOp, CmpOp, Comprehension, Constant, Decorator, ExceptHandler,
    Expr, Keyword, MatchCase, Operator, Pattern, Stmt, TypeParam, TypeParamTypeVar, UnaryOp,
    WithItem,
use rustpython_ast::{
    self as ast, Alias, Arg, ArgWithDefault, Arguments, BoolOp, CmpOp, Comprehension, Constant,
    Decorator, ElifElseClause, ExceptHandler, Expr, Keyword, MatchCase, Mod, Operator, Pattern,
    Stmt, TypeIgnore, TypeParam, TypeParamTypeVar, UnaryOp, WithItem,
};

/// Visitor that traverses all nodes recursively in pre-order.
|
|
|
@ -1,9 +1,10 @@
|
|||
use ruff_text_size::{TextRange, TextSize};
use rustpython_parser::ast::Ranged;
use rustpython_ast::{Ranged, Stmt};

use ruff_python_trivia::is_python_whitespace;

use crate::source_code::Locator;
use ruff_python_trivia::{
    has_trailing_content, indentation_at_offset, is_python_whitespace, PythonWhitespace,
};
use ruff_source_file::{newlines::UniversalNewlineIterator, Locator};

/// Extract the leading indentation from a line.
#[inline]
|
@ -14,14 +15,40 @@ where
|
|||
    indentation_at_offset(locator, located.start())
}

/// Extract the leading indentation from a line.
pub fn indentation_at_offset<'a>(locator: &'a Locator, offset: TextSize) -> Option<&'a str> {
    let line_start = locator.line_start(offset);
    let indentation = &locator.contents()[TextRange::new(line_start, offset)];
/// Return the end offset of the empty lines following a statement.
pub fn trailing_lines_end(stmt: &Stmt, locator: &Locator) -> TextSize {
    let line_end = locator.full_line_end(stmt.end());
    let rest = &locator.contents()[usize::from(line_end)..];

    if indentation.chars().all(is_python_whitespace) {
        Some(indentation)
    } else {
        None
    }
    UniversalNewlineIterator::with_offset(rest, line_end)
        .take_while(|line| line.trim_whitespace().is_empty())
        .last()
        .map_or(line_end, |line| line.full_end())
}

/// Return `true` if a `Stmt` appears to be part of a multi-statement line, with
/// other statements following it.
pub fn followed_by_multi_statement_line(stmt: &Stmt, locator: &Locator) -> bool {
    has_trailing_content(stmt.end(), locator)
}

/// If a [`Ranged`] has a trailing comment, return the index of the hash.
pub fn trailing_comment_start_offset<T>(located: &T, locator: &Locator) -> Option<TextSize>
where
    T: Ranged,
{
    let line_end = locator.line_end(located.end());

    let trailing = &locator.contents()[TextRange::new(located.end(), line_end)];

    for (index, char) in trailing.char_indices() {
        if char == '#' {
            return TextSize::try_from(index).ok();
        }
        if !is_python_whitespace(char) {
            return None;
        }
    }

    None
}
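
A sketch of the `trailing_comment_start_offset` helper above, written against the new imports; that `Locator` now comes from `ruff_source_file` is taken from the diff, while the `whitespace` module path is an assumption.

use ruff_python_ast::whitespace::trailing_comment_start_offset; // module path is an assumption
use ruff_source_file::Locator; // post-move location, per the diff above
use rustpython_ast::Stmt;
use rustpython_parser::Parse;

fn main() -> Result<(), rustpython_parser::ParseError> {
    let contents = "pass  # trailing comment\n";
    let stmt = Stmt::parse(contents, "<filename>")?;
    let locator = Locator::new(contents);
    // The hash sits two characters past the end of `pass`.
    assert_eq!(
        trailing_comment_start_offset(&stmt, &locator),
        Some(ruff_text_size::TextSize::from(2))
    );
    Ok(())
}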
|
|