Remove parser dependency from ruff-python-ast (#6096)

Micha Reiser 2023-07-26 17:47:22 +02:00 committed by GitHub
parent 99127243f4
commit 2cf00fee96
658 changed files with 1714 additions and 1546 deletions

View file

@@ -1,5 +1,5 @@
use bitflags::bitflags;
use rustpython_parser::ast::{self, Constant, Expr, Stmt};
use rustpython_ast::{self as ast, Constant, Expr, Stmt};
bitflags! {
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]

View file

@@ -1,4 +1,4 @@
use rustpython_parser::ast::{self, Expr};
use rustpython_ast::{self as ast, Expr};
use smallvec::{smallvec, SmallVec};
/// A representation of a qualified name, like `typing.List`.

View file

@@ -1,4 +1,4 @@
use rustpython_parser::ast::{self, Decorator, Stmt};
use rustpython_ast::{self as ast, Decorator, Stmt};
pub fn name(stmt: &Stmt) -> &str {
match stmt {

View file

@@ -2,7 +2,7 @@
//! ability to compare expressions for equality (via [`Eq`] and [`Hash`]).
use num_bigint::BigInt;
use rustpython_parser::ast;
use rustpython_ast as ast;
#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
pub enum ComparableExprContext {

View file

@@ -1,6 +1,6 @@
use std::hash::Hash;
use rustpython_parser::ast::Expr;
use rustpython_ast::Expr;
use crate::comparable::ComparableExpr;

View file

@@ -1,21 +1,15 @@
use std::borrow::Cow;
use std::ops::Sub;
use std::path::Path;
use num_traits::Zero;
use ruff_text_size::{TextRange, TextSize};
use rustpython_ast::CmpOp;
use rustpython_parser::ast::{
self, Arguments, Constant, ExceptHandler, Expr, Keyword, MatchCase, Pattern, Ranged, Stmt,
TypeParam,
use ruff_text_size::TextRange;
use rustpython_ast::{
self as ast, Arguments, Constant, ExceptHandler, Expr, Keyword, MatchCase, Pattern, Ranged,
Stmt, TypeParam,
};
use rustpython_parser::{lexer, Mode, Tok};
use smallvec::SmallVec;
use ruff_python_trivia::{is_python_whitespace, PythonWhitespace, UniversalNewlineIterator};
use crate::call_path::CallPath;
use crate::source_code::{Indexer, Locator};
use crate::statement_visitor::{walk_body, walk_stmt, StatementVisitor};
/// Return `true` if the `Stmt` is a compound statement (as opposed to a simple statement).
@@ -772,27 +766,6 @@ pub fn map_subscript(expr: &Expr) -> &Expr {
}
}
/// Returns `true` if a statement or expression includes at least one comment.
pub fn has_comments<T>(node: &T, locator: &Locator, indexer: &Indexer) -> bool
where
T: Ranged,
{
let start = if has_leading_content(node.start(), locator) {
node.start()
} else {
locator.line_start(node.start())
};
let end = if has_trailing_content(node.end(), locator) {
node.end()
} else {
locator.line_end(node.end())
};
indexer
.comment_ranges()
.intersects(TextRange::new(start, end))
}
/// Return `true` if the body uses `locals()`, `globals()`, `vars()`, `eval()`.
///
/// Accepts a closure that determines whether a given name (e.g., `"list"`) is a Python builtin.
@@ -1027,197 +1000,6 @@ where
}
}
/// Return `true` if the node starting the given [`TextSize`] has leading content.
pub fn has_leading_content(offset: TextSize, locator: &Locator) -> bool {
let line_start = locator.line_start(offset);
let leading = &locator.contents()[TextRange::new(line_start, offset)];
leading.chars().any(|char| !is_python_whitespace(char))
}
/// Return `true` if the node ending at the given [`TextSize`] has trailing content.
pub fn has_trailing_content(offset: TextSize, locator: &Locator) -> bool {
let line_end = locator.line_end(offset);
let trailing = &locator.contents()[TextRange::new(offset, line_end)];
for char in trailing.chars() {
if char == '#' {
return false;
}
if !is_python_whitespace(char) {
return true;
}
}
false
}
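As a standalone illustration of the removed helper above, here is a minimal sketch of the trailing-content check rewritten against a plain `&str` using only `std`; the `has_trailing_content_in` name and the inlined whitespace test are illustrative stand-ins for the `Locator`-based original.

fn has_trailing_content_in(rest_of_line: &str) -> bool {
    // Approximates `is_python_whitespace`: space, tab, and form feed.
    let is_ws = |c: char| matches!(c, ' ' | '\t' | '\x0C');
    for c in rest_of_line.chars() {
        if c == '#' {
            // Everything from `#` to the end of the line is a comment, not content.
            return false;
        }
        if !is_ws(c) {
            return true;
        }
    }
    false
}

fn main() {
    assert!(has_trailing_content_in("; y = 2")); // another statement follows
    assert!(!has_trailing_content_in(" # comment")); // only a comment follows
    assert!(!has_trailing_content_in("   ")); // only whitespace follows
}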
/// If a [`Ranged`] has a trailing comment, return the index of the hash.
pub fn trailing_comment_start_offset<T>(located: &T, locator: &Locator) -> Option<TextSize>
where
T: Ranged,
{
let line_end = locator.line_end(located.end());
let trailing = &locator.contents()[TextRange::new(located.end(), line_end)];
for (index, char) in trailing.char_indices() {
if char == '#' {
return TextSize::try_from(index).ok();
}
if !is_python_whitespace(char) {
return None;
}
}
None
}
/// Return the end offset of the empty lines following a statement.
pub fn trailing_lines_end(stmt: &Stmt, locator: &Locator) -> TextSize {
let line_end = locator.full_line_end(stmt.end());
let rest = &locator.contents()[usize::from(line_end)..];
UniversalNewlineIterator::with_offset(rest, line_end)
.take_while(|line| line.trim_whitespace().is_empty())
.last()
.map_or(line_end, |line| line.full_end())
}
/// Return the range of the first parenthesis pair after a given [`TextSize`].
pub fn match_parens(start: TextSize, locator: &Locator) -> Option<TextRange> {
let contents = &locator.contents()[usize::from(start)..];
let mut fix_start = None;
let mut fix_end = None;
let mut count = 0u32;
for (tok, range) in lexer::lex_starts_at(contents, Mode::Module, start).flatten() {
match tok {
Tok::Lpar => {
if count == 0 {
fix_start = Some(range.start());
}
count = count.saturating_add(1);
}
Tok::Rpar => {
count = count.saturating_sub(1);
if count == 0 {
fix_end = Some(range.end());
break;
}
}
_ => {}
}
}
match (fix_start, fix_end) {
(Some(start), Some(end)) => Some(TextRange::new(start, end)),
_ => None,
}
}
/// Return the `Range` of the first `Tok::Colon` token in a `Range`.
pub fn first_colon_range(range: TextRange, locator: &Locator) -> Option<TextRange> {
let contents = &locator.contents()[range];
let range = lexer::lex_starts_at(contents, Mode::Module, range.start())
.flatten()
.find(|(tok, _)| tok.is_colon())
.map(|(_, range)| range);
range
}
/// Given an offset at the end of a line (including newlines), return the offset of the
/// continuation at the end of that line.
fn find_continuation(offset: TextSize, locator: &Locator, indexer: &Indexer) -> Option<TextSize> {
let newline_pos = usize::from(offset).saturating_sub(1);
// Skip the newline.
let newline_len = match locator.contents().as_bytes()[newline_pos] {
b'\n' => {
if locator
.contents()
.as_bytes()
.get(newline_pos.saturating_sub(1))
== Some(&b'\r')
{
2
} else {
1
}
}
b'\r' => 1,
// No preceding line.
_ => return None,
};
indexer
.is_continuation(offset - TextSize::from(newline_len), locator)
.then(|| offset - TextSize::from(newline_len) - TextSize::from(1))
}
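The byte-level newline handling above (two bytes for `\r\n`, one for `\n` or `\r`) is subtle enough to merit a standalone sketch; `newline_len_before` is a hypothetical helper using plain `usize` offsets in place of `TextSize`.

fn newline_len_before(source: &str, offset: usize) -> Option<usize> {
    let bytes = source.as_bytes();
    let pos = offset.checked_sub(1)?;
    match bytes[pos] {
        // Two-byte Windows terminator `\r\n`.
        b'\n' if pos > 0 && bytes[pos - 1] == b'\r' => Some(2),
        // One-byte `\n` or `\r` terminator.
        b'\n' | b'\r' => Some(1),
        // The offset does not follow a line terminator.
        _ => None,
    }
}

fn main() {
    assert_eq!(newline_len_before("x = 1\r\ny = 2", 7), Some(2));
    assert_eq!(newline_len_before("x = 1\ny = 2", 6), Some(1));
    assert_eq!(newline_len_before("x = 1", 3), None);
}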
/// If the node starting at the given [`TextSize`] is preceded by at least one continuation line
/// (i.e., a line ending in a backslash), return the starting offset of the first such continuation
/// character.
///
/// For example, given:
/// ```python
/// x = 1; \
/// y = 2
/// ```
///
/// When passed the offset of `y`, this function will return the offset of the backslash at the end
/// of the first line.
///
/// Similarly, given:
/// ```python
/// x = 1; \
/// \
/// y = 2;
/// ```
///
/// When passed the offset of `y`, this function will again return the offset of the backslash at
/// the end of the first line.
pub fn preceded_by_continuations(
offset: TextSize,
locator: &Locator,
indexer: &Indexer,
) -> Option<TextSize> {
// Find the first preceding continuation.
let mut continuation = find_continuation(locator.line_start(offset), locator, indexer)?;
// Continue searching for continuations, in the unlikely event that we have multiple
// continuations in a row.
loop {
let previous_line_end = locator.line_start(continuation);
if locator
.slice(TextRange::new(previous_line_end, continuation))
.chars()
.all(is_python_whitespace)
{
if let Some(next_continuation) = find_continuation(previous_line_end, locator, indexer)
{
continuation = next_continuation;
continue;
}
}
break;
}
Some(continuation)
}
/// Return `true` if a `Stmt` appears to be part of a multi-statement line, with
/// other statements preceding it.
pub fn preceded_by_multi_statement_line(stmt: &Stmt, locator: &Locator, indexer: &Indexer) -> bool {
has_leading_content(stmt.start(), locator)
|| preceded_by_continuations(stmt.start(), locator, indexer).is_some()
}
/// Return `true` if a `Stmt` appears to be part of a multi-statement line, with
/// other statements following it.
pub fn followed_by_multi_statement_line(stmt: &Stmt, locator: &Locator) -> bool {
has_trailing_content(stmt.end(), locator)
}
/// Return `true` if a `Stmt` is a docstring.
pub fn is_docstring_stmt(stmt: &Stmt) -> bool {
if let Stmt::Expr(ast::StmtExpr {
@@ -1500,166 +1282,19 @@ impl Truthiness {
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LocatedCmpOp {
pub range: TextRange,
pub op: CmpOp,
}
impl LocatedCmpOp {
fn new<T: Into<TextRange>>(range: T, op: CmpOp) -> Self {
Self {
range: range.into(),
op,
}
}
}
/// Extract all [`CmpOp`] operators from an expression snippet, with appropriate
/// ranges.
///
/// `RustPython` doesn't include line and column information on [`CmpOp`] nodes.
/// `CPython` doesn't either. This method iterates over the token stream and
/// re-identifies [`CmpOp`] nodes, annotating them with valid ranges.
pub fn locate_cmp_ops(expr: &Expr, locator: &Locator) -> Vec<LocatedCmpOp> {
// If `Expr` is a multi-line expression, we need to parenthesize it to
// ensure that it's lexed correctly.
let contents = locator.slice(expr.range());
let parenthesized_contents = format!("({contents})");
let mut tok_iter = lexer::lex(&parenthesized_contents, Mode::Expression)
.flatten()
.skip(1)
.map(|(tok, range)| (tok, range.sub(TextSize::from(1))))
.filter(|(tok, _)| !matches!(tok, Tok::NonLogicalNewline | Tok::Comment(_)))
.peekable();
let mut ops: Vec<LocatedCmpOp> = vec![];
let mut count = 0u32;
loop {
let Some((tok, range)) = tok_iter.next() else {
break;
};
if matches!(tok, Tok::Lpar) {
count = count.saturating_add(1);
continue;
} else if matches!(tok, Tok::Rpar) {
count = count.saturating_sub(1);
continue;
}
if count == 0 {
match tok {
Tok::Not => {
if let Some((_, next_range)) =
tok_iter.next_if(|(tok, _)| matches!(tok, Tok::In))
{
ops.push(LocatedCmpOp::new(
TextRange::new(range.start(), next_range.end()),
CmpOp::NotIn,
));
}
}
Tok::In => {
ops.push(LocatedCmpOp::new(range, CmpOp::In));
}
Tok::Is => {
let op = if let Some((_, next_range)) =
tok_iter.next_if(|(tok, _)| matches!(tok, Tok::Not))
{
LocatedCmpOp::new(
TextRange::new(range.start(), next_range.end()),
CmpOp::IsNot,
)
} else {
LocatedCmpOp::new(range, CmpOp::Is)
};
ops.push(op);
}
Tok::NotEqual => {
ops.push(LocatedCmpOp::new(range, CmpOp::NotEq));
}
Tok::EqEqual => {
ops.push(LocatedCmpOp::new(range, CmpOp::Eq));
}
Tok::GreaterEqual => {
ops.push(LocatedCmpOp::new(range, CmpOp::GtE));
}
Tok::Greater => {
ops.push(LocatedCmpOp::new(range, CmpOp::Gt));
}
Tok::LessEqual => {
ops.push(LocatedCmpOp::new(range, CmpOp::LtE));
}
Tok::Less => {
ops.push(LocatedCmpOp::new(range, CmpOp::Lt));
}
_ => {}
}
}
}
ops
}
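The two-token pairing in `locate_cmp_ops` (`is` + `not` becoming `IsNot`, `not` + `in` becoming `NotIn`) hinges on `Peekable::next_if`; here is a minimal standalone sketch of that pairing over simplified stand-in tokens, not the RustPython token type.

#[derive(Debug, PartialEq)]
enum Op { Is, IsNot, In, NotIn }

fn fuse(tokens: &[&str]) -> Vec<Op> {
    let mut iter = tokens.iter().copied().peekable();
    let mut ops = Vec::new();
    while let Some(tok) = iter.next() {
        match tok {
            // `is` optionally fuses with a following `not`.
            "is" => {
                if iter.next_if(|t| *t == "not").is_some() {
                    ops.push(Op::IsNot);
                } else {
                    ops.push(Op::Is);
                }
            }
            // A bare `not` is unary; only `not in` is a comparison.
            "not" => {
                if iter.next_if(|t| *t == "in").is_some() {
                    ops.push(Op::NotIn);
                }
            }
            "in" => ops.push(Op::In),
            _ => {}
        }
    }
    ops
}

fn main() {
    assert_eq!(fuse(&["is", "not", "x"]), vec![Op::IsNot]);
    assert_eq!(fuse(&["not", "in"]), vec![Op::NotIn]);
}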
#[cfg(test)]
mod tests {
use std::borrow::Cow;
use std::cell::RefCell;
use std::vec;
use anyhow::Result;
use ruff_text_size::{TextLen, TextRange, TextSize};
use ruff_text_size::TextRange;
use rustpython_ast::{
self, CmpOp, Constant, Expr, ExprConstant, ExprContext, ExprName, Identifier, Ranged, Stmt,
StmtTypeAlias, TypeParam, TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple,
self, Constant, Expr, ExprConstant, ExprContext, ExprName, Identifier, Stmt, StmtTypeAlias,
TypeParam, TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple,
};
use rustpython_parser::ast::Suite;
use rustpython_parser::Parse;
use crate::helpers::{
any_over_stmt, any_over_type_param, first_colon_range, has_trailing_content,
locate_cmp_ops, resolve_imported_module_path, LocatedCmpOp,
};
use crate::source_code::Locator;
#[test]
fn trailing_content() -> Result<()> {
let contents = "x = 1";
let program = Suite::parse(contents, "<filename>")?;
let stmt = program.first().unwrap();
let locator = Locator::new(contents);
assert!(!has_trailing_content(stmt.end(), &locator));
let contents = "x = 1; y = 2";
let program = Suite::parse(contents, "<filename>")?;
let stmt = program.first().unwrap();
let locator = Locator::new(contents);
assert!(has_trailing_content(stmt.end(), &locator));
let contents = "x = 1 ";
let program = Suite::parse(contents, "<filename>")?;
let stmt = program.first().unwrap();
let locator = Locator::new(contents);
assert!(!has_trailing_content(stmt.end(), &locator));
let contents = "x = 1 # Comment";
let program = Suite::parse(contents, "<filename>")?;
let stmt = program.first().unwrap();
let locator = Locator::new(contents);
assert!(!has_trailing_content(stmt.end(), &locator));
let contents = r#"
x = 1
y = 2
"#
.trim();
let program = Suite::parse(contents, "<filename>")?;
let stmt = program.first().unwrap();
let locator = Locator::new(contents);
assert!(!has_trailing_content(stmt.end(), &locator));
Ok(())
}
use crate::helpers::{any_over_stmt, any_over_type_param, resolve_imported_module_path};
#[test]
fn resolve_import() {
@@ -1698,101 +1333,6 @@ y = 2
);
}
#[test]
fn extract_first_colon_range() {
let contents = "with a: pass";
let locator = Locator::new(contents);
let range = first_colon_range(
TextRange::new(TextSize::from(0), contents.text_len()),
&locator,
)
.unwrap();
assert_eq!(&contents[range], ":");
assert_eq!(range, TextRange::new(TextSize::from(6), TextSize::from(7)));
}
#[test]
fn extract_cmp_op_location() -> Result<()> {
let contents = "x == 1";
let expr = Expr::parse(contents, "<filename>")?;
let locator = Locator::new(contents);
assert_eq!(
locate_cmp_ops(&expr, &locator),
vec![LocatedCmpOp::new(
TextSize::from(2)..TextSize::from(4),
CmpOp::Eq
)]
);
let contents = "x != 1";
let expr = Expr::parse(contents, "<filename>")?;
let locator = Locator::new(contents);
assert_eq!(
locate_cmp_ops(&expr, &locator),
vec![LocatedCmpOp::new(
TextSize::from(2)..TextSize::from(4),
CmpOp::NotEq
)]
);
let contents = "x is 1";
let expr = Expr::parse(contents, "<filename>")?;
let locator = Locator::new(contents);
assert_eq!(
locate_cmp_ops(&expr, &locator),
vec![LocatedCmpOp::new(
TextSize::from(2)..TextSize::from(4),
CmpOp::Is
)]
);
let contents = "x is not 1";
let expr = Expr::parse(contents, "<filename>")?;
let locator = Locator::new(contents);
assert_eq!(
locate_cmp_ops(&expr, &locator),
vec![LocatedCmpOp::new(
TextSize::from(2)..TextSize::from(8),
CmpOp::IsNot
)]
);
let contents = "x in 1";
let expr = Expr::parse(contents, "<filename>")?;
let locator = Locator::new(contents);
assert_eq!(
locate_cmp_ops(&expr, &locator),
vec![LocatedCmpOp::new(
TextSize::from(2)..TextSize::from(4),
CmpOp::In
)]
);
let contents = "x not in 1";
let expr = Expr::parse(contents, "<filename>")?;
let locator = Locator::new(contents);
assert_eq!(
locate_cmp_ops(&expr, &locator),
vec![LocatedCmpOp::new(
TextSize::from(2)..TextSize::from(8),
CmpOp::NotIn
)]
);
let contents = "x != (1 is not 2)";
let expr = Expr::parse(contents, "<filename>")?;
let locator = Locator::new(contents);
assert_eq!(
locate_cmp_ops(&expr, &locator),
vec![LocatedCmpOp::new(
TextSize::from(2)..TextSize::from(4),
CmpOp::NotEq
)]
);
Ok(())
}
#[test]
fn any_over_stmt_type_alias() {
let seen = RefCell::new(Vec::new());

View file

@@ -11,13 +11,10 @@
//! This module can be used to identify the [`TextRange`] of the `except` token.
use ruff_text_size::{TextLen, TextRange, TextSize};
use rustpython_ast::{Alias, Arg, ArgWithDefault};
use rustpython_parser::ast::{self, ExceptHandler, Ranged, Stmt};
use rustpython_ast::{self as ast, Alias, Arg, ArgWithDefault, ExceptHandler, Ranged, Stmt};
use ruff_python_trivia::{is_python_whitespace, Cursor};
use crate::source_code::Locator;
pub trait Identifier {
/// Return the [`TextRange`] of the identifier in the given AST node.
fn identifier(&self) -> TextRange;
@@ -82,14 +79,14 @@ impl Identifier for Alias {
}
/// Return the [`TextRange`] of the `except` token in an [`ExceptHandler`].
pub fn except(handler: &ExceptHandler, locator: &Locator) -> TextRange {
IdentifierTokenizer::new(locator.contents(), handler.range())
pub fn except(handler: &ExceptHandler, source: &str) -> TextRange {
IdentifierTokenizer::new(source, handler.range())
.next()
.expect("Failed to find `except` token in `ExceptHandler`")
}
/// Return the [`TextRange`] of the `else` token in a `For`, `AsyncFor`, or `While` statement.
pub fn else_(stmt: &Stmt, locator: &Locator) -> Option<TextRange> {
pub fn else_(stmt: &Stmt, source: &str) -> Option<TextRange> {
let (Stmt::For(ast::StmtFor { body, orelse, .. })
| Stmt::AsyncFor(ast::StmtAsyncFor { body, orelse, .. })
| Stmt::While(ast::StmtWhile { body, orelse, .. })) = stmt
@@ -103,7 +100,7 @@ pub fn else_(stmt: &Stmt, locator: &Locator) -> Option<TextRange> {
IdentifierTokenizer::starts_at(
body.last().expect("Expected body to be non-empty").end(),
locator.contents(),
source,
)
.next()
}
@@ -203,17 +200,15 @@ impl Iterator for IdentifierTokenizer<'_> {
#[cfg(test)]
mod tests {
use anyhow::Result;
use ruff_text_size::{TextRange, TextSize};
use rustpython_ast::{Ranged, Stmt};
use rustpython_parser::Parse;
use rustpython_parser::{Parse, ParseError};
use crate::identifier;
use crate::identifier::IdentifierTokenizer;
use crate::source_code::Locator;
#[test]
fn extract_else_range() -> Result<()> {
fn extract_else_range() -> Result<(), ParseError> {
let contents = r#"
for x in y:
pass
@@ -222,8 +217,7 @@ else:
"#
.trim();
let stmt = Stmt::parse(contents, "<filename>")?;
let locator = Locator::new(contents);
let range = identifier::else_(&stmt, &locator).unwrap();
let range = identifier::else_(&stmt, contents).unwrap();
assert_eq!(&contents[range], "else");
assert_eq!(
range,
@@ -233,12 +227,11 @@ else:
}
#[test]
fn extract_global_names() -> Result<()> {
fn extract_global_names() -> Result<(), ParseError> {
let contents = r#"global X,Y, Z"#.trim();
let stmt = Stmt::parse(contents, "<filename>")?;
let locator = Locator::new(contents);
let mut names = IdentifierTokenizer::new(locator.contents(), stmt.range());
let mut names = IdentifierTokenizer::new(contents, stmt.range());
let range = names.next_token().unwrap();
assert_eq!(&contents[range], "global");

View file

@@ -10,13 +10,10 @@ pub mod identifier;
pub mod imports;
pub mod node;
pub mod relocate;
pub mod source_code;
pub mod statement_visitor;
pub mod stmt_if;
pub mod str;
pub mod token_kind;
pub mod traversal;
pub mod types;
pub mod typing;
pub mod visitor;
pub mod whitespace;

View file

@@ -1,10 +1,9 @@
use ast::{TypeParam, TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple};
use ruff_text_size::TextRange;
use rustpython_ast::{
Alias, Arg, ArgWithDefault, Arguments, Comprehension, Decorator, ExceptHandler, Keyword,
MatchCase, Mod, Pattern, Stmt, TypeIgnore, WithItem,
self as ast, Alias, Arg, ArgWithDefault, Arguments, Comprehension, Decorator, ExceptHandler,
Expr, Keyword, MatchCase, Mod, Pattern, Ranged, Stmt, TypeIgnore, TypeParam,
TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple, WithItem,
};
use rustpython_parser::ast::{self, Expr, Ranged};
use std::ptr::NonNull;
pub trait AstNode: Ranged {

View file

@@ -1,5 +1,5 @@
use ruff_text_size::TextRange;
use rustpython_parser::ast::{self, Expr, Keyword};
use rustpython_ast::{self as ast, Expr, Keyword};
fn relocate_keyword(keyword: &mut Keyword, location: TextRange) {
relocate_expr(&mut keyword.value, location);

View file

@@ -1,84 +0,0 @@
use itertools::Itertools;
use std::fmt::{Debug, Formatter};
use std::ops::Deref;
use ruff_text_size::TextRange;
use rustpython_parser::Tok;
/// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping.
#[derive(Clone)]
pub struct CommentRanges {
raw: Vec<TextRange>,
}
impl CommentRanges {
/// Returns `true` if the given range includes a comment.
pub fn intersects(&self, target: TextRange) -> bool {
self.raw
.binary_search_by(|range| {
if target.contains_range(*range) {
std::cmp::Ordering::Equal
} else if range.end() < target.start() {
std::cmp::Ordering::Less
} else {
std::cmp::Ordering::Greater
}
})
.is_ok()
}
/// Returns the comments that are within the given range
pub fn comments_in_range(&self, range: TextRange) -> &[TextRange] {
let start = self
.raw
.partition_point(|comment| comment.start() < range.start());
// We expect there to be few comments, so switching to a linear find should be faster
match self.raw[start..]
.iter()
.find_position(|comment| comment.end() > range.end())
{
Some((in_range, _element)) => &self.raw[start..start + in_range],
None => &self.raw[start..],
}
}
}
impl Deref for CommentRanges {
type Target = [TextRange];
fn deref(&self) -> &Self::Target {
self.raw.as_slice()
}
}
impl Debug for CommentRanges {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_tuple("CommentRanges").field(&self.raw).finish()
}
}
impl<'a> IntoIterator for &'a CommentRanges {
type IntoIter = std::slice::Iter<'a, TextRange>;
type Item = &'a TextRange;
fn into_iter(self) -> Self::IntoIter {
self.raw.iter()
}
}
#[derive(Debug, Clone, Default)]
pub struct CommentRangesBuilder {
ranges: Vec<TextRange>,
}
impl CommentRangesBuilder {
pub fn visit_token(&mut self, token: &Tok, range: TextRange) {
if token.is_comment() {
self.ranges.push(range);
}
}
pub fn finish(self) -> CommentRanges {
CommentRanges { raw: self.ranges }
}
}
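For intuition, a standalone sketch of the `intersects` binary search above, with plain `(start, end)` byte ranges in place of `TextRange`; it relies, as documented, on the stored ranges being sorted and non-overlapping.

fn intersects(sorted: &[(usize, usize)], target: (usize, usize)) -> bool {
    sorted
        .binary_search_by(|&(start, end)| {
            if target.0 <= start && end <= target.1 {
                // The comment is fully contained in the target range.
                std::cmp::Ordering::Equal
            } else if end < target.0 {
                // The comment ends before the target range starts.
                std::cmp::Ordering::Less
            } else {
                // The comment starts at or after the target (or only partially
                // overlaps it, which `contains_range` does not count as a hit).
                std::cmp::Ordering::Greater
            }
        })
        .is_ok()
}

fn main() {
    let comments = [(5, 10), (20, 30)];
    assert!(intersects(&comments, (0, 12)));
    assert!(!intersects(&comments, (11, 19)));
}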

File diff suppressed because it is too large

View file

@@ -1,296 +0,0 @@
//! Struct used to index source code, to enable efficient lookup of tokens that
//! are omitted from the AST (e.g., commented lines).
use ruff_text_size::{TextRange, TextSize};
use rustpython_parser::lexer::LexResult;
use rustpython_parser::{StringKind, Tok};
use crate::source_code::comment_ranges::{CommentRanges, CommentRangesBuilder};
use crate::source_code::Locator;
pub struct Indexer {
comment_ranges: CommentRanges,
/// Stores the start offset of continuation lines.
continuation_lines: Vec<TextSize>,
/// The range of all triple quoted strings in the source document. The ranges are sorted by their
/// [`TextRange::start`] position in increasing order. No two ranges are overlapping.
triple_quoted_string_ranges: Vec<TextRange>,
/// The range of all f-string in the source document. The ranges are sorted by their
/// [`TextRange::start`] position in increasing order. No two ranges are overlapping.
f_string_ranges: Vec<TextRange>,
}
impl Indexer {
pub fn from_tokens(tokens: &[LexResult], locator: &Locator) -> Self {
assert!(TextSize::try_from(locator.contents().len()).is_ok());
let mut comment_ranges_builder = CommentRangesBuilder::default();
let mut continuation_lines = Vec::new();
let mut triple_quoted_string_ranges = Vec::new();
let mut f_string_ranges = Vec::new();
// Token, end
let mut prev_end = TextSize::default();
let mut prev_token: Option<&Tok> = None;
let mut line_start = TextSize::default();
for (tok, range) in tokens.iter().flatten() {
let trivia = &locator.contents()[TextRange::new(prev_end, range.start())];
// Get the trivia between the previous and the current token and detect any newlines.
// This is necessary because `RustPython` doesn't emit `[Tok::Newline]` tokens
// between any two tokens that form a continuation. That's why we have to extract the
// newlines "manually".
for (index, text) in trivia.match_indices(['\n', '\r']) {
if text == "\r" && trivia.as_bytes().get(index + 1) == Some(&b'\n') {
continue;
}
// Newlines after a newline never form a continuation.
if !matches!(prev_token, Some(Tok::Newline | Tok::NonLogicalNewline)) {
continuation_lines.push(line_start);
}
// SAFETY: Safe because of the len assertion at the top of the function.
#[allow(clippy::cast_possible_truncation)]
{
line_start = prev_end + TextSize::new((index + 1) as u32);
}
}
comment_ranges_builder.visit_token(tok, *range);
match tok {
Tok::Newline | Tok::NonLogicalNewline => {
line_start = range.end();
}
Tok::String {
triple_quoted: true,
..
} => {
triple_quoted_string_ranges.push(*range);
}
Tok::String {
kind: StringKind::FString | StringKind::RawFString,
..
} => {
f_string_ranges.push(*range);
}
_ => {}
}
prev_token = Some(tok);
prev_end = range.end();
}
Self {
comment_ranges: comment_ranges_builder.finish(),
continuation_lines,
triple_quoted_string_ranges,
f_string_ranges,
}
}
/// Returns the byte offset ranges of comments
pub const fn comment_ranges(&self) -> &CommentRanges {
&self.comment_ranges
}
/// Returns the comments in the given range as source code slices
pub fn comments_in_range<'a>(
&'a self,
range: TextRange,
locator: &'a Locator,
) -> impl Iterator<Item = &'a str> {
self.comment_ranges
.comments_in_range(range)
.iter()
.map(move |comment_range| locator.slice(*comment_range))
}
/// Returns the line start positions of continuations (backslash).
pub fn continuation_line_starts(&self) -> &[TextSize] {
&self.continuation_lines
}
/// Returns `true` if the given offset is part of a continuation line.
pub fn is_continuation(&self, offset: TextSize, locator: &Locator) -> bool {
let line_start = locator.line_start(offset);
self.continuation_lines.binary_search(&line_start).is_ok()
}
/// Return the [`TextRange`] of the triple-quoted string containing a given offset.
pub fn triple_quoted_string_range(&self, offset: TextSize) -> Option<TextRange> {
let Ok(string_range_index) = self.triple_quoted_string_ranges.binary_search_by(|range| {
if offset < range.start() {
std::cmp::Ordering::Greater
} else if range.contains(offset) {
std::cmp::Ordering::Equal
} else {
std::cmp::Ordering::Less
}
}) else {
return None;
};
Some(self.triple_quoted_string_ranges[string_range_index])
}
/// Return the [`TextRange`] of the f-string containing a given offset.
pub fn f_string_range(&self, offset: TextSize) -> Option<TextRange> {
let Ok(string_range_index) = self.f_string_ranges.binary_search_by(|range| {
if offset < range.start() {
std::cmp::Ordering::Greater
} else if range.contains(offset) {
std::cmp::Ordering::Equal
} else {
std::cmp::Ordering::Less
}
}) else {
return None;
};
Some(self.f_string_ranges[string_range_index])
}
}
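The "manual" newline extraction described in the comments above reduces to a `str::match_indices` scan over the inter-token trivia that skips the `\r` half of `\r\n`; below is a standalone sketch of just that scan, with the hypothetical `newline_starts` returning each following line's start offset.

fn newline_starts(trivia: &str) -> Vec<usize> {
    let mut starts = Vec::new();
    for (index, text) in trivia.match_indices(['\n', '\r']) {
        // `\r\n` is handled when its `\n` half is reached; skip the `\r`.
        if text == "\r" && trivia.as_bytes().get(index + 1) == Some(&b'\n') {
            continue;
        }
        // Record the offset just past the line break: the next line's start.
        starts.push(index + 1);
    }
    starts
}

fn main() {
    assert_eq!(newline_starts("a\r\nb\nc"), vec![3, 5]);
}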
#[cfg(test)]
mod tests {
use ruff_text_size::{TextRange, TextSize};
use rustpython_parser::lexer::LexResult;
use rustpython_parser::{lexer, Mode};
use crate::source_code::{Indexer, Locator};
#[test]
fn continuation() {
let contents = r#"x = 1"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer = Indexer::from_tokens(&lxr, &Locator::new(contents));
assert_eq!(indexer.continuation_line_starts(), &[]);
let contents = r#"
# Hello, world!
x = 1
y = 2
"#
.trim();
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer = Indexer::from_tokens(&lxr, &Locator::new(contents));
assert_eq!(indexer.continuation_line_starts(), &[]);
let contents = r#"
x = \
1
if True:
z = \
\
2
(
"abc" # Foo
"def" \
"ghi"
)
"#
.trim();
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
assert_eq!(
indexer.continuation_line_starts(),
[
// row 1
TextSize::from(0),
// row 5
TextSize::from(22),
// row 6
TextSize::from(32),
// row 11
TextSize::from(71),
]
);
let contents = r"
x = 1; import sys
import os
if True:
x = 1; import sys
import os
if True:
x = 1; \
import os
x = 1; \
import os
"
.trim();
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
assert_eq!(
indexer.continuation_line_starts(),
[
// row 9
TextSize::from(84),
// row 12
TextSize::from(116)
]
);
}
#[test]
fn string_ranges() {
let contents = r#""this is a single-quoted string""#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
assert_eq!(indexer.triple_quoted_string_ranges, []);
let contents = r#"
"""
this is a multiline string
"""
"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
assert_eq!(
indexer.triple_quoted_string_ranges,
[TextRange::new(TextSize::from(13), TextSize::from(71))]
);
let contents = r#"
"""
'''this is a multiline string with multiple delimiter types'''
"""
"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
assert_eq!(
indexer.triple_quoted_string_ranges,
[TextRange::new(TextSize::from(13), TextSize::from(107))]
);
let contents = r#"
"""
this is one
multiline string
"""
"""
and this is
another
"""
"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
assert_eq!(
indexer.triple_quoted_string_ranges,
&[
TextRange::new(TextSize::from(13), TextSize::from(85)),
TextRange::new(TextSize::from(98), TextSize::from(161))
]
);
}
}

View file

@@ -1,595 +0,0 @@
use std::fmt;
use std::fmt::{Debug, Formatter};
use std::num::NonZeroUsize;
use std::ops::Deref;
use std::sync::Arc;
use ruff_text_size::{TextLen, TextRange, TextSize};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::source_code::SourceLocation;
/// Index for fast [byte offset](TextSize) to [`SourceLocation`] conversions.
///
/// Cloning a [`LineIndex`] is cheap because it only requires bumping a reference count.
#[derive(Clone)]
pub struct LineIndex {
inner: Arc<LineIndexInner>,
}
struct LineIndexInner {
line_starts: Vec<TextSize>,
kind: IndexKind,
}
impl LineIndex {
/// Builds the [`LineIndex`] from the source text of a file.
pub fn from_source_text(text: &str) -> Self {
let mut line_starts: Vec<TextSize> = Vec::with_capacity(text.len() / 88);
line_starts.push(TextSize::default());
let bytes = text.as_bytes();
let mut utf8 = false;
assert!(u32::try_from(bytes.len()).is_ok());
for (i, byte) in bytes.iter().enumerate() {
utf8 |= !byte.is_ascii();
match byte {
// Only track one line break for `\r\n`.
b'\r' if bytes.get(i + 1) == Some(&b'\n') => continue,
b'\n' | b'\r' => {
// SAFETY: Assertion above guarantees `i <= u32::MAX`
#[allow(clippy::cast_possible_truncation)]
line_starts.push(TextSize::from(i as u32) + TextSize::from(1));
}
_ => {}
}
}
let kind = if utf8 {
IndexKind::Utf8
} else {
IndexKind::Ascii
};
Self {
inner: Arc::new(LineIndexInner { line_starts, kind }),
}
}
fn kind(&self) -> IndexKind {
self.inner.kind
}
/// Returns the row and column index for an offset.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::TextSize;
/// # use ruff_python_ast::source_code::{LineIndex, OneIndexed, SourceLocation};
/// let source = "def a():\n pass";
/// let index = LineIndex::from_source_text(source);
///
/// assert_eq!(
/// index.source_location(TextSize::from(0), source),
/// SourceLocation { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(0) }
/// );
///
/// assert_eq!(
/// index.source_location(TextSize::from(4), source),
/// SourceLocation { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(4) }
/// );
/// assert_eq!(
/// index.source_location(TextSize::from(13), source),
/// SourceLocation { row: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(4) }
/// );
/// ```
///
/// ## Panics
///
/// If the offset is out of bounds.
pub fn source_location(&self, offset: TextSize, content: &str) -> SourceLocation {
match self.line_starts().binary_search(&offset) {
// Offset is at the start of a line
Ok(row) => SourceLocation {
row: OneIndexed::from_zero_indexed(row),
column: OneIndexed::from_zero_indexed(0),
},
Err(next_row) => {
// SAFETY: Safe because the index always contains an entry for the offset 0
let row = next_row - 1;
let mut line_start = self.line_starts()[row];
let column = if self.kind().is_ascii() {
usize::from(offset) - usize::from(line_start)
} else {
// Don't count the BOM character as a column.
if line_start == TextSize::from(0) && content.starts_with('\u{feff}') {
line_start = '\u{feff}'.text_len();
}
content[TextRange::new(line_start, offset)].chars().count()
};
SourceLocation {
row: OneIndexed::from_zero_indexed(row),
column: OneIndexed::from_zero_indexed(column),
}
}
}
}
/// Return the number of lines in the source code.
pub(crate) fn line_count(&self) -> usize {
self.line_starts().len()
}
/// Returns the row number for a given offset.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::TextSize;
/// # use ruff_python_ast::source_code::{LineIndex, OneIndexed, SourceLocation};
/// let source = "def a():\n pass";
/// let index = LineIndex::from_source_text(source);
///
/// assert_eq!(index.line_index(TextSize::from(0)), OneIndexed::from_zero_indexed(0));
/// assert_eq!(index.line_index(TextSize::from(4)), OneIndexed::from_zero_indexed(0));
/// assert_eq!(index.line_index(TextSize::from(13)), OneIndexed::from_zero_indexed(1));
/// ```
///
/// ## Panics
///
/// If the offset is out of bounds.
pub fn line_index(&self, offset: TextSize) -> OneIndexed {
match self.line_starts().binary_search(&offset) {
// Offset is at the start of a line
Ok(row) => OneIndexed::from_zero_indexed(row),
Err(row) => {
// SAFETY: Safe because the index always contains an entry for the offset 0
OneIndexed::from_zero_indexed(row - 1)
}
}
}
/// Returns the [byte offset](TextSize) for the `line` with the given index.
pub(crate) fn line_start(&self, line: OneIndexed, contents: &str) -> TextSize {
let row_index = line.to_zero_indexed();
let starts = self.line_starts();
// If the start-of-line position is after the last line
if row_index == starts.len() {
contents.text_len()
} else {
starts[row_index]
}
}
/// Returns the [byte offset](TextSize) of the `line`'s end.
/// The offset is the end of the line, up to and including the newline character ending the line (if any).
pub(crate) fn line_end(&self, line: OneIndexed, contents: &str) -> TextSize {
let row_index = line.to_zero_indexed();
let starts = self.line_starts();
// If the start-of-line position is after the last line
if row_index.saturating_add(1) >= starts.len() {
contents.text_len()
} else {
starts[row_index + 1]
}
}
/// Returns the [`TextRange`] of the `line` with the given index.
/// The start points to the first character's [byte offset](TextSize), the end up to, and including
/// the newline character ending the line (if any).
pub(crate) fn line_range(&self, line: OneIndexed, contents: &str) -> TextRange {
let starts = self.line_starts();
if starts.len() == line.to_zero_indexed() {
TextRange::empty(contents.text_len())
} else {
TextRange::new(
self.line_start(line, contents),
self.line_start(line.saturating_add(1), contents),
)
}
}
/// Returns the [byte offsets](TextSize) for every line
pub fn line_starts(&self) -> &[TextSize] {
&self.inner.line_starts
}
}
impl Deref for LineIndex {
type Target = [TextSize];
fn deref(&self) -> &Self::Target {
self.line_starts()
}
}
impl Debug for LineIndex {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
f.debug_list().entries(self.line_starts()).finish()
}
}
#[derive(Debug, Clone, Copy)]
enum IndexKind {
/// Optimized index for an ASCII only document
Ascii,
/// Index for UTF8 documents
Utf8,
}
impl IndexKind {
const fn is_ascii(self) -> bool {
matches!(self, IndexKind::Ascii)
}
}
/// Type-safe wrapper for a value whose logical range starts at `1`, for
/// instance the line or column numbers in a file
///
/// Internally this is represented as a [`NonZeroUsize`], which enables some
/// memory optimizations
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct OneIndexed(NonZeroUsize);
impl OneIndexed {
/// The largest value that can be represented by this integer type
pub const MAX: Self = unwrap(Self::new(usize::MAX));
// SAFETY: These constants are being initialized with non-zero values
/// The smallest value that can be represented by this integer type.
pub const MIN: Self = unwrap(Self::new(1));
pub const ONE: NonZeroUsize = unwrap(NonZeroUsize::new(1));
/// Creates a non-zero if the given value is not zero.
pub const fn new(value: usize) -> Option<Self> {
match NonZeroUsize::new(value) {
Some(value) => Some(Self(value)),
None => None,
}
}
/// Construct a new [`OneIndexed`] from a zero-indexed value
pub const fn from_zero_indexed(value: usize) -> Self {
Self(Self::ONE.saturating_add(value))
}
/// Returns the value as a primitive type.
pub const fn get(self) -> usize {
self.0.get()
}
/// Return the zero-indexed primitive value for this [`OneIndexed`]
pub const fn to_zero_indexed(self) -> usize {
self.0.get() - 1
}
/// Saturating integer addition. Computes `self + rhs`, saturating at
/// the numeric bounds instead of overflowing.
#[must_use]
pub const fn saturating_add(self, rhs: usize) -> Self {
match NonZeroUsize::new(self.0.get().saturating_add(rhs)) {
Some(value) => Self(value),
None => Self::MAX,
}
}
/// Saturating integer subtraction. Computes `self - rhs`, saturating
/// at the numeric bounds instead of overflowing.
#[must_use]
pub const fn saturating_sub(self, rhs: usize) -> Self {
match NonZeroUsize::new(self.0.get().saturating_sub(rhs)) {
Some(value) => Self(value),
None => Self::MIN,
}
}
}
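A minimal sketch of the memory optimization the doc comment above alludes to: `NonZeroUsize` leaves zero free as a niche, so `Option<OneIndexed>` needs no extra tag byte and is the same size as `OneIndexed` itself; the local `Line` type is an illustrative stand-in.

use std::num::NonZeroUsize;

#[allow(dead_code)]
#[derive(Debug, Copy, Clone)]
struct Line(NonZeroUsize);

fn main() {
    // `None` is encoded as the forbidden zero value, so no discriminant is added.
    assert_eq!(
        std::mem::size_of::<Line>(),
        std::mem::size_of::<Option<Line>>()
    );
}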
impl fmt::Display for OneIndexed {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
std::fmt::Debug::fmt(&self.0.get(), f)
}
}
/// A const `Option::unwrap` without nightly features:
/// [Tracking issue](https://github.com/rust-lang/rust/issues/67441)
const fn unwrap<T: Copy>(option: Option<T>) -> T {
match option {
Some(value) => value,
None => panic!("unwrapping None"),
}
}
#[cfg(test)]
mod tests {
use ruff_text_size::TextSize;
use crate::source_code::line_index::LineIndex;
use crate::source_code::{OneIndexed, SourceLocation};
#[test]
fn ascii_index() {
let index = LineIndex::from_source_text("");
assert_eq!(index.line_starts(), &[TextSize::from(0)]);
let index = LineIndex::from_source_text("x = 1");
assert_eq!(index.line_starts(), &[TextSize::from(0)]);
let index = LineIndex::from_source_text("x = 1\n");
assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(6)]);
let index = LineIndex::from_source_text("x = 1\ny = 2\nz = x + y\n");
assert_eq!(
index.line_starts(),
&[
TextSize::from(0),
TextSize::from(6),
TextSize::from(12),
TextSize::from(22)
]
);
}
#[test]
fn ascii_source_location() {
let contents = "x = 1\ny = 2";
let index = LineIndex::from_source_text(contents);
// First row.
let loc = index.source_location(TextSize::from(2), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(2)
}
);
// Second row.
let loc = index.source_location(TextSize::from(6), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
let loc = index.source_location(TextSize::from(11), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(5)
}
);
}
#[test]
fn ascii_carriage_return() {
let contents = "x = 4\ry = 3";
let index = LineIndex::from_source_text(contents);
assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(6)]);
assert_eq!(
index.source_location(TextSize::from(4), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(4)
}
);
assert_eq!(
index.source_location(TextSize::from(6), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
assert_eq!(
index.source_location(TextSize::from(7), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1)
}
);
}
#[test]
fn ascii_carriage_return_newline() {
let contents = "x = 4\r\ny = 3";
let index = LineIndex::from_source_text(contents);
assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(7)]);
assert_eq!(
index.source_location(TextSize::from(4), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(4)
}
);
assert_eq!(
index.source_location(TextSize::from(7), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
assert_eq!(
index.source_location(TextSize::from(8), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1)
}
);
}
#[test]
fn utf8_index() {
let index = LineIndex::from_source_text("x = '🫣'");
assert_eq!(index.line_count(), 1);
assert_eq!(index.line_starts(), &[TextSize::from(0)]);
let index = LineIndex::from_source_text("x = '🫣'\n");
assert_eq!(index.line_count(), 2);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(11)]
);
let index = LineIndex::from_source_text("x = '🫣'\ny = 2\nz = x + y\n");
assert_eq!(index.line_count(), 4);
assert_eq!(
index.line_starts(),
&[
TextSize::from(0),
TextSize::from(11),
TextSize::from(17),
TextSize::from(27)
]
);
let index = LineIndex::from_source_text("# 🫣\nclass Foo:\n \"\"\".\"\"\"");
assert_eq!(index.line_count(), 3);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(7), TextSize::from(18)]
);
}
#[test]
fn utf8_carriage_return() {
let contents = "x = '🫣'\ry = 3";
let index = LineIndex::from_source_text(contents);
assert_eq!(index.line_count(), 2);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(11)]
);
// Second '
assert_eq!(
index.source_location(TextSize::from(9), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(6)
}
);
assert_eq!(
index.source_location(TextSize::from(11), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
assert_eq!(
index.source_location(TextSize::from(12), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1)
}
);
}
#[test]
fn utf8_carriage_return_newline() {
let contents = "x = '🫣'\r\ny = 3";
let index = LineIndex::from_source_text(contents);
assert_eq!(index.line_count(), 2);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(12)]
);
// Second '
assert_eq!(
index.source_location(TextSize::from(9), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(6)
}
);
assert_eq!(
index.source_location(TextSize::from(12), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
assert_eq!(
index.source_location(TextSize::from(13), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1)
}
);
}
#[test]
fn utf8_byte_offset() {
let contents = "x = '☃'\ny = 2";
let index = LineIndex::from_source_text(contents);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(10)]
);
// First row.
let loc = index.source_location(TextSize::from(0), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(0)
}
);
let loc = index.source_location(TextSize::from(5), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(5)
}
);
let loc = index.source_location(TextSize::from(8), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(6)
}
);
// Second row.
let loc = index.source_location(TextSize::from(10), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
// One-past-the-end.
let loc = index.source_location(TextSize::from(15), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(5)
}
);
}
}

View file

@@ -1,412 +0,0 @@
//! Struct used to efficiently slice source code at (row, column) Locations.
use std::ops::Add;
use memchr::{memchr2, memrchr2};
use once_cell::unsync::OnceCell;
use ruff_text_size::{TextLen, TextRange, TextSize};
use ruff_python_trivia::find_newline;
use crate::source_code::{LineIndex, OneIndexed, SourceCode, SourceLocation};
pub struct Locator<'a> {
contents: &'a str,
index: OnceCell<LineIndex>,
}
impl<'a> Locator<'a> {
pub const fn new(contents: &'a str) -> Self {
Self {
contents,
index: OnceCell::new(),
}
}
#[deprecated(
note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead."
)]
pub fn compute_line_index(&self, offset: TextSize) -> OneIndexed {
self.to_index().line_index(offset)
}
#[deprecated(
note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead."
)]
pub fn compute_source_location(&self, offset: TextSize) -> SourceLocation {
self.to_source_code().source_location(offset)
}
fn to_index(&self) -> &LineIndex {
self.index
.get_or_init(|| LineIndex::from_source_text(self.contents))
}
pub fn line_index(&self) -> Option<&LineIndex> {
self.index.get()
}
pub fn to_source_code(&self) -> SourceCode {
SourceCode {
index: self.to_index(),
text: self.contents,
}
}
/// Computes the start position of the line of `offset`.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::TextSize;
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\rthird line");
///
/// assert_eq!(locator.line_start(TextSize::from(0)), TextSize::from(0));
/// assert_eq!(locator.line_start(TextSize::from(4)), TextSize::from(0));
///
/// assert_eq!(locator.line_start(TextSize::from(14)), TextSize::from(11));
/// assert_eq!(locator.line_start(TextSize::from(28)), TextSize::from(23));
/// ```
///
/// ## Panics
/// If `offset` is out of bounds.
pub fn line_start(&self, offset: TextSize) -> TextSize {
let bytes = self.contents[TextRange::up_to(offset)].as_bytes();
if let Some(index) = memrchr2(b'\n', b'\r', bytes) {
// SAFETY: Safe because `index < offset`
TextSize::try_from(index).unwrap().add(TextSize::from(1))
} else {
TextSize::default()
}
}
pub fn is_at_start_of_line(&self, offset: TextSize) -> bool {
offset == TextSize::from(0)
|| self.contents[TextRange::up_to(offset)].ends_with(['\n', '\r'])
}
/// Computes the offset that is right after the newline character that ends `offset`'s line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.full_line_end(TextSize::from(3)), TextSize::from(11));
/// assert_eq!(locator.full_line_end(TextSize::from(14)), TextSize::from(24));
/// assert_eq!(locator.full_line_end(TextSize::from(28)), TextSize::from(34));
/// ```
///
/// ## Panics
///
/// If `offset` is past the end of the content.
pub fn full_line_end(&self, offset: TextSize) -> TextSize {
let slice = &self.contents[usize::from(offset)..];
if let Some((index, line_ending)) = find_newline(slice) {
offset + TextSize::try_from(index).unwrap() + line_ending.text_len()
} else {
self.contents.text_len()
}
}
/// Computes the offset that is right before the newline character that ends `offset`'s line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.line_end(TextSize::from(3)), TextSize::from(10));
/// assert_eq!(locator.line_end(TextSize::from(14)), TextSize::from(22));
/// assert_eq!(locator.line_end(TextSize::from(28)), TextSize::from(34));
/// ```
///
/// ## Panics
///
/// If `offset` is past the end of the content.
pub fn line_end(&self, offset: TextSize) -> TextSize {
let slice = &self.contents[usize::from(offset)..];
if let Some(index) = memchr2(b'\n', b'\r', slice.as_bytes()) {
offset + TextSize::try_from(index).unwrap()
} else {
self.contents.text_len()
}
}
/// Computes the range of this `offset`'s line.
///
/// The range starts at the beginning of the line and goes up to, and including, the new line character
/// at the end of the line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.full_line_range(TextSize::from(3)), TextRange::new(TextSize::from(0), TextSize::from(11)));
/// assert_eq!(locator.full_line_range(TextSize::from(14)), TextRange::new(TextSize::from(11), TextSize::from(24)));
/// assert_eq!(locator.full_line_range(TextSize::from(28)), TextRange::new(TextSize::from(24), TextSize::from(34)));
/// ```
///
/// ## Panics
/// If `offset` is out of bounds.
pub fn full_line_range(&self, offset: TextSize) -> TextRange {
TextRange::new(self.line_start(offset), self.full_line_end(offset))
}
/// Computes the range of this `offset`'s line ending before the newline character.
///
/// The range starts at the beginning of the line and goes up to, but excluding, the new line character
/// at the end of the line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.line_range(TextSize::from(3)), TextRange::new(TextSize::from(0), TextSize::from(10)));
/// assert_eq!(locator.line_range(TextSize::from(14)), TextRange::new(TextSize::from(11), TextSize::from(22)));
/// assert_eq!(locator.line_range(TextSize::from(28)), TextRange::new(TextSize::from(24), TextSize::from(34)));
/// ```
///
/// ## Panics
/// If `offset` is out of bounds.
pub fn line_range(&self, offset: TextSize) -> TextRange {
TextRange::new(self.line_start(offset), self.line_end(offset))
}
/// Returns the text of the `offset`'s line.
///
/// The line includes the newline characters at the end of the line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.full_line(TextSize::from(3)), "First line\n");
/// assert_eq!(locator.full_line(TextSize::from(14)), "second line\r\n");
/// assert_eq!(locator.full_line(TextSize::from(28)), "third line");
/// ```
///
/// ## Panics
/// If `offset` is out of bounds.
pub fn full_line(&self, offset: TextSize) -> &'a str {
&self.contents[self.full_line_range(offset)]
}
/// Returns the text of the `offset`'s line.
///
/// Excludes the newline characters at the end of the line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.line(TextSize::from(3)), "First line");
/// assert_eq!(locator.line(TextSize::from(14)), "second line");
/// assert_eq!(locator.line(TextSize::from(28)), "third line");
/// ```
///
/// ## Panics
/// If `offset` is out of bounds.
pub fn line(&self, offset: TextSize) -> &'a str {
&self.contents[self.line_range(offset)]
}
/// Computes the range of all lines that this `range` covers.
///
/// The range starts at the beginning of the line at `range.start()` and goes up to, and including, the new line character
/// at the end of `range.end()`'s line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(
/// locator.full_lines_range(TextRange::new(TextSize::from(3), TextSize::from(5))),
/// TextRange::new(TextSize::from(0), TextSize::from(11))
/// );
/// assert_eq!(
/// locator.full_lines_range(TextRange::new(TextSize::from(3), TextSize::from(14))),
/// TextRange::new(TextSize::from(0), TextSize::from(24))
/// );
/// ```
///
/// ## Panics
/// If the start or end of `range` is out of bounds.
pub fn full_lines_range(&self, range: TextRange) -> TextRange {
TextRange::new(
self.line_start(range.start()),
self.full_line_end(range.end()),
)
}
/// Computes the range of all lines that this `range` covers.
///
/// The range starts at the beginning of the line at `range.start()` and goes up to, but excluding, the new line character
/// at the end of `range.end()`'s line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(
/// locator.lines_range(TextRange::new(TextSize::from(3), TextSize::from(5))),
/// TextRange::new(TextSize::from(0), TextSize::from(10))
/// );
/// assert_eq!(
/// locator.lines_range(TextRange::new(TextSize::from(3), TextSize::from(14))),
/// TextRange::new(TextSize::from(0), TextSize::from(22))
/// );
/// ```
///
/// ## Panics
/// If the start or end of `range` is out of bounds.
pub fn lines_range(&self, range: TextRange) -> TextRange {
TextRange::new(self.line_start(range.start()), self.line_end(range.end()))
}
/// Returns true if the text of `range` contains any line break.
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert!(
/// !locator.contains_line_break(TextRange::new(TextSize::from(3), TextSize::from(5))),
/// );
/// assert!(
/// locator.contains_line_break(TextRange::new(TextSize::from(3), TextSize::from(14))),
/// );
/// ```
///
/// ## Panics
/// If the `range` is out of bounds.
pub fn contains_line_break(&self, range: TextRange) -> bool {
let text = &self.contents[range];
text.contains(['\n', '\r'])
}
/// Returns the text of all lines that include `range`.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(
/// locator.lines(TextRange::new(TextSize::from(3), TextSize::from(5))),
/// "First line"
/// );
/// assert_eq!(
/// locator.lines(TextRange::new(TextSize::from(3), TextSize::from(14))),
/// "First line\nsecond line"
/// );
/// ```
///
/// ## Panics
/// If the start or end of `range` is out of bounds.
pub fn lines(&self, range: TextRange) -> &'a str {
&self.contents[self.lines_range(range)]
}
/// Returns the text of all lines that include `range`.
///
/// Includes the newline characters of the last line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(
/// locator.full_lines(TextRange::new(TextSize::from(3), TextSize::from(5))),
/// "First line\n"
/// );
/// assert_eq!(
/// locator.full_lines(TextRange::new(TextSize::from(3), TextSize::from(14))),
/// "First line\nsecond line\r\n"
/// );
/// ```
///
/// ## Panics
/// If the start or end of `range` is out of bounds.
pub fn full_lines(&self, range: TextRange) -> &'a str {
&self.contents[self.full_lines_range(range)]
}
/// Take the source code up to the given [`TextSize`].
#[inline]
pub fn up_to(&self, offset: TextSize) -> &'a str {
&self.contents[TextRange::up_to(offset)]
}
/// Take the source code after the given [`TextSize`].
#[inline]
pub fn after(&self, offset: TextSize) -> &'a str {
&self.contents[usize::from(offset)..]
}
/// Take the source code between the given [`TextRange`].
#[inline]
pub fn slice(&self, range: TextRange) -> &'a str {
&self.contents[range]
}
/// Return the underlying source code.
pub fn contents(&self) -> &'a str {
self.contents
}
/// Return the number of bytes in the source code.
pub const fn len(&self) -> usize {
self.contents.len()
}
pub fn text_len(&self) -> TextSize {
self.contents.text_len()
}
/// Return `true` if the source code is empty.
pub const fn is_empty(&self) -> bool {
self.contents.is_empty()
}
}

View file

@@ -1,266 +0,0 @@
use std::cmp::Ordering;
use std::fmt::{Debug, Formatter};
use std::sync::Arc;
use ruff_text_size::{TextRange, TextSize};
use rustpython_parser::{ast, lexer, Mode, Parse, ParseError};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
pub use comment_ranges::{CommentRanges, CommentRangesBuilder};
pub use generator::Generator;
pub use indexer::Indexer;
pub use locator::Locator;
pub use stylist::{Quote, Stylist};
pub use crate::source_code::line_index::{LineIndex, OneIndexed};
mod comment_ranges;
mod generator;
mod indexer;
mod line_index;
mod locator;
mod stylist;
/// Run round-trip source code generation on a given Python code.
pub fn round_trip(code: &str, source_path: &str) -> Result<String, ParseError> {
let locator = Locator::new(code);
let python_ast = ast::Suite::parse(code, source_path)?;
let tokens: Vec<_> = lexer::lex(code, Mode::Module).collect();
let stylist = Stylist::from_tokens(&tokens, &locator);
let mut generator: Generator = (&stylist).into();
generator.unparse_suite(&python_ast);
Ok(generator.generate())
}
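A hedged usage sketch for `round_trip`, written against the crate layout shown here; note that this very commit deletes the `source_code` module from ruff_python_ast, so the import path below only matches the pre-move layout.

use ruff_python_ast::source_code::round_trip;

fn main() {
    let code = "x=1\nif x:\n    pass\n";
    // Parse the module, then regenerate source code in the inferred style.
    match round_trip(code, "<example>") {
        Ok(generated) => println!("{generated}"),
        Err(err) => eprintln!("parse error: {err}"),
    }
}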
/// Gives access to the source code of a file and allows mapping between [`TextSize`] and [`SourceLocation`].
#[derive(Debug)]
pub struct SourceCode<'src, 'index> {
text: &'src str,
index: &'index LineIndex,
}
impl<'src, 'index> SourceCode<'src, 'index> {
pub fn new(content: &'src str, index: &'index LineIndex) -> Self {
Self {
text: content,
index,
}
}
    /// Computes the one-indexed row and column numbers for `offset`.
#[inline]
pub fn source_location(&self, offset: TextSize) -> SourceLocation {
self.index.source_location(offset, self.text)
}
#[inline]
pub fn line_index(&self, offset: TextSize) -> OneIndexed {
self.index.line_index(offset)
}
/// Take the source code up to the given [`TextSize`].
#[inline]
pub fn up_to(&self, offset: TextSize) -> &'src str {
&self.text[TextRange::up_to(offset)]
}
/// Take the source code after the given [`TextSize`].
#[inline]
pub fn after(&self, offset: TextSize) -> &'src str {
&self.text[usize::from(offset)..]
}
/// Take the source code between the given [`TextRange`].
pub fn slice(&self, range: TextRange) -> &'src str {
&self.text[range]
}
pub fn line_start(&self, line: OneIndexed) -> TextSize {
self.index.line_start(line, self.text)
}
pub fn line_end(&self, line: OneIndexed) -> TextSize {
self.index.line_end(line, self.text)
}
pub fn line_range(&self, line: OneIndexed) -> TextRange {
self.index.line_range(line, self.text)
}
/// Returns the source text of the line with the given index
#[inline]
pub fn line_text(&self, index: OneIndexed) -> &'src str {
let range = self.index.line_range(index, self.text);
&self.text[range]
}
/// Returns the source text
pub fn text(&self) -> &'src str {
self.text
}
/// Returns the number of lines
#[inline]
pub fn line_count(&self) -> usize {
self.index.line_count()
}
}
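
// Illustrative sketch (hypothetical): mapping a byte offset to a one-indexed row
// via `SourceCode`, using `LineIndex::from_source_text` as elsewhere in this module.
fn source_code_sketch() {
    let text = "def f():\n    pass\n";
    let index = LineIndex::from_source_text(text);
    let code = SourceCode::new(text, &index);
    // Offset 13 falls on the second line (`    pass`).
    assert_eq!(code.source_location(TextSize::from(13)).row.get(), 2);
}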
impl PartialEq<Self> for SourceCode<'_, '_> {
fn eq(&self, other: &Self) -> bool {
self.text == other.text
}
}
impl Eq for SourceCode<'_, '_> {}
/// A Builder for constructing a [`SourceFile`]
pub struct SourceFileBuilder {
name: Box<str>,
code: Box<str>,
index: Option<LineIndex>,
}
impl SourceFileBuilder {
/// Creates a new builder for a file named `name`.
pub fn new<Name: Into<Box<str>>, Code: Into<Box<str>>>(name: Name, code: Code) -> Self {
Self {
name: name.into(),
code: code.into(),
index: None,
}
}
#[must_use]
pub fn line_index(mut self, index: LineIndex) -> Self {
self.index = Some(index);
self
}
pub fn set_line_index(&mut self, index: LineIndex) {
self.index = Some(index);
}
/// Consumes `self` and returns the [`SourceFile`].
pub fn finish(self) -> SourceFile {
let index = if let Some(index) = self.index {
once_cell::sync::OnceCell::with_value(index)
} else {
once_cell::sync::OnceCell::new()
};
SourceFile {
inner: Arc::new(SourceFileInner {
name: self.name,
code: self.code,
line_index: index,
}),
}
}
}
/// A source file, identified by its name. Stores the source code and a lazily-computed [`LineIndex`].
///
/// Cloning a [`SourceFile`] is cheap, because it only requires bumping a reference count.
#[derive(Clone, Eq, PartialEq)]
pub struct SourceFile {
inner: Arc<SourceFileInner>,
}
impl Debug for SourceFile {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("SourceFile")
.field("name", &self.name())
.field("code", &self.source_text())
.finish()
}
}
impl SourceFile {
/// Returns the name of the source file (filename).
#[inline]
pub fn name(&self) -> &str {
&self.inner.name
}
#[inline]
pub fn slice(&self, range: TextRange) -> &str {
&self.source_text()[range]
}
pub fn to_source_code(&self) -> SourceCode {
SourceCode {
text: self.source_text(),
index: self.index(),
}
}
fn index(&self) -> &LineIndex {
self.inner
.line_index
.get_or_init(|| LineIndex::from_source_text(self.source_text()))
}
/// Returns the source code.
#[inline]
pub fn source_text(&self) -> &str {
&self.inner.code
}
}
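
// Illustrative sketch (hypothetical names): building a `SourceFile` with the
// builder above; the line index is only computed on first access.
fn source_file_sketch() {
    let file = SourceFileBuilder::new("example.py", "print('hello')\n").finish();
    assert_eq!(file.name(), "example.py");
    assert_eq!(file.source_text(), "print('hello')\n");
    let _lines = file.to_source_code().line_count();
}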
impl PartialOrd for SourceFile {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for SourceFile {
fn cmp(&self, other: &Self) -> Ordering {
        // Short-circuit if these are the same source file
if Arc::ptr_eq(&self.inner, &other.inner) {
Ordering::Equal
} else {
self.inner.name.cmp(&other.inner.name)
}
}
}
struct SourceFileInner {
name: Box<str>,
code: Box<str>,
line_index: once_cell::sync::OnceCell<LineIndex>,
}
impl PartialEq for SourceFileInner {
fn eq(&self, other: &Self) -> bool {
self.name == other.name && self.code == other.code
}
}
impl Eq for SourceFileInner {}
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct SourceLocation {
pub row: OneIndexed,
pub column: OneIndexed,
}
impl Default for SourceLocation {
fn default() -> Self {
Self {
row: OneIndexed::MIN,
column: OneIndexed::MIN,
}
}
}
impl Debug for SourceLocation {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("SourceLocation")
.field("row", &self.row.get())
.field("column", &self.column.get())
.finish()
}
}

View file

@@ -1,329 +0,0 @@
//! Detect code style from Python source code.
use std::fmt;
use std::ops::Deref;
use once_cell::unsync::OnceCell;
use ruff_python_trivia::{find_newline, LineEnding};
use rustpython_literal::escape::Quote as StrQuote;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::Tok;
use crate::source_code::Locator;
use crate::str::leading_quote;
pub struct Stylist<'a> {
locator: &'a Locator<'a>,
indentation: Indentation,
quote: Quote,
line_ending: OnceCell<LineEnding>,
}
impl<'a> Stylist<'a> {
pub fn indentation(&'a self) -> &'a Indentation {
&self.indentation
}
pub fn quote(&'a self) -> Quote {
self.quote
}
pub fn line_ending(&'a self) -> LineEnding {
*self.line_ending.get_or_init(|| {
let contents = self.locator.contents();
find_newline(contents)
.map(|(_, ending)| ending)
.unwrap_or_default()
})
}
pub fn from_tokens(tokens: &[LexResult], locator: &'a Locator<'a>) -> Self {
let indentation = detect_indention(tokens, locator);
Self {
locator,
indentation,
quote: detect_quote(tokens, locator),
line_ending: OnceCell::default(),
}
}
}
fn detect_quote(tokens: &[LexResult], locator: &Locator) -> Quote {
let quote_range = tokens.iter().flatten().find_map(|(t, range)| match t {
Tok::String {
triple_quoted: false,
..
} => Some(*range),
_ => None,
});
if let Some(quote_range) = quote_range {
let content = &locator.slice(quote_range);
if let Some(quotes) = leading_quote(content) {
return if quotes.contains('\'') {
Quote::Single
} else if quotes.contains('"') {
Quote::Double
} else {
unreachable!("Expected string to start with a valid quote prefix")
};
}
}
Quote::default()
}
fn detect_indention(tokens: &[LexResult], locator: &Locator) -> Indentation {
let indent_range = tokens.iter().flatten().find_map(|(t, range)| {
if matches!(t, Tok::Indent) {
Some(range)
} else {
None
}
});
if let Some(indent_range) = indent_range {
let whitespace = locator.slice(*indent_range);
Indentation(whitespace.to_string())
} else {
Indentation::default()
}
}
/// The quotation style used in Python source code.
#[derive(Debug, Default, PartialEq, Eq, Copy, Clone)]
pub enum Quote {
Single,
#[default]
Double,
}
impl From<Quote> for char {
fn from(val: Quote) -> Self {
match val {
Quote::Single => '\'',
Quote::Double => '"',
}
}
}
impl From<Quote> for StrQuote {
fn from(val: Quote) -> Self {
match val {
Quote::Single => StrQuote::Single,
Quote::Double => StrQuote::Double,
}
}
}
impl fmt::Display for Quote {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Quote::Single => write!(f, "\'"),
Quote::Double => write!(f, "\""),
}
}
}
/// The indentation style used in Python source code.
#[derive(Debug, PartialEq, Eq)]
pub struct Indentation(String);
impl Indentation {
pub const fn new(indentation: String) -> Self {
Self(indentation)
}
}
impl Default for Indentation {
fn default() -> Self {
        Indentation("    ".to_string())
}
}
impl Indentation {
pub fn as_str(&self) -> &str {
self.0.as_str()
}
pub fn as_char(&self) -> char {
self.0.chars().next().unwrap()
}
}
impl Deref for Indentation {
type Target = str;
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
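
// Illustrative sketch (hypothetical): detecting indentation and quote style
// together; with no string literals, the quote falls back to the default (double).
fn stylist_sketch() {
    use rustpython_parser::{lexer::lex, Mode};
    let contents = "if x:\n\tpass\n";
    let locator = Locator::new(contents);
    let tokens: Vec<_> = lex(contents, Mode::Module).collect();
    let stylist = Stylist::from_tokens(&tokens, &locator);
    assert_eq!(stylist.indentation().as_str(), "\t");
    assert_eq!(stylist.quote(), Quote::Double);
}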
#[cfg(test)]
mod tests {
use rustpython_parser::lexer::lex;
use rustpython_parser::Mode;
use ruff_python_trivia::{find_newline, LineEnding};
use crate::source_code::stylist::{Indentation, Quote};
use crate::source_code::{Locator, Stylist};
#[test]
fn indentation() {
let contents = r#"x = 1"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).indentation(),
&Indentation::default()
);
let contents = r#"
if True:
pass
"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).indentation(),
&Indentation(" ".to_string())
);
let contents = r#"
if True:
pass
"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).indentation(),
&Indentation(" ".to_string())
);
let contents = r#"
if True:
pass
"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).indentation(),
&Indentation("\t".to_string())
);
// TODO(charlie): Should non-significant whitespace be detected?
let contents = r#"
x = (
1,
2,
3,
)
"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).indentation(),
&Indentation::default()
);
}
#[test]
fn quote() {
let contents = r#"x = 1"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::default()
);
let contents = r#"x = '1'"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Single
);
let contents = r#"x = "1""#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Double
);
let contents = r#"s = "It's done.""#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Double
);
        // Detection ignores triple-quoted strings, so a lone docstring leaves the default (double).
let contents = r#"
def f():
"""Docstring."""
pass
"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::default()
);
        // Detect the quote style from the first string literal after the docstring.
let contents = r#"
"""Module docstring."""
a = 'v'
"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Single
);
let contents = r#"
'''Module docstring.'''
a = "v"
"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Double
);
}
#[test]
fn line_ending() {
let contents = "x = 1";
assert_eq!(find_newline(contents).map(|(_, ending)| ending), None);
let contents = "x = 1\n";
assert_eq!(
find_newline(contents).map(|(_, ending)| ending),
Some(LineEnding::Lf)
);
let contents = "x = 1\r";
assert_eq!(
find_newline(contents).map(|(_, ending)| ending),
Some(LineEnding::Cr)
);
let contents = "x = 1\r\n";
assert_eq!(
find_newline(contents).map(|(_, ending)| ending),
Some(LineEnding::CrLf)
);
}
}

View file

@@ -1,7 +1,6 @@
//! Specialized AST visitor trait and walk functions that only visit statements.
use rustpython_ast::ElifElseClause;
use rustpython_parser::ast::{self, ExceptHandler, MatchCase, Stmt};
use rustpython_ast::{self as ast, ElifElseClause, ExceptHandler, MatchCase, Stmt};
/// A trait for AST visitors that only need to visit statements.
pub trait StatementVisitor<'a> {

View file

@@ -1,20 +1,14 @@
use crate::source_code::Locator;
use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
use ruff_text_size::TextRange;
use rustpython_ast::{ElifElseClause, Expr, Ranged, Stmt, StmtIf};
use rustpython_parser::{lexer, Mode, Tok};
use std::iter;
/// Return the `Range` of the first `Elif` or `Else` token in an `If` statement.
pub fn elif_else_range(clause: &ElifElseClause, locator: &Locator) -> Option<TextRange> {
    let contents = &locator.contents()[clause.range];
    let token = lexer::lex_starts_at(contents, Mode::Module, clause.range.start())
        .flatten()
        .next()?;
    if matches!(token.0, Tok::Elif | Tok::Else) {
        Some(token.1)
    } else {
        None
    }
}
pub fn elif_else_range(clause: &ElifElseClause, contents: &str) -> Option<TextRange> {
    let token = SimpleTokenizer::new(contents, clause.range)
        .skip_trivia()
        .next()?;
    matches!(token.kind, SimpleTokenKind::Elif | SimpleTokenKind::Else).then_some(token.range())
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
@@ -49,15 +43,13 @@ pub fn if_elif_branches(stmt_if: &StmtIf) -> impl Iterator<Item = IfElifBranch>
#[cfg(test)]
mod test {
use crate::source_code::Locator;
use crate::stmt_if::elif_else_range;
use anyhow::Result;
use ruff_text_size::TextSize;
use rustpython_ast::Stmt;
use rustpython_parser::Parse;
use rustpython_parser::{Parse, ParseError};
#[test]
fn extract_elif_else_range() -> Result<()> {
fn extract_elif_else_range() -> Result<(), ParseError> {
let contents = "if a:
    ...
elif b:
@@ -65,8 +57,7 @@ elif b:
";
let stmt = Stmt::parse(contents, "<filename>")?;
let stmt = Stmt::as_if_stmt(&stmt).unwrap();
let locator = Locator::new(contents);
let range = elif_else_range(&stmt.elif_else_clauses[0], &locator).unwrap();
let range = elif_else_range(&stmt.elif_else_clauses[0], contents).unwrap();
assert_eq!(range.start(), TextSize::from(14));
assert_eq!(range.end(), TextSize::from(18));
@@ -77,8 +68,7 @@ else:
";
let stmt = Stmt::parse(contents, "<filename>")?;
let stmt = Stmt::as_if_stmt(&stmt).unwrap();
let locator = Locator::new(contents);
let range = elif_else_range(&stmt.elif_else_clauses[0], &locator).unwrap();
let range = elif_else_range(&stmt.elif_else_clauses[0], contents).unwrap();
assert_eq!(range.start(), TextSize::from(14));
assert_eq!(range.end(), TextSize::from(18));

View file

@@ -1,447 +0,0 @@
use rustpython_parser::Tok;
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum TokenKind {
/// Token value for a name, commonly known as an identifier.
Name,
/// Token value for an integer.
Int,
/// Token value for a floating point number.
Float,
/// Token value for a complex number.
Complex,
/// Token value for a string.
String,
/// Token value for a Jupyter magic command.
MagicCommand,
/// Token value for a comment. These are filtered out of the token stream prior to parsing.
Comment,
/// Token value for a newline.
Newline,
/// Token value for a newline that is not a logical line break. These are filtered out of
/// the token stream prior to parsing.
NonLogicalNewline,
/// Token value for an indent.
Indent,
/// Token value for a dedent.
Dedent,
EndOfFile,
/// Token value for a left parenthesis `(`.
Lpar,
/// Token value for a right parenthesis `)`.
Rpar,
/// Token value for a left square bracket `[`.
Lsqb,
/// Token value for a right square bracket `]`.
Rsqb,
/// Token value for a colon `:`.
Colon,
/// Token value for a comma `,`.
Comma,
/// Token value for a semicolon `;`.
Semi,
/// Token value for plus `+`.
Plus,
/// Token value for minus `-`.
Minus,
/// Token value for star `*`.
Star,
/// Token value for slash `/`.
Slash,
/// Token value for vertical bar `|`.
Vbar,
/// Token value for ampersand `&`.
Amper,
/// Token value for less than `<`.
Less,
/// Token value for greater than `>`.
Greater,
/// Token value for equal `=`.
Equal,
/// Token value for dot `.`.
Dot,
/// Token value for percent `%`.
Percent,
    /// Token value for left brace `{`.
    Lbrace,
    /// Token value for right brace `}`.
    Rbrace,
/// Token value for double equal `==`.
EqEqual,
/// Token value for not equal `!=`.
NotEqual,
/// Token value for less than or equal `<=`.
LessEqual,
/// Token value for greater than or equal `>=`.
GreaterEqual,
/// Token value for tilde `~`.
Tilde,
/// Token value for caret `^`.
CircumFlex,
/// Token value for left shift `<<`.
LeftShift,
/// Token value for right shift `>>`.
RightShift,
/// Token value for double star `**`.
DoubleStar,
/// Token value for double star equal `**=`.
DoubleStarEqual,
/// Token value for plus equal `+=`.
PlusEqual,
/// Token value for minus equal `-=`.
MinusEqual,
/// Token value for star equal `*=`.
StarEqual,
/// Token value for slash equal `/=`.
SlashEqual,
/// Token value for percent equal `%=`.
PercentEqual,
/// Token value for ampersand equal `&=`.
AmperEqual,
/// Token value for vertical bar equal `|=`.
VbarEqual,
/// Token value for caret equal `^=`.
CircumflexEqual,
/// Token value for left shift equal `<<=`.
LeftShiftEqual,
/// Token value for right shift equal `>>=`.
RightShiftEqual,
/// Token value for double slash `//`.
DoubleSlash,
/// Token value for double slash equal `//=`.
DoubleSlashEqual,
/// Token value for colon equal `:=`.
ColonEqual,
/// Token value for at `@`.
At,
/// Token value for at equal `@=`.
AtEqual,
/// Token value for arrow `->`.
Rarrow,
/// Token value for ellipsis `...`.
Ellipsis,
// Self documenting.
// Keywords (alphabetically):
False,
None,
True,
And,
As,
Assert,
Async,
Await,
Break,
Class,
Continue,
Def,
Del,
Elif,
Else,
Except,
Finally,
For,
From,
Global,
If,
Import,
In,
Is,
Lambda,
Nonlocal,
Not,
Or,
Pass,
Raise,
Return,
Try,
While,
Match,
Type,
Case,
With,
Yield,
// RustPython specific.
StartModule,
StartInteractive,
StartExpression,
}
impl TokenKind {
#[inline]
pub const fn is_newline(&self) -> bool {
matches!(self, TokenKind::Newline | TokenKind::NonLogicalNewline)
}
#[inline]
pub const fn is_unary(&self) -> bool {
matches!(self, TokenKind::Plus | TokenKind::Minus)
}
#[inline]
pub const fn is_keyword(&self) -> bool {
matches!(
self,
TokenKind::False
| TokenKind::True
| TokenKind::None
| TokenKind::And
| TokenKind::As
| TokenKind::Assert
| TokenKind::Await
| TokenKind::Break
| TokenKind::Class
| TokenKind::Continue
| TokenKind::Def
| TokenKind::Del
| TokenKind::Elif
| TokenKind::Else
| TokenKind::Except
| TokenKind::Finally
| TokenKind::For
| TokenKind::From
| TokenKind::Global
| TokenKind::If
| TokenKind::Import
| TokenKind::In
| TokenKind::Is
| TokenKind::Lambda
| TokenKind::Nonlocal
| TokenKind::Not
| TokenKind::Or
| TokenKind::Pass
| TokenKind::Raise
| TokenKind::Return
| TokenKind::Try
| TokenKind::While
| TokenKind::With
| TokenKind::Yield
)
}
#[inline]
pub const fn is_operator(&self) -> bool {
matches!(
self,
TokenKind::Lpar
| TokenKind::Rpar
| TokenKind::Lsqb
| TokenKind::Rsqb
| TokenKind::Comma
| TokenKind::Semi
| TokenKind::Plus
| TokenKind::Minus
| TokenKind::Star
| TokenKind::Slash
| TokenKind::Vbar
| TokenKind::Amper
| TokenKind::Less
| TokenKind::Greater
| TokenKind::Equal
| TokenKind::Dot
| TokenKind::Percent
| TokenKind::Lbrace
| TokenKind::Rbrace
| TokenKind::EqEqual
| TokenKind::NotEqual
| TokenKind::LessEqual
| TokenKind::GreaterEqual
| TokenKind::Tilde
| TokenKind::CircumFlex
| TokenKind::LeftShift
| TokenKind::RightShift
| TokenKind::DoubleStar
| TokenKind::PlusEqual
| TokenKind::MinusEqual
| TokenKind::StarEqual
| TokenKind::SlashEqual
| TokenKind::PercentEqual
| TokenKind::AmperEqual
| TokenKind::VbarEqual
| TokenKind::CircumflexEqual
| TokenKind::LeftShiftEqual
| TokenKind::RightShiftEqual
| TokenKind::DoubleStarEqual
| TokenKind::DoubleSlash
| TokenKind::DoubleSlashEqual
| TokenKind::At
| TokenKind::AtEqual
| TokenKind::Rarrow
| TokenKind::Ellipsis
| TokenKind::ColonEqual
| TokenKind::Colon
| TokenKind::And
| TokenKind::Or
| TokenKind::Not
| TokenKind::In
| TokenKind::Is
)
}
#[inline]
pub const fn is_singleton(&self) -> bool {
matches!(self, TokenKind::False | TokenKind::True | TokenKind::None)
}
#[inline]
pub const fn is_trivia(&self) -> bool {
matches!(
self,
TokenKind::Newline
| TokenKind::Indent
| TokenKind::Dedent
| TokenKind::NonLogicalNewline
| TokenKind::Comment
)
}
#[inline]
pub const fn is_arithmetic(&self) -> bool {
matches!(
self,
TokenKind::DoubleStar
| TokenKind::Star
| TokenKind::Plus
| TokenKind::Minus
| TokenKind::Slash
| TokenKind::DoubleSlash
| TokenKind::At
)
}
#[inline]
pub const fn is_bitwise_or_shift(&self) -> bool {
matches!(
self,
TokenKind::LeftShift
| TokenKind::LeftShiftEqual
| TokenKind::RightShift
| TokenKind::RightShiftEqual
| TokenKind::Amper
| TokenKind::AmperEqual
| TokenKind::Vbar
| TokenKind::VbarEqual
| TokenKind::CircumFlex
| TokenKind::CircumflexEqual
| TokenKind::Tilde
)
}
#[inline]
pub const fn is_soft_keyword(&self) -> bool {
matches!(self, TokenKind::Match | TokenKind::Case)
}
pub const fn from_token(token: &Tok) -> Self {
match token {
Tok::Name { .. } => TokenKind::Name,
Tok::Int { .. } => TokenKind::Int,
Tok::Float { .. } => TokenKind::Float,
Tok::Complex { .. } => TokenKind::Complex,
Tok::String { .. } => TokenKind::String,
Tok::MagicCommand { .. } => TokenKind::MagicCommand,
Tok::Comment(_) => TokenKind::Comment,
Tok::Newline => TokenKind::Newline,
Tok::NonLogicalNewline => TokenKind::NonLogicalNewline,
Tok::Indent => TokenKind::Indent,
Tok::Dedent => TokenKind::Dedent,
Tok::EndOfFile => TokenKind::EndOfFile,
Tok::Lpar => TokenKind::Lpar,
Tok::Rpar => TokenKind::Rpar,
Tok::Lsqb => TokenKind::Lsqb,
Tok::Rsqb => TokenKind::Rsqb,
Tok::Colon => TokenKind::Colon,
Tok::Comma => TokenKind::Comma,
Tok::Semi => TokenKind::Semi,
Tok::Plus => TokenKind::Plus,
Tok::Minus => TokenKind::Minus,
Tok::Star => TokenKind::Star,
Tok::Slash => TokenKind::Slash,
Tok::Vbar => TokenKind::Vbar,
Tok::Amper => TokenKind::Amper,
Tok::Less => TokenKind::Less,
Tok::Greater => TokenKind::Greater,
Tok::Equal => TokenKind::Equal,
Tok::Dot => TokenKind::Dot,
Tok::Percent => TokenKind::Percent,
Tok::Lbrace => TokenKind::Lbrace,
Tok::Rbrace => TokenKind::Rbrace,
Tok::EqEqual => TokenKind::EqEqual,
Tok::NotEqual => TokenKind::NotEqual,
Tok::LessEqual => TokenKind::LessEqual,
Tok::GreaterEqual => TokenKind::GreaterEqual,
Tok::Tilde => TokenKind::Tilde,
Tok::CircumFlex => TokenKind::CircumFlex,
Tok::LeftShift => TokenKind::LeftShift,
Tok::RightShift => TokenKind::RightShift,
Tok::DoubleStar => TokenKind::DoubleStar,
Tok::DoubleStarEqual => TokenKind::DoubleStarEqual,
Tok::PlusEqual => TokenKind::PlusEqual,
Tok::MinusEqual => TokenKind::MinusEqual,
Tok::StarEqual => TokenKind::StarEqual,
Tok::SlashEqual => TokenKind::SlashEqual,
Tok::PercentEqual => TokenKind::PercentEqual,
Tok::AmperEqual => TokenKind::AmperEqual,
Tok::VbarEqual => TokenKind::VbarEqual,
Tok::CircumflexEqual => TokenKind::CircumflexEqual,
Tok::LeftShiftEqual => TokenKind::LeftShiftEqual,
Tok::RightShiftEqual => TokenKind::RightShiftEqual,
Tok::DoubleSlash => TokenKind::DoubleSlash,
Tok::DoubleSlashEqual => TokenKind::DoubleSlashEqual,
Tok::ColonEqual => TokenKind::ColonEqual,
Tok::At => TokenKind::At,
Tok::AtEqual => TokenKind::AtEqual,
Tok::Rarrow => TokenKind::Rarrow,
Tok::Ellipsis => TokenKind::Ellipsis,
Tok::False => TokenKind::False,
Tok::None => TokenKind::None,
Tok::True => TokenKind::True,
Tok::And => TokenKind::And,
Tok::As => TokenKind::As,
Tok::Assert => TokenKind::Assert,
Tok::Async => TokenKind::Async,
Tok::Await => TokenKind::Await,
Tok::Break => TokenKind::Break,
Tok::Class => TokenKind::Class,
Tok::Continue => TokenKind::Continue,
Tok::Def => TokenKind::Def,
Tok::Del => TokenKind::Del,
Tok::Elif => TokenKind::Elif,
Tok::Else => TokenKind::Else,
Tok::Except => TokenKind::Except,
Tok::Finally => TokenKind::Finally,
Tok::For => TokenKind::For,
Tok::From => TokenKind::From,
Tok::Global => TokenKind::Global,
Tok::If => TokenKind::If,
Tok::Import => TokenKind::Import,
Tok::In => TokenKind::In,
Tok::Is => TokenKind::Is,
Tok::Lambda => TokenKind::Lambda,
Tok::Nonlocal => TokenKind::Nonlocal,
Tok::Not => TokenKind::Not,
Tok::Or => TokenKind::Or,
Tok::Pass => TokenKind::Pass,
Tok::Raise => TokenKind::Raise,
Tok::Return => TokenKind::Return,
Tok::Try => TokenKind::Try,
Tok::While => TokenKind::While,
Tok::Match => TokenKind::Match,
Tok::Case => TokenKind::Case,
Tok::Type => TokenKind::Type,
Tok::With => TokenKind::With,
Tok::Yield => TokenKind::Yield,
Tok::StartModule => TokenKind::StartModule,
Tok::StartInteractive => TokenKind::StartInteractive,
Tok::StartExpression => TokenKind::StartExpression,
}
}
}
impl From<&Tok> for TokenKind {
fn from(value: &Tok) -> Self {
Self::from_token(value)
}
}
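
// Illustrative sketch (hypothetical): converting a parser token and querying
// the predicate helpers above.
fn token_kind_sketch() {
    let kind = TokenKind::from(&Tok::Plus);
    assert!(kind.is_operator());
    assert!(kind.is_arithmetic());
    assert!(!kind.is_keyword());
}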

View file

@@ -1,6 +1,5 @@
//! Utilities for manually traversing a Python AST.
use rustpython_ast::{ExceptHandler, Stmt, Suite};
use rustpython_parser::ast;
use rustpython_ast::{self as ast, ExceptHandler, Stmt, Suite};
/// Given a [`Stmt`] and its parent, return the [`Suite`] that contains the [`Stmt`].
pub fn suite<'a>(stmt: &'a Stmt, parent: &'a Stmt) -> Option<&'a Suite> {

View file

@@ -1,6 +1,6 @@
use std::ops::Deref;
use rustpython_parser::ast::{Expr, Stmt};
use rustpython_ast::{Expr, Stmt};
#[derive(Clone)]
pub enum Node<'a> {

View file

@@ -1,50 +0,0 @@
use anyhow::Result;
use ruff_text_size::{TextLen, TextRange};
use rustpython_parser::ast::Expr;
use rustpython_parser::Parse;
use crate::relocate::relocate_expr;
use crate::source_code::Locator;
use crate::str;
#[derive(is_macro::Is, Copy, Clone)]
pub enum AnnotationKind {
/// The annotation is defined as part of a simple string literal,
/// e.g. `x: "List[int]" = []`. Annotations within simple literals
/// can be accurately located. For example, we can underline specific
/// expressions within the annotation and apply automatic fixes, which is
/// not possible for complex string literals.
Simple,
/// The annotation is defined as part of a complex string literal, such as
/// a literal containing an implicit concatenation or escaped characters,
/// e.g. `x: "List" "[int]" = []`. These are comparatively rare, but valid.
Complex,
}
/// Parse a type annotation from a string.
pub fn parse_type_annotation(
value: &str,
range: TextRange,
locator: &Locator,
) -> Result<(Expr, AnnotationKind)> {
let expression = &locator.contents()[range];
if str::raw_contents(expression).map_or(false, |body| body == value) {
// The annotation is considered "simple" if and only if the raw representation (e.g.,
// `List[int]` within "List[int]") exactly matches the parsed representation. This
// isn't the case, e.g., for implicit concatenations, or for annotations that contain
// escaped quotes.
let leading_quote = str::leading_quote(expression).unwrap();
let expr = Expr::parse_starts_at(
value,
"<filename>",
range.start() + leading_quote.text_len(),
)?;
Ok((expr, AnnotationKind::Simple))
} else {
// Otherwise, consider this a "complex" annotation.
let mut expr = Expr::parse(value, "<filename>")?;
relocate_expr(&mut expr, range);
Ok((expr, AnnotationKind::Complex))
}
}
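
// Illustrative sketch (hypothetical offsets): parsing the annotation in
// `x: "List[int]" = []`; the raw body matches the value, so the result is "simple".
fn parse_type_annotation_sketch() -> Result<()> {
    use ruff_text_size::TextSize;
    let source = r#"x: "List[int]" = []"#;
    let locator = Locator::new(source);
    // Range of the string literal `"List[int]"`, including its quotes.
    let range = TextRange::new(TextSize::from(3), TextSize::from(14));
    let (_expr, kind) = parse_type_annotation("List[int]", range, &locator)?;
    assert!(kind.is_simple());
    Ok(())
}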

View file

@@ -2,11 +2,10 @@
pub mod preorder;
use rustpython_ast::ElifElseClause;
use rustpython_parser::ast::{
    self, Alias, Arg, Arguments, BoolOp, CmpOp, Comprehension, Decorator, ExceptHandler, Expr,
    ExprContext, Keyword, MatchCase, Operator, Pattern, Stmt, TypeParam, TypeParamTypeVar, UnaryOp,
    WithItem,
};
use rustpython_ast::{
    self as ast, Alias, Arg, Arguments, BoolOp, CmpOp, Comprehension, Decorator, ElifElseClause,
    ExceptHandler, Expr, ExprContext, Keyword, MatchCase, Operator, Pattern, Stmt, TypeParam,
    TypeParamTypeVar, UnaryOp, WithItem,
};
/// A trait for AST visitors. Visits all nodes in the AST recursively in evaluation-order.
@@ -803,8 +802,9 @@ mod tests {
use std::fmt::{Debug, Write};
use insta::assert_snapshot;
use rustpython_ast as ast;
use rustpython_parser::lexer::lex;
use rustpython_parser::{ast, parse_tokens, Mode};
use rustpython_parser::{parse_tokens, Mode};
use crate::node::AnyNodeRef;
use crate::visitor::{

View file

@@ -1,8 +1,7 @@
use rustpython_ast::{ArgWithDefault, ElifElseClause, Mod, TypeIgnore};
use rustpython_parser::ast::{
    self, Alias, Arg, Arguments, BoolOp, CmpOp, Comprehension, Constant, Decorator, ExceptHandler,
    Expr, Keyword, MatchCase, Operator, Pattern, Stmt, TypeParam, TypeParamTypeVar, UnaryOp,
    WithItem,
};
use rustpython_ast::{
    self as ast, Alias, Arg, ArgWithDefault, Arguments, BoolOp, CmpOp, Comprehension, Constant,
    Decorator, ElifElseClause, ExceptHandler, Expr, Keyword, MatchCase, Mod, Operator, Pattern,
    Stmt, TypeIgnore, TypeParam, TypeParamTypeVar, UnaryOp, WithItem,
};
/// Visitor that traverses all nodes recursively in pre-order.

View file

@@ -1,9 +1,10 @@
use ruff_text_size::{TextRange, TextSize};
use rustpython_parser::ast::Ranged;
use rustpython_ast::{Ranged, Stmt};
use ruff_python_trivia::is_python_whitespace;
use crate::source_code::Locator;
use ruff_python_trivia::{
has_trailing_content, indentation_at_offset, is_python_whitespace, PythonWhitespace,
};
use ruff_source_file::{newlines::UniversalNewlineIterator, Locator};
/// Extract the leading indentation from a line.
#[inline]
@@ -14,14 +15,40 @@ where
indentation_at_offset(locator, located.start())
}
/// Extract the leading indentation from a line.
pub fn indentation_at_offset<'a>(locator: &'a Locator, offset: TextSize) -> Option<&'a str> {
    let line_start = locator.line_start(offset);
    let indentation = &locator.contents()[TextRange::new(line_start, offset)];
    if indentation.chars().all(is_python_whitespace) {
        Some(indentation)
    } else {
        None
    }
}
/// Return the end offset of the empty lines following a statement.
pub fn trailing_lines_end(stmt: &Stmt, locator: &Locator) -> TextSize {
    let line_end = locator.full_line_end(stmt.end());
    let rest = &locator.contents()[usize::from(line_end)..];
    UniversalNewlineIterator::with_offset(rest, line_end)
        .take_while(|line| line.trim_whitespace().is_empty())
        .last()
        .map_or(line_end, |line| line.full_end())
}
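
// Illustrative sketch (hypothetical; assumes `rustpython_parser` is available
// here for parsing): two trailing blank lines extend the end offset.
fn trailing_lines_end_sketch() -> Result<(), rustpython_parser::ParseError> {
    use rustpython_parser::Parse;
    let contents = "x = 1\n\n\n";
    let locator = Locator::new(contents);
    let stmt = Stmt::parse(contents, "<filename>")?;
    // The statement's line ends at offset 6; the blank lines extend it to 8.
    assert_eq!(trailing_lines_end(&stmt, &locator), TextSize::from(8));
    Ok(())
}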
/// Return `true` if a `Stmt` appears to be part of a multi-statement line, with
/// other statements following it.
pub fn followed_by_multi_statement_line(stmt: &Stmt, locator: &Locator) -> bool {
has_trailing_content(stmt.end(), locator)
}
/// If a [`Ranged`] has a trailing comment, return the index of the hash.
pub fn trailing_comment_start_offset<T>(located: &T, locator: &Locator) -> Option<TextSize>
where
T: Ranged,
{
let line_end = locator.line_end(located.end());
let trailing = &locator.contents()[TextRange::new(located.end(), line_end)];
for (index, char) in trailing.char_indices() {
if char == '#' {
return TextSize::try_from(index).ok();
}
if !is_python_whitespace(char) {
return None;
}
}
None
}
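
// Illustrative sketch (hypothetical; assumes `rustpython_parser` for parsing):
// the returned offset is relative to the node's end, here skipping two spaces.
fn trailing_comment_start_offset_sketch() -> Result<(), rustpython_parser::ParseError> {
    use rustpython_parser::Parse;
    let contents = "x = 1  # note\n";
    let locator = Locator::new(contents);
    let stmt = Stmt::parse(contents, "<filename>")?;
    assert_eq!(
        trailing_comment_start_offset(&stmt, &locator),
        Some(TextSize::from(2))
    );
    Ok(())
}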