mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-26 20:09:22 +00:00

<!-- Thank you for contributing to Ruff! To help us out with reviewing, please consider the following: - Does this pull request include a summary of the change? (See below.) - Does this pull request include a descriptive title? - Does this pull request include references to any relevant issues? --> ## Summary I decided to disable the new [`needless_continue`](https://rust-lang.github.io/rust-clippy/master/index.html#needless_continue) rule because I often found the explicit `continue` more readable over an empty block or having to invert the condition of an other branch. ## Test Plan `cargo test` --------- Co-authored-by: Alex Waygood <Alex.Waygood@Gmail.com>
258 lines
7.4 KiB
Rust
258 lines
7.4 KiB
Rust
//! Extract [`TextRange`] information from AST nodes.
|
|
//!
|
|
//! For example, given:
|
|
//! ```python
|
|
//! try:
|
|
//! ...
|
|
//! except Exception as e:
|
|
//! ...
|
|
//! ```
|
|
//!
|
|
//! This module can be used to identify the [`TextRange`] of the `except` token.
|
|
|
|
use crate::{self as ast, Alias, ExceptHandler, Parameter, ParameterWithDefault, Stmt};
|
|
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
|
|
|
|
use ruff_python_trivia::{is_python_whitespace, Cursor};
|
|
|
|
pub trait Identifier {
|
|
/// Return the [`TextRange`] of the identifier in the given AST node.
|
|
fn identifier(&self) -> TextRange;
|
|
}
|
|
|
|
impl Identifier for ast::StmtFunctionDef {
|
|
/// Return the [`TextRange`] of the identifier in the given function definition.
|
|
///
|
|
/// For example, return the range of `f` in:
|
|
/// ```python
|
|
/// def f():
|
|
/// ...
|
|
/// ```
|
|
fn identifier(&self) -> TextRange {
|
|
self.name.range()
|
|
}
|
|
}
|
|
|
|
impl Identifier for ast::StmtClassDef {
|
|
/// Return the [`TextRange`] of the identifier in the given class definition.
|
|
///
|
|
/// For example, return the range of `C` in:
|
|
/// ```python
|
|
/// class C():
|
|
/// ...
|
|
/// ```
|
|
fn identifier(&self) -> TextRange {
|
|
self.name.range()
|
|
}
|
|
}
|
|
|
|
impl Identifier for Stmt {
|
|
/// Return the [`TextRange`] of the identifier in the given statement.
|
|
///
|
|
/// For example, return the range of `f` in:
|
|
/// ```python
|
|
/// def f():
|
|
/// ...
|
|
/// ```
|
|
fn identifier(&self) -> TextRange {
|
|
match self {
|
|
Stmt::ClassDef(class) => class.identifier(),
|
|
Stmt::FunctionDef(function) => function.identifier(),
|
|
_ => self.range(),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Identifier for Parameter {
|
|
/// Return the [`TextRange`] for the identifier defining an [`Parameter`].
|
|
///
|
|
/// For example, return the range of `x` in:
|
|
/// ```python
|
|
/// def f(x: int):
|
|
/// ...
|
|
/// ```
|
|
fn identifier(&self) -> TextRange {
|
|
self.name.range()
|
|
}
|
|
}
|
|
|
|
impl Identifier for ParameterWithDefault {
|
|
/// Return the [`TextRange`] for the identifier defining an [`ParameterWithDefault`].
|
|
///
|
|
/// For example, return the range of `x` in:
|
|
/// ```python
|
|
/// def f(x: int = 0):
|
|
/// ...
|
|
/// ```
|
|
fn identifier(&self) -> TextRange {
|
|
self.parameter.identifier()
|
|
}
|
|
}
|
|
|
|
impl Identifier for Alias {
|
|
/// Return the [`TextRange`] for the identifier defining an [`Alias`].
|
|
///
|
|
/// For example, return the range of `x` in:
|
|
/// ```python
|
|
/// from foo import bar as x
|
|
/// ```
|
|
fn identifier(&self) -> TextRange {
|
|
self.asname
|
|
.as_ref()
|
|
.map_or_else(|| self.name.range(), Ranged::range)
|
|
}
|
|
}
|
|
|
|
/// Return the [`TextRange`] of the `except` token in an [`ExceptHandler`].
|
|
pub fn except(handler: &ExceptHandler, source: &str) -> TextRange {
|
|
IdentifierTokenizer::new(source, handler.range())
|
|
.next()
|
|
.expect("Failed to find `except` token in `ExceptHandler`")
|
|
}
|
|
|
|
/// Return the [`TextRange`] of the `else` token in a `For` or `While` statement.
|
|
pub fn else_(stmt: &Stmt, source: &str) -> Option<TextRange> {
|
|
let (Stmt::For(ast::StmtFor { body, orelse, .. })
|
|
| Stmt::While(ast::StmtWhile { body, orelse, .. })) = stmt
|
|
else {
|
|
return None;
|
|
};
|
|
|
|
if orelse.is_empty() {
|
|
return None;
|
|
}
|
|
|
|
IdentifierTokenizer::starts_at(
|
|
body.last().expect("Expected body to be non-empty").end(),
|
|
source,
|
|
)
|
|
.next()
|
|
}
|
|
|
|
/// Return `true` if the given character starts a valid Python identifier.
|
|
///
|
|
/// Python identifiers must start with an alphabetic character or an underscore.
|
|
fn is_python_identifier_start(c: char) -> bool {
|
|
c.is_alphabetic() || c == '_'
|
|
}
|
|
|
|
/// Return `true` if the given character is a valid Python identifier continuation character.
|
|
///
|
|
/// Python identifiers can contain alphanumeric characters and underscores, but cannot start with a
|
|
/// number.
|
|
fn is_python_identifier_continue(c: char) -> bool {
|
|
c.is_alphanumeric() || c == '_'
|
|
}
|
|
|
|
/// Simple zero allocation tokenizer for Python identifiers.
|
|
///
|
|
/// The tokenizer must operate over a range that can only contain identifiers, keywords, and
|
|
/// comments (along with whitespace and continuation characters). It does not support other tokens,
|
|
/// like operators, literals, or delimiters. It also does not differentiate between keywords and
|
|
/// identifiers, treating every valid token as an "identifier".
|
|
///
|
|
/// This is useful for cases like, e.g., identifying the alias name in an aliased import (`bar` in
|
|
/// `import foo as bar`), where we're guaranteed to only have identifiers and keywords in the
|
|
/// relevant range.
|
|
pub(crate) struct IdentifierTokenizer<'a> {
|
|
cursor: Cursor<'a>,
|
|
offset: TextSize,
|
|
}
|
|
|
|
impl<'a> IdentifierTokenizer<'a> {
|
|
pub(crate) fn new(source: &'a str, range: TextRange) -> Self {
|
|
Self {
|
|
cursor: Cursor::new(&source[range]),
|
|
offset: range.start(),
|
|
}
|
|
}
|
|
|
|
pub(crate) fn starts_at(offset: TextSize, source: &'a str) -> Self {
|
|
let range = TextRange::new(offset, source.text_len());
|
|
Self::new(source, range)
|
|
}
|
|
|
|
fn next_token(&mut self) -> Option<TextRange> {
|
|
while let Some(c) = {
|
|
self.offset += self.cursor.token_len();
|
|
self.cursor.start_token();
|
|
self.cursor.bump()
|
|
} {
|
|
match c {
|
|
c if is_python_identifier_start(c) => {
|
|
self.cursor.eat_while(is_python_identifier_continue);
|
|
return Some(TextRange::at(self.offset, self.cursor.token_len()));
|
|
}
|
|
|
|
c if is_python_whitespace(c) => {
|
|
self.cursor.eat_while(is_python_whitespace);
|
|
}
|
|
|
|
'#' => {
|
|
self.cursor.eat_while(|c| !matches!(c, '\n' | '\r'));
|
|
}
|
|
|
|
'\r' => {
|
|
self.cursor.eat_char('\n');
|
|
}
|
|
|
|
'\n' => {
|
|
// Nothing to do.
|
|
}
|
|
|
|
'\\' => {
|
|
// Nothing to do.
|
|
}
|
|
|
|
_ => {
|
|
// Nothing to do.
|
|
}
|
|
}
|
|
}
|
|
|
|
None
|
|
}
|
|
}
|
|
|
|
impl Iterator for IdentifierTokenizer<'_> {
|
|
type Item = TextRange;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
self.next_token()
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::IdentifierTokenizer;
|
|
use ruff_text_size::{TextLen, TextRange, TextSize};
|
|
|
|
#[test]
|
|
fn extract_global_names() {
|
|
let contents = r"global X,Y, Z".trim();
|
|
|
|
let mut names = IdentifierTokenizer::new(
|
|
contents,
|
|
TextRange::new(TextSize::new(0), contents.text_len()),
|
|
);
|
|
|
|
let range = names.next_token().unwrap();
|
|
assert_eq!(&contents[range], "global");
|
|
assert_eq!(range, TextRange::new(TextSize::from(0), TextSize::from(6)));
|
|
|
|
let range = names.next_token().unwrap();
|
|
assert_eq!(&contents[range], "X");
|
|
assert_eq!(range, TextRange::new(TextSize::from(7), TextSize::from(8)));
|
|
|
|
let range = names.next_token().unwrap();
|
|
assert_eq!(&contents[range], "Y");
|
|
assert_eq!(range, TextRange::new(TextSize::from(9), TextSize::from(10)));
|
|
|
|
let range = names.next_token().unwrap();
|
|
assert_eq!(&contents[range], "Z");
|
|
assert_eq!(
|
|
range,
|
|
TextRange::new(TextSize::from(12), TextSize::from(13))
|
|
);
|
|
}
|
|
}
|