mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-22 11:24:35 +00:00

Update to [Rust 1.74](https://blog.rust-lang.org/2023/11/16/Rust-1.74.0.html) and use the new clippy lints table.

The update itself introduced a new clippy lint about superfluous hashes in raw strings, so the superfluous hashes were removed.

I moved our lint config from `rustflags` to the newly stabilized [workspace.lints](https://doc.rust-lang.org/stable/cargo/reference/workspaces.html#the-lints-table). One consequence is that we have to set `unsafe_code = "warn"` instead of `"forbid"`, because the latter now actually bans unsafe code:

```
error[E0453]: allow(unsafe_code) incompatible with previous forbid
  --> crates/ruff_source_file/src/newlines.rs:62:17
   |
62 |         #[allow(unsafe_code)]
   |                 ^^^^^^^^^^^ overruled by previous forbid
   |
   = note: `forbid` lint level was set on command line
```

---------

Co-authored-by: Charlie Marsh <charlie.r.marsh@gmail.com>
258 lines
7.4 KiB
Rust
258 lines
7.4 KiB
Rust
//! Extract [`TextRange`] information from AST nodes.
//!
//! For example, given:
//! ```python
//! try:
//!     ...
//! except Exception as e:
//!     ...
//! ```
//!
//! This module can be used to identify the [`TextRange`] of the `except` token.
|
|
|
|
use crate::{self as ast, Alias, ExceptHandler, Parameter, ParameterWithDefault, Stmt};
|
|
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
|
|
|
|
use ruff_python_trivia::{is_python_whitespace, Cursor};
|
|
|
|
pub trait Identifier {
|
|
/// Return the [`TextRange`] of the identifier in the given AST node.
|
|
fn identifier(&self) -> TextRange;
|
|
}
|
|
|
|
impl Identifier for ast::StmtFunctionDef {
|
|
/// Return the [`TextRange`] of the identifier in the given function definition.
|
|
///
|
|
/// For example, return the range of `f` in:
|
|
/// ```python
|
|
/// def f():
|
|
/// ...
|
|
/// ```
|
|
fn identifier(&self) -> TextRange {
|
|
self.name.range()
|
|
}
|
|
}
|
|
|
|
impl Identifier for ast::StmtClassDef {
|
|
/// Return the [`TextRange`] of the identifier in the given class definition.
|
|
///
|
|
/// For example, return the range of `C` in:
|
|
/// ```python
|
|
/// class C():
|
|
/// ...
|
|
/// ```
|
|
fn identifier(&self) -> TextRange {
|
|
self.name.range()
|
|
}
|
|
}
|
|
|
|
impl Identifier for Stmt {
|
|
/// Return the [`TextRange`] of the identifier in the given statement.
|
|
///
|
|
/// For example, return the range of `f` in:
|
|
/// ```python
|
|
/// def f():
|
|
/// ...
|
|
/// ```
|
|
fn identifier(&self) -> TextRange {
|
|
match self {
|
|
Stmt::ClassDef(class) => class.identifier(),
|
|
Stmt::FunctionDef(function) => function.identifier(),
|
|
_ => self.range(),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Identifier for Parameter {
|
|
/// Return the [`TextRange`] for the identifier defining an [`Parameter`].
|
|
///
|
|
/// For example, return the range of `x` in:
|
|
/// ```python
|
|
/// def f(x: int):
|
|
/// ...
|
|
/// ```
|
|
fn identifier(&self) -> TextRange {
|
|
self.name.range()
|
|
}
|
|
}
|
|
|
|
impl Identifier for ParameterWithDefault {
|
|
/// Return the [`TextRange`] for the identifier defining an [`ParameterWithDefault`].
|
|
///
|
|
/// For example, return the range of `x` in:
|
|
/// ```python
|
|
/// def f(x: int = 0):
|
|
/// ...
|
|
/// ```
|
|
fn identifier(&self) -> TextRange {
|
|
self.parameter.identifier()
|
|
}
|
|
}
|
|
|
|
impl Identifier for Alias {
|
|
/// Return the [`TextRange`] for the identifier defining an [`Alias`].
|
|
///
|
|
/// For example, return the range of `x` in:
|
|
/// ```python
|
|
/// from foo import bar as x
|
|
/// ```
|
|
fn identifier(&self) -> TextRange {
|
|
self.asname
|
|
.as_ref()
|
|
.map_or_else(|| self.name.range(), Ranged::range)
|
|
}
|
|
}
|
|
|
|
/// Return the [`TextRange`] of the `except` token in an [`ExceptHandler`].
|
|
pub fn except(handler: &ExceptHandler, source: &str) -> TextRange {
|
|
IdentifierTokenizer::new(source, handler.range())
|
|
.next()
|
|
.expect("Failed to find `except` token in `ExceptHandler`")
|
|
}
|
|
|
|
/// Return the [`TextRange`] of the `else` token in a `For` or `While` statement.
|
|
pub fn else_(stmt: &Stmt, source: &str) -> Option<TextRange> {
|
|
let (Stmt::For(ast::StmtFor { body, orelse, .. })
|
|
| Stmt::While(ast::StmtWhile { body, orelse, .. })) = stmt
|
|
else {
|
|
return None;
|
|
};
|
|
|
|
if orelse.is_empty() {
|
|
return None;
|
|
}
|
|
|
|
IdentifierTokenizer::starts_at(
|
|
body.last().expect("Expected body to be non-empty").end(),
|
|
source,
|
|
)
|
|
.next()
|
|
}
|
|
|
|
/// Return `true` if the given character can begin a Python identifier.
///
/// A character qualifies when it is an underscore or an alphabetic character
/// (as defined by [`char::is_alphabetic`]).
fn is_python_identifier_start(c: char) -> bool {
    match c {
        '_' => true,
        other => other.is_alphabetic(),
    }
}
|
|
|
|
/// Return `true` if the given character can appear after the first character of a
/// Python identifier.
///
/// A character qualifies when it is an underscore or an alphanumeric character
/// (as defined by [`char::is_alphanumeric`]). Unlike the start of an identifier,
/// digits are accepted here.
fn is_python_identifier_continue(c: char) -> bool {
    match c {
        '_' => true,
        other => other.is_alphanumeric(),
    }
}
|
|
|
|
/// Simple zero allocation tokenizer for Python identifiers.
|
|
///
|
|
/// The tokenizer must operate over a range that can only contain identifiers, keywords, and
|
|
/// comments (along with whitespace and continuation characters). It does not support other tokens,
|
|
/// like operators, literals, or delimiters. It also does not differentiate between keywords and
|
|
/// identifiers, treating every valid token as an "identifier".
|
|
///
|
|
/// This is useful for cases like, e.g., identifying the alias name in an aliased import (`bar` in
|
|
/// `import foo as bar`), where we're guaranteed to only have identifiers and keywords in the
|
|
/// relevant range.
|
|
pub(crate) struct IdentifierTokenizer<'a> {
|
|
cursor: Cursor<'a>,
|
|
offset: TextSize,
|
|
}
|
|
|
|
impl<'a> IdentifierTokenizer<'a> {
|
|
pub(crate) fn new(source: &'a str, range: TextRange) -> Self {
|
|
Self {
|
|
cursor: Cursor::new(&source[range]),
|
|
offset: range.start(),
|
|
}
|
|
}
|
|
|
|
pub(crate) fn starts_at(offset: TextSize, source: &'a str) -> Self {
|
|
let range = TextRange::new(offset, source.text_len());
|
|
Self::new(source, range)
|
|
}
|
|
|
|
fn next_token(&mut self) -> Option<TextRange> {
|
|
while let Some(c) = {
|
|
self.offset += self.cursor.token_len();
|
|
self.cursor.start_token();
|
|
self.cursor.bump()
|
|
} {
|
|
match c {
|
|
c if is_python_identifier_start(c) => {
|
|
self.cursor.eat_while(is_python_identifier_continue);
|
|
return Some(TextRange::at(self.offset, self.cursor.token_len()));
|
|
}
|
|
|
|
c if is_python_whitespace(c) => {
|
|
self.cursor.eat_while(is_python_whitespace);
|
|
}
|
|
|
|
'#' => {
|
|
self.cursor.eat_while(|c| !matches!(c, '\n' | '\r'));
|
|
}
|
|
|
|
'\r' => {
|
|
self.cursor.eat_char('\n');
|
|
}
|
|
|
|
'\n' => {
|
|
// Nothing to do.
|
|
}
|
|
|
|
'\\' => {
|
|
// Nothing to do.
|
|
}
|
|
|
|
_ => {
|
|
// Nothing to do.
|
|
}
|
|
};
|
|
}
|
|
|
|
None
|
|
}
|
|
}
|
|
|
|
impl Iterator for IdentifierTokenizer<'_> {
|
|
type Item = TextRange;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
self.next_token()
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::IdentifierTokenizer;
    use ruff_text_size::{TextLen, TextRange, TextSize};

    /// The tokenizer yields `global` followed by each name, skipping commas and spaces.
    #[test]
    fn extract_global_names() {
        let contents = r"global X,Y, Z".trim();
        let whole = TextRange::new(TextSize::new(0), contents.text_len());
        let mut names = IdentifierTokenizer::new(contents, whole);

        // (expected text, start offset, end offset) for each token, in order.
        let expected: [(&str, u32, u32); 4] = [
            ("global", 0, 6),
            ("X", 7, 8),
            ("Y", 9, 10),
            ("Z", 12, 13),
        ];

        for (text, start, end) in expected {
            let range = names.next_token().unwrap();
            assert_eq!(&contents[range], text);
            assert_eq!(
                range,
                TextRange::new(TextSize::from(start), TextSize::from(end))
            );
        }
    }
}
|