Mirror of https://github.com/astral-sh/ruff.git, synced 2025-07-23 13:05:06 +00:00
Remove exception-handler lexing from unused-bound-exception fix (#5851)
## Summary

The motivation here is that it will make this rule easier to rewrite as a deferred check. Right now, we can't run this rule in the deferred phase, because it depends on the `except_handler` to power its autofix. Instead of lexing the `except_handler`, we can use the `SimpleTokenizer` from the formatter, and just lex forwards and backwards. For context, this rule detects the unused `e` in:

```python
try:
    pass
except ValueError as e:
    pass
```
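The resulting edit deletes the span between the end of the exception value and the colon, so applying the autofix rewrites the handler above to:

```python
try:
    pass
except ValueError:
    pass
```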
parent 41da52a61b
commit 4204fc002d

32 changed files with 125 additions and 112 deletions
Cargo.lock (generated, 3 changes)
@@ -2157,7 +2157,6 @@ dependencies = [
  "similar",
  "smallvec",
  "thiserror",
- "unic-ucd-ident",
 ]

 [[package]]
@@ -2195,8 +2194,10 @@ version = "0.0.0"
 name = "ruff_python_whitespace"
 version = "0.0.0"
 dependencies = [
+ "insta",
  "memchr",
  "ruff_text_size",
+ "unic-ucd-ident",
 ]

 [[package]]
@@ -4103,11 +4103,8 @@ where
         );
         if self.patch(Rule::UnusedVariable) {
             diagnostic.try_set_fix(|| {
-                pyflakes::fixes::remove_exception_handler_assignment(
-                    except_handler,
-                    self.locator,
-                )
-                .map(Fix::automatic)
+                pyflakes::fixes::remove_exception_handler_assignment(name, self.locator)
+                    .map(Fix::automatic)
             });
         }
         self.diagnostics.push(diagnostic);
@@ -1,10 +1,10 @@
-use anyhow::{bail, Ok, Result};
+use anyhow::{Context, Ok, Result};
 use ruff_text_size::TextRange;
-use rustpython_parser::ast::{ExceptHandler, Expr, Ranged};
-use rustpython_parser::{lexer, Mode};
+use rustpython_parser::ast::{Expr, Identifier, Ranged};

 use ruff_diagnostics::Edit;
 use ruff_python_ast::source_code::{Locator, Stylist};
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};

 use crate::autofix::codemods::CodegenStylist;
 use crate::cst::matchers::{match_call_mut, match_dict, match_expression};
@@ -90,31 +90,29 @@ pub(crate) fn remove_unused_positional_arguments_from_format_call(

 /// Generate a [`Edit`] to remove the binding from an exception handler.
 pub(crate) fn remove_exception_handler_assignment(
-    except_handler: &ExceptHandler,
+    bound_exception: &Identifier,
     locator: &Locator,
 ) -> Result<Edit> {
-    let contents = locator.slice(except_handler.range());
-    let mut fix_start = None;
-    let mut fix_end = None;
+    // Lex backwards, to the token just before the `as`.
+    let mut tokenizer =
+        SimpleTokenizer::up_to(bound_exception.start(), locator.contents()).skip_trivia();

-    // End of the token just before the `as` to the semicolon.
-    let mut prev = None;
-    for (tok, range) in
-        lexer::lex_starts_at(contents, Mode::Module, except_handler.start()).flatten()
-    {
-        if tok.is_as() {
-            fix_start = prev;
-        }
-        if tok.is_colon() {
-            fix_end = Some(range.start());
-            break;
-        }
-        prev = Some(range.end());
-    }
+    // Eat the `as` token.
+    let preceding = tokenizer
+        .next_back()
+        .context("expected the exception name to be preceded by `as`")?;
+    debug_assert!(matches!(preceding.kind, TokenKind::As));

-    if let (Some(start), Some(end)) = (fix_start, fix_end) {
-        Ok(Edit::deletion(start, end))
-    } else {
-        bail!("Could not find span of exception handler")
-    }
+    // Lex to the end of the preceding token, which should be the exception value.
+    let preceding = tokenizer
+        .next_back()
+        .context("expected the exception name to be preceded by a token")?;
+
+    // Lex forwards, to the `:` token.
+    let following = SimpleTokenizer::starts_at(bound_exception.end(), locator.contents())
+        .next()
+        .context("expected the exception name to be followed by a colon")?;
+    debug_assert!(matches!(following.kind, TokenKind::Colon));
+
+    Ok(Edit::deletion(preceding.end(), following.start()))
 }
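The hunk above leans on ruff's internal `SimpleTokenizer` for its backwards and forwards scans. As a rough, std-only illustration of the same idea (a hypothetical `deletion_range` helper, simplified to whitespace-only trivia; not the code in this diff):

```rust
/// Hypothetical, std-only sketch of the deletion-range computation: given the
/// source of a handler and the byte range of the bound name, find the span to
/// delete (e.g. ` as e`).
fn deletion_range(source: &str, name_start: usize, name_end: usize) -> Option<(usize, usize)> {
    // Backwards: skip whitespace, then expect (and eat) the `as` keyword.
    let before = source[..name_start].trim_end().strip_suffix("as")?;
    // Backwards again: the deletion starts where the exception value ends.
    let start = before.trim_end().len();
    // Forwards: skip whitespace, then expect the `:` token.
    let after = &source[name_end..];
    let skipped = after.len() - after.trim_start().len();
    if !after[skipped..].starts_with(':') {
        return None;
    }
    // Delete everything between the exception value and the colon.
    Some((start, name_end + skipped))
}

fn main() {
    let source = "except ValueError as e:";
    let name_start = 21; // byte range of the bound name `e`
    let (start, end) = deletion_range(source, name_start, name_start + 1).unwrap();
    assert_eq!(&source[start..end], " as e");
    assert_eq!(format!("{}{}", &source[..start], &source[end..]), "except ValueError:");
}
```

The real implementation walks proper token kinds (so comments and other trivia are handled), but the shape is the same: one backwards scan to the end of the exception value, one forwards scan to the colon.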
@@ -28,7 +28,6 @@ rustpython-parser = { workspace = true }
 serde = { workspace = true, optional = true }
 smallvec = { workspace = true }
 thiserror = { workspace = true }
-unic-ucd-ident = "0.9.0"

 [dev-dependencies]
 ruff_formatter = { path = "../ruff_formatter", features = ["serde"]}
@@ -2,10 +2,12 @@ use ruff_text_size::{TextRange, TextSize};
 use rustpython_parser::ast::Ranged;

 use ruff_formatter::{format_args, write, Argument, Arguments};
+use ruff_python_whitespace::{
+    lines_after, skip_trailing_trivia, SimpleTokenizer, Token, TokenKind,
+};

 use crate::context::NodeLevel;
 use crate::prelude::*;
-use crate::trivia::{lines_after, skip_trailing_trivia, SimpleTokenizer, Token, TokenKind};
 use crate::MagicTrailingComma;

 /// Adds parentheses and indents `content` if it doesn't fit on a line.
@@ -3,11 +3,11 @@ use rustpython_parser::ast::Ranged;

 use ruff_formatter::{format_args, write, FormatError, SourceCode};
 use ruff_python_ast::node::{AnyNodeRef, AstNode};
+use ruff_python_whitespace::{lines_after, lines_before, skip_trailing_trivia};

 use crate::comments::SourceComment;
 use crate::context::NodeLevel;
 use crate::prelude::*;
-use crate::trivia::{lines_after, lines_before, skip_trailing_trivia};

 /// Formats the leading comments of a node.
 pub(crate) fn leading_node_comments<T>(node: &T) -> FormatLeadingComments
@@ -7,14 +7,16 @@ use rustpython_parser::ast::{Expr, ExprIfExp, ExprSlice, Ranged};
 use ruff_python_ast::node::{AnyNodeRef, AstNode};
 use ruff_python_ast::source_code::Locator;
 use ruff_python_ast::whitespace;
-use ruff_python_whitespace::{PythonWhitespace, UniversalNewlines};
+use ruff_python_whitespace::{
+    first_non_trivia_token_rev, PythonWhitespace, SimpleTokenizer, Token, TokenKind,
+    UniversalNewlines,
+};

 use crate::comments::visitor::{CommentPlacement, DecoratedComment};
 use crate::expression::expr_slice::{assign_comment_in_slice, ExprSliceCommentSection};
 use crate::other::arguments::{
     assign_argument_separator_comment_placement, find_argument_separators,
 };
-use crate::trivia::{first_non_trivia_token_rev, SimpleTokenizer, Token, TokenKind};

 /// Implements the custom comment placement logic.
 pub(super) fn place_comment<'a>(
@@ -3,14 +3,13 @@ use rustpython_parser::ast::{Expr, ExprCall, Ranged};

 use ruff_formatter::write;
 use ruff_python_ast::node::AnyNodeRef;
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};

 use crate::comments::dangling_comments;
-
 use crate::expression::parentheses::{
     parenthesized, NeedsParentheses, OptionalParentheses, Parentheses,
 };
 use crate::prelude::*;
-use crate::trivia::{SimpleTokenizer, TokenKind};
 use crate::FormatNodeRule;

 #[derive(Default)]
@@ -1,16 +1,17 @@
-use crate::comments::{dangling_comments, SourceComment};
-use crate::context::PyFormatContext;
-use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
-use crate::trivia::Token;
-use crate::trivia::{first_non_trivia_token, TokenKind};
-use crate::{AsFormat, FormatNodeRule, PyFormatter};
-use ruff_formatter::prelude::{hard_line_break, line_suffix_boundary, space, text};
-use ruff_formatter::{write, Buffer, Format, FormatError, FormatResult};
-use ruff_python_ast::node::{AnyNodeRef, AstNode};
 use ruff_text_size::TextRange;
 use rustpython_parser::ast::ExprSlice;
 use rustpython_parser::ast::{Expr, Ranged};

+use ruff_formatter::prelude::{hard_line_break, line_suffix_boundary, space, text};
+use ruff_formatter::{write, Buffer, Format, FormatError, FormatResult};
+use ruff_python_ast::node::{AnyNodeRef, AstNode};
+use ruff_python_whitespace::{first_non_trivia_token, Token, TokenKind};
+
+use crate::comments::{dangling_comments, SourceComment};
+use crate::context::PyFormatContext;
+use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
+use crate::{AsFormat, FormatNodeRule, PyFormatter};

 #[derive(Default)]
 pub struct FormatExprSlice;

@@ -1,15 +1,17 @@
-use crate::comments::trailing_comments;
-use crate::context::PyFormatContext;
-use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
-use crate::trivia::{SimpleTokenizer, TokenKind};
-use crate::{AsFormat, FormatNodeRule, PyFormatter};
-use ruff_formatter::prelude::{hard_line_break, space, text};
-use ruff_formatter::{Format, FormatContext, FormatResult};
-use ruff_python_ast::node::AnyNodeRef;
 use ruff_text_size::{TextLen, TextRange};
 use rustpython_parser::ast::UnaryOp;
 use rustpython_parser::ast::{ExprUnaryOp, Ranged};

+use ruff_formatter::prelude::{hard_line_break, space, text};
+use ruff_formatter::{Format, FormatContext, FormatResult};
+use ruff_python_ast::node::AnyNodeRef;
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
+
+use crate::comments::trailing_comments;
+use crate::context::PyFormatContext;
+use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
+use crate::{AsFormat, FormatNodeRule, PyFormatter};

 #[derive(Default)]
 pub struct FormatExprUnaryOp;

@@ -1,10 +1,12 @@
-use crate::context::NodeLevel;
-use crate::prelude::*;
-use crate::trivia::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};
+use rustpython_parser::ast::Ranged;
+
 use ruff_formatter::prelude::tag::Condition;
 use ruff_formatter::{format_args, write, Argument, Arguments};
 use ruff_python_ast::node::AnyNodeRef;
-use rustpython_parser::ast::Ranged;
+use ruff_python_whitespace::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};
+
+use crate::context::NodeLevel;
+use crate::prelude::*;

 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
 pub(crate) enum OptionalParentheses {
@@ -33,7 +33,6 @@ pub(crate) mod other;
 pub(crate) mod pattern;
 mod prelude;
 pub(crate) mod statement;
-mod trivia;

 include!("../../ruff_formatter/shared_traits.rs");
@@ -1,9 +1,11 @@
 use std::usize;

+use ruff_text_size::{TextRange, TextSize};
 use rustpython_parser::ast::{Arguments, Ranged};

 use ruff_formatter::{format_args, write};
 use ruff_python_ast::node::{AnyNodeRef, AstNode};
+use ruff_python_whitespace::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};

 use crate::comments::{
     dangling_comments, leading_comments, leading_node_comments, trailing_comments,
@@ -12,9 +14,7 @@ use crate::comments::{
 use crate::context::NodeLevel;
 use crate::expression::parentheses::parenthesized;
 use crate::prelude::*;
-use crate::trivia::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};
 use crate::FormatNodeRule;
-use ruff_text_size::{TextRange, TextSize};

 #[derive(Default)]
 pub struct FormatArguments;
@@ -1,12 +1,13 @@
-use crate::comments::trailing_comments;
-
-use crate::expression::parentheses::{parenthesized, Parentheses};
-use crate::prelude::*;
-use crate::trivia::{SimpleTokenizer, TokenKind};
-use ruff_formatter::write;
 use ruff_text_size::TextRange;
 use rustpython_parser::ast::{Ranged, StmtClassDef};

+use ruff_formatter::write;
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
+
+use crate::comments::trailing_comments;
+use crate::expression::parentheses::{parenthesized, Parentheses};
+use crate::prelude::*;
+
 #[derive(Default)]
 pub struct FormatStmtClassDef;

@@ -2,12 +2,12 @@ use rustpython_parser::ast::{Ranged, StmtFunctionDef};

 use ruff_formatter::{write, FormatOwnedWithRule, FormatRefWithRule};
 use ruff_python_ast::function::AnyFunctionDefinition;
+use ruff_python_whitespace::{lines_after, skip_trailing_trivia};

 use crate::comments::{leading_comments, trailing_comments};
 use crate::context::NodeLevel;
 use crate::expression::parentheses::{optional_parentheses, Parentheses};
 use crate::prelude::*;
-use crate::trivia::{lines_after, skip_trailing_trivia};
 use crate::FormatNodeRule;

 #[derive(Default)]
@@ -3,13 +3,13 @@ use rustpython_parser::ast::{Ranged, StmtAsyncWith, StmtWith, Suite, WithItem};

 use ruff_formatter::{format_args, write, FormatError};
 use ruff_python_ast::node::AnyNodeRef;
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};

 use crate::comments::trailing_comments;
 use crate::expression::parentheses::{
     in_parentheses_only_soft_line_break_or_space, optional_parentheses,
 };
 use crate::prelude::*;
-use crate::trivia::{SimpleTokenizer, TokenKind};
 use crate::FormatNodeRule;

 pub(super) enum AnyStatementWith<'a> {
@@ -1,10 +1,12 @@
-use crate::context::NodeLevel;
-use crate::prelude::*;
-use crate::trivia::lines_before;
+use rustpython_parser::ast::{Ranged, Stmt, Suite};
+
 use ruff_formatter::{
     format_args, write, FormatOwnedWithRule, FormatRefWithRule, FormatRuleWithOptions,
 };
-use rustpython_parser::ast::{Ranged, Stmt, Suite};
+use ruff_python_whitespace::lines_before;
+
+use crate::context::NodeLevel;
+use crate::prelude::*;

 /// Level at which the [`Suite`] appears in the source code.
 #[derive(Copy, Clone, Debug)]
@@ -185,13 +187,15 @@ impl<'ast> IntoFormat<PyFormatContext<'ast>> for Suite {

 #[cfg(test)]
 mod tests {
+    use rustpython_parser::ast::Suite;
+    use rustpython_parser::Parse;
+
+    use ruff_formatter::format;
+
     use crate::comments::Comments;
     use crate::prelude::*;
     use crate::statement::suite::SuiteLevel;
     use crate::PyFormatOptions;
-    use ruff_formatter::format;
-    use rustpython_parser::ast::Suite;
-    use rustpython_parser::Parse;

     fn format_suite(level: SuiteLevel) -> String {
         let source = r#"
@@ -16,3 +16,7 @@ license = { workspace = true }
 ruff_text_size = { workspace = true }

 memchr = { workspace = true }
+unic-ucd-ident = "0.9.0"
+
+[dev-dependencies]
+insta = { workspace = true }
@@ -1,7 +1,9 @@
 mod cursor;
 mod newlines;
+mod tokenizer;
 mod whitespace;

 pub use cursor::*;
 pub use newlines::*;
+pub use tokenizer::*;
 pub use whitespace::*;
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokenize_reverse()
 ---
 [
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [
(The same one-line `source` header change repeats in the ten remaining tokenizer snapshot files.)
@@ -1,7 +1,7 @@
 use ruff_text_size::{TextLen, TextRange, TextSize};
 use unic_ucd_ident::{is_xid_continue, is_xid_start};

-use ruff_python_whitespace::{is_python_whitespace, Cursor};
+use crate::{is_python_whitespace, Cursor};

 /// Searches for the first non-trivia character in `range`.
 ///
@@ -11,7 +11,7 @@ use ruff_python_whitespace::{is_python_whitespace, Cursor};
 /// of the character, the second item the non-trivia character.
 ///
 /// Returns `None` if the range is empty or only contains trivia (whitespace or comments).
-pub(crate) fn first_non_trivia_token(offset: TextSize, code: &str) -> Option<Token> {
+pub fn first_non_trivia_token(offset: TextSize, code: &str) -> Option<Token> {
     SimpleTokenizer::starts_at(offset, code)
         .skip_trivia()
         .next()
@@ -23,14 +23,14 @@ pub(crate) fn first_non_trivia_token(offset: TextSize, code: &str) -> Option<Tok
 /// ## Notes
 ///
 /// Prefer [`first_non_trivia_token`] whenever possible because reverse lookup is expensive because of comments.
-pub(crate) fn first_non_trivia_token_rev(offset: TextSize, code: &str) -> Option<Token> {
+pub fn first_non_trivia_token_rev(offset: TextSize, code: &str) -> Option<Token> {
     SimpleTokenizer::up_to(offset, code)
         .skip_trivia()
         .next_back()
 }

 /// Returns the number of newlines between `offset` and the first non whitespace character in the source code.
-pub(crate) fn lines_before(offset: TextSize, code: &str) -> u32 {
+pub fn lines_before(offset: TextSize, code: &str) -> u32 {
     let tokens = SimpleTokenizer::up_to(offset, code);
     let mut newlines = 0u32;

@@ -52,7 +52,7 @@ pub(crate) fn lines_before(offset: TextSize, code: &str) -> u32 {
 }

 /// Counts the empty lines between `offset` and the first non-whitespace character.
-pub(crate) fn lines_after(offset: TextSize, code: &str) -> u32 {
+pub fn lines_after(offset: TextSize, code: &str) -> u32 {
     let tokens = SimpleTokenizer::starts_at(offset, code);
     let mut newlines = 0u32;

@@ -74,7 +74,7 @@ pub(crate) fn lines_after(offset: TextSize, code: &str) -> u32 {
 }

 /// Returns the position after skipping any trailing trivia up to, but not including the newline character.
-pub(crate) fn skip_trailing_trivia(offset: TextSize, code: &str) -> TextSize {
+pub fn skip_trailing_trivia(offset: TextSize, code: &str) -> TextSize {
     let tokenizer = SimpleTokenizer::starts_at(offset, code);

     for token in tokenizer {
@@ -110,32 +110,32 @@ fn is_non_ascii_identifier_start(c: char) -> bool {
 }

 #[derive(Clone, Debug, Eq, PartialEq, Hash)]
-pub(crate) struct Token {
-    pub(crate) kind: TokenKind,
-    pub(crate) range: TextRange,
+pub struct Token {
+    pub kind: TokenKind,
+    pub range: TextRange,
 }

 impl Token {
-    pub(crate) const fn kind(&self) -> TokenKind {
+    pub const fn kind(&self) -> TokenKind {
         self.kind
     }

     #[allow(unused)]
-    pub(crate) const fn range(&self) -> TextRange {
+    pub const fn range(&self) -> TextRange {
         self.range
     }

-    pub(crate) const fn start(&self) -> TextSize {
+    pub const fn start(&self) -> TextSize {
         self.range.start()
     }

-    pub(crate) const fn end(&self) -> TextSize {
+    pub const fn end(&self) -> TextSize {
         self.range.end()
     }
 }

 #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
-pub(crate) enum TokenKind {
+pub enum TokenKind {
     /// A comment, not including the trailing new line.
     Comment,

@@ -247,7 +247,7 @@ impl TokenKind {
 ///
 /// The tokenizer doesn't guarantee any correctness after it returned a [`TokenKind::Other`]. That's why it
 /// will return [`TokenKind::Bogus`] for every character after until it reaches the end of the file.
-pub(crate) struct SimpleTokenizer<'a> {
+pub struct SimpleTokenizer<'a> {
     offset: TextSize,
     back_offset: TextSize,
     /// `true` when it is known that the current `back` line has no comment for sure.
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> SimpleTokenizer<'a> {
|
impl<'a> SimpleTokenizer<'a> {
|
||||||
pub(crate) fn new(source: &'a str, range: TextRange) -> Self {
|
pub fn new(source: &'a str, range: TextRange) -> Self {
|
||||||
Self {
|
Self {
|
||||||
offset: range.start(),
|
offset: range.start(),
|
||||||
back_offset: range.end(),
|
back_offset: range.end(),
|
||||||
|
@@ -269,20 +269,20 @@ impl<'a> SimpleTokenizer<'a> {
         }
     }

-    pub(crate) fn starts_at(offset: TextSize, source: &'a str) -> Self {
+    pub fn starts_at(offset: TextSize, source: &'a str) -> Self {
         let range = TextRange::new(offset, source.text_len());
         Self::new(source, range)
     }

     /// Creates a tokenizer that lexes tokens from the start of `source` up to `offset`.
-    pub(crate) fn up_to(offset: TextSize, source: &'a str) -> Self {
+    pub fn up_to(offset: TextSize, source: &'a str) -> Self {
         Self::new(source, TextRange::up_to(offset))
     }

     /// Creates a tokenizer that lexes tokens from the start of `source` up to `offset`, and informs
     /// the lexer that the line at `offset` contains no comments. This can significantly speed up backwards lexing
     /// because the lexer doesn't need to scan for comments.
-    pub(crate) fn up_to_without_back_comment(offset: TextSize, source: &'a str) -> Self {
+    pub fn up_to_without_back_comment(offset: TextSize, source: &'a str) -> Self {
         let mut tokenizer = Self::up_to(offset, source);
         tokenizer.back_line_has_no_comment = true;
         tokenizer
@@ -375,7 +375,7 @@ impl<'a> SimpleTokenizer<'a> {

     /// Returns the next token from the back. Prefer iterating forwards. Iterating backwards is significantly more expensive
     /// because it needs to check if the line has any comments when encountering any non-trivia token.
-    pub(crate) fn next_token_back(&mut self) -> Token {
+    pub fn next_token_back(&mut self) -> Token {
         self.cursor.start_token();

         let Some(last) = self.cursor.bump_back() else {
|
||||||
token
|
token
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn skip_trivia(self) -> impl Iterator<Item = Token> + DoubleEndedIterator + 'a {
|
pub fn skip_trivia(self) -> impl Iterator<Item = Token> + DoubleEndedIterator + 'a {
|
||||||
self.filter(|t| !t.kind().is_trivia())
|
self.filter(|t| !t.kind().is_trivia())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -539,7 +539,7 @@ mod tests {
     use insta::assert_debug_snapshot;
     use ruff_text_size::{TextLen, TextRange, TextSize};

-    use crate::trivia::{lines_after, lines_before, SimpleTokenizer, Token};
+    use crate::tokenizer::{lines_after, lines_before, SimpleTokenizer, Token};

     struct TokenizationTestCase {
         source: &'static str,