Remove exception-handler lexing from unused-bound-exception fix (#5851)

## Summary

The motivation here is that it will make this rule easier to rewrite as
a deferred check. Right now, we can't run this rule in the deferred
phase, because its autofix depends on the enclosing `except_handler`.
Instead of lexing the entire `except_handler`, we can use the
`SimpleTokenizer` from the formatter and lex forwards and backwards
from the bound name.

For context, this rule detects the unused `e` in:

```python
try:
  pass
except ValueError as e:
  pass
```
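
The new fix needs only the bound name's range: lex backwards over the `as` keyword to find the end of the exception value, then lex forwards to the `:`, and delete everything in between. A minimal sketch of that shape (simplified from the actual change below; `source` and `name_range` are stand-ins for the locator's contents and the identifier's range):

```rust
use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
use ruff_text_size::{TextRange, TextSize};

/// Sketch: compute the deletion span for `except <value> as <name>:`,
/// given only the range of `<name>`.
fn deletion_span(source: &str, name_range: TextRange) -> Option<(TextSize, TextSize)> {
    // Lex backwards from the start of the name; the first non-trivia
    // token should be the `as` keyword.
    let mut back = SimpleTokenizer::up_to(name_range.start(), source).skip_trivia();
    let as_token = back.next_back()?;
    debug_assert!(matches!(as_token.kind(), TokenKind::As));

    // The token before `as` is the exception value (e.g., `ValueError`).
    let value = back.next_back()?;

    // Lex forwards from the end of the name to the `:` that opens the body.
    let colon = SimpleTokenizer::starts_at(name_range.end(), source)
        .skip_trivia()
        .next()?;
    debug_assert!(matches!(colon.kind(), TokenKind::Colon));

    // Delete from the end of the value to the start of the colon.
    Some((value.end(), colon.start()))
}
```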
Charlie Marsh, 2023-07-18 14:27:46 -04:00 (committed via GitHub)
parent 41da52a61b, commit 4204fc002d
32 changed files with 125 additions and 112 deletions

Cargo.lock (generated)

@@ -2157,7 +2157,6 @@ dependencies = [
  "similar",
  "smallvec",
  "thiserror",
- "unic-ucd-ident",
 ]

 [[package]]
@@ -2195,8 +2194,10 @@ version = "0.0.0"
 name = "ruff_python_whitespace"
 version = "0.0.0"
 dependencies = [
+ "insta",
  "memchr",
  "ruff_text_size",
+ "unic-ucd-ident",
 ]

 [[package]]


@@ -4103,10 +4103,7 @@ where
                     );
                     if self.patch(Rule::UnusedVariable) {
                         diagnostic.try_set_fix(|| {
-                            pyflakes::fixes::remove_exception_handler_assignment(
-                                except_handler,
-                                self.locator,
-                            )
+                            pyflakes::fixes::remove_exception_handler_assignment(name, self.locator)
                                 .map(Fix::automatic)
                         });
                     }


@@ -1,10 +1,10 @@
-use anyhow::{bail, Ok, Result};
+use anyhow::{Context, Ok, Result};
 use ruff_text_size::TextRange;
-use rustpython_parser::ast::{ExceptHandler, Expr, Ranged};
-use rustpython_parser::{lexer, Mode};
+use rustpython_parser::ast::{Expr, Identifier, Ranged};

 use ruff_diagnostics::Edit;
 use ruff_python_ast::source_code::{Locator, Stylist};
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};

 use crate::autofix::codemods::CodegenStylist;
 use crate::cst::matchers::{match_call_mut, match_dict, match_expression};
@@ -90,31 +90,29 @@ pub(crate) fn remove_unused_positional_arguments_from_format_call(
 /// Generate a [`Edit`] to remove the binding from an exception handler.
 pub(crate) fn remove_exception_handler_assignment(
-    except_handler: &ExceptHandler,
+    bound_exception: &Identifier,
     locator: &Locator,
 ) -> Result<Edit> {
-    let contents = locator.slice(except_handler.range());
-
-    let mut fix_start = None;
-    let mut fix_end = None;
+    // Lex backwards, to the token just before the `as`.
+    let mut tokenizer =
+        SimpleTokenizer::up_to(bound_exception.start(), locator.contents()).skip_trivia();

-    // End of the token just before the `as` to the semicolon.
-    let mut prev = None;
-    for (tok, range) in
-        lexer::lex_starts_at(contents, Mode::Module, except_handler.start()).flatten()
-    {
-        if tok.is_as() {
-            fix_start = prev;
-        }
-        if tok.is_colon() {
-            fix_end = Some(range.start());
-            break;
-        }
-        prev = Some(range.end());
-    }
+    // Eat the `as` token.
+    let preceding = tokenizer
+        .next_back()
+        .context("expected the exception name to be preceded by `as`")?;
+    debug_assert!(matches!(preceding.kind, TokenKind::As));

-    if let (Some(start), Some(end)) = (fix_start, fix_end) {
-        Ok(Edit::deletion(start, end))
-    } else {
-        bail!("Could not find span of exception handler")
-    }
+    // Lex to the end of the preceding token, which should be the exception value.
+    let preceding = tokenizer
+        .next_back()
+        .context("expected the exception name to be preceded by a token")?;
+
+    // Lex forwards, to the `:` token.
+    let following = SimpleTokenizer::starts_at(bound_exception.end(), locator.contents())
+        .next()
+        .context("expected the exception name to be followed by a colon")?;
+    debug_assert!(matches!(following.kind, TokenKind::Colon));
+
+    Ok(Edit::deletion(preceding.end(), following.start()))
 }
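
On the motivating example, this resolves to deleting ` as e` between `ValueError` and the `:`. A rough, test-style walkthrough (the byte offsets are hand-computed for this one string; in the real rule they come from the binding's `Identifier` node):

```rust
use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
use ruff_text_size::{TextRange, TextSize};

fn main() {
    let source = "except ValueError as e:";
    // `e` occupies bytes 21..22 in this string.
    let name = TextRange::new(TextSize::from(21u32), TextSize::from(22u32));

    // Backwards: eat the `as`, then land on `ValueError` (bytes 7..17).
    let mut back = SimpleTokenizer::up_to(name.start(), source).skip_trivia();
    assert!(matches!(back.next_back().unwrap().kind(), TokenKind::As));
    let value = back.next_back().unwrap();

    // Forwards: the next non-trivia token after the name is the `:`.
    let colon = SimpleTokenizer::starts_at(name.end(), source)
        .skip_trivia()
        .next()
        .unwrap();
    assert!(matches!(colon.kind(), TokenKind::Colon));

    // Applying the deletion leaves the handler without the binding.
    let mut fixed = source.to_string();
    fixed.replace_range(usize::from(value.end())..usize::from(colon.start()), "");
    assert_eq!(fixed, "except ValueError:");
}
```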


@@ -28,7 +28,6 @@ rustpython-parser = { workspace = true }
 serde = { workspace = true, optional = true }
 smallvec = { workspace = true }
 thiserror = { workspace = true }
-unic-ucd-ident = "0.9.0"

 [dev-dependencies]
 ruff_formatter = { path = "../ruff_formatter", features = ["serde"]}


@@ -2,10 +2,12 @@ use ruff_text_size::{TextRange, TextSize};
 use rustpython_parser::ast::Ranged;

 use ruff_formatter::{format_args, write, Argument, Arguments};
+use ruff_python_whitespace::{
+    lines_after, skip_trailing_trivia, SimpleTokenizer, Token, TokenKind,
+};

 use crate::context::NodeLevel;
 use crate::prelude::*;
-use crate::trivia::{lines_after, skip_trailing_trivia, SimpleTokenizer, Token, TokenKind};
 use crate::MagicTrailingComma;

 /// Adds parentheses and indents `content` if it doesn't fit on a line.


@@ -3,11 +3,11 @@ use rustpython_parser::ast::Ranged;
 use ruff_formatter::{format_args, write, FormatError, SourceCode};
 use ruff_python_ast::node::{AnyNodeRef, AstNode};
+use ruff_python_whitespace::{lines_after, lines_before, skip_trailing_trivia};

 use crate::comments::SourceComment;
 use crate::context::NodeLevel;
 use crate::prelude::*;
-use crate::trivia::{lines_after, lines_before, skip_trailing_trivia};

 /// Formats the leading comments of a node.
 pub(crate) fn leading_node_comments<T>(node: &T) -> FormatLeadingComments


@@ -7,14 +7,16 @@ use rustpython_parser::ast::{Expr, ExprIfExp, ExprSlice, Ranged};
 use ruff_python_ast::node::{AnyNodeRef, AstNode};
 use ruff_python_ast::source_code::Locator;
 use ruff_python_ast::whitespace;
-use ruff_python_whitespace::{PythonWhitespace, UniversalNewlines};
+use ruff_python_whitespace::{
+    first_non_trivia_token_rev, PythonWhitespace, SimpleTokenizer, Token, TokenKind,
+    UniversalNewlines,
+};

 use crate::comments::visitor::{CommentPlacement, DecoratedComment};
 use crate::expression::expr_slice::{assign_comment_in_slice, ExprSliceCommentSection};
 use crate::other::arguments::{
     assign_argument_separator_comment_placement, find_argument_separators,
 };
-use crate::trivia::{first_non_trivia_token_rev, SimpleTokenizer, Token, TokenKind};

 /// Implements the custom comment placement logic.
 pub(super) fn place_comment<'a>(


@@ -3,14 +3,13 @@ use rustpython_parser::ast::{Expr, ExprCall, Ranged};
 use ruff_formatter::write;
 use ruff_python_ast::node::AnyNodeRef;
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};

 use crate::comments::dangling_comments;
 use crate::expression::parentheses::{
     parenthesized, NeedsParentheses, OptionalParentheses, Parentheses,
 };
 use crate::prelude::*;
-use crate::trivia::{SimpleTokenizer, TokenKind};
 use crate::FormatNodeRule;

 #[derive(Default)]


@@ -1,16 +1,17 @@
-use crate::comments::{dangling_comments, SourceComment};
-use crate::context::PyFormatContext;
-use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
-use crate::trivia::Token;
-use crate::trivia::{first_non_trivia_token, TokenKind};
-use crate::{AsFormat, FormatNodeRule, PyFormatter};
-use ruff_formatter::prelude::{hard_line_break, line_suffix_boundary, space, text};
-use ruff_formatter::{write, Buffer, Format, FormatError, FormatResult};
-use ruff_python_ast::node::{AnyNodeRef, AstNode};
 use ruff_text_size::TextRange;
 use rustpython_parser::ast::ExprSlice;
 use rustpython_parser::ast::{Expr, Ranged};

+use ruff_formatter::prelude::{hard_line_break, line_suffix_boundary, space, text};
+use ruff_formatter::{write, Buffer, Format, FormatError, FormatResult};
+use ruff_python_ast::node::{AnyNodeRef, AstNode};
+use ruff_python_whitespace::{first_non_trivia_token, Token, TokenKind};
+
+use crate::comments::{dangling_comments, SourceComment};
+use crate::context::PyFormatContext;
+use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
+use crate::{AsFormat, FormatNodeRule, PyFormatter};

 #[derive(Default)]
 pub struct FormatExprSlice;


@@ -1,15 +1,17 @@
-use crate::comments::trailing_comments;
-use crate::context::PyFormatContext;
-use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
-use crate::trivia::{SimpleTokenizer, TokenKind};
-use crate::{AsFormat, FormatNodeRule, PyFormatter};
-use ruff_formatter::prelude::{hard_line_break, space, text};
-use ruff_formatter::{Format, FormatContext, FormatResult};
-use ruff_python_ast::node::AnyNodeRef;
 use ruff_text_size::{TextLen, TextRange};
 use rustpython_parser::ast::UnaryOp;
 use rustpython_parser::ast::{ExprUnaryOp, Ranged};

+use ruff_formatter::prelude::{hard_line_break, space, text};
+use ruff_formatter::{Format, FormatContext, FormatResult};
+use ruff_python_ast::node::AnyNodeRef;
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
+
+use crate::comments::trailing_comments;
+use crate::context::PyFormatContext;
+use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
+use crate::{AsFormat, FormatNodeRule, PyFormatter};

 #[derive(Default)]
 pub struct FormatExprUnaryOp;


@@ -1,10 +1,12 @@
-use crate::context::NodeLevel;
-use crate::prelude::*;
-use crate::trivia::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};
+use rustpython_parser::ast::Ranged;
+
 use ruff_formatter::prelude::tag::Condition;
 use ruff_formatter::{format_args, write, Argument, Arguments};
 use ruff_python_ast::node::AnyNodeRef;
-use rustpython_parser::ast::Ranged;
+use ruff_python_whitespace::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};

+use crate::context::NodeLevel;
+use crate::prelude::*;

 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
 pub(crate) enum OptionalParentheses {


@@ -33,7 +33,6 @@ pub(crate) mod other;
 pub(crate) mod pattern;
 mod prelude;
 pub(crate) mod statement;
-mod trivia;

 include!("../../ruff_formatter/shared_traits.rs");


@@ -1,9 +1,11 @@
 use std::usize;

+use ruff_text_size::{TextRange, TextSize};
 use rustpython_parser::ast::{Arguments, Ranged};

 use ruff_formatter::{format_args, write};
 use ruff_python_ast::node::{AnyNodeRef, AstNode};
+use ruff_python_whitespace::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};

 use crate::comments::{
     dangling_comments, leading_comments, leading_node_comments, trailing_comments,
@@ -12,9 +14,7 @@ use crate::comments::{
 use crate::context::NodeLevel;
 use crate::expression::parentheses::parenthesized;
 use crate::prelude::*;
-use crate::trivia::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};
 use crate::FormatNodeRule;
-use ruff_text_size::{TextRange, TextSize};

 #[derive(Default)]
 pub struct FormatArguments;


@@ -1,12 +1,13 @@
-use crate::comments::trailing_comments;
-use crate::expression::parentheses::{parenthesized, Parentheses};
-use crate::prelude::*;
-use crate::trivia::{SimpleTokenizer, TokenKind};
-use ruff_formatter::write;
 use ruff_text_size::TextRange;
 use rustpython_parser::ast::{Ranged, StmtClassDef};

+use ruff_formatter::write;
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
+
+use crate::comments::trailing_comments;
+use crate::expression::parentheses::{parenthesized, Parentheses};
+use crate::prelude::*;

 #[derive(Default)]
 pub struct FormatStmtClassDef;


@@ -2,12 +2,12 @@ use rustpython_parser::ast::{Ranged, StmtFunctionDef};
 use ruff_formatter::{write, FormatOwnedWithRule, FormatRefWithRule};
 use ruff_python_ast::function::AnyFunctionDefinition;
+use ruff_python_whitespace::{lines_after, skip_trailing_trivia};

 use crate::comments::{leading_comments, trailing_comments};
 use crate::context::NodeLevel;
 use crate::expression::parentheses::{optional_parentheses, Parentheses};
 use crate::prelude::*;
-use crate::trivia::{lines_after, skip_trailing_trivia};
 use crate::FormatNodeRule;

 #[derive(Default)]


@@ -3,13 +3,13 @@ use rustpython_parser::ast::{Ranged, StmtAsyncWith, StmtWith, Suite, WithItem};
 use ruff_formatter::{format_args, write, FormatError};
 use ruff_python_ast::node::AnyNodeRef;
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};

 use crate::comments::trailing_comments;
 use crate::expression::parentheses::{
     in_parentheses_only_soft_line_break_or_space, optional_parentheses,
 };
 use crate::prelude::*;
-use crate::trivia::{SimpleTokenizer, TokenKind};
 use crate::FormatNodeRule;

 pub(super) enum AnyStatementWith<'a> {


@@ -1,10 +1,12 @@
-use crate::context::NodeLevel;
-use crate::prelude::*;
-use crate::trivia::lines_before;
+use rustpython_parser::ast::{Ranged, Stmt, Suite};
+
 use ruff_formatter::{
     format_args, write, FormatOwnedWithRule, FormatRefWithRule, FormatRuleWithOptions,
 };
-use rustpython_parser::ast::{Ranged, Stmt, Suite};
+use ruff_python_whitespace::lines_before;

+use crate::context::NodeLevel;
+use crate::prelude::*;

 /// Level at which the [`Suite`] appears in the source code.
 #[derive(Copy, Clone, Debug)]
@@ -185,13 +187,15 @@ impl<'ast> IntoFormat<PyFormatContext<'ast>> for Suite {
 #[cfg(test)]
 mod tests {
+    use rustpython_parser::ast::Suite;
+    use rustpython_parser::Parse;
+
+    use ruff_formatter::format;
+
     use crate::comments::Comments;
     use crate::prelude::*;
     use crate::statement::suite::SuiteLevel;
     use crate::PyFormatOptions;
-    use ruff_formatter::format;
-    use rustpython_parser::ast::Suite;
-    use rustpython_parser::Parse;

     fn format_suite(level: SuiteLevel) -> String {
         let source = r#"


@@ -16,3 +16,7 @@ license = { workspace = true }
 ruff_text_size = { workspace = true }
 memchr = { workspace = true }
+unic-ucd-ident = "0.9.0"
+
+[dev-dependencies]
+insta = { workspace = true }


@@ -1,7 +1,9 @@
 mod cursor;
 mod newlines;
+mod tokenizer;
 mod whitespace;

 pub use cursor::*;
 pub use newlines::*;
+pub use tokenizer::*;
 pub use whitespace::*;
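
With the tokenizer now living in `ruff_python_whitespace` and re-exported from the crate root, both the linter and the formatter can lex trivia without pulling in the full parser. A small sketch of the now-public surface (the input string and offsets are illustrative):

```rust
use ruff_python_whitespace::{lines_before, SimpleTokenizer};
use ruff_text_size::TextSize;

fn main() {
    let source = "x = 1\n\ny = 2\n";

    // Forward lexing from the end of `x = 1`: the next non-trivia token
    // is the name `y`, which starts at byte 7.
    let next = SimpleTokenizer::starts_at(TextSize::from(5u32), source)
        .skip_trivia()
        .next()
        .unwrap();
    assert_eq!(usize::from(next.start()), 7);

    // Backward helper: two newlines separate `y = 2` from `x = 1`.
    assert_eq!(lines_before(TextSize::from(7u32), source), 2);
}
```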


@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokenize_reverse()
 ---
 [


@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [


@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [


@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [


@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [


@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [


@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [


@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [


@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [


@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [


@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [


@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [


@@ -1,7 +1,7 @@
 use ruff_text_size::{TextLen, TextRange, TextSize};
 use unic_ucd_ident::{is_xid_continue, is_xid_start};

-use ruff_python_whitespace::{is_python_whitespace, Cursor};
+use crate::{is_python_whitespace, Cursor};

 /// Searches for the first non-trivia character in `range`.
 ///
@@ -11,7 +11,7 @@ use ruff_python_whitespace::{is_python_whitespace, Cursor};
 /// of the character, the second item the non-trivia character.
 ///
 /// Returns `None` if the range is empty or only contains trivia (whitespace or comments).
-pub(crate) fn first_non_trivia_token(offset: TextSize, code: &str) -> Option<Token> {
+pub fn first_non_trivia_token(offset: TextSize, code: &str) -> Option<Token> {
     SimpleTokenizer::starts_at(offset, code)
         .skip_trivia()
         .next()
@@ -23,14 +23,14 @@ pub(crate) fn first_non_trivia_token(offset: TextSize, code: &str) -> Option<Tok
 /// ## Notes
 ///
 /// Prefer [`first_non_trivia_token`] whenever possible because reverse lookup is expensive because of comments.
-pub(crate) fn first_non_trivia_token_rev(offset: TextSize, code: &str) -> Option<Token> {
+pub fn first_non_trivia_token_rev(offset: TextSize, code: &str) -> Option<Token> {
     SimpleTokenizer::up_to(offset, code)
         .skip_trivia()
         .next_back()
 }

 /// Returns the number of newlines between `offset` and the first non whitespace character in the source code.
-pub(crate) fn lines_before(offset: TextSize, code: &str) -> u32 {
+pub fn lines_before(offset: TextSize, code: &str) -> u32 {
     let tokens = SimpleTokenizer::up_to(offset, code);

     let mut newlines = 0u32;
@@ -52,7 +52,7 @@ pub(crate) fn lines_before(offset: TextSize, code: &str) -> u32 {
 }

 /// Counts the empty lines between `offset` and the first non-whitespace character.
-pub(crate) fn lines_after(offset: TextSize, code: &str) -> u32 {
+pub fn lines_after(offset: TextSize, code: &str) -> u32 {
     let tokens = SimpleTokenizer::starts_at(offset, code);

     let mut newlines = 0u32;
@@ -74,7 +74,7 @@ pub(crate) fn lines_after(offset: TextSize, code: &str) -> u32 {
 }

 /// Returns the position after skipping any trailing trivia up to, but not including the newline character.
-pub(crate) fn skip_trailing_trivia(offset: TextSize, code: &str) -> TextSize {
+pub fn skip_trailing_trivia(offset: TextSize, code: &str) -> TextSize {
     let tokenizer = SimpleTokenizer::starts_at(offset, code);

     for token in tokenizer {
@@ -110,32 +110,32 @@ fn is_non_ascii_identifier_start(c: char) -> bool {
 }

 #[derive(Clone, Debug, Eq, PartialEq, Hash)]
-pub(crate) struct Token {
-    pub(crate) kind: TokenKind,
-    pub(crate) range: TextRange,
+pub struct Token {
+    pub kind: TokenKind,
+    pub range: TextRange,
 }

 impl Token {
-    pub(crate) const fn kind(&self) -> TokenKind {
+    pub const fn kind(&self) -> TokenKind {
         self.kind
     }

     #[allow(unused)]
-    pub(crate) const fn range(&self) -> TextRange {
+    pub const fn range(&self) -> TextRange {
         self.range
     }

-    pub(crate) const fn start(&self) -> TextSize {
+    pub const fn start(&self) -> TextSize {
         self.range.start()
     }

-    pub(crate) const fn end(&self) -> TextSize {
+    pub const fn end(&self) -> TextSize {
         self.range.end()
     }
 }

 #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
-pub(crate) enum TokenKind {
+pub enum TokenKind {
     /// A comment, not including the trailing new line.
     Comment,
@@ -247,7 +247,7 @@ impl TokenKind {
 ///
 /// The tokenizer doesn't guarantee any correctness after it returned a [`TokenKind::Other`]. That's why it
 /// will return [`TokenKind::Bogus`] for every character after until it reaches the end of the file.
-pub(crate) struct SimpleTokenizer<'a> {
+pub struct SimpleTokenizer<'a> {
     offset: TextSize,
     back_offset: TextSize,
     /// `true` when it is known that the current `back` line has no comment for sure.
@@ -258,7 +258,7 @@ pub(crate) struct SimpleTokenizer<'a> {
 }

 impl<'a> SimpleTokenizer<'a> {
-    pub(crate) fn new(source: &'a str, range: TextRange) -> Self {
+    pub fn new(source: &'a str, range: TextRange) -> Self {
         Self {
             offset: range.start(),
             back_offset: range.end(),
@@ -269,20 +269,20 @@ impl<'a> SimpleTokenizer<'a> {
         }
     }

-    pub(crate) fn starts_at(offset: TextSize, source: &'a str) -> Self {
+    pub fn starts_at(offset: TextSize, source: &'a str) -> Self {
         let range = TextRange::new(offset, source.text_len());
         Self::new(source, range)
     }

     /// Creates a tokenizer that lexes tokens from the start of `source` up to `offset`.
-    pub(crate) fn up_to(offset: TextSize, source: &'a str) -> Self {
+    pub fn up_to(offset: TextSize, source: &'a str) -> Self {
         Self::new(source, TextRange::up_to(offset))
     }

     /// Creates a tokenizer that lexes tokens from the start of `source` up to `offset`, and informs
     /// the lexer that the line at `offset` contains no comments. This can significantly speed up backwards lexing
     /// because the lexer doesn't need to scan for comments.
-    pub(crate) fn up_to_without_back_comment(offset: TextSize, source: &'a str) -> Self {
+    pub fn up_to_without_back_comment(offset: TextSize, source: &'a str) -> Self {
         let mut tokenizer = Self::up_to(offset, source);
         tokenizer.back_line_has_no_comment = true;
         tokenizer
@@ -375,7 +375,7 @@ impl<'a> SimpleTokenizer<'a> {
     /// Returns the next token from the back. Prefer iterating forwards. Iterating backwards is significantly more expensive
     /// because it needs to check if the line has any comments when encountering any non-trivia token.
-    pub(crate) fn next_token_back(&mut self) -> Token {
+    pub fn next_token_back(&mut self) -> Token {
         self.cursor.start_token();

         let Some(last) = self.cursor.bump_back() else {
@@ -503,7 +503,7 @@ impl<'a> SimpleTokenizer<'a> {
         token
     }

-    pub(crate) fn skip_trivia(self) -> impl Iterator<Item = Token> + DoubleEndedIterator + 'a {
+    pub fn skip_trivia(self) -> impl Iterator<Item = Token> + DoubleEndedIterator + 'a {
         self.filter(|t| !t.kind().is_trivia())
     }
 }
@@ -539,7 +539,7 @@ mod tests {
     use insta::assert_debug_snapshot;
     use ruff_text_size::{TextLen, TextRange, TextSize};

-    use crate::trivia::{lines_after, lines_before, SimpleTokenizer, Token};
+    use crate::tokenizer::{lines_after, lines_before, SimpleTokenizer, Token};

     struct TokenizationTestCase {
         source: &'static str,