Remove exception-handler lexing from unused-bound-exception fix (#5851)

## Summary

The motivation is that this makes the rule easier to rewrite as a deferred check. Right now, we can't run this rule in the deferred phase, because its autofix depends on the `except_handler` node. Instead of lexing the `except_handler`'s source, we can use the `SimpleTokenizer` from the formatter, and just lex forwards and backwards from the bound name.
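
Concretely, the fix reduces to a handful of tokenizer calls. This is condensed verbatim from the new `remove_exception_handler_assignment` in the diff below; `bound_exception` is the `Identifier` for the bound name, and `locator` provides the source text:

```rust
// Lex backwards, to the token just before the `as`.
let mut tokenizer =
    SimpleTokenizer::up_to(bound_exception.start(), locator.contents()).skip_trivia();

// Eat the `as` token.
let preceding = tokenizer
    .next_back()
    .context("expected the exception name to be preceded by `as`")?;
debug_assert!(matches!(preceding.kind, TokenKind::As));

// Lex to the end of the preceding token, which should be the exception value.
let preceding = tokenizer
    .next_back()
    .context("expected the exception name to be preceded by a token")?;

// Lex forwards, to the `:` token.
let following = SimpleTokenizer::starts_at(bound_exception.end(), locator.contents())
    .next()
    .context("expected the exception name to be followed by a colon")?;
debug_assert!(matches!(following.kind, TokenKind::Colon));

Ok(Edit::deletion(preceding.end(), following.start()))
```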

For context, this rule detects the unused `e` in:

```python
try:
  pass
except ValueError as e:
  pass
```
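
The fix deletes from the end of the preceding token (`ValueError`) to the start of the trailing `:`, leaving `except ValueError:`.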
Charlie Marsh, 2023-07-18 14:27:46 -04:00 (committed by GitHub)
parent 41da52a61b, commit 4204fc002d
32 changed files with 125 additions and 112 deletions

Cargo.lock (generated)

@@ -2157,7 +2157,6 @@ dependencies = [
  "similar",
  "smallvec",
  "thiserror",
- "unic-ucd-ident",
 ]

 [[package]]

@@ -2195,8 +2194,10 @@ version = "0.0.0"
 name = "ruff_python_whitespace"
 version = "0.0.0"
 dependencies = [
+ "insta",
  "memchr",
  "ruff_text_size",
+ "unic-ucd-ident",
 ]

 [[package]]

@@ -4103,11 +4103,8 @@ where
         );
         if self.patch(Rule::UnusedVariable) {
             diagnostic.try_set_fix(|| {
-                pyflakes::fixes::remove_exception_handler_assignment(
-                    except_handler,
-                    self.locator,
-                )
-                .map(Fix::automatic)
+                pyflakes::fixes::remove_exception_handler_assignment(name, self.locator)
+                    .map(Fix::automatic)
             });
         }
         self.diagnostics.push(diagnostic);

@@ -1,10 +1,10 @@
-use anyhow::{bail, Ok, Result};
+use anyhow::{Context, Ok, Result};
 use ruff_text_size::TextRange;
-use rustpython_parser::ast::{ExceptHandler, Expr, Ranged};
-use rustpython_parser::{lexer, Mode};
+use rustpython_parser::ast::{Expr, Identifier, Ranged};

 use ruff_diagnostics::Edit;
 use ruff_python_ast::source_code::{Locator, Stylist};
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};

 use crate::autofix::codemods::CodegenStylist;
 use crate::cst::matchers::{match_call_mut, match_dict, match_expression};

@@ -90,31 +90,29 @@ pub(crate) fn remove_unused_positional_arguments_from_format_call(
 /// Generate a [`Edit`] to remove the binding from an exception handler.
 pub(crate) fn remove_exception_handler_assignment(
-    except_handler: &ExceptHandler,
+    bound_exception: &Identifier,
     locator: &Locator,
 ) -> Result<Edit> {
-    let contents = locator.slice(except_handler.range());
-    let mut fix_start = None;
-    let mut fix_end = None;
+    // Lex backwards, to the token just before the `as`.
+    let mut tokenizer =
+        SimpleTokenizer::up_to(bound_exception.start(), locator.contents()).skip_trivia();

-    // End of the token just before the `as` to the semicolon.
-    let mut prev = None;
-    for (tok, range) in
-        lexer::lex_starts_at(contents, Mode::Module, except_handler.start()).flatten()
-    {
-        if tok.is_as() {
-            fix_start = prev;
-        }
-        if tok.is_colon() {
-            fix_end = Some(range.start());
-            break;
-        }
-        prev = Some(range.end());
-    }
+    // Eat the `as` token.
+    let preceding = tokenizer
+        .next_back()
+        .context("expected the exception name to be preceded by `as`")?;
+    debug_assert!(matches!(preceding.kind, TokenKind::As));

-    if let (Some(start), Some(end)) = (fix_start, fix_end) {
-        Ok(Edit::deletion(start, end))
-    } else {
-        bail!("Could not find span of exception handler")
-    }
+    // Lex to the end of the preceding token, which should be the exception value.
+    let preceding = tokenizer
+        .next_back()
+        .context("expected the exception name to be preceded by a token")?;
+
+    // Lex forwards, to the `:` token.
+    let following = SimpleTokenizer::starts_at(bound_exception.end(), locator.contents())
+        .next()
+        .context("expected the exception name to be followed by a colon")?;
+    debug_assert!(matches!(following.kind, TokenKind::Colon));
+
+    Ok(Edit::deletion(preceding.end(), following.start()))
 }

@@ -28,7 +28,6 @@ rustpython-parser = { workspace = true }
 serde = { workspace = true, optional = true }
 smallvec = { workspace = true }
 thiserror = { workspace = true }
-unic-ucd-ident = "0.9.0"

 [dev-dependencies]
 ruff_formatter = { path = "../ruff_formatter", features = ["serde"]}

@@ -2,10 +2,12 @@ use ruff_text_size::{TextRange, TextSize};
 use rustpython_parser::ast::Ranged;

 use ruff_formatter::{format_args, write, Argument, Arguments};
+use ruff_python_whitespace::{
+    lines_after, skip_trailing_trivia, SimpleTokenizer, Token, TokenKind,
+};

 use crate::context::NodeLevel;
 use crate::prelude::*;
-use crate::trivia::{lines_after, skip_trailing_trivia, SimpleTokenizer, Token, TokenKind};
 use crate::MagicTrailingComma;

 /// Adds parentheses and indents `content` if it doesn't fit on a line.

@@ -3,11 +3,11 @@ use rustpython_parser::ast::Ranged;
 use ruff_formatter::{format_args, write, FormatError, SourceCode};
 use ruff_python_ast::node::{AnyNodeRef, AstNode};
+use ruff_python_whitespace::{lines_after, lines_before, skip_trailing_trivia};

 use crate::comments::SourceComment;
 use crate::context::NodeLevel;
 use crate::prelude::*;
-use crate::trivia::{lines_after, lines_before, skip_trailing_trivia};

 /// Formats the leading comments of a node.
 pub(crate) fn leading_node_comments<T>(node: &T) -> FormatLeadingComments

@@ -7,14 +7,16 @@ use rustpython_parser::ast::{Expr, ExprIfExp, ExprSlice, Ranged};
 use ruff_python_ast::node::{AnyNodeRef, AstNode};
 use ruff_python_ast::source_code::Locator;
 use ruff_python_ast::whitespace;
-use ruff_python_whitespace::{PythonWhitespace, UniversalNewlines};
+use ruff_python_whitespace::{
+    first_non_trivia_token_rev, PythonWhitespace, SimpleTokenizer, Token, TokenKind,
+    UniversalNewlines,
+};

 use crate::comments::visitor::{CommentPlacement, DecoratedComment};
 use crate::expression::expr_slice::{assign_comment_in_slice, ExprSliceCommentSection};
 use crate::other::arguments::{
     assign_argument_separator_comment_placement, find_argument_separators,
 };
-use crate::trivia::{first_non_trivia_token_rev, SimpleTokenizer, Token, TokenKind};

 /// Implements the custom comment placement logic.
 pub(super) fn place_comment<'a>(

@@ -3,14 +3,13 @@ use rustpython_parser::ast::{Expr, ExprCall, Ranged};
 use ruff_formatter::write;
 use ruff_python_ast::node::AnyNodeRef;
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};

 use crate::comments::dangling_comments;
 use crate::expression::parentheses::{
     parenthesized, NeedsParentheses, OptionalParentheses, Parentheses,
 };
 use crate::prelude::*;
-use crate::trivia::{SimpleTokenizer, TokenKind};
 use crate::FormatNodeRule;

 #[derive(Default)]

@@ -1,16 +1,17 @@
-use crate::comments::{dangling_comments, SourceComment};
-use crate::context::PyFormatContext;
-use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
-use crate::trivia::Token;
-use crate::trivia::{first_non_trivia_token, TokenKind};
-use crate::{AsFormat, FormatNodeRule, PyFormatter};
-use ruff_formatter::prelude::{hard_line_break, line_suffix_boundary, space, text};
-use ruff_formatter::{write, Buffer, Format, FormatError, FormatResult};
-use ruff_python_ast::node::{AnyNodeRef, AstNode};
 use ruff_text_size::TextRange;
 use rustpython_parser::ast::ExprSlice;
 use rustpython_parser::ast::{Expr, Ranged};

+use ruff_formatter::prelude::{hard_line_break, line_suffix_boundary, space, text};
+use ruff_formatter::{write, Buffer, Format, FormatError, FormatResult};
+use ruff_python_ast::node::{AnyNodeRef, AstNode};
+use ruff_python_whitespace::{first_non_trivia_token, Token, TokenKind};
+
+use crate::comments::{dangling_comments, SourceComment};
+use crate::context::PyFormatContext;
+use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
+use crate::{AsFormat, FormatNodeRule, PyFormatter};

 #[derive(Default)]
 pub struct FormatExprSlice;

@@ -1,15 +1,17 @@
-use crate::comments::trailing_comments;
-use crate::context::PyFormatContext;
-use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
-use crate::trivia::{SimpleTokenizer, TokenKind};
-use crate::{AsFormat, FormatNodeRule, PyFormatter};
-use ruff_formatter::prelude::{hard_line_break, space, text};
-use ruff_formatter::{Format, FormatContext, FormatResult};
-use ruff_python_ast::node::AnyNodeRef;
 use ruff_text_size::{TextLen, TextRange};
 use rustpython_parser::ast::UnaryOp;
 use rustpython_parser::ast::{ExprUnaryOp, Ranged};

+use ruff_formatter::prelude::{hard_line_break, space, text};
+use ruff_formatter::{Format, FormatContext, FormatResult};
+use ruff_python_ast::node::AnyNodeRef;
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
+
+use crate::comments::trailing_comments;
+use crate::context::PyFormatContext;
+use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
+use crate::{AsFormat, FormatNodeRule, PyFormatter};

 #[derive(Default)]
 pub struct FormatExprUnaryOp;

@@ -1,10 +1,12 @@
-use crate::context::NodeLevel;
-use crate::prelude::*;
-use crate::trivia::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};
+use rustpython_parser::ast::Ranged;
+
 use ruff_formatter::prelude::tag::Condition;
 use ruff_formatter::{format_args, write, Argument, Arguments};
 use ruff_python_ast::node::AnyNodeRef;
-use rustpython_parser::ast::Ranged;
+use ruff_python_whitespace::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};
+
+use crate::context::NodeLevel;
+use crate::prelude::*;

 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
 pub(crate) enum OptionalParentheses {

@@ -33,7 +33,6 @@ pub(crate) mod other;
 pub(crate) mod pattern;
 mod prelude;
 pub(crate) mod statement;
-mod trivia;

 include!("../../ruff_formatter/shared_traits.rs");

@@ -1,9 +1,11 @@
 use std::usize;

+use ruff_text_size::{TextRange, TextSize};
 use rustpython_parser::ast::{Arguments, Ranged};

 use ruff_formatter::{format_args, write};
 use ruff_python_ast::node::{AnyNodeRef, AstNode};
+use ruff_python_whitespace::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};

 use crate::comments::{
     dangling_comments, leading_comments, leading_node_comments, trailing_comments,

@@ -12,9 +14,7 @@ use crate::comments::{
 use crate::context::NodeLevel;
 use crate::expression::parentheses::parenthesized;
 use crate::prelude::*;
-use crate::trivia::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};
 use crate::FormatNodeRule;
-use ruff_text_size::{TextRange, TextSize};

 #[derive(Default)]
 pub struct FormatArguments;

@@ -1,12 +1,13 @@
-use crate::comments::trailing_comments;
-use crate::expression::parentheses::{parenthesized, Parentheses};
-use crate::prelude::*;
-use crate::trivia::{SimpleTokenizer, TokenKind};
-use ruff_formatter::write;
 use ruff_text_size::TextRange;
 use rustpython_parser::ast::{Ranged, StmtClassDef};

+use ruff_formatter::write;
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
+
+use crate::comments::trailing_comments;
+use crate::expression::parentheses::{parenthesized, Parentheses};
+use crate::prelude::*;

 #[derive(Default)]
 pub struct FormatStmtClassDef;

@@ -2,12 +2,12 @@ use rustpython_parser::ast::{Ranged, StmtFunctionDef};
 use ruff_formatter::{write, FormatOwnedWithRule, FormatRefWithRule};
 use ruff_python_ast::function::AnyFunctionDefinition;
+use ruff_python_whitespace::{lines_after, skip_trailing_trivia};

 use crate::comments::{leading_comments, trailing_comments};
 use crate::context::NodeLevel;
 use crate::expression::parentheses::{optional_parentheses, Parentheses};
 use crate::prelude::*;
-use crate::trivia::{lines_after, skip_trailing_trivia};
 use crate::FormatNodeRule;

 #[derive(Default)]

@@ -3,13 +3,13 @@ use rustpython_parser::ast::{Ranged, StmtAsyncWith, StmtWith, Suite, WithItem};
 use ruff_formatter::{format_args, write, FormatError};
 use ruff_python_ast::node::AnyNodeRef;
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};

 use crate::comments::trailing_comments;
 use crate::expression::parentheses::{
     in_parentheses_only_soft_line_break_or_space, optional_parentheses,
 };
 use crate::prelude::*;
-use crate::trivia::{SimpleTokenizer, TokenKind};
 use crate::FormatNodeRule;

 pub(super) enum AnyStatementWith<'a> {

@@ -1,10 +1,12 @@
-use crate::context::NodeLevel;
-use crate::prelude::*;
-use crate::trivia::lines_before;
+use rustpython_parser::ast::{Ranged, Stmt, Suite};
+
 use ruff_formatter::{
     format_args, write, FormatOwnedWithRule, FormatRefWithRule, FormatRuleWithOptions,
 };
-use rustpython_parser::ast::{Ranged, Stmt, Suite};
+use ruff_python_whitespace::lines_before;
+
+use crate::context::NodeLevel;
+use crate::prelude::*;

 /// Level at which the [`Suite`] appears in the source code.
 #[derive(Copy, Clone, Debug)]

@@ -185,13 +187,15 @@ impl<'ast> IntoFormat<PyFormatContext<'ast>> for Suite {
 #[cfg(test)]
 mod tests {
+    use rustpython_parser::ast::Suite;
+    use rustpython_parser::Parse;
+
+    use ruff_formatter::format;
+
     use crate::comments::Comments;
     use crate::prelude::*;
     use crate::statement::suite::SuiteLevel;
     use crate::PyFormatOptions;
-    use ruff_formatter::format;
-    use rustpython_parser::ast::Suite;
-    use rustpython_parser::Parse;

     fn format_suite(level: SuiteLevel) -> String {
         let source = r#"

@@ -16,3 +16,7 @@ license = { workspace = true }
 ruff_text_size = { workspace = true }
 memchr = { workspace = true }
+unic-ucd-ident = "0.9.0"
+
+[dev-dependencies]
+insta = { workspace = true }

@@ -1,7 +1,9 @@
 mod cursor;
 mod newlines;
+mod tokenizer;
 mod whitespace;

 pub use cursor::*;
 pub use newlines::*;
+pub use tokenizer::*;
 pub use whitespace::*;

@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokenize_reverse()
 ---
 [

@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [

@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [

@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [

@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [

@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [

@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [

@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [

@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [

@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [

@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [

@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [

@@ -1,7 +1,7 @@
 use ruff_text_size::{TextLen, TextRange, TextSize};
 use unic_ucd_ident::{is_xid_continue, is_xid_start};

-use ruff_python_whitespace::{is_python_whitespace, Cursor};
+use crate::{is_python_whitespace, Cursor};

 /// Searches for the first non-trivia character in `range`.
 ///

@@ -11,7 +11,7 @@ use ruff_python_whitespace::{is_python_whitespace, Cursor};
 /// of the character, the second item the non-trivia character.
 ///
 /// Returns `None` if the range is empty or only contains trivia (whitespace or comments).
-pub(crate) fn first_non_trivia_token(offset: TextSize, code: &str) -> Option<Token> {
+pub fn first_non_trivia_token(offset: TextSize, code: &str) -> Option<Token> {
     SimpleTokenizer::starts_at(offset, code)
         .skip_trivia()
         .next()

@@ -23,14 +23,14 @@ pub(crate) fn first_non_trivia_token(offset: TextSize, code: &str) -> Option<Token> {
 /// ## Notes
 ///
 /// Prefer [`first_non_trivia_token`] whenever possible because reverse lookup is expensive because of comments.
-pub(crate) fn first_non_trivia_token_rev(offset: TextSize, code: &str) -> Option<Token> {
+pub fn first_non_trivia_token_rev(offset: TextSize, code: &str) -> Option<Token> {
     SimpleTokenizer::up_to(offset, code)
         .skip_trivia()
         .next_back()
 }

 /// Returns the number of newlines between `offset` and the first non whitespace character in the source code.
-pub(crate) fn lines_before(offset: TextSize, code: &str) -> u32 {
+pub fn lines_before(offset: TextSize, code: &str) -> u32 {
     let tokens = SimpleTokenizer::up_to(offset, code);

     let mut newlines = 0u32;

@@ -52,7 +52,7 @@ pub(crate) fn lines_before(offset: TextSize, code: &str) -> u32 {
 }

 /// Counts the empty lines between `offset` and the first non-whitespace character.
-pub(crate) fn lines_after(offset: TextSize, code: &str) -> u32 {
+pub fn lines_after(offset: TextSize, code: &str) -> u32 {
     let tokens = SimpleTokenizer::starts_at(offset, code);

     let mut newlines = 0u32;

@@ -74,7 +74,7 @@ pub(crate) fn lines_after(offset: TextSize, code: &str) -> u32 {
 }

 /// Returns the position after skipping any trailing trivia up to, but not including the newline character.
-pub(crate) fn skip_trailing_trivia(offset: TextSize, code: &str) -> TextSize {
+pub fn skip_trailing_trivia(offset: TextSize, code: &str) -> TextSize {
     let tokenizer = SimpleTokenizer::starts_at(offset, code);

     for token in tokenizer {

@@ -110,32 +110,32 @@ fn is_non_ascii_identifier_start(c: char) -> bool {
 }

 #[derive(Clone, Debug, Eq, PartialEq, Hash)]
-pub(crate) struct Token {
-    pub(crate) kind: TokenKind,
-    pub(crate) range: TextRange,
+pub struct Token {
+    pub kind: TokenKind,
+    pub range: TextRange,
 }

 impl Token {
-    pub(crate) const fn kind(&self) -> TokenKind {
+    pub const fn kind(&self) -> TokenKind {
         self.kind
     }

     #[allow(unused)]
-    pub(crate) const fn range(&self) -> TextRange {
+    pub const fn range(&self) -> TextRange {
         self.range
     }

-    pub(crate) const fn start(&self) -> TextSize {
+    pub const fn start(&self) -> TextSize {
         self.range.start()
     }

-    pub(crate) const fn end(&self) -> TextSize {
+    pub const fn end(&self) -> TextSize {
         self.range.end()
     }
 }

 #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
-pub(crate) enum TokenKind {
+pub enum TokenKind {
     /// A comment, not including the trailing new line.
     Comment,

@@ -247,7 +247,7 @@ impl TokenKind {
 ///
 /// The tokenizer doesn't guarantee any correctness after it returned a [`TokenKind::Other`]. That's why it
 /// will return [`TokenKind::Bogus`] for every character after until it reaches the end of the file.
-pub(crate) struct SimpleTokenizer<'a> {
+pub struct SimpleTokenizer<'a> {
     offset: TextSize,
     back_offset: TextSize,
     /// `true` when it is known that the current `back` line has no comment for sure.

@@ -258,7 +258,7 @@ pub(crate) struct SimpleTokenizer<'a> {
 }

 impl<'a> SimpleTokenizer<'a> {
-    pub(crate) fn new(source: &'a str, range: TextRange) -> Self {
+    pub fn new(source: &'a str, range: TextRange) -> Self {
         Self {
             offset: range.start(),
             back_offset: range.end(),

@@ -269,20 +269,20 @@ impl<'a> SimpleTokenizer<'a> {
         }
     }

-    pub(crate) fn starts_at(offset: TextSize, source: &'a str) -> Self {
+    pub fn starts_at(offset: TextSize, source: &'a str) -> Self {
         let range = TextRange::new(offset, source.text_len());
         Self::new(source, range)
     }

     /// Creates a tokenizer that lexes tokens from the start of `source` up to `offset`.
-    pub(crate) fn up_to(offset: TextSize, source: &'a str) -> Self {
+    pub fn up_to(offset: TextSize, source: &'a str) -> Self {
         Self::new(source, TextRange::up_to(offset))
     }

     /// Creates a tokenizer that lexes tokens from the start of `source` up to `offset`, and informs
     /// the lexer that the line at `offset` contains no comments. This can significantly speed up backwards lexing
     /// because the lexer doesn't need to scan for comments.
-    pub(crate) fn up_to_without_back_comment(offset: TextSize, source: &'a str) -> Self {
+    pub fn up_to_without_back_comment(offset: TextSize, source: &'a str) -> Self {
         let mut tokenizer = Self::up_to(offset, source);
         tokenizer.back_line_has_no_comment = true;
         tokenizer

@@ -375,7 +375,7 @@ impl<'a> SimpleTokenizer<'a> {
     /// Returns the next token from the back. Prefer iterating forwards. Iterating backwards is significantly more expensive
     /// because it needs to check if the line has any comments when encountering any non-trivia token.
-    pub(crate) fn next_token_back(&mut self) -> Token {
+    pub fn next_token_back(&mut self) -> Token {
         self.cursor.start_token();

         let Some(last) = self.cursor.bump_back() else {

@@ -503,7 +503,7 @@ impl<'a> SimpleTokenizer<'a> {
         token
     }

-    pub(crate) fn skip_trivia(self) -> impl Iterator<Item = Token> + DoubleEndedIterator + 'a {
+    pub fn skip_trivia(self) -> impl Iterator<Item = Token> + DoubleEndedIterator + 'a {
         self.filter(|t| !t.kind().is_trivia())
     }
 }

@@ -539,7 +539,7 @@ mod tests {
     use insta::assert_debug_snapshot;
     use ruff_text_size::{TextLen, TextRange, TextSize};

-    use crate::trivia::{lines_after, lines_before, SimpleTokenizer, Token};
+    use crate::tokenizer::{lines_after, lines_before, SimpleTokenizer, Token};

     struct TokenizationTestCase {
         source: &'static str,