From 4204fc002d67dcb002df6852ec89db9a999c2029 Mon Sep 17 00:00:00 2001
From: Charlie Marsh <charlie.r.marsh@gmail.com>
Date: Tue, 18 Jul 2023 14:27:46 -0400
Subject: [PATCH] Remove exception-handler lexing from `unused-bound-exception`
 fix (#5851)

## Summary

The motivation here is that it will make this rule easier to rewrite as
a deferred check. Right now, we can't run this rule in the deferred
phase, because it depends on the `except_handler` to power its autofix.
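Instead of lexing the `except_handler`, we can use the `SimpleTokenizer`
from the formatter, and just lex backwards and forwards from the bound
exception name. To make the tokenizer usable from the linter, it moves
from `ruff_python_formatter` into `ruff_python_whitespace`.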

For context, this rule detects the unused `e` in:

```python
try:
  pass
except ValueError as e:
  pass
```
---
 Cargo.lock                                    |  3 +-
 crates/ruff/src/checkers/ast/mod.rs           |  7 +--
 crates/ruff/src/rules/pyflakes/fixes.rs       | 50 +++++++++----------
 crates/ruff_python_formatter/Cargo.toml       |  1 -
 crates/ruff_python_formatter/src/builders.rs  |  4 +-
 .../src/comments/format.rs                    |  2 +-
 .../src/comments/placement.rs                 |  6 ++-
 .../src/expression/expr_call.rs               |  3 +-
 .../src/expression/expr_slice.rs              | 19 +++----
 .../src/expression/expr_unary_op.rs           | 18 ++++---
 .../src/expression/parentheses.rs             | 10 ++--
 crates/ruff_python_formatter/src/lib.rs       |  1 -
 .../src/other/arguments.rs                    |  4 +-
 .../src/statement/stmt_class_def.rs           | 13 ++---
 .../src/statement/stmt_function_def.rs        |  2 +-
 .../src/statement/stmt_with.rs                |  2 +-
 .../src/statement/suite.rs                    | 18 ++++---
 crates/ruff_python_whitespace/Cargo.toml      |  4 ++
 crates/ruff_python_whitespace/src/lib.rs      |  2 +
 ...hitespace__tokenizer__tests__Reverse.snap} |  2 +-
 ..._identifier_ending_in_non_start_char.snap} |  2 +-
 ...e_word_with_only_id_continuing_chars.snap} |  2 +-
 ...ce__tokenizer__tests__tokenize_bogus.snap} |  2 +-
 ...ce__tokenizer__tests__tokenize_comma.snap} |  2 +-
 ...enizer__tests__tokenize_continuation.snap} |  2 +-
 ...tokenizer__tests__tokenize_multichar.snap} |  2 +-
 ...kenizer__tests__tokenize_parentheses.snap} |  2 +-
 ...ce__tokenizer__tests__tokenize_slash.snap} |  2 +-
 ...tokenizer__tests__tokenize_substring.snap} |  2 +-
 ...e__tokenizer__tests__tokenize_trivia.snap} |  2 +-
 ...ce__tokenizer__tests__tricky_unicode.snap} |  2 +-
 .../src/tokenizer.rs}                         | 44 ++++++++--------
 32 files changed, 125 insertions(+), 112 deletions(-)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__Reverse.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__Reverse.snap} (98%)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__identifier_ending_in_non_start_char.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__identifier_ending_in_non_start_char.snap} (65%)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__ignore_word_with_only_id_continuing_chars.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__ignore_word_with_only_id_continuing_chars.snap} (80%)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_bogus.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_bogus.snap} (97%)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_comma.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_comma.snap} (83%)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_continuation.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_continuation.snap} (88%)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_multichar.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_multichar.snap} (89%)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_parentheses.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_parentheses.snap} (88%)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_slash.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_slash.snap} (91%)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_substring.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_substring.snap} (81%)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_trivia.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_trivia.snap} (84%)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tricky_unicode.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tricky_unicode.snap} (65%)
 rename crates/{ruff_python_formatter/src/trivia.rs => ruff_python_whitespace/src/tokenizer.rs} (93%)

diff --git a/Cargo.lock b/Cargo.lock
index b58fdb4ab8..4d904a5884 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2157,7 +2157,6 @@ dependencies = [
  "similar",
  "smallvec",
  "thiserror",
- "unic-ucd-ident",
 ]
 
 [[package]]
@@ -2195,8 +2194,10 @@ version = "0.0.0"
 name = "ruff_python_whitespace"
 version = "0.0.0"
 dependencies = [
+ "insta",
  "memchr",
  "ruff_text_size",
+ "unic-ucd-ident",
 ]
 
 [[package]]
diff --git a/crates/ruff/src/checkers/ast/mod.rs b/crates/ruff/src/checkers/ast/mod.rs
index 455e62b520..c1bf7fb8a5 100644
--- a/crates/ruff/src/checkers/ast/mod.rs
+++ b/crates/ruff/src/checkers/ast/mod.rs
@@ -4103,11 +4103,8 @@ where
                     );
                     if self.patch(Rule::UnusedVariable) {
                         diagnostic.try_set_fix(|| {
-                            pyflakes::fixes::remove_exception_handler_assignment(
-                                except_handler,
-                                self.locator,
-                            )
-                            .map(Fix::automatic)
+                            pyflakes::fixes::remove_exception_handler_assignment(name, self.locator)
+                                .map(Fix::automatic)
                         });
                     }
                     self.diagnostics.push(diagnostic);
diff --git a/crates/ruff/src/rules/pyflakes/fixes.rs b/crates/ruff/src/rules/pyflakes/fixes.rs
index 694e03bf87..c6cdee0f5a 100644
--- a/crates/ruff/src/rules/pyflakes/fixes.rs
+++ b/crates/ruff/src/rules/pyflakes/fixes.rs
@@ -1,10 +1,10 @@
-use anyhow::{bail, Ok, Result};
+use anyhow::{Context, Ok, Result};
 use ruff_text_size::TextRange;
-use rustpython_parser::ast::{ExceptHandler, Expr, Ranged};
-use rustpython_parser::{lexer, Mode};
+use rustpython_parser::ast::{Expr, Identifier, Ranged};
 
 use ruff_diagnostics::Edit;
 use ruff_python_ast::source_code::{Locator, Stylist};
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
 
 use crate::autofix::codemods::CodegenStylist;
 use crate::cst::matchers::{match_call_mut, match_dict, match_expression};
@@ -90,31 +90,29 @@ pub(crate) fn remove_unused_positional_arguments_from_format_call(
 
 /// Generate a [`Edit`] to remove the binding from an exception handler.
 pub(crate) fn remove_exception_handler_assignment(
-    except_handler: &ExceptHandler,
+    bound_exception: &Identifier,
     locator: &Locator,
 ) -> Result<Edit> {
-    let contents = locator.slice(except_handler.range());
-    let mut fix_start = None;
-    let mut fix_end = None;
+    // Lex backwards, to the token just before the `as`.
+    let mut tokenizer =
+        SimpleTokenizer::up_to(bound_exception.start(), locator.contents()).skip_trivia();
 
-    // End of the token just before the `as` to the semicolon.
-    let mut prev = None;
-    for (tok, range) in
-        lexer::lex_starts_at(contents, Mode::Module, except_handler.start()).flatten()
-    {
-        if tok.is_as() {
-            fix_start = prev;
-        }
-        if tok.is_colon() {
-            fix_end = Some(range.start());
-            break;
-        }
-        prev = Some(range.end());
-    }
+    // Eat the `as` token.
+    let preceding = tokenizer
+        .next_back()
+        .context("expected the exception name to be preceded by `as`")?;
+    debug_assert!(matches!(preceding.kind, TokenKind::As));
 
-    if let (Some(start), Some(end)) = (fix_start, fix_end) {
-        Ok(Edit::deletion(start, end))
-    } else {
-        bail!("Could not find span of exception handler")
-    }
+    // Lex to the end of the preceding token, which should be the exception value.
+    let preceding = tokenizer
+        .next_back()
+        .context("expected the exception name to be preceded by a token")?;
+
+    // Lex forwards, to the `:` token.
+    let following = SimpleTokenizer::starts_at(bound_exception.end(), locator.contents())
+        .next()
+        .context("expected the exception name to be followed by a colon")?;
+    debug_assert!(matches!(following.kind, TokenKind::Colon));
+
+    Ok(Edit::deletion(preceding.end(), following.start()))
 }
diff --git a/crates/ruff_python_formatter/Cargo.toml b/crates/ruff_python_formatter/Cargo.toml
index 381c3ec6c9..ae36a02119 100644
--- a/crates/ruff_python_formatter/Cargo.toml
+++ b/crates/ruff_python_formatter/Cargo.toml
@@ -28,7 +28,6 @@ rustpython-parser = { workspace = true }
 serde = { workspace = true, optional = true }
 smallvec = { workspace = true }
 thiserror = { workspace = true }
-unic-ucd-ident = "0.9.0"
 
 [dev-dependencies]
 ruff_formatter = { path = "../ruff_formatter", features = ["serde"]}
diff --git a/crates/ruff_python_formatter/src/builders.rs b/crates/ruff_python_formatter/src/builders.rs
index 9f24a49ca2..0456d58df9 100644
--- a/crates/ruff_python_formatter/src/builders.rs
+++ b/crates/ruff_python_formatter/src/builders.rs
@@ -2,10 +2,12 @@ use ruff_text_size::{TextRange, TextSize};
 use rustpython_parser::ast::Ranged;
 
 use ruff_formatter::{format_args, write, Argument, Arguments};
+use ruff_python_whitespace::{
+    lines_after, skip_trailing_trivia, SimpleTokenizer, Token, TokenKind,
+};
 
 use crate::context::NodeLevel;
 use crate::prelude::*;
-use crate::trivia::{lines_after, skip_trailing_trivia, SimpleTokenizer, Token, TokenKind};
 use crate::MagicTrailingComma;
 
 /// Adds parentheses and indents `content` if it doesn't fit on a line.
diff --git a/crates/ruff_python_formatter/src/comments/format.rs b/crates/ruff_python_formatter/src/comments/format.rs
index 84b0e3b654..aa7d7296f1 100644
--- a/crates/ruff_python_formatter/src/comments/format.rs
+++ b/crates/ruff_python_formatter/src/comments/format.rs
@@ -3,11 +3,11 @@ use rustpython_parser::ast::Ranged;
 
 use ruff_formatter::{format_args, write, FormatError, SourceCode};
 use ruff_python_ast::node::{AnyNodeRef, AstNode};
+use ruff_python_whitespace::{lines_after, lines_before, skip_trailing_trivia};
 
 use crate::comments::SourceComment;
 use crate::context::NodeLevel;
 use crate::prelude::*;
-use crate::trivia::{lines_after, lines_before, skip_trailing_trivia};
 
 /// Formats the leading comments of a node.
 pub(crate) fn leading_node_comments<T>(node: &T) -> FormatLeadingComments
diff --git a/crates/ruff_python_formatter/src/comments/placement.rs b/crates/ruff_python_formatter/src/comments/placement.rs
index e4d7ec9d82..27ae9688c8 100644
--- a/crates/ruff_python_formatter/src/comments/placement.rs
+++ b/crates/ruff_python_formatter/src/comments/placement.rs
@@ -7,14 +7,16 @@ use rustpython_parser::ast::{Expr, ExprIfExp, ExprSlice, Ranged};
 use ruff_python_ast::node::{AnyNodeRef, AstNode};
 use ruff_python_ast::source_code::Locator;
 use ruff_python_ast::whitespace;
-use ruff_python_whitespace::{PythonWhitespace, UniversalNewlines};
+use ruff_python_whitespace::{
+    first_non_trivia_token_rev, PythonWhitespace, SimpleTokenizer, Token, TokenKind,
+    UniversalNewlines,
+};
 
 use crate::comments::visitor::{CommentPlacement, DecoratedComment};
 use crate::expression::expr_slice::{assign_comment_in_slice, ExprSliceCommentSection};
 use crate::other::arguments::{
     assign_argument_separator_comment_placement, find_argument_separators,
 };
-use crate::trivia::{first_non_trivia_token_rev, SimpleTokenizer, Token, TokenKind};
 
 /// Implements the custom comment placement logic.
 pub(super) fn place_comment<'a>(
diff --git a/crates/ruff_python_formatter/src/expression/expr_call.rs b/crates/ruff_python_formatter/src/expression/expr_call.rs
index c46aa374f7..4054208baf 100644
--- a/crates/ruff_python_formatter/src/expression/expr_call.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_call.rs
@@ -3,14 +3,13 @@ use rustpython_parser::ast::{Expr, ExprCall, Ranged};
 
 use ruff_formatter::write;
 use ruff_python_ast::node::AnyNodeRef;
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
 
 use crate::comments::dangling_comments;
-
 use crate::expression::parentheses::{
     parenthesized, NeedsParentheses, OptionalParentheses, Parentheses,
 };
 use crate::prelude::*;
-use crate::trivia::{SimpleTokenizer, TokenKind};
 use crate::FormatNodeRule;
 
 #[derive(Default)]
diff --git a/crates/ruff_python_formatter/src/expression/expr_slice.rs b/crates/ruff_python_formatter/src/expression/expr_slice.rs
index 0d9dd7445f..93434b8777 100644
--- a/crates/ruff_python_formatter/src/expression/expr_slice.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_slice.rs
@@ -1,16 +1,17 @@
-use crate::comments::{dangling_comments, SourceComment};
-use crate::context::PyFormatContext;
-use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
-use crate::trivia::Token;
-use crate::trivia::{first_non_trivia_token, TokenKind};
-use crate::{AsFormat, FormatNodeRule, PyFormatter};
-use ruff_formatter::prelude::{hard_line_break, line_suffix_boundary, space, text};
-use ruff_formatter::{write, Buffer, Format, FormatError, FormatResult};
-use ruff_python_ast::node::{AnyNodeRef, AstNode};
 use ruff_text_size::TextRange;
 use rustpython_parser::ast::ExprSlice;
 use rustpython_parser::ast::{Expr, Ranged};
 
+use ruff_formatter::prelude::{hard_line_break, line_suffix_boundary, space, text};
+use ruff_formatter::{write, Buffer, Format, FormatError, FormatResult};
+use ruff_python_ast::node::{AnyNodeRef, AstNode};
+use ruff_python_whitespace::{first_non_trivia_token, Token, TokenKind};
+
+use crate::comments::{dangling_comments, SourceComment};
+use crate::context::PyFormatContext;
+use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
+use crate::{AsFormat, FormatNodeRule, PyFormatter};
+
 #[derive(Default)]
 pub struct FormatExprSlice;
 
diff --git a/crates/ruff_python_formatter/src/expression/expr_unary_op.rs b/crates/ruff_python_formatter/src/expression/expr_unary_op.rs
index 97462c4d7f..ffe5f0f69c 100644
--- a/crates/ruff_python_formatter/src/expression/expr_unary_op.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_unary_op.rs
@@ -1,15 +1,17 @@
-use crate::comments::trailing_comments;
-use crate::context::PyFormatContext;
-use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
-use crate::trivia::{SimpleTokenizer, TokenKind};
-use crate::{AsFormat, FormatNodeRule, PyFormatter};
-use ruff_formatter::prelude::{hard_line_break, space, text};
-use ruff_formatter::{Format, FormatContext, FormatResult};
-use ruff_python_ast::node::AnyNodeRef;
 use ruff_text_size::{TextLen, TextRange};
 use rustpython_parser::ast::UnaryOp;
 use rustpython_parser::ast::{ExprUnaryOp, Ranged};
 
+use ruff_formatter::prelude::{hard_line_break, space, text};
+use ruff_formatter::{Format, FormatContext, FormatResult};
+use ruff_python_ast::node::AnyNodeRef;
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
+
+use crate::comments::trailing_comments;
+use crate::context::PyFormatContext;
+use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
+use crate::{AsFormat, FormatNodeRule, PyFormatter};
+
 #[derive(Default)]
 pub struct FormatExprUnaryOp;
 
diff --git a/crates/ruff_python_formatter/src/expression/parentheses.rs b/crates/ruff_python_formatter/src/expression/parentheses.rs
index 281d1896b8..85981345f4 100644
--- a/crates/ruff_python_formatter/src/expression/parentheses.rs
+++ b/crates/ruff_python_formatter/src/expression/parentheses.rs
@@ -1,10 +1,12 @@
-use crate::context::NodeLevel;
-use crate::prelude::*;
-use crate::trivia::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};
+use rustpython_parser::ast::Ranged;
+
 use ruff_formatter::prelude::tag::Condition;
 use ruff_formatter::{format_args, write, Argument, Arguments};
 use ruff_python_ast::node::AnyNodeRef;
-use rustpython_parser::ast::Ranged;
+use ruff_python_whitespace::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};
+
+use crate::context::NodeLevel;
+use crate::prelude::*;
 
 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
 pub(crate) enum OptionalParentheses {
diff --git a/crates/ruff_python_formatter/src/lib.rs b/crates/ruff_python_formatter/src/lib.rs
index 7055bab2fa..d1a7420291 100644
--- a/crates/ruff_python_formatter/src/lib.rs
+++ b/crates/ruff_python_formatter/src/lib.rs
@@ -33,7 +33,6 @@ pub(crate) mod other;
 pub(crate) mod pattern;
 mod prelude;
 pub(crate) mod statement;
-mod trivia;
 
 include!("../../ruff_formatter/shared_traits.rs");
 
diff --git a/crates/ruff_python_formatter/src/other/arguments.rs b/crates/ruff_python_formatter/src/other/arguments.rs
index 3e84558ad2..5e4d7fe6f3 100644
--- a/crates/ruff_python_formatter/src/other/arguments.rs
+++ b/crates/ruff_python_formatter/src/other/arguments.rs
@@ -1,9 +1,11 @@
 use std::usize;
 
+use ruff_text_size::{TextRange, TextSize};
 use rustpython_parser::ast::{Arguments, Ranged};
 
 use ruff_formatter::{format_args, write};
 use ruff_python_ast::node::{AnyNodeRef, AstNode};
+use ruff_python_whitespace::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};
 
 use crate::comments::{
     dangling_comments, leading_comments, leading_node_comments, trailing_comments,
@@ -12,9 +14,7 @@ use crate::comments::{
 use crate::context::NodeLevel;
 use crate::expression::parentheses::parenthesized;
 use crate::prelude::*;
-use crate::trivia::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};
 use crate::FormatNodeRule;
-use ruff_text_size::{TextRange, TextSize};
 
 #[derive(Default)]
 pub struct FormatArguments;
diff --git a/crates/ruff_python_formatter/src/statement/stmt_class_def.rs b/crates/ruff_python_formatter/src/statement/stmt_class_def.rs
index 86c7688284..0876ac7347 100644
--- a/crates/ruff_python_formatter/src/statement/stmt_class_def.rs
+++ b/crates/ruff_python_formatter/src/statement/stmt_class_def.rs
@@ -1,12 +1,13 @@
-use crate::comments::trailing_comments;
-
-use crate::expression::parentheses::{parenthesized, Parentheses};
-use crate::prelude::*;
-use crate::trivia::{SimpleTokenizer, TokenKind};
-use ruff_formatter::write;
 use ruff_text_size::TextRange;
 use rustpython_parser::ast::{Ranged, StmtClassDef};
 
+use ruff_formatter::write;
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
+
+use crate::comments::trailing_comments;
+use crate::expression::parentheses::{parenthesized, Parentheses};
+use crate::prelude::*;
+
 #[derive(Default)]
 pub struct FormatStmtClassDef;
 
diff --git a/crates/ruff_python_formatter/src/statement/stmt_function_def.rs b/crates/ruff_python_formatter/src/statement/stmt_function_def.rs
index 69f370e7c1..f7fa032174 100644
--- a/crates/ruff_python_formatter/src/statement/stmt_function_def.rs
+++ b/crates/ruff_python_formatter/src/statement/stmt_function_def.rs
@@ -2,12 +2,12 @@ use rustpython_parser::ast::{Ranged, StmtFunctionDef};
 
 use ruff_formatter::{write, FormatOwnedWithRule, FormatRefWithRule};
 use ruff_python_ast::function::AnyFunctionDefinition;
+use ruff_python_whitespace::{lines_after, skip_trailing_trivia};
 
 use crate::comments::{leading_comments, trailing_comments};
 use crate::context::NodeLevel;
 use crate::expression::parentheses::{optional_parentheses, Parentheses};
 use crate::prelude::*;
-use crate::trivia::{lines_after, skip_trailing_trivia};
 use crate::FormatNodeRule;
 
 #[derive(Default)]
diff --git a/crates/ruff_python_formatter/src/statement/stmt_with.rs b/crates/ruff_python_formatter/src/statement/stmt_with.rs
index 56eca66b17..2c610f029d 100644
--- a/crates/ruff_python_formatter/src/statement/stmt_with.rs
+++ b/crates/ruff_python_formatter/src/statement/stmt_with.rs
@@ -3,13 +3,13 @@ use rustpython_parser::ast::{Ranged, StmtAsyncWith, StmtWith, Suite, WithItem};
 
 use ruff_formatter::{format_args, write, FormatError};
 use ruff_python_ast::node::AnyNodeRef;
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
 
 use crate::comments::trailing_comments;
 use crate::expression::parentheses::{
     in_parentheses_only_soft_line_break_or_space, optional_parentheses,
 };
 use crate::prelude::*;
-use crate::trivia::{SimpleTokenizer, TokenKind};
 use crate::FormatNodeRule;
 
 pub(super) enum AnyStatementWith<'a> {
diff --git a/crates/ruff_python_formatter/src/statement/suite.rs b/crates/ruff_python_formatter/src/statement/suite.rs
index 92fc32ed4e..09a12f6697 100644
--- a/crates/ruff_python_formatter/src/statement/suite.rs
+++ b/crates/ruff_python_formatter/src/statement/suite.rs
@@ -1,10 +1,12 @@
-use crate::context::NodeLevel;
-use crate::prelude::*;
-use crate::trivia::lines_before;
+use rustpython_parser::ast::{Ranged, Stmt, Suite};
+
 use ruff_formatter::{
     format_args, write, FormatOwnedWithRule, FormatRefWithRule, FormatRuleWithOptions,
 };
-use rustpython_parser::ast::{Ranged, Stmt, Suite};
+use ruff_python_whitespace::lines_before;
+
+use crate::context::NodeLevel;
+use crate::prelude::*;
 
 /// Level at which the [`Suite`] appears in the source code.
 #[derive(Copy, Clone, Debug)]
@@ -185,13 +187,15 @@ impl<'ast> IntoFormat<PyFormatContext<'ast>> for Suite {
 
 #[cfg(test)]
 mod tests {
+    use rustpython_parser::ast::Suite;
+    use rustpython_parser::Parse;
+
+    use ruff_formatter::format;
+
     use crate::comments::Comments;
     use crate::prelude::*;
     use crate::statement::suite::SuiteLevel;
     use crate::PyFormatOptions;
-    use ruff_formatter::format;
-    use rustpython_parser::ast::Suite;
-    use rustpython_parser::Parse;
 
     fn format_suite(level: SuiteLevel) -> String {
         let source = r#"
diff --git a/crates/ruff_python_whitespace/Cargo.toml b/crates/ruff_python_whitespace/Cargo.toml
index cbfc1aea24..22b36562d3 100644
--- a/crates/ruff_python_whitespace/Cargo.toml
+++ b/crates/ruff_python_whitespace/Cargo.toml
@@ -16,3 +16,7 @@ license = { workspace = true }
 ruff_text_size = { workspace = true }
 
 memchr = { workspace = true }
+unic-ucd-ident = "0.9.0"
+
+[dev-dependencies]
+insta = { workspace = true }
diff --git a/crates/ruff_python_whitespace/src/lib.rs b/crates/ruff_python_whitespace/src/lib.rs
index b8c95e351c..4e16d7ca2d 100644
--- a/crates/ruff_python_whitespace/src/lib.rs
+++ b/crates/ruff_python_whitespace/src/lib.rs
@@ -1,7 +1,9 @@
 mod cursor;
 mod newlines;
+mod tokenizer;
 mod whitespace;
 
 pub use cursor::*;
 pub use newlines::*;
+pub use tokenizer::*;
 pub use whitespace::*;
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__Reverse.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__Reverse.snap
similarity index 98%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__Reverse.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__Reverse.snap
index ec701539c6..3ae643205e 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__Reverse.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__Reverse.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokenize_reverse()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__identifier_ending_in_non_start_char.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__identifier_ending_in_non_start_char.snap
similarity index 65%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__identifier_ending_in_non_start_char.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__identifier_ending_in_non_start_char.snap
index 15e9d84407..6f19b91273 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__identifier_ending_in_non_start_char.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__identifier_ending_in_non_start_char.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__ignore_word_with_only_id_continuing_chars.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__ignore_word_with_only_id_continuing_chars.snap
similarity index 80%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__ignore_word_with_only_id_continuing_chars.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__ignore_word_with_only_id_continuing_chars.snap
index 26e9fd18bc..ccb0282831 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__ignore_word_with_only_id_continuing_chars.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__ignore_word_with_only_id_continuing_chars.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_bogus.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_bogus.snap
similarity index 97%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_bogus.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_bogus.snap
index 7936816089..f5005ec2c9 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_bogus.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_bogus.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_comma.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_comma.snap
similarity index 83%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_comma.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_comma.snap
index 38d1fed60a..a1f98abd4e 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_comma.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_comma.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_continuation.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_continuation.snap
similarity index 88%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_continuation.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_continuation.snap
index 83079fe81a..5e9802280d 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_continuation.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_continuation.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_multichar.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_multichar.snap
similarity index 89%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_multichar.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_multichar.snap
index 16a1293b44..ff371d781f 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_multichar.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_multichar.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_parentheses.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_parentheses.snap
similarity index 88%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_parentheses.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_parentheses.snap
index ccd6969c2d..6c792f7cf0 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_parentheses.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_parentheses.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_slash.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_slash.snap
similarity index 91%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_slash.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_slash.snap
index 093715cf17..f82f501d65 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_slash.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_slash.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_substring.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_substring.snap
similarity index 81%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_substring.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_substring.snap
index 181b438c3f..9b06f81cb9 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_substring.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_substring.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_trivia.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_trivia.snap
similarity index 84%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_trivia.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_trivia.snap
index f1d708d6cb..79f9130287 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_trivia.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_trivia.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tricky_unicode.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tricky_unicode.snap
similarity index 65%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tricky_unicode.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tricky_unicode.snap
index 91b9cb397a..c8aab65b39 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tricky_unicode.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tricky_unicode.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/trivia.rs b/crates/ruff_python_whitespace/src/tokenizer.rs
similarity index 93%
rename from crates/ruff_python_formatter/src/trivia.rs
rename to crates/ruff_python_whitespace/src/tokenizer.rs
index 63f92b6e2f..c8aa15dbb7 100644
--- a/crates/ruff_python_formatter/src/trivia.rs
+++ b/crates/ruff_python_whitespace/src/tokenizer.rs
@@ -1,7 +1,7 @@
 use ruff_text_size::{TextLen, TextRange, TextSize};
 use unic_ucd_ident::{is_xid_continue, is_xid_start};
 
-use ruff_python_whitespace::{is_python_whitespace, Cursor};
+use crate::{is_python_whitespace, Cursor};
 
 /// Searches for the first non-trivia character in `range`.
 ///
@@ -11,7 +11,7 @@ use ruff_python_whitespace::{is_python_whitespace, Cursor};
 /// of the character, the second item the non-trivia character.
 ///
 /// Returns `None` if the range is empty or only contains trivia (whitespace or comments).
-pub(crate) fn first_non_trivia_token(offset: TextSize, code: &str) -> Option<Token> {
+pub fn first_non_trivia_token(offset: TextSize, code: &str) -> Option<Token> {
     SimpleTokenizer::starts_at(offset, code)
         .skip_trivia()
         .next()
@@ -23,14 +23,14 @@ pub(crate) fn first_non_trivia_token(offset: TextSize, code: &str) -> Option<Tok
 /// ## Notes
 ///
 /// Prefer [`first_non_trivia_token`] whenever possible because reverse lookup is expensive because of comments.
-pub(crate) fn first_non_trivia_token_rev(offset: TextSize, code: &str) -> Option<Token> {
+pub fn first_non_trivia_token_rev(offset: TextSize, code: &str) -> Option<Token> {
     SimpleTokenizer::up_to(offset, code)
         .skip_trivia()
         .next_back()
 }
 
 /// Returns the number of newlines between `offset` and the first non whitespace character in the source code.
-pub(crate) fn lines_before(offset: TextSize, code: &str) -> u32 {
+pub fn lines_before(offset: TextSize, code: &str) -> u32 {
     let tokens = SimpleTokenizer::up_to(offset, code);
     let mut newlines = 0u32;
 
@@ -52,7 +52,7 @@ pub(crate) fn lines_before(offset: TextSize, code: &str) -> u32 {
 }
 
 /// Counts the empty lines between `offset` and the first non-whitespace character.
-pub(crate) fn lines_after(offset: TextSize, code: &str) -> u32 {
+pub fn lines_after(offset: TextSize, code: &str) -> u32 {
     let tokens = SimpleTokenizer::starts_at(offset, code);
     let mut newlines = 0u32;
 
@@ -74,7 +74,7 @@ pub(crate) fn lines_after(offset: TextSize, code: &str) -> u32 {
 }
 
 /// Returns the position after skipping any trailing trivia up to, but not including the newline character.
-pub(crate) fn skip_trailing_trivia(offset: TextSize, code: &str) -> TextSize {
+pub fn skip_trailing_trivia(offset: TextSize, code: &str) -> TextSize {
     let tokenizer = SimpleTokenizer::starts_at(offset, code);
 
     for token in tokenizer {
@@ -110,32 +110,32 @@ fn is_non_ascii_identifier_start(c: char) -> bool {
 }
 
 #[derive(Clone, Debug, Eq, PartialEq, Hash)]
-pub(crate) struct Token {
-    pub(crate) kind: TokenKind,
-    pub(crate) range: TextRange,
+pub struct Token {
+    pub kind: TokenKind,
+    pub range: TextRange,
 }
 
 impl Token {
-    pub(crate) const fn kind(&self) -> TokenKind {
+    pub const fn kind(&self) -> TokenKind {
         self.kind
     }
 
     #[allow(unused)]
-    pub(crate) const fn range(&self) -> TextRange {
+    pub const fn range(&self) -> TextRange {
         self.range
     }
 
-    pub(crate) const fn start(&self) -> TextSize {
+    pub const fn start(&self) -> TextSize {
         self.range.start()
     }
 
-    pub(crate) const fn end(&self) -> TextSize {
+    pub const fn end(&self) -> TextSize {
         self.range.end()
     }
 }
 
 #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
-pub(crate) enum TokenKind {
+pub enum TokenKind {
     /// A comment, not including the trailing new line.
     Comment,
 
@@ -247,7 +247,7 @@ impl TokenKind {
 ///
 /// The tokenizer doesn't guarantee any correctness after it returned a [`TokenKind::Other`]. That's why it
 /// will return [`TokenKind::Bogus`] for every character after until it reaches the end of the file.
-pub(crate) struct SimpleTokenizer<'a> {
+pub struct SimpleTokenizer<'a> {
     offset: TextSize,
     back_offset: TextSize,
     /// `true` when it is known that the current `back` line has no comment for sure.
@@ -258,7 +258,7 @@ pub(crate) struct SimpleTokenizer<'a> {
 }
 
 impl<'a> SimpleTokenizer<'a> {
-    pub(crate) fn new(source: &'a str, range: TextRange) -> Self {
+    pub fn new(source: &'a str, range: TextRange) -> Self {
         Self {
             offset: range.start(),
             back_offset: range.end(),
@@ -269,20 +269,20 @@ impl<'a> SimpleTokenizer<'a> {
         }
     }
 
-    pub(crate) fn starts_at(offset: TextSize, source: &'a str) -> Self {
+    pub fn starts_at(offset: TextSize, source: &'a str) -> Self {
         let range = TextRange::new(offset, source.text_len());
         Self::new(source, range)
     }
 
     /// Creates a tokenizer that lexes tokens from the start of `source` up to `offset`.
-    pub(crate) fn up_to(offset: TextSize, source: &'a str) -> Self {
+    pub fn up_to(offset: TextSize, source: &'a str) -> Self {
         Self::new(source, TextRange::up_to(offset))
     }
 
     /// Creates a tokenizer that lexes tokens from the start of `source` up to `offset`, and informs
     /// the lexer that the line at `offset` contains no comments. This can significantly speed up backwards lexing
     /// because the lexer doesn't need to scan for comments.
-    pub(crate) fn up_to_without_back_comment(offset: TextSize, source: &'a str) -> Self {
+    pub fn up_to_without_back_comment(offset: TextSize, source: &'a str) -> Self {
         let mut tokenizer = Self::up_to(offset, source);
         tokenizer.back_line_has_no_comment = true;
         tokenizer
@@ -375,7 +375,7 @@ impl<'a> SimpleTokenizer<'a> {
 
     /// Returns the next token from the back. Prefer iterating forwards. Iterating backwards is significantly more expensive
     /// because it needs to check if the line has any comments when encountering any non-trivia token.
-    pub(crate) fn next_token_back(&mut self) -> Token {
+    pub fn next_token_back(&mut self) -> Token {
         self.cursor.start_token();
 
         let Some(last) = self.cursor.bump_back() else {
@@ -503,7 +503,7 @@ impl<'a> SimpleTokenizer<'a> {
         token
     }
 
-    pub(crate) fn skip_trivia(self) -> impl Iterator<Item = Token> + DoubleEndedIterator + 'a {
+    pub fn skip_trivia(self) -> impl Iterator<Item = Token> + DoubleEndedIterator + 'a {
         self.filter(|t| !t.kind().is_trivia())
     }
 }
@@ -539,7 +539,7 @@ mod tests {
     use insta::assert_debug_snapshot;
     use ruff_text_size::{TextLen, TextRange, TextSize};
 
-    use crate::trivia::{lines_after, lines_before, SimpleTokenizer, Token};
+    use crate::tokenizer::{lines_after, lines_before, SimpleTokenizer, Token};
 
     struct TokenizationTestCase {
         source: &'static str,