From 2cbe1733c8c4d4d884fba33c4804bce94b31cdd2 Mon Sep 17 00:00:00 2001
From: konsti
Date: Sat, 16 Sep 2023 05:21:45 +0200
Subject: [PATCH] Use CommentRanges in backwards lexing (#7360)

## Summary

The tokenizer was split into a forward and a backwards tokenizer. The
backwards tokenizer uses the same method names as the forward one (e.g.
`next_token`) and receives the comment ranges that we already built, so it
can skip comments while lexing backwards. (Two usage sketches of the new
API follow the diff.)

---------

Co-authored-by: Micha Reiser
---
 Cargo.lock | 4 +-
 crates/ruff/src/checkers/noqa.rs | 3 +-
 crates/ruff/src/noqa.rs | 29 +-
 .../flake8_pytest_style/rules/parametrize.rs | 16 +-
 .../flake8_simplify/rules/ast_bool_op.rs | 22 +-
 .../rules/flake8_simplify/rules/ast_ifexp.rs | 9 +-
 .../flake8_simplify/rules/key_in_dict.rs | 18 +-
 .../pandas_vet/rules/inplace_argument.rs | 14 +-
 crates/ruff/src/rules/pycodestyle/helpers.rs | 20 +-
 .../pycodestyle/rules/literal_comparisons.rs | 1 +
 .../src/rules/pycodestyle/rules/not_tests.rs | 2 +
 crates/ruff/src/rules/pyflakes/fixes.rs | 19 +-
 .../rules/pyflakes/rules/unused_variable.rs | 1 +
 .../pyupgrade/rules/yield_in_for_loop.rs | 1 +
 crates/ruff_python_ast/src/parenthesize.rs | 10 +-
 crates/ruff_python_ast/tests/parenthesize.rs | 57 +-
 .../src/comments/debug.rs | 4 +-
 .../ruff_python_formatter/src/comments/mod.rs | 31 +-
 .../src/comments/placement.rs | 18 +-
 .../src/comments/visitor.rs | 10 +-
 .../src/expression/binary_like.rs | 48 +-
 .../src/expression/expr_attribute.rs | 15 +-
 .../src/expression/expr_call.rs | 13 +-
 .../src/expression/expr_if_exp.rs | 6 +-
 .../src/expression/expr_subscript.rs | 13 +-
 .../src/expression/expr_unary_op.rs | 12 +-
 .../src/expression/mod.rs | 41 +-
 .../src/expression/parentheses.rs | 20 +-
 crates/ruff_python_formatter/src/lib.rs | 5 +-
 .../ruff_python_formatter/src/pattern/mod.rs | 24 +-
 .../src/statement/suite.rs | 8 +-
 .../ruff_python_index/src/comment_ranges.rs | 70 +-
 crates/ruff_python_index/src/indexer.rs | 7 +-
 crates/ruff_python_index/src/lib.rs | 2 +-
 crates/ruff_python_trivia/Cargo.toml | 3 +-
 .../ruff_python_trivia/src/comment_ranges.rs | 71 ++
 crates/ruff_python_trivia/src/cursor.rs | 2 +-
 crates/ruff_python_trivia/src/lib.rs | 2 +
 crates/ruff_python_trivia/src/tokenizer.rs | 712 +++++++-----
 crates/ruff_wasm/Cargo.toml | 1 +
 crates/ruff_wasm/src/lib.rs | 8 +-
 41 files changed, 744 insertions(+), 628 deletions(-)
 create mode 100644 crates/ruff_python_trivia/src/comment_ranges.rs

diff --git a/Cargo.lock b/Cargo.lock
index 488aaa2df9..02ead9b8cf 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2430,12 +2430,11 @@ name = "ruff_python_trivia"
 version = "0.0.0"
 dependencies = [
 "insta",
- "memchr",
+ "itertools",
 "ruff_python_ast",
 "ruff_python_parser",
 "ruff_source_file",
 "ruff_text_size",
- "smallvec",
 "unicode-ident",
 ]
@@ -2492,6 +2491,7 @@ dependencies = [
 "ruff_python_formatter",
 "ruff_python_index",
 "ruff_python_parser",
+ "ruff_python_trivia",
 "ruff_source_file",
 "ruff_text_size",
 "ruff_workspace",
diff --git a/crates/ruff/src/checkers/noqa.rs b/crates/ruff/src/checkers/noqa.rs
index a6995bd4ee..0e323ab032 100644
--- a/crates/ruff/src/checkers/noqa.rs
+++ b/crates/ruff/src/checkers/noqa.rs
@@ -6,6 +6,7 @@ use itertools::Itertools;
 use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
 use ruff_diagnostics::{Diagnostic, Edit, Fix};
+use ruff_python_trivia::CommentRanges;
 use ruff_source_file::Locator;
 use crate::noqa;
@@ -19,7 +20,7 @@ pub(crate) fn check_noqa(
 diagnostics: &mut Vec<Diagnostic>,
 path: &Path,
 locator: &Locator,
- comment_ranges: &[TextRange],
+ comment_ranges:
&CommentRanges, noqa_line_for: &NoqaMapping, analyze_directives: bool, settings: &Settings, diff --git a/crates/ruff/src/noqa.rs b/crates/ruff/src/noqa.rs index c905f7bd5a..43fc2f5b97 100644 --- a/crates/ruff/src/noqa.rs +++ b/crates/ruff/src/noqa.rs @@ -11,7 +11,7 @@ use log::warn; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; use ruff_diagnostics::Diagnostic; -use ruff_python_trivia::indentation_at_offset; +use ruff_python_trivia::{indentation_at_offset, CommentRanges}; use ruff_source_file::{LineEnding, Locator}; use crate::codes::NoqaCode; @@ -234,7 +234,7 @@ impl FileExemption { /// globally ignored within the file. pub(crate) fn try_extract( contents: &str, - comment_ranges: &[TextRange], + comment_ranges: &CommentRanges, path: &Path, locator: &Locator, ) -> Option { @@ -457,7 +457,7 @@ pub(crate) fn add_noqa( path: &Path, diagnostics: &[Diagnostic], locator: &Locator, - commented_lines: &[TextRange], + comment_ranges: &CommentRanges, noqa_line_for: &NoqaMapping, line_ending: LineEnding, ) -> Result { @@ -465,7 +465,7 @@ pub(crate) fn add_noqa( path, diagnostics, locator, - commented_lines, + comment_ranges, noqa_line_for, line_ending, ); @@ -477,7 +477,7 @@ fn add_noqa_inner( path: &Path, diagnostics: &[Diagnostic], locator: &Locator, - commented_ranges: &[TextRange], + comment_ranges: &CommentRanges, noqa_line_for: &NoqaMapping, line_ending: LineEnding, ) -> (usize, String) { @@ -487,8 +487,8 @@ fn add_noqa_inner( // Whether the file is exempted from all checks. // Codes that are globally exempted (within the current file). - let exemption = FileExemption::try_extract(locator.contents(), commented_ranges, path, locator); - let directives = NoqaDirectives::from_commented_ranges(commented_ranges, path, locator); + let exemption = FileExemption::try_extract(locator.contents(), comment_ranges, path, locator); + let directives = NoqaDirectives::from_commented_ranges(comment_ranges, path, locator); // Mark any non-ignored diagnostics. 
for diagnostic in diagnostics { @@ -658,7 +658,7 @@ pub(crate) struct NoqaDirectives<'a> { impl<'a> NoqaDirectives<'a> { pub(crate) fn from_commented_ranges( - comment_ranges: &[TextRange], + comment_ranges: &CommentRanges, path: &Path, locator: &'a Locator<'a>, ) -> Self { @@ -800,6 +800,7 @@ mod tests { use ruff_text_size::{TextRange, TextSize}; use ruff_diagnostics::Diagnostic; + use ruff_python_trivia::CommentRanges; use ruff_source_file::{LineEnding, Locator}; use crate::noqa::{add_noqa_inner, Directive, NoqaMapping, ParsedFileExemption}; @@ -997,7 +998,7 @@ mod tests { path, &[], &Locator::new(contents), - &[], + &CommentRanges::default(), &noqa_line_for, LineEnding::Lf, ); @@ -1017,7 +1018,7 @@ mod tests { path, &diagnostics, &Locator::new(contents), - &[], + &CommentRanges::default(), &noqa_line_for, LineEnding::Lf, ); @@ -1038,11 +1039,13 @@ mod tests { ]; let contents = "x = 1 # noqa: E741\n"; let noqa_line_for = NoqaMapping::default(); + let comment_ranges = + CommentRanges::new(vec![TextRange::new(TextSize::from(7), TextSize::from(19))]); let (count, output) = add_noqa_inner( path, &diagnostics, &Locator::new(contents), - &[TextRange::new(TextSize::from(7), TextSize::from(19))], + &comment_ranges, &noqa_line_for, LineEnding::Lf, ); @@ -1063,11 +1066,13 @@ mod tests { ]; let contents = "x = 1 # noqa"; let noqa_line_for = NoqaMapping::default(); + let comment_ranges = + CommentRanges::new(vec![TextRange::new(TextSize::from(7), TextSize::from(13))]); let (count, output) = add_noqa_inner( path, &diagnostics, &Locator::new(contents), - &[TextRange::new(TextSize::from(7), TextSize::from(13))], + &comment_ranges, &noqa_line_for, LineEnding::Lf, ); diff --git a/crates/ruff/src/rules/flake8_pytest_style/rules/parametrize.rs b/crates/ruff/src/rules/flake8_pytest_style/rules/parametrize.rs index fbe674dc1c..293c4b8f08 100644 --- a/crates/ruff/src/rules/flake8_pytest_style/rules/parametrize.rs +++ b/crates/ruff/src/rules/flake8_pytest_style/rules/parametrize.rs @@ -9,6 +9,7 @@ use ruff_python_ast::node::AstNode; use ruff_python_ast::parenthesize::parenthesized_range; use ruff_python_ast::{self as ast, Arguments, Constant, Decorator, Expr, ExprContext}; use ruff_python_codegen::Generator; +use ruff_python_trivia::CommentRanges; use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer}; use ruff_text_size::{Ranged, TextRange, TextSize}; @@ -298,12 +299,17 @@ fn elts_to_csv(elts: &[Expr], generator: Generator) -> Option { fn get_parametrize_name_range( decorator: &Decorator, expr: &Expr, + comment_ranges: &CommentRanges, source: &str, ) -> Option { - decorator - .expression - .as_call_expr() - .and_then(|call| parenthesized_range(expr.into(), call.arguments.as_any_node_ref(), source)) + decorator.expression.as_call_expr().and_then(|call| { + parenthesized_range( + expr.into(), + call.arguments.as_any_node_ref(), + comment_ranges, + source, + ) + }) } /// PT006 @@ -322,6 +328,7 @@ fn check_names(checker: &mut Checker, decorator: &Decorator, expr: &Expr) { let name_range = get_parametrize_name_range( decorator, expr, + checker.indexer().comment_ranges(), checker.locator().contents(), ) .unwrap_or(expr.range()); @@ -356,6 +363,7 @@ fn check_names(checker: &mut Checker, decorator: &Decorator, expr: &Expr) { let name_range = get_parametrize_name_range( decorator, expr, + checker.indexer().comment_ranges(), checker.locator().contents(), ) .unwrap_or(expr.range()); diff --git a/crates/ruff/src/rules/flake8_simplify/rules/ast_bool_op.rs b/crates/ruff/src/rules/flake8_simplify/rules/ast_bool_op.rs 
index ea895e5123..b60b5de122 100644 --- a/crates/ruff/src/rules/flake8_simplify/rules/ast_bool_op.rs +++ b/crates/ruff/src/rules/flake8_simplify/rules/ast_bool_op.rs @@ -773,9 +773,14 @@ fn is_short_circuit( edit = Some(get_short_circuit_edit( value, TextRange::new( - parenthesized_range(furthest.into(), expr.into(), checker.locator().contents()) - .unwrap_or(furthest.range()) - .start(), + parenthesized_range( + furthest.into(), + expr.into(), + checker.indexer().comment_ranges(), + checker.locator().contents(), + ) + .unwrap_or(furthest.range()) + .start(), expr.end(), ), short_circuit_truthiness, @@ -796,9 +801,14 @@ fn is_short_circuit( edit = Some(get_short_circuit_edit( next_value, TextRange::new( - parenthesized_range(furthest.into(), expr.into(), checker.locator().contents()) - .unwrap_or(furthest.range()) - .start(), + parenthesized_range( + furthest.into(), + expr.into(), + checker.indexer().comment_ranges(), + checker.locator().contents(), + ) + .unwrap_or(furthest.range()) + .start(), expr.end(), ), short_circuit_truthiness, diff --git a/crates/ruff/src/rules/flake8_simplify/rules/ast_ifexp.rs b/crates/ruff/src/rules/flake8_simplify/rules/ast_ifexp.rs index 1dc4919308..f5d890f61b 100644 --- a/crates/ruff/src/rules/flake8_simplify/rules/ast_ifexp.rs +++ b/crates/ruff/src/rules/flake8_simplify/rules/ast_ifexp.rs @@ -163,8 +163,13 @@ pub(crate) fn if_expr_with_true_false( checker .locator() .slice( - parenthesized_range(test.into(), expr.into(), checker.locator().contents()) - .unwrap_or(test.range()), + parenthesized_range( + test.into(), + expr.into(), + checker.indexer().comment_ranges(), + checker.locator().contents(), + ) + .unwrap_or(test.range()), ) .to_string(), expr.range(), diff --git a/crates/ruff/src/rules/flake8_simplify/rules/key_in_dict.rs b/crates/ruff/src/rules/flake8_simplify/rules/key_in_dict.rs index 9fe20266f2..bddbeffc84 100644 --- a/crates/ruff/src/rules/flake8_simplify/rules/key_in_dict.rs +++ b/crates/ruff/src/rules/flake8_simplify/rules/key_in_dict.rs @@ -88,10 +88,20 @@ fn key_in_dict( } // Extract the exact range of the left and right expressions. 
- let left_range = parenthesized_range(left.into(), parent, checker.locator().contents()) - .unwrap_or(left.range()); - let right_range = parenthesized_range(right.into(), parent, checker.locator().contents()) - .unwrap_or(right.range()); + let left_range = parenthesized_range( + left.into(), + parent, + checker.indexer().comment_ranges(), + checker.locator().contents(), + ) + .unwrap_or(left.range()); + let right_range = parenthesized_range( + right.into(), + parent, + checker.indexer().comment_ranges(), + checker.locator().contents(), + ) + .unwrap_or(right.range()); let mut diagnostic = Diagnostic::new( InDictKeys { diff --git a/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs b/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs index 0c88965082..417b769591 100644 --- a/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs +++ b/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs @@ -3,6 +3,7 @@ use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::helpers::is_const_true; use ruff_python_ast::parenthesize::parenthesized_range; use ruff_python_ast::{self as ast, Keyword, Stmt}; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; use ruff_text_size::Ranged; @@ -85,6 +86,7 @@ pub(crate) fn inplace_argument(checker: &mut Checker, call: &ast::ExprCall) { call, keyword, statement, + checker.indexer().comment_ranges(), checker.locator(), ) { diagnostic.set_fix(fix); @@ -107,15 +109,21 @@ fn convert_inplace_argument_to_assignment( call: &ast::ExprCall, keyword: &Keyword, statement: &Stmt, + comment_ranges: &CommentRanges, locator: &Locator, ) -> Option { // Add the assignment. let attr = call.func.as_attribute_expr()?; let insert_assignment = Edit::insertion( format!("{name} = ", name = locator.slice(attr.value.range())), - parenthesized_range(call.into(), statement.into(), locator.contents()) - .unwrap_or(call.range()) - .start(), + parenthesized_range( + call.into(), + statement.into(), + comment_ranges, + locator.contents(), + ) + .unwrap_or(call.range()) + .start(), ); // Remove the `inplace` argument. diff --git a/crates/ruff/src/rules/pycodestyle/helpers.rs b/crates/ruff/src/rules/pycodestyle/helpers.rs index ce8bd311a3..9b01107260 100644 --- a/crates/ruff/src/rules/pycodestyle/helpers.rs +++ b/crates/ruff/src/rules/pycodestyle/helpers.rs @@ -3,6 +3,7 @@ use unicode_width::UnicodeWidthStr; use ruff_python_ast::node::AnyNodeRef; use ruff_python_ast::parenthesize::parenthesized_range; use ruff_python_ast::{CmpOp, Expr}; +use ruff_python_trivia::CommentRanges; use ruff_source_file::{Line, Locator}; use ruff_text_size::{Ranged, TextLen, TextRange}; @@ -17,6 +18,7 @@ pub(super) fn generate_comparison( ops: &[CmpOp], comparators: &[Expr], parent: AnyNodeRef, + comment_ranges: &CommentRanges, locator: &Locator, ) -> String { let start = left.start(); @@ -24,9 +26,12 @@ pub(super) fn generate_comparison( let mut contents = String::with_capacity(usize::from(end - start)); // Add the left side of the comparison. - contents.push_str(locator.slice( - parenthesized_range(left.into(), parent, locator.contents()).unwrap_or(left.range()), - )); + contents.push_str( + locator.slice( + parenthesized_range(left.into(), parent, comment_ranges, locator.contents()) + .unwrap_or(left.range()), + ), + ); for (op, comparator) in ops.iter().zip(comparators) { // Add the operator. @@ -46,8 +51,13 @@ pub(super) fn generate_comparison( // Add the right side of the comparison. 
contents.push_str(
 locator.slice(
- parenthesized_range(comparator.into(), parent, locator.contents())
- .unwrap_or(comparator.range()),
+ parenthesized_range(
+ comparator.into(),
+ parent,
+ comment_ranges,
+ locator.contents(),
+ )
+ .unwrap_or(comparator.range()),
 ),
 );
 }
diff --git a/crates/ruff/src/rules/pycodestyle/rules/literal_comparisons.rs b/crates/ruff/src/rules/pycodestyle/rules/literal_comparisons.rs
index 20536c4125..2abf9faf65 100644
--- a/crates/ruff/src/rules/pycodestyle/rules/literal_comparisons.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/literal_comparisons.rs
@@ -283,6 +283,7 @@ pub(crate) fn literal_comparisons(checker: &mut Checker, compare: &ast::ExprComp
 &ops,
 &compare.comparators,
 compare.into(),
+ checker.indexer().comment_ranges(),
 checker.locator(),
 );
 for diagnostic in &mut diagnostics {
diff --git a/crates/ruff/src/rules/pycodestyle/rules/not_tests.rs b/crates/ruff/src/rules/pycodestyle/rules/not_tests.rs
index 4eff0c9670..024b685ca2 100644
--- a/crates/ruff/src/rules/pycodestyle/rules/not_tests.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/not_tests.rs
@@ -100,6 +100,7 @@ pub(crate) fn not_tests(checker: &mut Checker, unary_op: &ast::ExprUnaryOp) {
 &[CmpOp::NotIn],
 comparators,
 unary_op.into(),
+ checker.indexer().comment_ranges(),
 checker.locator(),
 ),
 unary_op.range(),
@@ -118,6 +119,7 @@ pub(crate) fn not_tests(checker: &mut Checker, unary_op: &ast::ExprUnaryOp) {
 &[CmpOp::IsNot],
 comparators,
 unary_op.into(),
+ checker.indexer().comment_ranges(),
 checker.locator(),
 ),
 unary_op.range(),
diff --git a/crates/ruff/src/rules/pyflakes/fixes.rs b/crates/ruff/src/rules/pyflakes/fixes.rs
index ae6006d8d7..7f0675de9e 100644
--- a/crates/ruff/src/rules/pyflakes/fixes.rs
+++ b/crates/ruff/src/rules/pyflakes/fixes.rs
@@ -4,7 +4,7 @@ use ruff_diagnostics::Edit;
 use ruff_python_ast as ast;
 use ruff_python_codegen::Stylist;
 use ruff_python_semantic::Binding;
-use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
+use ruff_python_trivia::{BackwardsTokenizer, SimpleTokenKind, SimpleTokenizer};
 use ruff_source_file::Locator;
 use ruff_text_size::Ranged;
@@ -91,20 +91,27 @@ pub(crate) fn remove_exception_handler_assignment(
 bound_exception: &Binding,
 locator: &Locator,
 ) -> Result<Edit> {
- // Lex backwards, to the token just before the `as`.
+ // Find the position just after the exception name. This is a late pass, so we only have the
+ // binding and can no longer look up its parent in the AST.
+ // ```
+ // except ZeroDivisionError as err:
+ //                              ^^^ This is the bound_exception range
+ //                          ^^^^ lex this range
+ //                          ^ preceding_end (we want to remove from here)
+ // ```
+ // There can't be any comments in that range.
 let mut tokenizer =
- SimpleTokenizer::up_to_without_back_comment(bound_exception.start(), locator.contents())
- .skip_trivia();
+ BackwardsTokenizer::up_to(bound_exception.start(), locator.contents(), &[]).skip_trivia();
 // Eat the `as` token.
 let preceding = tokenizer
- .next_back()
+ .next()
 .context("expected the exception name to be preceded by `as`")?;
 debug_assert!(matches!(preceding.kind, SimpleTokenKind::As));
 // Lex to the end of the preceding token, which should be the exception value.
 let preceding = tokenizer
- .next_back()
+ .next()
 .context("expected the exception name to be preceded by a token")?;
 // Lex forwards, to the `:` token.
diff --git a/crates/ruff/src/rules/pyflakes/rules/unused_variable.rs b/crates/ruff/src/rules/pyflakes/rules/unused_variable.rs index 89eebff824..64aaa16e75 100644 --- a/crates/ruff/src/rules/pyflakes/rules/unused_variable.rs +++ b/crates/ruff/src/rules/pyflakes/rules/unused_variable.rs @@ -224,6 +224,7 @@ fn remove_unused_variable(binding: &Binding, checker: &Checker) -> Option { let start = parenthesized_range( target.into(), statement.into(), + checker.indexer().comment_ranges(), checker.locator().contents(), ) .unwrap_or(target.range()) diff --git a/crates/ruff/src/rules/pyupgrade/rules/yield_in_for_loop.rs b/crates/ruff/src/rules/pyupgrade/rules/yield_in_for_loop.rs index fb97cd42fd..c9558966e5 100644 --- a/crates/ruff/src/rules/pyupgrade/rules/yield_in_for_loop.rs +++ b/crates/ruff/src/rules/pyupgrade/rules/yield_in_for_loop.rs @@ -108,6 +108,7 @@ pub(crate) fn yield_in_for_loop(checker: &mut Checker, stmt_for: &ast::StmtFor) parenthesized_range( iter.as_ref().into(), stmt_for.into(), + checker.indexer().comment_ranges(), checker.locator().contents(), ) .unwrap_or(iter.range()), diff --git a/crates/ruff_python_ast/src/parenthesize.rs b/crates/ruff_python_ast/src/parenthesize.rs index f86f09028b..8c6b441a98 100644 --- a/crates/ruff_python_ast/src/parenthesize.rs +++ b/crates/ruff_python_ast/src/parenthesize.rs @@ -1,5 +1,5 @@ -use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer}; -use ruff_text_size::{Ranged, TextRange, TextSize}; +use ruff_python_trivia::{BackwardsTokenizer, CommentRanges, SimpleTokenKind, SimpleTokenizer}; +use ruff_text_size::{Ranged, TextLen, TextRange}; use crate::node::AnyNodeRef; use crate::ExpressionRef; @@ -9,6 +9,7 @@ use crate::ExpressionRef; pub fn parenthesized_range( expr: ExpressionRef, parent: AnyNodeRef, + comment_ranges: &CommentRanges, source: &str, ) -> Option { // If the parent is a node that brings its own parentheses, exclude the closing parenthesis @@ -23,7 +24,7 @@ pub fn parenthesized_range( // - `Tuple`: The elements of a tuple. The only risk is a single-element tuple (e.g., `(x,)`), // which must have a trailing comma anyway. let exclusive_parent_end = if parent.is_arguments() { - parent.end() - TextSize::new(1) + parent.end() - ")".text_len() } else { parent.end() }; @@ -33,9 +34,8 @@ pub fn parenthesized_range( .skip_trivia() .take_while(|token| token.kind == SimpleTokenKind::RParen); - let left_tokenizer = SimpleTokenizer::up_to_without_back_comment(expr.start(), source) + let left_tokenizer = BackwardsTokenizer::up_to(expr.start(), source, comment_ranges) .skip_trivia() - .rev() .take_while(|token| token.kind == SimpleTokenKind::LParen); // Zip closing parenthesis with opening parenthesis. 
The order is intentional, as testing for diff --git a/crates/ruff_python_ast/tests/parenthesize.rs b/crates/ruff_python_ast/tests/parenthesize.rs index 9713e7b7dd..69a68f7f82 100644 --- a/crates/ruff_python_ast/tests/parenthesize.rs +++ b/crates/ruff_python_ast/tests/parenthesize.rs @@ -1,5 +1,6 @@ use ruff_python_ast::parenthesize::parenthesized_range; use ruff_python_parser::parse_expression; +use ruff_python_trivia::CommentRanges; use ruff_text_size::TextRange; #[test] @@ -10,7 +11,12 @@ fn test_parenthesized_name() { let bin_op = expr.as_bin_op_expr().unwrap(); let name = bin_op.left.as_ref(); - let parenthesized = parenthesized_range(name.into(), bin_op.into(), source_code); + let parenthesized = parenthesized_range( + name.into(), + bin_op.into(), + &CommentRanges::default(), + source_code, + ); assert_eq!(parenthesized, Some(TextRange::new(0.into(), 3.into()))); } @@ -22,7 +28,12 @@ fn test_non_parenthesized_name() { let bin_op = expr.as_bin_op_expr().unwrap(); let name = bin_op.left.as_ref(); - let parenthesized = parenthesized_range(name.into(), bin_op.into(), source_code); + let parenthesized = parenthesized_range( + name.into(), + bin_op.into(), + &CommentRanges::default(), + source_code, + ); assert_eq!(parenthesized, None); } @@ -35,7 +46,12 @@ fn test_parenthesized_argument() { let arguments = &call.arguments; let argument = arguments.args.first().unwrap(); - let parenthesized = parenthesized_range(argument.into(), arguments.into(), source_code); + let parenthesized = parenthesized_range( + argument.into(), + arguments.into(), + &CommentRanges::default(), + source_code, + ); assert_eq!(parenthesized, Some(TextRange::new(2.into(), 5.into()))); } @@ -48,7 +64,12 @@ fn test_non_parenthesized_argument() { let arguments = &call.arguments; let argument = arguments.args.first().unwrap(); - let parenthesized = parenthesized_range(argument.into(), arguments.into(), source_code); + let parenthesized = parenthesized_range( + argument.into(), + arguments.into(), + &CommentRanges::default(), + source_code, + ); assert_eq!(parenthesized, None); } @@ -60,7 +81,12 @@ fn test_parenthesized_tuple_member() { let tuple = expr.as_tuple_expr().unwrap(); let member = tuple.elts.last().unwrap(); - let parenthesized = parenthesized_range(member.into(), tuple.into(), source_code); + let parenthesized = parenthesized_range( + member.into(), + tuple.into(), + &CommentRanges::default(), + source_code, + ); assert_eq!(parenthesized, Some(TextRange::new(4.into(), 7.into()))); } @@ -72,7 +98,12 @@ fn test_non_parenthesized_tuple_member() { let tuple = expr.as_tuple_expr().unwrap(); let member = tuple.elts.last().unwrap(); - let parenthesized = parenthesized_range(member.into(), tuple.into(), source_code); + let parenthesized = parenthesized_range( + member.into(), + tuple.into(), + &CommentRanges::default(), + source_code, + ); assert_eq!(parenthesized, None); } @@ -84,7 +115,12 @@ fn test_twice_parenthesized_name() { let bin_op = expr.as_bin_op_expr().unwrap(); let name = bin_op.left.as_ref(); - let parenthesized = parenthesized_range(name.into(), bin_op.into(), source_code); + let parenthesized = parenthesized_range( + name.into(), + bin_op.into(), + &CommentRanges::default(), + source_code, + ); assert_eq!(parenthesized, Some(TextRange::new(0.into(), 5.into()))); } @@ -97,6 +133,11 @@ fn test_twice_parenthesized_argument() { let arguments = &call.arguments; let argument = arguments.args.first().unwrap(); - let parenthesized = parenthesized_range(argument.into(), arguments.into(), source_code); + let 
parenthesized = parenthesized_range( + argument.into(), + arguments.into(), + &CommentRanges::default(), + source_code, + ); assert_eq!(parenthesized, Some(TextRange::new(2.into(), 11.into()))); } diff --git a/crates/ruff_python_formatter/src/comments/debug.rs b/crates/ruff_python_formatter/src/comments/debug.rs index baaf405f11..0d9057b85a 100644 --- a/crates/ruff_python_formatter/src/comments/debug.rs +++ b/crates/ruff_python_formatter/src/comments/debug.rs @@ -182,6 +182,7 @@ mod tests { use ruff_formatter::SourceCode; use ruff_python_ast::node::AnyNode; use ruff_python_ast::{StmtBreak, StmtContinue}; + use ruff_python_trivia::CommentRanges; use ruff_text_size::{TextRange, TextSize}; use crate::comments::map::MultiMap; @@ -231,7 +232,8 @@ break; ), ); - let comments = Comments::new(comments_map); + let comment_ranges = CommentRanges::default(); + let comments = Comments::new(comments_map, &comment_ranges); assert_debug_snapshot!(comments.debug(source_code)); } diff --git a/crates/ruff_python_formatter/src/comments/mod.rs b/crates/ruff_python_formatter/src/comments/mod.rs index 4d6eac168c..3e99fc0942 100644 --- a/crates/ruff_python_formatter/src/comments/mod.rs +++ b/crates/ruff_python_formatter/src/comments/mod.rs @@ -99,8 +99,7 @@ use ruff_formatter::{SourceCode, SourceCodeSlice}; use ruff_python_ast::node::AnyNodeRef; use ruff_python_ast::visitor::preorder::{PreorderVisitor, TraversalSignal}; use ruff_python_ast::Mod; -use ruff_python_index::CommentRanges; -use ruff_python_trivia::PythonWhitespace; +use ruff_python_trivia::{CommentRanges, PythonWhitespace}; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange}; @@ -281,7 +280,7 @@ type CommentsMap<'a> = MultiMap, SourceComment>; /// The comments of a syntax tree stored by node. /// /// Cloning `comments` is cheap as it only involves bumping a reference counter. -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone)] pub(crate) struct Comments<'a> { /// The implementation uses an [Rc] so that [Comments] has a lifetime independent from the [crate::Formatter]. /// Independent lifetimes are necessary to support the use case where a (formattable object)[crate::Format] @@ -306,15 +305,31 @@ pub(crate) struct Comments<'a> { /// } /// ``` data: Rc>, + /// We need those for backwards lexing + comment_ranges: &'a CommentRanges, } impl<'a> Comments<'a> { - fn new(comments: CommentsMap<'a>) -> Self { + fn new(comments: CommentsMap<'a>, comment_ranges: &'a CommentRanges) -> Self { Self { data: Rc::new(CommentsData { comments }), + comment_ranges, } } + /// Effectively a [`Default`] implementation that works around the lifetimes for tests + #[cfg(test)] + pub(crate) fn from_ranges(comment_ranges: &'a CommentRanges) -> Self { + Self { + data: Rc::new(CommentsData::default()), + comment_ranges, + } + } + + pub(crate) fn ranges(&self) -> &'a CommentRanges { + self.comment_ranges + } + /// Extracts the comments from the AST. pub(crate) fn from_ast( root: &'a Mod, @@ -324,12 +339,13 @@ impl<'a> Comments<'a> { let map = if comment_ranges.is_empty() { CommentsMap::new() } else { - let mut builder = CommentsMapBuilder::new(Locator::new(source_code.as_str())); + let mut builder = + CommentsMapBuilder::new(Locator::new(source_code.as_str()), comment_ranges); CommentsVisitor::new(source_code, comment_ranges, &mut builder).visit(root); builder.finish() }; - Self::new(map) + Self::new(map, comment_ranges) } /// Returns `true` if the given `node` has any comments. 
@@ -528,9 +544,10 @@ mod tests { use ruff_formatter::SourceCode; use ruff_python_ast::Mod; - use ruff_python_index::{CommentRanges, CommentRangesBuilder}; + use ruff_python_index::CommentRangesBuilder; use ruff_python_parser::lexer::lex; use ruff_python_parser::{parse_tokens, Mode}; + use ruff_python_trivia::CommentRanges; use crate::comments::Comments; diff --git a/crates/ruff_python_formatter/src/comments/placement.rs b/crates/ruff_python_formatter/src/comments/placement.rs index 6557a4fb9b..1d4ebcce99 100644 --- a/crates/ruff_python_formatter/src/comments/placement.rs +++ b/crates/ruff_python_formatter/src/comments/placement.rs @@ -4,7 +4,8 @@ use ruff_python_ast::node::AnyNodeRef; use ruff_python_ast::whitespace::indentation; use ruff_python_ast::{self as ast, Comprehension, Expr, MatchCase, Parameters}; use ruff_python_trivia::{ - find_only_token_in_range, indentation_at_offset, SimpleToken, SimpleTokenKind, SimpleTokenizer, + find_only_token_in_range, indentation_at_offset, BackwardsTokenizer, CommentRanges, + SimpleToken, SimpleTokenKind, SimpleTokenizer, }; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextLen, TextRange}; @@ -20,12 +21,13 @@ use crate::pattern::pattern_match_sequence::SequenceType; /// Manually attach comments to nodes that the default placement gets wrong. pub(super) fn place_comment<'a>( comment: DecoratedComment<'a>, + comment_ranges: &CommentRanges, locator: &Locator, ) -> CommentPlacement<'a> { handle_parenthesized_comment(comment, locator) .or_else(|comment| handle_end_of_line_comment_around_body(comment, locator)) .or_else(|comment| handle_own_line_comment_around_body(comment, locator)) - .or_else(|comment| handle_enclosed_comment(comment, locator)) + .or_else(|comment| handle_enclosed_comment(comment, comment_ranges, locator)) } /// Handle parenthesized comments. A parenthesized comment is a comment that appears within a @@ -172,6 +174,7 @@ fn handle_parenthesized_comment<'a>( /// Handle a comment that is enclosed by a node. 
fn handle_enclosed_comment<'a>( comment: DecoratedComment<'a>, + comment_ranges: &CommentRanges, locator: &Locator, ) -> CommentPlacement<'a> { match comment.enclosing_node() { @@ -213,13 +216,15 @@ fn handle_enclosed_comment<'a>( AnyNodeRef::ExprDict(_) => handle_dict_unpacking_comment(comment, locator) .or_else(|comment| handle_bracketed_end_of_line_comment(comment, locator)), AnyNodeRef::ExprIfExp(expr_if) => handle_expr_if_comment(comment, expr_if, locator), - AnyNodeRef::ExprSlice(expr_slice) => handle_slice_comments(comment, expr_slice, locator), + AnyNodeRef::ExprSlice(expr_slice) => { + handle_slice_comments(comment, expr_slice, comment_ranges, locator) + } AnyNodeRef::ExprStarred(starred) => { handle_trailing_expression_starred_star_end_of_line_comment(comment, starred, locator) } AnyNodeRef::ExprSubscript(expr_subscript) => { if let Expr::Slice(expr_slice) = expr_subscript.slice.as_ref() { - handle_slice_comments(comment, expr_slice, locator) + handle_slice_comments(comment, expr_slice, comment_ranges, locator) } else { CommentPlacement::Default(comment) } @@ -958,6 +963,7 @@ fn handle_module_level_own_line_comment_before_class_or_function_comment<'a>( fn handle_slice_comments<'a>( comment: DecoratedComment<'a>, expr_slice: &'a ast::ExprSlice, + comment_ranges: &CommentRanges, locator: &Locator, ) -> CommentPlacement<'a> { let ast::ExprSlice { @@ -969,9 +975,9 @@ fn handle_slice_comments<'a>( // Check for `foo[ # comment`, but only if they are on the same line let after_lbracket = matches!( - SimpleTokenizer::up_to_without_back_comment(comment.start(), locator.contents()) + BackwardsTokenizer::up_to(comment.start(), locator.contents(), comment_ranges) .skip_trivia() - .next_back(), + .next(), Some(SimpleToken { kind: SimpleTokenKind::LBracket, .. diff --git a/crates/ruff_python_formatter/src/comments/visitor.rs b/crates/ruff_python_formatter/src/comments/visitor.rs index c291e635e9..91819acc39 100644 --- a/crates/ruff_python_formatter/src/comments/visitor.rs +++ b/crates/ruff_python_formatter/src/comments/visitor.rs @@ -8,8 +8,7 @@ use ruff_python_ast::{Mod, Stmt}; // pre-order. #[allow(clippy::wildcard_imports)] use ruff_python_ast::visitor::preorder::*; -use ruff_python_index::CommentRanges; -use ruff_python_trivia::is_python_whitespace; +use ruff_python_trivia::{is_python_whitespace, CommentRanges}; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange, TextSize}; @@ -536,12 +535,14 @@ impl<'a> PushComment<'a> for CommentsVecBuilder<'a> { /// [`CommentsMap`]. 
pub(super) struct CommentsMapBuilder<'a> { comments: CommentsMap<'a>, + /// We need those for backwards lexing + comment_ranges: &'a CommentRanges, locator: Locator<'a>, } impl<'a> PushComment<'a> for CommentsMapBuilder<'a> { fn push_comment(&mut self, placement: DecoratedComment<'a>) { - let placement = place_comment(placement, &self.locator); + let placement = place_comment(placement, self.comment_ranges, &self.locator); match placement { CommentPlacement::Leading { node, comment } => { self.push_leading_comment(node, comment); @@ -603,9 +604,10 @@ impl<'a> PushComment<'a> for CommentsMapBuilder<'a> { } impl<'a> CommentsMapBuilder<'a> { - pub(crate) fn new(locator: Locator<'a>) -> Self { + pub(crate) fn new(locator: Locator<'a>, comment_ranges: &'a CommentRanges) -> Self { Self { comments: CommentsMap::default(), + comment_ranges, locator, } } diff --git a/crates/ruff_python_formatter/src/expression/binary_like.rs b/crates/ruff_python_formatter/src/expression/binary_like.rs index 55e55c8037..8e4ab0f93b 100644 --- a/crates/ruff_python_formatter/src/expression/binary_like.rs +++ b/crates/ruff_python_formatter/src/expression/binary_like.rs @@ -8,6 +8,7 @@ use ruff_python_ast::{ Constant, Expr, ExprAttribute, ExprBinOp, ExprBoolOp, ExprCompare, ExprConstant, ExprUnaryOp, UnaryOp, }; +use ruff_python_trivia::CommentRanges; use ruff_python_trivia::{SimpleToken, SimpleTokenKind, SimpleTokenizer}; use ruff_text_size::{Ranged, TextRange}; @@ -179,7 +180,13 @@ impl<'a> BinaryLike<'a> { ) { let expression = operand.expression(); match expression { - Expr::BinOp(binary) if !is_expression_parenthesized(expression.into(), source) => { + Expr::BinOp(binary) + if !is_expression_parenthesized( + expression.into(), + comments.ranges(), + source, + ) => + { let leading_comments = operand .leading_binary_comments() .unwrap_or_else(|| comments.leading(binary)); @@ -198,7 +205,11 @@ impl<'a> BinaryLike<'a> { ); } Expr::Compare(compare) - if !is_expression_parenthesized(expression.into(), source) => + if !is_expression_parenthesized( + expression.into(), + comments.ranges(), + source, + ) => { let leading_comments = operand .leading_binary_comments() @@ -218,7 +229,11 @@ impl<'a> BinaryLike<'a> { ); } Expr::BoolOp(bool_op) - if !is_expression_parenthesized(expression.into(), source) => + if !is_expression_parenthesized( + expression.into(), + comments.ranges(), + source, + ) => { let leading_comments = operand .leading_binary_comments() @@ -282,7 +297,11 @@ impl Format> for BinaryLike<'_> { AnyString::from_expression(operand.expression()) .filter(|string| { string.is_implicit_concatenated() - && !is_expression_parenthesized(string.into(), source) + && !is_expression_parenthesized( + string.into(), + comments.ranges(), + source, + ) }) .map(|string| (index, string, operand)) }) @@ -430,6 +449,7 @@ impl Format> for BinaryLike<'_> { if (right_operand_has_leading_comments && !is_expression_parenthesized( right_operand.expression().into(), + f.context().comments().ranges(), f.context().source(), )) || right_operator.has_trailing_comments() @@ -466,11 +486,16 @@ impl Format> for BinaryLike<'_> { } } -fn is_simple_power_expression(left: &Expr, right: &Expr, source: &str) -> bool { +fn is_simple_power_expression( + left: &Expr, + right: &Expr, + comment_range: &CommentRanges, + source: &str, +) -> bool { is_simple_power_operand(left) && is_simple_power_operand(right) - && !is_expression_parenthesized(left.into(), source) - && !is_expression_parenthesized(right.into(), source) + && 
!is_expression_parenthesized(left.into(), comment_range, source) + && !is_expression_parenthesized(right.into(), comment_range, source) } /// Return `true` if an [`Expr`] adheres to [Black's definition](https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#line-breaks-binary-operators) @@ -664,6 +689,7 @@ impl Format> for FlatBinaryExpressionSlice<'_> { && is_simple_power_expression( left.last_operand().expression(), right.first_operand().expression(), + f.context().comments().ranges(), f.context().source(), ); @@ -806,7 +832,7 @@ impl<'a> Operand<'a> { } => !leading_comments.is_empty(), Operand::Middle { expression } | Operand::Right { expression, .. } => { let leading = comments.leading(*expression); - if is_expression_parenthesized((*expression).into(), source) { + if is_expression_parenthesized((*expression).into(), comments.ranges(), source) { leading.iter().any(|comment| { !comment.is_formatted() && matches!( @@ -853,7 +879,11 @@ impl Format> for Operand<'_> { fn fmt(&self, f: &mut Formatter>) -> FormatResult<()> { let expression = self.expression(); - return if is_expression_parenthesized(expression.into(), f.context().source()) { + return if is_expression_parenthesized( + expression.into(), + f.context().comments().ranges(), + f.context().source(), + ) { let comments = f.context().comments().clone(); let expression_comments = comments.leading_dangling_trailing(expression); diff --git a/crates/ruff_python_formatter/src/expression/expr_attribute.rs b/crates/ruff_python_formatter/src/expression/expr_attribute.rs index 37abc02b0a..c9cc463bea 100644 --- a/crates/ruff_python_formatter/src/expression/expr_attribute.rs +++ b/crates/ruff_python_formatter/src/expression/expr_attribute.rs @@ -45,7 +45,7 @@ impl FormatNodeRule for FormatExprAttribute { value: Constant::Int(_) | Constant::Float(_), .. }) - ) || is_expression_parenthesized(value.into(), f.context().source()); + ) || is_expression_parenthesized(value.into(), f.context().comments().ranges(), f.context().source()); if call_chain_layout == CallChainLayout::Fluent { if parenthesize_value { @@ -142,15 +142,22 @@ impl NeedsParentheses for ExprAttribute { context: &PyFormatContext, ) -> OptionalParentheses { // Checks if there are any own line comments in an attribute chain (a.b.c). 
- if CallChainLayout::from_expression(self.into(), context.source()) - == CallChainLayout::Fluent + if CallChainLayout::from_expression( + self.into(), + context.comments().ranges(), + context.source(), + ) == CallChainLayout::Fluent { OptionalParentheses::Multiline } else if context.comments().has_dangling(self) { OptionalParentheses::Always } else if self.value.is_name_expr() { OptionalParentheses::BestFit - } else if is_expression_parenthesized(self.value.as_ref().into(), context.source()) { + } else if is_expression_parenthesized( + self.value.as_ref().into(), + context.comments().ranges(), + context.source(), + ) { OptionalParentheses::Never } else { self.value.needs_parentheses(self.into(), context) diff --git a/crates/ruff_python_formatter/src/expression/expr_call.rs b/crates/ruff_python_formatter/src/expression/expr_call.rs index 6df21ba328..2fe26d8d34 100644 --- a/crates/ruff_python_formatter/src/expression/expr_call.rs +++ b/crates/ruff_python_formatter/src/expression/expr_call.rs @@ -82,13 +82,20 @@ impl NeedsParentheses for ExprCall { _parent: AnyNodeRef, context: &PyFormatContext, ) -> OptionalParentheses { - if CallChainLayout::from_expression(self.into(), context.source()) - == CallChainLayout::Fluent + if CallChainLayout::from_expression( + self.into(), + context.comments().ranges(), + context.source(), + ) == CallChainLayout::Fluent { OptionalParentheses::Multiline } else if context.comments().has_dangling(self) { OptionalParentheses::Always - } else if is_expression_parenthesized(self.func.as_ref().into(), context.source()) { + } else if is_expression_parenthesized( + self.func.as_ref().into(), + context.comments().ranges(), + context.source(), + ) { OptionalParentheses::Never } else { self.func.needs_parentheses(self.into(), context) diff --git a/crates/ruff_python_formatter/src/expression/expr_if_exp.rs b/crates/ruff_python_formatter/src/expression/expr_if_exp.rs index 1b22fb568c..a1b57b3bf3 100644 --- a/crates/ruff_python_formatter/src/expression/expr_if_exp.rs +++ b/crates/ruff_python_formatter/src/expression/expr_if_exp.rs @@ -101,7 +101,11 @@ impl Format> for FormatOrElse<'_> { fn fmt(&self, f: &mut Formatter>) -> FormatResult<()> { match self.orelse { Expr::IfExp(expr) - if !is_expression_parenthesized(expr.into(), f.context().source()) => + if !is_expression_parenthesized( + expr.into(), + f.context().comments().ranges(), + f.context().source(), + ) => { write!(f, [expr.format().with_options(ExprIfExpLayout::Nested)]) } diff --git a/crates/ruff_python_formatter/src/expression/expr_subscript.rs b/crates/ruff_python_formatter/src/expression/expr_subscript.rs index b4c9dc7959..ecd4a2abc1 100644 --- a/crates/ruff_python_formatter/src/expression/expr_subscript.rs +++ b/crates/ruff_python_formatter/src/expression/expr_subscript.rs @@ -89,11 +89,18 @@ impl NeedsParentheses for ExprSubscript { context: &PyFormatContext, ) -> OptionalParentheses { { - if CallChainLayout::from_expression(self.into(), context.source()) - == CallChainLayout::Fluent + if CallChainLayout::from_expression( + self.into(), + context.comments().ranges(), + context.source(), + ) == CallChainLayout::Fluent { OptionalParentheses::Multiline - } else if is_expression_parenthesized(self.value.as_ref().into(), context.source()) { + } else if is_expression_parenthesized( + self.value.as_ref().into(), + context.comments().ranges(), + context.source(), + ) { OptionalParentheses::Never } else { match self.value.needs_parentheses(self.into(), context) { diff --git 
a/crates/ruff_python_formatter/src/expression/expr_unary_op.rs b/crates/ruff_python_formatter/src/expression/expr_unary_op.rs index 3405fa3e5c..1b58d03571 100644 --- a/crates/ruff_python_formatter/src/expression/expr_unary_op.rs +++ b/crates/ruff_python_formatter/src/expression/expr_unary_op.rs @@ -46,7 +46,11 @@ impl FormatNodeRule for FormatExprUnaryOp { // a) // ``` if comments.has_leading(operand.as_ref()) - && !is_expression_parenthesized(operand.as_ref().into(), f.context().source()) + && !is_expression_parenthesized( + operand.as_ref().into(), + f.context().comments().ranges(), + f.context().source(), + ) { hard_line_break().fmt(f)?; } else if op.is_not() { @@ -72,7 +76,11 @@ impl NeedsParentheses for ExprUnaryOp { context: &PyFormatContext, ) -> OptionalParentheses { // We preserve the parentheses of the operand. It should not be necessary to break this expression. - if is_expression_parenthesized(self.operand.as_ref().into(), context.source()) { + if is_expression_parenthesized( + self.operand.as_ref().into(), + context.comments().ranges(), + context.source(), + ) { OptionalParentheses::Never } else { OptionalParentheses::Multiline diff --git a/crates/ruff_python_formatter/src/expression/mod.rs b/crates/ruff_python_formatter/src/expression/mod.rs index ba2b979fe5..dd20097a7e 100644 --- a/crates/ruff_python_formatter/src/expression/mod.rs +++ b/crates/ruff_python_formatter/src/expression/mod.rs @@ -9,6 +9,7 @@ use ruff_python_ast as ast; use ruff_python_ast::node::AnyNodeRef; use ruff_python_ast::visitor::preorder::{walk_expr, PreorderVisitor}; use ruff_python_ast::{Constant, Expr, ExpressionRef, Operator}; +use ruff_python_trivia::CommentRanges; use crate::builders::parenthesize_if_expands; use crate::comments::leading_comments; @@ -103,9 +104,11 @@ impl FormatRule> for FormatExpr { }); let parenthesize = match parentheses { - Parentheses::Preserve => { - is_expression_parenthesized(expression.into(), f.context().source()) - } + Parentheses::Preserve => is_expression_parenthesized( + expression.into(), + f.context().comments().ranges(), + f.context().source(), + ), Parentheses::Always => true, // Fluent style means we already have parentheses Parentheses::Never => false, @@ -186,7 +189,11 @@ impl Format> for MaybeParenthesizeExpression<'_> { let comments = f.context().comments(); let preserve_parentheses = parenthesize.is_optional() - && is_expression_parenthesized((*expression).into(), f.context().source()); + && is_expression_parenthesized( + (*expression).into(), + f.context().comments().ranges(), + f.context().source(), + ); let has_comments = comments.has_leading(*expression) || comments.has_trailing_own_line(*expression); @@ -581,7 +588,11 @@ impl<'input> PreorderVisitor<'input> for CanOmitOptionalParenthesesVisitor<'inpu self.last = Some(expr); // Rule only applies for non-parenthesized expressions. 
- if is_expression_parenthesized(expr.into(), self.context.source()) { + if is_expression_parenthesized( + expr.into(), + self.context.comments().ranges(), + self.context.source(), + ) { self.any_parenthesized_expressions = true; } else { self.visit_subexpression(expr); @@ -635,7 +646,11 @@ pub enum CallChainLayout { } impl CallChainLayout { - pub(crate) fn from_expression(mut expr: ExpressionRef, source: &str) -> Self { + pub(crate) fn from_expression( + mut expr: ExpressionRef, + comment_ranges: &CommentRanges, + source: &str, + ) -> Self { let mut attributes_after_parentheses = 0; loop { match expr { @@ -646,7 +661,7 @@ impl CallChainLayout { // data[:100].T // ^^^^^^^^^^ value // ``` - if is_expression_parenthesized(value.into(), source) { + if is_expression_parenthesized(value.into(), comment_ranges, source) { // `(a).b`. We preserve these parentheses so don't recurse attributes_after_parentheses += 1; break; @@ -674,7 +689,7 @@ impl CallChainLayout { // f2 = (a).w().t(1,) // ^ expr // ``` - if is_expression_parenthesized(expr, source) { + if is_expression_parenthesized(expr, comment_ranges, source) { attributes_after_parentheses += 1; } @@ -683,7 +698,7 @@ impl CallChainLayout { } // We preserve these parentheses so don't recurse - if is_expression_parenthesized(expr, source) { + if is_expression_parenthesized(expr, comment_ranges, source) { break; } } @@ -704,7 +719,11 @@ impl CallChainLayout { match self { CallChainLayout::Default => { if f.context().node_level().is_parenthesized() { - CallChainLayout::from_expression(item.into(), f.context().source()) + CallChainLayout::from_expression( + item.into(), + f.context().comments().ranges(), + f.context().source(), + ) } else { CallChainLayout::NonFluent } @@ -745,7 +764,7 @@ fn has_parentheses(expr: &Expr, context: &PyFormatContext) -> Option bool { +pub(crate) fn is_expression_parenthesized( + expr: ExpressionRef, + comment_ranges: &CommentRanges, + contents: &str, +) -> bool { // First test if there's a closing parentheses because it tends to be cheaper. if matches!( first_non_trivia_token(expr.end(), contents), @@ -109,11 +116,10 @@ pub(crate) fn is_expression_parenthesized(expr: ExpressionRef, contents: &str) - .. }) ) { - let mut tokenizer = - SimpleTokenizer::up_to_without_back_comment(expr.start(), contents).skip_trivia(); - matches!( - tokenizer.next_back(), + BackwardsTokenizer::up_to(expr.start(), contents, comment_ranges) + .skip_trivia() + .next(), Some(SimpleToken { kind: SimpleTokenKind::LParen, .. 
@@ -418,6 +424,7 @@ impl Format> for FormatEmptyParenthesized<'_> { mod tests { use ruff_python_ast::ExpressionRef; use ruff_python_parser::parse_expression; + use ruff_python_trivia::CommentRanges; use crate::expression::parentheses::is_expression_parenthesized; @@ -427,6 +434,7 @@ mod tests { let expr = parse_expression(expression, "").unwrap(); assert!(!is_expression_parenthesized( ExpressionRef::from(&expr), + &CommentRanges::default(), expression )); } diff --git a/crates/ruff_python_formatter/src/lib.rs b/crates/ruff_python_formatter/src/lib.rs index 4eb54f1b34..031b93510b 100644 --- a/crates/ruff_python_formatter/src/lib.rs +++ b/crates/ruff_python_formatter/src/lib.rs @@ -5,9 +5,10 @@ use ruff_formatter::prelude::*; use ruff_formatter::{format, FormatError, Formatted, PrintError, Printed, SourceCode}; use ruff_python_ast::node::AstNode; use ruff_python_ast::Mod; -use ruff_python_index::{CommentRanges, CommentRangesBuilder}; +use ruff_python_index::CommentRangesBuilder; use ruff_python_parser::lexer::{lex, LexicalError}; use ruff_python_parser::{parse_tokens, Mode, ParseError}; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; use crate::comments::{ @@ -120,7 +121,7 @@ impl From for FormatModuleError { } } -#[tracing::instrument(level=Level::TRACE, skip_all)] +#[tracing::instrument(level = Level::TRACE, skip_all)] pub fn format_module( contents: &str, options: PyFormatOptions, diff --git a/crates/ruff_python_formatter/src/pattern/mod.rs b/crates/ruff_python_formatter/src/pattern/mod.rs index d0f44e258a..50d1f99fb2 100644 --- a/crates/ruff_python_formatter/src/pattern/mod.rs +++ b/crates/ruff_python_formatter/src/pattern/mod.rs @@ -1,7 +1,10 @@ use ruff_formatter::{FormatOwnedWithRule, FormatRefWithRule, FormatRule, FormatRuleWithOptions}; use ruff_python_ast::node::AnyNodeRef; use ruff_python_ast::Pattern; -use ruff_python_trivia::{first_non_trivia_token, SimpleToken, SimpleTokenKind, SimpleTokenizer}; +use ruff_python_trivia::CommentRanges; +use ruff_python_trivia::{ + first_non_trivia_token, BackwardsTokenizer, SimpleToken, SimpleTokenKind, +}; use ruff_text_size::Ranged; use crate::expression::parentheses::{ @@ -48,7 +51,11 @@ impl FormatRule> for FormatPattern { }); let parenthesize = match self.parentheses { - Parentheses::Preserve => is_pattern_parenthesized(pattern, f.context().source()), + Parentheses::Preserve => is_pattern_parenthesized( + pattern, + f.context().comments().ranges(), + f.context().source(), + ), Parentheses::Always => true, Parentheses::Never => false, }; @@ -98,7 +105,11 @@ impl<'ast> IntoFormat> for Pattern { } } -fn is_pattern_parenthesized(pattern: &Pattern, contents: &str) -> bool { +fn is_pattern_parenthesized( + pattern: &Pattern, + comment_ranges: &CommentRanges, + contents: &str, +) -> bool { // First test if there's a closing parentheses because it tends to be cheaper. if matches!( first_non_trivia_token(pattern.end(), contents), @@ -107,11 +118,10 @@ fn is_pattern_parenthesized(pattern: &Pattern, contents: &str) -> bool { .. }) ) { - let mut tokenizer = - SimpleTokenizer::up_to_without_back_comment(pattern.start(), contents).skip_trivia(); - matches!( - tokenizer.next_back(), + BackwardsTokenizer::up_to(pattern.start(), contents, comment_ranges) + .skip_trivia() + .next(), Some(SimpleToken { kind: SimpleTokenKind::LParen, .. 
diff --git a/crates/ruff_python_formatter/src/statement/suite.rs b/crates/ruff_python_formatter/src/statement/suite.rs index 8ae33eebb3..0faf69a9d2 100644 --- a/crates/ruff_python_formatter/src/statement/suite.rs +++ b/crates/ruff_python_formatter/src/statement/suite.rs @@ -561,6 +561,7 @@ impl Format> for SuiteChildStatement<'_> { mod tests { use ruff_formatter::format; use ruff_python_parser::parse_suite; + use ruff_python_trivia::CommentRanges; use crate::comments::Comments; use crate::prelude::*; @@ -591,7 +592,12 @@ def trailing_func(): let statements = parse_suite(source, "test.py").unwrap(); - let context = PyFormatContext::new(PyFormatOptions::default(), source, Comments::default()); + let comment_ranges = CommentRanges::default(); + let context = PyFormatContext::new( + PyFormatOptions::default(), + source, + Comments::from_ranges(&comment_ranges), + ); let test_formatter = format_with(|f: &mut PyFormatter| statements.format().with_options(level).fmt(f)); diff --git a/crates/ruff_python_index/src/comment_ranges.rs b/crates/ruff_python_index/src/comment_ranges.rs index 9376d91e02..602446a934 100644 --- a/crates/ruff_python_index/src/comment_ranges.rs +++ b/crates/ruff_python_index/src/comment_ranges.rs @@ -1,70 +1,8 @@ -use itertools::Itertools; -use std::fmt::{Debug, Formatter}; -use std::ops::Deref; +use std::fmt::Debug; use ruff_python_parser::Tok; -use ruff_text_size::{Ranged, TextRange}; - -/// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping. -#[derive(Clone)] -pub struct CommentRanges { - raw: Vec, -} - -impl CommentRanges { - /// Returns `true` if the given range includes a comment. - pub fn intersects(&self, target: TextRange) -> bool { - self.raw - .binary_search_by(|range| { - if target.contains_range(*range) { - std::cmp::Ordering::Equal - } else if range.end() < target.start() { - std::cmp::Ordering::Less - } else { - std::cmp::Ordering::Greater - } - }) - .is_ok() - } - - /// Returns the comments who are within the range - pub fn comments_in_range(&self, range: TextRange) -> &[TextRange] { - let start = self - .raw - .partition_point(|comment| comment.start() < range.start()); - // We expect there are few comments, so switching to find should be faster - match self.raw[start..] - .iter() - .find_position(|comment| comment.end() > range.end()) - { - Some((in_range, _element)) => &self.raw[start..start + in_range], - None => &self.raw[start..], - } - } -} - -impl Deref for CommentRanges { - type Target = [TextRange]; - - fn deref(&self) -> &Self::Target { - self.raw.as_slice() - } -} - -impl Debug for CommentRanges { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_tuple("CommentRanges").field(&self.raw).finish() - } -} - -impl<'a> IntoIterator for &'a CommentRanges { - type IntoIter = std::slice::Iter<'a, TextRange>; - type Item = &'a TextRange; - - fn into_iter(self) -> Self::IntoIter { - self.raw.iter() - } -} +use ruff_python_trivia::CommentRanges; +use ruff_text_size::TextRange; #[derive(Debug, Clone, Default)] pub struct CommentRangesBuilder { @@ -79,6 +17,6 @@ impl CommentRangesBuilder { } pub fn finish(self) -> CommentRanges { - CommentRanges { raw: self.ranges } + CommentRanges::new(self.ranges) } } diff --git a/crates/ruff_python_index/src/indexer.rs b/crates/ruff_python_index/src/indexer.rs index aec33f087a..11273f8131 100644 --- a/crates/ruff_python_index/src/indexer.rs +++ b/crates/ruff_python_index/src/indexer.rs @@ -1,15 +1,16 @@ //! 
Struct used to index source code, to enable efficient lookup of tokens that
//! are omitted from the AST (e.g., commented lines).
+use crate::CommentRangesBuilder;
 use ruff_python_ast::Stmt;
 use ruff_python_parser::lexer::LexResult;
 use ruff_python_parser::{StringKind, Tok};
-use ruff_python_trivia::{has_leading_content, has_trailing_content, is_python_whitespace};
+use ruff_python_trivia::{
+ has_leading_content, has_trailing_content, is_python_whitespace, CommentRanges,
+};
 use ruff_source_file::Locator;
 use ruff_text_size::{Ranged, TextRange, TextSize};
-use super::comment_ranges::{CommentRanges, CommentRangesBuilder};
-
 pub struct Indexer {
 comment_ranges: CommentRanges,
diff --git a/crates/ruff_python_index/src/lib.rs b/crates/ruff_python_index/src/lib.rs
index a6ed9363ad..2e585ca5df 100644
--- a/crates/ruff_python_index/src/lib.rs
+++ b/crates/ruff_python_index/src/lib.rs
@@ -1,5 +1,5 @@
 mod comment_ranges;
 mod indexer;
-pub use comment_ranges::{CommentRanges, CommentRangesBuilder};
+pub use comment_ranges::CommentRangesBuilder;
 pub use indexer::Indexer;
diff --git a/crates/ruff_python_trivia/Cargo.toml b/crates/ruff_python_trivia/Cargo.toml
index 8ae3fad931..e77cf41bac 100644
--- a/crates/ruff_python_trivia/Cargo.toml
+++ b/crates/ruff_python_trivia/Cargo.toml
@@ -16,8 +16,7 @@ license = { workspace = true }
 ruff_text_size = { path = "../ruff_text_size" }
 ruff_source_file = { path = "../ruff_source_file" }
-memchr = { workspace = true }
-smallvec = { workspace = true }
+itertools = { workspace = true }
 unicode-ident = { workspace = true }
 [dev-dependencies]
diff --git a/crates/ruff_python_trivia/src/comment_ranges.rs b/crates/ruff_python_trivia/src/comment_ranges.rs
new file mode 100644
index 0000000000..a5b7b7ed50
--- /dev/null
+++ b/crates/ruff_python_trivia/src/comment_ranges.rs
@@ -0,0 +1,71 @@
+use std::fmt::{Debug, Formatter};
+use std::ops::Deref;
+
+use itertools::Itertools;
+
+use ruff_text_size::{Ranged, TextRange};
+
+/// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping.
+#[derive(Clone, Default)]
+pub struct CommentRanges {
+ raw: Vec,
+}
+
+impl CommentRanges {
+ pub fn new(ranges: Vec<TextRange>) -> Self {
+ Self { raw: ranges }
+ }
+
+ /// Returns `true` if the given range includes a comment.
+ pub fn intersects(&self, target: TextRange) -> bool {
+ self.raw
+ .binary_search_by(|range| {
+ if target.contains_range(*range) {
+ std::cmp::Ordering::Equal
+ } else if range.end() < target.start() {
+ std::cmp::Ordering::Less
+ } else {
+ std::cmp::Ordering::Greater
+ }
+ })
+ .is_ok()
+ }
+
+ /// Returns the comments that are within the range
+ pub fn comments_in_range(&self, range: TextRange) -> &[TextRange] {
+ let start = self
+ .raw
+ .partition_point(|comment| comment.start() < range.start());
+ // We expect there to be few comments, so switching to a linear `find` should be faster
+ match self.raw[start..]
+ .iter() + .find_position(|comment| comment.end() > range.end()) + { + Some((in_range, _element)) => &self.raw[start..start + in_range], + None => &self.raw[start..], + } + } +} + +impl Deref for CommentRanges { + type Target = [TextRange]; + + fn deref(&self) -> &Self::Target { + self.raw.as_slice() + } +} + +impl Debug for CommentRanges { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_tuple("CommentRanges").field(&self.raw).finish() + } +} + +impl<'a> IntoIterator for &'a CommentRanges { + type Item = &'a TextRange; + type IntoIter = std::slice::Iter<'a, TextRange>; + + fn into_iter(self) -> Self::IntoIter { + self.raw.iter() + } +} diff --git a/crates/ruff_python_trivia/src/cursor.rs b/crates/ruff_python_trivia/src/cursor.rs index 336720269c..e046fa92ba 100644 --- a/crates/ruff_python_trivia/src/cursor.rs +++ b/crates/ruff_python_trivia/src/cursor.rs @@ -44,7 +44,7 @@ impl<'a> Cursor<'a> { self.chars.clone().next_back().unwrap_or(EOF_CHAR) } - // SAFETY: THe `source.text_len` call in `new` would panic if the string length is larger than a `u32`. + // SAFETY: The `source.text_len` call in `new` would panic if the string length is larger than a `u32`. #[allow(clippy::cast_possible_truncation)] pub fn text_len(&self) -> TextSize { TextSize::new(self.chars.as_str().len() as u32) diff --git a/crates/ruff_python_trivia/src/lib.rs b/crates/ruff_python_trivia/src/lib.rs index bef83ac26a..9c9bb8158c 100644 --- a/crates/ruff_python_trivia/src/lib.rs +++ b/crates/ruff_python_trivia/src/lib.rs @@ -1,8 +1,10 @@ +mod comment_ranges; mod cursor; pub mod textwrap; mod tokenizer; mod whitespace; +pub use comment_ranges::CommentRanges; pub use cursor::*; pub use tokenizer::*; pub use whitespace::*; diff --git a/crates/ruff_python_trivia/src/tokenizer.rs b/crates/ruff_python_trivia/src/tokenizer.rs index d7d0645f3f..2072eae936 100644 --- a/crates/ruff_python_trivia/src/tokenizer.rs +++ b/crates/ruff_python_trivia/src/tokenizer.rs @@ -1,4 +1,3 @@ -use memchr::{memchr2, memchr3, memrchr3_iter}; use unicode_ident::{is_xid_continue, is_xid_start}; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; @@ -121,6 +120,47 @@ fn is_identifier_continuation(c: char) -> bool { } } +fn to_keyword_or_other(source: &str) -> SimpleTokenKind { + match source { + "and" => SimpleTokenKind::And, + "as" => SimpleTokenKind::As, + "assert" => SimpleTokenKind::Assert, + "async" => SimpleTokenKind::Async, + "await" => SimpleTokenKind::Await, + "break" => SimpleTokenKind::Break, + "class" => SimpleTokenKind::Class, + "continue" => SimpleTokenKind::Continue, + "def" => SimpleTokenKind::Def, + "del" => SimpleTokenKind::Del, + "elif" => SimpleTokenKind::Elif, + "else" => SimpleTokenKind::Else, + "except" => SimpleTokenKind::Except, + "finally" => SimpleTokenKind::Finally, + "for" => SimpleTokenKind::For, + "from" => SimpleTokenKind::From, + "global" => SimpleTokenKind::Global, + "if" => SimpleTokenKind::If, + "import" => SimpleTokenKind::Import, + "in" => SimpleTokenKind::In, + "is" => SimpleTokenKind::Is, + "lambda" => SimpleTokenKind::Lambda, + "nonlocal" => SimpleTokenKind::Nonlocal, + "not" => SimpleTokenKind::Not, + "or" => SimpleTokenKind::Or, + "pass" => SimpleTokenKind::Pass, + "raise" => SimpleTokenKind::Raise, + "return" => SimpleTokenKind::Return, + "try" => SimpleTokenKind::Try, + "while" => SimpleTokenKind::While, + "match" => SimpleTokenKind::Match, // Match is a soft keyword that depends on the context but we can always lex it as a keyword and leave it to the caller (parser) to decide if 
it should be handled as an identifier or keyword.
+        "type" => SimpleTokenKind::Type, // Type is a soft keyword that depends on the context but we can always lex it as a keyword and leave it to the caller (parser) to decide if it should be handled as an identifier or keyword.
+        "case" => SimpleTokenKind::Case,
+        "with" => SimpleTokenKind::With,
+        "yield" => SimpleTokenKind::Yield,
+        _ => SimpleTokenKind::Other, // Potentially an identifier, but only if it isn't a string prefix. We can ignore this for now https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
+    }
+}
+
 #[derive(Clone, Debug, Eq, PartialEq, Hash)]
 pub struct SimpleToken {
     pub kind: SimpleTokenKind,
@@ -421,17 +461,15 @@ impl SimpleTokenKind {
     }
 }
 
-/// Simple zero allocation tokenizer for tokenizing trivia (and some tokens).
+/// Simple zero allocation tokenizer handling most tokens.
 ///
 /// The tokenizer must start at an offset that is trivia (e.g. not inside of a multiline string).
 ///
-/// The tokenizer doesn't guarantee any correctness after it returned a [`SimpleTokenKind::Other`]. That's why it
-/// will return [`SimpleTokenKind::Bogus`] for every character after until it reaches the end of the file.
+/// In case it finds something it can't parse, the tokenizer will return a
+/// [`SimpleTokenKind::Other`] and then only a final [`SimpleTokenKind::Bogus`] afterwards.
 pub struct SimpleTokenizer<'a> {
     offset: TextSize,
-    back_offset: TextSize,
-    /// `true` when it is known that the current `back` line has no comment for sure.
-    back_line_has_no_comment: bool,
     bogus: bool,
     source: &'a str,
     cursor: Cursor<'a>,
@@ -441,8 +479,6 @@ impl<'a> SimpleTokenizer<'a> {
     pub fn new(source: &'a str, range: TextRange) -> Self {
         Self {
             offset: range.start(),
-            back_offset: range.end(),
-            back_line_has_no_comment: false,
             bogus: false,
             source,
             cursor: Cursor::new(&source[range]),
@@ -454,64 +490,6 @@ impl<'a> SimpleTokenizer<'a> {
         Self::new(source, range)
     }
 
-    /// Creates a tokenizer that lexes tokens from the start of `source` up to `offset`.
-    ///
-    /// Consider using [`SimpleTokenizer::up_to_without_back_comment`] if intend to lex backwards.
-    pub fn up_to(offset: TextSize, source: &'a str) -> Self {
-        Self::new(source, TextRange::up_to(offset))
-    }
-
-    /// Creates a tokenizer that lexes tokens from the start of `source` up to `offset`, and informs
-    /// the lexer that the line at `offset` contains no comments. This can significantly speed up backwards lexing
-    /// because the lexer doesn't need to scan for comments. 
- pub fn up_to_without_back_comment(offset: TextSize, source: &'a str) -> Self { - let mut tokenizer = Self::up_to(offset, source); - tokenizer.back_line_has_no_comment = true; - tokenizer - } - - fn to_keyword_or_other(&self, range: TextRange) -> SimpleTokenKind { - let source = &self.source[range]; - match source { - "and" => SimpleTokenKind::And, - "as" => SimpleTokenKind::As, - "assert" => SimpleTokenKind::Assert, - "async" => SimpleTokenKind::Async, - "await" => SimpleTokenKind::Await, - "break" => SimpleTokenKind::Break, - "class" => SimpleTokenKind::Class, - "continue" => SimpleTokenKind::Continue, - "def" => SimpleTokenKind::Def, - "del" => SimpleTokenKind::Del, - "elif" => SimpleTokenKind::Elif, - "else" => SimpleTokenKind::Else, - "except" => SimpleTokenKind::Except, - "finally" => SimpleTokenKind::Finally, - "for" => SimpleTokenKind::For, - "from" => SimpleTokenKind::From, - "global" => SimpleTokenKind::Global, - "if" => SimpleTokenKind::If, - "import" => SimpleTokenKind::Import, - "in" => SimpleTokenKind::In, - "is" => SimpleTokenKind::Is, - "lambda" => SimpleTokenKind::Lambda, - "nonlocal" => SimpleTokenKind::Nonlocal, - "not" => SimpleTokenKind::Not, - "or" => SimpleTokenKind::Or, - "pass" => SimpleTokenKind::Pass, - "raise" => SimpleTokenKind::Raise, - "return" => SimpleTokenKind::Return, - "try" => SimpleTokenKind::Try, - "while" => SimpleTokenKind::While, - "match" => SimpleTokenKind::Match, // Match is a soft keyword that depends on the context but we can always lex it as a keyword and leave it to the caller (parser) to decide if it should be handled as an identifier or keyword. - "type" => SimpleTokenKind::Type, // Type is a soft keyword that depends on the context but we can always lex it as a keyword and leave it to the caller (parser) to decide if it should be handled as an identifier or keyword. - "case" => SimpleTokenKind::Case, - "with" => SimpleTokenKind::With, - "yield" => SimpleTokenKind::Yield, - _ => SimpleTokenKind::Other, // Potentially an identifier, but only if it isn't a string prefix. We can ignore this for now https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals - } - } - fn next_token(&mut self) -> SimpleToken { self.cursor.start_token(); @@ -523,6 +501,7 @@ impl<'a> SimpleTokenizer<'a> { }; if self.bogus { + // Emit a single final bogus token let token = SimpleToken { kind: SimpleTokenKind::Bogus, range: TextRange::at(self.offset, first.text_len()), @@ -532,14 +511,29 @@ impl<'a> SimpleTokenizer<'a> { return token; } - let kind = match first { + let kind = self.next_token_inner(first); + + let token_len = self.cursor.token_len(); + + let token = SimpleToken { + kind, + range: TextRange::at(self.offset, token_len), + }; + + self.offset += token_len; + + token + } + + fn next_token_inner(&mut self, first: char) -> SimpleTokenKind { + match first { // Keywords and identifiers c if is_identifier_start(c) => { self.cursor.eat_while(is_identifier_continuation); let token_len = self.cursor.token_len(); let range = TextRange::at(self.offset, token_len); - let kind = self.to_keyword_or_other(range); + let kind = to_keyword_or_other(&self.source[range]); if kind == SimpleTokenKind::Other { self.bogus = true; @@ -717,24 +711,102 @@ impl<'a> SimpleTokenizer<'a> { self.bogus = true; SimpleTokenKind::Other } - }; - - let token_len = self.cursor.token_len(); - - let token = SimpleToken { - kind, - range: TextRange::at(self.offset, token_len), - }; - - self.offset += token_len; - - token + } } - /// Returns the next token from the back. 
Prefer iterating forwards. Iterating backwards is significantly more expensive
-    /// because it needs to check if the line has any comments when encountering any non-trivia token.
-    pub fn next_token_back(&mut self) -> SimpleToken {
+    pub fn skip_trivia(self) -> impl Iterator<Item = SimpleToken> + 'a {
+        self.filter(|t| !t.kind().is_trivia())
+    }
+}
+
+impl Iterator for SimpleTokenizer<'_> {
+    type Item = SimpleToken;
+
+    fn next(&mut self) -> Option<SimpleToken> {
+        let token = self.next_token();
+
+        if token.kind == SimpleTokenKind::EndOfFile {
+            None
+        } else {
+            Some(token)
+        }
+    }
+}
+
+/// Simple zero allocation backwards tokenizer for finding preceding tokens.
+///
+/// The tokenizer must start at an offset that is trivia (e.g. not inside of a multiline string).
+/// It will fail when reaching a string.
+///
+/// In case it finds something it can't parse, the tokenizer will return a
+/// [`SimpleTokenKind::Other`] and then only a final [`SimpleTokenKind::Bogus`] afterwards.
+pub struct BackwardsTokenizer<'a> {
+    offset: TextSize,
+    back_offset: TextSize,
+    /// Remember if we have checked for comments
+    after_newline: bool,
+    /// Not `&CommentRanges` to avoid a circular dependency
+    comment_ranges: &'a [TextRange],
+    /// The index of the previous line-ending comment
+    previous_comment_idx: Option<usize>,
+    bogus: bool,
+    source: &'a str,
+    cursor: Cursor<'a>,
+}
+
+impl<'a> BackwardsTokenizer<'a> {
+    pub fn new(source: &'a str, range: TextRange, comment_range: &'a [TextRange]) -> Self {
+        Self {
+            offset: range.start(),
+            back_offset: range.end(),
+            // We could start tokenizing at a comment
+            after_newline: true,
+            comment_ranges: comment_range,
+            previous_comment_idx: None,
+            bogus: false,
+            source,
+            cursor: Cursor::new(&source[range]),
+        }
+    }
+
+    pub fn up_to(offset: TextSize, source: &'a str, comment_range: &'a [TextRange]) -> Self {
+        Self::new(source, TextRange::up_to(offset), comment_range)
+    }
+
+    pub fn skip_trivia(self) -> impl Iterator<Item = SimpleToken> + 'a {
+        self.filter(|t| !t.kind().is_trivia())
+    }
+
+    pub fn next_token(&mut self) -> SimpleToken {
         self.cursor.start_token();
+        self.back_offset = self.cursor.text_len() + self.offset;
+
+        if self.after_newline {
+            // This comment ended a line with a higher line number, not the current one
+            let previous_comment_idx = self.previous_comment_idx.unwrap_or_else(|| {
+                self.comment_ranges
+                    .partition_point(|comment| comment.end() <= self.back_offset)
+            });
+            // If `previous_comment_idx == 0`, we're in a comment-free region
+            if previous_comment_idx > 0 {
+                let comment = self.comment_ranges[previous_comment_idx - 1];
+                if comment.end() == self.back_offset {
+                    // Skip the comment without iterating over the chars manually
+                    self.cursor =
+                        Cursor::new(&self.source[TextRange::new(self.offset, comment.start())]);
+                    debug_assert_eq!(self.cursor.text_len() + self.offset, comment.start());
+                    self.after_newline = false;
+                    self.previous_comment_idx = Some(previous_comment_idx - 1);
+                    return SimpleToken {
+                        kind: SimpleTokenKind::Comment,
+                        range: comment.range(),
+                    };
+                }
+                // At least memoize the binary search
+                self.previous_comment_idx = Some(previous_comment_idx);
+            }
+            self.after_newline = false;
+        }
 
         let Some(last) = self.cursor.bump_back() else {
             return SimpleToken {
@@ -762,322 +834,132 @@ impl<'a> SimpleTokenizer<'a> {
         }
 
             '\r' => {
-                self.back_line_has_no_comment = false;
+                self.after_newline = true;
                 SimpleTokenKind::Newline
             }
 
             '\n' => {
-                self.back_line_has_no_comment = false;
                 self.cursor.eat_char_back('\r');
+                self.after_newline = true;
                 SimpleTokenKind::Newline
            }
-
-            // Empty comment 
(could also be a comment nested in another comment, but this shouldn't matter for what we use the lexer for) - '#' => SimpleTokenKind::Comment, - - // For all other tokens, test if the character isn't part of a comment. - c => { - // Skip the test whether there's a preceding comment if it has been performed before. - let comment_length = if self.back_line_has_no_comment { - None - } else { - let bytes = self.cursor.chars().as_str().as_bytes(); - let mut potential_comment_starts: smallvec::SmallVec<[TextSize; 2]> = - smallvec::SmallVec::new(); - - // Find the start of the line, or any potential comments. - for index in memrchr3_iter(b'\n', b'\r', b'#', bytes) { - if bytes[index] == b'#' { - // Potentially a comment, but not guaranteed - // SAFETY: Safe, because ruff only supports files up to 4GB - potential_comment_starts.push(TextSize::try_from(index).unwrap()); - } else { - break; - } - } - - // No comments - if potential_comment_starts.is_empty() { - None - } else { - // The line contains at least one `#` token. The `#` can indicate the start of a - // comment, meaning the current token is commented out, or it is a regular `#` inside of a string. - self.comment_from_hash_positions(&potential_comment_starts) - } - }; - - // From here on it is guaranteed that this line has no other comment. - self.back_line_has_no_comment = true; - - if let Some(comment_length) = comment_length { - // It is a comment, bump all tokens - for _ in 0..usize::from(comment_length) { - self.cursor.bump_back().unwrap(); - } - - SimpleTokenKind::Comment - } else { - match c { - // Keywords and identifiers - c if is_identifier_continuation(c) => { - // if we only have identifier continuations but no start (e.g. 555) we - // don't want to consume the chars, so in that case, we want to rewind the - // cursor to here - let savepoint = self.cursor.clone(); - self.cursor.eat_back_while(is_identifier_continuation); - - let token_len = self.cursor.token_len(); - let range = TextRange::at(self.back_offset - token_len, token_len); - - if self.source[range] - .chars() - .next() - .is_some_and(is_identifier_start) - { - self.to_keyword_or_other(range) - } else { - self.cursor = savepoint; - self.bogus = true; - SimpleTokenKind::Other - } - } - - // Non-trivia tokens that are unambiguous when lexing backwards. - // In other words: these are characters that _don't_ appear at the - // end of a multi-character token (like `!=`). - '\\' => SimpleTokenKind::Continuation, - ':' => SimpleTokenKind::Colon, - '~' => SimpleTokenKind::Tilde, - '%' => SimpleTokenKind::Percent, - '|' => SimpleTokenKind::Vbar, - ',' => SimpleTokenKind::Comma, - ';' => SimpleTokenKind::Semi, - '(' => SimpleTokenKind::LParen, - ')' => SimpleTokenKind::RParen, - '[' => SimpleTokenKind::LBracket, - ']' => SimpleTokenKind::RBracket, - '{' => SimpleTokenKind::LBrace, - '}' => SimpleTokenKind::RBrace, - '&' => SimpleTokenKind::Ampersand, - '^' => SimpleTokenKind::Circumflex, - '+' => SimpleTokenKind::Plus, - '-' => SimpleTokenKind::Minus, - - // Non-trivia tokens that _are_ ambiguous when lexing backwards. - // In other words: these are characters that _might_ mark the end - // of a multi-character token (like `!=` or `->` or `//` or `**`). - '=' | '*' | '/' | '@' | '!' | '<' | '>' | '.' => { - // This could be a single-token token, like `+` in `x + y`, or a - // multi-character token, like `+=` in `x += y`. 
It could also be a sequence
-                        // of multi-character tokens, like `x ==== y`, which is invalid, _but_ it's
-                        // important that we produce the same token stream when lexing backwards as
-                        // we do when lexing forwards. So, identify the range of the sequence, lex
-                        // forwards, and return the last token.
-                        let mut cursor = self.cursor.clone();
-                        cursor.eat_back_while(|c| {
-                            matches!(
-                                c,
-                                ':' | '~'
-                                    | '%'
-                                    | '|'
-                                    | '&'
-                                    | '^'
-                                    | '+'
-                                    | '-'
-                                    | '='
-                                    | '*'
-                                    | '/'
-                                    | '@'
-                                    | '!'
-                                    | '<'
-                                    | '>'
-                                    | '.'
-                            )
-                        });
-
-                        let token_len = cursor.token_len();
-                        let range = TextRange::at(self.back_offset - token_len, token_len);
-
-                        let forward_lexer = Self::new(self.source, range);
-                        if let Some(token) = forward_lexer.last() {
-                            // If the token spans multiple characters, bump the cursor. Note,
-                            // though, that we already bumped the cursor to past the last character
-                            // in the token at the very start of `next_token_back`.
-                            for _ in self.source[token.range].chars().rev().skip(1) {
-                                self.cursor.bump_back().unwrap();
-                            }
-                            token.kind()
-                        } else {
-                            self.bogus = true;
-                            SimpleTokenKind::Other
-                        }
-                    }
-
-                    _ => {
-                        self.bogus = true;
-                        SimpleTokenKind::Other
-                    }
-                }
-            }
-        }
+            _ => self.next_token_inner(last),
         };
 
         let token_len = self.cursor.token_len();
         let start = self.back_offset - token_len;
-
-        let token = SimpleToken {
+        SimpleToken {
             kind,
             range: TextRange::at(start, token_len),
-        };
-
-        self.back_offset = start;
-
-        token
+        }
     }
 
-    pub fn skip_trivia(self) -> impl Iterator<Item = SimpleToken> + DoubleEndedIterator + 'a {
-        self.filter(|t| !t.kind().is_trivia())
-    }
-
-    /// Given the position of `#` tokens on a line, test if any `#` is the start of a comment and, if so, return the
-    /// length of the comment.
-    ///
-    /// The challenge is that `#` tokens can also appear inside of strings:
-    ///
-    /// ```python
-    /// ' #not a comment'
-    /// ```
-    ///
-    /// This looks innocent but is the `'` really the start of the new string or could it be a closing delimiter
-    /// of a previously started string:
-    ///
-    /// ```python
-    /// ' a string\
-    /// ` # a comment '
-    /// ```
-    ///
-    /// The only way to reliability tell whether the `#` is a comment when the comment contains a quote char is
-    /// to forward lex all strings and comments and test if there's any unclosed string literal. If so, then
-    /// the hash cannot be a comment.
-    fn comment_from_hash_positions(&self, hash_positions: &[TextSize]) -> Option<TextSize> {
-        // Iterate over the `#` positions from the start to the end of the line.
-        // This is necessary to correctly support `a # comment # comment`.
-        for possible_start in hash_positions.iter().rev() {
-            let comment_bytes =
-                self.source[TextRange::new(*possible_start, self.back_offset)].as_bytes();
-
-            // Test if the comment contains any quotes. If so, then it's possible that the `#` token isn't
-            // the start of a comment, but instead part of a string:
-            // ```python
-            // a + 'a string # not a comment'
-            // a + '''a string
-            // # not a comment'''
-            // ```
-            match memchr2(b'\'', b'"', comment_bytes) {
-                // Most comments don't contain quotes, and most strings don't contain comments.
-                // For these it's safe to assume that they are comments.
-                None => return Some(self.cursor.chars().as_str().text_len() - possible_start),
-                // Now it gets complicated... There's no good way to know whether this is a string or not.
-                // It is necessary to lex all strings and comments from the start to know if it is one or the other. 
-                Some(_) => {
-                    if find_unterminated_string_kind(
-                        &self.cursor.chars().as_str()[TextRange::up_to(*possible_start)],
-                    )
-                    .is_none()
-                    {
-                        // There's no unterminated string at the comment's start position. This *must*
-                        // be a comment.
-                        return Some(self.cursor.chars().as_str().text_len() - possible_start);
-                    }
-
-                    // This is a hash inside of a string: `'test # not a comment'` continue with the next potential comment on the line.
-                }
-            }
-        }
-
-        None
-    }
-}
-
-fn find_unterminated_string_kind(input: &str) -> Option<StringKind> {
-    let mut rest = input;
-
-    while let Some(comment_or_string_start) = memchr3(b'#', b'\'', b'\"', rest.as_bytes()) {
-        let c = rest.as_bytes()[comment_or_string_start] as char;
-        let after = &rest[comment_or_string_start + 1..];
-
-        if c == '#' {
-            let comment_end = memchr2(b'\n', b'\r', after.as_bytes()).unwrap_or(after.len());
-            rest = &after[comment_end..];
-        } else {
-            let mut cursor = Cursor::new(after);
-            let quote_kind = if c == '\'' {
-                QuoteKind::Single
-            } else {
-                QuoteKind::Double
-            };
-
-            let string_kind = if cursor.eat_char(quote_kind.as_char()) {
-                // `''` or `""`
-                if cursor.eat_char(quote_kind.as_char()) {
-                    // `'''` or `"""`
-                    StringKind::Triple(quote_kind)
-                } else {
-                    // empty string literal, nothing more to lex
-                    rest = cursor.chars().as_str();
-                    continue;
-                }
-            } else {
-                StringKind::Single(quote_kind)
-            };
-
-            if !is_string_terminated(string_kind, &mut cursor) {
-                return Some(string_kind);
-            }
-
-            rest = cursor.chars().as_str();
-        }
-    }
-
-    None
-}
+    /// Helper to parse the previous token once we have skipped all whitespace
+    fn next_token_inner(&mut self, last: char) -> SimpleTokenKind {
+        match last {
+            // Keywords and identifiers
+            c if is_identifier_continuation(c) => {
+                // if we only have identifier continuations but no start (e.g. 555) we
+                // don't want to consume the chars, so in that case, we want to rewind the
+                // cursor to here
+                let savepoint = self.cursor.clone();
+                self.cursor.eat_back_while(is_identifier_continuation);
 
-fn is_string_terminated(kind: StringKind, cursor: &mut Cursor) -> bool {
-    let quote_char = kind.quote_kind().as_char();
+                let token_len = self.cursor.token_len();
+                let range = TextRange::at(self.back_offset - token_len, token_len);
 
-    while let Some(c) = cursor.bump() {
-        match c {
-            '\n' | '\r' if kind.is_single() => {
-                // Reached the end of the line without a closing quote, this is an unterminated string literal.
-                return false;
-            }
-            '\\' => {
-                // Skip over escaped quotes that match this strings quotes or double escaped backslashes
-                if cursor.eat_char(quote_char) || cursor.eat_char('\\') {
-                    continue;
-                }
-                // Eat over line continuation
-                cursor.eat_char('\r');
-                cursor.eat_char('\n');
-            }
-            c if c == quote_char => {
-                if kind.is_single() || (cursor.eat_char(quote_char) && cursor.eat_char(quote_char))
+                if self.source[range]
+                    .chars()
+                    .next()
+                    .is_some_and(is_identifier_start)
                 {
-                    return true;
+                    to_keyword_or_other(&self.source[range])
+                } else {
+                    self.cursor = savepoint;
+                    self.bogus = true;
+                    SimpleTokenKind::Other
+                }
+            }
+
+            // Non-trivia tokens that are unambiguous when lexing backwards.
+            // In other words: these are characters that _don't_ appear at the
+            // end of a multi-character token (like `!=`). 
+            '\\' => SimpleTokenKind::Continuation,
+            ':' => SimpleTokenKind::Colon,
+            '~' => SimpleTokenKind::Tilde,
+            '%' => SimpleTokenKind::Percent,
+            '|' => SimpleTokenKind::Vbar,
+            ',' => SimpleTokenKind::Comma,
+            ';' => SimpleTokenKind::Semi,
+            '(' => SimpleTokenKind::LParen,
+            ')' => SimpleTokenKind::RParen,
+            '[' => SimpleTokenKind::LBracket,
+            ']' => SimpleTokenKind::RBracket,
+            '{' => SimpleTokenKind::LBrace,
+            '}' => SimpleTokenKind::RBrace,
+            '&' => SimpleTokenKind::Ampersand,
+            '^' => SimpleTokenKind::Circumflex,
+            '+' => SimpleTokenKind::Plus,
+            '-' => SimpleTokenKind::Minus,
+
+            // Non-trivia tokens that _are_ ambiguous when lexing backwards.
+            // In other words: these are characters that _might_ mark the end
+            // of a multi-character token (like `!=` or `->` or `//` or `**`).
+            '=' | '*' | '/' | '@' | '!' | '<' | '>' | '.' => {
+                // This could be a single-token token, like `+` in `x + y`, or a
+                // multi-character token, like `+=` in `x += y`. It could also be a sequence
+                // of multi-character tokens, like `x ==== y`, which is invalid, _but_ it's
+                // important that we produce the same token stream when lexing backwards as
+                // we do when lexing forwards. So, identify the range of the sequence, lex
+                // forwards, and return the last token.
+                let mut cursor = self.cursor.clone();
+                cursor.eat_back_while(|c| {
+                    matches!(
+                        c,
+                        ':' | '~'
+                            | '%'
+                            | '|'
+                            | '&'
+                            | '^'
+                            | '+'
+                            | '-'
+                            | '='
+                            | '*'
+                            | '/'
+                            | '@'
+                            | '!'
+                            | '<'
+                            | '>'
+                            | '.'
+                    )
+                });
+
+                let token_len = cursor.token_len();
+                let range = TextRange::at(self.back_offset - token_len, token_len);
+
+                let forward_lexer = SimpleTokenizer::new(self.source, range);
+                if let Some(token) = forward_lexer.last() {
+                    // If the token spans multiple characters, bump the cursor. Note,
+                    // though, that we already bumped the cursor to past the last character
+                    // in the token at the very start of `next_token`.
+                    for _ in self.source[token.range].chars().rev().skip(1) {
+                        self.cursor.bump_back().unwrap();
+                    }
+                    token.kind()
+                } else {
+                    self.bogus = true;
+                    SimpleTokenKind::Other
+                }
+            }
 
             _ => {
-                // continue
+                self.bogus = true;
+                SimpleTokenKind::Other
             }
         }
     }
-
-    // Reached end without a closing quote
-    false
 }
 
-impl Iterator for SimpleTokenizer<'_> {
+impl Iterator for BackwardsTokenizer<'_> {
     type Item = SimpleToken;
 
     fn next(&mut self) -> Option<SimpleToken> {
@@ -1091,64 +973,16 @@ impl Iterator for SimpleTokenizer<'_> {
     }
 }
 
-impl DoubleEndedIterator for SimpleTokenizer<'_> {
-    fn next_back(&mut self) -> Option<SimpleToken> {
-        let token = self.next_token_back();
-
-        if token.kind == SimpleTokenKind::EndOfFile {
-            None
-        } else {
-            Some(token)
-        }
-    }
-}
-
-#[derive(Copy, Clone, Eq, PartialEq, Debug)]
-enum StringKind {
-    /// `'...'` or `"..."`
-    Single(QuoteKind),
-    /// `'''...'''` or `"""..."""`
-    Triple(QuoteKind),
-}
-
-impl StringKind {
-    const fn quote_kind(self) -> QuoteKind {
-        match self {
-            StringKind::Single(kind) => kind,
-            StringKind::Triple(kind) => kind,
-        }
-    }
-
-    const fn is_single(self) -> bool {
-        matches!(self, StringKind::Single(_))
-    }
-}
-
-#[derive(Copy, Clone, Eq, PartialEq, Debug)]
-enum QuoteKind {
-    /// `'`
-    Single,
-
-    /// `"`
-    Double,
-}
-
-impl QuoteKind {
-    const fn as_char(self) -> char {
-        match self {
-            QuoteKind::Single => '\'',
-            QuoteKind::Double => '"',
-        }
-    }
-}
 
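Note on the comment lookup in `next_token` above: because the comment ranges are pre-sorted, a `partition_point` binary search finds the only comment that could end at the current back offset, and `previous_comment_idx` memoizes the result across calls. A minimal sketch of that lookup, outside the diff; `comment_ending_at` is a hypothetical name, not part of the patch:

```rust
use ruff_text_size::{TextRange, TextSize};

/// Returns the comment that ends exactly at `back_offset`, if any.
/// Assumes `comment_ranges` is sorted and non-overlapping, as the
/// `CommentRanges` built by the indexer guarantees.
fn comment_ending_at(comment_ranges: &[TextRange], back_offset: TextSize) -> Option<TextRange> {
    // Number of comments that end at or before `back_offset` (binary search).
    let idx = comment_ranges.partition_point(|comment| comment.end() <= back_offset);
    // Only the last of those candidates can end exactly at `back_offset`.
    idx.checked_sub(1)
        .map(|i| comment_ranges[i])
        .filter(|comment| comment.end() == back_offset)
}
```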
 #[cfg(test)]
 mod tests {
     use insta::assert_debug_snapshot;
 
+    use ruff_python_parser::lexer::lex;
+    use ruff_python_parser::{Mode, Tok};
     use ruff_text_size::{TextLen, TextRange, TextSize};
 
     use crate::tokenizer::{lines_after, lines_before, SimpleToken, SimpleTokenizer};
+    use crate::{BackwardsTokenizer, SimpleTokenKind};
 
     struct TokenizationTestCase {
         source: &'static str,
@@ -1167,9 +1001,17 @@ mod tests {
     }
 
     fn tokenize_reverse(&self) -> Vec<SimpleToken> {
-        SimpleTokenizer::new(self.source, self.range)
-            .rev()
-            .collect()
+        let comment_ranges: Vec<_> = lex(self.source, Mode::Module)
+            .filter_map(|result| {
+                let (token, range) = result.expect("Input to be a valid python program.");
+                if matches!(token, Tok::Comment(_)) {
+                    Some(range)
+                } else {
+                    None
+                }
+            })
+            .collect();
+        BackwardsTokenizer::new(self.source, self.range, &comment_ranges).collect()
     }
 
     fn tokens(&self) -> &[SimpleToken] {
@@ -1495,4 +1337,22 @@ mod tests {
             1
         );
     }
+
+    #[test]
+    fn test_previous_token_simple() {
+        let cases = &["x = (", "x = ( ", "x = (\n"];
+        for source in cases {
+            let token = BackwardsTokenizer::up_to(source.text_len(), source, &[])
+                .skip_trivia()
+                .next()
+                .unwrap();
+            assert_eq!(
+                token,
+                SimpleToken {
+                    kind: SimpleTokenKind::LParen,
+                    range: TextRange::new(TextSize::new(4), TextSize::new(5)),
+                }
+            );
+        }
+    }
 }
diff --git a/crates/ruff_wasm/Cargo.toml b/crates/ruff_wasm/Cargo.toml
index 4e368dec5f..3ecc3ad702 100644
--- a/crates/ruff_wasm/Cargo.toml
+++ b/crates/ruff_wasm/Cargo.toml
@@ -28,6 +28,7 @@ ruff_python_index = { path = "../ruff_python_index" }
 ruff_python_parser = { path = "../ruff_python_parser" }
 ruff_source_file = { path = "../ruff_source_file" }
 ruff_text_size = { path = "../ruff_text_size" }
+ruff_python_trivia = { path = "../ruff_python_trivia" }
 ruff_workspace = { path = "../ruff_workspace" }
 
 console_error_panic_hook = { version = "0.1.7", optional = true }
diff --git a/crates/ruff_wasm/src/lib.rs b/crates/ruff_wasm/src/lib.rs
index 24d2359325..2ab51cf77a 100644
--- a/crates/ruff_wasm/src/lib.rs
+++ b/crates/ruff_wasm/src/lib.rs
@@ -16,10 +16,10 @@ use ruff_formatter::{FormatResult, Formatted, LineWidth};
 use ruff_python_ast::{Mod, PySourceType};
 use ruff_python_codegen::Stylist;
 use ruff_python_formatter::{format_node, pretty_comments, PyFormatContext, PyFormatOptions};
-use ruff_python_index::{CommentRanges, CommentRangesBuilder, Indexer};
+use ruff_python_index::{CommentRangesBuilder, Indexer};
 use ruff_python_parser::lexer::LexResult;
-use ruff_python_parser::AsMode;
-use ruff_python_parser::{parse_tokens, Mode};
+use ruff_python_parser::{parse_tokens, AsMode, Mode};
+use ruff_python_trivia::CommentRanges;
 use ruff_source_file::{Locator, SourceLocation};
 use ruff_text_size::Ranged;
 use ruff_workspace::configuration::Configuration;
@@ -116,7 +116,7 @@ impl Workspace {
         Ok(Workspace { settings })
     }
 
-    #[wasm_bindgen(js_name=defaultSettings)]
+    #[wasm_bindgen(js_name = defaultSettings)]
     pub fn default_settings() -> Result {
         serde_wasm_bindgen::to_value(&Options {
             // Propagate defaults.
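For reference, a usage sketch of the new API, mirroring the test helper above; `last_non_trivia_token` is a hypothetical function, not part of the patch:

```rust
use ruff_python_parser::lexer::lex;
use ruff_python_parser::{Mode, Tok};
use ruff_python_trivia::{BackwardsTokenizer, SimpleToken};
use ruff_text_size::{TextLen, TextRange};

/// Finds the last non-trivia token in `source`, skipping whitespace,
/// newlines, and (via the precomputed ranges) comments.
fn last_non_trivia_token(source: &str) -> Option<SimpleToken> {
    // A real caller (e.g. the linter) would reuse the comment ranges it already built.
    let comment_ranges: Vec<TextRange> = lex(source, Mode::Module)
        .filter_map(|result| {
            let (token, range) = result.expect("source should be valid Python");
            matches!(token, Tok::Comment(_)).then_some(range)
        })
        .collect();

    BackwardsTokenizer::up_to(source.text_len(), source, &comment_ranges)
        .skip_trivia()
        .next()
}
```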