Treat form feed as whitespace in SimpleTokenizer (#7626)

## Summary

Form feed is whitespace as per `is_python_whitespace`, and right now it tends to lead to panics in the formatter. Seems reasonable to treat it as whitespace in the `SimpleTokenizer` too.

Closes https://github.com/astral-sh/ruff/issues/7624.
2025-08-30 23:27:38 +00:00 · 2023-09-25 10:34:59 -04:00 · 2023-09-25 10:34:59 -04:00 · 65aebf127a
commit 65aebf127a
parent 17ceb5dcb3
3 changed files with 43 additions and 6 deletions
--- a/crates/ruff_python_formatter/resources/test/fixtures/ruff/form_feed.py
+++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/form_feed.py
@@ -0,0 +1,6 @@
 # Regression test for: https://github.com/astral-sh/ruff/issues/7624
 if symbol is not None:
    request["market"] = market["id"]
      #             "remaining_volume": "0.0",
 else:
    pass
--- a/crates/ruff_python_formatter/tests/snapshots/format@form_feed.py.snap
+++ b/crates/ruff_python_formatter/tests/snapshots/format@form_feed.py.snap
@@ -0,0 +1,26 @@
 ---
 source: crates/ruff_python_formatter/tests/fixtures.rs
 input_file: crates/ruff_python_formatter/resources/test/fixtures/ruff/form_feed.py
 ---
 ## Input
 ```py
 # Regression test for: https://github.com/astral-sh/ruff/issues/7624
 if symbol is not None:
    request["market"] = market["id"]
      #             "remaining_volume": "0.0",
 else:
    pass
 ```
 ## Output
 ```py
 # Regression test for: https://github.com/astral-sh/ruff/issues/7624
 if symbol is not None:
    request["market"] = market["id"]
 #             "remaining_volume": "0.0",
 else:
    pass
 ```
--- a/crates/ruff_python_trivia/src/tokenizer.rs
+++ b/crates/ruff_python_trivia/src/tokenizer.rs
@@ -566,8 +566,10 @@ impl<'a> SimpleTokenizer<'a> {
                kind
            }
-            ' ' | '\t' => {
-                self.cursor.eat_while(|c| matches!(c, ' ' | '\t'));
+            // Space, tab, or form feed. We ignore the true semantics of form feed, and treat it as
+            // whitespace.
+            ' ' | '\t' | '\x0C' => {
+                self.cursor.eat_while(|c| matches!(c, ' ' | '\t' | '\x0C'));
                SimpleTokenKind::Whitespace
            }
@@ -837,10 +839,13 @@ impl<'a> BackwardsTokenizer<'a> {
        }
        let kind = match last {
-            // This may not be 100% correct because it will lex-out trailing whitespace from a comment
-            // as whitespace rather than being part of the token. This shouldn't matter for what we use the lexer for.
-            ' ' | '\t' => {
-                self.cursor.eat_back_while(|c| matches!(c, ' ' | '\t'));
+            // Space, tab, or form feed. We ignore the true semantics of form feed, and treat it as
+            // whitespace. Note that this will lex-out trailing whitespace from a comment as
+            // whitespace rather than as part of the comment token, but this shouldn't matter for
+            // our use case.
+            ' ' | '\t' | '\x0C' => {
+                self.cursor
+                    .eat_back_while(|c| matches!(c, ' ' | '\t' | '\x0C'));
                SimpleTokenKind::Whitespace
            }