Treat form feed as whitespace in SimpleTokenizer (#7626)

## Summary

Form feed (`\x0C`) is whitespace as per `is_python_whitespace`, and right now it tends
to lead to panics in the formatter. Seems reasonable to treat it as
whitespace in the `SimpleTokenizer` too.

Closes https://github.com/astral-sh/ruff/issues/7624.
This commit is contained in:
Charlie Marsh 2023-09-25 10:34:59 -04:00 committed by GitHub
parent 17ceb5dcb3
commit 65aebf127a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 43 additions and 6 deletions

View file

@ -0,0 +1,6 @@
# Regression test for: https://github.com/astral-sh/ruff/issues/7624
if symbol is not None:
request["market"] = market["id"]
# "remaining_volume": "0.0",
else:
pass

View file

@ -0,0 +1,26 @@
---
source: crates/ruff_python_formatter/tests/fixtures.rs
input_file: crates/ruff_python_formatter/resources/test/fixtures/ruff/form_feed.py
---
## Input
```py
# Regression test for: https://github.com/astral-sh/ruff/issues/7624
if symbol is not None:
request["market"] = market["id"]
# "remaining_volume": "0.0",
else:
pass
```
## Output
```py
# Regression test for: https://github.com/astral-sh/ruff/issues/7624
if symbol is not None:
request["market"] = market["id"]
# "remaining_volume": "0.0",
else:
pass
```

View file

@ -566,8 +566,10 @@ impl<'a> SimpleTokenizer<'a> {
kind kind
} }
' ' | '\t' => { // Space, tab, or form feed. We ignore the true semantics of form feed, and treat it as
self.cursor.eat_while(|c| matches!(c, ' ' | '\t')); // whitespace.
' ' | '\t' | '\x0C' => {
self.cursor.eat_while(|c| matches!(c, ' ' | '\t' | '\x0C'));
SimpleTokenKind::Whitespace SimpleTokenKind::Whitespace
} }
@ -837,10 +839,13 @@ impl<'a> BackwardsTokenizer<'a> {
} }
let kind = match last { let kind = match last {
// This may not be 100% correct because it will lex-out trailing whitespace from a comment // Space, tab, or form feed. We ignore the true semantics of form feed, and treat it as
// as whitespace rather than being part of the token. This shouldn't matter for what we use the lexer for. // whitespace. Note that this will lex-out trailing whitespace from a comment as
' ' | '\t' => { // whitespace rather than as part of the comment token, but this shouldn't matter for
self.cursor.eat_back_while(|c| matches!(c, ' ' | '\t')); // our use case.
' ' | '\t' | '\x0C' => {
self.cursor
.eat_back_while(|c| matches!(c, ' ' | '\t' | '\x0C'));
SimpleTokenKind::Whitespace SimpleTokenKind::Whitespace
} }