Rename ruff_python_whitespace to ruff_python_trivia (#5886)

## Summary

This crate now contains utilities for dealing with trivia more broadly: whitespace, newlines, "simple" trivia lexing, etc. I'm therefore renaming it to reflect its increased responsibilities. To avoid naming conflicts, I've also renamed `Token` and `TokenKind` to `SimpleToken` and `SimpleTokenKind`.
2025-07-23 04:55:21 +00:00 · 2023-07-19 11:48:27 -04:00 · 2023-07-19 11:48:27 -04:00 · 5f3da9955a
commit 5f3da9955a
parent a75a6de577
86 changed files with 360 additions and 353 deletions
--- a/crates/ruff_python_formatter/Cargo.toml
+++ b/crates/ruff_python_formatter/Cargo.toml
@ -12,7 +12,7 @@ license = { workspace = true }

 [dependencies]
 ruff_formatter = { path = "../ruff_formatter" }
-ruff_python_whitespace = { path = "../ruff_python_whitespace" }
+ruff_python_trivia = { path = "../ruff_python_trivia" }
 ruff_python_ast = { path = "../ruff_python_ast" }
 ruff_text_size = { workspace = true }

--- a/crates/ruff_python_formatter/src/builders.rs
+++ b/crates/ruff_python_formatter/src/builders.rs
@ -2,8 +2,8 @@ use ruff_text_size::{TextRange, TextSize};
 use rustpython_parser::ast::Ranged;

 use ruff_formatter::{format_args, write, Argument, Arguments};
-use ruff_python_whitespace::{
-    lines_after, skip_trailing_trivia, SimpleTokenizer, Token, TokenKind,
+use ruff_python_trivia::{
+    lines_after, skip_trailing_trivia, SimpleToken, SimpleTokenKind, SimpleTokenizer,
 };

 use crate::context::NodeLevel;
@ -294,12 +294,12 @@ impl<'fmt, 'ast, 'buf> JoinCommaSeparatedBuilder<'fmt, 'ast, 'buf> {
                        )
                        .skip_trivia()
                        // Skip over any closing parentheses belonging to the expression
-                        .find(|token| token.kind() != TokenKind::RParen);
+                        .find(|token| token.kind() != SimpleTokenKind::RParen);

                        matches!(
                            first_token,
-                            Some(Token {
-                                kind: TokenKind::Comma,
+                            Some(SimpleToken {
+                                kind: SimpleTokenKind::Comma,
                                ..
                            })
                        )
--- a/crates/ruff_python_formatter/src/comments/format.rs
+++ b/crates/ruff_python_formatter/src/comments/format.rs
@ -3,7 +3,7 @@ use rustpython_parser::ast::Ranged;

 use ruff_formatter::{format_args, write, FormatError, SourceCode};
 use ruff_python_ast::node::{AnyNodeRef, AstNode};
-use ruff_python_whitespace::{lines_after, lines_before, skip_trailing_trivia};
+use ruff_python_trivia::{lines_after, lines_before, skip_trailing_trivia};

 use crate::comments::SourceComment;
 use crate::context::NodeLevel;
--- a/crates/ruff_python_formatter/src/comments/placement.rs
+++ b/crates/ruff_python_formatter/src/comments/placement.rs
@ -7,8 +7,8 @@ use rustpython_parser::ast::{Expr, ExprIfExp, ExprSlice, Ranged};
 use ruff_python_ast::node::{AnyNodeRef, AstNode};
 use ruff_python_ast::source_code::Locator;
 use ruff_python_ast::whitespace;
-use ruff_python_whitespace::{
-    first_non_trivia_token_rev, PythonWhitespace, SimpleTokenizer, Token, TokenKind,
+use ruff_python_trivia::{
+    first_non_trivia_token_rev, PythonWhitespace, SimpleToken, SimpleTokenKind, SimpleTokenizer,
    UniversalNewlines,
 };

@ -756,7 +756,7 @@ fn handle_trailing_end_of_line_condition_comment<'a>(

        for token in tokens {
            match token.kind() {
-                TokenKind::Colon => {
+                SimpleTokenKind::Colon => {
                    if comment.slice().start() > token.start() {
                        // Comment comes after the colon
                        // ```python
@ -775,10 +775,10 @@ fn handle_trailing_end_of_line_condition_comment<'a>(
                    // ```
                    break;
                }
-                TokenKind::RParen => {
+                SimpleTokenKind::RParen => {
                    // Skip over any closing parentheses
                }
-                TokenKind::Comma => {
+                SimpleTokenKind::Comma => {
                    // Skip over any trailing comma
                }
                kind => {
@ -884,12 +884,12 @@ fn handle_trailing_binary_expression_left_or_operator_comment<'a>(
    );

    let mut tokens = SimpleTokenizer::new(locator.contents(), between_operands_range).skip_trivia();
-    let operator_offset = if let Some(non_r_paren) = tokens.find(|t| t.kind() != TokenKind::RParen)
-    {
-        non_r_paren.start()
-    } else {
-        return CommentPlacement::Default(comment);
-    };
+    let operator_offset =
+        if let Some(non_r_paren) = tokens.find(|t| t.kind() != SimpleTokenKind::RParen) {
+            non_r_paren.start()
+        } else {
+            return CommentPlacement::Default(comment);
+        };

    let comment_range = comment.slice().range();

@ -1061,8 +1061,8 @@ fn handle_slice_comments<'a>(
    // Check for `foo[ # comment`, but only if they are on the same line
    let after_lbracket = matches!(
        first_non_trivia_token_rev(comment.slice().start(), locator.contents()),
-        Some(Token {
-            kind: TokenKind::LBracket,
+        Some(SimpleToken {
+            kind: SimpleTokenKind::LBracket,
            ..
        })
    );
@ -1182,11 +1182,11 @@ fn handle_dict_unpacking_comment<'a>(
    // we start from the preceding node but we skip its token
    for token in tokens.by_ref() {
        // Skip closing parentheses that are not part of the node range
-        if token.kind == TokenKind::RParen {
+        if token.kind == SimpleTokenKind::RParen {
            continue;
        }
        // The Keyword case
-        if token.kind == TokenKind::Star {
+        if token.kind == SimpleTokenKind::Star {
            count += 1;
            break;
        }
@ -1194,8 +1194,8 @@ fn handle_dict_unpacking_comment<'a>(
        debug_assert!(
            matches!(
                token,
-                Token {
-                    kind: TokenKind::LBrace | TokenKind::Comma | TokenKind::Colon,
+                SimpleToken {
+                    kind: SimpleTokenKind::LBrace | SimpleTokenKind::Comma | SimpleTokenKind::Colon,
                    ..
                }
            ),
@ -1205,7 +1205,7 @@ fn handle_dict_unpacking_comment<'a>(
    }

    for token in tokens {
-        if token.kind != TokenKind::Star {
+        if token.kind != SimpleTokenKind::Star {
            return CommentPlacement::Default(comment);
        }
        count += 1;
@ -1302,12 +1302,12 @@ fn handle_expr_if_comment<'a>(
    let if_token = find_only_token_in_range(
        TextRange::new(body.end(), test.start()),
        locator,
-        TokenKind::If,
+        SimpleTokenKind::If,
    );
    let else_token = find_only_token_in_range(
        TextRange::new(test.end(), orelse.start()),
        locator,
-        TokenKind::Else,
+        SimpleTokenKind::Else,
    );

    // Between `if` and `test`
@ -1369,7 +1369,7 @@ fn handle_with_item_comment<'a>(
    let as_token = find_only_token_in_range(
        TextRange::new(context_expr.end(), optional_vars.start()),
        locator,
-        TokenKind::As,
+        SimpleTokenKind::As,
    );

    // If before the `as` keyword, then it must be a trailing comment of the context expression.
@ -1386,13 +1386,17 @@ fn handle_with_item_comment<'a>(

 /// Looks for a token in the range that contains no other tokens except for parentheses outside
 /// the expression ranges
-fn find_only_token_in_range(range: TextRange, locator: &Locator, token_kind: TokenKind) -> Token {
+fn find_only_token_in_range(
+    range: TextRange,
+    locator: &Locator,
+    token_kind: SimpleTokenKind,
+) -> SimpleToken {
    let mut tokens = SimpleTokenizer::new(locator.contents(), range)
        .skip_trivia()
-        .skip_while(|token| token.kind == TokenKind::RParen);
+        .skip_while(|token| token.kind == SimpleTokenKind::RParen);
    let token = tokens.next().expect("Expected a token");
    debug_assert_eq!(token.kind(), token_kind);
-    let mut tokens = tokens.skip_while(|token| token.kind == TokenKind::LParen);
+    let mut tokens = tokens.skip_while(|token| token.kind == SimpleTokenKind::LParen);
    debug_assert_eq!(tokens.next(), None);
    token
 }
@ -1446,7 +1450,7 @@ fn handle_comprehension_comment<'a>(
            comprehension.iter.range().start(),
        ),
        locator,
-        TokenKind::In,
+        SimpleTokenKind::In,
    );

    // Comments between the target and the `in`
@ -1509,7 +1513,7 @@ fn handle_comprehension_comment<'a>(
        let if_token = find_only_token_in_range(
            TextRange::new(last_end, if_node.range().start()),
            locator,
-            TokenKind::If,
+            SimpleTokenKind::If,
        );
        if is_own_line {
            if last_end < comment.slice().start() && comment.slice().start() < if_token.start() {
--- a/crates/ruff_python_formatter/src/comments/visitor.rs
+++ b/crates/ruff_python_formatter/src/comments/visitor.rs
@ -13,7 +13,7 @@ use ruff_python_ast::source_code::{CommentRanges, Locator};
 // pre-order.
 #[allow(clippy::wildcard_imports)]
 use ruff_python_ast::visitor::preorder::*;
-use ruff_python_whitespace::is_python_whitespace;
+use ruff_python_trivia::is_python_whitespace;

 use crate::comments::node_key::NodeRefEqualityKey;
 use crate::comments::placement::place_comment;
--- a/crates/ruff_python_formatter/src/expression/expr_call.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_call.rs
@ -3,7 +3,7 @@ use rustpython_parser::ast::{Expr, ExprCall, Ranged};

 use ruff_formatter::write;
 use ruff_python_ast::node::AnyNodeRef;
-use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
+use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};

 use crate::comments::dangling_comments;
 use crate::expression::expr_generator_exp::GeneratorExpParentheses;
@ -132,14 +132,14 @@ fn is_single_argument_parenthesized(argument: &Expr, call_end: TextSize, source:
        SimpleTokenizer::new(source, TextRange::new(argument.end(), call_end)).skip_trivia()
    {
        match token.kind() {
-            TokenKind::RParen => {
+            SimpleTokenKind::RParen => {
                if has_seen_r_paren {
                    return true;
                }
                has_seen_r_paren = true;
            }
            // Skip over any trailing comma
-            TokenKind::Comma => continue,
+            SimpleTokenKind::Comma => continue,
            _ => {
                // Passed the arguments
                break;
--- a/crates/ruff_python_formatter/src/expression/expr_slice.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_slice.rs
@ -5,7 +5,7 @@ use rustpython_parser::ast::{Expr, Ranged};
 use ruff_formatter::prelude::{hard_line_break, line_suffix_boundary, space, text};
 use ruff_formatter::{write, Buffer, Format, FormatError, FormatResult};
 use ruff_python_ast::node::{AnyNodeRef, AstNode};
-use ruff_python_whitespace::{SimpleTokenizer, Token, TokenKind};
+use ruff_python_trivia::{SimpleToken, SimpleTokenKind, SimpleTokenizer};

 use crate::comments::{dangling_comments, SourceComment};
 use crate::context::PyFormatContext;
@ -158,17 +158,17 @@ pub(crate) fn find_colons(
    range: TextRange,
    lower: &Option<Box<Expr>>,
    upper: &Option<Box<Expr>>,
-) -> FormatResult<(Token, Option<Token>)> {
+) -> FormatResult<(SimpleToken, Option<SimpleToken>)> {
    let after_lower = lower
        .as_ref()
        .map_or(range.start(), |lower| lower.range().end());
    let mut tokens = SimpleTokenizer::new(contents, TextRange::new(after_lower, range.end()))
        .skip_trivia()
-        .skip_while(|token| token.kind == TokenKind::RParen);
+        .skip_while(|token| token.kind == SimpleTokenKind::RParen);
    let first_colon = tokens.next().ok_or(FormatError::syntax_error(
        "Din't find any token for slice first colon",
    ))?;
-    if first_colon.kind != TokenKind::Colon {
+    if first_colon.kind != SimpleTokenKind::Colon {
        return Err(FormatError::syntax_error(
            "slice first colon token was not a colon",
        ));
@ -179,9 +179,9 @@ pub(crate) fn find_colons(
        .map_or(first_colon.end(), |upper| upper.range().end());
    let mut tokens = SimpleTokenizer::new(contents, TextRange::new(after_upper, range.end()))
        .skip_trivia()
-        .skip_while(|token| token.kind == TokenKind::RParen);
+        .skip_while(|token| token.kind == SimpleTokenKind::RParen);
    let second_colon = if let Some(token) = tokens.next() {
-        if token.kind != TokenKind::Colon {
+        if token.kind != SimpleTokenKind::Colon {
            return Err(FormatError::syntax_error(
                "Expected a colon for the second colon token",
            ));
--- a/crates/ruff_python_formatter/src/expression/expr_unary_op.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_unary_op.rs
@ -5,7 +5,7 @@ use rustpython_parser::ast::{ExprUnaryOp, Ranged};
 use ruff_formatter::prelude::{hard_line_break, space, text};
 use ruff_formatter::{Format, FormatContext, FormatResult};
 use ruff_python_ast::node::AnyNodeRef;
-use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
+use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};

 use crate::comments::trailing_comments;
 use crate::context::PyFormatContext;
@ -97,7 +97,7 @@ fn is_operand_parenthesized(unary: &ExprUnaryOp, source: &str) -> bool {
        .skip_trivia()
        .next()
    {
-        debug_assert_eq!(token.kind(), TokenKind::LParen);
+        debug_assert_eq!(token.kind(), SimpleTokenKind::LParen);
        true
    } else {
        false
--- a/crates/ruff_python_formatter/src/expression/parentheses.rs
+++ b/crates/ruff_python_formatter/src/expression/parentheses.rs
@ -3,7 +3,7 @@ use rustpython_parser::ast::Ranged;
 use ruff_formatter::prelude::tag::Condition;
 use ruff_formatter::{format_args, write, Argument, Arguments};
 use ruff_python_ast::node::AnyNodeRef;
-use ruff_python_whitespace::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};
+use ruff_python_trivia::{first_non_trivia_token, SimpleToken, SimpleTokenKind, SimpleTokenizer};

 use crate::context::NodeLevel;
 use crate::prelude::*;
@ -77,8 +77,8 @@ pub(crate) fn is_expression_parenthesized(expr: AnyNodeRef, contents: &str) -> b
    // First test if there's a closing parentheses because it tends to be cheaper.
    if matches!(
        first_non_trivia_token(expr.end(), contents),
-        Some(Token {
-            kind: TokenKind::RParen,
+        Some(SimpleToken {
+            kind: SimpleTokenKind::RParen,
            ..
        })
    ) {
@ -87,8 +87,8 @@ pub(crate) fn is_expression_parenthesized(expr: AnyNodeRef, contents: &str) -> b

        matches!(
            tokenizer.next_back(),
-            Some(Token {
-                kind: TokenKind::LParen,
+            Some(SimpleToken {
+                kind: SimpleTokenKind::LParen,
                ..
            })
        )
--- a/crates/ruff_python_formatter/src/other/arguments.rs
+++ b/crates/ruff_python_formatter/src/other/arguments.rs
@ -5,7 +5,7 @@ use rustpython_parser::ast::{Arguments, Ranged};

 use ruff_formatter::{format_args, write, FormatRuleWithOptions};
 use ruff_python_ast::node::{AnyNodeRef, AstNode};
-use ruff_python_whitespace::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};
+use ruff_python_trivia::{first_non_trivia_token, SimpleToken, SimpleTokenKind, SimpleTokenizer};

 use crate::comments::{
    dangling_comments, leading_comments, leading_node_comments, trailing_comments,
@ -166,17 +166,17 @@ impl FormatNodeRule<Arguments> for FormatArguments {
                            .skip_trivia();

                    let comma = tokens.next();
-                    assert!(matches!(comma, Some(Token { kind: TokenKind::Comma, .. })), "The last positional only argument must be separated by a `,` from the positional only arguments separator `/` but found '{comma:?}'.");
+                    assert!(matches!(comma, Some(SimpleToken { kind: SimpleTokenKind::Comma, .. })), "The last positional only argument must be separated by a `,` from the positional only arguments separator `/` but found '{comma:?}'.");

                    let slash = tokens.next();
-                    assert!(matches!(slash, Some(Token { kind: TokenKind::Slash, .. })), "The positional argument separator must be present for a function that has positional only arguments but found '{slash:?}'.");
+                    assert!(matches!(slash, Some(SimpleToken { kind: SimpleTokenKind::Slash, .. })), "The positional argument separator must be present for a function that has positional only arguments but found '{slash:?}'.");

                    tokens.next()
                } else {
                    first_non_trivia_token(last_node.end(), f.context().source())
                };

-                if maybe_comma_token.map_or(false, |token| token.kind() == TokenKind::Comma) {
+                if maybe_comma_token.map_or(false, |token| token.kind() == SimpleTokenKind::Comma) {
                    write!(f, [hard_line_break()])?;
                }
            }
@ -298,11 +298,11 @@ pub(crate) fn find_argument_separators(
        let comma = tokens
            .next()
            .expect("The function definition can't end here");
-        debug_assert!(comma.kind() == TokenKind::Comma, "{comma:?}");
+        debug_assert!(comma.kind() == SimpleTokenKind::Comma, "{comma:?}");
        let slash = tokens
            .next()
            .expect("The function definition can't end here");
-        debug_assert!(slash.kind() == TokenKind::Slash, "{slash:?}");
+        debug_assert!(slash.kind() == SimpleTokenKind::Slash, "{slash:?}");

        Some((preceding_end, slash.range))
    } else {
@ -331,11 +331,11 @@ pub(crate) fn find_argument_separators(
            let comma = tokens
                .next()
                .expect("The function definition can't end here");
-            debug_assert!(comma.kind() == TokenKind::Comma, "{comma:?}");
+            debug_assert!(comma.kind() == SimpleTokenKind::Comma, "{comma:?}");
            let star = tokens
                .next()
                .expect("The function definition can't end here");
-            debug_assert!(star.kind() == TokenKind::Star, "{star:?}");
+            debug_assert!(star.kind() == SimpleTokenKind::Star, "{star:?}");

            Some(ArgumentSeparator {
                preceding_end,
@ -348,11 +348,11 @@ pub(crate) fn find_argument_separators(
            let lparen = tokens
                .next()
                .expect("The function definition can't end here");
-            debug_assert!(lparen.kind() == TokenKind::LParen, "{lparen:?}");
+            debug_assert!(lparen.kind() == SimpleTokenKind::LParen, "{lparen:?}");
            let star = tokens
                .next()
                .expect("The function definition can't end here");
-            debug_assert!(star.kind() == TokenKind::Star, "{star:?}");
+            debug_assert!(star.kind() == SimpleTokenKind::Star, "{star:?}");
            Some(ArgumentSeparator {
                preceding_end: arguments.range.start(),
                separator: star.range,
--- a/crates/ruff_python_formatter/src/statement/stmt_class_def.rs
+++ b/crates/ruff_python_formatter/src/statement/stmt_class_def.rs
@ -2,7 +2,7 @@ use ruff_text_size::TextRange;
 use rustpython_parser::ast::{Ranged, StmtClassDef};

 use ruff_formatter::write;
-use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
+use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};

 use crate::comments::trailing_comments;
 use crate::expression::parentheses::{parenthesized, Parentheses};
@ -97,7 +97,7 @@ impl Format<PyFormatContext<'_>> for FormatInheritanceClause<'_> {
                .skip_trivia();

            let left_paren_count = tokenizer
-                .take_while(|token| token.kind() == TokenKind::LParen)
+                .take_while(|token| token.kind() == SimpleTokenKind::LParen)
                .count();

            // Ignore the first parentheses count
--- a/crates/ruff_python_formatter/src/statement/stmt_function_def.rs
+++ b/crates/ruff_python_formatter/src/statement/stmt_function_def.rs
@ -2,7 +2,7 @@ use rustpython_parser::ast::{Ranged, StmtFunctionDef};

 use ruff_formatter::{write, FormatOwnedWithRule, FormatRefWithRule};
 use ruff_python_ast::function::AnyFunctionDefinition;
-use ruff_python_whitespace::{lines_after, skip_trailing_trivia};
+use ruff_python_trivia::{lines_after, skip_trailing_trivia};

 use crate::comments::{leading_comments, trailing_comments};
 use crate::context::NodeLevel;
--- a/crates/ruff_python_formatter/src/statement/stmt_with.rs
+++ b/crates/ruff_python_formatter/src/statement/stmt_with.rs
@ -3,7 +3,7 @@ use rustpython_parser::ast::{Ranged, StmtAsyncWith, StmtWith, Suite, WithItem};

 use ruff_formatter::{format_args, write, FormatError};
 use ruff_python_ast::node::AnyNodeRef;
-use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
+use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};

 use crate::comments::trailing_comments;
 use crate::expression::parentheses::{
@ -125,7 +125,7 @@ fn are_with_items_parenthesized(

    let mut tokenizer = SimpleTokenizer::new(context.source(), before_first_with_item)
        .skip_trivia()
-        .skip_while(|t| t.kind() == TokenKind::Async);
+        .skip_while(|t| t.kind() == SimpleTokenKind::Async);

    let with_keyword = tokenizer.next().ok_or(FormatError::syntax_error(
        "Expected a with keyword, didn't find any token",
@ -133,13 +133,13 @@ fn are_with_items_parenthesized(

    debug_assert_eq!(
        with_keyword.kind(),
-        TokenKind::With,
+        SimpleTokenKind::With,
        "Expected with keyword but at {with_keyword:?}"
    );

    match tokenizer.next() {
        Some(left_paren) => {
-            debug_assert_eq!(left_paren.kind(), TokenKind::LParen);
+            debug_assert_eq!(left_paren.kind(), SimpleTokenKind::LParen);
            Ok(true)
        }
        None => Ok(false),
--- a/crates/ruff_python_formatter/src/statement/suite.rs
+++ b/crates/ruff_python_formatter/src/statement/suite.rs
@ -3,7 +3,7 @@ use rustpython_parser::ast::{Ranged, Stmt, Suite};
 use ruff_formatter::{
    format_args, write, FormatOwnedWithRule, FormatRefWithRule, FormatRuleWithOptions,
 };
-use ruff_python_whitespace::lines_before;
+use ruff_python_trivia::lines_before;

 use crate::context::NodeLevel;
 use crate::prelude::*;