Rename ruff_python_whitespace to ruff_python_trivia (#5886)

## Summary

This crate now contains utilities for dealing with trivia more broadly:
whitespace, newlines, "simple" trivia lexing, etc. I'm therefore renaming it to
reflect its increased responsibilities.

To avoid conflicts, I've also renamed `Token` and `TokenKind` to
`SimpleToken` and `SimpleTokenKind`.
This commit is contained in:
Charlie Marsh 2023-07-19 11:48:27 -04:00 committed by GitHub
parent a75a6de577
commit 5f3da9955a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
86 changed files with 360 additions and 353 deletions

View file

@ -12,7 +12,7 @@ license = { workspace = true }
[dependencies]
ruff_formatter = { path = "../ruff_formatter" }
ruff_python_whitespace = { path = "../ruff_python_whitespace" }
ruff_python_trivia = { path = "../ruff_python_trivia" }
ruff_python_ast = { path = "../ruff_python_ast" }
ruff_text_size = { workspace = true }

View file

@ -2,8 +2,8 @@ use ruff_text_size::{TextRange, TextSize};
use rustpython_parser::ast::Ranged;
use ruff_formatter::{format_args, write, Argument, Arguments};
use ruff_python_whitespace::{
lines_after, skip_trailing_trivia, SimpleTokenizer, Token, TokenKind,
use ruff_python_trivia::{
lines_after, skip_trailing_trivia, SimpleToken, SimpleTokenKind, SimpleTokenizer,
};
use crate::context::NodeLevel;
@ -294,12 +294,12 @@ impl<'fmt, 'ast, 'buf> JoinCommaSeparatedBuilder<'fmt, 'ast, 'buf> {
)
.skip_trivia()
// Skip over any closing parentheses belonging to the expression
.find(|token| token.kind() != TokenKind::RParen);
.find(|token| token.kind() != SimpleTokenKind::RParen);
matches!(
first_token,
Some(Token {
kind: TokenKind::Comma,
Some(SimpleToken {
kind: SimpleTokenKind::Comma,
..
})
)

View file

@ -3,7 +3,7 @@ use rustpython_parser::ast::Ranged;
use ruff_formatter::{format_args, write, FormatError, SourceCode};
use ruff_python_ast::node::{AnyNodeRef, AstNode};
use ruff_python_whitespace::{lines_after, lines_before, skip_trailing_trivia};
use ruff_python_trivia::{lines_after, lines_before, skip_trailing_trivia};
use crate::comments::SourceComment;
use crate::context::NodeLevel;

View file

@ -7,8 +7,8 @@ use rustpython_parser::ast::{Expr, ExprIfExp, ExprSlice, Ranged};
use ruff_python_ast::node::{AnyNodeRef, AstNode};
use ruff_python_ast::source_code::Locator;
use ruff_python_ast::whitespace;
use ruff_python_whitespace::{
first_non_trivia_token_rev, PythonWhitespace, SimpleTokenizer, Token, TokenKind,
use ruff_python_trivia::{
first_non_trivia_token_rev, PythonWhitespace, SimpleToken, SimpleTokenKind, SimpleTokenizer,
UniversalNewlines,
};
@ -756,7 +756,7 @@ fn handle_trailing_end_of_line_condition_comment<'a>(
for token in tokens {
match token.kind() {
TokenKind::Colon => {
SimpleTokenKind::Colon => {
if comment.slice().start() > token.start() {
// Comment comes after the colon
// ```python
@ -775,10 +775,10 @@ fn handle_trailing_end_of_line_condition_comment<'a>(
// ```
break;
}
TokenKind::RParen => {
SimpleTokenKind::RParen => {
// Skip over any closing parentheses
}
TokenKind::Comma => {
SimpleTokenKind::Comma => {
// Skip over any trailing comma
}
kind => {
@ -884,12 +884,12 @@ fn handle_trailing_binary_expression_left_or_operator_comment<'a>(
);
let mut tokens = SimpleTokenizer::new(locator.contents(), between_operands_range).skip_trivia();
let operator_offset = if let Some(non_r_paren) = tokens.find(|t| t.kind() != TokenKind::RParen)
{
non_r_paren.start()
} else {
return CommentPlacement::Default(comment);
};
let operator_offset =
if let Some(non_r_paren) = tokens.find(|t| t.kind() != SimpleTokenKind::RParen) {
non_r_paren.start()
} else {
return CommentPlacement::Default(comment);
};
let comment_range = comment.slice().range();
@ -1061,8 +1061,8 @@ fn handle_slice_comments<'a>(
// Check for `foo[ # comment`, but only if they are on the same line
let after_lbracket = matches!(
first_non_trivia_token_rev(comment.slice().start(), locator.contents()),
Some(Token {
kind: TokenKind::LBracket,
Some(SimpleToken {
kind: SimpleTokenKind::LBracket,
..
})
);
@ -1182,11 +1182,11 @@ fn handle_dict_unpacking_comment<'a>(
// we start from the preceding node but we skip its token
for token in tokens.by_ref() {
// Skip closing parentheses that are not part of the node range
if token.kind == TokenKind::RParen {
if token.kind == SimpleTokenKind::RParen {
continue;
}
// The Keyword case
if token.kind == TokenKind::Star {
if token.kind == SimpleTokenKind::Star {
count += 1;
break;
}
@ -1194,8 +1194,8 @@ fn handle_dict_unpacking_comment<'a>(
debug_assert!(
matches!(
token,
Token {
kind: TokenKind::LBrace | TokenKind::Comma | TokenKind::Colon,
SimpleToken {
kind: SimpleTokenKind::LBrace | SimpleTokenKind::Comma | SimpleTokenKind::Colon,
..
}
),
@ -1205,7 +1205,7 @@ fn handle_dict_unpacking_comment<'a>(
}
for token in tokens {
if token.kind != TokenKind::Star {
if token.kind != SimpleTokenKind::Star {
return CommentPlacement::Default(comment);
}
count += 1;
@ -1302,12 +1302,12 @@ fn handle_expr_if_comment<'a>(
let if_token = find_only_token_in_range(
TextRange::new(body.end(), test.start()),
locator,
TokenKind::If,
SimpleTokenKind::If,
);
let else_token = find_only_token_in_range(
TextRange::new(test.end(), orelse.start()),
locator,
TokenKind::Else,
SimpleTokenKind::Else,
);
// Between `if` and `test`
@ -1369,7 +1369,7 @@ fn handle_with_item_comment<'a>(
let as_token = find_only_token_in_range(
TextRange::new(context_expr.end(), optional_vars.start()),
locator,
TokenKind::As,
SimpleTokenKind::As,
);
// If before the `as` keyword, then it must be a trailing comment of the context expression.
@ -1386,13 +1386,17 @@ fn handle_with_item_comment<'a>(
/// Looks for a token in the range that contains no other tokens except for parentheses outside
/// the expression ranges
fn find_only_token_in_range(range: TextRange, locator: &Locator, token_kind: TokenKind) -> Token {
fn find_only_token_in_range(
range: TextRange,
locator: &Locator,
token_kind: SimpleTokenKind,
) -> SimpleToken {
let mut tokens = SimpleTokenizer::new(locator.contents(), range)
.skip_trivia()
.skip_while(|token| token.kind == TokenKind::RParen);
.skip_while(|token| token.kind == SimpleTokenKind::RParen);
let token = tokens.next().expect("Expected a token");
debug_assert_eq!(token.kind(), token_kind);
let mut tokens = tokens.skip_while(|token| token.kind == TokenKind::LParen);
let mut tokens = tokens.skip_while(|token| token.kind == SimpleTokenKind::LParen);
debug_assert_eq!(tokens.next(), None);
token
}
@ -1446,7 +1450,7 @@ fn handle_comprehension_comment<'a>(
comprehension.iter.range().start(),
),
locator,
TokenKind::In,
SimpleTokenKind::In,
);
// Comments between the target and the `in`
@ -1509,7 +1513,7 @@ fn handle_comprehension_comment<'a>(
let if_token = find_only_token_in_range(
TextRange::new(last_end, if_node.range().start()),
locator,
TokenKind::If,
SimpleTokenKind::If,
);
if is_own_line {
if last_end < comment.slice().start() && comment.slice().start() < if_token.start() {

View file

@ -13,7 +13,7 @@ use ruff_python_ast::source_code::{CommentRanges, Locator};
// pre-order.
#[allow(clippy::wildcard_imports)]
use ruff_python_ast::visitor::preorder::*;
use ruff_python_whitespace::is_python_whitespace;
use ruff_python_trivia::is_python_whitespace;
use crate::comments::node_key::NodeRefEqualityKey;
use crate::comments::placement::place_comment;

View file

@ -3,7 +3,7 @@ use rustpython_parser::ast::{Expr, ExprCall, Ranged};
use ruff_formatter::write;
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
use crate::comments::dangling_comments;
use crate::expression::expr_generator_exp::GeneratorExpParentheses;
@ -132,14 +132,14 @@ fn is_single_argument_parenthesized(argument: &Expr, call_end: TextSize, source:
SimpleTokenizer::new(source, TextRange::new(argument.end(), call_end)).skip_trivia()
{
match token.kind() {
TokenKind::RParen => {
SimpleTokenKind::RParen => {
if has_seen_r_paren {
return true;
}
has_seen_r_paren = true;
}
// Skip over any trailing comma
TokenKind::Comma => continue,
SimpleTokenKind::Comma => continue,
_ => {
// Passed the arguments
break;

View file

@ -5,7 +5,7 @@ use rustpython_parser::ast::{Expr, Ranged};
use ruff_formatter::prelude::{hard_line_break, line_suffix_boundary, space, text};
use ruff_formatter::{write, Buffer, Format, FormatError, FormatResult};
use ruff_python_ast::node::{AnyNodeRef, AstNode};
use ruff_python_whitespace::{SimpleTokenizer, Token, TokenKind};
use ruff_python_trivia::{SimpleToken, SimpleTokenKind, SimpleTokenizer};
use crate::comments::{dangling_comments, SourceComment};
use crate::context::PyFormatContext;
@ -158,17 +158,17 @@ pub(crate) fn find_colons(
range: TextRange,
lower: &Option<Box<Expr>>,
upper: &Option<Box<Expr>>,
) -> FormatResult<(Token, Option<Token>)> {
) -> FormatResult<(SimpleToken, Option<SimpleToken>)> {
let after_lower = lower
.as_ref()
.map_or(range.start(), |lower| lower.range().end());
let mut tokens = SimpleTokenizer::new(contents, TextRange::new(after_lower, range.end()))
.skip_trivia()
.skip_while(|token| token.kind == TokenKind::RParen);
.skip_while(|token| token.kind == SimpleTokenKind::RParen);
let first_colon = tokens.next().ok_or(FormatError::syntax_error(
"Din't find any token for slice first colon",
))?;
if first_colon.kind != TokenKind::Colon {
if first_colon.kind != SimpleTokenKind::Colon {
return Err(FormatError::syntax_error(
"slice first colon token was not a colon",
));
@ -179,9 +179,9 @@ pub(crate) fn find_colons(
.map_or(first_colon.end(), |upper| upper.range().end());
let mut tokens = SimpleTokenizer::new(contents, TextRange::new(after_upper, range.end()))
.skip_trivia()
.skip_while(|token| token.kind == TokenKind::RParen);
.skip_while(|token| token.kind == SimpleTokenKind::RParen);
let second_colon = if let Some(token) = tokens.next() {
if token.kind != TokenKind::Colon {
if token.kind != SimpleTokenKind::Colon {
return Err(FormatError::syntax_error(
"Expected a colon for the second colon token",
));

View file

@ -5,7 +5,7 @@ use rustpython_parser::ast::{ExprUnaryOp, Ranged};
use ruff_formatter::prelude::{hard_line_break, space, text};
use ruff_formatter::{Format, FormatContext, FormatResult};
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
use crate::comments::trailing_comments;
use crate::context::PyFormatContext;
@ -97,7 +97,7 @@ fn is_operand_parenthesized(unary: &ExprUnaryOp, source: &str) -> bool {
.skip_trivia()
.next()
{
debug_assert_eq!(token.kind(), TokenKind::LParen);
debug_assert_eq!(token.kind(), SimpleTokenKind::LParen);
true
} else {
false

View file

@ -3,7 +3,7 @@ use rustpython_parser::ast::Ranged;
use ruff_formatter::prelude::tag::Condition;
use ruff_formatter::{format_args, write, Argument, Arguments};
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_whitespace::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};
use ruff_python_trivia::{first_non_trivia_token, SimpleToken, SimpleTokenKind, SimpleTokenizer};
use crate::context::NodeLevel;
use crate::prelude::*;
@ -77,8 +77,8 @@ pub(crate) fn is_expression_parenthesized(expr: AnyNodeRef, contents: &str) -> b
// First test if there's a closing parentheses because it tends to be cheaper.
if matches!(
first_non_trivia_token(expr.end(), contents),
Some(Token {
kind: TokenKind::RParen,
Some(SimpleToken {
kind: SimpleTokenKind::RParen,
..
})
) {
@ -87,8 +87,8 @@ pub(crate) fn is_expression_parenthesized(expr: AnyNodeRef, contents: &str) -> b
matches!(
tokenizer.next_back(),
Some(Token {
kind: TokenKind::LParen,
Some(SimpleToken {
kind: SimpleTokenKind::LParen,
..
})
)

View file

@ -5,7 +5,7 @@ use rustpython_parser::ast::{Arguments, Ranged};
use ruff_formatter::{format_args, write, FormatRuleWithOptions};
use ruff_python_ast::node::{AnyNodeRef, AstNode};
use ruff_python_whitespace::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};
use ruff_python_trivia::{first_non_trivia_token, SimpleToken, SimpleTokenKind, SimpleTokenizer};
use crate::comments::{
dangling_comments, leading_comments, leading_node_comments, trailing_comments,
@ -166,17 +166,17 @@ impl FormatNodeRule<Arguments> for FormatArguments {
.skip_trivia();
let comma = tokens.next();
assert!(matches!(comma, Some(Token { kind: TokenKind::Comma, .. })), "The last positional only argument must be separated by a `,` from the positional only arguments separator `/` but found '{comma:?}'.");
assert!(matches!(comma, Some(SimpleToken { kind: SimpleTokenKind::Comma, .. })), "The last positional only argument must be separated by a `,` from the positional only arguments separator `/` but found '{comma:?}'.");
let slash = tokens.next();
assert!(matches!(slash, Some(Token { kind: TokenKind::Slash, .. })), "The positional argument separator must be present for a function that has positional only arguments but found '{slash:?}'.");
assert!(matches!(slash, Some(SimpleToken { kind: SimpleTokenKind::Slash, .. })), "The positional argument separator must be present for a function that has positional only arguments but found '{slash:?}'.");
tokens.next()
} else {
first_non_trivia_token(last_node.end(), f.context().source())
};
if maybe_comma_token.map_or(false, |token| token.kind() == TokenKind::Comma) {
if maybe_comma_token.map_or(false, |token| token.kind() == SimpleTokenKind::Comma) {
write!(f, [hard_line_break()])?;
}
}
@ -298,11 +298,11 @@ pub(crate) fn find_argument_separators(
let comma = tokens
.next()
.expect("The function definition can't end here");
debug_assert!(comma.kind() == TokenKind::Comma, "{comma:?}");
debug_assert!(comma.kind() == SimpleTokenKind::Comma, "{comma:?}");
let slash = tokens
.next()
.expect("The function definition can't end here");
debug_assert!(slash.kind() == TokenKind::Slash, "{slash:?}");
debug_assert!(slash.kind() == SimpleTokenKind::Slash, "{slash:?}");
Some((preceding_end, slash.range))
} else {
@ -331,11 +331,11 @@ pub(crate) fn find_argument_separators(
let comma = tokens
.next()
.expect("The function definition can't end here");
debug_assert!(comma.kind() == TokenKind::Comma, "{comma:?}");
debug_assert!(comma.kind() == SimpleTokenKind::Comma, "{comma:?}");
let star = tokens
.next()
.expect("The function definition can't end here");
debug_assert!(star.kind() == TokenKind::Star, "{star:?}");
debug_assert!(star.kind() == SimpleTokenKind::Star, "{star:?}");
Some(ArgumentSeparator {
preceding_end,
@ -348,11 +348,11 @@ pub(crate) fn find_argument_separators(
let lparen = tokens
.next()
.expect("The function definition can't end here");
debug_assert!(lparen.kind() == TokenKind::LParen, "{lparen:?}");
debug_assert!(lparen.kind() == SimpleTokenKind::LParen, "{lparen:?}");
let star = tokens
.next()
.expect("The function definition can't end here");
debug_assert!(star.kind() == TokenKind::Star, "{star:?}");
debug_assert!(star.kind() == SimpleTokenKind::Star, "{star:?}");
Some(ArgumentSeparator {
preceding_end: arguments.range.start(),
separator: star.range,

View file

@ -2,7 +2,7 @@ use ruff_text_size::TextRange;
use rustpython_parser::ast::{Ranged, StmtClassDef};
use ruff_formatter::write;
use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
use crate::comments::trailing_comments;
use crate::expression::parentheses::{parenthesized, Parentheses};
@ -97,7 +97,7 @@ impl Format<PyFormatContext<'_>> for FormatInheritanceClause<'_> {
.skip_trivia();
let left_paren_count = tokenizer
.take_while(|token| token.kind() == TokenKind::LParen)
.take_while(|token| token.kind() == SimpleTokenKind::LParen)
.count();
// Ignore the first parentheses count

View file

@ -2,7 +2,7 @@ use rustpython_parser::ast::{Ranged, StmtFunctionDef};
use ruff_formatter::{write, FormatOwnedWithRule, FormatRefWithRule};
use ruff_python_ast::function::AnyFunctionDefinition;
use ruff_python_whitespace::{lines_after, skip_trailing_trivia};
use ruff_python_trivia::{lines_after, skip_trailing_trivia};
use crate::comments::{leading_comments, trailing_comments};
use crate::context::NodeLevel;

View file

@ -3,7 +3,7 @@ use rustpython_parser::ast::{Ranged, StmtAsyncWith, StmtWith, Suite, WithItem};
use ruff_formatter::{format_args, write, FormatError};
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
use crate::comments::trailing_comments;
use crate::expression::parentheses::{
@ -125,7 +125,7 @@ fn are_with_items_parenthesized(
let mut tokenizer = SimpleTokenizer::new(context.source(), before_first_with_item)
.skip_trivia()
.skip_while(|t| t.kind() == TokenKind::Async);
.skip_while(|t| t.kind() == SimpleTokenKind::Async);
let with_keyword = tokenizer.next().ok_or(FormatError::syntax_error(
"Expected a with keyword, didn't find any token",
@ -133,13 +133,13 @@ fn are_with_items_parenthesized(
debug_assert_eq!(
with_keyword.kind(),
TokenKind::With,
SimpleTokenKind::With,
"Expected with keyword but at {with_keyword:?}"
);
match tokenizer.next() {
Some(left_paren) => {
debug_assert_eq!(left_paren.kind(), TokenKind::LParen);
debug_assert_eq!(left_paren.kind(), SimpleTokenKind::LParen);
Ok(true)
}
None => Ok(false),

View file

@ -3,7 +3,7 @@ use rustpython_parser::ast::{Ranged, Stmt, Suite};
use ruff_formatter::{
format_args, write, FormatOwnedWithRule, FormatRefWithRule, FormatRuleWithOptions,
};
use ruff_python_whitespace::lines_before;
use ruff_python_trivia::lines_before;
use crate::context::NodeLevel;
use crate::prelude::*;