From 4204fc002d67dcb002df6852ec89db9a999c2029 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Tue, 18 Jul 2023 14:27:46 -0400 Subject: [PATCH] Remove exception-handler lexing from `unused-bound-exception` fix (#5851) ## Summary The motivation here is that it will make this rule easier to rewrite as a deferred check. Right now, we can't run this rule in the deferred phase, because it depends on the `except_handler` to power its autofix. Instead of lexing the `except_handler`, we can use the `SimpleTokenizer` from the formatter, and just lex forwards and backwards. For context, this rule detects the unused `e` in: ```python try: pass except ValueError as e: pass ``` --- Cargo.lock | 3 +- crates/ruff/src/checkers/ast/mod.rs | 7 +-- crates/ruff/src/rules/pyflakes/fixes.rs | 50 +++++++++---------- crates/ruff_python_formatter/Cargo.toml | 1 - crates/ruff_python_formatter/src/builders.rs | 4 +- .../src/comments/format.rs | 2 +- .../src/comments/placement.rs | 6 ++- .../src/expression/expr_call.rs | 3 +- .../src/expression/expr_slice.rs | 19 +++---- .../src/expression/expr_unary_op.rs | 18 ++++--- .../src/expression/parentheses.rs | 10 ++-- crates/ruff_python_formatter/src/lib.rs | 1 - .../src/other/arguments.rs | 4 +- .../src/statement/stmt_class_def.rs | 13 ++--- .../src/statement/stmt_function_def.rs | 2 +- .../src/statement/stmt_with.rs | 2 +- .../src/statement/suite.rs | 18 ++++--- crates/ruff_python_whitespace/Cargo.toml | 4 ++ crates/ruff_python_whitespace/src/lib.rs | 2 + ...hitespace__tokenizer__tests__Reverse.snap} | 2 +- ..._identifier_ending_in_non_start_char.snap} | 2 +- ...e_word_with_only_id_continuing_chars.snap} | 2 +- ...ce__tokenizer__tests__tokenize_bogus.snap} | 2 +- ...ce__tokenizer__tests__tokenize_comma.snap} | 2 +- ...enizer__tests__tokenize_continuation.snap} | 2 +- ...tokenizer__tests__tokenize_multichar.snap} | 2 +- ...kenizer__tests__tokenize_parentheses.snap} | 2 +- ...ce__tokenizer__tests__tokenize_slash.snap} | 2 +- 
...tokenizer__tests__tokenize_substring.snap} | 2 +- ...e__tokenizer__tests__tokenize_trivia.snap} | 2 +- ...ce__tokenizer__tests__tricky_unicode.snap} | 2 +- .../src/tokenizer.rs} | 44 ++++++++-------- 32 files changed, 125 insertions(+), 112 deletions(-) rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__Reverse.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__Reverse.snap} (98%) rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__identifier_ending_in_non_start_char.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__identifier_ending_in_non_start_char.snap} (65%) rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__ignore_word_with_only_id_continuing_chars.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__ignore_word_with_only_id_continuing_chars.snap} (80%) rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_bogus.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_bogus.snap} (97%) rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_comma.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_comma.snap} (83%) rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_continuation.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_continuation.snap} (88%) rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_multichar.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_multichar.snap} (89%) rename 
crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_parentheses.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_parentheses.snap} (88%) rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_slash.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_slash.snap} (91%) rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_substring.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_substring.snap} (81%) rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_trivia.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_trivia.snap} (84%) rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tricky_unicode.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tricky_unicode.snap} (65%) rename crates/{ruff_python_formatter/src/trivia.rs => ruff_python_whitespace/src/tokenizer.rs} (93%) diff --git a/Cargo.lock b/Cargo.lock index b58fdb4ab8..4d904a5884 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2157,7 +2157,6 @@ dependencies = [ "similar", "smallvec", "thiserror", - "unic-ucd-ident", ] [[package]] @@ -2195,8 +2194,10 @@ version = "0.0.0" name = "ruff_python_whitespace" version = "0.0.0" dependencies = [ + "insta", "memchr", "ruff_text_size", + "unic-ucd-ident", ] [[package]] diff --git a/crates/ruff/src/checkers/ast/mod.rs b/crates/ruff/src/checkers/ast/mod.rs index 455e62b520..c1bf7fb8a5 100644 --- a/crates/ruff/src/checkers/ast/mod.rs +++ b/crates/ruff/src/checkers/ast/mod.rs @@ -4103,11 +4103,8 @@ where ); if self.patch(Rule::UnusedVariable) { diagnostic.try_set_fix(|| { - pyflakes::fixes::remove_exception_handler_assignment( - except_handler, - self.locator, - 
) - .map(Fix::automatic) + pyflakes::fixes::remove_exception_handler_assignment(name, self.locator) + .map(Fix::automatic) }); } self.diagnostics.push(diagnostic); diff --git a/crates/ruff/src/rules/pyflakes/fixes.rs b/crates/ruff/src/rules/pyflakes/fixes.rs index 694e03bf87..c6cdee0f5a 100644 --- a/crates/ruff/src/rules/pyflakes/fixes.rs +++ b/crates/ruff/src/rules/pyflakes/fixes.rs @@ -1,10 +1,10 @@ -use anyhow::{bail, Ok, Result}; +use anyhow::{Context, Ok, Result}; use ruff_text_size::TextRange; -use rustpython_parser::ast::{ExceptHandler, Expr, Ranged}; -use rustpython_parser::{lexer, Mode}; +use rustpython_parser::ast::{Expr, Identifier, Ranged}; use ruff_diagnostics::Edit; use ruff_python_ast::source_code::{Locator, Stylist}; +use ruff_python_whitespace::{SimpleTokenizer, TokenKind}; use crate::autofix::codemods::CodegenStylist; use crate::cst::matchers::{match_call_mut, match_dict, match_expression}; @@ -90,31 +90,29 @@ pub(crate) fn remove_unused_positional_arguments_from_format_call( /// Generate a [`Edit`] to remove the binding from an exception handler. pub(crate) fn remove_exception_handler_assignment( - except_handler: &ExceptHandler, + bound_exception: &Identifier, locator: &Locator, ) -> Result { - let contents = locator.slice(except_handler.range()); - let mut fix_start = None; - let mut fix_end = None; + // Lex backwards, to the token just before the `as`. + let mut tokenizer = + SimpleTokenizer::up_to(bound_exception.start(), locator.contents()).skip_trivia(); - // End of the token just before the `as` to the semicolon. - let mut prev = None; - for (tok, range) in - lexer::lex_starts_at(contents, Mode::Module, except_handler.start()).flatten() - { - if tok.is_as() { - fix_start = prev; - } - if tok.is_colon() { - fix_end = Some(range.start()); - break; - } - prev = Some(range.end()); - } + // Eat the `as` token. 
+ let preceding = tokenizer + .next_back() + .context("expected the exception name to be preceded by `as`")?; + debug_assert!(matches!(preceding.kind, TokenKind::As)); - if let (Some(start), Some(end)) = (fix_start, fix_end) { - Ok(Edit::deletion(start, end)) - } else { - bail!("Could not find span of exception handler") - } + // Lex to the end of the preceding token, which should be the exception value. + let preceding = tokenizer + .next_back() + .context("expected the exception name to be preceded by a token")?; + + // Lex forwards, to the `:` token. + let following = SimpleTokenizer::starts_at(bound_exception.end(), locator.contents()) + .next() + .context("expected the exception name to be followed by a colon")?; + debug_assert!(matches!(following.kind, TokenKind::Colon)); + + Ok(Edit::deletion(preceding.end(), following.start())) } diff --git a/crates/ruff_python_formatter/Cargo.toml b/crates/ruff_python_formatter/Cargo.toml index 381c3ec6c9..ae36a02119 100644 --- a/crates/ruff_python_formatter/Cargo.toml +++ b/crates/ruff_python_formatter/Cargo.toml @@ -28,7 +28,6 @@ rustpython-parser = { workspace = true } serde = { workspace = true, optional = true } smallvec = { workspace = true } thiserror = { workspace = true } -unic-ucd-ident = "0.9.0" [dev-dependencies] ruff_formatter = { path = "../ruff_formatter", features = ["serde"]} diff --git a/crates/ruff_python_formatter/src/builders.rs b/crates/ruff_python_formatter/src/builders.rs index 9f24a49ca2..0456d58df9 100644 --- a/crates/ruff_python_formatter/src/builders.rs +++ b/crates/ruff_python_formatter/src/builders.rs @@ -2,10 +2,12 @@ use ruff_text_size::{TextRange, TextSize}; use rustpython_parser::ast::Ranged; use ruff_formatter::{format_args, write, Argument, Arguments}; +use ruff_python_whitespace::{ + lines_after, skip_trailing_trivia, SimpleTokenizer, Token, TokenKind, +}; use crate::context::NodeLevel; use crate::prelude::*; -use crate::trivia::{lines_after, skip_trailing_trivia, SimpleTokenizer, 
Token, TokenKind}; use crate::MagicTrailingComma; /// Adds parentheses and indents `content` if it doesn't fit on a line. diff --git a/crates/ruff_python_formatter/src/comments/format.rs b/crates/ruff_python_formatter/src/comments/format.rs index 84b0e3b654..aa7d7296f1 100644 --- a/crates/ruff_python_formatter/src/comments/format.rs +++ b/crates/ruff_python_formatter/src/comments/format.rs @@ -3,11 +3,11 @@ use rustpython_parser::ast::Ranged; use ruff_formatter::{format_args, write, FormatError, SourceCode}; use ruff_python_ast::node::{AnyNodeRef, AstNode}; +use ruff_python_whitespace::{lines_after, lines_before, skip_trailing_trivia}; use crate::comments::SourceComment; use crate::context::NodeLevel; use crate::prelude::*; -use crate::trivia::{lines_after, lines_before, skip_trailing_trivia}; /// Formats the leading comments of a node. pub(crate) fn leading_node_comments(node: &T) -> FormatLeadingComments diff --git a/crates/ruff_python_formatter/src/comments/placement.rs b/crates/ruff_python_formatter/src/comments/placement.rs index e4d7ec9d82..27ae9688c8 100644 --- a/crates/ruff_python_formatter/src/comments/placement.rs +++ b/crates/ruff_python_formatter/src/comments/placement.rs @@ -7,14 +7,16 @@ use rustpython_parser::ast::{Expr, ExprIfExp, ExprSlice, Ranged}; use ruff_python_ast::node::{AnyNodeRef, AstNode}; use ruff_python_ast::source_code::Locator; use ruff_python_ast::whitespace; -use ruff_python_whitespace::{PythonWhitespace, UniversalNewlines}; +use ruff_python_whitespace::{ + first_non_trivia_token_rev, PythonWhitespace, SimpleTokenizer, Token, TokenKind, + UniversalNewlines, +}; use crate::comments::visitor::{CommentPlacement, DecoratedComment}; use crate::expression::expr_slice::{assign_comment_in_slice, ExprSliceCommentSection}; use crate::other::arguments::{ assign_argument_separator_comment_placement, find_argument_separators, }; -use crate::trivia::{first_non_trivia_token_rev, SimpleTokenizer, Token, TokenKind}; /// Implements the custom comment 
placement logic. pub(super) fn place_comment<'a>( diff --git a/crates/ruff_python_formatter/src/expression/expr_call.rs b/crates/ruff_python_formatter/src/expression/expr_call.rs index c46aa374f7..4054208baf 100644 --- a/crates/ruff_python_formatter/src/expression/expr_call.rs +++ b/crates/ruff_python_formatter/src/expression/expr_call.rs @@ -3,14 +3,13 @@ use rustpython_parser::ast::{Expr, ExprCall, Ranged}; use ruff_formatter::write; use ruff_python_ast::node::AnyNodeRef; +use ruff_python_whitespace::{SimpleTokenizer, TokenKind}; use crate::comments::dangling_comments; - use crate::expression::parentheses::{ parenthesized, NeedsParentheses, OptionalParentheses, Parentheses, }; use crate::prelude::*; -use crate::trivia::{SimpleTokenizer, TokenKind}; use crate::FormatNodeRule; #[derive(Default)] diff --git a/crates/ruff_python_formatter/src/expression/expr_slice.rs b/crates/ruff_python_formatter/src/expression/expr_slice.rs index 0d9dd7445f..93434b8777 100644 --- a/crates/ruff_python_formatter/src/expression/expr_slice.rs +++ b/crates/ruff_python_formatter/src/expression/expr_slice.rs @@ -1,16 +1,17 @@ -use crate::comments::{dangling_comments, SourceComment}; -use crate::context::PyFormatContext; -use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses}; -use crate::trivia::Token; -use crate::trivia::{first_non_trivia_token, TokenKind}; -use crate::{AsFormat, FormatNodeRule, PyFormatter}; -use ruff_formatter::prelude::{hard_line_break, line_suffix_boundary, space, text}; -use ruff_formatter::{write, Buffer, Format, FormatError, FormatResult}; -use ruff_python_ast::node::{AnyNodeRef, AstNode}; use ruff_text_size::TextRange; use rustpython_parser::ast::ExprSlice; use rustpython_parser::ast::{Expr, Ranged}; +use ruff_formatter::prelude::{hard_line_break, line_suffix_boundary, space, text}; +use ruff_formatter::{write, Buffer, Format, FormatError, FormatResult}; +use ruff_python_ast::node::{AnyNodeRef, AstNode}; +use 
ruff_python_whitespace::{first_non_trivia_token, Token, TokenKind}; + +use crate::comments::{dangling_comments, SourceComment}; +use crate::context::PyFormatContext; +use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses}; +use crate::{AsFormat, FormatNodeRule, PyFormatter}; + #[derive(Default)] pub struct FormatExprSlice; diff --git a/crates/ruff_python_formatter/src/expression/expr_unary_op.rs b/crates/ruff_python_formatter/src/expression/expr_unary_op.rs index 97462c4d7f..ffe5f0f69c 100644 --- a/crates/ruff_python_formatter/src/expression/expr_unary_op.rs +++ b/crates/ruff_python_formatter/src/expression/expr_unary_op.rs @@ -1,15 +1,17 @@ -use crate::comments::trailing_comments; -use crate::context::PyFormatContext; -use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses}; -use crate::trivia::{SimpleTokenizer, TokenKind}; -use crate::{AsFormat, FormatNodeRule, PyFormatter}; -use ruff_formatter::prelude::{hard_line_break, space, text}; -use ruff_formatter::{Format, FormatContext, FormatResult}; -use ruff_python_ast::node::AnyNodeRef; use ruff_text_size::{TextLen, TextRange}; use rustpython_parser::ast::UnaryOp; use rustpython_parser::ast::{ExprUnaryOp, Ranged}; +use ruff_formatter::prelude::{hard_line_break, space, text}; +use ruff_formatter::{Format, FormatContext, FormatResult}; +use ruff_python_ast::node::AnyNodeRef; +use ruff_python_whitespace::{SimpleTokenizer, TokenKind}; + +use crate::comments::trailing_comments; +use crate::context::PyFormatContext; +use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses}; +use crate::{AsFormat, FormatNodeRule, PyFormatter}; + #[derive(Default)] pub struct FormatExprUnaryOp; diff --git a/crates/ruff_python_formatter/src/expression/parentheses.rs b/crates/ruff_python_formatter/src/expression/parentheses.rs index 281d1896b8..85981345f4 100644 --- a/crates/ruff_python_formatter/src/expression/parentheses.rs +++ 
b/crates/ruff_python_formatter/src/expression/parentheses.rs @@ -1,10 +1,12 @@ -use crate::context::NodeLevel; -use crate::prelude::*; -use crate::trivia::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind}; +use rustpython_parser::ast::Ranged; + use ruff_formatter::prelude::tag::Condition; use ruff_formatter::{format_args, write, Argument, Arguments}; use ruff_python_ast::node::AnyNodeRef; -use rustpython_parser::ast::Ranged; +use ruff_python_whitespace::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind}; + +use crate::context::NodeLevel; +use crate::prelude::*; #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub(crate) enum OptionalParentheses { diff --git a/crates/ruff_python_formatter/src/lib.rs b/crates/ruff_python_formatter/src/lib.rs index 7055bab2fa..d1a7420291 100644 --- a/crates/ruff_python_formatter/src/lib.rs +++ b/crates/ruff_python_formatter/src/lib.rs @@ -33,7 +33,6 @@ pub(crate) mod other; pub(crate) mod pattern; mod prelude; pub(crate) mod statement; -mod trivia; include!("../../ruff_formatter/shared_traits.rs"); diff --git a/crates/ruff_python_formatter/src/other/arguments.rs b/crates/ruff_python_formatter/src/other/arguments.rs index 3e84558ad2..5e4d7fe6f3 100644 --- a/crates/ruff_python_formatter/src/other/arguments.rs +++ b/crates/ruff_python_formatter/src/other/arguments.rs @@ -1,9 +1,11 @@ use std::usize; +use ruff_text_size::{TextRange, TextSize}; use rustpython_parser::ast::{Arguments, Ranged}; use ruff_formatter::{format_args, write}; use ruff_python_ast::node::{AnyNodeRef, AstNode}; +use ruff_python_whitespace::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind}; use crate::comments::{ dangling_comments, leading_comments, leading_node_comments, trailing_comments, @@ -12,9 +14,7 @@ use crate::comments::{ use crate::context::NodeLevel; use crate::expression::parentheses::parenthesized; use crate::prelude::*; -use crate::trivia::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind}; use crate::FormatNodeRule; -use 
ruff_text_size::{TextRange, TextSize}; #[derive(Default)] pub struct FormatArguments; diff --git a/crates/ruff_python_formatter/src/statement/stmt_class_def.rs b/crates/ruff_python_formatter/src/statement/stmt_class_def.rs index 86c7688284..0876ac7347 100644 --- a/crates/ruff_python_formatter/src/statement/stmt_class_def.rs +++ b/crates/ruff_python_formatter/src/statement/stmt_class_def.rs @@ -1,12 +1,13 @@ -use crate::comments::trailing_comments; - -use crate::expression::parentheses::{parenthesized, Parentheses}; -use crate::prelude::*; -use crate::trivia::{SimpleTokenizer, TokenKind}; -use ruff_formatter::write; use ruff_text_size::TextRange; use rustpython_parser::ast::{Ranged, StmtClassDef}; +use ruff_formatter::write; +use ruff_python_whitespace::{SimpleTokenizer, TokenKind}; + +use crate::comments::trailing_comments; +use crate::expression::parentheses::{parenthesized, Parentheses}; +use crate::prelude::*; + #[derive(Default)] pub struct FormatStmtClassDef; diff --git a/crates/ruff_python_formatter/src/statement/stmt_function_def.rs b/crates/ruff_python_formatter/src/statement/stmt_function_def.rs index 69f370e7c1..f7fa032174 100644 --- a/crates/ruff_python_formatter/src/statement/stmt_function_def.rs +++ b/crates/ruff_python_formatter/src/statement/stmt_function_def.rs @@ -2,12 +2,12 @@ use rustpython_parser::ast::{Ranged, StmtFunctionDef}; use ruff_formatter::{write, FormatOwnedWithRule, FormatRefWithRule}; use ruff_python_ast::function::AnyFunctionDefinition; +use ruff_python_whitespace::{lines_after, skip_trailing_trivia}; use crate::comments::{leading_comments, trailing_comments}; use crate::context::NodeLevel; use crate::expression::parentheses::{optional_parentheses, Parentheses}; use crate::prelude::*; -use crate::trivia::{lines_after, skip_trailing_trivia}; use crate::FormatNodeRule; #[derive(Default)] diff --git a/crates/ruff_python_formatter/src/statement/stmt_with.rs b/crates/ruff_python_formatter/src/statement/stmt_with.rs index 
56eca66b17..2c610f029d 100644 --- a/crates/ruff_python_formatter/src/statement/stmt_with.rs +++ b/crates/ruff_python_formatter/src/statement/stmt_with.rs @@ -3,13 +3,13 @@ use rustpython_parser::ast::{Ranged, StmtAsyncWith, StmtWith, Suite, WithItem}; use ruff_formatter::{format_args, write, FormatError}; use ruff_python_ast::node::AnyNodeRef; +use ruff_python_whitespace::{SimpleTokenizer, TokenKind}; use crate::comments::trailing_comments; use crate::expression::parentheses::{ in_parentheses_only_soft_line_break_or_space, optional_parentheses, }; use crate::prelude::*; -use crate::trivia::{SimpleTokenizer, TokenKind}; use crate::FormatNodeRule; pub(super) enum AnyStatementWith<'a> { diff --git a/crates/ruff_python_formatter/src/statement/suite.rs b/crates/ruff_python_formatter/src/statement/suite.rs index 92fc32ed4e..09a12f6697 100644 --- a/crates/ruff_python_formatter/src/statement/suite.rs +++ b/crates/ruff_python_formatter/src/statement/suite.rs @@ -1,10 +1,12 @@ -use crate::context::NodeLevel; -use crate::prelude::*; -use crate::trivia::lines_before; +use rustpython_parser::ast::{Ranged, Stmt, Suite}; + use ruff_formatter::{ format_args, write, FormatOwnedWithRule, FormatRefWithRule, FormatRuleWithOptions, }; -use rustpython_parser::ast::{Ranged, Stmt, Suite}; +use ruff_python_whitespace::lines_before; + +use crate::context::NodeLevel; +use crate::prelude::*; /// Level at which the [`Suite`] appears in the source code. 
#[derive(Copy, Clone, Debug)] @@ -185,13 +187,15 @@ impl<'ast> IntoFormat> for Suite { #[cfg(test)] mod tests { + use rustpython_parser::ast::Suite; + use rustpython_parser::Parse; + + use ruff_formatter::format; + use crate::comments::Comments; use crate::prelude::*; use crate::statement::suite::SuiteLevel; use crate::PyFormatOptions; - use ruff_formatter::format; - use rustpython_parser::ast::Suite; - use rustpython_parser::Parse; fn format_suite(level: SuiteLevel) -> String { let source = r#" diff --git a/crates/ruff_python_whitespace/Cargo.toml b/crates/ruff_python_whitespace/Cargo.toml index cbfc1aea24..22b36562d3 100644 --- a/crates/ruff_python_whitespace/Cargo.toml +++ b/crates/ruff_python_whitespace/Cargo.toml @@ -16,3 +16,7 @@ license = { workspace = true } ruff_text_size = { workspace = true } memchr = { workspace = true } +unic-ucd-ident = "0.9.0" + +[dev-dependencies] +insta = { workspace = true } diff --git a/crates/ruff_python_whitespace/src/lib.rs b/crates/ruff_python_whitespace/src/lib.rs index b8c95e351c..4e16d7ca2d 100644 --- a/crates/ruff_python_whitespace/src/lib.rs +++ b/crates/ruff_python_whitespace/src/lib.rs @@ -1,7 +1,9 @@ mod cursor; mod newlines; +mod tokenizer; mod whitespace; pub use cursor::*; pub use newlines::*; +pub use tokenizer::*; pub use whitespace::*; diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__Reverse.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__Reverse.snap similarity index 98% rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__Reverse.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__Reverse.snap index ec701539c6..3ae643205e 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__Reverse.snap +++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__Reverse.snap @@ 
-1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokenize_reverse() --- [ diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__identifier_ending_in_non_start_char.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__identifier_ending_in_non_start_char.snap similarity index 65% rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__identifier_ending_in_non_start_char.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__identifier_ending_in_non_start_char.snap index 15e9d84407..6f19b91273 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__identifier_ending_in_non_start_char.snap +++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__identifier_ending_in_non_start_char.snap @@ -1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokens() --- [ diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__ignore_word_with_only_id_continuing_chars.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__ignore_word_with_only_id_continuing_chars.snap similarity index 80% rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__ignore_word_with_only_id_continuing_chars.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__ignore_word_with_only_id_continuing_chars.snap index 26e9fd18bc..ccb0282831 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__ignore_word_with_only_id_continuing_chars.snap +++ 
b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__ignore_word_with_only_id_continuing_chars.snap @@ -1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokens() --- [ diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_bogus.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_bogus.snap similarity index 97% rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_bogus.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_bogus.snap index 7936816089..f5005ec2c9 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_bogus.snap +++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_bogus.snap @@ -1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokens() --- [ diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_comma.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_comma.snap similarity index 83% rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_comma.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_comma.snap index 38d1fed60a..a1f98abd4e 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_comma.snap +++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_comma.snap @@ -1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: 
crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokens() --- [ diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_continuation.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_continuation.snap similarity index 88% rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_continuation.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_continuation.snap index 83079fe81a..5e9802280d 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_continuation.snap +++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_continuation.snap @@ -1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokens() --- [ diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_multichar.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_multichar.snap similarity index 89% rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_multichar.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_multichar.snap index 16a1293b44..ff371d781f 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_multichar.snap +++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_multichar.snap @@ -1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokens() --- [ diff --git 
a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_parentheses.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_parentheses.snap similarity index 88% rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_parentheses.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_parentheses.snap index ccd6969c2d..6c792f7cf0 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_parentheses.snap +++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_parentheses.snap @@ -1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokens() --- [ diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_slash.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_slash.snap similarity index 91% rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_slash.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_slash.snap index 093715cf17..f82f501d65 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_slash.snap +++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_slash.snap @@ -1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokens() --- [ diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_substring.snap 
b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_substring.snap similarity index 81% rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_substring.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_substring.snap index 181b438c3f..9b06f81cb9 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_substring.snap +++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_substring.snap @@ -1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokens() --- [ diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_trivia.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_trivia.snap similarity index 84% rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_trivia.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_trivia.snap index f1d708d6cb..79f9130287 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_trivia.snap +++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_trivia.snap @@ -1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokens() --- [ diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tricky_unicode.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tricky_unicode.snap similarity index 65% rename from 
crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tricky_unicode.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tricky_unicode.snap index 91b9cb397a..c8aab65b39 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tricky_unicode.snap +++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tricky_unicode.snap @@ -1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokens() --- [ diff --git a/crates/ruff_python_formatter/src/trivia.rs b/crates/ruff_python_whitespace/src/tokenizer.rs similarity index 93% rename from crates/ruff_python_formatter/src/trivia.rs rename to crates/ruff_python_whitespace/src/tokenizer.rs index 63f92b6e2f..c8aa15dbb7 100644 --- a/crates/ruff_python_formatter/src/trivia.rs +++ b/crates/ruff_python_whitespace/src/tokenizer.rs @@ -1,7 +1,7 @@ use ruff_text_size::{TextLen, TextRange, TextSize}; use unic_ucd_ident::{is_xid_continue, is_xid_start}; -use ruff_python_whitespace::{is_python_whitespace, Cursor}; +use crate::{is_python_whitespace, Cursor}; /// Searches for the first non-trivia character in `range`. /// @@ -11,7 +11,7 @@ use ruff_python_whitespace::{is_python_whitespace, Cursor}; /// of the character, the second item the non-trivia character. /// /// Returns `None` if the range is empty or only contains trivia (whitespace or comments). 
-pub(crate) fn first_non_trivia_token(offset: TextSize, code: &str) -> Option<Token> { +pub fn first_non_trivia_token(offset: TextSize, code: &str) -> Option<Token> { SimpleTokenizer::starts_at(offset, code) .skip_trivia() .next() @@ -23,14 +23,14 @@ pub(crate) fn first_non_trivia_token(offset: TextSize, code: &str) -> Option<Tok -pub(crate) fn first_non_trivia_token_rev(offset: TextSize, code: &str) -> Option<Token> { +pub fn first_non_trivia_token_rev(offset: TextSize, code: &str) -> Option<Token> { SimpleTokenizer::up_to(offset, code) .skip_trivia() .next_back() } /// Returns the number of newlines between `offset` and the first non whitespace character in the source code. -pub(crate) fn lines_before(offset: TextSize, code: &str) -> u32 { +pub fn lines_before(offset: TextSize, code: &str) -> u32 { let tokens = SimpleTokenizer::up_to(offset, code); let mut newlines = 0u32; @@ -52,7 +52,7 @@ pub(crate) fn lines_before(offset: TextSize, code: &str) -> u32 { } /// Counts the empty lines between `offset` and the first non-whitespace character. -pub(crate) fn lines_after(offset: TextSize, code: &str) -> u32 { +pub fn lines_after(offset: TextSize, code: &str) -> u32 { let tokens = SimpleTokenizer::starts_at(offset, code); let mut newlines = 0u32; @@ -74,7 +74,7 @@ pub(crate) fn lines_after(offset: TextSize, code: &str) -> u32 { } /// Returns the position after skipping any trailing trivia up to, but not including the newline character.
-pub(crate) fn skip_trailing_trivia(offset: TextSize, code: &str) -> TextSize { +pub fn skip_trailing_trivia(offset: TextSize, code: &str) -> TextSize { let tokenizer = SimpleTokenizer::starts_at(offset, code); for token in tokenizer { @@ -110,32 +110,32 @@ fn is_non_ascii_identifier_start(c: char) -> bool { } #[derive(Clone, Debug, Eq, PartialEq, Hash)] -pub(crate) struct Token { - pub(crate) kind: TokenKind, - pub(crate) range: TextRange, +pub struct Token { + pub kind: TokenKind, + pub range: TextRange, } impl Token { - pub(crate) const fn kind(&self) -> TokenKind { + pub const fn kind(&self) -> TokenKind { self.kind } #[allow(unused)] - pub(crate) const fn range(&self) -> TextRange { + pub const fn range(&self) -> TextRange { self.range } - pub(crate) const fn start(&self) -> TextSize { + pub const fn start(&self) -> TextSize { self.range.start() } - pub(crate) const fn end(&self) -> TextSize { + pub const fn end(&self) -> TextSize { self.range.end() } } #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] -pub(crate) enum TokenKind { +pub enum TokenKind { /// A comment, not including the trailing new line. Comment, @@ -247,7 +247,7 @@ impl TokenKind { /// /// The tokenizer doesn't guarantee any correctness after it returned a [`TokenKind::Other`]. That's why it /// will return [`TokenKind::Bogus`] for every character after until it reaches the end of the file. -pub(crate) struct SimpleTokenizer<'a> { +pub struct SimpleTokenizer<'a> { offset: TextSize, back_offset: TextSize, /// `true` when it is known that the current `back` line has no comment for sure. 
@@ -258,7 +258,7 @@ pub(crate) struct SimpleTokenizer<'a> { } impl<'a> SimpleTokenizer<'a> { - pub(crate) fn new(source: &'a str, range: TextRange) -> Self { + pub fn new(source: &'a str, range: TextRange) -> Self { Self { offset: range.start(), back_offset: range.end(), @@ -269,20 +269,20 @@ impl<'a> SimpleTokenizer<'a> { } } - pub(crate) fn starts_at(offset: TextSize, source: &'a str) -> Self { + pub fn starts_at(offset: TextSize, source: &'a str) -> Self { let range = TextRange::new(offset, source.text_len()); Self::new(source, range) } /// Creates a tokenizer that lexes tokens from the start of `source` up to `offset`. - pub(crate) fn up_to(offset: TextSize, source: &'a str) -> Self { + pub fn up_to(offset: TextSize, source: &'a str) -> Self { Self::new(source, TextRange::up_to(offset)) } /// Creates a tokenizer that lexes tokens from the start of `source` up to `offset`, and informs /// the lexer that the line at `offset` contains no comments. This can significantly speed up backwards lexing /// because the lexer doesn't need to scan for comments. - pub(crate) fn up_to_without_back_comment(offset: TextSize, source: &'a str) -> Self { + pub fn up_to_without_back_comment(offset: TextSize, source: &'a str) -> Self { let mut tokenizer = Self::up_to(offset, source); tokenizer.back_line_has_no_comment = true; tokenizer @@ -375,7 +375,7 @@ impl<'a> SimpleTokenizer<'a> { /// Returns the next token from the back. Prefer iterating forwards. Iterating backwards is significantly more expensive /// because it needs to check if the line has any comments when encountering any non-trivia token. 
- pub(crate) fn next_token_back(&mut self) -> Token { + pub fn next_token_back(&mut self) -> Token { self.cursor.start_token(); let Some(last) = self.cursor.bump_back() else { @@ -503,7 +503,7 @@ impl<'a> SimpleTokenizer<'a> { token } - pub(crate) fn skip_trivia(self) -> impl Iterator<Item = Token> + DoubleEndedIterator + 'a { + pub fn skip_trivia(self) -> impl Iterator<Item = Token> + DoubleEndedIterator + 'a { self.filter(|t| !t.kind().is_trivia()) } } @@ -539,7 +539,7 @@ mod tests { use insta::assert_debug_snapshot; use ruff_text_size::{TextLen, TextRange, TextSize}; - use crate::trivia::{lines_after, lines_before, SimpleTokenizer, Token}; + use crate::tokenizer::{lines_after, lines_before, SimpleTokenizer, Token}; struct TokenizationTestCase { source: &'static str,