# Rename ruff_python_whitespace to ruff_python_trivia (#5886)

## Summary

This crate now contains utilities for dealing with trivia more broadly:
whitespace, newlines, "simple" trivia lexing, etc. I've renamed it to
reflect its increased responsibilities.

To avoid conflicts, I've also renamed `Token` and `TokenKind` to
`SimpleToken` and `SimpleTokenKind`.
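
For a sense of how call sites read after the rename, here is a minimal sketch of the
`SimpleTokenizer` API using the new `SimpleToken`/`SimpleTokenKind` names. The type and
method names are taken from the diff below; the crate is an internal workspace crate, so
treat this as illustrative rather than a published API:

```rust
use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
use ruff_text_size::{TextLen, TextRange, TextSize};

fn main() {
    // The "simple" tokenizer is aimed at the gaps between AST nodes, which
    // contain only punctuation, keywords, whitespace, and comments.
    let between_nodes = "  , # trailing comma\n)";
    let range = TextRange::new(TextSize::default(), between_nodes.text_len());

    // `skip_trivia()` filters out whitespace, newlines, comments, and continuations.
    for token in SimpleTokenizer::new(between_nodes, range).skip_trivia() {
        match token.kind() {
            SimpleTokenKind::Comma => println!("comma at {:?}", token.range),
            SimpleTokenKind::RParen => println!("closing paren at {:?}", token.range),
            _ => {}
        }
    }
}
```

As the tokenizer's own docs in the diff note, it makes no correctness guarantees once it
hits anything it doesn't understand (it returns `SimpleTokenKind::Other` and then `Bogus`
for the rest of the input), which is why the formatter only points it at ranges between nodes.
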
Charlie Marsh 2023-07-19 11:48:27 -04:00 committed by GitHub
parent a75a6de577
commit 5f3da9955a
86 changed files with 360 additions and 353 deletions


@ -133,8 +133,8 @@ At time of writing, the repository includes the following crates:
refer to?"
- `crates/ruff_python_stdlib`: library crate containing Python-specific standard library data, e.g.
the names of all built-in exceptions and which standard library types are immutable.
- `crates/ruff_python_whitespace`: library crate containing Python-specific whitespace analysis
logic (indentation and newlines).
- `crates/ruff_python_trivia`: library crate containing Python-specific trivia utilities (e.g.,
for analyzing indentation, newlines, etc.).
- `crates/ruff_rustpython`: library crate containing `RustPython`-specific utilities.
- `crates/ruff_textwrap`: library crate to indent and dedent Python source code.
- `crates/ruff_wasm`: library crate for exposing Ruff as a WebAssembly module. Powers the

Cargo.lock (generated)

@ -1928,7 +1928,7 @@ dependencies = [
"ruff_python_ast",
"ruff_python_semantic",
"ruff_python_stdlib",
"ruff_python_whitespace",
"ruff_python_trivia",
"ruff_rustpython",
"ruff_text_size",
"ruff_textwrap",
@ -2124,7 +2124,7 @@ dependencies = [
"num-bigint",
"num-traits",
"once_cell",
"ruff_python_whitespace",
"ruff_python_trivia",
"ruff_text_size",
"rustc-hash",
"rustpython-ast",
@ -2148,7 +2148,7 @@ dependencies = [
"once_cell",
"ruff_formatter",
"ruff_python_ast",
"ruff_python_whitespace",
"ruff_python_trivia",
"ruff_text_size",
"rustc-hash",
"rustpython-parser",
@ -2191,7 +2191,7 @@ name = "ruff_python_stdlib"
version = "0.0.0"
[[package]]
name = "ruff_python_whitespace"
name = "ruff_python_trivia"
version = "0.0.0"
dependencies = [
"insta",
@ -2237,7 +2237,7 @@ dependencies = [
name = "ruff_textwrap"
version = "0.0.0"
dependencies = [
"ruff_python_whitespace",
"ruff_python_trivia",
"ruff_text_size",
]


@ -19,7 +19,7 @@ ruff_cache = { path = "../ruff_cache" }
ruff_diagnostics = { path = "../ruff_diagnostics", features = ["serde"] }
ruff_index = { path = "../ruff_index" }
ruff_macros = { path = "../ruff_macros" }
ruff_python_whitespace = { path = "../ruff_python_whitespace" }
ruff_python_trivia = { path = "../ruff_python_trivia" }
ruff_python_ast = { path = "../ruff_python_ast", features = ["serde"] }
ruff_python_semantic = { path = "../ruff_python_semantic" }
ruff_python_stdlib = { path = "../ruff_python_stdlib" }


@ -7,7 +7,7 @@ use rustpython_parser::{lexer, Mode};
use ruff_diagnostics::Edit;
use ruff_python_ast::helpers;
use ruff_python_ast::source_code::{Indexer, Locator, Stylist};
use ruff_python_whitespace::{is_python_whitespace, NewlineWithTrailingNewline, PythonWhitespace};
use ruff_python_trivia::{is_python_whitespace, NewlineWithTrailingNewline, PythonWhitespace};
use crate::autofix::codemods;


@ -5,7 +5,7 @@ use ruff_text_size::TextSize;
use ruff_diagnostics::Diagnostic;
use ruff_python_ast::source_code::{Indexer, Locator, Stylist};
use ruff_python_whitespace::UniversalNewlines;
use ruff_python_trivia::UniversalNewlines;
use crate::comments::shebang::ShebangDirective;
use crate::registry::Rule;


@ -1,4 +1,4 @@
use ruff_python_whitespace::{is_python_whitespace, Cursor};
use ruff_python_trivia::{is_python_whitespace, Cursor};
use ruff_text_size::{TextLen, TextSize};
/// A shebang directive (e.g., `#!/usr/bin/env python3`).


@ -10,7 +10,7 @@ use rustpython_parser::Tok;
use ruff_python_ast::source_code::Locator;
use ruff_python_ast::statement_visitor::{walk_stmt, StatementVisitor};
use ruff_python_whitespace::UniversalNewlineIterator;
use ruff_python_trivia::UniversalNewlineIterator;
/// Extract doc lines (standalone comments) from a token sequence.
pub(crate) fn doc_lines_from_tokens(lxr: &[LexResult]) -> DocLines {


@ -5,7 +5,7 @@ use ruff_python_ast::docstrings::{leading_space, leading_words};
use ruff_text_size::{TextLen, TextRange, TextSize};
use strum_macros::EnumIter;
use ruff_python_whitespace::{Line, UniversalNewlineIterator, UniversalNewlines};
use ruff_python_trivia::{Line, UniversalNewlineIterator, UniversalNewlines};
use crate::docstrings::styles::SectionStyle;
use crate::docstrings::{Docstring, DocstringBody};


@ -8,7 +8,7 @@ use rustpython_parser::{lexer, Mode, Tok};
use ruff_diagnostics::Edit;
use ruff_python_ast::helpers::is_docstring_stmt;
use ruff_python_ast::source_code::{Locator, Stylist};
use ruff_python_whitespace::{PythonWhitespace, UniversalNewlineIterator};
use ruff_python_trivia::{PythonWhitespace, UniversalNewlineIterator};
use ruff_textwrap::indent;
#[derive(Debug, Clone, PartialEq, Eq)]
@ -305,7 +305,7 @@ mod tests {
use rustpython_parser::Parse;
use ruff_python_ast::source_code::{Locator, Stylist};
use ruff_python_whitespace::LineEnding;
use ruff_python_trivia::LineEnding;
use super::Insertion;


@ -10,7 +10,7 @@ use serde::Serialize;
use serde_json::error::Category;
use ruff_diagnostics::Diagnostic;
use ruff_python_whitespace::{NewlineWithTrailingNewline, UniversalNewlineIterator};
use ruff_python_trivia::{NewlineWithTrailingNewline, UniversalNewlineIterator};
use ruff_text_size::{TextRange, TextSize};
use crate::autofix::source_map::{SourceMap, SourceMarker};


@ -13,7 +13,7 @@ use rustpython_parser::ast::Ranged;
use ruff_diagnostics::Diagnostic;
use ruff_python_ast::source_code::Locator;
use ruff_python_whitespace::LineEnding;
use ruff_python_trivia::LineEnding;
use crate::codes::NoqaCode;
use crate::fs::relativize_path;
@ -772,7 +772,7 @@ mod tests {
use ruff_diagnostics::Diagnostic;
use ruff_python_ast::source_code::Locator;
use ruff_python_whitespace::LineEnding;
use ruff_python_trivia::LineEnding;
use crate::noqa::{add_noqa_inner, Directive, NoqaMapping, ParsedFileExemption};
use crate::rules::pycodestyle::rules::AmbiguousVariableName;


@ -3,7 +3,7 @@ use rustpython_parser::ast::{self, Constant, Decorator, Expr, Keyword};
use ruff_python_ast::call_path::{collect_call_path, CallPath};
use ruff_python_ast::helpers::map_callable;
use ruff_python_semantic::SemanticModel;
use ruff_python_whitespace::PythonWhitespace;
use ruff_python_trivia::PythonWhitespace;
pub(super) fn get_mark_decorators(
decorators: &[Decorator],


@ -3,7 +3,7 @@ use rustpython_parser::ast;
use rustpython_parser::ast::{Expr, Ranged, Stmt};
use ruff_python_ast::source_code::Locator;
use ruff_python_whitespace::UniversalNewlines;
use ruff_python_trivia::UniversalNewlines;
/// Return `true` if a function's return statement include at least one
/// non-`None` value.


@ -12,7 +12,7 @@ use ruff_python_ast::helpers::{any_over_expr, contains_effect, first_colon_range
use ruff_python_ast::source_code::Locator;
use ruff_python_ast::stmt_if::if_elif_branches;
use ruff_python_semantic::SemanticModel;
use ruff_python_whitespace::UniversalNewlines;
use ruff_python_trivia::UniversalNewlines;
use crate::checkers::ast::Checker;
use crate::line_width::LineWidth;


@ -6,7 +6,7 @@ use ruff_diagnostics::{AutofixKind, Violation};
use ruff_diagnostics::{Diagnostic, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::helpers::first_colon_range;
use ruff_python_whitespace::UniversalNewlines;
use ruff_python_trivia::UniversalNewlines;
use crate::checkers::ast::Checker;
use crate::line_width::LineWidth;


@ -2,7 +2,7 @@ use rustpython_parser::ast::{Ranged, Stmt};
use rustpython_parser::{lexer, Mode, Tok};
use ruff_python_ast::source_code::Locator;
use ruff_python_whitespace::{PythonWhitespace, UniversalNewlines};
use ruff_python_trivia::{PythonWhitespace, UniversalNewlines};
use crate::rules::isort::types::TrailingComma;


@ -10,7 +10,7 @@ use ruff_python_ast::helpers::{
followed_by_multi_statement_line, preceded_by_multi_statement_line, trailing_lines_end,
};
use ruff_python_ast::source_code::{Indexer, Locator, Stylist};
use ruff_python_whitespace::{leading_indentation, PythonWhitespace, UniversalNewlines};
use ruff_python_trivia::{leading_indentation, PythonWhitespace, UniversalNewlines};
use ruff_textwrap::indent;
use crate::line_width::LineWidth;


@ -3,7 +3,7 @@ use rustpython_parser::ast::{CmpOp, Expr, Ranged};
use unicode_width::UnicodeWidthStr;
use ruff_python_ast::source_code::Locator;
use ruff_python_whitespace::Line;
use ruff_python_trivia::Line;
use crate::line_width::{LineLength, LineWidth, TabSize};


@ -1,6 +1,6 @@
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_whitespace::Line;
use ruff_python_trivia::Line;
use crate::rules::pycodestyle::helpers::is_overlong;
use crate::settings::Settings;


@ -8,7 +8,7 @@ use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::helpers::{has_leading_content, has_trailing_content};
use ruff_python_ast::source_code::Generator;
use ruff_python_semantic::SemanticModel;
use ruff_python_whitespace::{leading_indentation, UniversalNewlines};
use ruff_python_trivia::{leading_indentation, UniversalNewlines};
use crate::checkers::ast::Checker;
use crate::registry::AsRule;


@ -1,6 +1,6 @@
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_whitespace::Line;
use ruff_python_trivia::Line;
use crate::rules::pycodestyle::helpers::is_overlong;
use crate::settings::Settings;


@ -18,7 +18,7 @@ use rustpython_parser::lexer::LexResult;
use ruff_python_ast::source_code::Locator;
use ruff_python_ast::token_kind::TokenKind;
use ruff_python_whitespace::is_python_whitespace;
use ruff_python_trivia::is_python_whitespace;
mod extraneous_whitespace;
mod indentation;


@ -4,7 +4,7 @@ use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::source_code::Locator;
use ruff_python_ast::token_kind::TokenKind;
use ruff_python_whitespace::PythonWhitespace;
use ruff_python_trivia::PythonWhitespace;
use crate::checkers::logical_lines::LogicalLinesContext;
use crate::rules::pycodestyle::rules::logical_lines::LogicalLine;


@ -2,7 +2,7 @@ use ruff_text_size::{TextLen, TextRange};
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_whitespace::{leading_indentation, Line};
use ruff_python_trivia::{leading_indentation, Line};
/// ## What it does
/// Checks for mixed tabs and spaces in indentation.


@ -3,7 +3,7 @@ use ruff_text_size::{TextLen, TextRange, TextSize};
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::source_code::Indexer;
use ruff_python_whitespace::{leading_indentation, Line};
use ruff_python_trivia::{leading_indentation, Line};
#[violation]
pub struct TabIndentation;


@ -4,7 +4,7 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::helpers;
use ruff_python_ast::source_code::{Indexer, Locator};
use ruff_python_whitespace::Line;
use ruff_python_trivia::Line;
use crate::registry::Rule;
use crate::settings::Settings;


@ -5,7 +5,7 @@ use ruff_python_ast::cast;
use ruff_python_ast::helpers::map_callable;
use ruff_python_ast::str::is_implicit_concatenation;
use ruff_python_semantic::{Definition, Member, MemberKind, SemanticModel};
use ruff_python_whitespace::UniversalNewlines;
use ruff_python_trivia::UniversalNewlines;
/// Return the index of the first logical line in a string.
pub(super) fn logical_line(content: &str) -> Option<usize> {


@ -1,6 +1,6 @@
use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_whitespace::{UniversalNewlineIterator, UniversalNewlines};
use ruff_python_trivia::{UniversalNewlineIterator, UniversalNewlines};
use crate::checkers::ast::Checker;
use crate::docstrings::Docstring;


@ -4,7 +4,7 @@ use rustpython_parser::ast::Ranged;
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_semantic::{Definition, Member, MemberKind};
use ruff_python_whitespace::{PythonWhitespace, UniversalNewlineIterator, UniversalNewlines};
use ruff_python_trivia::{PythonWhitespace, UniversalNewlineIterator, UniversalNewlines};
use crate::checkers::ast::Checker;
use crate::docstrings::Docstring;


@ -6,7 +6,7 @@ use rustpython_parser::ast::Ranged;
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_semantic::{Definition, Member, MemberKind};
use ruff_python_whitespace::{PythonWhitespace, UniversalNewlineIterator, UniversalNewlines};
use ruff_python_trivia::{PythonWhitespace, UniversalNewlineIterator, UniversalNewlines};
use crate::checkers::ast::Checker;
use crate::docstrings::Docstring;


@ -3,7 +3,7 @@ use strum::IntoEnumIterator;
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_whitespace::{UniversalNewlineIterator, UniversalNewlines};
use ruff_python_trivia::{UniversalNewlineIterator, UniversalNewlines};
use crate::checkers::ast::Checker;
use crate::docstrings::sections::SectionKind;


@ -3,7 +3,7 @@ use strum::IntoEnumIterator;
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_whitespace::{UniversalNewlineIterator, UniversalNewlines};
use ruff_python_trivia::{UniversalNewlineIterator, UniversalNewlines};
use crate::checkers::ast::Checker;
use crate::docstrings::sections::SectionKind;


@ -4,7 +4,7 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Violation};
use ruff_diagnostics::{Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::docstrings::{clean_space, leading_space};
use ruff_python_whitespace::NewlineWithTrailingNewline;
use ruff_python_trivia::NewlineWithTrailingNewline;
use crate::checkers::ast::Checker;
use crate::docstrings::Docstring;


@ -5,7 +5,7 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::str::{is_triple_quote, leading_quote};
use ruff_python_semantic::{Definition, Member};
use ruff_python_whitespace::{NewlineWithTrailingNewline, UniversalNewlineIterator};
use ruff_python_trivia::{NewlineWithTrailingNewline, UniversalNewlineIterator};
use crate::checkers::ast::Checker;
use crate::docstrings::Docstring;


@ -4,7 +4,7 @@ use rustpython_parser::ast::Ranged;
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::docstrings::clean_space;
use ruff_python_whitespace::{NewlineWithTrailingNewline, UniversalNewlines};
use ruff_python_trivia::{NewlineWithTrailingNewline, UniversalNewlines};
use crate::checkers::ast::Checker;
use crate::docstrings::Docstring;


@ -3,7 +3,7 @@ use rustpython_parser::ast::{self, Stmt};
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_semantic::{Definition, Member, MemberKind};
use ruff_python_whitespace::UniversalNewlines;
use ruff_python_trivia::UniversalNewlines;
use crate::checkers::ast::Checker;
use crate::docstrings::Docstring;


@ -2,7 +2,7 @@ use ruff_text_size::{TextLen, TextRange};
use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_whitespace::NewlineWithTrailingNewline;
use ruff_python_trivia::NewlineWithTrailingNewline;
use crate::checkers::ast::Checker;
use crate::docstrings::Docstring;


@ -9,7 +9,7 @@ use ruff_python_ast::call_path::{from_qualified_name, CallPath};
use ruff_python_ast::cast;
use ruff_python_semantic::analyze::visibility::{is_property, is_test};
use ruff_python_semantic::{Definition, Member, MemberKind};
use ruff_python_whitespace::UniversalNewlines;
use ruff_python_trivia::UniversalNewlines;
use crate::checkers::ast::Checker;
use crate::docstrings::Docstring;


@ -1,7 +1,7 @@
use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::str::{leading_quote, trailing_quote};
use ruff_python_whitespace::NewlineWithTrailingNewline;
use ruff_python_trivia::NewlineWithTrailingNewline;
use crate::checkers::ast::Checker;
use crate::docstrings::Docstring;


@ -13,7 +13,7 @@ use ruff_python_ast::docstrings::{clean_space, leading_space};
use ruff_python_ast::identifier::Identifier;
use ruff_python_semantic::analyze::visibility::is_staticmethod;
use ruff_python_semantic::{Definition, Member, MemberKind};
use ruff_python_whitespace::{NewlineWithTrailingNewline, PythonWhitespace};
use ruff_python_trivia::{NewlineWithTrailingNewline, PythonWhitespace};
use ruff_textwrap::dedent;
use crate::checkers::ast::Checker;


@ -5,7 +5,7 @@ use rustpython_parser::ast::{Expr, Ranged};
use ruff_diagnostics::Edit;
use ruff_python_ast::source_code::{Locator, Stylist};
use ruff_python_semantic::Binding;
use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
use crate::autofix::codemods::CodegenStylist;
use crate::cst::matchers::{match_call_mut, match_dict, match_expression};
@ -102,7 +102,7 @@ pub(crate) fn remove_exception_handler_assignment(
let preceding = tokenizer
.next_back()
.context("expected the exception name to be preceded by `as`")?;
debug_assert!(matches!(preceding.kind, TokenKind::As));
debug_assert!(matches!(preceding.kind, SimpleTokenKind::As));
// Lex to the end of the preceding token, which should be the exception value.
let preceding = tokenizer
@ -113,7 +113,7 @@ pub(crate) fn remove_exception_handler_assignment(
let following = SimpleTokenizer::starts_at(bound_exception.range.end(), locator.contents())
.next()
.context("expected the exception name to be followed by a colon")?;
debug_assert!(matches!(following.kind, TokenKind::Colon));
debug_assert!(matches!(following.kind, SimpleTokenKind::Colon));
Ok(Edit::deletion(preceding.end(), following.start()))
}


@ -4,7 +4,7 @@ use ruff_text_size::{TextLen, TextRange, TextSize};
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_whitespace::Line;
use ruff_python_trivia::Line;
/// ## What it does
/// Check for `noqa` annotations that suppress all diagnostics, as opposed to


@ -5,7 +5,7 @@ use ruff_text_size::{TextLen, TextRange, TextSize};
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_whitespace::Line;
use ruff_python_trivia::Line;
/// ## What it does
/// Check for `type: ignore` annotations that suppress all type warnings, as


@ -1,6 +1,6 @@
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_whitespace::Line;
use ruff_python_trivia::Line;
const BIDI_UNICODE: [char; 10] = [
'\u{202A}', //{LEFT-TO-RIGHT EMBEDDING}


@ -3,7 +3,7 @@ use regex::Regex;
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_whitespace::Line;
use ruff_python_trivia::Line;
/// ## What it does
/// Checks for unnecessary UTF-8 encoding declarations.


@ -13,7 +13,7 @@ license = { workspace = true }
[lib]
[dependencies]
ruff_python_whitespace = { path = "../ruff_python_whitespace" }
ruff_python_trivia = { path = "../ruff_python_trivia" }
ruff_text_size = { workspace = true }
anyhow = { workspace = true }


@ -12,7 +12,7 @@ use rustpython_parser::ast::{
use rustpython_parser::{lexer, Mode, Tok};
use smallvec::SmallVec;
use ruff_python_whitespace::{is_python_whitespace, PythonWhitespace, UniversalNewlineIterator};
use ruff_python_trivia::{is_python_whitespace, PythonWhitespace, UniversalNewlineIterator};
use crate::call_path::CallPath;
use crate::source_code::{Indexer, Locator};


@ -14,7 +14,7 @@ use ruff_text_size::{TextLen, TextRange, TextSize};
use rustpython_ast::{Alias, Arg, ArgWithDefault};
use rustpython_parser::ast::{self, ExceptHandler, Ranged, Stmt};
use ruff_python_whitespace::{is_python_whitespace, Cursor};
use ruff_python_trivia::{is_python_whitespace, Cursor};
use crate::source_code::Locator;


@ -10,7 +10,7 @@ use rustpython_parser::ast::{
TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple, WithItem,
};
use ruff_python_whitespace::LineEnding;
use ruff_python_trivia::LineEnding;
use crate::source_code::stylist::{Indentation, Quote, Stylist};
@ -1470,7 +1470,7 @@ mod tests {
use rustpython_ast::Stmt;
use rustpython_parser::Parse;
use ruff_python_whitespace::LineEnding;
use ruff_python_trivia::LineEnding;
use crate::source_code::stylist::{Indentation, Quote};
use crate::source_code::Generator;


@ -6,7 +6,7 @@ use memchr::{memchr2, memrchr2};
use once_cell::unsync::OnceCell;
use ruff_text_size::{TextLen, TextRange, TextSize};
use ruff_python_whitespace::find_newline;
use ruff_python_trivia::find_newline;
use crate::source_code::{LineIndex, OneIndexed, SourceCode, SourceLocation};


@ -4,7 +4,7 @@ use std::fmt;
use std::ops::Deref;
use once_cell::unsync::OnceCell;
use ruff_python_whitespace::{find_newline, LineEnding};
use ruff_python_trivia::{find_newline, LineEnding};
use rustpython_literal::escape::Quote as StrQuote;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::Tok;
@ -166,7 +166,7 @@ mod tests {
use rustpython_parser::lexer::lex;
use rustpython_parser::Mode;
use ruff_python_whitespace::{find_newline, LineEnding};
use ruff_python_trivia::{find_newline, LineEnding};
use crate::source_code::stylist::{Indentation, Quote};
use crate::source_code::{Locator, Stylist};


@ -1,7 +1,7 @@
use ruff_text_size::{TextRange, TextSize};
use rustpython_parser::ast::Ranged;
use ruff_python_whitespace::is_python_whitespace;
use ruff_python_trivia::is_python_whitespace;
use crate::source_code::Locator;


@ -12,7 +12,7 @@ license = { workspace = true }
[dependencies]
ruff_formatter = { path = "../ruff_formatter" }
ruff_python_whitespace = { path = "../ruff_python_whitespace" }
ruff_python_trivia = { path = "../ruff_python_trivia" }
ruff_python_ast = { path = "../ruff_python_ast" }
ruff_text_size = { workspace = true }


@ -2,8 +2,8 @@ use ruff_text_size::{TextRange, TextSize};
use rustpython_parser::ast::Ranged;
use ruff_formatter::{format_args, write, Argument, Arguments};
use ruff_python_whitespace::{
lines_after, skip_trailing_trivia, SimpleTokenizer, Token, TokenKind,
use ruff_python_trivia::{
lines_after, skip_trailing_trivia, SimpleToken, SimpleTokenKind, SimpleTokenizer,
};
use crate::context::NodeLevel;
@ -294,12 +294,12 @@ impl<'fmt, 'ast, 'buf> JoinCommaSeparatedBuilder<'fmt, 'ast, 'buf> {
)
.skip_trivia()
// Skip over any closing parentheses belonging to the expression
.find(|token| token.kind() != TokenKind::RParen);
.find(|token| token.kind() != SimpleTokenKind::RParen);
matches!(
first_token,
Some(Token {
kind: TokenKind::Comma,
Some(SimpleToken {
kind: SimpleTokenKind::Comma,
..
})
)


@ -3,7 +3,7 @@ use rustpython_parser::ast::Ranged;
use ruff_formatter::{format_args, write, FormatError, SourceCode};
use ruff_python_ast::node::{AnyNodeRef, AstNode};
use ruff_python_whitespace::{lines_after, lines_before, skip_trailing_trivia};
use ruff_python_trivia::{lines_after, lines_before, skip_trailing_trivia};
use crate::comments::SourceComment;
use crate::context::NodeLevel;


@ -7,8 +7,8 @@ use rustpython_parser::ast::{Expr, ExprIfExp, ExprSlice, Ranged};
use ruff_python_ast::node::{AnyNodeRef, AstNode};
use ruff_python_ast::source_code::Locator;
use ruff_python_ast::whitespace;
use ruff_python_whitespace::{
first_non_trivia_token_rev, PythonWhitespace, SimpleTokenizer, Token, TokenKind,
use ruff_python_trivia::{
first_non_trivia_token_rev, PythonWhitespace, SimpleToken, SimpleTokenKind, SimpleTokenizer,
UniversalNewlines,
};
@ -756,7 +756,7 @@ fn handle_trailing_end_of_line_condition_comment<'a>(
for token in tokens {
match token.kind() {
TokenKind::Colon => {
SimpleTokenKind::Colon => {
if comment.slice().start() > token.start() {
// Comment comes after the colon
// ```python
@ -775,10 +775,10 @@ fn handle_trailing_end_of_line_condition_comment<'a>(
// ```
break;
}
TokenKind::RParen => {
SimpleTokenKind::RParen => {
// Skip over any closing parentheses
}
TokenKind::Comma => {
SimpleTokenKind::Comma => {
// Skip over any trailing comma
}
kind => {
@ -884,12 +884,12 @@ fn handle_trailing_binary_expression_left_or_operator_comment<'a>(
);
let mut tokens = SimpleTokenizer::new(locator.contents(), between_operands_range).skip_trivia();
let operator_offset = if let Some(non_r_paren) = tokens.find(|t| t.kind() != TokenKind::RParen)
{
non_r_paren.start()
} else {
return CommentPlacement::Default(comment);
};
let operator_offset =
if let Some(non_r_paren) = tokens.find(|t| t.kind() != SimpleTokenKind::RParen) {
non_r_paren.start()
} else {
return CommentPlacement::Default(comment);
};
let comment_range = comment.slice().range();
@ -1061,8 +1061,8 @@ fn handle_slice_comments<'a>(
// Check for `foo[ # comment`, but only if they are on the same line
let after_lbracket = matches!(
first_non_trivia_token_rev(comment.slice().start(), locator.contents()),
Some(Token {
kind: TokenKind::LBracket,
Some(SimpleToken {
kind: SimpleTokenKind::LBracket,
..
})
);
@ -1182,11 +1182,11 @@ fn handle_dict_unpacking_comment<'a>(
// we start from the preceding node but we skip its token
for token in tokens.by_ref() {
// Skip closing parentheses that are not part of the node range
if token.kind == TokenKind::RParen {
if token.kind == SimpleTokenKind::RParen {
continue;
}
// The Keyword case
if token.kind == TokenKind::Star {
if token.kind == SimpleTokenKind::Star {
count += 1;
break;
}
@ -1194,8 +1194,8 @@ fn handle_dict_unpacking_comment<'a>(
debug_assert!(
matches!(
token,
Token {
kind: TokenKind::LBrace | TokenKind::Comma | TokenKind::Colon,
SimpleToken {
kind: SimpleTokenKind::LBrace | SimpleTokenKind::Comma | SimpleTokenKind::Colon,
..
}
),
@ -1205,7 +1205,7 @@ fn handle_dict_unpacking_comment<'a>(
}
for token in tokens {
if token.kind != TokenKind::Star {
if token.kind != SimpleTokenKind::Star {
return CommentPlacement::Default(comment);
}
count += 1;
@ -1302,12 +1302,12 @@ fn handle_expr_if_comment<'a>(
let if_token = find_only_token_in_range(
TextRange::new(body.end(), test.start()),
locator,
TokenKind::If,
SimpleTokenKind::If,
);
let else_token = find_only_token_in_range(
TextRange::new(test.end(), orelse.start()),
locator,
TokenKind::Else,
SimpleTokenKind::Else,
);
// Between `if` and `test`
@ -1369,7 +1369,7 @@ fn handle_with_item_comment<'a>(
let as_token = find_only_token_in_range(
TextRange::new(context_expr.end(), optional_vars.start()),
locator,
TokenKind::As,
SimpleTokenKind::As,
);
// If before the `as` keyword, then it must be a trailing comment of the context expression.
@ -1386,13 +1386,17 @@ fn handle_with_item_comment<'a>(
/// Looks for a token in the range that contains no other tokens except for parentheses outside
/// the expression ranges
fn find_only_token_in_range(range: TextRange, locator: &Locator, token_kind: TokenKind) -> Token {
fn find_only_token_in_range(
range: TextRange,
locator: &Locator,
token_kind: SimpleTokenKind,
) -> SimpleToken {
let mut tokens = SimpleTokenizer::new(locator.contents(), range)
.skip_trivia()
.skip_while(|token| token.kind == TokenKind::RParen);
.skip_while(|token| token.kind == SimpleTokenKind::RParen);
let token = tokens.next().expect("Expected a token");
debug_assert_eq!(token.kind(), token_kind);
let mut tokens = tokens.skip_while(|token| token.kind == TokenKind::LParen);
let mut tokens = tokens.skip_while(|token| token.kind == SimpleTokenKind::LParen);
debug_assert_eq!(tokens.next(), None);
token
}
@ -1446,7 +1450,7 @@ fn handle_comprehension_comment<'a>(
comprehension.iter.range().start(),
),
locator,
TokenKind::In,
SimpleTokenKind::In,
);
// Comments between the target and the `in`
@ -1509,7 +1513,7 @@ fn handle_comprehension_comment<'a>(
let if_token = find_only_token_in_range(
TextRange::new(last_end, if_node.range().start()),
locator,
TokenKind::If,
SimpleTokenKind::If,
);
if is_own_line {
if last_end < comment.slice().start() && comment.slice().start() < if_token.start() {


@ -13,7 +13,7 @@ use ruff_python_ast::source_code::{CommentRanges, Locator};
// pre-order.
#[allow(clippy::wildcard_imports)]
use ruff_python_ast::visitor::preorder::*;
use ruff_python_whitespace::is_python_whitespace;
use ruff_python_trivia::is_python_whitespace;
use crate::comments::node_key::NodeRefEqualityKey;
use crate::comments::placement::place_comment;


@ -3,7 +3,7 @@ use rustpython_parser::ast::{Expr, ExprCall, Ranged};
use ruff_formatter::write;
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
use crate::comments::dangling_comments;
use crate::expression::expr_generator_exp::GeneratorExpParentheses;
@ -132,14 +132,14 @@ fn is_single_argument_parenthesized(argument: &Expr, call_end: TextSize, source:
SimpleTokenizer::new(source, TextRange::new(argument.end(), call_end)).skip_trivia()
{
match token.kind() {
TokenKind::RParen => {
SimpleTokenKind::RParen => {
if has_seen_r_paren {
return true;
}
has_seen_r_paren = true;
}
// Skip over any trailing comma
TokenKind::Comma => continue,
SimpleTokenKind::Comma => continue,
_ => {
// Passed the arguments
break;


@ -5,7 +5,7 @@ use rustpython_parser::ast::{Expr, Ranged};
use ruff_formatter::prelude::{hard_line_break, line_suffix_boundary, space, text};
use ruff_formatter::{write, Buffer, Format, FormatError, FormatResult};
use ruff_python_ast::node::{AnyNodeRef, AstNode};
use ruff_python_whitespace::{SimpleTokenizer, Token, TokenKind};
use ruff_python_trivia::{SimpleToken, SimpleTokenKind, SimpleTokenizer};
use crate::comments::{dangling_comments, SourceComment};
use crate::context::PyFormatContext;
@ -158,17 +158,17 @@ pub(crate) fn find_colons(
range: TextRange,
lower: &Option<Box<Expr>>,
upper: &Option<Box<Expr>>,
) -> FormatResult<(Token, Option<Token>)> {
) -> FormatResult<(SimpleToken, Option<SimpleToken>)> {
let after_lower = lower
.as_ref()
.map_or(range.start(), |lower| lower.range().end());
let mut tokens = SimpleTokenizer::new(contents, TextRange::new(after_lower, range.end()))
.skip_trivia()
.skip_while(|token| token.kind == TokenKind::RParen);
.skip_while(|token| token.kind == SimpleTokenKind::RParen);
let first_colon = tokens.next().ok_or(FormatError::syntax_error(
"Din't find any token for slice first colon",
))?;
if first_colon.kind != TokenKind::Colon {
if first_colon.kind != SimpleTokenKind::Colon {
return Err(FormatError::syntax_error(
"slice first colon token was not a colon",
));
@ -179,9 +179,9 @@ pub(crate) fn find_colons(
.map_or(first_colon.end(), |upper| upper.range().end());
let mut tokens = SimpleTokenizer::new(contents, TextRange::new(after_upper, range.end()))
.skip_trivia()
.skip_while(|token| token.kind == TokenKind::RParen);
.skip_while(|token| token.kind == SimpleTokenKind::RParen);
let second_colon = if let Some(token) = tokens.next() {
if token.kind != TokenKind::Colon {
if token.kind != SimpleTokenKind::Colon {
return Err(FormatError::syntax_error(
"Expected a colon for the second colon token",
));


@ -5,7 +5,7 @@ use rustpython_parser::ast::{ExprUnaryOp, Ranged};
use ruff_formatter::prelude::{hard_line_break, space, text};
use ruff_formatter::{Format, FormatContext, FormatResult};
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
use crate::comments::trailing_comments;
use crate::context::PyFormatContext;
@ -97,7 +97,7 @@ fn is_operand_parenthesized(unary: &ExprUnaryOp, source: &str) -> bool {
.skip_trivia()
.next()
{
debug_assert_eq!(token.kind(), TokenKind::LParen);
debug_assert_eq!(token.kind(), SimpleTokenKind::LParen);
true
} else {
false


@ -3,7 +3,7 @@ use rustpython_parser::ast::Ranged;
use ruff_formatter::prelude::tag::Condition;
use ruff_formatter::{format_args, write, Argument, Arguments};
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_whitespace::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};
use ruff_python_trivia::{first_non_trivia_token, SimpleToken, SimpleTokenKind, SimpleTokenizer};
use crate::context::NodeLevel;
use crate::prelude::*;
@ -77,8 +77,8 @@ pub(crate) fn is_expression_parenthesized(expr: AnyNodeRef, contents: &str) -> b
// First test if there's a closing parentheses because it tends to be cheaper.
if matches!(
first_non_trivia_token(expr.end(), contents),
Some(Token {
kind: TokenKind::RParen,
Some(SimpleToken {
kind: SimpleTokenKind::RParen,
..
})
) {
@ -87,8 +87,8 @@ pub(crate) fn is_expression_parenthesized(expr: AnyNodeRef, contents: &str) -> b
matches!(
tokenizer.next_back(),
Some(Token {
kind: TokenKind::LParen,
Some(SimpleToken {
kind: SimpleTokenKind::LParen,
..
})
)


@ -5,7 +5,7 @@ use rustpython_parser::ast::{Arguments, Ranged};
use ruff_formatter::{format_args, write, FormatRuleWithOptions};
use ruff_python_ast::node::{AnyNodeRef, AstNode};
use ruff_python_whitespace::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};
use ruff_python_trivia::{first_non_trivia_token, SimpleToken, SimpleTokenKind, SimpleTokenizer};
use crate::comments::{
dangling_comments, leading_comments, leading_node_comments, trailing_comments,
@ -166,17 +166,17 @@ impl FormatNodeRule<Arguments> for FormatArguments {
.skip_trivia();
let comma = tokens.next();
assert!(matches!(comma, Some(Token { kind: TokenKind::Comma, .. })), "The last positional only argument must be separated by a `,` from the positional only arguments separator `/` but found '{comma:?}'.");
assert!(matches!(comma, Some(SimpleToken { kind: SimpleTokenKind::Comma, .. })), "The last positional only argument must be separated by a `,` from the positional only arguments separator `/` but found '{comma:?}'.");
let slash = tokens.next();
assert!(matches!(slash, Some(Token { kind: TokenKind::Slash, .. })), "The positional argument separator must be present for a function that has positional only arguments but found '{slash:?}'.");
assert!(matches!(slash, Some(SimpleToken { kind: SimpleTokenKind::Slash, .. })), "The positional argument separator must be present for a function that has positional only arguments but found '{slash:?}'.");
tokens.next()
} else {
first_non_trivia_token(last_node.end(), f.context().source())
};
if maybe_comma_token.map_or(false, |token| token.kind() == TokenKind::Comma) {
if maybe_comma_token.map_or(false, |token| token.kind() == SimpleTokenKind::Comma) {
write!(f, [hard_line_break()])?;
}
}
@ -298,11 +298,11 @@ pub(crate) fn find_argument_separators(
let comma = tokens
.next()
.expect("The function definition can't end here");
debug_assert!(comma.kind() == TokenKind::Comma, "{comma:?}");
debug_assert!(comma.kind() == SimpleTokenKind::Comma, "{comma:?}");
let slash = tokens
.next()
.expect("The function definition can't end here");
debug_assert!(slash.kind() == TokenKind::Slash, "{slash:?}");
debug_assert!(slash.kind() == SimpleTokenKind::Slash, "{slash:?}");
Some((preceding_end, slash.range))
} else {
@ -331,11 +331,11 @@ pub(crate) fn find_argument_separators(
let comma = tokens
.next()
.expect("The function definition can't end here");
debug_assert!(comma.kind() == TokenKind::Comma, "{comma:?}");
debug_assert!(comma.kind() == SimpleTokenKind::Comma, "{comma:?}");
let star = tokens
.next()
.expect("The function definition can't end here");
debug_assert!(star.kind() == TokenKind::Star, "{star:?}");
debug_assert!(star.kind() == SimpleTokenKind::Star, "{star:?}");
Some(ArgumentSeparator {
preceding_end,
@ -348,11 +348,11 @@ pub(crate) fn find_argument_separators(
let lparen = tokens
.next()
.expect("The function definition can't end here");
debug_assert!(lparen.kind() == TokenKind::LParen, "{lparen:?}");
debug_assert!(lparen.kind() == SimpleTokenKind::LParen, "{lparen:?}");
let star = tokens
.next()
.expect("The function definition can't end here");
debug_assert!(star.kind() == TokenKind::Star, "{star:?}");
debug_assert!(star.kind() == SimpleTokenKind::Star, "{star:?}");
Some(ArgumentSeparator {
preceding_end: arguments.range.start(),
separator: star.range,


@ -2,7 +2,7 @@ use ruff_text_size::TextRange;
use rustpython_parser::ast::{Ranged, StmtClassDef};
use ruff_formatter::write;
use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
use crate::comments::trailing_comments;
use crate::expression::parentheses::{parenthesized, Parentheses};
@ -97,7 +97,7 @@ impl Format<PyFormatContext<'_>> for FormatInheritanceClause<'_> {
.skip_trivia();
let left_paren_count = tokenizer
.take_while(|token| token.kind() == TokenKind::LParen)
.take_while(|token| token.kind() == SimpleTokenKind::LParen)
.count();
// Ignore the first parentheses count


@ -2,7 +2,7 @@ use rustpython_parser::ast::{Ranged, StmtFunctionDef};
use ruff_formatter::{write, FormatOwnedWithRule, FormatRefWithRule};
use ruff_python_ast::function::AnyFunctionDefinition;
use ruff_python_whitespace::{lines_after, skip_trailing_trivia};
use ruff_python_trivia::{lines_after, skip_trailing_trivia};
use crate::comments::{leading_comments, trailing_comments};
use crate::context::NodeLevel;


@ -3,7 +3,7 @@ use rustpython_parser::ast::{Ranged, StmtAsyncWith, StmtWith, Suite, WithItem};
use ruff_formatter::{format_args, write, FormatError};
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
use crate::comments::trailing_comments;
use crate::expression::parentheses::{
@ -125,7 +125,7 @@ fn are_with_items_parenthesized(
let mut tokenizer = SimpleTokenizer::new(context.source(), before_first_with_item)
.skip_trivia()
.skip_while(|t| t.kind() == TokenKind::Async);
.skip_while(|t| t.kind() == SimpleTokenKind::Async);
let with_keyword = tokenizer.next().ok_or(FormatError::syntax_error(
"Expected a with keyword, didn't find any token",
@ -133,13 +133,13 @@ fn are_with_items_parenthesized(
debug_assert_eq!(
with_keyword.kind(),
TokenKind::With,
SimpleTokenKind::With,
"Expected with keyword but at {with_keyword:?}"
);
match tokenizer.next() {
Some(left_paren) => {
debug_assert_eq!(left_paren.kind(), TokenKind::LParen);
debug_assert_eq!(left_paren.kind(), SimpleTokenKind::LParen);
Ok(true)
}
None => Ok(false),


@ -3,7 +3,7 @@ use rustpython_parser::ast::{Ranged, Stmt, Suite};
use ruff_formatter::{
format_args, write, FormatOwnedWithRule, FormatRefWithRule, FormatRuleWithOptions,
};
use ruff_python_whitespace::lines_before;
use ruff_python_trivia::lines_before;
use crate::context::NodeLevel;
use crate::prelude::*;


@ -1,5 +1,5 @@
[package]
name = "ruff_python_whitespace"
name = "ruff_python_trivia"
version = "0.0.0"
publish = false
authors = { workspace = true }


@ -22,7 +22,7 @@ impl UniversalNewlines for str {
///
/// ```rust
/// # use ruff_text_size::TextSize;
/// # use ruff_python_whitespace::{Line, UniversalNewlineIterator};
/// # use ruff_python_trivia::{Line, UniversalNewlineIterator};
/// let mut lines = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop");
///
/// assert_eq!(lines.next_back(), Some(Line::new("bop", TextSize::from(14))));


@ -1,217 +1,217 @@
---
source: crates/ruff_python_whitespace/src/tokenizer.rs
source: crates/ruff_python_trivia/src/tokenizer.rs
expression: test_case.tokenize_reverse()
---
[
Token {
SimpleToken {
kind: RParen,
range: 52..53,
},
Token {
SimpleToken {
kind: Other,
range: 51..52,
},
Token {
SimpleToken {
kind: Bogus,
range: 50..51,
},
Token {
SimpleToken {
kind: Bogus,
range: 49..50,
},
Token {
SimpleToken {
kind: Bogus,
range: 48..49,
},
Token {
SimpleToken {
kind: Bogus,
range: 47..48,
},
Token {
SimpleToken {
kind: Bogus,
range: 46..47,
},
Token {
SimpleToken {
kind: Bogus,
range: 45..46,
},
Token {
SimpleToken {
kind: Bogus,
range: 44..45,
},
Token {
SimpleToken {
kind: Bogus,
range: 43..44,
},
Token {
SimpleToken {
kind: Bogus,
range: 42..43,
},
Token {
SimpleToken {
kind: Bogus,
range: 41..42,
},
Token {
SimpleToken {
kind: Bogus,
range: 40..41,
},
Token {
SimpleToken {
kind: Bogus,
range: 39..40,
},
Token {
SimpleToken {
kind: Bogus,
range: 38..39,
},
Token {
SimpleToken {
kind: Bogus,
range: 37..38,
},
Token {
SimpleToken {
kind: Bogus,
range: 36..37,
},
Token {
SimpleToken {
kind: Bogus,
range: 35..36,
},
Token {
SimpleToken {
kind: Bogus,
range: 34..35,
},
Token {
SimpleToken {
kind: Bogus,
range: 33..34,
},
Token {
SimpleToken {
kind: Bogus,
range: 32..33,
},
Token {
SimpleToken {
kind: Bogus,
range: 31..32,
},
Token {
SimpleToken {
kind: Bogus,
range: 30..31,
},
Token {
SimpleToken {
kind: Bogus,
range: 29..30,
},
Token {
SimpleToken {
kind: Bogus,
range: 28..29,
},
Token {
SimpleToken {
kind: Bogus,
range: 27..28,
},
Token {
SimpleToken {
kind: Bogus,
range: 26..27,
},
Token {
SimpleToken {
kind: Bogus,
range: 25..26,
},
Token {
SimpleToken {
kind: Bogus,
range: 24..25,
},
Token {
SimpleToken {
kind: Bogus,
range: 23..24,
},
Token {
SimpleToken {
kind: Bogus,
range: 22..23,
},
Token {
SimpleToken {
kind: Bogus,
range: 21..22,
},
Token {
SimpleToken {
kind: Bogus,
range: 20..21,
},
Token {
SimpleToken {
kind: Bogus,
range: 19..20,
},
Token {
SimpleToken {
kind: Bogus,
range: 18..19,
},
Token {
SimpleToken {
kind: Bogus,
range: 17..18,
},
Token {
SimpleToken {
kind: Bogus,
range: 16..17,
},
Token {
SimpleToken {
kind: Bogus,
range: 15..16,
},
Token {
SimpleToken {
kind: Bogus,
range: 14..15,
},
Token {
SimpleToken {
kind: Bogus,
range: 13..14,
},
Token {
SimpleToken {
kind: Bogus,
range: 12..13,
},
Token {
SimpleToken {
kind: Bogus,
range: 11..12,
},
Token {
SimpleToken {
kind: Bogus,
range: 10..11,
},
Token {
SimpleToken {
kind: Bogus,
range: 9..10,
},
Token {
SimpleToken {
kind: Bogus,
range: 8..9,
},
Token {
SimpleToken {
kind: Bogus,
range: 7..8,
},
Token {
SimpleToken {
kind: Bogus,
range: 6..7,
},
Token {
SimpleToken {
kind: Bogus,
range: 5..6,
},
Token {
SimpleToken {
kind: Bogus,
range: 4..5,
},
Token {
SimpleToken {
kind: Bogus,
range: 3..4,
},
Token {
SimpleToken {
kind: Bogus,
range: 2..3,
},
Token {
SimpleToken {
kind: Bogus,
range: 1..2,
},
Token {
SimpleToken {
kind: Bogus,
range: 0..1,
},


@ -1,9 +1,9 @@
---
source: crates/ruff_python_whitespace/src/tokenizer.rs
source: crates/ruff_python_trivia/src/tokenizer.rs
expression: test_case.tokens()
---
[
Token {
SimpleToken {
kind: Other,
range: 0..2,
},


@ -1,17 +1,17 @@
---
source: crates/ruff_python_whitespace/src/tokenizer.rs
source: crates/ruff_python_trivia/src/tokenizer.rs
expression: test_case.tokens()
---
[
Token {
SimpleToken {
kind: Other,
range: 0..1,
},
Token {
SimpleToken {
kind: Bogus,
range: 1..2,
},
Token {
SimpleToken {
kind: Bogus,
range: 2..3,
},


@ -1,125 +1,125 @@
---
source: crates/ruff_python_whitespace/src/tokenizer.rs
source: crates/ruff_python_trivia/src/tokenizer.rs
expression: test_case.tokens()
---
[
Token {
SimpleToken {
kind: Comment,
range: 0..17,
},
Token {
SimpleToken {
kind: Newline,
range: 17..18,
},
Token {
SimpleToken {
kind: Whitespace,
range: 18..26,
},
Token {
SimpleToken {
kind: Other,
range: 26..27,
},
Token {
SimpleToken {
kind: Bogus,
range: 27..28,
},
Token {
SimpleToken {
kind: Bogus,
range: 28..29,
},
Token {
SimpleToken {
kind: Bogus,
range: 29..30,
},
Token {
SimpleToken {
kind: Bogus,
range: 30..31,
},
Token {
SimpleToken {
kind: Bogus,
range: 31..32,
},
Token {
SimpleToken {
kind: Bogus,
range: 32..33,
},
Token {
SimpleToken {
kind: Bogus,
range: 33..34,
},
Token {
SimpleToken {
kind: Bogus,
range: 34..35,
},
Token {
SimpleToken {
kind: Bogus,
range: 35..36,
},
Token {
SimpleToken {
kind: Bogus,
range: 36..37,
},
Token {
SimpleToken {
kind: Bogus,
range: 37..38,
},
Token {
SimpleToken {
kind: Bogus,
range: 38..39,
},
Token {
SimpleToken {
kind: Bogus,
range: 39..40,
},
Token {
SimpleToken {
kind: Bogus,
range: 40..41,
},
Token {
SimpleToken {
kind: Bogus,
range: 41..42,
},
Token {
SimpleToken {
kind: Bogus,
range: 42..43,
},
Token {
SimpleToken {
kind: Bogus,
range: 43..44,
},
Token {
SimpleToken {
kind: Bogus,
range: 44..45,
},
Token {
SimpleToken {
kind: Bogus,
range: 45..46,
},
Token {
SimpleToken {
kind: Bogus,
range: 46..47,
},
Token {
SimpleToken {
kind: Bogus,
range: 47..48,
},
Token {
SimpleToken {
kind: Bogus,
range: 48..49,
},
Token {
SimpleToken {
kind: Bogus,
range: 49..50,
},
Token {
SimpleToken {
kind: Bogus,
range: 50..51,
},
Token {
SimpleToken {
kind: Bogus,
range: 51..52,
},
Token {
SimpleToken {
kind: Bogus,
range: 52..53,
},


@ -1,21 +1,21 @@
---
source: crates/ruff_python_whitespace/src/tokenizer.rs
source: crates/ruff_python_trivia/src/tokenizer.rs
expression: test_case.tokens()
---
[
Token {
SimpleToken {
kind: Comma,
range: 0..1,
},
Token {
SimpleToken {
kind: Comma,
range: 1..2,
},
Token {
SimpleToken {
kind: Comma,
range: 2..3,
},
Token {
SimpleToken {
kind: Comma,
range: 3..4,
},


@ -1,29 +1,29 @@
---
source: crates/ruff_python_whitespace/src/tokenizer.rs
source: crates/ruff_python_trivia/src/tokenizer.rs
expression: test_case.tokens()
---
[
Token {
SimpleToken {
kind: LParen,
range: 0..1,
},
Token {
SimpleToken {
kind: Whitespace,
range: 1..2,
},
Token {
SimpleToken {
kind: Continuation,
range: 2..3,
},
Token {
SimpleToken {
kind: Newline,
range: 3..4,
},
Token {
SimpleToken {
kind: Whitespace,
range: 4..5,
},
Token {
SimpleToken {
kind: RParen,
range: 5..6,
},


@ -1,33 +1,33 @@
---
source: crates/ruff_python_whitespace/src/tokenizer.rs
source: crates/ruff_python_trivia/src/tokenizer.rs
expression: test_case.tokens()
---
[
Token {
SimpleToken {
kind: If,
range: 0..2,
},
Token {
SimpleToken {
kind: Whitespace,
range: 2..3,
},
Token {
SimpleToken {
kind: In,
range: 3..5,
},
Token {
SimpleToken {
kind: Whitespace,
range: 5..6,
},
Token {
SimpleToken {
kind: Else,
range: 6..10,
},
Token {
SimpleToken {
kind: Whitespace,
range: 10..11,
},
Token {
SimpleToken {
kind: Match,
range: 11..16,
},


@ -1,29 +1,29 @@
---
source: crates/ruff_python_whitespace/src/tokenizer.rs
source: crates/ruff_python_trivia/src/tokenizer.rs
expression: test_case.tokens()
---
[
Token {
SimpleToken {
kind: LParen,
range: 0..1,
},
Token {
SimpleToken {
kind: LBracket,
range: 1..2,
},
Token {
SimpleToken {
kind: LBrace,
range: 2..3,
},
Token {
SimpleToken {
kind: RBrace,
range: 3..4,
},
Token {
SimpleToken {
kind: RBracket,
range: 4..5,
},
Token {
SimpleToken {
kind: RParen,
range: 5..6,
},


@ -1,41 +1,41 @@
---
source: crates/ruff_python_whitespace/src/tokenizer.rs
source: crates/ruff_python_trivia/src/tokenizer.rs
expression: test_case.tokens()
---
[
Token {
SimpleToken {
kind: Whitespace,
range: 0..1,
},
Token {
SimpleToken {
kind: Comment,
range: 1..30,
},
Token {
SimpleToken {
kind: Newline,
range: 30..31,
},
Token {
SimpleToken {
kind: Whitespace,
range: 31..39,
},
Token {
SimpleToken {
kind: Comment,
range: 39..77,
},
Token {
SimpleToken {
kind: Newline,
range: 77..78,
},
Token {
SimpleToken {
kind: Whitespace,
range: 78..86,
},
Token {
SimpleToken {
kind: Comma,
range: 86..87,
},
Token {
SimpleToken {
kind: Slash,
range: 87..88,
},


@ -1,17 +1,17 @@
---
source: crates/ruff_python_whitespace/src/tokenizer.rs
source: crates/ruff_python_trivia/src/tokenizer.rs
expression: test_case.tokens()
---
[
Token {
SimpleToken {
kind: RParen,
range: 14..15,
},
Token {
SimpleToken {
kind: Whitespace,
range: 15..16,
},
Token {
SimpleToken {
kind: Comment,
range: 16..25,
},


@ -1,21 +1,21 @@
---
source: crates/ruff_python_whitespace/src/tokenizer.rs
source: crates/ruff_python_trivia/src/tokenizer.rs
expression: test_case.tokens()
---
[
Token {
SimpleToken {
kind: Comment,
range: 0..9,
},
Token {
SimpleToken {
kind: Newline,
range: 9..10,
},
Token {
SimpleToken {
kind: Whitespace,
range: 10..14,
},
Token {
SimpleToken {
kind: Comment,
range: 14..23,
},


@ -1,9 +1,9 @@
---
source: crates/ruff_python_whitespace/src/tokenizer.rs
source: crates/ruff_python_trivia/src/tokenizer.rs
expression: test_case.tokens()
---
[
Token {
SimpleToken {
kind: Other,
range: 0..6,
},


@ -12,7 +12,7 @@ use crate::{is_python_whitespace, Cursor};
/// of the character, the second item the non-trivia character.
///
/// Returns `None` if the range is empty or only contains trivia (whitespace or comments).
pub fn first_non_trivia_token(offset: TextSize, code: &str) -> Option<Token> {
pub fn first_non_trivia_token(offset: TextSize, code: &str) -> Option<SimpleToken> {
SimpleTokenizer::starts_at(offset, code)
.skip_trivia()
.next()
@ -24,7 +24,7 @@ pub fn first_non_trivia_token(offset: TextSize, code: &str) -> Option<Token> {
/// ## Notes
///
/// Prefer [`first_non_trivia_token`] whenever possible because reverse lookup is expensive because of comments.
pub fn first_non_trivia_token_rev(offset: TextSize, code: &str) -> Option<Token> {
pub fn first_non_trivia_token_rev(offset: TextSize, code: &str) -> Option<SimpleToken> {
SimpleTokenizer::up_to(offset, code)
.skip_trivia()
.next_back()
@ -37,10 +37,10 @@ pub fn lines_before(offset: TextSize, code: &str) -> u32 {
for token in tokens.rev() {
match token.kind() {
TokenKind::Newline => {
SimpleTokenKind::Newline => {
newlines += 1;
}
TokenKind::Whitespace => {
SimpleTokenKind::Whitespace => {
// ignore
}
_ => {
@ -59,10 +59,10 @@ pub fn lines_after(offset: TextSize, code: &str) -> u32 {
for token in tokens {
match token.kind() {
TokenKind::Newline => {
SimpleTokenKind::Newline => {
newlines += 1;
}
TokenKind::Whitespace => {
SimpleTokenKind::Whitespace => {
// ignore
}
_ => {
@ -80,7 +80,9 @@ pub fn skip_trailing_trivia(offset: TextSize, code: &str) -> TextSize {
for token in tokenizer {
match token.kind() {
TokenKind::Whitespace | TokenKind::Comment | TokenKind::Continuation => {
SimpleTokenKind::Whitespace
| SimpleTokenKind::Comment
| SimpleTokenKind::Continuation => {
// No op
}
_ => {
@ -111,13 +113,13 @@ fn is_non_ascii_identifier_start(c: char) -> bool {
}
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub struct Token {
pub kind: TokenKind,
pub struct SimpleToken {
pub kind: SimpleTokenKind,
pub range: TextRange,
}
impl Token {
pub const fn kind(&self) -> TokenKind {
impl SimpleToken {
pub const fn kind(&self) -> SimpleTokenKind {
self.kind
}
@ -136,7 +138,7 @@ impl Token {
}
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum TokenKind {
pub enum SimpleTokenKind {
/// A comment, not including the trailing new line.
Comment,
@ -209,35 +211,35 @@ pub enum TokenKind {
/// Any other non trivia token.
Other,
/// Returned for each character after [`TokenKind::Other`] has been returned once.
/// Returned for each character after [`SimpleTokenKind::Other`] has been returned once.
Bogus,
}
impl TokenKind {
const fn from_non_trivia_char(c: char) -> TokenKind {
impl SimpleTokenKind {
const fn from_non_trivia_char(c: char) -> SimpleTokenKind {
match c {
'(' => TokenKind::LParen,
')' => TokenKind::RParen,
'[' => TokenKind::LBracket,
']' => TokenKind::RBracket,
'{' => TokenKind::LBrace,
'}' => TokenKind::RBrace,
',' => TokenKind::Comma,
':' => TokenKind::Colon,
'/' => TokenKind::Slash,
'*' => TokenKind::Star,
'.' => TokenKind::Dot,
_ => TokenKind::Other,
'(' => SimpleTokenKind::LParen,
')' => SimpleTokenKind::RParen,
'[' => SimpleTokenKind::LBracket,
']' => SimpleTokenKind::RBracket,
'{' => SimpleTokenKind::LBrace,
'}' => SimpleTokenKind::RBrace,
',' => SimpleTokenKind::Comma,
':' => SimpleTokenKind::Colon,
'/' => SimpleTokenKind::Slash,
'*' => SimpleTokenKind::Star,
'.' => SimpleTokenKind::Dot,
_ => SimpleTokenKind::Other,
}
}
const fn is_trivia(self) -> bool {
matches!(
self,
TokenKind::Whitespace
| TokenKind::Newline
| TokenKind::Comment
| TokenKind::Continuation
SimpleTokenKind::Whitespace
| SimpleTokenKind::Newline
| SimpleTokenKind::Comment
| SimpleTokenKind::Continuation
)
}
}
@ -246,8 +248,8 @@ impl TokenKind {
///
/// The tokenizer must start at an offset that is trivia (e.g. not inside of a multiline string).
///
/// The tokenizer doesn't guarantee any correctness after it returned a [`TokenKind::Other`]. That's why it
/// will return [`TokenKind::Bogus`] for every character after until it reaches the end of the file.
/// The tokenizer doesn't guarantee any correctness after it returned a [`SimpleTokenKind::Other`]. That's why it
/// will return [`SimpleTokenKind::Bogus`] for every character after until it reaches the end of the file.
pub struct SimpleTokenizer<'a> {
offset: TextSize,
back_offset: TextSize,
@ -289,34 +291,34 @@ impl<'a> SimpleTokenizer<'a> {
tokenizer
}
fn to_keyword_or_other(&self, range: TextRange) -> TokenKind {
fn to_keyword_or_other(&self, range: TextRange) -> SimpleTokenKind {
let source = &self.source[range];
match source {
"as" => TokenKind::As,
"async" => TokenKind::Async,
"else" => TokenKind::Else,
"if" => TokenKind::If,
"in" => TokenKind::In,
"match" => TokenKind::Match, // Match is a soft keyword that depends on the context but we can always lex it as a keyword and leave it to the caller (parser) to decide if it should be handled as an identifier or keyword.
"with" => TokenKind::With,
"as" => SimpleTokenKind::As,
"async" => SimpleTokenKind::Async,
"else" => SimpleTokenKind::Else,
"if" => SimpleTokenKind::If,
"in" => SimpleTokenKind::In,
"match" => SimpleTokenKind::Match, // Match is a soft keyword that depends on the context but we can always lex it as a keyword and leave it to the caller (parser) to decide if it should be handled as an identifier or keyword.
"with" => SimpleTokenKind::With,
// ...,
_ => TokenKind::Other, // Potentially an identifier, but only if it isn't a string prefix. We can ignore this for now https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
_ => SimpleTokenKind::Other, // Potentially an identifier, but only if it isn't a string prefix. We can ignore this for now https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
}
}
fn next_token(&mut self) -> Token {
fn next_token(&mut self) -> SimpleToken {
self.cursor.start_token();
let Some(first) = self.cursor.bump() else {
return Token {
kind: TokenKind::EndOfFile,
return SimpleToken {
kind: SimpleTokenKind::EndOfFile,
range: TextRange::empty(self.offset),
};
};
if self.bogus {
let token = Token {
kind: TokenKind::Bogus,
let token = SimpleToken {
kind: SimpleTokenKind::Bogus,
range: TextRange::at(self.offset, first.text_len()),
};
@ -327,22 +329,22 @@ impl<'a> SimpleTokenizer<'a> {
let kind = match first {
' ' | '\t' => {
self.cursor.eat_while(|c| matches!(c, ' ' | '\t'));
TokenKind::Whitespace
SimpleTokenKind::Whitespace
}
'\n' => TokenKind::Newline,
'\n' => SimpleTokenKind::Newline,
'\r' => {
self.cursor.eat_char('\n');
TokenKind::Newline
SimpleTokenKind::Newline
}
'#' => {
self.cursor.eat_while(|c| !matches!(c, '\n' | '\r'));
TokenKind::Comment
SimpleTokenKind::Comment
}
'\\' => TokenKind::Continuation,
'\\' => SimpleTokenKind::Continuation,
c => {
let kind = if is_identifier_start(c) {
@ -352,10 +354,10 @@ impl<'a> SimpleTokenizer<'a> {
let range = TextRange::at(self.offset, token_len);
self.to_keyword_or_other(range)
} else {
TokenKind::from_non_trivia_char(c)
SimpleTokenKind::from_non_trivia_char(c)
};
if kind == TokenKind::Other {
if kind == SimpleTokenKind::Other {
self.bogus = true;
}
kind
@ -364,7 +366,7 @@ impl<'a> SimpleTokenizer<'a> {
let token_len = self.cursor.token_len();
let token = Token {
let token = SimpleToken {
kind,
range: TextRange::at(self.offset, token_len),
};
@ -376,19 +378,19 @@ impl<'a> SimpleTokenizer<'a> {
/// Returns the next token from the back. Prefer iterating forwards. Iterating backwards is significantly more expensive
/// because it needs to check if the line has any comments when encountering any non-trivia token.
pub fn next_token_back(&mut self) -> Token {
pub fn next_token_back(&mut self) -> SimpleToken {
self.cursor.start_token();
let Some(last) = self.cursor.bump_back() else {
return Token {
kind: TokenKind::EndOfFile,
return SimpleToken {
kind: SimpleTokenKind::EndOfFile,
range: TextRange::empty(self.back_offset),
};
};
if self.bogus {
let token = Token {
kind: TokenKind::Bogus,
let token = SimpleToken {
kind: SimpleTokenKind::Bogus,
range: TextRange::at(self.back_offset - last.text_len(), last.text_len()),
};
@ -401,22 +403,22 @@ impl<'a> SimpleTokenizer<'a> {
// as whitespace rather than being part of the token. This shouldn't matter for what we use the lexer for.
' ' | '\t' => {
self.cursor.eat_back_while(|c| matches!(c, ' ' | '\t'));
TokenKind::Whitespace
SimpleTokenKind::Whitespace
}
'\r' => {
self.back_line_has_no_comment = false;
TokenKind::Newline
SimpleTokenKind::Newline
}
'\n' => {
self.back_line_has_no_comment = false;
self.cursor.eat_char_back('\r');
TokenKind::Newline
SimpleTokenKind::Newline
}
// Empty comment (could also be a comment nested in another comment, but this shouldn't matter for what we use the lexer for)
'#' => TokenKind::Comment,
'#' => SimpleTokenKind::Comment,
// For all other tokens, test if the character isn't part of a comment.
c => {
@ -447,7 +449,8 @@ impl<'a> SimpleTokenizer<'a> {
before_comment.chars().all(|c| {
is_python_whitespace(c)
|| TokenKind::from_non_trivia_char(c) != TokenKind::Other
|| SimpleTokenKind::from_non_trivia_char(c)
!= SimpleTokenKind::Other
})
})
};
@ -462,9 +465,9 @@ impl<'a> SimpleTokenizer<'a> {
self.cursor.bump_back().unwrap();
}
TokenKind::Comment
SimpleTokenKind::Comment
} else if c == '\\' {
TokenKind::Continuation
SimpleTokenKind::Continuation
} else {
let kind = if is_identifier_continuation(c) {
// if we only have identifier continuations but no start (e.g. 555) we
@ -484,13 +487,13 @@ impl<'a> SimpleTokenizer<'a> {
self.to_keyword_or_other(range)
} else {
self.cursor = savepoint;
TokenKind::Other
SimpleTokenKind::Other
}
} else {
TokenKind::from_non_trivia_char(c)
SimpleTokenKind::from_non_trivia_char(c)
};
if kind == TokenKind::Other {
if kind == SimpleTokenKind::Other {
self.bogus = true;
}
@ -503,7 +506,7 @@ impl<'a> SimpleTokenizer<'a> {
let start = self.back_offset - token_len;
let token = Token {
let token = SimpleToken {
kind,
range: TextRange::at(start, token_len),
};
@ -513,18 +516,18 @@ impl<'a> SimpleTokenizer<'a> {
token
}
pub fn skip_trivia(self) -> impl Iterator<Item = Token> + DoubleEndedIterator + 'a {
pub fn skip_trivia(self) -> impl Iterator<Item = SimpleToken> + DoubleEndedIterator + 'a {
self.filter(|t| !t.kind().is_trivia())
}
}
impl Iterator for SimpleTokenizer<'_> {
type Item = Token;
type Item = SimpleToken;
fn next(&mut self) -> Option<Self::Item> {
let token = self.next_token();
if token.kind == TokenKind::EndOfFile {
if token.kind == SimpleTokenKind::EndOfFile {
None
} else {
Some(token)
@ -536,7 +539,7 @@ impl DoubleEndedIterator for SimpleTokenizer<'_> {
fn next_back(&mut self) -> Option<Self::Item> {
let token = self.next_token_back();
if token.kind == TokenKind::EndOfFile {
if token.kind == SimpleTokenKind::EndOfFile {
None
} else {
Some(token)
@ -549,12 +552,12 @@ mod tests {
use insta::assert_debug_snapshot;
use ruff_text_size::{TextLen, TextRange, TextSize};
use crate::tokenizer::{lines_after, lines_before, SimpleTokenizer, Token};
use crate::tokenizer::{lines_after, lines_before, SimpleToken, SimpleTokenizer};
struct TokenizationTestCase {
source: &'static str,
range: TextRange,
tokens: Vec<Token>,
tokens: Vec<SimpleToken>,
}
impl TokenizationTestCase {
@ -567,13 +570,13 @@ mod tests {
assert_eq!(&backwards, &self.tokens);
}
fn tokenize_reverse(&self) -> Vec<Token> {
fn tokenize_reverse(&self) -> Vec<SimpleToken> {
SimpleTokenizer::new(self.source, self.range)
.rev()
.collect()
}
fn tokens(&self) -> &[Token] {
fn tokens(&self) -> &[SimpleToken] {
&self.tokens
}
}


@ -11,5 +11,5 @@ repository = { workspace = true }
license = { workspace = true }
[dependencies]
ruff_python_whitespace = { path = "../ruff_python_whitespace" }
ruff_python_trivia = { path = "../ruff_python_trivia" }
ruff_text_size = { workspace = true }


@ -4,7 +4,7 @@
use std::borrow::Cow;
use std::cmp;
use ruff_python_whitespace::{PythonWhitespace, UniversalNewlines};
use ruff_python_trivia::{PythonWhitespace, UniversalNewlines};
/// Indent each line by the given prefix.
///