Formatter and parser refactoring (#7569)

I got confused by the naming, so I refactored a bit; the naming should now be
more consistent. This is the basis for the range formatting work.

Changes:
* `format_module` -> `format_module_source` (formats a string; see the sketch after this list)
* `format_node` -> `format_module_ast` (formats a program parsed into an AST)
* Added `parse_ok_tokens`, which takes `Token` instead of `Result<Token>`
* Consistently call the source code `source`
* Added a `tokens_and_ranges` helper
* `python_ast` -> `module` (because that's the type)
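
A minimal usage sketch of the renamed entry point (mine, not from this commit; it assumes `ruff_python_formatter`'s existing `PyFormatOptions` and `Printed` API is otherwise unchanged):

```rust
use ruff_python_formatter::{format_module_source, PyFormatOptions};

fn main() {
    // format_module_source (formerly format_module) takes the source as a
    // string, then parses and formats it in one call.
    let printed = format_module_source("x   =   1", PyFormatOptions::default())
        .expect("example source should format");
    print!("{}", printed.as_code());
}
```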

konsti committed on 2023-09-26 15:29:43 +02:00 (via GitHub)
commit 4d16e2308d, parent 2cb5e43dd7
14 changed files with 126 additions and 138 deletions

@@ -110,8 +110,8 @@
 //! [lexer]: crate::lexer
 
 pub use parser::{
-    parse, parse_expression, parse_expression_starts_at, parse_program, parse_starts_at,
-    parse_suite, parse_tokens, ParseError, ParseErrorType,
+    parse, parse_expression, parse_expression_starts_at, parse_ok_tokens, parse_program,
+    parse_starts_at, parse_suite, parse_tokens, ParseError, ParseErrorType,
 };
 use ruff_python_ast::{CmpOp, Expr, Mod, PySourceType, Suite};
 use ruff_text_size::{Ranged, TextRange, TextSize};

@@ -18,7 +18,7 @@ use itertools::Itertools;
 pub(super) use lalrpop_util::ParseError as LalrpopError;
 use ruff_text_size::{TextRange, TextSize};
 
-use crate::lexer::{lex, lex_starts_at};
+use crate::lexer::{lex, lex_starts_at, Spanned};
 use crate::{
     lexer::{self, LexResult, LexicalError, LexicalErrorType},
     python,
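
For reference (not part of this diff): in `ruff_python_parser::lexer`, `Spanned` is the token-plus-range pair and `LexResult` wraps it in a `Result` to carry lexical errors, which is why the new `parse_ok_tokens` below can accept a token stream with no error handling:

```rust
// Type aliases as they appear in ruff_python_parser::lexer at this commit.
pub type Spanned = (Tok, TextRange);
pub type LexResult = Result<Spanned, LexicalError>;
```
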
@@ -159,7 +159,7 @@ pub fn parse_expression_starts_at(
 /// let program = parse(source, Mode::Ipython, "<embedded>");
 /// assert!(program.is_ok());
 /// ```
-pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<ast::Mod, ParseError> {
+pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<Mod, ParseError> {
     parse_starts_at(source, mode, source_path, TextSize::default())
 }
@@ -191,7 +191,7 @@ pub fn parse_starts_at(
     mode: Mode,
     source_path: &str,
     offset: TextSize,
-) -> Result<ast::Mod, ParseError> {
+) -> Result<Mod, ParseError> {
     let lxr = lexer::lex_starts_at(source, mode, offset);
     parse_tokens(lxr, mode, source_path)
 }
@@ -215,7 +215,7 @@ pub fn parse_tokens(
     lxr: impl IntoIterator<Item = LexResult>,
     mode: Mode,
     source_path: &str,
-) -> Result<ast::Mod, ParseError> {
+) -> Result<Mod, ParseError> {
     let lxr = lxr.into_iter();
     parse_filtered_tokens(
@@ -225,19 +225,35 @@
     )
 }
 
+/// Parse tokens into an AST like [`parse_tokens`], but we already know all tokens are valid.
+pub fn parse_ok_tokens(
+    lxr: impl IntoIterator<Item = Spanned>,
+    mode: Mode,
+    source_path: &str,
+) -> Result<Mod, ParseError> {
+    let lxr = lxr
+        .into_iter()
+        .filter(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
+    let marker_token = (Tok::start_marker(mode), TextRange::default());
+    let lexer = iter::once(marker_token)
+        .chain(lxr)
+        .map(|(t, range)| (range.start(), t, range.end()));
+    python::TopParser::new()
+        .parse(mode, lexer)
+        .map_err(|e| parse_error_from_lalrpop(e, source_path))
+}
+
 fn parse_filtered_tokens(
     lxr: impl IntoIterator<Item = LexResult>,
     mode: Mode,
     source_path: &str,
-) -> Result<ast::Mod, ParseError> {
+) -> Result<Mod, ParseError> {
     let marker_token = (Tok::start_marker(mode), TextRange::default());
     let lexer = iter::once(Ok(marker_token)).chain(lxr);
     python::TopParser::new()
         .parse(
             mode,
-            lexer
-                .into_iter()
-                .map_ok(|(t, range)| (range.start(), t, range.end())),
+            lexer.map_ok(|(t, range)| (range.start(), t, range.end())),
         )
         .map_err(|e| parse_error_from_lalrpop(e, source_path))
 }
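
A usage sketch contrasting the two parsing entry points (mine, not from this commit; the `<example>` path and the `expect` calls are illustrative): `parse_tokens` accepts `LexResult` items and reports lexical errors as parse errors, while `parse_ok_tokens` expects the caller to have already ruled them out:

```rust
use ruff_python_parser::{lexer, parse_ok_tokens, parse_tokens, Mode};

fn demo(source: &str) {
    // Fallible path: lexical errors surface through the returned ParseError.
    let from_results = parse_tokens(lexer::lex(source, Mode::Module), Mode::Module, "<example>");

    // Infallible path: unwrap the lexer output first; parse_ok_tokens then
    // filters out comments and non-logical newlines itself (see above).
    let tokens: Vec<_> = lexer::lex(source, Mode::Module)
        .collect::<Result<_, _>>()
        .expect("caller guarantees the source lexes");
    let from_tokens = parse_ok_tokens(tokens, Mode::Module, "<example>");

    assert_eq!(from_results.is_ok(), from_tokens.is_ok());
}
```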