Formatter and parser refactoring (#7569)

I got confused and refactored a bit, now the naming should be more consistent. This is the basis for the range formatting work. Chages: * `format_module` -> `format_module_source` (format a string) * `format_node` -> `format_module_ast` (format a program parsed into an AST) * Added `parse_ok_tokens` that takes `Token` instead of `Result<Token>` * Call the source code `source` consistently * Added a `tokens_and_ranges` helper * `python_ast` -> `module` (because that's the type)
2025-10-07 00:50:37 +00:00 · 2023-09-26 15:29:43 +02:00 · 2023-09-26 15:29:43 +02:00 · 4d16e2308d
commit 4d16e2308d
parent 2cb5e43dd7
14 changed files with 126 additions and 138 deletions
--- a/crates/ruff_python_formatter/src/lib.rs
+++ b/crates/ruff_python_formatter/src/lib.rs
@ -5,9 +5,9 @@ use ruff_formatter::prelude::*;
 use ruff_formatter::{format, FormatError, Formatted, PrintError, Printed, SourceCode};
 use ruff_python_ast::node::AstNode;
 use ruff_python_ast::Mod;
-use ruff_python_index::CommentRangesBuilder;
-use ruff_python_parser::lexer::{lex, LexicalError};
-use ruff_python_parser::{parse_tokens, Mode, ParseError};
+use ruff_python_index::tokens_and_ranges;
+use ruff_python_parser::lexer::LexicalError;
+use ruff_python_parser::{parse_ok_tokens, Mode, ParseError};
 use ruff_python_trivia::CommentRanges;
 use ruff_source_file::Locator;

@ -121,61 +121,44 @@ impl From<ParseError> for FormatModuleError {
    }
 }

-#[tracing::instrument(level = Level::TRACE, skip_all)]
-pub fn format_module(
-    contents: &str,
+#[tracing::instrument(name = "format", level = Level::TRACE, skip_all)]
+pub fn format_module_source(
+    source: &str,
    options: PyFormatOptions,
 ) -> Result<Printed, FormatModuleError> {
-    // Tokenize once
-    let mut tokens = Vec::new();
-    let mut comment_ranges = CommentRangesBuilder::default();
-
-    for result in lex(contents, Mode::Module) {
-        let (token, range) = result?;
-
-        comment_ranges.visit_token(&token, range);
-        tokens.push(Ok((token, range)));
-    }
-
-    let comment_ranges = comment_ranges.finish();
-
-    // Parse the AST.
-    let python_ast = parse_tokens(tokens, Mode::Module, "<filename>")?;
-
-    let formatted = format_node(&python_ast, &comment_ranges, contents, options)?;
-
+    let (tokens, comment_ranges) = tokens_and_ranges(source)?;
+    let module = parse_ok_tokens(tokens, Mode::Module, "<filename>")?;
+    let formatted = format_module_ast(&module, &comment_ranges, source, options)?;
    Ok(formatted.print()?)
 }

-pub fn format_node<'a>(
-    root: &'a Mod,
+pub fn format_module_ast<'a>(
+    module: &'a Mod,
    comment_ranges: &'a CommentRanges,
    source: &'a str,
    options: PyFormatOptions,
 ) -> FormatResult<Formatted<PyFormatContext<'a>>> {
-    let comments = Comments::from_ast(root, SourceCode::new(source), comment_ranges);
-
+    let source_code = SourceCode::new(source);
+    let comments = Comments::from_ast(module, source_code, comment_ranges);
    let locator = Locator::new(source);

    let formatted = format!(
        PyFormatContext::new(options, locator.contents(), comments),
-        [root.format()]
+        [module.format()]
    )?;
    formatted
        .context()
        .comments()
-        .assert_all_formatted(SourceCode::new(source));
+        .assert_all_formatted(source_code);
    Ok(formatted)
 }

 /// Public function for generating a printable string of the debug comments.
-pub fn pretty_comments(root: &Mod, comment_ranges: &CommentRanges, source: &str) -> String {
-    let comments = Comments::from_ast(root, SourceCode::new(source), comment_ranges);
+pub fn pretty_comments(module: &Mod, comment_ranges: &CommentRanges, source: &str) -> String {
+    let source_code = SourceCode::new(source);
+    let comments = Comments::from_ast(module, source_code, comment_ranges);

-    std::format!(
-        "{comments:#?}",
-        comments = comments.debug(SourceCode::new(source))
-    )
+    std::format!("{comments:#?}", comments = comments.debug(source_code))
 }

 #[cfg(test)]
@ -185,11 +168,11 @@ mod tests {
    use anyhow::Result;
    use insta::assert_snapshot;

-    use ruff_python_index::CommentRangesBuilder;
-    use ruff_python_parser::lexer::lex;
-    use ruff_python_parser::{parse_tokens, Mode};
+    use ruff_python_index::tokens_and_ranges;

-    use crate::{format_module, format_node, PyFormatOptions};
+    use ruff_python_parser::{parse_ok_tokens, Mode};
+
+    use crate::{format_module_ast, format_module_source, PyFormatOptions};

    /// Very basic test intentionally kept very similar to the CLI
    #[test]
@ -205,7 +188,7 @@ if True:
    pass
 # trailing
 "#;
-        let actual = format_module(input, PyFormatOptions::default())?
+        let actual = format_module_source(input, PyFormatOptions::default())?
            .as_code()
            .to_string();
        assert_eq!(expected, actual);
@ -216,7 +199,7 @@ if True:
    #[ignore]
    #[test]
    fn quick_test() {
-        let src = r#"
+        let source = r#"
 def main() -> None:
    if True:
        some_very_long_variable_name_abcdefghijk = Foo()
@ -226,23 +209,13 @@ def main() -> None:
        ]

 "#;
-        // Tokenize once
-        let mut tokens = Vec::new();
-        let mut comment_ranges = CommentRangesBuilder::default();
-
-        for result in lex(src, Mode::Module) {
-            let (token, range) = result.unwrap();
-            comment_ranges.visit_token(&token, range);
-            tokens.push(Ok((token, range)));
-        }
-
-        let comment_ranges = comment_ranges.finish();
+        let (tokens, comment_ranges) = tokens_and_ranges(source).unwrap();

        // Parse the AST.
        let source_path = "code_inline.py";
-        let python_ast = parse_tokens(tokens, Mode::Module, source_path).unwrap();
+        let module = parse_ok_tokens(tokens, Mode::Module, source_path).unwrap();
        let options = PyFormatOptions::from_extension(Path::new(source_path));
-        let formatted = format_node(&python_ast, &comment_ranges, src, options).unwrap();
+        let formatted = format_module_ast(&module, &comment_ranges, source, options).unwrap();

        // Uncomment the `dbg` to print the IR.
        // Use `dbg_write!(f, []) instead of `write!(f, [])` in your formatting code to print some IR