Formatter and parser refactoring (#7569)

I got confused and refactored a bit, now the naming should be more
consistent. This is the basis for the range formatting work.

Chages:
* `format_module` -> `format_module_source` (format a string)
* `format_node` -> `format_module_ast` (format a program parsed into an
AST)
* Added `parse_ok_tokens` that takes `Token` instead of `Result<Token>`
* Call the source code `source` consistently
* Added a `tokens_and_ranges` helper
* `python_ast` -> `module` (because that's the type)
This commit is contained in:
konsti 2023-09-26 15:29:43 +02:00 committed by GitHub
parent 2cb5e43dd7
commit 4d16e2308d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 126 additions and 138 deletions

View file

@ -4,7 +4,7 @@ use ruff_benchmark::criterion::{
criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
};
use ruff_benchmark::{TestCase, TestFile, TestFileDownloadError};
use ruff_python_formatter::{format_node, PyFormatOptions};
use ruff_python_formatter::{format_module_ast, PyFormatOptions};
use ruff_python_index::CommentRangesBuilder;
use ruff_python_parser::lexer::lex;
use ruff_python_parser::{parse_tokens, Mode};
@ -65,12 +65,13 @@ fn benchmark_formatter(criterion: &mut Criterion) {
let comment_ranges = comment_ranges.finish();
// Parse the AST.
let python_ast = parse_tokens(tokens, Mode::Module, "<filename>")
let module = parse_tokens(tokens, Mode::Module, "<filename>")
.expect("Input to be a valid python program");
b.iter(|| {
let options = PyFormatOptions::from_extension(Path::new(case.name()));
let formatted = format_node(&python_ast, &comment_ranges, case.code(), options)
let formatted =
format_module_ast(&module, &comment_ranges, case.code(), options)
.expect("Formatting to succeed");
formatted.print().expect("Printing to succeed")

View file

@ -15,7 +15,7 @@ use ruff_linter::fs;
use ruff_linter::logging::LogLevel;
use ruff_linter::warn_user_once;
use ruff_python_ast::{PySourceType, SourceType};
use ruff_python_formatter::{format_module, FormatModuleError};
use ruff_python_formatter::{format_module_source, FormatModuleError};
use ruff_workspace::resolver::python_files_in_path;
use ruff_workspace::FormatterSettings;
@ -151,7 +151,7 @@ fn format_path(
let options = settings.to_format_options(source_type, &unformatted);
debug!("Formatting {} with {:?}", path.display(), options);
let formatted = format_module(&unformatted, options)
let formatted = format_module_source(&unformatted, options)
.map_err(|err| FormatCommandError::FormatModule(Some(path.to_path_buf()), err))?;
let formatted = formatted.as_code();

View file

@ -5,7 +5,7 @@ use anyhow::Result;
use log::warn;
use ruff_python_ast::PySourceType;
use ruff_python_formatter::format_module;
use ruff_python_formatter::format_module_source;
use ruff_workspace::resolver::python_file_at_path;
use ruff_workspace::FormatterSettings;
@ -70,7 +70,7 @@ fn format_source(
&unformatted,
);
let formatted = format_module(&unformatted, options)
let formatted = format_module_source(&unformatted, options)
.map_err(|err| FormatCommandError::FormatModule(path.map(Path::to_path_buf), err))?;
let formatted = formatted.as_code();

View file

@ -34,7 +34,7 @@ use ruff_formatter::{FormatError, LineWidth, PrintError};
use ruff_linter::logging::LogLevel;
use ruff_linter::settings::types::{FilePattern, FilePatternSet};
use ruff_python_formatter::{
format_module, FormatModuleError, MagicTrailingComma, PyFormatOptions,
format_module_source, FormatModuleError, MagicTrailingComma, PyFormatOptions,
};
use ruff_workspace::resolver::{python_files_in_path, PyprojectConfig, Resolver};
@ -799,7 +799,7 @@ fn format_dev_file(
let content = fs::read_to_string(input_path)?;
#[cfg(not(debug_assertions))]
let start = Instant::now();
let printed = match format_module(&content, options.clone()) {
let printed = match format_module_source(&content, options.clone()) {
Ok(printed) => printed,
Err(err @ (FormatModuleError::LexError(_) | FormatModuleError::ParseError(_))) => {
return Err(CheckFileError::SyntaxErrorInInput(err));
@ -826,7 +826,7 @@ fn format_dev_file(
}
if stability_check {
let reformatted = match format_module(formatted, options) {
let reformatted = match format_module_source(formatted, options) {
Ok(reformatted) => reformatted,
Err(err @ (FormatModuleError::LexError(_) | FormatModuleError::ParseError(_))) => {
return Err(CheckFileError::SyntaxErrorInOutput {

View file

@ -2,17 +2,16 @@
use std::path::{Path, PathBuf};
use anyhow::{bail, Context, Result};
use anyhow::{format_err, Context, Result};
use clap::{command, Parser, ValueEnum};
use ruff_formatter::SourceCode;
use ruff_python_index::CommentRangesBuilder;
use ruff_python_parser::lexer::lex;
use ruff_python_parser::{parse_tokens, Mode};
use ruff_python_index::tokens_and_ranges;
use ruff_python_parser::{parse_ok_tokens, Mode};
use ruff_text_size::Ranged;
use crate::comments::collect_comments;
use crate::{format_node, PyFormatOptions};
use crate::{format_module_ast, PyFormatOptions};
#[derive(ValueEnum, Clone, Debug)]
pub enum Emit {
@ -39,36 +38,25 @@ pub struct Cli {
pub print_comments: bool,
}
pub fn format_and_debug_print(input: &str, cli: &Cli, source_type: &Path) -> Result<String> {
let mut tokens = Vec::new();
let mut comment_ranges = CommentRangesBuilder::default();
for result in lex(input, Mode::Module) {
let (token, range) = match result {
Ok((token, range)) => (token, range),
Err(err) => bail!("Source contains syntax errors {err:?}"),
};
comment_ranges.visit_token(&token, range);
tokens.push(Ok((token, range)));
}
let comment_ranges = comment_ranges.finish();
pub fn format_and_debug_print(source: &str, cli: &Cli, source_type: &Path) -> Result<String> {
let (tokens, comment_ranges) = tokens_and_ranges(source)
.map_err(|err| format_err!("Source contains syntax errors {err:?}"))?;
// Parse the AST.
let python_ast =
parse_tokens(tokens, Mode::Module, "<filename>").context("Syntax error in input")?;
let module =
parse_ok_tokens(tokens, Mode::Module, "<filename>").context("Syntax error in input")?;
let options = PyFormatOptions::from_extension(source_type);
let formatted = format_node(&python_ast, &comment_ranges, input, options)
let source_code = SourceCode::new(source);
let formatted = format_module_ast(&module, &comment_ranges, source, options)
.context("Failed to format node")?;
if cli.print_ir {
println!("{}", formatted.document().display(SourceCode::new(input)));
println!("{}", formatted.document().display(source_code));
}
if cli.print_comments {
// Print preceding, following and enclosing nodes
let source_code = SourceCode::new(input);
let decorated_comments = collect_comments(&python_ast, source_code, &comment_ranges);
let decorated_comments = collect_comments(&module, source_code, &comment_ranges);
if !decorated_comments.is_empty() {
println!("# Comment decoration: Range, Preceding, Following, Enclosing, Comment");
}
@ -86,13 +74,10 @@ pub fn format_and_debug_print(input: &str, cli: &Cli, source_type: &Path) -> Res
comment.enclosing_node().kind(),
comment.enclosing_node().range()
),
comment.slice().text(SourceCode::new(input)),
comment.slice().text(source_code),
);
}
println!(
"{:#?}",
formatted.context().comments().debug(SourceCode::new(input))
);
println!("{:#?}", formatted.context().comments().debug(source_code));
}
Ok(formatted
.print()

View file

@ -549,9 +549,9 @@ mod tests {
use ruff_formatter::SourceCode;
use ruff_python_ast::Mod;
use ruff_python_index::CommentRangesBuilder;
use ruff_python_parser::lexer::lex;
use ruff_python_parser::{parse_tokens, Mode};
use ruff_python_index::tokens_and_ranges;
use ruff_python_parser::{parse_ok_tokens, Mode};
use ruff_python_trivia::CommentRanges;
use crate::comments::Comments;
@ -563,19 +563,11 @@ mod tests {
}
impl<'a> CommentsTestCase<'a> {
fn from_code(code: &'a str) -> Self {
let source_code = SourceCode::new(code);
let tokens: Vec<_> = lex(code, Mode::Module).collect();
let mut comment_ranges = CommentRangesBuilder::default();
for (token, range) in tokens.iter().flatten() {
comment_ranges.visit_token(token, *range);
}
let comment_ranges = comment_ranges.finish();
let parsed = parse_tokens(tokens, Mode::Module, "test.py")
fn from_code(source: &'a str) -> Self {
let source_code = SourceCode::new(source);
let (tokens, comment_ranges) =
tokens_and_ranges(source).expect("Expect source to be valid Python");
let parsed = parse_ok_tokens(tokens, Mode::Module, "test.py")
.expect("Expect source to be valid Python");
CommentsTestCase {

View file

@ -5,9 +5,9 @@ use ruff_formatter::prelude::*;
use ruff_formatter::{format, FormatError, Formatted, PrintError, Printed, SourceCode};
use ruff_python_ast::node::AstNode;
use ruff_python_ast::Mod;
use ruff_python_index::CommentRangesBuilder;
use ruff_python_parser::lexer::{lex, LexicalError};
use ruff_python_parser::{parse_tokens, Mode, ParseError};
use ruff_python_index::tokens_and_ranges;
use ruff_python_parser::lexer::LexicalError;
use ruff_python_parser::{parse_ok_tokens, Mode, ParseError};
use ruff_python_trivia::CommentRanges;
use ruff_source_file::Locator;
@ -121,61 +121,44 @@ impl From<ParseError> for FormatModuleError {
}
}
#[tracing::instrument(level = Level::TRACE, skip_all)]
pub fn format_module(
contents: &str,
#[tracing::instrument(name = "format", level = Level::TRACE, skip_all)]
pub fn format_module_source(
source: &str,
options: PyFormatOptions,
) -> Result<Printed, FormatModuleError> {
// Tokenize once
let mut tokens = Vec::new();
let mut comment_ranges = CommentRangesBuilder::default();
for result in lex(contents, Mode::Module) {
let (token, range) = result?;
comment_ranges.visit_token(&token, range);
tokens.push(Ok((token, range)));
}
let comment_ranges = comment_ranges.finish();
// Parse the AST.
let python_ast = parse_tokens(tokens, Mode::Module, "<filename>")?;
let formatted = format_node(&python_ast, &comment_ranges, contents, options)?;
let (tokens, comment_ranges) = tokens_and_ranges(source)?;
let module = parse_ok_tokens(tokens, Mode::Module, "<filename>")?;
let formatted = format_module_ast(&module, &comment_ranges, source, options)?;
Ok(formatted.print()?)
}
pub fn format_node<'a>(
root: &'a Mod,
pub fn format_module_ast<'a>(
module: &'a Mod,
comment_ranges: &'a CommentRanges,
source: &'a str,
options: PyFormatOptions,
) -> FormatResult<Formatted<PyFormatContext<'a>>> {
let comments = Comments::from_ast(root, SourceCode::new(source), comment_ranges);
let source_code = SourceCode::new(source);
let comments = Comments::from_ast(module, source_code, comment_ranges);
let locator = Locator::new(source);
let formatted = format!(
PyFormatContext::new(options, locator.contents(), comments),
[root.format()]
[module.format()]
)?;
formatted
.context()
.comments()
.assert_all_formatted(SourceCode::new(source));
.assert_all_formatted(source_code);
Ok(formatted)
}
/// Public function for generating a printable string of the debug comments.
pub fn pretty_comments(root: &Mod, comment_ranges: &CommentRanges, source: &str) -> String {
let comments = Comments::from_ast(root, SourceCode::new(source), comment_ranges);
pub fn pretty_comments(module: &Mod, comment_ranges: &CommentRanges, source: &str) -> String {
let source_code = SourceCode::new(source);
let comments = Comments::from_ast(module, source_code, comment_ranges);
std::format!(
"{comments:#?}",
comments = comments.debug(SourceCode::new(source))
)
std::format!("{comments:#?}", comments = comments.debug(source_code))
}
#[cfg(test)]
@ -185,11 +168,11 @@ mod tests {
use anyhow::Result;
use insta::assert_snapshot;
use ruff_python_index::CommentRangesBuilder;
use ruff_python_parser::lexer::lex;
use ruff_python_parser::{parse_tokens, Mode};
use ruff_python_index::tokens_and_ranges;
use crate::{format_module, format_node, PyFormatOptions};
use ruff_python_parser::{parse_ok_tokens, Mode};
use crate::{format_module_ast, format_module_source, PyFormatOptions};
/// Very basic test intentionally kept very similar to the CLI
#[test]
@ -205,7 +188,7 @@ if True:
pass
# trailing
"#;
let actual = format_module(input, PyFormatOptions::default())?
let actual = format_module_source(input, PyFormatOptions::default())?
.as_code()
.to_string();
assert_eq!(expected, actual);
@ -216,7 +199,7 @@ if True:
#[ignore]
#[test]
fn quick_test() {
let src = r#"
let source = r#"
def main() -> None:
if True:
some_very_long_variable_name_abcdefghijk = Foo()
@ -226,23 +209,13 @@ def main() -> None:
]
"#;
// Tokenize once
let mut tokens = Vec::new();
let mut comment_ranges = CommentRangesBuilder::default();
for result in lex(src, Mode::Module) {
let (token, range) = result.unwrap();
comment_ranges.visit_token(&token, range);
tokens.push(Ok((token, range)));
}
let comment_ranges = comment_ranges.finish();
let (tokens, comment_ranges) = tokens_and_ranges(source).unwrap();
// Parse the AST.
let source_path = "code_inline.py";
let python_ast = parse_tokens(tokens, Mode::Module, source_path).unwrap();
let module = parse_ok_tokens(tokens, Mode::Module, source_path).unwrap();
let options = PyFormatOptions::from_extension(Path::new(source_path));
let formatted = format_node(&python_ast, &comment_ranges, src, options).unwrap();
let formatted = format_module_ast(&module, &comment_ranges, source, options).unwrap();
// Uncomment the `dbg` to print the IR.
// Use `dbg_write!(f, []) instead of `write!(f, [])` in your formatting code to print some IR

View file

@ -25,11 +25,11 @@ fn main() -> Result<()> {
cli.emit
);
}
let input = read_from_stdin()?;
let source = read_from_stdin()?;
// It seems reasonable to give this a dummy name
let formatted = format_and_debug_print(&input, &cli, Path::new("stdin.py"))?;
let formatted = format_and_debug_print(&source, &cli, Path::new("stdin.py"))?;
if cli.check {
if formatted == input {
if formatted == source {
return Ok(());
}
bail!("Content not correctly formatted")
@ -37,9 +37,9 @@ fn main() -> Result<()> {
stdout().lock().write_all(formatted.as_bytes())?;
} else {
for file in &cli.files {
let input = fs::read_to_string(file)
let source = fs::read_to_string(file)
.with_context(|| format!("Could not read {}: ", file.display()))?;
let formatted = format_and_debug_print(&input, &cli, file)?;
let formatted = format_and_debug_print(&source, &cli, file)?;
match cli.emit {
Some(Emit::Stdout) => stdout().lock().write_all(formatted.as_bytes())?,
None | Some(Emit::Files) => {

View file

@ -1,5 +1,5 @@
use ruff_formatter::FormatOptions;
use ruff_python_formatter::{format_module, PyFormatOptions};
use ruff_python_formatter::{format_module_source, PyFormatOptions};
use similar::TextDiff;
use std::fmt::{Formatter, Write};
use std::io::BufReader;
@ -20,7 +20,7 @@ fn black_compatibility() {
PyFormatOptions::from_extension(input_path)
};
let printed = format_module(&content, options.clone()).unwrap_or_else(|err| {
let printed = format_module_source(&content, options.clone()).unwrap_or_else(|err| {
panic!(
"Formatting of {} to succeed but encountered error {err}",
input_path.display()
@ -107,7 +107,8 @@ fn format() {
let content = fs::read_to_string(input_path).unwrap();
let options = PyFormatOptions::from_extension(input_path);
let printed = format_module(&content, options.clone()).expect("Formatting to succeed");
let printed =
format_module_source(&content, options.clone()).expect("Formatting to succeed");
let formatted_code = printed.as_code();
ensure_stability_when_formatting_twice(formatted_code, options.clone(), input_path);
@ -124,7 +125,7 @@ fn format() {
for (i, options) in options.into_iter().enumerate() {
let printed =
format_module(&content, options.clone()).expect("Formatting to succeed");
format_module_source(&content, options.clone()).expect("Formatting to succeed");
let formatted_code = printed.as_code();
ensure_stability_when_formatting_twice(formatted_code, options.clone(), input_path);
@ -139,7 +140,8 @@ fn format() {
.unwrap();
}
} else {
let printed = format_module(&content, options.clone()).expect("Formatting to succeed");
let printed =
format_module_source(&content, options.clone()).expect("Formatting to succeed");
let formatted_code = printed.as_code();
ensure_stability_when_formatting_twice(formatted_code, options, input_path);
@ -174,7 +176,7 @@ fn ensure_stability_when_formatting_twice(
options: PyFormatOptions,
input_path: &Path,
) {
let reformatted = match format_module(formatted_code, options) {
let reformatted = match format_module_source(formatted_code, options) {
Ok(reformatted) => reformatted,
Err(err) => {
panic!(

View file

@ -1,6 +1,7 @@
use std::fmt::Debug;
use ruff_python_parser::Tok;
use ruff_python_parser::lexer::{lex, LexicalError};
use ruff_python_parser::{Mode, Tok};
use ruff_python_trivia::CommentRanges;
use ruff_text_size::TextRange;
@ -20,3 +21,21 @@ impl CommentRangesBuilder {
CommentRanges::new(self.ranges)
}
}
/// Helper method to lex and extract comment ranges
pub fn tokens_and_ranges(
source: &str,
) -> Result<(Vec<(Tok, TextRange)>, CommentRanges), LexicalError> {
let mut tokens = Vec::new();
let mut comment_ranges = CommentRangesBuilder::default();
for result in lex(source, Mode::Module) {
let (token, range) = result?;
comment_ranges.visit_token(&token, range);
tokens.push((token, range));
}
let comment_ranges = comment_ranges.finish();
Ok((tokens, comment_ranges))
}

View file

@ -1,5 +1,5 @@
mod comment_ranges;
mod indexer;
pub use comment_ranges::CommentRangesBuilder;
pub use comment_ranges::{tokens_and_ranges, CommentRangesBuilder};
pub use indexer::Indexer;

View file

@ -110,8 +110,8 @@
//! [lexer]: crate::lexer
pub use parser::{
parse, parse_expression, parse_expression_starts_at, parse_program, parse_starts_at,
parse_suite, parse_tokens, ParseError, ParseErrorType,
parse, parse_expression, parse_expression_starts_at, parse_ok_tokens, parse_program,
parse_starts_at, parse_suite, parse_tokens, ParseError, ParseErrorType,
};
use ruff_python_ast::{CmpOp, Expr, Mod, PySourceType, Suite};
use ruff_text_size::{Ranged, TextRange, TextSize};

View file

@ -18,7 +18,7 @@ use itertools::Itertools;
pub(super) use lalrpop_util::ParseError as LalrpopError;
use ruff_text_size::{TextRange, TextSize};
use crate::lexer::{lex, lex_starts_at};
use crate::lexer::{lex, lex_starts_at, Spanned};
use crate::{
lexer::{self, LexResult, LexicalError, LexicalErrorType},
python,
@ -159,7 +159,7 @@ pub fn parse_expression_starts_at(
/// let program = parse(source, Mode::Ipython, "<embedded>");
/// assert!(program.is_ok());
/// ```
pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<ast::Mod, ParseError> {
pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<Mod, ParseError> {
parse_starts_at(source, mode, source_path, TextSize::default())
}
@ -191,7 +191,7 @@ pub fn parse_starts_at(
mode: Mode,
source_path: &str,
offset: TextSize,
) -> Result<ast::Mod, ParseError> {
) -> Result<Mod, ParseError> {
let lxr = lexer::lex_starts_at(source, mode, offset);
parse_tokens(lxr, mode, source_path)
}
@ -215,7 +215,7 @@ pub fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
mode: Mode,
source_path: &str,
) -> Result<ast::Mod, ParseError> {
) -> Result<Mod, ParseError> {
let lxr = lxr.into_iter();
parse_filtered_tokens(
@ -225,19 +225,35 @@ pub fn parse_tokens(
)
}
/// Parse tokens into an AST like [`parse_tokens`], but we already know all tokens are valid.
pub fn parse_ok_tokens(
lxr: impl IntoIterator<Item = Spanned>,
mode: Mode,
source_path: &str,
) -> Result<Mod, ParseError> {
let lxr = lxr
.into_iter()
.filter(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
let marker_token = (Tok::start_marker(mode), TextRange::default());
let lexer = iter::once(marker_token)
.chain(lxr)
.map(|(t, range)| (range.start(), t, range.end()));
python::TopParser::new()
.parse(mode, lexer)
.map_err(|e| parse_error_from_lalrpop(e, source_path))
}
fn parse_filtered_tokens(
lxr: impl IntoIterator<Item = LexResult>,
mode: Mode,
source_path: &str,
) -> Result<ast::Mod, ParseError> {
) -> Result<Mod, ParseError> {
let marker_token = (Tok::start_marker(mode), TextRange::default());
let lexer = iter::once(Ok(marker_token)).chain(lxr);
python::TopParser::new()
.parse(
mode,
lexer
.into_iter()
.map_ok(|(t, range)| (range.start(), t, range.end())),
lexer.map_ok(|(t, range)| (range.start(), t, range.end())),
)
.map_err(|e| parse_error_from_lalrpop(e, source_path))
}

View file

@ -14,7 +14,7 @@ use ruff_linter::settings::{flags, DUMMY_VARIABLE_RGX, PREFIXES};
use ruff_linter::source_kind::SourceKind;
use ruff_python_ast::{Mod, PySourceType};
use ruff_python_codegen::Stylist;
use ruff_python_formatter::{format_node, pretty_comments, PyFormatContext};
use ruff_python_formatter::{format_module_ast, pretty_comments, PyFormatContext};
use ruff_python_index::{CommentRangesBuilder, Indexer};
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::{parse_tokens, AsMode, Mode};
@ -305,7 +305,7 @@ impl<'a> ParsedModule<'a> {
.formatter
.to_format_options(PySourceType::default(), self.source_code);
format_node(
format_module_ast(
&self.module,
&self.comment_ranges,
self.source_code,