Formatter and parser refactoring (#7569)

I got confused and refactored a bit, now the naming should be more
consistent. This is the basis for the range formatting work.

Chages:
* `format_module` -> `format_module_source` (format a string)
* `format_node` -> `format_module_ast` (format a program parsed into an
AST)
* Added `parse_ok_tokens` that takes `Token` instead of `Result<Token>`
* Call the source code `source` consistently
* Added a `tokens_and_ranges` helper
* `python_ast` -> `module` (because that's the type)
This commit is contained in:
konsti 2023-09-26 15:29:43 +02:00 committed by GitHub
parent 2cb5e43dd7
commit 4d16e2308d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 126 additions and 138 deletions

View file

@ -4,7 +4,7 @@ use ruff_benchmark::criterion::{
criterion_group, criterion_main, BenchmarkId, Criterion, Throughput, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
}; };
use ruff_benchmark::{TestCase, TestFile, TestFileDownloadError}; use ruff_benchmark::{TestCase, TestFile, TestFileDownloadError};
use ruff_python_formatter::{format_node, PyFormatOptions}; use ruff_python_formatter::{format_module_ast, PyFormatOptions};
use ruff_python_index::CommentRangesBuilder; use ruff_python_index::CommentRangesBuilder;
use ruff_python_parser::lexer::lex; use ruff_python_parser::lexer::lex;
use ruff_python_parser::{parse_tokens, Mode}; use ruff_python_parser::{parse_tokens, Mode};
@ -65,12 +65,13 @@ fn benchmark_formatter(criterion: &mut Criterion) {
let comment_ranges = comment_ranges.finish(); let comment_ranges = comment_ranges.finish();
// Parse the AST. // Parse the AST.
let python_ast = parse_tokens(tokens, Mode::Module, "<filename>") let module = parse_tokens(tokens, Mode::Module, "<filename>")
.expect("Input to be a valid python program"); .expect("Input to be a valid python program");
b.iter(|| { b.iter(|| {
let options = PyFormatOptions::from_extension(Path::new(case.name())); let options = PyFormatOptions::from_extension(Path::new(case.name()));
let formatted = format_node(&python_ast, &comment_ranges, case.code(), options) let formatted =
format_module_ast(&module, &comment_ranges, case.code(), options)
.expect("Formatting to succeed"); .expect("Formatting to succeed");
formatted.print().expect("Printing to succeed") formatted.print().expect("Printing to succeed")

View file

@ -15,7 +15,7 @@ use ruff_linter::fs;
use ruff_linter::logging::LogLevel; use ruff_linter::logging::LogLevel;
use ruff_linter::warn_user_once; use ruff_linter::warn_user_once;
use ruff_python_ast::{PySourceType, SourceType}; use ruff_python_ast::{PySourceType, SourceType};
use ruff_python_formatter::{format_module, FormatModuleError}; use ruff_python_formatter::{format_module_source, FormatModuleError};
use ruff_workspace::resolver::python_files_in_path; use ruff_workspace::resolver::python_files_in_path;
use ruff_workspace::FormatterSettings; use ruff_workspace::FormatterSettings;
@ -151,7 +151,7 @@ fn format_path(
let options = settings.to_format_options(source_type, &unformatted); let options = settings.to_format_options(source_type, &unformatted);
debug!("Formatting {} with {:?}", path.display(), options); debug!("Formatting {} with {:?}", path.display(), options);
let formatted = format_module(&unformatted, options) let formatted = format_module_source(&unformatted, options)
.map_err(|err| FormatCommandError::FormatModule(Some(path.to_path_buf()), err))?; .map_err(|err| FormatCommandError::FormatModule(Some(path.to_path_buf()), err))?;
let formatted = formatted.as_code(); let formatted = formatted.as_code();

View file

@ -5,7 +5,7 @@ use anyhow::Result;
use log::warn; use log::warn;
use ruff_python_ast::PySourceType; use ruff_python_ast::PySourceType;
use ruff_python_formatter::format_module; use ruff_python_formatter::format_module_source;
use ruff_workspace::resolver::python_file_at_path; use ruff_workspace::resolver::python_file_at_path;
use ruff_workspace::FormatterSettings; use ruff_workspace::FormatterSettings;
@ -70,7 +70,7 @@ fn format_source(
&unformatted, &unformatted,
); );
let formatted = format_module(&unformatted, options) let formatted = format_module_source(&unformatted, options)
.map_err(|err| FormatCommandError::FormatModule(path.map(Path::to_path_buf), err))?; .map_err(|err| FormatCommandError::FormatModule(path.map(Path::to_path_buf), err))?;
let formatted = formatted.as_code(); let formatted = formatted.as_code();

View file

@ -34,7 +34,7 @@ use ruff_formatter::{FormatError, LineWidth, PrintError};
use ruff_linter::logging::LogLevel; use ruff_linter::logging::LogLevel;
use ruff_linter::settings::types::{FilePattern, FilePatternSet}; use ruff_linter::settings::types::{FilePattern, FilePatternSet};
use ruff_python_formatter::{ use ruff_python_formatter::{
format_module, FormatModuleError, MagicTrailingComma, PyFormatOptions, format_module_source, FormatModuleError, MagicTrailingComma, PyFormatOptions,
}; };
use ruff_workspace::resolver::{python_files_in_path, PyprojectConfig, Resolver}; use ruff_workspace::resolver::{python_files_in_path, PyprojectConfig, Resolver};
@ -799,7 +799,7 @@ fn format_dev_file(
let content = fs::read_to_string(input_path)?; let content = fs::read_to_string(input_path)?;
#[cfg(not(debug_assertions))] #[cfg(not(debug_assertions))]
let start = Instant::now(); let start = Instant::now();
let printed = match format_module(&content, options.clone()) { let printed = match format_module_source(&content, options.clone()) {
Ok(printed) => printed, Ok(printed) => printed,
Err(err @ (FormatModuleError::LexError(_) | FormatModuleError::ParseError(_))) => { Err(err @ (FormatModuleError::LexError(_) | FormatModuleError::ParseError(_))) => {
return Err(CheckFileError::SyntaxErrorInInput(err)); return Err(CheckFileError::SyntaxErrorInInput(err));
@ -826,7 +826,7 @@ fn format_dev_file(
} }
if stability_check { if stability_check {
let reformatted = match format_module(formatted, options) { let reformatted = match format_module_source(formatted, options) {
Ok(reformatted) => reformatted, Ok(reformatted) => reformatted,
Err(err @ (FormatModuleError::LexError(_) | FormatModuleError::ParseError(_))) => { Err(err @ (FormatModuleError::LexError(_) | FormatModuleError::ParseError(_))) => {
return Err(CheckFileError::SyntaxErrorInOutput { return Err(CheckFileError::SyntaxErrorInOutput {

View file

@ -2,17 +2,16 @@
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use anyhow::{bail, Context, Result}; use anyhow::{format_err, Context, Result};
use clap::{command, Parser, ValueEnum}; use clap::{command, Parser, ValueEnum};
use ruff_formatter::SourceCode; use ruff_formatter::SourceCode;
use ruff_python_index::CommentRangesBuilder; use ruff_python_index::tokens_and_ranges;
use ruff_python_parser::lexer::lex; use ruff_python_parser::{parse_ok_tokens, Mode};
use ruff_python_parser::{parse_tokens, Mode};
use ruff_text_size::Ranged; use ruff_text_size::Ranged;
use crate::comments::collect_comments; use crate::comments::collect_comments;
use crate::{format_node, PyFormatOptions}; use crate::{format_module_ast, PyFormatOptions};
#[derive(ValueEnum, Clone, Debug)] #[derive(ValueEnum, Clone, Debug)]
pub enum Emit { pub enum Emit {
@ -39,36 +38,25 @@ pub struct Cli {
pub print_comments: bool, pub print_comments: bool,
} }
pub fn format_and_debug_print(input: &str, cli: &Cli, source_type: &Path) -> Result<String> { pub fn format_and_debug_print(source: &str, cli: &Cli, source_type: &Path) -> Result<String> {
let mut tokens = Vec::new(); let (tokens, comment_ranges) = tokens_and_ranges(source)
let mut comment_ranges = CommentRangesBuilder::default(); .map_err(|err| format_err!("Source contains syntax errors {err:?}"))?;
for result in lex(input, Mode::Module) {
let (token, range) = match result {
Ok((token, range)) => (token, range),
Err(err) => bail!("Source contains syntax errors {err:?}"),
};
comment_ranges.visit_token(&token, range);
tokens.push(Ok((token, range)));
}
let comment_ranges = comment_ranges.finish();
// Parse the AST. // Parse the AST.
let python_ast = let module =
parse_tokens(tokens, Mode::Module, "<filename>").context("Syntax error in input")?; parse_ok_tokens(tokens, Mode::Module, "<filename>").context("Syntax error in input")?;
let options = PyFormatOptions::from_extension(source_type); let options = PyFormatOptions::from_extension(source_type);
let formatted = format_node(&python_ast, &comment_ranges, input, options)
let source_code = SourceCode::new(source);
let formatted = format_module_ast(&module, &comment_ranges, source, options)
.context("Failed to format node")?; .context("Failed to format node")?;
if cli.print_ir { if cli.print_ir {
println!("{}", formatted.document().display(SourceCode::new(input))); println!("{}", formatted.document().display(source_code));
} }
if cli.print_comments { if cli.print_comments {
// Print preceding, following and enclosing nodes // Print preceding, following and enclosing nodes
let source_code = SourceCode::new(input); let decorated_comments = collect_comments(&module, source_code, &comment_ranges);
let decorated_comments = collect_comments(&python_ast, source_code, &comment_ranges);
if !decorated_comments.is_empty() { if !decorated_comments.is_empty() {
println!("# Comment decoration: Range, Preceding, Following, Enclosing, Comment"); println!("# Comment decoration: Range, Preceding, Following, Enclosing, Comment");
} }
@ -86,13 +74,10 @@ pub fn format_and_debug_print(input: &str, cli: &Cli, source_type: &Path) -> Res
comment.enclosing_node().kind(), comment.enclosing_node().kind(),
comment.enclosing_node().range() comment.enclosing_node().range()
), ),
comment.slice().text(SourceCode::new(input)), comment.slice().text(source_code),
); );
} }
println!( println!("{:#?}", formatted.context().comments().debug(source_code));
"{:#?}",
formatted.context().comments().debug(SourceCode::new(input))
);
} }
Ok(formatted Ok(formatted
.print() .print()

View file

@ -549,9 +549,9 @@ mod tests {
use ruff_formatter::SourceCode; use ruff_formatter::SourceCode;
use ruff_python_ast::Mod; use ruff_python_ast::Mod;
use ruff_python_index::CommentRangesBuilder; use ruff_python_index::tokens_and_ranges;
use ruff_python_parser::lexer::lex;
use ruff_python_parser::{parse_tokens, Mode}; use ruff_python_parser::{parse_ok_tokens, Mode};
use ruff_python_trivia::CommentRanges; use ruff_python_trivia::CommentRanges;
use crate::comments::Comments; use crate::comments::Comments;
@ -563,19 +563,11 @@ mod tests {
} }
impl<'a> CommentsTestCase<'a> { impl<'a> CommentsTestCase<'a> {
fn from_code(code: &'a str) -> Self { fn from_code(source: &'a str) -> Self {
let source_code = SourceCode::new(code); let source_code = SourceCode::new(source);
let tokens: Vec<_> = lex(code, Mode::Module).collect(); let (tokens, comment_ranges) =
tokens_and_ranges(source).expect("Expect source to be valid Python");
let mut comment_ranges = CommentRangesBuilder::default(); let parsed = parse_ok_tokens(tokens, Mode::Module, "test.py")
for (token, range) in tokens.iter().flatten() {
comment_ranges.visit_token(token, *range);
}
let comment_ranges = comment_ranges.finish();
let parsed = parse_tokens(tokens, Mode::Module, "test.py")
.expect("Expect source to be valid Python"); .expect("Expect source to be valid Python");
CommentsTestCase { CommentsTestCase {

View file

@ -5,9 +5,9 @@ use ruff_formatter::prelude::*;
use ruff_formatter::{format, FormatError, Formatted, PrintError, Printed, SourceCode}; use ruff_formatter::{format, FormatError, Formatted, PrintError, Printed, SourceCode};
use ruff_python_ast::node::AstNode; use ruff_python_ast::node::AstNode;
use ruff_python_ast::Mod; use ruff_python_ast::Mod;
use ruff_python_index::CommentRangesBuilder; use ruff_python_index::tokens_and_ranges;
use ruff_python_parser::lexer::{lex, LexicalError}; use ruff_python_parser::lexer::LexicalError;
use ruff_python_parser::{parse_tokens, Mode, ParseError}; use ruff_python_parser::{parse_ok_tokens, Mode, ParseError};
use ruff_python_trivia::CommentRanges; use ruff_python_trivia::CommentRanges;
use ruff_source_file::Locator; use ruff_source_file::Locator;
@ -121,61 +121,44 @@ impl From<ParseError> for FormatModuleError {
} }
} }
#[tracing::instrument(level = Level::TRACE, skip_all)] #[tracing::instrument(name = "format", level = Level::TRACE, skip_all)]
pub fn format_module( pub fn format_module_source(
contents: &str, source: &str,
options: PyFormatOptions, options: PyFormatOptions,
) -> Result<Printed, FormatModuleError> { ) -> Result<Printed, FormatModuleError> {
// Tokenize once let (tokens, comment_ranges) = tokens_and_ranges(source)?;
let mut tokens = Vec::new(); let module = parse_ok_tokens(tokens, Mode::Module, "<filename>")?;
let mut comment_ranges = CommentRangesBuilder::default(); let formatted = format_module_ast(&module, &comment_ranges, source, options)?;
for result in lex(contents, Mode::Module) {
let (token, range) = result?;
comment_ranges.visit_token(&token, range);
tokens.push(Ok((token, range)));
}
let comment_ranges = comment_ranges.finish();
// Parse the AST.
let python_ast = parse_tokens(tokens, Mode::Module, "<filename>")?;
let formatted = format_node(&python_ast, &comment_ranges, contents, options)?;
Ok(formatted.print()?) Ok(formatted.print()?)
} }
pub fn format_node<'a>( pub fn format_module_ast<'a>(
root: &'a Mod, module: &'a Mod,
comment_ranges: &'a CommentRanges, comment_ranges: &'a CommentRanges,
source: &'a str, source: &'a str,
options: PyFormatOptions, options: PyFormatOptions,
) -> FormatResult<Formatted<PyFormatContext<'a>>> { ) -> FormatResult<Formatted<PyFormatContext<'a>>> {
let comments = Comments::from_ast(root, SourceCode::new(source), comment_ranges); let source_code = SourceCode::new(source);
let comments = Comments::from_ast(module, source_code, comment_ranges);
let locator = Locator::new(source); let locator = Locator::new(source);
let formatted = format!( let formatted = format!(
PyFormatContext::new(options, locator.contents(), comments), PyFormatContext::new(options, locator.contents(), comments),
[root.format()] [module.format()]
)?; )?;
formatted formatted
.context() .context()
.comments() .comments()
.assert_all_formatted(SourceCode::new(source)); .assert_all_formatted(source_code);
Ok(formatted) Ok(formatted)
} }
/// Public function for generating a printable string of the debug comments. /// Public function for generating a printable string of the debug comments.
pub fn pretty_comments(root: &Mod, comment_ranges: &CommentRanges, source: &str) -> String { pub fn pretty_comments(module: &Mod, comment_ranges: &CommentRanges, source: &str) -> String {
let comments = Comments::from_ast(root, SourceCode::new(source), comment_ranges); let source_code = SourceCode::new(source);
let comments = Comments::from_ast(module, source_code, comment_ranges);
std::format!( std::format!("{comments:#?}", comments = comments.debug(source_code))
"{comments:#?}",
comments = comments.debug(SourceCode::new(source))
)
} }
#[cfg(test)] #[cfg(test)]
@ -185,11 +168,11 @@ mod tests {
use anyhow::Result; use anyhow::Result;
use insta::assert_snapshot; use insta::assert_snapshot;
use ruff_python_index::CommentRangesBuilder; use ruff_python_index::tokens_and_ranges;
use ruff_python_parser::lexer::lex;
use ruff_python_parser::{parse_tokens, Mode};
use crate::{format_module, format_node, PyFormatOptions}; use ruff_python_parser::{parse_ok_tokens, Mode};
use crate::{format_module_ast, format_module_source, PyFormatOptions};
/// Very basic test intentionally kept very similar to the CLI /// Very basic test intentionally kept very similar to the CLI
#[test] #[test]
@ -205,7 +188,7 @@ if True:
pass pass
# trailing # trailing
"#; "#;
let actual = format_module(input, PyFormatOptions::default())? let actual = format_module_source(input, PyFormatOptions::default())?
.as_code() .as_code()
.to_string(); .to_string();
assert_eq!(expected, actual); assert_eq!(expected, actual);
@ -216,7 +199,7 @@ if True:
#[ignore] #[ignore]
#[test] #[test]
fn quick_test() { fn quick_test() {
let src = r#" let source = r#"
def main() -> None: def main() -> None:
if True: if True:
some_very_long_variable_name_abcdefghijk = Foo() some_very_long_variable_name_abcdefghijk = Foo()
@ -226,23 +209,13 @@ def main() -> None:
] ]
"#; "#;
// Tokenize once let (tokens, comment_ranges) = tokens_and_ranges(source).unwrap();
let mut tokens = Vec::new();
let mut comment_ranges = CommentRangesBuilder::default();
for result in lex(src, Mode::Module) {
let (token, range) = result.unwrap();
comment_ranges.visit_token(&token, range);
tokens.push(Ok((token, range)));
}
let comment_ranges = comment_ranges.finish();
// Parse the AST. // Parse the AST.
let source_path = "code_inline.py"; let source_path = "code_inline.py";
let python_ast = parse_tokens(tokens, Mode::Module, source_path).unwrap(); let module = parse_ok_tokens(tokens, Mode::Module, source_path).unwrap();
let options = PyFormatOptions::from_extension(Path::new(source_path)); let options = PyFormatOptions::from_extension(Path::new(source_path));
let formatted = format_node(&python_ast, &comment_ranges, src, options).unwrap(); let formatted = format_module_ast(&module, &comment_ranges, source, options).unwrap();
// Uncomment the `dbg` to print the IR. // Uncomment the `dbg` to print the IR.
// Use `dbg_write!(f, []) instead of `write!(f, [])` in your formatting code to print some IR // Use `dbg_write!(f, []) instead of `write!(f, [])` in your formatting code to print some IR

View file

@ -25,11 +25,11 @@ fn main() -> Result<()> {
cli.emit cli.emit
); );
} }
let input = read_from_stdin()?; let source = read_from_stdin()?;
// It seems reasonable to give this a dummy name // It seems reasonable to give this a dummy name
let formatted = format_and_debug_print(&input, &cli, Path::new("stdin.py"))?; let formatted = format_and_debug_print(&source, &cli, Path::new("stdin.py"))?;
if cli.check { if cli.check {
if formatted == input { if formatted == source {
return Ok(()); return Ok(());
} }
bail!("Content not correctly formatted") bail!("Content not correctly formatted")
@ -37,9 +37,9 @@ fn main() -> Result<()> {
stdout().lock().write_all(formatted.as_bytes())?; stdout().lock().write_all(formatted.as_bytes())?;
} else { } else {
for file in &cli.files { for file in &cli.files {
let input = fs::read_to_string(file) let source = fs::read_to_string(file)
.with_context(|| format!("Could not read {}: ", file.display()))?; .with_context(|| format!("Could not read {}: ", file.display()))?;
let formatted = format_and_debug_print(&input, &cli, file)?; let formatted = format_and_debug_print(&source, &cli, file)?;
match cli.emit { match cli.emit {
Some(Emit::Stdout) => stdout().lock().write_all(formatted.as_bytes())?, Some(Emit::Stdout) => stdout().lock().write_all(formatted.as_bytes())?,
None | Some(Emit::Files) => { None | Some(Emit::Files) => {

View file

@ -1,5 +1,5 @@
use ruff_formatter::FormatOptions; use ruff_formatter::FormatOptions;
use ruff_python_formatter::{format_module, PyFormatOptions}; use ruff_python_formatter::{format_module_source, PyFormatOptions};
use similar::TextDiff; use similar::TextDiff;
use std::fmt::{Formatter, Write}; use std::fmt::{Formatter, Write};
use std::io::BufReader; use std::io::BufReader;
@ -20,7 +20,7 @@ fn black_compatibility() {
PyFormatOptions::from_extension(input_path) PyFormatOptions::from_extension(input_path)
}; };
let printed = format_module(&content, options.clone()).unwrap_or_else(|err| { let printed = format_module_source(&content, options.clone()).unwrap_or_else(|err| {
panic!( panic!(
"Formatting of {} to succeed but encountered error {err}", "Formatting of {} to succeed but encountered error {err}",
input_path.display() input_path.display()
@ -107,7 +107,8 @@ fn format() {
let content = fs::read_to_string(input_path).unwrap(); let content = fs::read_to_string(input_path).unwrap();
let options = PyFormatOptions::from_extension(input_path); let options = PyFormatOptions::from_extension(input_path);
let printed = format_module(&content, options.clone()).expect("Formatting to succeed"); let printed =
format_module_source(&content, options.clone()).expect("Formatting to succeed");
let formatted_code = printed.as_code(); let formatted_code = printed.as_code();
ensure_stability_when_formatting_twice(formatted_code, options.clone(), input_path); ensure_stability_when_formatting_twice(formatted_code, options.clone(), input_path);
@ -124,7 +125,7 @@ fn format() {
for (i, options) in options.into_iter().enumerate() { for (i, options) in options.into_iter().enumerate() {
let printed = let printed =
format_module(&content, options.clone()).expect("Formatting to succeed"); format_module_source(&content, options.clone()).expect("Formatting to succeed");
let formatted_code = printed.as_code(); let formatted_code = printed.as_code();
ensure_stability_when_formatting_twice(formatted_code, options.clone(), input_path); ensure_stability_when_formatting_twice(formatted_code, options.clone(), input_path);
@ -139,7 +140,8 @@ fn format() {
.unwrap(); .unwrap();
} }
} else { } else {
let printed = format_module(&content, options.clone()).expect("Formatting to succeed"); let printed =
format_module_source(&content, options.clone()).expect("Formatting to succeed");
let formatted_code = printed.as_code(); let formatted_code = printed.as_code();
ensure_stability_when_formatting_twice(formatted_code, options, input_path); ensure_stability_when_formatting_twice(formatted_code, options, input_path);
@ -174,7 +176,7 @@ fn ensure_stability_when_formatting_twice(
options: PyFormatOptions, options: PyFormatOptions,
input_path: &Path, input_path: &Path,
) { ) {
let reformatted = match format_module(formatted_code, options) { let reformatted = match format_module_source(formatted_code, options) {
Ok(reformatted) => reformatted, Ok(reformatted) => reformatted,
Err(err) => { Err(err) => {
panic!( panic!(

View file

@ -1,6 +1,7 @@
use std::fmt::Debug; use std::fmt::Debug;
use ruff_python_parser::Tok; use ruff_python_parser::lexer::{lex, LexicalError};
use ruff_python_parser::{Mode, Tok};
use ruff_python_trivia::CommentRanges; use ruff_python_trivia::CommentRanges;
use ruff_text_size::TextRange; use ruff_text_size::TextRange;
@ -20,3 +21,21 @@ impl CommentRangesBuilder {
CommentRanges::new(self.ranges) CommentRanges::new(self.ranges)
} }
} }
/// Helper method to lex and extract comment ranges
pub fn tokens_and_ranges(
source: &str,
) -> Result<(Vec<(Tok, TextRange)>, CommentRanges), LexicalError> {
let mut tokens = Vec::new();
let mut comment_ranges = CommentRangesBuilder::default();
for result in lex(source, Mode::Module) {
let (token, range) = result?;
comment_ranges.visit_token(&token, range);
tokens.push((token, range));
}
let comment_ranges = comment_ranges.finish();
Ok((tokens, comment_ranges))
}

View file

@ -1,5 +1,5 @@
mod comment_ranges; mod comment_ranges;
mod indexer; mod indexer;
pub use comment_ranges::CommentRangesBuilder; pub use comment_ranges::{tokens_and_ranges, CommentRangesBuilder};
pub use indexer::Indexer; pub use indexer::Indexer;

View file

@ -110,8 +110,8 @@
//! [lexer]: crate::lexer //! [lexer]: crate::lexer
pub use parser::{ pub use parser::{
parse, parse_expression, parse_expression_starts_at, parse_program, parse_starts_at, parse, parse_expression, parse_expression_starts_at, parse_ok_tokens, parse_program,
parse_suite, parse_tokens, ParseError, ParseErrorType, parse_starts_at, parse_suite, parse_tokens, ParseError, ParseErrorType,
}; };
use ruff_python_ast::{CmpOp, Expr, Mod, PySourceType, Suite}; use ruff_python_ast::{CmpOp, Expr, Mod, PySourceType, Suite};
use ruff_text_size::{Ranged, TextRange, TextSize}; use ruff_text_size::{Ranged, TextRange, TextSize};

View file

@ -18,7 +18,7 @@ use itertools::Itertools;
pub(super) use lalrpop_util::ParseError as LalrpopError; pub(super) use lalrpop_util::ParseError as LalrpopError;
use ruff_text_size::{TextRange, TextSize}; use ruff_text_size::{TextRange, TextSize};
use crate::lexer::{lex, lex_starts_at}; use crate::lexer::{lex, lex_starts_at, Spanned};
use crate::{ use crate::{
lexer::{self, LexResult, LexicalError, LexicalErrorType}, lexer::{self, LexResult, LexicalError, LexicalErrorType},
python, python,
@ -159,7 +159,7 @@ pub fn parse_expression_starts_at(
/// let program = parse(source, Mode::Ipython, "<embedded>"); /// let program = parse(source, Mode::Ipython, "<embedded>");
/// assert!(program.is_ok()); /// assert!(program.is_ok());
/// ``` /// ```
pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<ast::Mod, ParseError> { pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<Mod, ParseError> {
parse_starts_at(source, mode, source_path, TextSize::default()) parse_starts_at(source, mode, source_path, TextSize::default())
} }
@ -191,7 +191,7 @@ pub fn parse_starts_at(
mode: Mode, mode: Mode,
source_path: &str, source_path: &str,
offset: TextSize, offset: TextSize,
) -> Result<ast::Mod, ParseError> { ) -> Result<Mod, ParseError> {
let lxr = lexer::lex_starts_at(source, mode, offset); let lxr = lexer::lex_starts_at(source, mode, offset);
parse_tokens(lxr, mode, source_path) parse_tokens(lxr, mode, source_path)
} }
@ -215,7 +215,7 @@ pub fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>, lxr: impl IntoIterator<Item = LexResult>,
mode: Mode, mode: Mode,
source_path: &str, source_path: &str,
) -> Result<ast::Mod, ParseError> { ) -> Result<Mod, ParseError> {
let lxr = lxr.into_iter(); let lxr = lxr.into_iter();
parse_filtered_tokens( parse_filtered_tokens(
@ -225,19 +225,35 @@ pub fn parse_tokens(
) )
} }
/// Parse tokens into an AST like [`parse_tokens`], but we already know all tokens are valid.
pub fn parse_ok_tokens(
lxr: impl IntoIterator<Item = Spanned>,
mode: Mode,
source_path: &str,
) -> Result<Mod, ParseError> {
let lxr = lxr
.into_iter()
.filter(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
let marker_token = (Tok::start_marker(mode), TextRange::default());
let lexer = iter::once(marker_token)
.chain(lxr)
.map(|(t, range)| (range.start(), t, range.end()));
python::TopParser::new()
.parse(mode, lexer)
.map_err(|e| parse_error_from_lalrpop(e, source_path))
}
fn parse_filtered_tokens( fn parse_filtered_tokens(
lxr: impl IntoIterator<Item = LexResult>, lxr: impl IntoIterator<Item = LexResult>,
mode: Mode, mode: Mode,
source_path: &str, source_path: &str,
) -> Result<ast::Mod, ParseError> { ) -> Result<Mod, ParseError> {
let marker_token = (Tok::start_marker(mode), TextRange::default()); let marker_token = (Tok::start_marker(mode), TextRange::default());
let lexer = iter::once(Ok(marker_token)).chain(lxr); let lexer = iter::once(Ok(marker_token)).chain(lxr);
python::TopParser::new() python::TopParser::new()
.parse( .parse(
mode, mode,
lexer lexer.map_ok(|(t, range)| (range.start(), t, range.end())),
.into_iter()
.map_ok(|(t, range)| (range.start(), t, range.end())),
) )
.map_err(|e| parse_error_from_lalrpop(e, source_path)) .map_err(|e| parse_error_from_lalrpop(e, source_path))
} }

View file

@ -14,7 +14,7 @@ use ruff_linter::settings::{flags, DUMMY_VARIABLE_RGX, PREFIXES};
use ruff_linter::source_kind::SourceKind; use ruff_linter::source_kind::SourceKind;
use ruff_python_ast::{Mod, PySourceType}; use ruff_python_ast::{Mod, PySourceType};
use ruff_python_codegen::Stylist; use ruff_python_codegen::Stylist;
use ruff_python_formatter::{format_node, pretty_comments, PyFormatContext}; use ruff_python_formatter::{format_module_ast, pretty_comments, PyFormatContext};
use ruff_python_index::{CommentRangesBuilder, Indexer}; use ruff_python_index::{CommentRangesBuilder, Indexer};
use ruff_python_parser::lexer::LexResult; use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::{parse_tokens, AsMode, Mode}; use ruff_python_parser::{parse_tokens, AsMode, Mode};
@ -305,7 +305,7 @@ impl<'a> ParsedModule<'a> {
.formatter .formatter
.to_format_options(PySourceType::default(), self.source_code); .to_format_options(PySourceType::default(), self.source_code);
format_node( format_module_ast(
&self.module, &self.module,
&self.comment_ranges, &self.comment_ranges,
self.source_code, self.source_code,