mirror of
https://github.com/astral-sh/ruff.git
synced 2025-07-19 02:55:43 +00:00

(Supersedes #9152, authored by @LaBatata101) ## Summary This PR replaces the current parser generated from LALRPOP with a hand-written recursive descent parser. It also updates the grammar for [PEP 646](https://peps.python.org/pep-0646/) so that the parser outputs the correct AST. For example, in `data[*x]`, the index expression is now a tuple with a single starred expression instead of just a starred expression. Beyond the performance improvements, the parser is also error-resilient and can provide better error messages. The behavior as seen by any downstream tools isn't changed. That is, the linter and formatter can still assume that the parser will _stop_ at the first syntax error. This will be updated in the following months. For more details about the change here, refer to the PRs corresponding to the individual commits and the release blog post. ## Test Plan Write _lots_ and _lots_ of tests for both valid and invalid syntax and verify the output. ## Acknowledgements - @MichaReiser for reviewing 100+ parser PRs and continuously providing guidance throughout the project - @LaBatata101 for initiating the transition to a hand-written parser in #9152 - @addisoncrump for implementing the fuzzer which helped [catch](https://github.com/astral-sh/ruff/pull/10903) [a](https://github.com/astral-sh/ruff/pull/10910) [lot](https://github.com/astral-sh/ruff/pull/10966) [of](https://github.com/astral-sh/ruff/pull/10896) [bugs](https://github.com/astral-sh/ruff/pull/10877) --------- Co-authored-by: Victor Hugo Gomes <labatata101@linuxmail.org> Co-authored-by: Micha Reiser <micha@reiser.io>
293 lines
9.3 KiB
Rust
293 lines
9.3 KiB
Rust
use std::cmp::Ordering;
|
|
use std::fmt::{Formatter, Write};
|
|
use std::fs;
|
|
use std::path::Path;
|
|
|
|
use annotate_snippets::display_list::{DisplayList, FormatOptions};
|
|
use annotate_snippets::snippet::{AnnotationType, Slice, Snippet, SourceAnnotation};
|
|
|
|
use ruff_python_ast::visitor::preorder::{walk_module, PreorderVisitor, TraversalSignal};
|
|
use ruff_python_ast::{AnyNodeRef, Mod};
|
|
use ruff_python_parser::{Mode, ParseErrorType, Program};
|
|
use ruff_source_file::{LineIndex, OneIndexed, SourceCode};
|
|
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
|
|
|
|
/// Runs `test_valid_syntax` as the callback of `insta::glob!` over the
/// `valid/**/*.py` fixtures, resolved against `../resources`.
#[test]
fn valid_syntax() {
    insta::glob!("../resources", "valid/**/*.py", test_valid_syntax);
}
|
|
|
|
/// Runs `test_invalid_syntax` as the callback of `insta::glob!` over the
/// `invalid/**/*.py` fixtures, resolved against `../resources`.
#[test]
fn invalid_syntax() {
    insta::glob!("../resources", "invalid/**/*.py", test_invalid_syntax);
}
|
|
|
|
/// Runs `test_valid_syntax` as the callback of `insta::glob!` over the
/// `ok/**/*.py` fixtures, resolved against `../resources/inline`.
#[test]
fn inline_ok() {
    insta::glob!("../resources/inline", "ok/**/*.py", test_valid_syntax);
}
|
|
|
|
/// Runs `test_invalid_syntax` as the callback of `insta::glob!` over the
/// `err/**/*.py` fixtures, resolved against `../resources/inline`.
#[test]
fn inline_err() {
    insta::glob!("../resources/inline", "err/**/*.py", test_invalid_syntax);
}
|
|
|
|
/// Asserts that the parser generates no syntax errors for a valid program.
|
|
/// Snapshots the AST.
|
|
fn test_valid_syntax(input_path: &Path) {
|
|
let source = fs::read_to_string(input_path).expect("Expected test file to exist");
|
|
let program = Program::parse_str(&source, Mode::Module);
|
|
|
|
if !program.is_valid() {
|
|
let line_index = LineIndex::from_source_text(&source);
|
|
let source_code = SourceCode::new(&source, &line_index);
|
|
|
|
let mut message = "Expected no syntax errors for a valid program but the parser generated the following errors:\n".to_string();
|
|
|
|
for error in program.errors() {
|
|
writeln!(
|
|
&mut message,
|
|
"{}\n",
|
|
CodeFrame {
|
|
range: error.location,
|
|
error,
|
|
source_code: &source_code,
|
|
}
|
|
)
|
|
.unwrap();
|
|
}
|
|
|
|
panic!("{input_path:?}: {message}");
|
|
}
|
|
|
|
validate_ast(program.ast(), source.text_len(), input_path);
|
|
|
|
let mut output = String::new();
|
|
writeln!(&mut output, "## AST").unwrap();
|
|
writeln!(&mut output, "\n```\n{:#?}\n```", program.ast()).unwrap();
|
|
|
|
insta::with_settings!({
|
|
omit_expression => true,
|
|
input_file => input_path,
|
|
prepend_module_to_snapshot => false,
|
|
}, {
|
|
insta::assert_snapshot!(output);
|
|
});
|
|
}
|
|
|
|
/// Assert that the parser generates at least one syntax error for the given input file.
|
|
/// Snapshots the AST and the error messages.
|
|
fn test_invalid_syntax(input_path: &Path) {
|
|
let source = fs::read_to_string(input_path).expect("Expected test file to exist");
|
|
let program = Program::parse_str(&source, Mode::Module);
|
|
|
|
assert!(
|
|
!program.is_valid(),
|
|
"{input_path:?}: Expected parser to generate at least one syntax error for a program containing syntax errors."
|
|
);
|
|
|
|
validate_ast(program.ast(), source.text_len(), input_path);
|
|
|
|
let mut output = String::new();
|
|
writeln!(&mut output, "## AST").unwrap();
|
|
writeln!(&mut output, "\n```\n{:#?}\n```", program.ast()).unwrap();
|
|
|
|
writeln!(&mut output, "## Errors\n").unwrap();
|
|
|
|
let line_index = LineIndex::from_source_text(&source);
|
|
let source_code = SourceCode::new(&source, &line_index);
|
|
|
|
for error in program.errors() {
|
|
writeln!(
|
|
&mut output,
|
|
"{}\n",
|
|
CodeFrame {
|
|
range: error.location,
|
|
error,
|
|
source_code: &source_code,
|
|
}
|
|
)
|
|
.unwrap();
|
|
}
|
|
|
|
insta::with_settings!({
|
|
omit_expression => true,
|
|
input_file => input_path,
|
|
prepend_module_to_snapshot => false,
|
|
}, {
|
|
insta::assert_snapshot!(output);
|
|
});
|
|
}
|
|
|
|
// Scratch test for quickly debugging a parser issue: edit `source` below and run
// this test explicitly. It is marked `#[ignore]`, so it does not run by default.
#[test]
#[ignore]
// Printing the AST and the errors to stdout is the purpose of this test.
#[allow(clippy::print_stdout)]
fn parser_quick_test() {
    let source = "data[*x,]\n";

    let program = Program::parse_str(source, Mode::Module);

    println!("AST:\n----\n{:#?}", program.ast());

    if program.is_valid() {
        return;
    }

    println!("Errors:\n-------");

    let line_index = LineIndex::from_source_text(source);
    let source_code = SourceCode::new(source, &line_index);

    for error in program.errors() {
        // The code frame sometimes doesn't show the error message, so print the
        // message on its own line first.
        println!("Syntax Error: {error}");

        let frame = CodeFrame {
            range: error.location,
            error,
            source_code: &source_code,
        };
        println!("{frame}\n");
    }

    println!();
}
|
|
|
|
struct CodeFrame<'a> {
|
|
range: TextRange,
|
|
error: &'a ParseErrorType,
|
|
source_code: &'a SourceCode<'a, 'a>,
|
|
}
|
|
|
|
impl std::fmt::Display for CodeFrame<'_> {
|
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
|
// Copied and modified from ruff_linter/src/message/text.rs
|
|
let content_start_index = self.source_code.line_index(self.range.start());
|
|
let mut start_index = content_start_index.saturating_sub(2);
|
|
|
|
// Trim leading empty lines.
|
|
while start_index < content_start_index {
|
|
if !self.source_code.line_text(start_index).trim().is_empty() {
|
|
break;
|
|
}
|
|
start_index = start_index.saturating_add(1);
|
|
}
|
|
|
|
let content_end_index = self.source_code.line_index(self.range.end());
|
|
let mut end_index = content_end_index
|
|
.saturating_add(2)
|
|
.min(OneIndexed::from_zero_indexed(self.source_code.line_count()));
|
|
|
|
// Trim trailing empty lines.
|
|
while end_index > content_end_index {
|
|
if !self.source_code.line_text(end_index).trim().is_empty() {
|
|
break;
|
|
}
|
|
|
|
end_index = end_index.saturating_sub(1);
|
|
}
|
|
|
|
let start_offset = self.source_code.line_start(start_index);
|
|
let end_offset = self.source_code.line_end(end_index);
|
|
|
|
let annotation_range = self.range - start_offset;
|
|
let source = self
|
|
.source_code
|
|
.slice(TextRange::new(start_offset, end_offset));
|
|
|
|
let start_char = source[TextRange::up_to(annotation_range.start())]
|
|
.chars()
|
|
.count();
|
|
|
|
let char_length = source[annotation_range].chars().count();
|
|
let label = format!("Syntax Error: {error}", error = self.error);
|
|
|
|
let snippet = Snippet {
|
|
title: None,
|
|
slices: vec![Slice {
|
|
source,
|
|
line_start: start_index.get(),
|
|
annotations: vec![SourceAnnotation {
|
|
label: &label,
|
|
annotation_type: AnnotationType::Error,
|
|
range: (start_char, start_char + char_length),
|
|
}],
|
|
// The origin (file name, line number, and column number) is already encoded
|
|
// in the `label`.
|
|
origin: None,
|
|
fold: false,
|
|
}],
|
|
footer: Vec::new(),
|
|
opt: FormatOptions::default(),
|
|
};
|
|
|
|
writeln!(f, "{message}", message = DisplayList::from(snippet))
|
|
}
|
|
}
|
|
|
|
/// Verifies that:
|
|
/// * the range of the parent node fully encloses all its child nodes
|
|
/// * the ranges are strictly increasing when traversing the nodes in pre-order.
|
|
/// * all ranges are within the length of the source code.
|
|
fn validate_ast(root: &Mod, source_len: TextSize, test_path: &Path) {
|
|
walk_module(&mut ValidateAstVisitor::new(source_len, test_path), root);
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
struct ValidateAstVisitor<'a> {
|
|
parents: Vec<AnyNodeRef<'a>>,
|
|
previous: Option<AnyNodeRef<'a>>,
|
|
source_length: TextSize,
|
|
test_path: &'a Path,
|
|
}
|
|
|
|
impl<'a> ValidateAstVisitor<'a> {
|
|
fn new(source_length: TextSize, test_path: &'a Path) -> Self {
|
|
Self {
|
|
parents: Vec::new(),
|
|
previous: None,
|
|
source_length,
|
|
test_path,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'ast> PreorderVisitor<'ast> for ValidateAstVisitor<'ast> {
|
|
fn enter_node(&mut self, node: AnyNodeRef<'ast>) -> TraversalSignal {
|
|
assert!(
|
|
node.end() <= self.source_length,
|
|
"{path}: The range of the node exceeds the length of the source code. Node: {node:#?}",
|
|
path = self.test_path.display()
|
|
);
|
|
|
|
if let Some(previous) = self.previous {
|
|
assert_ne!(previous.range().ordering(node.range()), Ordering::Greater,
|
|
"{path}: The ranges of the nodes are not strictly increasing when traversing the AST in pre-order.\nPrevious node: {previous:#?}\n\nCurrent node: {node:#?}\n\nRoot: {root:#?}",
|
|
path = self.test_path.display(),
|
|
root = self.parents.first()
|
|
);
|
|
}
|
|
|
|
if let Some(parent) = self.parents.last() {
|
|
assert!(parent.range().contains_range(node.range()),
|
|
"{path}: The range of the parent node does not fully enclose the range of the child node.\nParent node: {parent:#?}\n\nChild node: {node:#?}\n\nRoot: {root:#?}",
|
|
path = self.test_path.display(),
|
|
root = self.parents.first()
|
|
);
|
|
}
|
|
|
|
self.parents.push(node);
|
|
|
|
TraversalSignal::Traverse
|
|
}
|
|
|
|
fn leave_node(&mut self, node: AnyNodeRef<'ast>) {
|
|
self.parents.pop().expect("Expected tree to be balanced");
|
|
|
|
self.previous = Some(node);
|
|
}
|
|
}
|