[syntax-errors] Start detecting compile-time syntax errors (#16106)

## Summary

This PR implements the "greeter" approach for checking the AST for
syntax errors emitted by the CPython compiler. It introduces two main
infrastructural changes to support all of the compile-time errors:
1. Adds a new `semantic_errors` module to the parser crate with public
`SemanticSyntaxChecker` and `SemanticSyntaxError` types
2. Embeds a `SemanticSyntaxChecker` in the `ruff_linter::Checker` for
checking these errors in ruff

As a proof of concept, it also implements detection of two syntax
errors:
1. A reimplementation of
[`late-future-import`](https://docs.astral.sh/ruff/rules/late-future-import/)
(`F404`)
2. Detection of rebound comprehension iteration variables
(https://github.com/astral-sh/ruff/issues/14395)

## Test plan
Existing F404 tests, new inline tests in the `ruff_python_parser` crate,
and a linter CLI test showing an example of the `Message` output.

I also tested in VS Code, where setting `preview = false` and turning off
syntax errors both disable the new errors:


![image](https://github.com/user-attachments/assets/cf453d95-04f7-484b-8440-cb812f29d45e)

And on the playground, where `preview = false` also disables the errors:


![image](https://github.com/user-attachments/assets/a97570c4-1efa-439f-9d99-a54487dd6064)


Fixes #14395

---------

Co-authored-by: Micha Reiser <micha@reiser.io>
This commit is contained in:
Brent Westbrook 2025-03-21 14:45:25 -04:00 committed by GitHub
parent b1deab83d9
commit 2baaedda6c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 1601 additions and 93 deletions

View file

@ -1,3 +1,4 @@
use std::cell::RefCell;
use std::cmp::Ordering;
use std::fmt::{Formatter, Write};
use std::fs;
@ -5,7 +6,11 @@ use std::path::Path;
use ruff_annotate_snippets::{Level, Renderer, Snippet};
use ruff_python_ast::visitor::source_order::{walk_module, SourceOrderVisitor, TraversalSignal};
use ruff_python_ast::visitor::Visitor;
use ruff_python_ast::{AnyNodeRef, Mod, PythonVersion};
use ruff_python_parser::semantic_errors::{
SemanticSyntaxCheckerVisitor, SemanticSyntaxContext, SemanticSyntaxError,
};
use ruff_python_parser::{parse_unchecked, Mode, ParseErrorType, ParseOptions, Token};
use ruff_source_file::{LineIndex, OneIndexed, SourceCode};
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
@ -81,6 +86,38 @@ fn test_valid_syntax(input_path: &Path) {
writeln!(&mut output, "## AST").unwrap();
writeln!(&mut output, "\n```\n{:#?}\n```", parsed.syntax()).unwrap();
let parsed = parsed.try_into_module().expect("Parsed with Mode::Module");
let mut visitor = SemanticSyntaxCheckerVisitor::new(TestContext::default());
for stmt in parsed.suite() {
visitor.visit_stmt(stmt);
}
let semantic_syntax_errors = visitor.into_context().diagnostics.into_inner();
if !semantic_syntax_errors.is_empty() {
let mut message = "Expected no semantic syntax errors for a valid program:\n".to_string();
let line_index = LineIndex::from_source_text(&source);
let source_code = SourceCode::new(&source, &line_index);
for error in semantic_syntax_errors {
writeln!(
&mut message,
"{}\n",
CodeFrame {
range: error.range,
error: &ParseErrorType::OtherError(error.to_string()),
source_code: &source_code,
}
)
.unwrap();
}
panic!("{input_path:?}: {message}");
}
insta::with_settings!({
omit_expression => true,
input_file => input_path,
@ -99,11 +136,6 @@ fn test_invalid_syntax(input_path: &Path) {
});
let parsed = parse_unchecked(&source, options);
assert!(
parsed.has_syntax_errors(),
"{input_path:?}: Expected parser to generate at least one syntax error for a program containing syntax errors."
);
validate_tokens(parsed.tokens(), source.text_len(), input_path);
validate_ast(parsed.syntax(), source.text_len(), input_path);
@ -148,6 +180,38 @@ fn test_invalid_syntax(input_path: &Path) {
.unwrap();
}
let parsed = parsed.try_into_module().expect("Parsed with Mode::Module");
let mut visitor = SemanticSyntaxCheckerVisitor::new(TestContext::default());
for stmt in parsed.suite() {
visitor.visit_stmt(stmt);
}
let semantic_syntax_errors = visitor.into_context().diagnostics.into_inner();
assert!(
parsed.has_syntax_errors() || !semantic_syntax_errors.is_empty(),
"{input_path:?}: Expected parser to generate at least one syntax error for a program containing syntax errors."
);
if !semantic_syntax_errors.is_empty() {
writeln!(&mut output, "## Semantic Syntax Errors\n").unwrap();
}
for error in semantic_syntax_errors {
writeln!(
&mut output,
"{}\n",
CodeFrame {
range: error.range,
error: &ParseErrorType::OtherError(error.to_string()),
source_code: &source_code,
}
)
.unwrap();
}
insta::with_settings!({
omit_expression => true,
input_file => input_path,
@ -393,3 +457,22 @@ impl<'ast> SourceOrderVisitor<'ast> for ValidateAstVisitor<'ast> {
self.previous = Some(node);
}
}
/// Context handed to `SemanticSyntaxCheckerVisitor` by the fixture tests.
///
/// It stores every reported `SemanticSyntaxError` so the tests can inspect
/// them after the visitor has walked the module.
#[derive(Debug, Default)]
struct TestContext {
/// Errors reported so far. Wrapped in a `RefCell` because
/// `report_semantic_error` only receives `&self`.
diagnostics: RefCell<Vec<SemanticSyntaxError>>,
}
impl SemanticSyntaxContext for TestContext {
    /// Always answers `false`: this test context never reports a docstring
    /// boundary as seen.
    fn seen_docstring_boundary(&self) -> bool {
        false
    }

    /// Every fixture is checked against the default [`PythonVersion`].
    fn python_version(&self) -> PythonVersion {
        Default::default()
    }

    /// Appends `error` to the collected diagnostics.
    fn report_semantic_error(&self, error: SemanticSyntaxError) {
        let mut collected = self.diagnostics.borrow_mut();
        collected.push(error);
    }
}

View file

@ -22,7 +22,7 @@ fn project_root() -> PathBuf {
#[test]
fn generate_inline_tests() -> Result<()> {
let parser_dir = project_root().join("crates/ruff_python_parser/src/parser");
let parser_dir = project_root().join("crates/ruff_python_parser/src/");
let tests = TestCollection::try_from(parser_dir.as_path())?;
let mut test_files = TestFiles::default();

View file

@ -0,0 +1,903 @@
---
source: crates/ruff_python_parser/tests/fixtures.rs
input_file: crates/ruff_python_parser/resources/inline/err/rebound_comprehension_variable.py
---
## AST
```
Module(
ModModule {
range: 0..342,
body: [
Expr(
StmtExpr {
range: 0..28,
value: ListComp(
ExprListComp {
range: 0..28,
elt: Named(
ExprNamed {
range: 2..8,
target: Name(
ExprName {
range: 2..3,
id: Name("a"),
ctx: Store,
},
),
value: NumberLiteral(
ExprNumberLiteral {
range: 7..8,
value: Int(
0,
),
},
),
},
),
generators: [
Comprehension {
range: 10..27,
target: Name(
ExprName {
range: 14..15,
id: Name("a"),
ctx: Store,
},
),
iter: Call(
ExprCall {
range: 19..27,
func: Name(
ExprName {
range: 19..24,
id: Name("range"),
ctx: Load,
},
),
arguments: Arguments {
range: 24..27,
args: [
NumberLiteral(
ExprNumberLiteral {
range: 25..26,
value: Int(
0,
),
},
),
],
keywords: [],
},
},
),
ifs: [],
is_async: false,
},
],
},
),
},
),
Expr(
StmtExpr {
range: 29..57,
value: SetComp(
ExprSetComp {
range: 29..57,
elt: Named(
ExprNamed {
range: 31..37,
target: Name(
ExprName {
range: 31..32,
id: Name("a"),
ctx: Store,
},
),
value: NumberLiteral(
ExprNumberLiteral {
range: 36..37,
value: Int(
0,
),
},
),
},
),
generators: [
Comprehension {
range: 39..56,
target: Name(
ExprName {
range: 43..44,
id: Name("a"),
ctx: Store,
},
),
iter: Call(
ExprCall {
range: 48..56,
func: Name(
ExprName {
range: 48..53,
id: Name("range"),
ctx: Load,
},
),
arguments: Arguments {
range: 53..56,
args: [
NumberLiteral(
ExprNumberLiteral {
range: 54..55,
value: Int(
0,
),
},
),
],
keywords: [],
},
},
),
ifs: [],
is_async: false,
},
],
},
),
},
),
Expr(
StmtExpr {
range: 58..91,
value: DictComp(
ExprDictComp {
range: 58..91,
key: Named(
ExprNamed {
range: 60..66,
target: Name(
ExprName {
range: 60..61,
id: Name("a"),
ctx: Store,
},
),
value: NumberLiteral(
ExprNumberLiteral {
range: 65..66,
value: Int(
0,
),
},
),
},
),
value: Name(
ExprName {
range: 69..72,
id: Name("val"),
ctx: Load,
},
),
generators: [
Comprehension {
range: 73..90,
target: Name(
ExprName {
range: 77..78,
id: Name("a"),
ctx: Store,
},
),
iter: Call(
ExprCall {
range: 82..90,
func: Name(
ExprName {
range: 82..87,
id: Name("range"),
ctx: Load,
},
),
arguments: Arguments {
range: 87..90,
args: [
NumberLiteral(
ExprNumberLiteral {
range: 88..89,
value: Int(
0,
),
},
),
],
keywords: [],
},
},
),
ifs: [],
is_async: false,
},
],
},
),
},
),
Expr(
StmtExpr {
range: 92..125,
value: DictComp(
ExprDictComp {
range: 92..125,
key: Name(
ExprName {
range: 93..96,
id: Name("key"),
ctx: Load,
},
),
value: Named(
ExprNamed {
range: 99..105,
target: Name(
ExprName {
range: 99..100,
id: Name("a"),
ctx: Store,
},
),
value: NumberLiteral(
ExprNumberLiteral {
range: 104..105,
value: Int(
0,
),
},
),
},
),
generators: [
Comprehension {
range: 107..124,
target: Name(
ExprName {
range: 111..112,
id: Name("a"),
ctx: Store,
},
),
iter: Call(
ExprCall {
range: 116..124,
func: Name(
ExprName {
range: 116..121,
id: Name("range"),
ctx: Load,
},
),
arguments: Arguments {
range: 121..124,
args: [
NumberLiteral(
ExprNumberLiteral {
range: 122..123,
value: Int(
0,
),
},
),
],
keywords: [],
},
},
),
ifs: [],
is_async: false,
},
],
},
),
},
),
Expr(
StmtExpr {
range: 126..154,
value: Generator(
ExprGenerator {
range: 126..154,
elt: Named(
ExprNamed {
range: 128..134,
target: Name(
ExprName {
range: 128..129,
id: Name("a"),
ctx: Store,
},
),
value: NumberLiteral(
ExprNumberLiteral {
range: 133..134,
value: Int(
0,
),
},
),
},
),
generators: [
Comprehension {
range: 136..153,
target: Name(
ExprName {
range: 140..141,
id: Name("a"),
ctx: Store,
},
),
iter: Call(
ExprCall {
range: 145..153,
func: Name(
ExprName {
range: 145..150,
id: Name("range"),
ctx: Load,
},
),
arguments: Arguments {
range: 150..153,
args: [
NumberLiteral(
ExprNumberLiteral {
range: 151..152,
value: Int(
0,
),
},
),
],
keywords: [],
},
},
),
ifs: [],
is_async: false,
},
],
parenthesized: true,
},
),
},
),
Expr(
StmtExpr {
range: 155..185,
value: ListComp(
ExprListComp {
range: 155..185,
elt: List(
ExprList {
range: 156..166,
elts: [
Named(
ExprNamed {
range: 158..164,
target: Name(
ExprName {
range: 158..159,
id: Name("a"),
ctx: Store,
},
),
value: NumberLiteral(
ExprNumberLiteral {
range: 163..164,
value: Int(
0,
),
},
),
},
),
],
ctx: Load,
},
),
generators: [
Comprehension {
range: 167..184,
target: Name(
ExprName {
range: 171..172,
id: Name("a"),
ctx: Store,
},
),
iter: Call(
ExprCall {
range: 176..184,
func: Name(
ExprName {
range: 176..181,
id: Name("range"),
ctx: Load,
},
),
arguments: Arguments {
range: 181..184,
args: [
NumberLiteral(
ExprNumberLiteral {
range: 182..183,
value: Int(
0,
),
},
),
],
keywords: [],
},
},
),
ifs: [],
is_async: false,
},
],
},
),
},
),
Expr(
StmtExpr {
range: 186..233,
value: ListComp(
ExprListComp {
range: 186..233,
elt: Named(
ExprNamed {
range: 188..194,
target: Name(
ExprName {
range: 188..189,
id: Name("a"),
ctx: Store,
},
),
value: NumberLiteral(
ExprNumberLiteral {
range: 193..194,
value: Int(
0,
),
},
),
},
),
generators: [
Comprehension {
range: 196..214,
target: Name(
ExprName {
range: 200..201,
id: Name("b"),
ctx: Store,
},
),
iter: Call(
ExprCall {
range: 205..214,
func: Name(
ExprName {
range: 205..210,
id: Name("range"),
ctx: Load,
},
),
arguments: Arguments {
range: 211..214,
args: [
NumberLiteral(
ExprNumberLiteral {
range: 212..213,
value: Int(
0,
),
},
),
],
keywords: [],
},
},
),
ifs: [],
is_async: false,
},
Comprehension {
range: 215..232,
target: Name(
ExprName {
range: 219..220,
id: Name("a"),
ctx: Store,
},
),
iter: Call(
ExprCall {
range: 224..232,
func: Name(
ExprName {
range: 224..229,
id: Name("range"),
ctx: Load,
},
),
arguments: Arguments {
range: 229..232,
args: [
NumberLiteral(
ExprNumberLiteral {
range: 230..231,
value: Int(
0,
),
},
),
],
keywords: [],
},
},
),
ifs: [],
is_async: false,
},
],
},
),
},
),
Expr(
StmtExpr {
range: 234..281,
value: ListComp(
ExprListComp {
range: 234..281,
elt: Named(
ExprNamed {
range: 236..242,
target: Name(
ExprName {
range: 236..237,
id: Name("a"),
ctx: Store,
},
),
value: NumberLiteral(
ExprNumberLiteral {
range: 241..242,
value: Int(
0,
),
},
),
},
),
generators: [
Comprehension {
range: 244..262,
target: Name(
ExprName {
range: 248..249,
id: Name("a"),
ctx: Store,
},
),
iter: Call(
ExprCall {
range: 253..262,
func: Name(
ExprName {
range: 253..258,
id: Name("range"),
ctx: Load,
},
),
arguments: Arguments {
range: 259..262,
args: [
NumberLiteral(
ExprNumberLiteral {
range: 260..261,
value: Int(
0,
),
},
),
],
keywords: [],
},
},
),
ifs: [],
is_async: false,
},
Comprehension {
range: 263..280,
target: Name(
ExprName {
range: 267..268,
id: Name("b"),
ctx: Store,
},
),
iter: Call(
ExprCall {
range: 272..280,
func: Name(
ExprName {
range: 272..277,
id: Name("range"),
ctx: Load,
},
),
arguments: Arguments {
range: 277..280,
args: [
NumberLiteral(
ExprNumberLiteral {
range: 278..279,
value: Int(
0,
),
},
),
],
keywords: [],
},
},
),
ifs: [],
is_async: false,
},
],
},
),
},
),
Expr(
StmtExpr {
range: 282..341,
value: ListComp(
ExprListComp {
range: 282..341,
elt: Tuple(
ExprTuple {
range: 283..303,
elts: [
Named(
ExprNamed {
range: 285..291,
target: Name(
ExprName {
range: 285..286,
id: Name("a"),
ctx: Store,
},
),
value: NumberLiteral(
ExprNumberLiteral {
range: 290..291,
value: Int(
0,
),
},
),
},
),
Named(
ExprNamed {
range: 295..301,
target: Name(
ExprName {
range: 295..296,
id: Name("b"),
ctx: Store,
},
),
value: NumberLiteral(
ExprNumberLiteral {
range: 300..301,
value: Int(
1,
),
},
),
},
),
],
ctx: Load,
parenthesized: true,
},
),
generators: [
Comprehension {
range: 304..322,
target: Name(
ExprName {
range: 308..309,
id: Name("a"),
ctx: Store,
},
),
iter: Call(
ExprCall {
range: 313..322,
func: Name(
ExprName {
range: 313..318,
id: Name("range"),
ctx: Load,
},
),
arguments: Arguments {
range: 319..322,
args: [
NumberLiteral(
ExprNumberLiteral {
range: 320..321,
value: Int(
0,
),
},
),
],
keywords: [],
},
},
),
ifs: [],
is_async: false,
},
Comprehension {
range: 323..340,
target: Name(
ExprName {
range: 327..328,
id: Name("b"),
ctx: Store,
},
),
iter: Call(
ExprCall {
range: 332..340,
func: Name(
ExprName {
range: 332..337,
id: Name("range"),
ctx: Load,
},
),
arguments: Arguments {
range: 337..340,
args: [
NumberLiteral(
ExprNumberLiteral {
range: 338..339,
value: Int(
0,
),
},
),
],
keywords: [],
},
},
),
ifs: [],
is_async: false,
},
],
},
),
},
),
],
},
)
```
## Semantic Syntax Errors
|
1 | [(a := 0) for a in range(0)]
| ^ Syntax Error: assignment expression cannot rebind comprehension variable
2 | {(a := 0) for a in range(0)}
3 | {(a := 0): val for a in range(0)}
|
|
1 | [(a := 0) for a in range(0)]
2 | {(a := 0) for a in range(0)}
| ^ Syntax Error: assignment expression cannot rebind comprehension variable
3 | {(a := 0): val for a in range(0)}
4 | {key: (a := 0) for a in range(0)}
|
|
1 | [(a := 0) for a in range(0)]
2 | {(a := 0) for a in range(0)}
3 | {(a := 0): val for a in range(0)}
| ^ Syntax Error: assignment expression cannot rebind comprehension variable
4 | {key: (a := 0) for a in range(0)}
5 | ((a := 0) for a in range(0))
|
|
2 | {(a := 0) for a in range(0)}
3 | {(a := 0): val for a in range(0)}
4 | {key: (a := 0) for a in range(0)}
| ^ Syntax Error: assignment expression cannot rebind comprehension variable
5 | ((a := 0) for a in range(0))
6 | [[(a := 0)] for a in range(0)]
|
|
3 | {(a := 0): val for a in range(0)}
4 | {key: (a := 0) for a in range(0)}
5 | ((a := 0) for a in range(0))
| ^ Syntax Error: assignment expression cannot rebind comprehension variable
6 | [[(a := 0)] for a in range(0)]
7 | [(a := 0) for b in range (0) for a in range(0)]
|
|
4 | {key: (a := 0) for a in range(0)}
5 | ((a := 0) for a in range(0))
6 | [[(a := 0)] for a in range(0)]
| ^ Syntax Error: assignment expression cannot rebind comprehension variable
7 | [(a := 0) for b in range (0) for a in range(0)]
8 | [(a := 0) for a in range (0) for b in range(0)]
|
|
5 | ((a := 0) for a in range(0))
6 | [[(a := 0)] for a in range(0)]
7 | [(a := 0) for b in range (0) for a in range(0)]
| ^ Syntax Error: assignment expression cannot rebind comprehension variable
8 | [(a := 0) for a in range (0) for b in range(0)]
9 | [((a := 0), (b := 1)) for a in range (0) for b in range(0)]
|
|
6 | [[(a := 0)] for a in range(0)]
7 | [(a := 0) for b in range (0) for a in range(0)]
8 | [(a := 0) for a in range (0) for b in range(0)]
| ^ Syntax Error: assignment expression cannot rebind comprehension variable
9 | [((a := 0), (b := 1)) for a in range (0) for b in range(0)]
|
|
7 | [(a := 0) for b in range (0) for a in range(0)]
8 | [(a := 0) for a in range (0) for b in range(0)]
9 | [((a := 0), (b := 1)) for a in range (0) for b in range(0)]
| ^ Syntax Error: assignment expression cannot rebind comprehension variable
|
|
7 | [(a := 0) for b in range (0) for a in range(0)]
8 | [(a := 0) for a in range (0) for b in range(0)]
9 | [((a := 0), (b := 1)) for a in range (0) for b in range(0)]
| ^ Syntax Error: assignment expression cannot rebind comprehension variable
|

View file

@ -1,7 +1,6 @@
---
source: crates/ruff_python_parser/tests/fixtures.rs
input_file: crates/ruff_python_parser/resources/valid/expressions/arguments.py
snapshot_kind: text
---
## AST
@ -1159,7 +1158,7 @@ Module(
target: Name(
ExprName {
range: 562..563,
id: Name("x"),
id: Name("i"),
ctx: Store,
},
),

View file

@ -0,0 +1,85 @@
---
source: crates/ruff_python_parser/tests/fixtures.rs
input_file: crates/ruff_python_parser/resources/inline/ok/non_rebound_comprehension_variable.py
---
## AST
```
Module(
ModModule {
range: 0..27,
body: [
Expr(
StmtExpr {
range: 0..26,
value: ListComp(
ExprListComp {
range: 0..26,
elt: Named(
ExprNamed {
range: 1..7,
target: Name(
ExprName {
range: 1..2,
id: Name("a"),
ctx: Store,
},
),
value: NumberLiteral(
ExprNumberLiteral {
range: 6..7,
value: Int(
0,
),
},
),
},
),
generators: [
Comprehension {
range: 8..25,
target: Name(
ExprName {
range: 12..13,
id: Name("x"),
ctx: Store,
},
),
iter: Call(
ExprCall {
range: 17..25,
func: Name(
ExprName {
range: 17..22,
id: Name("range"),
ctx: Load,
},
),
arguments: Arguments {
range: 22..25,
args: [
NumberLiteral(
ExprNumberLiteral {
range: 23..24,
value: Int(
0,
),
},
),
],
keywords: [],
},
},
),
ifs: [],
is_async: false,
},
],
},
),
},
),
],
},
)
```