mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-07 12:18:19 +00:00
[syntax-errors] Start detecting compile-time syntax errors (#16106)
## Summary This PR implements the "greeter" approach for checking the AST for syntax errors emitted by the CPython compiler. It introduces two main infrastructural changes to support all of the compile-time errors: 1. Adds a new `semantic_errors` module to the parser crate with public `SemanticSyntaxChecker` and `SemanticSyntaxError` types 2. Embeds a `SemanticSyntaxChecker` in the `ruff_linter::Checker` for checking these errors in ruff As a proof of concept, it also implements detection of two syntax errors: 1. A reimplementation of [`late-future-import`](https://docs.astral.sh/ruff/rules/late-future-import/) (`F404`) 2. Detection of rebound comprehension iteration variables (https://github.com/astral-sh/ruff/issues/14395) ## Test plan Existing F404 tests, new inline tests in the `ruff_python_parser` crate, and a linter CLI test showing an example of the `Message` output. I also tested in VS Code, where `preview = false` and turning off syntax errors both disable the new errors:  And on the playground, where `preview = false` also disables the errors:  Fixes #14395 --------- Co-authored-by: Micha Reiser <micha@reiser.io>
This commit is contained in:
parent
b1deab83d9
commit
2baaedda6c
17 changed files with 1601 additions and 93 deletions
273
crates/ruff_python_parser/src/semantic_errors.rs
Normal file
273
crates/ruff_python_parser/src/semantic_errors.rs
Normal file
|
@ -0,0 +1,273 @@
|
|||
//! [`SemanticSyntaxChecker`] for AST-based syntax errors.
|
||||
//!
|
||||
//! This checker is not responsible for traversing the AST itself. Instead, its
|
||||
//! [`SemanticSyntaxChecker::visit_stmt`] and [`SemanticSyntaxChecker::visit_expr`] methods should
|
||||
//! be called in a parent `Visitor`'s `visit_stmt` and `visit_expr` methods, respectively.
|
||||
|
||||
use std::fmt::Display;
|
||||
|
||||
use ruff_python_ast::{
|
||||
self as ast,
|
||||
visitor::{walk_expr, Visitor},
|
||||
Expr, PythonVersion, Stmt, StmtExpr, StmtImportFrom,
|
||||
};
|
||||
use ruff_text_size::TextRange;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct SemanticSyntaxChecker {
|
||||
/// The checker has traversed past the `__future__` import boundary.
|
||||
///
|
||||
/// For example, the checker could be visiting `x` in:
|
||||
///
|
||||
/// ```python
|
||||
/// from __future__ import annotations
|
||||
///
|
||||
/// import os
|
||||
///
|
||||
/// x: int = 1
|
||||
/// ```
|
||||
///
|
||||
/// Python considers it a syntax error to import from `__future__` after any other
|
||||
/// non-`__future__`-importing statements.
|
||||
seen_futures_boundary: bool,
|
||||
}
|
||||
|
||||
impl SemanticSyntaxChecker {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
seen_futures_boundary: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SemanticSyntaxChecker {
|
||||
fn add_error<Ctx: SemanticSyntaxContext>(
|
||||
context: &Ctx,
|
||||
kind: SemanticSyntaxErrorKind,
|
||||
range: TextRange,
|
||||
) {
|
||||
context.report_semantic_error(SemanticSyntaxError {
|
||||
kind,
|
||||
range,
|
||||
python_version: context.python_version(),
|
||||
});
|
||||
}
|
||||
|
||||
fn check_stmt<Ctx: SemanticSyntaxContext>(&self, stmt: &ast::Stmt, ctx: &Ctx) {
|
||||
if let Stmt::ImportFrom(StmtImportFrom { range, module, .. }) = stmt {
|
||||
if self.seen_futures_boundary && matches!(module.as_deref(), Some("__future__")) {
|
||||
Self::add_error(ctx, SemanticSyntaxErrorKind::LateFutureImport, *range);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn visit_stmt<Ctx: SemanticSyntaxContext>(&mut self, stmt: &ast::Stmt, ctx: &Ctx) {
|
||||
// update internal state
|
||||
match stmt {
|
||||
Stmt::Expr(StmtExpr { value, .. })
|
||||
if !ctx.seen_docstring_boundary() && value.is_string_literal_expr() => {}
|
||||
Stmt::ImportFrom(StmtImportFrom { module, .. }) => {
|
||||
// Allow __future__ imports until we see a non-__future__ import.
|
||||
if !matches!(module.as_deref(), Some("__future__")) {
|
||||
self.seen_futures_boundary = true;
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
self.seen_futures_boundary = true;
|
||||
}
|
||||
}
|
||||
|
||||
// check for errors
|
||||
self.check_stmt(stmt, ctx);
|
||||
}
|
||||
|
||||
pub fn visit_expr<Ctx: SemanticSyntaxContext>(&mut self, expr: &Expr, ctx: &Ctx) {
|
||||
match expr {
|
||||
Expr::ListComp(ast::ExprListComp {
|
||||
elt, generators, ..
|
||||
})
|
||||
| Expr::SetComp(ast::ExprSetComp {
|
||||
elt, generators, ..
|
||||
})
|
||||
| Expr::Generator(ast::ExprGenerator {
|
||||
elt, generators, ..
|
||||
}) => Self::check_generator_expr(elt, generators, ctx),
|
||||
Expr::DictComp(ast::ExprDictComp {
|
||||
key,
|
||||
value,
|
||||
generators,
|
||||
..
|
||||
}) => {
|
||||
Self::check_generator_expr(key, generators, ctx);
|
||||
Self::check_generator_expr(value, generators, ctx);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a [`SyntaxErrorKind::ReboundComprehensionVariable`] if `expr` rebinds an iteration
|
||||
/// variable in `generators`.
|
||||
fn check_generator_expr<Ctx: SemanticSyntaxContext>(
|
||||
expr: &Expr,
|
||||
comprehensions: &[ast::Comprehension],
|
||||
ctx: &Ctx,
|
||||
) {
|
||||
let rebound_variables = {
|
||||
let mut visitor = ReboundComprehensionVisitor {
|
||||
comprehensions,
|
||||
rebound_variables: Vec::new(),
|
||||
};
|
||||
visitor.visit_expr(expr);
|
||||
visitor.rebound_variables
|
||||
};
|
||||
|
||||
// TODO(brent) with multiple diagnostic ranges, we could mark both the named expr (current)
|
||||
// and the name expr being rebound
|
||||
for range in rebound_variables {
|
||||
// test_err rebound_comprehension_variable
|
||||
// [(a := 0) for a in range(0)]
|
||||
// {(a := 0) for a in range(0)}
|
||||
// {(a := 0): val for a in range(0)}
|
||||
// {key: (a := 0) for a in range(0)}
|
||||
// ((a := 0) for a in range(0))
|
||||
// [[(a := 0)] for a in range(0)]
|
||||
// [(a := 0) for b in range (0) for a in range(0)]
|
||||
// [(a := 0) for a in range (0) for b in range(0)]
|
||||
// [((a := 0), (b := 1)) for a in range (0) for b in range(0)]
|
||||
|
||||
// test_ok non_rebound_comprehension_variable
|
||||
// [a := 0 for x in range(0)]
|
||||
Self::add_error(
|
||||
ctx,
|
||||
SemanticSyntaxErrorKind::ReboundComprehensionVariable,
|
||||
range,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for SemanticSyntaxChecker {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct SemanticSyntaxError {
|
||||
pub kind: SemanticSyntaxErrorKind,
|
||||
pub range: TextRange,
|
||||
pub python_version: PythonVersion,
|
||||
}
|
||||
|
||||
impl Display for SemanticSyntaxError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self.kind {
|
||||
SemanticSyntaxErrorKind::LateFutureImport => {
|
||||
f.write_str("__future__ imports must be at the top of the file")
|
||||
}
|
||||
SemanticSyntaxErrorKind::ReboundComprehensionVariable => {
|
||||
f.write_str("assignment expression cannot rebind comprehension variable")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub enum SemanticSyntaxErrorKind {
|
||||
/// Represents the use of a `__future__` import after the beginning of a file.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```python
|
||||
/// from pathlib import Path
|
||||
///
|
||||
/// from __future__ import annotations
|
||||
/// ```
|
||||
///
|
||||
/// This corresponds to the [`late-future-import`] (`F404`) rule in ruff.
|
||||
///
|
||||
/// [`late-future-import`]: https://docs.astral.sh/ruff/rules/late-future-import/
|
||||
LateFutureImport,
|
||||
|
||||
/// Represents the rebinding of the iteration variable of a list, set, or dict comprehension or
|
||||
/// a generator expression.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```python
|
||||
/// [(a := 0) for a in range(0)]
|
||||
/// {(a := 0) for a in range(0)}
|
||||
/// {(a := 0): val for a in range(0)}
|
||||
/// {key: (a := 0) for a in range(0)}
|
||||
/// ((a := 0) for a in range(0))
|
||||
/// ```
|
||||
ReboundComprehensionVariable,
|
||||
}
|
||||
|
||||
/// Searches for the first named expression (`x := y`) rebinding one of the `iteration_variables` in
|
||||
/// a comprehension or generator expression.
|
||||
struct ReboundComprehensionVisitor<'a> {
|
||||
comprehensions: &'a [ast::Comprehension],
|
||||
rebound_variables: Vec<TextRange>,
|
||||
}
|
||||
|
||||
impl Visitor<'_> for ReboundComprehensionVisitor<'_> {
|
||||
fn visit_expr(&mut self, expr: &Expr) {
|
||||
if let Expr::Named(ast::ExprNamed { target, .. }) = expr {
|
||||
if let Expr::Name(ast::ExprName { id, range, .. }) = &**target {
|
||||
if self.comprehensions.iter().any(|comp| {
|
||||
comp.target
|
||||
.as_name_expr()
|
||||
.is_some_and(|name| name.id == *id)
|
||||
}) {
|
||||
self.rebound_variables.push(*range);
|
||||
}
|
||||
};
|
||||
}
|
||||
walk_expr(self, expr);
|
||||
}
|
||||
}
|
||||
|
||||
pub trait SemanticSyntaxContext {
|
||||
/// Returns `true` if a module's docstring boundary has been passed.
|
||||
fn seen_docstring_boundary(&self) -> bool;
|
||||
|
||||
/// The target Python version for detecting backwards-incompatible syntax changes.
|
||||
fn python_version(&self) -> PythonVersion;
|
||||
|
||||
fn report_semantic_error(&self, error: SemanticSyntaxError);
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct SemanticSyntaxCheckerVisitor<Ctx> {
|
||||
checker: SemanticSyntaxChecker,
|
||||
context: Ctx,
|
||||
}
|
||||
|
||||
impl<Ctx> SemanticSyntaxCheckerVisitor<Ctx> {
|
||||
pub fn new(context: Ctx) -> Self {
|
||||
Self {
|
||||
checker: SemanticSyntaxChecker::new(),
|
||||
context,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_context(self) -> Ctx {
|
||||
self.context
|
||||
}
|
||||
}
|
||||
|
||||
impl<Ctx> Visitor<'_> for SemanticSyntaxCheckerVisitor<Ctx>
|
||||
where
|
||||
Ctx: SemanticSyntaxContext,
|
||||
{
|
||||
fn visit_stmt(&mut self, stmt: &'_ Stmt) {
|
||||
self.checker.visit_stmt(stmt, &self.context);
|
||||
ruff_python_ast::visitor::walk_stmt(self, stmt);
|
||||
}
|
||||
|
||||
fn visit_expr(&mut self, expr: &'_ Expr) {
|
||||
self.checker.visit_expr(expr, &self.context);
|
||||
ruff_python_ast::visitor::walk_expr(self, expr);
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue