Start detecting version-related syntax errors in the parser (#16090)

## Summary

This PR builds on the changes in #16220 to pass a target Python version
to the parser. It also adds the `Parser::unsupported_syntax_errors` field, which
collects version-related syntax errors while parsing. These syntax
errors are then turned into `Message`s in ruff (in preview mode).

This PR only detects one syntax error (`match` statement before Python
3.10), but it has been pretty quick to extend to several other simple
errors (see #16308 for example).

## Test Plan

The current tests are CLI tests in the linter crate, but these could be
supplemented with inline parser tests after #16357.

I also tested the display of these syntax errors in VS Code:


![image](https://github.com/user-attachments/assets/062b4441-740e-46c3-887c-a954049ef26e)

![image](https://github.com/user-attachments/assets/101f55b8-146c-4d59-b6b0-922f19bcd0fa)

---------

Co-authored-by: Alex Waygood <alex.waygood@gmail.com>
This commit is contained in:
Brent Westbrook 2025-02-25 23:03:48 -05:00 committed by GitHub
parent b39a4ad01d
commit 78806361fd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 356 additions and 37 deletions

View file

@ -1,5 +1,6 @@
use std::fmt;
use std::fmt::{self, Display};
use ruff_python_ast::PythonVersion;
use ruff_text_size::TextRange;
use crate::TokenKind;
@ -426,6 +427,50 @@ impl std::fmt::Display for LexicalErrorType {
}
}
/// Represents a version-related syntax error detected during parsing.
///
/// An example of a version-related error is the use of a `match` statement before Python 3.10, when
/// it was first introduced. See [`UnsupportedSyntaxErrorKind`] for other kinds of errors.
#[derive(Debug, PartialEq, Clone)]
pub struct UnsupportedSyntaxError {
/// The kind of unsupported syntax that was encountered.
pub kind: UnsupportedSyntaxErrorKind,
/// The source range that the error is reported for.
pub range: TextRange,
/// The target [`PythonVersion`] for which this error was detected.
///
/// This is different from the version reported by the
/// [`minimum_version`](UnsupportedSyntaxError::minimum_version) method, which is the earliest
/// allowed version for this piece of syntax. The `target_version` is primarily used for
/// user-facing error messages.
pub target_version: PythonVersion,
}
impl UnsupportedSyntaxError {
    /// Returns the first Python version in which the syntax behind this error is allowed.
    ///
    /// The result depends only on the error's `kind`, not on its `target_version`.
    pub const fn minimum_version(&self) -> PythonVersion {
        let Self { kind, .. } = self;
        match *kind {
            UnsupportedSyntaxErrorKind::MatchBeforePy310 => PythonVersion::PY310,
        }
    }
}
impl Display for UnsupportedSyntaxError {
    /// Writes the user-facing message for this error, naming both the target version the
    /// code was checked against and the version in which the syntax was introduced.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            kind,
            target_version,
            ..
        } = self;
        match kind {
            UnsupportedSyntaxErrorKind::MatchBeforePy310 => write!(
                f,
                "Cannot use `match` statement on Python {} (syntax was added in Python {})",
                target_version,
                self.minimum_version(),
            ),
        }
    }
}
/// The kinds of version-related syntax errors that can be reported.
///
/// Each variant names a piece of syntax together with the Python version before which it is
/// unavailable.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum UnsupportedSyntaxErrorKind {
    /// A `match` statement used with a target version earlier than Python 3.10.
    MatchBeforePy310,
}
#[cfg(target_pointer_width = "64")]
mod sizes {
use crate::error::{LexicalError, LexicalErrorType};

View file

@ -67,7 +67,10 @@
use std::iter::FusedIterator;
use std::ops::Deref;
pub use crate::error::{FStringErrorType, LexicalErrorType, ParseError, ParseErrorType};
pub use crate::error::{
FStringErrorType, LexicalErrorType, ParseError, ParseErrorType, UnsupportedSyntaxError,
UnsupportedSyntaxErrorKind,
};
pub use crate::parser::ParseOptions;
pub use crate::token::{Token, TokenKind};
@ -305,6 +308,7 @@ pub struct Parsed<T> {
syntax: T,
tokens: Tokens,
errors: Vec<ParseError>,
unsupported_syntax_errors: Vec<UnsupportedSyntaxError>,
}
impl<T> Parsed<T> {
@ -323,6 +327,11 @@ impl<T> Parsed<T> {
&self.errors
}
/// Returns the version-related syntax errors found during parsing, as a slice.
pub fn unsupported_syntax_errors(&self) -> &[UnsupportedSyntaxError] {
    self.unsupported_syntax_errors.as_slice()
}
/// Consumes the [`Parsed`] output and returns the contained syntax node.
pub fn into_syntax(self) -> T {
self.syntax
@ -334,12 +343,18 @@ impl<T> Parsed<T> {
}
/// Returns `true` if the parsed source code is valid i.e., it has no syntax errors.
///
/// Note that this does not include version-related
/// [`unsupported_syntax_errors`](Parsed::unsupported_syntax_errors).
pub fn is_valid(&self) -> bool {
self.errors.is_empty()
}
/// Returns the [`Parsed`] output as a [`Result`], returning [`Ok`] if it has no syntax errors,
/// or [`Err`] containing the first [`ParseError`] encountered.
///
/// Note that any [`unsupported_syntax_errors`](Parsed::unsupported_syntax_errors) will not
/// cause [`Err`] to be returned.
pub fn as_result(&self) -> Result<&Parsed<T>, &[ParseError]> {
if self.is_valid() {
Ok(self)
@ -350,6 +365,9 @@ impl<T> Parsed<T> {
/// Consumes the [`Parsed`] output and returns a [`Result`] which is [`Ok`] if it has no syntax
/// errors, or [`Err`] containing the first [`ParseError`] encountered.
///
/// Note that any [`unsupported_syntax_errors`](Parsed::unsupported_syntax_errors) will not
/// cause [`Err`] to be returned.
pub(crate) fn into_result(self) -> Result<Parsed<T>, ParseError> {
if self.is_valid() {
Ok(self)
@ -373,6 +391,7 @@ impl Parsed<Mod> {
syntax: module,
tokens: self.tokens,
errors: self.errors,
unsupported_syntax_errors: self.unsupported_syntax_errors,
}),
Mod::Expression(_) => None,
}
@ -392,6 +411,7 @@ impl Parsed<Mod> {
syntax: expression,
tokens: self.tokens,
errors: self.errors,
unsupported_syntax_errors: self.unsupported_syntax_errors,
}),
}
}

View file

@ -5,6 +5,7 @@ use bitflags::bitflags;
use ruff_python_ast::{Mod, ModExpression, ModModule};
use ruff_text_size::{Ranged, TextRange, TextSize};
use crate::error::UnsupportedSyntaxError;
use crate::parser::expression::ExpressionContext;
use crate::parser::progress::{ParserProgress, TokenId};
use crate::token::TokenValue;
@ -35,6 +36,9 @@ pub(crate) struct Parser<'src> {
/// Stores all the syntax errors found during the parsing.
errors: Vec<ParseError>,
/// Stores non-fatal syntax errors found during parsing, such as version-related errors.
unsupported_syntax_errors: Vec<UnsupportedSyntaxError>,
/// Options for how the code will be parsed.
options: ParseOptions,
@ -70,6 +74,7 @@ impl<'src> Parser<'src> {
options,
source,
errors: Vec::new(),
unsupported_syntax_errors: Vec::new(),
tokens,
recovery_context: RecoveryContext::empty(),
prev_token_end: TextSize::new(0),
@ -166,6 +171,7 @@ impl<'src> Parser<'src> {
syntax,
tokens: Tokens::new(tokens),
errors: parse_errors,
unsupported_syntax_errors: self.unsupported_syntax_errors,
};
}
@ -197,6 +203,7 @@ impl<'src> Parser<'src> {
syntax,
tokens: Tokens::new(tokens),
errors: merged,
unsupported_syntax_errors: self.unsupported_syntax_errors,
}
}
@ -658,6 +665,7 @@ impl<'src> Parser<'src> {
ParserCheckpoint {
tokens: self.tokens.checkpoint(),
errors_position: self.errors.len(),
unsupported_syntax_errors_position: self.unsupported_syntax_errors.len(),
current_token_id: self.current_token_id,
prev_token_end: self.prev_token_end,
recovery_context: self.recovery_context,
@ -669,6 +677,7 @@ impl<'src> Parser<'src> {
let ParserCheckpoint {
tokens,
errors_position,
unsupported_syntax_errors_position,
current_token_id,
prev_token_end,
recovery_context,
@ -676,6 +685,8 @@ impl<'src> Parser<'src> {
self.tokens.rewind(tokens);
self.errors.truncate(errors_position);
self.unsupported_syntax_errors
.truncate(unsupported_syntax_errors_position);
self.current_token_id = current_token_id;
self.prev_token_end = prev_token_end;
self.recovery_context = recovery_context;
@ -685,6 +696,7 @@ impl<'src> Parser<'src> {
struct ParserCheckpoint {
tokens: TokenSourceCheckpoint,
errors_position: usize,
unsupported_syntax_errors_position: usize,
current_token_id: TokenId,
prev_token_end: TextSize,
recovery_context: RecoveryContext,

View file

@ -1,4 +1,4 @@
use ruff_python_ast::PySourceType;
use ruff_python_ast::{PySourceType, PythonVersion};
use crate::{AsMode, Mode};
@ -20,15 +20,28 @@ use crate::{AsMode, Mode};
///
/// let options = ParseOptions::from(PySourceType::Python);
/// ```
#[derive(Debug)]
#[derive(Clone, Debug)]
pub struct ParseOptions {
/// Specify the mode in which the code will be parsed.
pub(crate) mode: Mode,
/// Target version for detecting version-related syntax errors.
///
/// The `From` conversions initialize this to [`PythonVersion::default`]; use
/// [`ParseOptions::with_target_version`] to override it.
pub(crate) target_version: PythonVersion,
}
impl ParseOptions {
    /// Returns these options with the target Python version replaced by `target_version`.
    ///
    /// All other settings are carried over unchanged.
    #[must_use]
    pub fn with_target_version(self, target_version: PythonVersion) -> Self {
        Self {
            target_version,
            ..self
        }
    }
}
impl From<Mode> for ParseOptions {
/// Creates options that parse in the given `mode`, using the default [`PythonVersion`] as
/// the target version.
fn from(mode: Mode) -> Self {
Self { mode }
Self {
mode,
target_version: PythonVersion::default(),
}
}
}
@ -36,6 +49,7 @@ impl From<PySourceType> for ParseOptions {
/// Creates options whose mode is derived from `source_type`, using the default
/// [`PythonVersion`] as the target version.
fn from(source_type: PySourceType) -> Self {
    let mode = source_type.as_mode();
    let target_version = PythonVersion::default();
    Self {
        mode,
        target_version,
    }
}
}

View file

@ -5,7 +5,8 @@ use rustc_hash::{FxBuildHasher, FxHashSet};
use ruff_python_ast::name::Name;
use ruff_python_ast::{
self as ast, ExceptHandler, Expr, ExprContext, IpyEscapeKind, Operator, Stmt, WithItem,
self as ast, ExceptHandler, Expr, ExprContext, IpyEscapeKind, Operator, PythonVersion, Stmt,
WithItem,
};
use ruff_text_size::{Ranged, TextSize};
@ -16,7 +17,7 @@ use crate::parser::{
};
use crate::token::{TokenKind, TokenValue};
use crate::token_set::TokenSet;
use crate::{Mode, ParseErrorType};
use crate::{Mode, ParseErrorType, UnsupportedSyntaxError, UnsupportedSyntaxErrorKind};
use super::expression::ExpressionContext;
use super::Parenthesized;
@ -2257,11 +2258,21 @@ impl<'src> Parser<'src> {
let start = self.node_start();
self.bump(TokenKind::Match);
let match_range = self.node_range(start);
let subject = self.parse_match_subject_expression();
self.expect(TokenKind::Colon);
let cases = self.parse_match_body();
if self.options.target_version < PythonVersion::PY310 {
self.unsupported_syntax_errors.push(UnsupportedSyntaxError {
kind: UnsupportedSyntaxErrorKind::MatchBeforePy310,
range: match_range,
target_version: self.options.target_version,
});
}
ast::StmtMatch {
subject: Box::new(subject),
cases,