mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-14 15:40:51 +00:00
Start detecting version-related syntax errors in the parser (#16090)
## Summary

This PR builds on the changes in #16220 to pass a target Python version to the parser. It also adds the `Parser::unsupported_syntax_errors` field, which collects version-related syntax errors while parsing. These syntax errors are then turned into `Message`s in ruff (in preview mode).

This PR only detects one syntax error (`match` statement before Python 3.10), but it has been pretty quick to extend to several other simple errors (see #16308 for example).

## Test Plan

The current tests are CLI tests in the linter crate, but these could be supplemented with inline parser tests after #16357. I also tested the display of these syntax errors in VS Code.

---------

Co-authored-by: Alex Waygood <alex.waygood@gmail.com>
This commit is contained in:
parent
b39a4ad01d
commit
78806361fd
14 changed files with 356 additions and 37 deletions
|
@ -1,5 +1,6 @@
|
|||
use std::fmt;
|
||||
use std::fmt::{self, Display};
|
||||
|
||||
use ruff_python_ast::PythonVersion;
|
||||
use ruff_text_size::TextRange;
|
||||
|
||||
use crate::TokenKind;
|
||||
|
@ -426,6 +427,50 @@ impl std::fmt::Display for LexicalErrorType {
|
|||
}
|
||||
}
|
||||
|
||||
/// Represents a version-related syntax error detected during parsing.
|
||||
///
|
||||
/// An example of a version-related error is the use of a `match` statement before Python 3.10, when
|
||||
/// it was first introduced. See [`UnsupportedSyntaxErrorKind`] for other kinds of errors.
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct UnsupportedSyntaxError {
|
||||
/// Which kind of version-gated syntax was encountered.
pub kind: UnsupportedSyntaxErrorKind,
|
||||
/// The source range that this error is attached to.
pub range: TextRange,
|
||||
/// The target [`PythonVersion`] for which this error was detected.
|
||||
///
|
||||
/// This is different from the version reported by the
|
||||
/// [`minimum_version`](UnsupportedSyntaxError::minimum_version) method, which is the earliest
|
||||
/// allowed version for this piece of syntax. The `target_version` is primarily used for
|
||||
/// user-facing error messages.
|
||||
pub target_version: PythonVersion,
|
||||
}
|
||||
|
||||
impl UnsupportedSyntaxError {
|
||||
/// The earliest allowed version for the syntax associated with this error.
///
/// The result depends only on [`kind`](UnsupportedSyntaxError::kind); the stored
/// `target_version` is not consulted.
|
||||
pub const fn minimum_version(&self) -> PythonVersion {
|
||||
// No wildcard arm: adding a new kind will not compile until its minimum version is
// listed here.
match self.kind {
|
||||
UnsupportedSyntaxErrorKind::MatchBeforePy310 => PythonVersion::PY310,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for UnsupportedSyntaxError {
|
||||
/// Writes the user-facing message for this error.
///
/// The message names both the `target_version` the error was detected for and the
/// [`minimum_version`](UnsupportedSyntaxError::minimum_version) in which the syntax was added.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self.kind {
|
||||
UnsupportedSyntaxErrorKind::MatchBeforePy310 => write!(
|
||||
f,
|
||||
"Cannot use `match` statement on Python {} (syntax was added in Python {})",
|
||||
// First placeholder: the version being targeted.
self.target_version,
|
||||
// Second placeholder: the version that introduced the syntax.
self.minimum_version(),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The kinds of version-related syntax errors that an [`UnsupportedSyntaxError`] can describe.
#[derive(Debug, PartialEq, Clone, Copy)]
|
||||
pub enum UnsupportedSyntaxErrorKind {
|
||||
/// A `match` statement used with a target version earlier than Python 3.10.
MatchBeforePy310,
|
||||
}
|
||||
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
mod sizes {
|
||||
use crate::error::{LexicalError, LexicalErrorType};
|
||||
|
|
|
@ -67,7 +67,10 @@
|
|||
use std::iter::FusedIterator;
|
||||
use std::ops::Deref;
|
||||
|
||||
pub use crate::error::{FStringErrorType, LexicalErrorType, ParseError, ParseErrorType};
|
||||
pub use crate::error::{
|
||||
FStringErrorType, LexicalErrorType, ParseError, ParseErrorType, UnsupportedSyntaxError,
|
||||
UnsupportedSyntaxErrorKind,
|
||||
};
|
||||
pub use crate::parser::ParseOptions;
|
||||
pub use crate::token::{Token, TokenKind};
|
||||
|
||||
|
@ -305,6 +308,7 @@ pub struct Parsed<T> {
|
|||
syntax: T,
|
||||
tokens: Tokens,
|
||||
errors: Vec<ParseError>,
|
||||
unsupported_syntax_errors: Vec<UnsupportedSyntaxError>,
|
||||
}
|
||||
|
||||
impl<T> Parsed<T> {
|
||||
|
@ -323,6 +327,11 @@ impl<T> Parsed<T> {
|
|||
&self.errors
|
||||
}
|
||||
|
||||
/// Returns a list of version-related syntax errors found during parsing.
|
||||
pub fn unsupported_syntax_errors(&self) -> &[UnsupportedSyntaxError] {
|
||||
&self.unsupported_syntax_errors
|
||||
}
|
||||
|
||||
/// Consumes the [`Parsed`] output and returns the contained syntax node.
|
||||
pub fn into_syntax(self) -> T {
|
||||
self.syntax
|
||||
|
@ -334,12 +343,18 @@ impl<T> Parsed<T> {
|
|||
}
|
||||
|
||||
/// Returns `true` if the parsed source code is valid i.e., it has no syntax errors.
|
||||
///
|
||||
/// Note that this does not include version-related
|
||||
/// [`unsupported_syntax_errors`](Parsed::unsupported_syntax_errors).
|
||||
pub fn is_valid(&self) -> bool {
|
||||
self.errors.is_empty()
|
||||
}
|
||||
|
||||
/// Returns the [`Parsed`] output as a [`Result`], returning [`Ok`] if it has no syntax errors,
|
||||
/// or [`Err`] containing the [`ParseError`]s encountered.
|
||||
///
|
||||
/// Note that any [`unsupported_syntax_errors`](Parsed::unsupported_syntax_errors) will not
|
||||
/// cause [`Err`] to be returned.
|
||||
pub fn as_result(&self) -> Result<&Parsed<T>, &[ParseError]> {
|
||||
if self.is_valid() {
|
||||
Ok(self)
|
||||
|
@ -350,6 +365,9 @@ impl<T> Parsed<T> {
|
|||
|
||||
/// Consumes the [`Parsed`] output and returns a [`Result`] which is [`Ok`] if it has no syntax
|
||||
/// errors, or [`Err`] containing the first [`ParseError`] encountered.
|
||||
///
|
||||
/// Note that any [`unsupported_syntax_errors`](Parsed::unsupported_syntax_errors) will not
|
||||
/// cause [`Err`] to be returned.
|
||||
pub(crate) fn into_result(self) -> Result<Parsed<T>, ParseError> {
|
||||
if self.is_valid() {
|
||||
Ok(self)
|
||||
|
@ -373,6 +391,7 @@ impl Parsed<Mod> {
|
|||
syntax: module,
|
||||
tokens: self.tokens,
|
||||
errors: self.errors,
|
||||
unsupported_syntax_errors: self.unsupported_syntax_errors,
|
||||
}),
|
||||
Mod::Expression(_) => None,
|
||||
}
|
||||
|
@ -392,6 +411,7 @@ impl Parsed<Mod> {
|
|||
syntax: expression,
|
||||
tokens: self.tokens,
|
||||
errors: self.errors,
|
||||
unsupported_syntax_errors: self.unsupported_syntax_errors,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@ use bitflags::bitflags;
|
|||
use ruff_python_ast::{Mod, ModExpression, ModModule};
|
||||
use ruff_text_size::{Ranged, TextRange, TextSize};
|
||||
|
||||
use crate::error::UnsupportedSyntaxError;
|
||||
use crate::parser::expression::ExpressionContext;
|
||||
use crate::parser::progress::{ParserProgress, TokenId};
|
||||
use crate::token::TokenValue;
|
||||
|
@ -35,6 +36,9 @@ pub(crate) struct Parser<'src> {
|
|||
/// Stores all the syntax errors found during the parsing.
|
||||
errors: Vec<ParseError>,
|
||||
|
||||
/// Stores non-fatal syntax errors found during parsing, such as version-related errors.
|
||||
unsupported_syntax_errors: Vec<UnsupportedSyntaxError>,
|
||||
|
||||
/// Options for how the code will be parsed.
|
||||
options: ParseOptions,
|
||||
|
||||
|
@ -70,6 +74,7 @@ impl<'src> Parser<'src> {
|
|||
options,
|
||||
source,
|
||||
errors: Vec::new(),
|
||||
unsupported_syntax_errors: Vec::new(),
|
||||
tokens,
|
||||
recovery_context: RecoveryContext::empty(),
|
||||
prev_token_end: TextSize::new(0),
|
||||
|
@ -166,6 +171,7 @@ impl<'src> Parser<'src> {
|
|||
syntax,
|
||||
tokens: Tokens::new(tokens),
|
||||
errors: parse_errors,
|
||||
unsupported_syntax_errors: self.unsupported_syntax_errors,
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -197,6 +203,7 @@ impl<'src> Parser<'src> {
|
|||
syntax,
|
||||
tokens: Tokens::new(tokens),
|
||||
errors: merged,
|
||||
unsupported_syntax_errors: self.unsupported_syntax_errors,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -658,6 +665,7 @@ impl<'src> Parser<'src> {
|
|||
ParserCheckpoint {
|
||||
tokens: self.tokens.checkpoint(),
|
||||
errors_position: self.errors.len(),
|
||||
unsupported_syntax_errors_position: self.unsupported_syntax_errors.len(),
|
||||
current_token_id: self.current_token_id,
|
||||
prev_token_end: self.prev_token_end,
|
||||
recovery_context: self.recovery_context,
|
||||
|
@ -669,6 +677,7 @@ impl<'src> Parser<'src> {
|
|||
let ParserCheckpoint {
|
||||
tokens,
|
||||
errors_position,
|
||||
unsupported_syntax_errors_position,
|
||||
current_token_id,
|
||||
prev_token_end,
|
||||
recovery_context,
|
||||
|
@ -676,6 +685,8 @@ impl<'src> Parser<'src> {
|
|||
|
||||
self.tokens.rewind(tokens);
|
||||
self.errors.truncate(errors_position);
|
||||
self.unsupported_syntax_errors
|
||||
.truncate(unsupported_syntax_errors_position);
|
||||
self.current_token_id = current_token_id;
|
||||
self.prev_token_end = prev_token_end;
|
||||
self.recovery_context = recovery_context;
|
||||
|
@ -685,6 +696,7 @@ impl<'src> Parser<'src> {
|
|||
struct ParserCheckpoint {
|
||||
tokens: TokenSourceCheckpoint,
|
||||
errors_position: usize,
|
||||
unsupported_syntax_errors_position: usize,
|
||||
current_token_id: TokenId,
|
||||
prev_token_end: TextSize,
|
||||
recovery_context: RecoveryContext,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
use ruff_python_ast::PySourceType;
|
||||
use ruff_python_ast::{PySourceType, PythonVersion};
|
||||
|
||||
use crate::{AsMode, Mode};
|
||||
|
||||
|
@ -20,15 +20,28 @@ use crate::{AsMode, Mode};
|
|||
///
|
||||
/// let options = ParseOptions::from(PySourceType::Python);
|
||||
/// ```
|
||||
#[derive(Debug)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ParseOptions {
|
||||
/// Specify the mode in which the code will be parsed.
|
||||
pub(crate) mode: Mode,
|
||||
/// Target version for detecting version-related syntax errors.
|
||||
pub(crate) target_version: PythonVersion,
|
||||
}
|
||||
|
||||
impl ParseOptions {
|
||||
/// Returns these options with `target_version` replaced by the given [`PythonVersion`].
///
/// Takes `self` by value and returns it, so the call can be chained onto a freshly
/// constructed `ParseOptions`; the parse mode is left unchanged.
#[must_use]
|
||||
pub fn with_target_version(mut self, target_version: PythonVersion) -> Self {
|
||||
self.target_version = target_version;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Mode> for ParseOptions {
|
||||
fn from(mode: Mode) -> Self {
|
||||
Self { mode }
|
||||
Self {
|
||||
mode,
|
||||
target_version: PythonVersion::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -36,6 +49,7 @@ impl From<PySourceType> for ParseOptions {
|
|||
fn from(source_type: PySourceType) -> Self {
|
||||
Self {
|
||||
mode: source_type.as_mode(),
|
||||
target_version: PythonVersion::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,7 +5,8 @@ use rustc_hash::{FxBuildHasher, FxHashSet};
|
|||
|
||||
use ruff_python_ast::name::Name;
|
||||
use ruff_python_ast::{
|
||||
self as ast, ExceptHandler, Expr, ExprContext, IpyEscapeKind, Operator, Stmt, WithItem,
|
||||
self as ast, ExceptHandler, Expr, ExprContext, IpyEscapeKind, Operator, PythonVersion, Stmt,
|
||||
WithItem,
|
||||
};
|
||||
use ruff_text_size::{Ranged, TextSize};
|
||||
|
||||
|
@ -16,7 +17,7 @@ use crate::parser::{
|
|||
};
|
||||
use crate::token::{TokenKind, TokenValue};
|
||||
use crate::token_set::TokenSet;
|
||||
use crate::{Mode, ParseErrorType};
|
||||
use crate::{Mode, ParseErrorType, UnsupportedSyntaxError, UnsupportedSyntaxErrorKind};
|
||||
|
||||
use super::expression::ExpressionContext;
|
||||
use super::Parenthesized;
|
||||
|
@ -2257,11 +2258,21 @@ impl<'src> Parser<'src> {
|
|||
let start = self.node_start();
|
||||
self.bump(TokenKind::Match);
|
||||
|
||||
let match_range = self.node_range(start);
|
||||
|
||||
let subject = self.parse_match_subject_expression();
|
||||
self.expect(TokenKind::Colon);
|
||||
|
||||
let cases = self.parse_match_body();
|
||||
|
||||
if self.options.target_version < PythonVersion::PY310 {
|
||||
self.unsupported_syntax_errors.push(UnsupportedSyntaxError {
|
||||
kind: UnsupportedSyntaxErrorKind::MatchBeforePy310,
|
||||
range: match_range,
|
||||
target_version: self.options.target_version,
|
||||
});
|
||||
}
|
||||
|
||||
ast::StmtMatch {
|
||||
subject: Box::new(subject),
|
||||
cases,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue