mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-11 14:18:28 +00:00

## Summary Garbage collect ASTs once we are done checking a given file. Queries with a cross-file dependency on the AST will reparse the file on demand. This reduces ty's peak memory usage by ~20-30%. The primary change of this PR is adding a `node_index` field to every AST node, that is assigned by the parser. `ParsedModule` can use this to create a flat index of AST nodes any time the file is parsed (or reparsed). This allows `AstNodeRef` to simply index into the current instance of the `ParsedModule`, instead of storing a pointer directly. The indices are somewhat hackily (using an atomic integer) assigned by the `parsed_module` query instead of by the parser directly. Assigning the indices in source-order in the (recursive) parser turns out to be difficult, and collecting the nodes during semantic indexing is impossible as `SemanticIndex` does not hold onto a specific `ParsedModuleRef`, which the pointers in the flat AST are tied to. This means that we have to do an extra AST traversal to assign and collect the nodes into a flat index, but the small performance impact (~3% on cold runs) seems worth it for the memory savings. Part of https://github.com/astral-sh/ty/issues/214.
1464 lines
56 KiB
Rust
1464 lines
56 KiB
Rust
use std::cmp::Ordering;
|
|
|
|
use bitflags::bitflags;
|
|
|
|
use ruff_python_ast::{AtomicNodeIndex, Mod, ModExpression, ModModule};
|
|
use ruff_text_size::{Ranged, TextRange, TextSize};
|
|
|
|
use crate::error::UnsupportedSyntaxError;
|
|
use crate::parser::expression::ExpressionContext;
|
|
use crate::parser::progress::{ParserProgress, TokenId};
|
|
use crate::token::TokenValue;
|
|
use crate::token_set::TokenSet;
|
|
use crate::token_source::{TokenSource, TokenSourceCheckpoint};
|
|
use crate::{Mode, ParseError, ParseErrorType, TokenKind, UnsupportedSyntaxErrorKind};
|
|
use crate::{Parsed, Tokens};
|
|
|
|
pub use crate::parser::options::ParseOptions;
|
|
|
|
mod expression;
|
|
mod helpers;
|
|
mod options;
|
|
mod pattern;
|
|
mod progress;
|
|
mod recovery;
|
|
mod statement;
|
|
#[cfg(test)]
|
|
mod tests;
|
|
|
|
#[derive(Debug)]
|
|
pub(crate) struct Parser<'src> {
|
|
source: &'src str,
|
|
|
|
/// Token source for the parser that skips over any non-trivia token.
|
|
tokens: TokenSource<'src>,
|
|
|
|
/// Stores all the syntax errors found during the parsing.
|
|
errors: Vec<ParseError>,
|
|
|
|
/// Stores non-fatal syntax errors found during parsing, such as version-related errors.
|
|
unsupported_syntax_errors: Vec<UnsupportedSyntaxError>,
|
|
|
|
/// Options for how the code will be parsed.
|
|
options: ParseOptions,
|
|
|
|
/// The ID of the current token. This is used to track the progress of the parser
|
|
/// to avoid infinite loops when the parser is stuck.
|
|
current_token_id: TokenId,
|
|
|
|
/// The end of the previous token processed. This is used to determine a node's end.
|
|
prev_token_end: TextSize,
|
|
|
|
/// The recovery context in which the parser is currently in.
|
|
recovery_context: RecoveryContext,
|
|
|
|
/// The start offset in the source code from which to start parsing at.
|
|
start_offset: TextSize,
|
|
}
|
|
|
|
impl<'src> Parser<'src> {
|
|
/// Create a new parser for the given source code.
|
|
pub(crate) fn new(source: &'src str, options: ParseOptions) -> Self {
|
|
Parser::new_starts_at(source, TextSize::new(0), options)
|
|
}
|
|
|
|
/// Create a new parser for the given source code which starts parsing at the given offset.
|
|
pub(crate) fn new_starts_at(
|
|
source: &'src str,
|
|
start_offset: TextSize,
|
|
options: ParseOptions,
|
|
) -> Self {
|
|
let tokens = TokenSource::from_source(source, options.mode, start_offset);
|
|
|
|
Parser {
|
|
options,
|
|
source,
|
|
errors: Vec::new(),
|
|
unsupported_syntax_errors: Vec::new(),
|
|
tokens,
|
|
recovery_context: RecoveryContext::empty(),
|
|
prev_token_end: TextSize::new(0),
|
|
start_offset,
|
|
current_token_id: TokenId::default(),
|
|
}
|
|
}
|
|
|
|
/// Consumes the [`Parser`] and returns the parsed [`Parsed`].
|
|
pub(crate) fn parse(mut self) -> Parsed<Mod> {
|
|
let syntax = match self.options.mode {
|
|
Mode::Expression | Mode::ParenthesizedExpression => {
|
|
Mod::Expression(self.parse_single_expression())
|
|
}
|
|
Mode::Module | Mode::Ipython => Mod::Module(self.parse_module()),
|
|
};
|
|
|
|
self.finish(syntax)
|
|
}
|
|
|
|
/// Parses a single expression.
|
|
///
|
|
/// This is to be used for [`Mode::Expression`].
|
|
///
|
|
/// ## Recovery
|
|
///
|
|
/// After parsing a single expression, an error is reported and all remaining tokens are
|
|
/// dropped by the parser.
|
|
fn parse_single_expression(&mut self) -> ModExpression {
|
|
let start = self.node_start();
|
|
let parsed_expr = self.parse_expression_list(ExpressionContext::default());
|
|
|
|
// All remaining newlines are actually going to be non-logical newlines.
|
|
self.eat(TokenKind::Newline);
|
|
|
|
if !self.at(TokenKind::EndOfFile) {
|
|
self.add_error(
|
|
ParseErrorType::UnexpectedExpressionToken,
|
|
self.current_token_range(),
|
|
);
|
|
|
|
// TODO(dhruvmanila): How should error recovery work here? Just truncate after the expression?
|
|
let mut progress = ParserProgress::default();
|
|
loop {
|
|
progress.assert_progressing(self);
|
|
if self.at(TokenKind::EndOfFile) {
|
|
break;
|
|
}
|
|
self.bump_any();
|
|
}
|
|
}
|
|
|
|
self.bump(TokenKind::EndOfFile);
|
|
|
|
ModExpression {
|
|
body: Box::new(parsed_expr.expr),
|
|
range: self.node_range(start),
|
|
node_index: AtomicNodeIndex::dummy(),
|
|
}
|
|
}
|
|
|
|
/// Parses a Python module.
|
|
///
|
|
/// This is to be used for [`Mode::Module`] and [`Mode::Ipython`].
|
|
fn parse_module(&mut self) -> ModModule {
|
|
let body = self.parse_list_into_vec(
|
|
RecoveryContextKind::ModuleStatements,
|
|
Parser::parse_statement,
|
|
);
|
|
|
|
self.bump(TokenKind::EndOfFile);
|
|
|
|
ModModule {
|
|
body,
|
|
range: TextRange::new(self.start_offset, self.current_token_range().end()),
|
|
node_index: AtomicNodeIndex::dummy(),
|
|
}
|
|
}
|
|
|
|
fn finish(self, syntax: Mod) -> Parsed<Mod> {
|
|
assert_eq!(
|
|
self.current_token_kind(),
|
|
TokenKind::EndOfFile,
|
|
"Parser should be at the end of the file."
|
|
);
|
|
|
|
// TODO consider re-integrating lexical error handling into the parser?
|
|
let parse_errors = self.errors;
|
|
let (tokens, lex_errors) = self.tokens.finish();
|
|
|
|
// Fast path for when there are no lex errors.
|
|
// There's no fast path for when there are no parse errors because a lex error
|
|
// always results in a parse error.
|
|
if lex_errors.is_empty() {
|
|
return Parsed {
|
|
syntax,
|
|
tokens: Tokens::new(tokens),
|
|
errors: parse_errors,
|
|
unsupported_syntax_errors: self.unsupported_syntax_errors,
|
|
};
|
|
}
|
|
|
|
let mut merged = Vec::with_capacity(parse_errors.len().saturating_add(lex_errors.len()));
|
|
|
|
let mut parse_errors = parse_errors.into_iter().peekable();
|
|
let mut lex_errors = lex_errors.into_iter().peekable();
|
|
|
|
while let (Some(parse_error), Some(lex_error)) = (parse_errors.peek(), lex_errors.peek()) {
|
|
match parse_error
|
|
.location
|
|
.start()
|
|
.cmp(&lex_error.location().start())
|
|
{
|
|
Ordering::Less => merged.push(parse_errors.next().unwrap()),
|
|
Ordering::Equal => {
|
|
// Skip the parse error if we already have a lex error at the same location..
|
|
parse_errors.next().unwrap();
|
|
merged.push(lex_errors.next().unwrap().into());
|
|
}
|
|
Ordering::Greater => merged.push(lex_errors.next().unwrap().into()),
|
|
}
|
|
}
|
|
|
|
merged.extend(parse_errors);
|
|
merged.extend(lex_errors.map(ParseError::from));
|
|
|
|
Parsed {
|
|
syntax,
|
|
tokens: Tokens::new(tokens),
|
|
errors: merged,
|
|
unsupported_syntax_errors: self.unsupported_syntax_errors,
|
|
}
|
|
}
|
|
|
|
/// Returns the start position for a node that starts at the current token.
|
|
fn node_start(&self) -> TextSize {
|
|
self.current_token_range().start()
|
|
}
|
|
|
|
fn node_range(&self, start: TextSize) -> TextRange {
|
|
// It's possible during error recovery that the parsing didn't consume any tokens. In that
|
|
// case, `last_token_end` still points to the end of the previous token but `start` is the
|
|
// start of the current token. Calling `TextRange::new(start, self.last_token_end)` would
|
|
// panic in that case because `start > end`. This path "detects" this case and creates an
|
|
// empty range instead.
|
|
//
|
|
// The reason it's `<=` instead of just `==` is because there could be whitespaces between
|
|
// the two tokens. For example:
|
|
//
|
|
// ```python
|
|
// # last token end
|
|
// # | current token (newline) start
|
|
// # v v
|
|
// def foo \n
|
|
// # ^
|
|
// # assume there's trailing whitespace here
|
|
// ```
|
|
//
|
|
// Or, there could tokens that are considered "trivia" and thus aren't emitted by the token
|
|
// source. These are comments and non-logical newlines. For example:
|
|
//
|
|
// ```python
|
|
// # last token end
|
|
// # v
|
|
// def foo # comment\n
|
|
// # ^ current token (newline) start
|
|
// ```
|
|
//
|
|
// In either of the above cases, there's a "gap" between the end of the last token and start
|
|
// of the current token.
|
|
if self.prev_token_end <= start {
|
|
// We need to create an empty range at the last token end instead of the start because
|
|
// otherwise this node range will fall outside the range of it's parent node. Taking
|
|
// the above example:
|
|
//
|
|
// ```python
|
|
// if True:
|
|
// # function start
|
|
// # | function end
|
|
// # v v
|
|
// def foo # comment
|
|
// # ^ current token start
|
|
// ```
|
|
//
|
|
// Here, the current token start is the start of parameter range but the function ends
|
|
// at `foo`. Even if there's a function body, the range of parameters would still be
|
|
// before the comment.
|
|
|
|
// test_err node_range_with_gaps
|
|
// def foo # comment
|
|
// def bar(): ...
|
|
// def baz
|
|
TextRange::empty(self.prev_token_end)
|
|
} else {
|
|
TextRange::new(start, self.prev_token_end)
|
|
}
|
|
}
|
|
|
|
fn missing_node_range(&self) -> TextRange {
|
|
// TODO(dhruvmanila): This range depends on whether the missing node is
|
|
// on the leftmost or the rightmost of the expression. It's incorrect for
|
|
// the leftmost missing node because the range is outside the expression
|
|
// range. For example,
|
|
//
|
|
// ```python
|
|
// value = ** y
|
|
// # ^^^^ expression range
|
|
// # ^ last token end
|
|
// ```
|
|
TextRange::empty(self.prev_token_end)
|
|
}
|
|
|
|
/// Moves the parser to the next token.
|
|
fn do_bump(&mut self, kind: TokenKind) {
|
|
if !matches!(
|
|
self.current_token_kind(),
|
|
// TODO explore including everything up to the dedent as part of the body.
|
|
TokenKind::Dedent
|
|
// Don't include newlines in the body
|
|
| TokenKind::Newline
|
|
// TODO(micha): Including the semi feels more correct but it isn't compatible with lalrpop and breaks the
|
|
// formatters semicolon detection. Exclude it for now
|
|
| TokenKind::Semi
|
|
) {
|
|
self.prev_token_end = self.current_token_range().end();
|
|
}
|
|
|
|
self.tokens.bump(kind);
|
|
self.current_token_id.increment();
|
|
}
|
|
|
|
/// Returns the next token kind without consuming it.
|
|
fn peek(&mut self) -> TokenKind {
|
|
self.tokens.peek()
|
|
}
|
|
|
|
/// Returns the next two token kinds without consuming it.
|
|
fn peek2(&mut self) -> (TokenKind, TokenKind) {
|
|
self.tokens.peek2()
|
|
}
|
|
|
|
/// Returns the current token kind.
|
|
#[inline]
|
|
fn current_token_kind(&self) -> TokenKind {
|
|
self.tokens.current_kind()
|
|
}
|
|
|
|
/// Returns the range of the current token.
|
|
#[inline]
|
|
fn current_token_range(&self) -> TextRange {
|
|
self.tokens.current_range()
|
|
}
|
|
|
|
/// Returns the current token ID.
|
|
#[inline]
|
|
fn current_token_id(&self) -> TokenId {
|
|
self.current_token_id
|
|
}
|
|
|
|
/// Bumps the current token assuming it is of the given kind.
|
|
///
|
|
/// # Panics
|
|
///
|
|
/// If the current token is not of the given kind.
|
|
fn bump(&mut self, kind: TokenKind) {
|
|
assert_eq!(self.current_token_kind(), kind);
|
|
|
|
self.do_bump(kind);
|
|
}
|
|
|
|
/// Take the token value from the underlying token source and bump the current token.
|
|
///
|
|
/// # Panics
|
|
///
|
|
/// If the current token is not of the given kind.
|
|
fn bump_value(&mut self, kind: TokenKind) -> TokenValue {
|
|
let value = self.tokens.take_value();
|
|
self.bump(kind);
|
|
value
|
|
}
|
|
|
|
/// Bumps the current token assuming it is found in the given token set.
|
|
///
|
|
/// # Panics
|
|
///
|
|
/// If the current token is not found in the given token set.
|
|
fn bump_ts(&mut self, ts: TokenSet) {
|
|
let kind = self.current_token_kind();
|
|
assert!(ts.contains(kind));
|
|
|
|
self.do_bump(kind);
|
|
}
|
|
|
|
/// Bumps the current token regardless of its kind and advances to the next token.
|
|
///
|
|
/// # Panics
|
|
///
|
|
/// If the parser is at end of file.
|
|
fn bump_any(&mut self) {
|
|
let kind = self.current_token_kind();
|
|
assert_ne!(kind, TokenKind::EndOfFile);
|
|
|
|
self.do_bump(kind);
|
|
}
|
|
|
|
/// Bumps the soft keyword token as a `Name` token.
|
|
///
|
|
/// # Panics
|
|
///
|
|
/// If the current token is not a soft keyword.
|
|
pub(crate) fn bump_soft_keyword_as_name(&mut self) {
|
|
assert!(self.at_soft_keyword());
|
|
|
|
self.do_bump(TokenKind::Name);
|
|
}
|
|
|
|
/// Consume the current token if it is of the given kind. Returns `true` if it matches, `false`
|
|
/// otherwise.
|
|
fn eat(&mut self, kind: TokenKind) -> bool {
|
|
if self.at(kind) {
|
|
self.do_bump(kind);
|
|
true
|
|
} else {
|
|
false
|
|
}
|
|
}
|
|
|
|
/// Eat the current token if its of the expected kind, otherwise adds an appropriate error.
|
|
fn expect(&mut self, expected: TokenKind) -> bool {
|
|
if self.eat(expected) {
|
|
return true;
|
|
}
|
|
|
|
self.add_error(
|
|
ParseErrorType::ExpectedToken {
|
|
found: self.current_token_kind(),
|
|
expected,
|
|
},
|
|
self.current_token_range(),
|
|
);
|
|
|
|
false
|
|
}
|
|
|
|
fn add_error<T>(&mut self, error: ParseErrorType, ranged: T)
|
|
where
|
|
T: Ranged,
|
|
{
|
|
fn inner(errors: &mut Vec<ParseError>, error: ParseErrorType, range: TextRange) {
|
|
// Avoid flagging multiple errors at the same location
|
|
let is_same_location = errors
|
|
.last()
|
|
.is_some_and(|last| last.location.start() == range.start());
|
|
|
|
if !is_same_location {
|
|
errors.push(ParseError {
|
|
error,
|
|
location: range,
|
|
});
|
|
}
|
|
}
|
|
|
|
inner(&mut self.errors, error, ranged.range());
|
|
}
|
|
|
|
/// Add an [`UnsupportedSyntaxError`] with the given [`UnsupportedSyntaxErrorKind`] and
|
|
/// [`TextRange`] if its minimum version is less than [`Parser::target_version`].
|
|
fn add_unsupported_syntax_error(&mut self, kind: UnsupportedSyntaxErrorKind, range: TextRange) {
|
|
if kind.is_unsupported(self.options.target_version) {
|
|
self.unsupported_syntax_errors.push(UnsupportedSyntaxError {
|
|
kind,
|
|
range,
|
|
target_version: self.options.target_version,
|
|
});
|
|
}
|
|
}
|
|
|
|
/// Returns `true` if the current token is of the given kind.
|
|
fn at(&self, kind: TokenKind) -> bool {
|
|
self.current_token_kind() == kind
|
|
}
|
|
|
|
/// Returns `true` if the current token is found in the given token set.
|
|
fn at_ts(&self, ts: TokenSet) -> bool {
|
|
ts.contains(self.current_token_kind())
|
|
}
|
|
|
|
fn src_text<T>(&self, ranged: T) -> &'src str
|
|
where
|
|
T: Ranged,
|
|
{
|
|
&self.source[ranged.range()]
|
|
}
|
|
|
|
/// Parses a list of elements into a vector where each element is parsed using
|
|
/// the given `parse_element` function.
|
|
fn parse_list_into_vec<T>(
|
|
&mut self,
|
|
recovery_context_kind: RecoveryContextKind,
|
|
parse_element: impl Fn(&mut Parser<'src>) -> T,
|
|
) -> Vec<T> {
|
|
let mut elements = Vec::new();
|
|
self.parse_list(recovery_context_kind, |p| elements.push(parse_element(p)));
|
|
elements
|
|
}
|
|
|
|
/// Parses a list of elements where each element is parsed using the given
|
|
/// `parse_element` function.
|
|
///
|
|
/// The difference between this function and `parse_list_into_vec` is that
|
|
/// this function does not return the parsed elements. Instead, it is the
|
|
/// caller's responsibility to handle the parsed elements. This is the reason
|
|
/// that the `parse_element` parameter is bound to [`FnMut`] instead of [`Fn`].
|
|
fn parse_list(
|
|
&mut self,
|
|
recovery_context_kind: RecoveryContextKind,
|
|
mut parse_element: impl FnMut(&mut Parser<'src>),
|
|
) {
|
|
let mut progress = ParserProgress::default();
|
|
|
|
let saved_context = self.recovery_context;
|
|
self.recovery_context = self
|
|
.recovery_context
|
|
.union(RecoveryContext::from_kind(recovery_context_kind));
|
|
|
|
loop {
|
|
progress.assert_progressing(self);
|
|
|
|
if recovery_context_kind.is_list_element(self) {
|
|
parse_element(self);
|
|
} else if recovery_context_kind.is_regular_list_terminator(self) {
|
|
break;
|
|
} else {
|
|
// Run the error recovery: If the token is recognised as an element or terminator
|
|
// of an enclosing list, then we try to re-lex in the context of a logical line and
|
|
// break out of list parsing.
|
|
if self.is_enclosing_list_element_or_terminator() {
|
|
self.tokens.re_lex_logical_token();
|
|
break;
|
|
}
|
|
|
|
self.add_error(
|
|
recovery_context_kind.create_error(self),
|
|
self.current_token_range(),
|
|
);
|
|
|
|
self.bump_any();
|
|
}
|
|
}
|
|
|
|
self.recovery_context = saved_context;
|
|
}
|
|
|
|
/// Parses a comma separated list of elements into a vector where each element
|
|
/// is parsed using the given `parse_element` function.
|
|
fn parse_comma_separated_list_into_vec<T>(
|
|
&mut self,
|
|
recovery_context_kind: RecoveryContextKind,
|
|
parse_element: impl Fn(&mut Parser<'src>) -> T,
|
|
) -> Vec<T> {
|
|
let mut elements = Vec::new();
|
|
self.parse_comma_separated_list(recovery_context_kind, |p| elements.push(parse_element(p)));
|
|
elements
|
|
}
|
|
|
|
/// Parses a comma separated list of elements where each element is parsed
|
|
/// using the given `parse_element` function.
|
|
///
|
|
/// The difference between this function and `parse_comma_separated_list_into_vec`
|
|
/// is that this function does not return the parsed elements. Instead, it is the
|
|
/// caller's responsibility to handle the parsed elements. This is the reason
|
|
/// that the `parse_element` parameter is bound to [`FnMut`] instead of [`Fn`].
|
|
///
|
|
/// Returns `true` if there is a trailing comma present.
|
|
fn parse_comma_separated_list(
|
|
&mut self,
|
|
recovery_context_kind: RecoveryContextKind,
|
|
mut parse_element: impl FnMut(&mut Parser<'src>),
|
|
) -> bool {
|
|
let mut progress = ParserProgress::default();
|
|
|
|
let saved_context = self.recovery_context;
|
|
self.recovery_context = self
|
|
.recovery_context
|
|
.union(RecoveryContext::from_kind(recovery_context_kind));
|
|
|
|
let mut first_element = true;
|
|
let mut trailing_comma_range: Option<TextRange> = None;
|
|
|
|
loop {
|
|
progress.assert_progressing(self);
|
|
|
|
if recovery_context_kind.is_list_element(self) {
|
|
parse_element(self);
|
|
|
|
// Only unset this when we've completely parsed a single element. This is mainly to
|
|
// raise the correct error in case the first element isn't valid and the current
|
|
// token isn't a comma. Without this knowledge, the parser would later expect a
|
|
// comma instead of raising the context error.
|
|
first_element = false;
|
|
|
|
let maybe_comma_range = self.current_token_range();
|
|
if self.eat(TokenKind::Comma) {
|
|
trailing_comma_range = Some(maybe_comma_range);
|
|
continue;
|
|
}
|
|
trailing_comma_range = None;
|
|
}
|
|
|
|
// test_ok comma_separated_regular_list_terminator
|
|
// # The first element is parsed by `parse_list_like_expression` and the comma after
|
|
// # the first element is expected by `parse_list_expression`
|
|
// [0]
|
|
// [0, 1]
|
|
// [0, 1,]
|
|
// [0, 1, 2]
|
|
// [0, 1, 2,]
|
|
if recovery_context_kind.is_regular_list_terminator(self) {
|
|
break;
|
|
}
|
|
|
|
// test_err comma_separated_missing_comma_between_elements
|
|
// # The comma between the first two elements is expected in `parse_list_expression`.
|
|
// [0, 1 2]
|
|
if recovery_context_kind.is_list_element(self) {
|
|
// This is a special case to expect a comma between two elements and should be
|
|
// checked before running the error recovery. This is because the error recovery
|
|
// will always run as the parser is currently at a list element.
|
|
self.expect(TokenKind::Comma);
|
|
continue;
|
|
}
|
|
|
|
// Run the error recovery: If the token is recognised as an element or terminator of an
|
|
// enclosing list, then we try to re-lex in the context of a logical line and break out
|
|
// of list parsing.
|
|
if self.is_enclosing_list_element_or_terminator() {
|
|
self.tokens.re_lex_logical_token();
|
|
break;
|
|
}
|
|
|
|
if first_element || self.at(TokenKind::Comma) {
|
|
// There are two conditions when we need to add the recovery context error:
|
|
//
|
|
// 1. If the parser is at a comma which means that there's a missing element
|
|
// otherwise the comma would've been consumed by the first `eat` call above.
|
|
// And, the parser doesn't take the re-lexing route on a comma token.
|
|
// 2. If it's the first element and the current token is not a comma which means
|
|
// that it's an invalid element.
|
|
|
|
// test_err comma_separated_missing_element_between_commas
|
|
// [0, 1, , 2]
|
|
|
|
// test_err comma_separated_missing_first_element
|
|
// call(= 1)
|
|
self.add_error(
|
|
recovery_context_kind.create_error(self),
|
|
self.current_token_range(),
|
|
);
|
|
|
|
trailing_comma_range = if self.at(TokenKind::Comma) {
|
|
Some(self.current_token_range())
|
|
} else {
|
|
None
|
|
};
|
|
} else {
|
|
// Otherwise, there should've been a comma at this position. This could be because
|
|
// the element isn't consumed completely by `parse_element`.
|
|
|
|
// test_err comma_separated_missing_comma
|
|
// call(**x := 1)
|
|
self.expect(TokenKind::Comma);
|
|
|
|
trailing_comma_range = None;
|
|
}
|
|
|
|
self.bump_any();
|
|
}
|
|
|
|
if let Some(trailing_comma_range) = trailing_comma_range {
|
|
if !recovery_context_kind.allow_trailing_comma() {
|
|
self.add_error(
|
|
ParseErrorType::OtherError("Trailing comma not allowed".to_string()),
|
|
trailing_comma_range,
|
|
);
|
|
}
|
|
}
|
|
|
|
self.recovery_context = saved_context;
|
|
|
|
trailing_comma_range.is_some()
|
|
}
|
|
|
|
#[cold]
|
|
fn is_enclosing_list_element_or_terminator(&self) -> bool {
|
|
for context in self.recovery_context.kind_iter() {
|
|
if context.is_list_terminator(self) || context.is_list_element(self) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
false
|
|
}
|
|
|
|
/// Creates a checkpoint to which the parser can later return to using [`Self::rewind`].
|
|
fn checkpoint(&self) -> ParserCheckpoint {
|
|
ParserCheckpoint {
|
|
tokens: self.tokens.checkpoint(),
|
|
errors_position: self.errors.len(),
|
|
unsupported_syntax_errors_position: self.unsupported_syntax_errors.len(),
|
|
current_token_id: self.current_token_id,
|
|
prev_token_end: self.prev_token_end,
|
|
recovery_context: self.recovery_context,
|
|
}
|
|
}
|
|
|
|
/// Restore the parser to the given checkpoint.
|
|
fn rewind(&mut self, checkpoint: ParserCheckpoint) {
|
|
let ParserCheckpoint {
|
|
tokens,
|
|
errors_position,
|
|
unsupported_syntax_errors_position,
|
|
current_token_id,
|
|
prev_token_end,
|
|
recovery_context,
|
|
} = checkpoint;
|
|
|
|
self.tokens.rewind(tokens);
|
|
self.errors.truncate(errors_position);
|
|
self.unsupported_syntax_errors
|
|
.truncate(unsupported_syntax_errors_position);
|
|
self.current_token_id = current_token_id;
|
|
self.prev_token_end = prev_token_end;
|
|
self.recovery_context = recovery_context;
|
|
}
|
|
}
|
|
|
|
struct ParserCheckpoint {
|
|
tokens: TokenSourceCheckpoint,
|
|
errors_position: usize,
|
|
unsupported_syntax_errors_position: usize,
|
|
current_token_id: TokenId,
|
|
prev_token_end: TextSize,
|
|
recovery_context: RecoveryContext,
|
|
}
|
|
|
|
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
|
|
enum SequenceMatchPatternParentheses {
|
|
Tuple,
|
|
List,
|
|
}
|
|
|
|
impl SequenceMatchPatternParentheses {
|
|
/// Returns the token kind that closes the parentheses.
|
|
const fn closing_kind(self) -> TokenKind {
|
|
match self {
|
|
SequenceMatchPatternParentheses::Tuple => TokenKind::Rpar,
|
|
SequenceMatchPatternParentheses::List => TokenKind::Rsqb,
|
|
}
|
|
}
|
|
|
|
/// Returns `true` if the parentheses are for a list pattern e.g., `case [a, b]: ...`.
|
|
const fn is_list(self) -> bool {
|
|
matches!(self, SequenceMatchPatternParentheses::List)
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, PartialEq, Copy, Clone)]
|
|
enum FunctionKind {
|
|
/// A lambda expression, e.g., `lambda x: x`
|
|
Lambda,
|
|
/// A function definition, e.g., `def f(x): ...`
|
|
FunctionDef,
|
|
}
|
|
|
|
impl FunctionKind {
|
|
/// Returns the token that terminates a list of parameters.
|
|
const fn list_terminator(self) -> TokenKind {
|
|
match self {
|
|
FunctionKind::Lambda => TokenKind::Colon,
|
|
FunctionKind::FunctionDef => TokenKind::Rpar,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// How the items of a `with` statement relate to parentheses.
#[derive(Debug, Clone, Copy, PartialEq)]
enum WithItemKind {
    /// The whole list of `with` items is wrapped in parentheses.
    ///
    /// ```python
    /// with (item1, item2): ...
    /// with (item1, item2 as foo): ...
    /// ```
    ///
    /// The parentheses belong to the `with` statement.
    Parenthesized,

    /// The `with` item has a parenthesized expression.
    ///
    /// ```python
    /// with (item) as foo: ...
    /// ```
    ///
    /// The parentheses belong to the context expression.
    ParenthesizedExpression,

    /// The `with` items aren't parenthesized in any way.
    ///
    /// ```python
    /// with item: ...
    /// with item as foo: ...
    /// with item1, item2: ...
    /// ```
    ///
    /// There are no parentheses around the items.
    Unparenthesized,
}

impl WithItemKind {
    /// Returns `true` if the with items are parenthesized, i.e. only for
    /// [`WithItemKind::Parenthesized`].
    const fn is_parenthesized(self) -> bool {
        match self {
            Self::Parenthesized => true,
            Self::ParenthesizedExpression | Self::Unparenthesized => false,
        }
    }
}
|
|
|
|
#[derive(Debug, PartialEq, Copy, Clone)]
|
|
enum InterpolatedStringElementsKind {
|
|
/// The regular f-string elements.
|
|
///
|
|
/// For example, the `"hello "`, `x`, and `" world"` elements in:
|
|
/// ```py
|
|
/// f"hello {x:.2f} world"
|
|
/// ```
|
|
Regular,
|
|
|
|
/// The f-string elements are part of the format specifier.
|
|
///
|
|
/// For example, the `.2f` in:
|
|
/// ```py
|
|
/// f"hello {x:.2f} world"
|
|
/// ```
|
|
FormatSpec,
|
|
}
|
|
|
|
impl InterpolatedStringElementsKind {
|
|
const fn list_terminators(self) -> TokenSet {
|
|
match self {
|
|
InterpolatedStringElementsKind::Regular => {
|
|
TokenSet::new([TokenKind::FStringEnd, TokenKind::TStringEnd])
|
|
}
|
|
// test_ok fstring_format_spec_terminator
|
|
// f"hello {x:} world"
|
|
// f"hello {x:.3f} world"
|
|
InterpolatedStringElementsKind::FormatSpec => TokenSet::new([TokenKind::Rbrace]),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Whether a sequence of elements is wrapped in parentheses.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Parenthesized {
    /// The elements are parenthesized, e.g., `(a, b)`.
    Yes,
    /// The elements are not parenthesized, e.g., `a, b`.
    No,
}

impl From<bool> for Parenthesized {
    /// Converts `true` into [`Parenthesized::Yes`] and `false` into [`Parenthesized::No`].
    fn from(value: bool) -> Self {
        if value { Self::Yes } else { Self::No }
    }
}

impl Parenthesized {
    /// Returns `true` if the parenthesized value is `Yes`.
    const fn is_yes(self) -> bool {
        match self {
            Self::Yes => true,
            Self::No => false,
        }
    }
}
|
|
|
|
/// Why a token counts as the terminator of a list.
#[derive(Debug, Clone, Copy)]
enum ListTerminatorKind {
    /// The current token terminates the list.
    Regular,
    /// The current token doesn't terminate the list, but is useful for better error recovery.
    ErrorRecovery,
}
|
|
|
|
#[derive(Copy, Clone, Debug)]
|
|
enum RecoveryContextKind {
|
|
/// When parsing a list of statements at the module level i.e., at the top level of a file.
|
|
ModuleStatements,
|
|
|
|
/// When parsing a list of statements in a block e.g., the body of a function or a class.
|
|
BlockStatements,
|
|
|
|
/// The `elif` clauses of an `if` statement
|
|
Elif,
|
|
|
|
/// The `except` clauses of a `try` statement
|
|
Except,
|
|
|
|
/// When parsing a list of assignment targets
|
|
AssignmentTargets,
|
|
|
|
/// When parsing a list of type parameters
|
|
TypeParams,
|
|
|
|
/// When parsing a list of names in a `from ... import ...` statement
|
|
ImportFromAsNames(Parenthesized),
|
|
|
|
/// When parsing a list of names in an `import` statement
|
|
ImportNames,
|
|
|
|
/// When parsing a list of slice elements e.g., `data[1, 2]`.
|
|
///
|
|
/// This is different from `ListElements` as the surrounding context is
|
|
/// different in that the list is part of a subscript expression.
|
|
Slices,
|
|
|
|
/// When parsing a list of elements in a list expression e.g., `[1, 2]`
|
|
ListElements,
|
|
|
|
/// When parsing a list of elements in a set expression e.g., `{1, 2}`
|
|
SetElements,
|
|
|
|
/// When parsing a list of elements in a dictionary expression e.g., `{1: "a", **data}`
|
|
DictElements,
|
|
|
|
/// When parsing a list of elements in a tuple expression e.g., `(1, 2)`
|
|
TupleElements(Parenthesized),
|
|
|
|
/// When parsing a list of patterns in a match statement with an optional
|
|
/// parentheses, e.g., `case a, b: ...`, `case (a, b): ...`, `case [a, b]: ...`
|
|
SequenceMatchPattern(Option<SequenceMatchPatternParentheses>),
|
|
|
|
/// When parsing a mapping pattern in a match statement
|
|
MatchPatternMapping,
|
|
|
|
/// When parsing a list of arguments in a class pattern for the match statement
|
|
MatchPatternClassArguments,
|
|
|
|
/// When parsing a list of arguments in a function call or a class definition
|
|
Arguments,
|
|
|
|
/// When parsing a `del` statement
|
|
DeleteTargets,
|
|
|
|
/// When parsing a list of identifiers
|
|
Identifiers,
|
|
|
|
/// When parsing a list of parameters in a function definition which can be
|
|
/// either a function definition or a lambda expression.
|
|
Parameters(FunctionKind),
|
|
|
|
/// When parsing a list of items in a `with` statement
|
|
WithItems(WithItemKind),
|
|
|
|
/// When parsing a list of f-string or t-string elements which are either literal elements, expressions, or interpolations.
|
|
InterpolatedStringElements(InterpolatedStringElementsKind),
|
|
}
|
|
|
|
impl RecoveryContextKind {
|
|
/// Returns `true` if a trailing comma is allowed in the current context.
|
|
const fn allow_trailing_comma(self) -> bool {
|
|
matches!(
|
|
self,
|
|
RecoveryContextKind::Slices
|
|
| RecoveryContextKind::TupleElements(_)
|
|
| RecoveryContextKind::SetElements
|
|
| RecoveryContextKind::ListElements
|
|
| RecoveryContextKind::DictElements
|
|
| RecoveryContextKind::Arguments
|
|
| RecoveryContextKind::MatchPatternMapping
|
|
| RecoveryContextKind::SequenceMatchPattern(_)
|
|
| RecoveryContextKind::MatchPatternClassArguments
|
|
// Only allow a trailing comma if the with item itself is parenthesized
|
|
| RecoveryContextKind::WithItems(WithItemKind::Parenthesized)
|
|
| RecoveryContextKind::Parameters(_)
|
|
| RecoveryContextKind::TypeParams
|
|
| RecoveryContextKind::DeleteTargets
|
|
| RecoveryContextKind::ImportFromAsNames(Parenthesized::Yes)
|
|
)
|
|
}
|
|
|
|
/// Returns `true` if the parser is at a token that terminates the list as per the context.
|
|
///
|
|
/// This token could either end the list or is only present for better error recovery. Refer to
|
|
/// [`is_regular_list_terminator`] to only check against the former.
|
|
///
|
|
/// [`is_regular_list_terminator`]: RecoveryContextKind::is_regular_list_terminator
|
|
fn is_list_terminator(self, p: &Parser) -> bool {
|
|
self.list_terminator_kind(p).is_some()
|
|
}
|
|
|
|
/// Returns `true` if the parser is at a token that terminates the list as per the context but
|
|
/// the token isn't part of the error recovery set.
|
|
fn is_regular_list_terminator(self, p: &Parser) -> bool {
|
|
matches!(
|
|
self.list_terminator_kind(p),
|
|
Some(ListTerminatorKind::Regular)
|
|
)
|
|
}
|
|
|
|
/// Checks the current token the parser is at and returns the list terminator kind if the token
|
|
/// terminates the list as per the context.
|
|
fn list_terminator_kind(self, p: &Parser) -> Option<ListTerminatorKind> {
|
|
// The end of file marker ends all lists.
|
|
if p.at(TokenKind::EndOfFile) {
|
|
return Some(ListTerminatorKind::Regular);
|
|
}
|
|
|
|
match self {
|
|
// The parser must consume all tokens until the end
|
|
RecoveryContextKind::ModuleStatements => None,
|
|
RecoveryContextKind::BlockStatements => p
|
|
.at(TokenKind::Dedent)
|
|
.then_some(ListTerminatorKind::Regular),
|
|
|
|
RecoveryContextKind::Elif => {
|
|
p.at(TokenKind::Else).then_some(ListTerminatorKind::Regular)
|
|
}
|
|
RecoveryContextKind::Except => {
|
|
matches!(p.current_token_kind(), TokenKind::Finally | TokenKind::Else)
|
|
.then_some(ListTerminatorKind::Regular)
|
|
}
|
|
RecoveryContextKind::AssignmentTargets => {
|
|
// test_ok assign_targets_terminator
|
|
// x = y = z = 1; a, b
|
|
// x = y = z = 1
|
|
// a, b
|
|
matches!(p.current_token_kind(), TokenKind::Newline | TokenKind::Semi)
|
|
.then_some(ListTerminatorKind::Regular)
|
|
}
|
|
|
|
// Tokens other than `]` are for better error recovery. For example, recover when we
|
|
// find the `:` of a clause header or the equal of a type assignment.
|
|
RecoveryContextKind::TypeParams => {
|
|
if p.at(TokenKind::Rsqb) {
|
|
Some(ListTerminatorKind::Regular)
|
|
} else {
|
|
matches!(
|
|
p.current_token_kind(),
|
|
TokenKind::Newline | TokenKind::Colon | TokenKind::Equal | TokenKind::Lpar
|
|
)
|
|
.then_some(ListTerminatorKind::ErrorRecovery)
|
|
}
|
|
}
|
|
// The names of an import statement cannot be parenthesized, so `)` is not a
|
|
// terminator.
|
|
RecoveryContextKind::ImportNames => {
|
|
// test_ok import_stmt_terminator
|
|
// import a, b; import c, d
|
|
// import a, b
|
|
// c, d
|
|
matches!(p.current_token_kind(), TokenKind::Semi | TokenKind::Newline)
|
|
.then_some(ListTerminatorKind::Regular)
|
|
}
|
|
RecoveryContextKind::ImportFromAsNames(_) => {
|
|
// test_ok from_import_stmt_terminator
|
|
// from a import (b, c)
|
|
// from a import (b, c); x, y
|
|
// from a import b, c; x, y
|
|
// from a import b, c
|
|
// x, y
|
|
matches!(
|
|
p.current_token_kind(),
|
|
TokenKind::Rpar | TokenKind::Semi | TokenKind::Newline
|
|
)
|
|
.then_some(ListTerminatorKind::Regular)
|
|
}
|
|
// The elements in a container expression cannot end with a newline
|
|
// as all of them are actually non-logical newlines.
|
|
RecoveryContextKind::Slices | RecoveryContextKind::ListElements => {
|
|
p.at(TokenKind::Rsqb).then_some(ListTerminatorKind::Regular)
|
|
}
|
|
RecoveryContextKind::SetElements | RecoveryContextKind::DictElements => p
|
|
.at(TokenKind::Rbrace)
|
|
.then_some(ListTerminatorKind::Regular),
|
|
RecoveryContextKind::TupleElements(parenthesized) => {
|
|
if parenthesized.is_yes() {
|
|
p.at(TokenKind::Rpar).then_some(ListTerminatorKind::Regular)
|
|
} else {
|
|
p.at_sequence_end().then_some(ListTerminatorKind::Regular)
|
|
}
|
|
}
|
|
RecoveryContextKind::SequenceMatchPattern(parentheses) => match parentheses {
|
|
None => {
|
|
// test_ok match_sequence_pattern_terminator
|
|
// match subject:
|
|
// case a if x: ...
|
|
// case a, b: ...
|
|
// case a, b if x: ...
|
|
// case a: ...
|
|
matches!(p.current_token_kind(), TokenKind::Colon | TokenKind::If)
|
|
.then_some(ListTerminatorKind::Regular)
|
|
}
|
|
Some(parentheses) => {
|
|
// test_ok match_sequence_pattern_parentheses_terminator
|
|
// match subject:
|
|
// case [a, b]: ...
|
|
// case (a, b): ...
|
|
p.at(parentheses.closing_kind())
|
|
.then_some(ListTerminatorKind::Regular)
|
|
}
|
|
},
|
|
RecoveryContextKind::MatchPatternMapping => p
|
|
.at(TokenKind::Rbrace)
|
|
.then_some(ListTerminatorKind::Regular),
|
|
RecoveryContextKind::MatchPatternClassArguments => {
|
|
p.at(TokenKind::Rpar).then_some(ListTerminatorKind::Regular)
|
|
}
|
|
RecoveryContextKind::Arguments => {
|
|
p.at(TokenKind::Rpar).then_some(ListTerminatorKind::Regular)
|
|
}
|
|
RecoveryContextKind::DeleteTargets | RecoveryContextKind::Identifiers => {
|
|
// test_ok del_targets_terminator
|
|
// del a, b; c, d
|
|
// del a, b
|
|
// c, d
|
|
matches!(p.current_token_kind(), TokenKind::Semi | TokenKind::Newline)
|
|
.then_some(ListTerminatorKind::Regular)
|
|
}
|
|
RecoveryContextKind::Parameters(function_kind) => {
|
|
// `lambda x, y: ...` or `def f(x, y): ...`
|
|
if p.at(function_kind.list_terminator()) {
|
|
Some(ListTerminatorKind::Regular)
|
|
} else {
|
|
// To recover from missing closing parentheses
|
|
(p.at(TokenKind::Rarrow) || p.at_compound_stmt())
|
|
.then_some(ListTerminatorKind::ErrorRecovery)
|
|
}
|
|
}
|
|
RecoveryContextKind::WithItems(with_item_kind) => match with_item_kind {
|
|
WithItemKind::Parenthesized => match p.current_token_kind() {
|
|
TokenKind::Rpar => Some(ListTerminatorKind::Regular),
|
|
TokenKind::Colon => Some(ListTerminatorKind::ErrorRecovery),
|
|
_ => None,
|
|
},
|
|
WithItemKind::Unparenthesized | WithItemKind::ParenthesizedExpression => p
|
|
.at(TokenKind::Colon)
|
|
.then_some(ListTerminatorKind::Regular),
|
|
},
|
|
RecoveryContextKind::InterpolatedStringElements(kind) => {
|
|
if p.at_ts(kind.list_terminators()) {
|
|
Some(ListTerminatorKind::Regular)
|
|
} else {
|
|
// test_err unterminated_fstring_newline_recovery
|
|
// f"hello
|
|
// 1 + 1
|
|
// f"hello {x
|
|
// 2 + 2
|
|
// f"hello {x:
|
|
// 3 + 3
|
|
// f"hello {x}
|
|
// 4 + 4
|
|
p.at(TokenKind::Newline)
|
|
.then_some(ListTerminatorKind::ErrorRecovery)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn is_list_element(self, p: &Parser) -> bool {
|
|
match self {
|
|
RecoveryContextKind::ModuleStatements => p.at_stmt(),
|
|
RecoveryContextKind::BlockStatements => p.at_stmt(),
|
|
RecoveryContextKind::Elif => p.at(TokenKind::Elif),
|
|
RecoveryContextKind::Except => p.at(TokenKind::Except),
|
|
RecoveryContextKind::AssignmentTargets => p.at(TokenKind::Equal),
|
|
RecoveryContextKind::TypeParams => p.at_type_param(),
|
|
RecoveryContextKind::ImportNames => p.at_name_or_soft_keyword(),
|
|
RecoveryContextKind::ImportFromAsNames(_) => {
|
|
p.at(TokenKind::Star) || p.at_name_or_soft_keyword()
|
|
}
|
|
RecoveryContextKind::Slices => p.at(TokenKind::Colon) || p.at_expr(),
|
|
RecoveryContextKind::ListElements
|
|
| RecoveryContextKind::SetElements
|
|
| RecoveryContextKind::TupleElements(_) => p.at_expr(),
|
|
RecoveryContextKind::DictElements => p.at(TokenKind::DoubleStar) || p.at_expr(),
|
|
RecoveryContextKind::SequenceMatchPattern(_) => {
|
|
// `+` doesn't start any pattern but is here for better error recovery.
|
|
p.at(TokenKind::Plus) || p.at_pattern_start()
|
|
}
|
|
RecoveryContextKind::MatchPatternMapping => {
|
|
// A star pattern is invalid as a mapping key and is here only for
|
|
// better error recovery.
|
|
p.at(TokenKind::Star) || p.at_mapping_pattern_start()
|
|
}
|
|
RecoveryContextKind::MatchPatternClassArguments => p.at_pattern_start(),
|
|
RecoveryContextKind::Arguments => p.at_expr(),
|
|
RecoveryContextKind::DeleteTargets => p.at_expr(),
|
|
RecoveryContextKind::Identifiers => p.at_name_or_soft_keyword(),
|
|
RecoveryContextKind::Parameters(_) => {
|
|
matches!(
|
|
p.current_token_kind(),
|
|
TokenKind::Star | TokenKind::DoubleStar | TokenKind::Slash
|
|
) || p.at_name_or_soft_keyword()
|
|
}
|
|
RecoveryContextKind::WithItems(_) => p.at_expr(),
|
|
RecoveryContextKind::InterpolatedStringElements(_) => matches!(
|
|
p.current_token_kind(),
|
|
// Literal element
|
|
TokenKind::FStringMiddle | TokenKind::TStringMiddle
|
|
// Expression element
|
|
| TokenKind::Lbrace
|
|
),
|
|
}
|
|
}
|
|
|
|
fn create_error(self, p: &Parser) -> ParseErrorType {
|
|
match self {
|
|
RecoveryContextKind::ModuleStatements | RecoveryContextKind::BlockStatements => {
|
|
if p.at(TokenKind::Indent) {
|
|
ParseErrorType::UnexpectedIndentation
|
|
} else {
|
|
ParseErrorType::OtherError("Expected a statement".to_string())
|
|
}
|
|
}
|
|
RecoveryContextKind::Elif => ParseErrorType::OtherError(
|
|
"Expected an `elif` or `else` clause, or the end of the `if` statement."
|
|
.to_string(),
|
|
),
|
|
RecoveryContextKind::Except => ParseErrorType::OtherError(
|
|
"Expected an `except` or `finally` clause or the end of the `try` statement."
|
|
.to_string(),
|
|
),
|
|
RecoveryContextKind::AssignmentTargets => {
|
|
if p.current_token_kind().is_keyword() {
|
|
ParseErrorType::OtherError(
|
|
"The keyword is not allowed as a variable declaration name".to_string(),
|
|
)
|
|
} else {
|
|
ParseErrorType::OtherError("Expected an assignment target".to_string())
|
|
}
|
|
}
|
|
RecoveryContextKind::TypeParams => ParseErrorType::OtherError(
|
|
"Expected a type parameter or the end of the type parameter list".to_string(),
|
|
),
|
|
RecoveryContextKind::ImportFromAsNames(parenthesized) => {
|
|
if parenthesized.is_yes() {
|
|
ParseErrorType::OtherError("Expected an import name or a ')'".to_string())
|
|
} else {
|
|
ParseErrorType::OtherError("Expected an import name".to_string())
|
|
}
|
|
}
|
|
RecoveryContextKind::ImportNames => {
|
|
ParseErrorType::OtherError("Expected an import name".to_string())
|
|
}
|
|
RecoveryContextKind::Slices => ParseErrorType::OtherError(
|
|
"Expected an expression or the end of the slice list".to_string(),
|
|
),
|
|
RecoveryContextKind::ListElements => {
|
|
ParseErrorType::OtherError("Expected an expression or a ']'".to_string())
|
|
}
|
|
RecoveryContextKind::SetElements | RecoveryContextKind::DictElements => {
|
|
ParseErrorType::OtherError("Expected an expression or a '}'".to_string())
|
|
}
|
|
RecoveryContextKind::TupleElements(parenthesized) => {
|
|
if parenthesized.is_yes() {
|
|
ParseErrorType::OtherError("Expected an expression or a ')'".to_string())
|
|
} else {
|
|
ParseErrorType::OtherError("Expected an expression".to_string())
|
|
}
|
|
}
|
|
RecoveryContextKind::SequenceMatchPattern(_) => ParseErrorType::OtherError(
|
|
"Expected a pattern or the end of the sequence pattern".to_string(),
|
|
),
|
|
RecoveryContextKind::MatchPatternMapping => ParseErrorType::OtherError(
|
|
"Expected a mapping pattern or the end of the mapping pattern".to_string(),
|
|
),
|
|
RecoveryContextKind::MatchPatternClassArguments => {
|
|
ParseErrorType::OtherError("Expected a pattern or a ')'".to_string())
|
|
}
|
|
RecoveryContextKind::Arguments => {
|
|
ParseErrorType::OtherError("Expected an expression or a ')'".to_string())
|
|
}
|
|
RecoveryContextKind::DeleteTargets => {
|
|
ParseErrorType::OtherError("Expected a delete target".to_string())
|
|
}
|
|
RecoveryContextKind::Identifiers => {
|
|
ParseErrorType::OtherError("Expected an identifier".to_string())
|
|
}
|
|
RecoveryContextKind::Parameters(_) => ParseErrorType::OtherError(
|
|
"Expected a parameter or the end of the parameter list".to_string(),
|
|
),
|
|
RecoveryContextKind::WithItems(with_item_kind) => match with_item_kind {
|
|
WithItemKind::Parenthesized => {
|
|
ParseErrorType::OtherError("Expected an expression or a ')'".to_string())
|
|
}
|
|
_ => ParseErrorType::OtherError(
|
|
"Expected an expression or the end of the with item list".to_string(),
|
|
),
|
|
},
|
|
RecoveryContextKind::InterpolatedStringElements(kind) => match kind {
|
|
InterpolatedStringElementsKind::Regular => ParseErrorType::OtherError(
|
|
"Expected an f-string or t-string element or the end of the f-string or t-string".to_string(),
|
|
),
|
|
InterpolatedStringElementsKind::FormatSpec => ParseErrorType::OtherError(
|
|
"Expected an f-string or t-string element or a '}'".to_string(),
|
|
),
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
/// A set of recovery contexts, stored as bit flags in a `u32`.
///
/// Each flag stands for one [`RecoveryContextKind`] value (payload included);
/// [`RecoveryContext::from_kind`] and [`RecoveryContext::to_kind`] convert between the two
/// representations. The `Default` value has no bits set.
#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
|
|
struct RecoveryContext(u32);
|
|
|
|
// Flag definitions for `RecoveryContext`. There is exactly one flag for every value that
// `RecoveryContextKind` can take, so variants that carry a payload (parenthesized or not,
// function or lambda, ...) get one flag per payload value. `RecoveryContext::from_kind` and
// `RecoveryContext::to_kind` must be kept in sync with this table.
//
// Bits 0 through 30 are used, except for bit 27.
// NOTE(review): the gap at bit 27 looks like a leftover from a removed flag; confirm before
// reusing it.
bitflags! {
|
|
impl RecoveryContext: u32 {
|
|
// Statement and clause lists.
const MODULE_STATEMENTS = 1 << 0;
|
|
const BLOCK_STATEMENTS = 1 << 1;
|
|
const ELIF = 1 << 2;
|
|
const EXCEPT = 1 << 3;
|
|
const ASSIGNMENT_TARGETS = 1 << 4;
|
|
const TYPE_PARAMS = 1 << 5;
|
|
// Import names.
const IMPORT_FROM_AS_NAMES_PARENTHESIZED = 1 << 6;
|
|
const IMPORT_FROM_AS_NAMES_UNPARENTHESIZED = 1 << 7;
|
|
const IMPORT_NAMES = 1 << 8;
|
|
// Expression lists.
const SLICES = 1 << 9;
|
|
const LIST_ELEMENTS = 1 << 10;
|
|
const SET_ELEMENTS = 1 << 11;
|
|
const DICT_ELEMENTS = 1 << 12;
|
|
const TUPLE_ELEMENTS_PARENTHESIZED = 1 << 13;
|
|
const TUPLE_ELEMENTS_UNPARENTHESIZED = 1 << 14;
|
|
// Match patterns. `SEQUENCE_MATCH_PATTERN` is the form without enclosing delimiters.
const SEQUENCE_MATCH_PATTERN = 1 << 15;
|
|
const SEQUENCE_MATCH_PATTERN_LIST = 1 << 16;
|
|
const SEQUENCE_MATCH_PATTERN_TUPLE = 1 << 17;
|
|
const MATCH_PATTERN_MAPPING = 1 << 18;
|
|
const MATCH_PATTERN_CLASS_ARGUMENTS = 1 << 19;
|
|
const ARGUMENTS = 1 << 20;
|
|
// Corresponds to `RecoveryContextKind::DeleteTargets`.
const DELETE = 1 << 21;
|
|
const IDENTIFIERS = 1 << 22;
|
|
// Parameter lists, one flag per `FunctionKind`.
const FUNCTION_PARAMETERS = 1 << 23;
|
|
const LAMBDA_PARAMETERS = 1 << 24;
|
|
// `with` items, one flag per `WithItemKind`.
const WITH_ITEMS_PARENTHESIZED = 1 << 25;
|
|
const WITH_ITEMS_PARENTHESIZED_EXPRESSION = 1 << 26;
|
|
// Bit 27 is skipped here.
const WITH_ITEMS_UNPARENTHESIZED = 1 << 28;
|
|
// f-string / t-string elements, one flag per `InterpolatedStringElementsKind`.
const FT_STRING_ELEMENTS = 1 << 29;
|
|
const FT_STRING_ELEMENTS_IN_FORMAT_SPEC = 1 << 30;
|
|
}
|
|
}
|
|
|
|
impl RecoveryContext {
|
|
const fn from_kind(kind: RecoveryContextKind) -> Self {
|
|
match kind {
|
|
RecoveryContextKind::ModuleStatements => RecoveryContext::MODULE_STATEMENTS,
|
|
RecoveryContextKind::BlockStatements => RecoveryContext::BLOCK_STATEMENTS,
|
|
RecoveryContextKind::Elif => RecoveryContext::ELIF,
|
|
RecoveryContextKind::Except => RecoveryContext::EXCEPT,
|
|
RecoveryContextKind::AssignmentTargets => RecoveryContext::ASSIGNMENT_TARGETS,
|
|
RecoveryContextKind::TypeParams => RecoveryContext::TYPE_PARAMS,
|
|
RecoveryContextKind::ImportFromAsNames(parenthesized) => match parenthesized {
|
|
Parenthesized::Yes => RecoveryContext::IMPORT_FROM_AS_NAMES_PARENTHESIZED,
|
|
Parenthesized::No => RecoveryContext::IMPORT_FROM_AS_NAMES_UNPARENTHESIZED,
|
|
},
|
|
RecoveryContextKind::ImportNames => RecoveryContext::IMPORT_NAMES,
|
|
RecoveryContextKind::Slices => RecoveryContext::SLICES,
|
|
RecoveryContextKind::ListElements => RecoveryContext::LIST_ELEMENTS,
|
|
RecoveryContextKind::SetElements => RecoveryContext::SET_ELEMENTS,
|
|
RecoveryContextKind::DictElements => RecoveryContext::DICT_ELEMENTS,
|
|
RecoveryContextKind::TupleElements(parenthesized) => match parenthesized {
|
|
Parenthesized::Yes => RecoveryContext::TUPLE_ELEMENTS_PARENTHESIZED,
|
|
Parenthesized::No => RecoveryContext::TUPLE_ELEMENTS_UNPARENTHESIZED,
|
|
},
|
|
RecoveryContextKind::SequenceMatchPattern(parentheses) => match parentheses {
|
|
None => RecoveryContext::SEQUENCE_MATCH_PATTERN,
|
|
Some(SequenceMatchPatternParentheses::List) => {
|
|
RecoveryContext::SEQUENCE_MATCH_PATTERN_LIST
|
|
}
|
|
Some(SequenceMatchPatternParentheses::Tuple) => {
|
|
RecoveryContext::SEQUENCE_MATCH_PATTERN_TUPLE
|
|
}
|
|
},
|
|
RecoveryContextKind::MatchPatternMapping => RecoveryContext::MATCH_PATTERN_MAPPING,
|
|
RecoveryContextKind::MatchPatternClassArguments => {
|
|
RecoveryContext::MATCH_PATTERN_CLASS_ARGUMENTS
|
|
}
|
|
RecoveryContextKind::Arguments => RecoveryContext::ARGUMENTS,
|
|
RecoveryContextKind::DeleteTargets => RecoveryContext::DELETE,
|
|
RecoveryContextKind::Identifiers => RecoveryContext::IDENTIFIERS,
|
|
RecoveryContextKind::Parameters(function_kind) => match function_kind {
|
|
FunctionKind::Lambda => RecoveryContext::LAMBDA_PARAMETERS,
|
|
FunctionKind::FunctionDef => RecoveryContext::FUNCTION_PARAMETERS,
|
|
},
|
|
RecoveryContextKind::WithItems(with_item_kind) => match with_item_kind {
|
|
WithItemKind::Parenthesized => RecoveryContext::WITH_ITEMS_PARENTHESIZED,
|
|
WithItemKind::ParenthesizedExpression => {
|
|
RecoveryContext::WITH_ITEMS_PARENTHESIZED_EXPRESSION
|
|
}
|
|
WithItemKind::Unparenthesized => RecoveryContext::WITH_ITEMS_UNPARENTHESIZED,
|
|
},
|
|
RecoveryContextKind::InterpolatedStringElements(kind) => match kind {
|
|
InterpolatedStringElementsKind::Regular => RecoveryContext::FT_STRING_ELEMENTS,
|
|
InterpolatedStringElementsKind::FormatSpec => {
|
|
RecoveryContext::FT_STRING_ELEMENTS_IN_FORMAT_SPEC
|
|
}
|
|
},
|
|
}
|
|
}
|
|
|
|
/// Safe conversion to the corresponding [`RecoveryContextKind`] (inverse of [`Self::from_kind`]).
|
|
///
|
|
/// Returns `None` if the `RecoveryContext` is empty or has multiple flags set.
|
|
const fn to_kind(self) -> Option<RecoveryContextKind> {
|
|
Some(match self {
|
|
RecoveryContext::MODULE_STATEMENTS => RecoveryContextKind::ModuleStatements,
|
|
RecoveryContext::BLOCK_STATEMENTS => RecoveryContextKind::BlockStatements,
|
|
RecoveryContext::ELIF => RecoveryContextKind::Elif,
|
|
RecoveryContext::EXCEPT => RecoveryContextKind::Except,
|
|
RecoveryContext::ASSIGNMENT_TARGETS => RecoveryContextKind::AssignmentTargets,
|
|
RecoveryContext::TYPE_PARAMS => RecoveryContextKind::TypeParams,
|
|
RecoveryContext::IMPORT_FROM_AS_NAMES_PARENTHESIZED => {
|
|
RecoveryContextKind::ImportFromAsNames(Parenthesized::Yes)
|
|
}
|
|
RecoveryContext::IMPORT_FROM_AS_NAMES_UNPARENTHESIZED => {
|
|
RecoveryContextKind::ImportFromAsNames(Parenthesized::No)
|
|
}
|
|
RecoveryContext::IMPORT_NAMES => RecoveryContextKind::ImportNames,
|
|
RecoveryContext::SLICES => RecoveryContextKind::Slices,
|
|
RecoveryContext::LIST_ELEMENTS => RecoveryContextKind::ListElements,
|
|
RecoveryContext::SET_ELEMENTS => RecoveryContextKind::SetElements,
|
|
RecoveryContext::DICT_ELEMENTS => RecoveryContextKind::DictElements,
|
|
RecoveryContext::TUPLE_ELEMENTS_PARENTHESIZED => {
|
|
RecoveryContextKind::TupleElements(Parenthesized::Yes)
|
|
}
|
|
RecoveryContext::TUPLE_ELEMENTS_UNPARENTHESIZED => {
|
|
RecoveryContextKind::TupleElements(Parenthesized::No)
|
|
}
|
|
RecoveryContext::SEQUENCE_MATCH_PATTERN => {
|
|
RecoveryContextKind::SequenceMatchPattern(None)
|
|
}
|
|
RecoveryContext::SEQUENCE_MATCH_PATTERN_LIST => {
|
|
RecoveryContextKind::SequenceMatchPattern(Some(
|
|
SequenceMatchPatternParentheses::List,
|
|
))
|
|
}
|
|
RecoveryContext::SEQUENCE_MATCH_PATTERN_TUPLE => {
|
|
RecoveryContextKind::SequenceMatchPattern(Some(
|
|
SequenceMatchPatternParentheses::Tuple,
|
|
))
|
|
}
|
|
RecoveryContext::MATCH_PATTERN_MAPPING => RecoveryContextKind::MatchPatternMapping,
|
|
RecoveryContext::MATCH_PATTERN_CLASS_ARGUMENTS => {
|
|
RecoveryContextKind::MatchPatternClassArguments
|
|
}
|
|
RecoveryContext::ARGUMENTS => RecoveryContextKind::Arguments,
|
|
RecoveryContext::DELETE => RecoveryContextKind::DeleteTargets,
|
|
RecoveryContext::IDENTIFIERS => RecoveryContextKind::Identifiers,
|
|
RecoveryContext::FUNCTION_PARAMETERS => {
|
|
RecoveryContextKind::Parameters(FunctionKind::FunctionDef)
|
|
}
|
|
RecoveryContext::LAMBDA_PARAMETERS => {
|
|
RecoveryContextKind::Parameters(FunctionKind::Lambda)
|
|
}
|
|
RecoveryContext::WITH_ITEMS_PARENTHESIZED => {
|
|
RecoveryContextKind::WithItems(WithItemKind::Parenthesized)
|
|
}
|
|
RecoveryContext::WITH_ITEMS_PARENTHESIZED_EXPRESSION => {
|
|
RecoveryContextKind::WithItems(WithItemKind::ParenthesizedExpression)
|
|
}
|
|
RecoveryContext::WITH_ITEMS_UNPARENTHESIZED => {
|
|
RecoveryContextKind::WithItems(WithItemKind::Unparenthesized)
|
|
}
|
|
RecoveryContext::FT_STRING_ELEMENTS => RecoveryContextKind::InterpolatedStringElements(
|
|
InterpolatedStringElementsKind::Regular,
|
|
),
|
|
RecoveryContext::FT_STRING_ELEMENTS_IN_FORMAT_SPEC => {
|
|
RecoveryContextKind::InterpolatedStringElements(
|
|
InterpolatedStringElementsKind::FormatSpec,
|
|
)
|
|
}
|
|
_ => return None,
|
|
})
|
|
}
|
|
|
|
fn kind_iter(self) -> impl Iterator<Item = RecoveryContextKind> {
|
|
self.iter().map(|context| {
|
|
context
|
|
.to_kind()
|
|
.expect("Expected context to be of a single kind.")
|
|
})
|
|
}
|
|
}
|