RustPython-Parser/parser/src/error.rs
Dimitris Fasarakis Hilliard 66cbe3fe62 Document parser crate.
2023-02-12 17:58:19 +02:00

331 lines
12 KiB
Rust

//! Error types for the parser.
//!
//! These types are used to represent errors that occur during lexing and parsing and are
//! returned by the `parse_*` functions in the [parser] module and the iterator in the
//! [lexer] implementation.
//!
//! [parser]: crate::parser
//! [lexer]: crate::lexer
// Define internal parse error types.
// The goal is to provide a matching and a safe error API, masking errors from LALR
use crate::{ast::Location, token::Tok};
use lalrpop_util::ParseError as LalrpopError;
use std::fmt;
/// Represents an error during lexing.
#[derive(Debug, PartialEq)]
pub struct LexicalError {
/// The type of error that occurred.
pub error: LexicalErrorType,
/// The location of the error.
pub location: Location,
}
impl LexicalError {
/// Creates a new `LexicalError` with the given error type and location.
pub fn new(error: LexicalErrorType, location: Location) -> Self {
Self { error, location }
}
}
/// Represents the different types of errors that can occur during lexing.
#[derive(Debug, PartialEq)]
pub enum LexicalErrorType {
// TODO: Can probably be removed, the places it is used seem to be able
// to use the `UnicodeError` variant instead.
#[doc(hidden)]
StringError,
// TODO: Should take a start/end position to report.
/// Decoding of a unicode escape sequence in a string literal failed.
UnicodeError,
/// The nesting of brackets/braces/parentheses is not balanced.
NestingError,
/// The indentation is not consistent.
IndentationError,
/// Inconsistent use of tabs and spaces.
TabError,
/// Encountered a tab after a space.
TabsAfterSpaces,
/// A non-default argument follows a default argument.
DefaultArgumentError,
/// A duplicate argument was found in a function definition.
DuplicateArgumentError(String),
/// A positional argument follows a keyword argument.
PositionalArgumentError,
/// An iterable argument unpacking `*args` follows keyword argument unpacking `**kwargs`.
UnpackedArgumentError,
/// A keyword argument was repeated.
DuplicateKeywordArgumentError(String),
/// An unrecognized token was encountered.
UnrecognizedToken { tok: char },
/// An f-string error containing the [`FStringErrorType`].
FStringError(FStringErrorType),
/// An unexpected character was encountered after a line continuation.
LineContinuationError,
/// An unexpected end of file was encountered.
Eof,
/// An unexpected error occurred.
OtherError(String),
}
impl fmt::Display for LexicalErrorType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
LexicalErrorType::StringError => write!(f, "Got unexpected string"),
LexicalErrorType::FStringError(error) => write!(f, "f-string: {error}"),
LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"),
LexicalErrorType::NestingError => write!(f, "Got unexpected nesting"),
LexicalErrorType::IndentationError => {
write!(f, "unindent does not match any outer indentation level")
}
LexicalErrorType::TabError => {
write!(f, "inconsistent use of tabs and spaces in indentation")
}
LexicalErrorType::TabsAfterSpaces => {
write!(f, "Tabs not allowed as part of indentation after spaces")
}
LexicalErrorType::DefaultArgumentError => {
write!(f, "non-default argument follows default argument")
}
LexicalErrorType::DuplicateArgumentError(arg_name) => {
write!(f, "duplicate argument '{arg_name}' in function definition")
}
LexicalErrorType::DuplicateKeywordArgumentError(arg_name) => {
write!(f, "keyword argument repeated: {arg_name}")
}
LexicalErrorType::PositionalArgumentError => {
write!(f, "positional argument follows keyword argument")
}
LexicalErrorType::UnpackedArgumentError => {
write!(
f,
"iterable argument unpacking follows keyword argument unpacking"
)
}
LexicalErrorType::UnrecognizedToken { tok } => {
write!(f, "Got unexpected token {tok}")
}
LexicalErrorType::LineContinuationError => {
write!(f, "unexpected character after line continuation character")
}
LexicalErrorType::Eof => write!(f, "unexpected EOF while parsing"),
LexicalErrorType::OtherError(msg) => write!(f, "{msg}"),
}
}
}
// TODO: consolidate these with ParseError
/// An error that occurred during parsing of an f-string.
#[derive(Debug, PartialEq)]
pub struct FStringError {
/// The type of error that occurred.
pub error: FStringErrorType,
/// The location of the error.
pub location: Location,
}
impl FStringError {
/// Creates a new `FStringError` with the given error type and location.
pub fn new(error: FStringErrorType, location: Location) -> Self {
Self { error, location }
}
}
impl From<FStringError> for LexicalError {
fn from(err: FStringError) -> Self {
LexicalError {
error: LexicalErrorType::FStringError(err.error),
location: err.location,
}
}
}
/// Represents the different types of errors that can occur during parsing of an f-string.
#[derive(Debug, PartialEq)]
pub enum FStringErrorType {
/// Expected a right brace after an opened left brace.
UnclosedLbrace,
/// Expected a left brace after an ending right brace.
UnopenedRbrace,
/// Expected a right brace after a conversion flag.
ExpectedRbrace,
/// An error occurred while parsing an f-string expression.
InvalidExpression(Box<ParseErrorType>),
/// An invalid conversion flag was encountered.
InvalidConversionFlag,
/// An empty expression was encountered.
EmptyExpression,
/// An opening delimiter was not closed properly.
MismatchedDelimiter(char, char),
/// Too many nested expressions in an f-string.
ExpressionNestedTooDeeply,
/// The f-string expression cannot include the given character.
ExpressionCannotInclude(char),
/// A single right brace was encountered.
SingleRbrace,
/// A closing delimiter was not opened properly.
Unmatched(char),
// TODO: Test this case.
/// Unterminated string.
UnterminatedString,
}
impl fmt::Display for FStringErrorType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
FStringErrorType::UnclosedLbrace => write!(f, "expecting '}}'"),
FStringErrorType::UnopenedRbrace => write!(f, "Unopened '}}'"),
FStringErrorType::ExpectedRbrace => write!(f, "Expected '}}' after conversion flag."),
FStringErrorType::InvalidExpression(error) => {
write!(f, "{error}")
}
FStringErrorType::InvalidConversionFlag => write!(f, "invalid conversion character"),
FStringErrorType::EmptyExpression => write!(f, "empty expression not allowed"),
FStringErrorType::MismatchedDelimiter(first, second) => write!(
f,
"closing parenthesis '{second}' does not match opening parenthesis '{first}'"
),
FStringErrorType::SingleRbrace => write!(f, "single '}}' is not allowed"),
FStringErrorType::Unmatched(delim) => write!(f, "unmatched '{delim}'"),
FStringErrorType::ExpressionNestedTooDeeply => {
write!(f, "expressions nested too deeply")
}
FStringErrorType::UnterminatedString => {
write!(f, "unterminated string")
}
FStringErrorType::ExpressionCannotInclude(c) => {
if *c == '\\' {
write!(f, "f-string expression part cannot include a backslash")
} else {
write!(f, "f-string expression part cannot include '{c}'s")
}
}
}
}
}
impl From<FStringError> for LalrpopError<Location, Tok, LexicalError> {
fn from(err: FStringError) -> Self {
lalrpop_util::ParseError::User {
error: LexicalError {
error: LexicalErrorType::FStringError(err.error),
location: err.location,
},
}
}
}
/// Represents an error during parsing.
pub type ParseError = rustpython_compiler_core::BaseError<ParseErrorType>;
/// Represents the different types of errors that can occur during parsing.
#[derive(Debug, PartialEq, thiserror::Error)]
pub enum ParseErrorType {
/// Parser encountered an unexpected end of input
Eof,
/// Parser encountered an extra token
ExtraToken(Tok),
/// Parser encountered an invalid token
InvalidToken,
/// Parser encountered an unexpected token
UnrecognizedToken(Tok, Option<String>),
// Maps to `User` type from `lalrpop-util`
/// Parser encountered an error during lexing.
Lexical(LexicalErrorType),
}
// Convert `lalrpop_util::ParseError` to our internal type
pub(crate) fn parse_error_from_lalrpop(
err: LalrpopError<Location, Tok, LexicalError>,
source_path: &str,
) -> ParseError {
let source_path = source_path.to_owned();
match err {
// TODO: Are there cases where this isn't an EOF?
LalrpopError::InvalidToken { location } => ParseError {
error: ParseErrorType::Eof,
location,
source_path,
},
LalrpopError::ExtraToken { token } => ParseError {
error: ParseErrorType::ExtraToken(token.1),
location: token.0,
source_path,
},
LalrpopError::User { error } => ParseError {
error: ParseErrorType::Lexical(error.error),
location: error.location,
source_path,
},
LalrpopError::UnrecognizedToken { token, expected } => {
// Hacky, but it's how CPython does it. See PyParser_AddToken,
// in particular "Only one possible expected token" comment.
let expected = (expected.len() == 1).then(|| expected[0].clone());
ParseError {
error: ParseErrorType::UnrecognizedToken(token.1, expected),
location: token.0.with_col_offset(1),
source_path,
}
}
LalrpopError::UnrecognizedEOF { location, expected } => {
// This could be an initial indentation error that we should ignore
let indent_error = expected == ["Indent"];
if indent_error {
ParseError {
error: ParseErrorType::Lexical(LexicalErrorType::IndentationError),
location,
source_path,
}
} else {
ParseError {
error: ParseErrorType::Eof,
location,
source_path,
}
}
}
}
}
impl fmt::Display for ParseErrorType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
ParseErrorType::Eof => write!(f, "Got unexpected EOF"),
ParseErrorType::ExtraToken(ref tok) => write!(f, "Got extraneous token: {tok:?}"),
ParseErrorType::InvalidToken => write!(f, "Got invalid token"),
ParseErrorType::UnrecognizedToken(ref tok, ref expected) => {
if *tok == Tok::Indent {
write!(f, "unexpected indent")
} else if expected.as_deref() == Some("Indent") {
write!(f, "expected an indented block")
} else {
write!(f, "invalid syntax. Got unexpected token {tok}")
}
}
ParseErrorType::Lexical(ref error) => write!(f, "{error}"),
}
}
}
impl ParseErrorType {
/// Returns true if the error is an indentation error.
pub fn is_indentation_error(&self) -> bool {
match self {
ParseErrorType::Lexical(LexicalErrorType::IndentationError) => true,
ParseErrorType::UnrecognizedToken(token, expected) => {
*token == Tok::Indent || expected.clone() == Some("Indent".to_owned())
}
_ => false,
}
}
/// Returns true if the error is a tab error.
pub fn is_tab_error(&self) -> bool {
matches!(
self,
ParseErrorType::Lexical(LexicalErrorType::TabError)
| ParseErrorType::Lexical(LexicalErrorType::TabsAfterSpaces)
)
}
}