mirror of
https://github.com/RustPython/Parser.git
synced 2025-07-12 23:55:21 +00:00
277 lines
9.8 KiB
Rust
277 lines
9.8 KiB
Rust
//! Define internal parse error types
|
|
//! The goal is to provide a matching and a safe error API, maksing errors from LALR
|
|
|
|
use crate::{ast::Location, token::Tok};
|
|
use lalrpop_util::ParseError as LalrpopError;
|
|
use std::fmt;
|
|
|
|
/// Represents an error during lexical scanning.
|
|
#[derive(Debug, PartialEq)]
|
|
pub struct LexicalError {
|
|
pub error: LexicalErrorType,
|
|
pub location: Location,
|
|
}
|
|
|
|
impl LexicalError {
|
|
pub fn new(error: LexicalErrorType, location: Location) -> Self {
|
|
Self { error, location }
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
pub enum LexicalErrorType {
|
|
StringError,
|
|
UnicodeError,
|
|
NestingError,
|
|
IndentationError,
|
|
TabError,
|
|
TabsAfterSpaces,
|
|
DefaultArgumentError,
|
|
DuplicateArgumentError(String),
|
|
PositionalArgumentError,
|
|
UnpackedArgumentError,
|
|
DuplicateKeywordArgumentError(String),
|
|
UnrecognizedToken { tok: char },
|
|
FStringError(FStringErrorType),
|
|
LineContinuationError,
|
|
Eof,
|
|
OtherError(String),
|
|
}
|
|
|
|
impl fmt::Display for LexicalErrorType {
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
match self {
|
|
LexicalErrorType::StringError => write!(f, "Got unexpected string"),
|
|
LexicalErrorType::FStringError(error) => write!(f, "Got error in f-string: {error}"),
|
|
LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"),
|
|
LexicalErrorType::NestingError => write!(f, "Got unexpected nesting"),
|
|
LexicalErrorType::IndentationError => {
|
|
write!(f, "unindent does not match any outer indentation level")
|
|
}
|
|
LexicalErrorType::TabError => {
|
|
write!(f, "inconsistent use of tabs and spaces in indentation")
|
|
}
|
|
LexicalErrorType::TabsAfterSpaces => {
|
|
write!(f, "Tabs not allowed as part of indentation after spaces")
|
|
}
|
|
LexicalErrorType::DefaultArgumentError => {
|
|
write!(f, "non-default argument follows default argument")
|
|
}
|
|
LexicalErrorType::DuplicateArgumentError(arg_name) => {
|
|
write!(f, "duplicate argument '{arg_name}' in function definition")
|
|
}
|
|
LexicalErrorType::DuplicateKeywordArgumentError(arg_name) => {
|
|
write!(f, "keyword argument repeated: {arg_name}")
|
|
}
|
|
LexicalErrorType::PositionalArgumentError => {
|
|
write!(f, "positional argument follows keyword argument")
|
|
}
|
|
LexicalErrorType::UnpackedArgumentError => {
|
|
write!(
|
|
f,
|
|
"iterable argument unpacking follows keyword argument unpacking"
|
|
)
|
|
}
|
|
LexicalErrorType::UnrecognizedToken { tok } => {
|
|
write!(f, "Got unexpected token {tok}")
|
|
}
|
|
LexicalErrorType::LineContinuationError => {
|
|
write!(f, "unexpected character after line continuation character")
|
|
}
|
|
LexicalErrorType::Eof => write!(f, "unexpected EOF while parsing"),
|
|
LexicalErrorType::OtherError(msg) => write!(f, "{msg}"),
|
|
}
|
|
}
|
|
}
|
|
|
|
// TODO: consolidate these with ParseError
|
|
#[derive(Debug, PartialEq)]
|
|
pub struct FStringError {
|
|
pub error: FStringErrorType,
|
|
pub location: Location,
|
|
}
|
|
|
|
impl FStringError {
|
|
pub fn new(error: FStringErrorType, location: Location) -> Self {
|
|
Self { error, location }
|
|
}
|
|
}
|
|
|
|
impl From<FStringError> for LexicalError {
|
|
fn from(err: FStringError) -> Self {
|
|
LexicalError {
|
|
error: LexicalErrorType::FStringError(err.error),
|
|
location: err.location,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
pub enum FStringErrorType {
|
|
UnclosedLbrace,
|
|
UnopenedRbrace,
|
|
ExpectedRbrace,
|
|
InvalidExpression(Box<ParseErrorType>),
|
|
InvalidConversionFlag,
|
|
EmptyExpression,
|
|
MismatchedDelimiter(char, char),
|
|
ExpressionNestedTooDeeply,
|
|
ExpressionCannotInclude(char),
|
|
SingleRbrace,
|
|
Unmatched(char),
|
|
UnterminatedString,
|
|
}
|
|
|
|
impl fmt::Display for FStringErrorType {
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
match self {
|
|
FStringErrorType::UnclosedLbrace => write!(f, "expecting '}}'"),
|
|
FStringErrorType::UnopenedRbrace => write!(f, "Unopened '}}'"),
|
|
FStringErrorType::ExpectedRbrace => write!(f, "Expected '}}' after conversion flag."),
|
|
FStringErrorType::InvalidExpression(error) => {
|
|
write!(f, "{error}")
|
|
}
|
|
FStringErrorType::InvalidConversionFlag => write!(f, "invalid conversion character"),
|
|
FStringErrorType::EmptyExpression => write!(f, "empty expression not allowed"),
|
|
FStringErrorType::MismatchedDelimiter(first, second) => write!(
|
|
f,
|
|
"closing parenthesis '{second}' does not match opening parenthesis '{first}'"
|
|
),
|
|
FStringErrorType::SingleRbrace => write!(f, "single '}}' is not allowed"),
|
|
FStringErrorType::Unmatched(delim) => write!(f, "unmatched '{delim}'"),
|
|
FStringErrorType::ExpressionNestedTooDeeply => {
|
|
write!(f, "expressions nested too deeply")
|
|
}
|
|
FStringErrorType::UnterminatedString => {
|
|
write!(f, "unterminated string")
|
|
}
|
|
FStringErrorType::ExpressionCannotInclude(c) => {
|
|
if *c == '\\' {
|
|
write!(f, "f-string expression part cannot include a backslash")
|
|
} else {
|
|
write!(f, "f-string expression part cannot include '{c}'s")
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<FStringError> for LalrpopError<Location, Tok, LexicalError> {
|
|
fn from(err: FStringError) -> Self {
|
|
lalrpop_util::ParseError::User {
|
|
error: LexicalError {
|
|
error: LexicalErrorType::FStringError(err.error),
|
|
location: err.location,
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Represents an error during parsing
|
|
pub type ParseError = rustpython_compiler_core::BaseError<ParseErrorType>;
|
|
|
|
#[derive(Debug, PartialEq, thiserror::Error)]
|
|
pub enum ParseErrorType {
|
|
/// Parser encountered an unexpected end of input
|
|
Eof,
|
|
/// Parser encountered an extra token
|
|
ExtraToken(Tok),
|
|
/// Parser encountered an invalid token
|
|
InvalidToken,
|
|
/// Parser encountered an unexpected token
|
|
UnrecognizedToken(Tok, Option<String>),
|
|
/// Maps to `User` type from `lalrpop-util`
|
|
Lexical(LexicalErrorType),
|
|
}
|
|
|
|
/// Convert `lalrpop_util::ParseError` to our internal type
|
|
pub(crate) fn parse_error_from_lalrpop(
|
|
err: LalrpopError<Location, Tok, LexicalError>,
|
|
source_path: &str,
|
|
) -> ParseError {
|
|
let source_path = source_path.to_owned();
|
|
match err {
|
|
// TODO: Are there cases where this isn't an EOF?
|
|
LalrpopError::InvalidToken { location } => ParseError {
|
|
error: ParseErrorType::Eof,
|
|
location,
|
|
source_path,
|
|
},
|
|
LalrpopError::ExtraToken { token } => ParseError {
|
|
error: ParseErrorType::ExtraToken(token.1),
|
|
location: token.0,
|
|
source_path,
|
|
},
|
|
LalrpopError::User { error } => ParseError {
|
|
error: ParseErrorType::Lexical(error.error),
|
|
location: error.location,
|
|
source_path,
|
|
},
|
|
LalrpopError::UnrecognizedToken { token, expected } => {
|
|
// Hacky, but it's how CPython does it. See PyParser_AddToken,
|
|
// in particular "Only one possible expected token" comment.
|
|
let expected = (expected.len() == 1).then(|| expected[0].clone());
|
|
ParseError {
|
|
error: ParseErrorType::UnrecognizedToken(token.1, expected),
|
|
location: Location::new(token.0.row(), token.0.column() + 1),
|
|
source_path,
|
|
}
|
|
}
|
|
LalrpopError::UnrecognizedEOF { location, expected } => {
|
|
// This could be an initial indentation error that we should ignore
|
|
let indent_error = expected == ["Indent"];
|
|
if indent_error {
|
|
ParseError {
|
|
error: ParseErrorType::Lexical(LexicalErrorType::IndentationError),
|
|
location,
|
|
source_path,
|
|
}
|
|
} else {
|
|
ParseError {
|
|
error: ParseErrorType::Eof,
|
|
location,
|
|
source_path,
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl fmt::Display for ParseErrorType {
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
match *self {
|
|
ParseErrorType::Eof => write!(f, "Got unexpected EOF"),
|
|
ParseErrorType::ExtraToken(ref tok) => write!(f, "Got extraneous token: {tok:?}"),
|
|
ParseErrorType::InvalidToken => write!(f, "Got invalid token"),
|
|
ParseErrorType::UnrecognizedToken(ref tok, ref expected) => {
|
|
if *tok == Tok::Indent {
|
|
write!(f, "unexpected indent")
|
|
} else if expected.as_deref() == Some("Indent") {
|
|
write!(f, "expected an indented block")
|
|
} else {
|
|
write!(f, "invalid syntax. Got unexpected token {tok}")
|
|
}
|
|
}
|
|
ParseErrorType::Lexical(ref error) => write!(f, "{error}"),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl ParseErrorType {
|
|
pub fn is_indentation_error(&self) -> bool {
|
|
match self {
|
|
ParseErrorType::Lexical(LexicalErrorType::IndentationError) => true,
|
|
ParseErrorType::UnrecognizedToken(token, expected) => {
|
|
*token == Tok::Indent || expected.clone() == Some("Indent".to_owned())
|
|
}
|
|
_ => false,
|
|
}
|
|
}
|
|
pub fn is_tab_error(&self) -> bool {
|
|
matches!(
|
|
self,
|
|
ParseErrorType::Lexical(LexicalErrorType::TabError)
|
|
| ParseErrorType::Lexical(LexicalErrorType::TabsAfterSpaces)
|
|
)
|
|
}
|
|
}
|