Mirror of https://github.com/RustPython/Parser.git (synced 2025-07-16 01:25:25 +00:00)
Break down the rustpython_parser::error module
The error module does not hold any error types shared across the crate, only errors specific to each submodule, so each error type moves next to the code that produces it.
This commit is contained in:
parent
2a8aa6f308
commit
1511b6631b
7 changed files with 338 additions and 345 deletions
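
For orientation before the hunks: the error types keep their names and shapes, only their import paths move. Below is a hypothetical downstream snippet sketching the change; the crate name `rustpython_parser`, the public visibility of the `parser` module, and the public `error`/`location` fields of `BaseError` are assumptions read off this diff, not guarantees.

// Hypothetical caller code, before and after this commit.
// Old path (removed by this commit):
//     use rustpython_parser::error::{LexicalErrorType, ParseError, ParseErrorType};
// New paths, mirroring where each type now lives:
use rustpython_parser::lexer::LexicalErrorType;
use rustpython_parser::parser::{ParseError, ParseErrorType};

/// Render a parse failure; the types behave exactly as before, only the imports change.
fn describe(err: &ParseError) -> String {
    match &err.error {
        ParseErrorType::Lexical(LexicalErrorType::IndentationError) => {
            format!("indentation problem at {:?}", err.location)
        }
        ParseErrorType::Eof => "unexpected end of input".to_string(),
        other => other.to_string(),
    }
}
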
@@ -5,7 +5,7 @@
 
 use crate::{
     ast,
-    error::{LexicalError, LexicalErrorType},
+    lexer::{LexicalError, LexicalErrorType},
     function::{ArgumentList, parse_args, parse_params, validate_arguments},
     lexer,
     context::set_context,
@@ -1,331 +0,0 @@
-//! Error types for the parser.
-//!
-//! These types are used to represent errors that occur during lexing and parsing and are
-//! returned by the `parse_*` functions in the [parser] module and the iterator in the
-//! [lexer] implementation.
-//!
-//! [parser]: crate::parser
-//! [lexer]: crate::lexer
-
-// Define internal parse error types.
-// The goal is to provide a matching and a safe error API, masking errors from LALR
-use crate::{ast::Location, token::Tok};
-use lalrpop_util::ParseError as LalrpopError;
-use std::fmt;
-
-/// Represents an error during lexing.
-#[derive(Debug, PartialEq)]
-pub struct LexicalError {
-    /// The type of error that occurred.
-    pub error: LexicalErrorType,
-    /// The location of the error.
-    pub location: Location,
-}
-
-impl LexicalError {
-    /// Creates a new `LexicalError` with the given error type and location.
-    pub fn new(error: LexicalErrorType, location: Location) -> Self {
-        Self { error, location }
-    }
-}
-
-/// Represents the different types of errors that can occur during lexing.
-#[derive(Debug, PartialEq)]
-pub enum LexicalErrorType {
-    // TODO: Can probably be removed, the places it is used seem to be able
-    // to use the `UnicodeError` variant instead.
-    #[doc(hidden)]
-    StringError,
-    // TODO: Should take a start/end position to report.
-    /// Decoding of a unicode escape sequence in a string literal failed.
-    UnicodeError,
-    /// The nesting of brackets/braces/parentheses is not balanced.
-    NestingError,
-    /// The indentation is not consistent.
-    IndentationError,
-    /// Inconsistent use of tabs and spaces.
-    TabError,
-    /// Encountered a tab after a space.
-    TabsAfterSpaces,
-    /// A non-default argument follows a default argument.
-    DefaultArgumentError,
-    /// A duplicate argument was found in a function definition.
-    DuplicateArgumentError(String),
-    /// A positional argument follows a keyword argument.
-    PositionalArgumentError,
-    /// An iterable argument unpacking `*args` follows keyword argument unpacking `**kwargs`.
-    UnpackedArgumentError,
-    /// A keyword argument was repeated.
-    DuplicateKeywordArgumentError(String),
-    /// An unrecognized token was encountered.
-    UnrecognizedToken { tok: char },
-    /// An f-string error containing the [`FStringErrorType`].
-    FStringError(FStringErrorType),
-    /// An unexpected character was encountered after a line continuation.
-    LineContinuationError,
-    /// An unexpected end of file was encountered.
-    Eof,
-    /// An unexpected error occurred.
-    OtherError(String),
-}
-
-impl fmt::Display for LexicalErrorType {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match self {
-            LexicalErrorType::StringError => write!(f, "Got unexpected string"),
-            LexicalErrorType::FStringError(error) => write!(f, "f-string: {error}"),
-            LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"),
-            LexicalErrorType::NestingError => write!(f, "Got unexpected nesting"),
-            LexicalErrorType::IndentationError => {
-                write!(f, "unindent does not match any outer indentation level")
-            }
-            LexicalErrorType::TabError => {
-                write!(f, "inconsistent use of tabs and spaces in indentation")
-            }
-            LexicalErrorType::TabsAfterSpaces => {
-                write!(f, "Tabs not allowed as part of indentation after spaces")
-            }
-            LexicalErrorType::DefaultArgumentError => {
-                write!(f, "non-default argument follows default argument")
-            }
-            LexicalErrorType::DuplicateArgumentError(arg_name) => {
-                write!(f, "duplicate argument '{arg_name}' in function definition")
-            }
-            LexicalErrorType::DuplicateKeywordArgumentError(arg_name) => {
-                write!(f, "keyword argument repeated: {arg_name}")
-            }
-            LexicalErrorType::PositionalArgumentError => {
-                write!(f, "positional argument follows keyword argument")
-            }
-            LexicalErrorType::UnpackedArgumentError => {
-                write!(
-                    f,
-                    "iterable argument unpacking follows keyword argument unpacking"
-                )
-            }
-            LexicalErrorType::UnrecognizedToken { tok } => {
-                write!(f, "Got unexpected token {tok}")
-            }
-            LexicalErrorType::LineContinuationError => {
-                write!(f, "unexpected character after line continuation character")
-            }
-            LexicalErrorType::Eof => write!(f, "unexpected EOF while parsing"),
-            LexicalErrorType::OtherError(msg) => write!(f, "{msg}"),
-        }
-    }
-}
-
-// TODO: consolidate these with ParseError
-/// An error that occurred during parsing of an f-string.
-#[derive(Debug, PartialEq)]
-pub struct FStringError {
-    /// The type of error that occurred.
-    pub error: FStringErrorType,
-    /// The location of the error.
-    pub location: Location,
-}
-
-impl FStringError {
-    /// Creates a new `FStringError` with the given error type and location.
-    pub fn new(error: FStringErrorType, location: Location) -> Self {
-        Self { error, location }
-    }
-}
-
-impl From<FStringError> for LexicalError {
-    fn from(err: FStringError) -> Self {
-        LexicalError {
-            error: LexicalErrorType::FStringError(err.error),
-            location: err.location,
-        }
-    }
-}
-
-/// Represents the different types of errors that can occur during parsing of an f-string.
-#[derive(Debug, PartialEq)]
-pub enum FStringErrorType {
-    /// Expected a right brace after an opened left brace.
-    UnclosedLbrace,
-    /// Expected a left brace after an ending right brace.
-    UnopenedRbrace,
-    /// Expected a right brace after a conversion flag.
-    ExpectedRbrace,
-    /// An error occurred while parsing an f-string expression.
-    InvalidExpression(Box<ParseErrorType>),
-    /// An invalid conversion flag was encountered.
-    InvalidConversionFlag,
-    /// An empty expression was encountered.
-    EmptyExpression,
-    /// An opening delimiter was not closed properly.
-    MismatchedDelimiter(char, char),
-    /// Too many nested expressions in an f-string.
-    ExpressionNestedTooDeeply,
-    /// The f-string expression cannot include the given character.
-    ExpressionCannotInclude(char),
-    /// A single right brace was encountered.
-    SingleRbrace,
-    /// A closing delimiter was not opened properly.
-    Unmatched(char),
-    // TODO: Test this case.
-    /// Unterminated string.
-    UnterminatedString,
-}
-
-impl fmt::Display for FStringErrorType {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match self {
-            FStringErrorType::UnclosedLbrace => write!(f, "expecting '}}'"),
-            FStringErrorType::UnopenedRbrace => write!(f, "Unopened '}}'"),
-            FStringErrorType::ExpectedRbrace => write!(f, "Expected '}}' after conversion flag."),
-            FStringErrorType::InvalidExpression(error) => {
-                write!(f, "{error}")
-            }
-            FStringErrorType::InvalidConversionFlag => write!(f, "invalid conversion character"),
-            FStringErrorType::EmptyExpression => write!(f, "empty expression not allowed"),
-            FStringErrorType::MismatchedDelimiter(first, second) => write!(
-                f,
-                "closing parenthesis '{second}' does not match opening parenthesis '{first}'"
-            ),
-            FStringErrorType::SingleRbrace => write!(f, "single '}}' is not allowed"),
-            FStringErrorType::Unmatched(delim) => write!(f, "unmatched '{delim}'"),
-            FStringErrorType::ExpressionNestedTooDeeply => {
-                write!(f, "expressions nested too deeply")
-            }
-            FStringErrorType::UnterminatedString => {
-                write!(f, "unterminated string")
-            }
-            FStringErrorType::ExpressionCannotInclude(c) => {
-                if *c == '\\' {
-                    write!(f, "f-string expression part cannot include a backslash")
-                } else {
-                    write!(f, "f-string expression part cannot include '{c}'s")
-                }
-            }
-        }
-    }
-}
-
-impl From<FStringError> for LalrpopError<Location, Tok, LexicalError> {
-    fn from(err: FStringError) -> Self {
-        lalrpop_util::ParseError::User {
-            error: LexicalError {
-                error: LexicalErrorType::FStringError(err.error),
-                location: err.location,
-            },
-        }
-    }
-}
-
-/// Represents an error during parsing.
-pub type ParseError = rustpython_compiler_core::BaseError<ParseErrorType>;
-
-/// Represents the different types of errors that can occur during parsing.
-#[derive(Debug, PartialEq, thiserror::Error)]
-pub enum ParseErrorType {
-    /// Parser encountered an unexpected end of input
-    Eof,
-    /// Parser encountered an extra token
-    ExtraToken(Tok),
-    /// Parser encountered an invalid token
-    InvalidToken,
-    /// Parser encountered an unexpected token
-    UnrecognizedToken(Tok, Option<String>),
-    // Maps to `User` type from `lalrpop-util`
-    /// Parser encountered an error during lexing.
-    Lexical(LexicalErrorType),
-}
-
-// Convert `lalrpop_util::ParseError` to our internal type
-pub(crate) fn parse_error_from_lalrpop(
-    err: LalrpopError<Location, Tok, LexicalError>,
-    source_path: &str,
-) -> ParseError {
-    let source_path = source_path.to_owned();
-    match err {
-        // TODO: Are there cases where this isn't an EOF?
-        LalrpopError::InvalidToken { location } => ParseError {
-            error: ParseErrorType::Eof,
-            location,
-            source_path,
-        },
-        LalrpopError::ExtraToken { token } => ParseError {
-            error: ParseErrorType::ExtraToken(token.1),
-            location: token.0,
-            source_path,
-        },
-        LalrpopError::User { error } => ParseError {
-            error: ParseErrorType::Lexical(error.error),
-            location: error.location,
-            source_path,
-        },
-        LalrpopError::UnrecognizedToken { token, expected } => {
-            // Hacky, but it's how CPython does it. See PyParser_AddToken,
-            // in particular "Only one possible expected token" comment.
-            let expected = (expected.len() == 1).then(|| expected[0].clone());
-            ParseError {
-                error: ParseErrorType::UnrecognizedToken(token.1, expected),
-                location: token.0.with_col_offset(1),
-                source_path,
-            }
-        }
-        LalrpopError::UnrecognizedEOF { location, expected } => {
-            // This could be an initial indentation error that we should ignore
-            let indent_error = expected == ["Indent"];
-            if indent_error {
-                ParseError {
-                    error: ParseErrorType::Lexical(LexicalErrorType::IndentationError),
-                    location,
-                    source_path,
-                }
-            } else {
-                ParseError {
-                    error: ParseErrorType::Eof,
-                    location,
-                    source_path,
-                }
-            }
-        }
-    }
-}
-
-impl fmt::Display for ParseErrorType {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match *self {
-            ParseErrorType::Eof => write!(f, "Got unexpected EOF"),
-            ParseErrorType::ExtraToken(ref tok) => write!(f, "Got extraneous token: {tok:?}"),
-            ParseErrorType::InvalidToken => write!(f, "Got invalid token"),
-            ParseErrorType::UnrecognizedToken(ref tok, ref expected) => {
-                if *tok == Tok::Indent {
-                    write!(f, "unexpected indent")
-                } else if expected.as_deref() == Some("Indent") {
-                    write!(f, "expected an indented block")
-                } else {
-                    write!(f, "invalid syntax. Got unexpected token {tok}")
-                }
-            }
-            ParseErrorType::Lexical(ref error) => write!(f, "{error}"),
-        }
-    }
-}
-
-impl ParseErrorType {
-    /// Returns true if the error is an indentation error.
-    pub fn is_indentation_error(&self) -> bool {
-        match self {
-            ParseErrorType::Lexical(LexicalErrorType::IndentationError) => true,
-            ParseErrorType::UnrecognizedToken(token, expected) => {
-                *token == Tok::Indent || expected.clone() == Some("Indent".to_owned())
-            }
-            _ => false,
-        }
-    }
-
-    /// Returns true if the error is a tab error.
-    pub fn is_tab_error(&self) -> bool {
-        matches!(
-            self,
-            ParseErrorType::Lexical(LexicalErrorType::TabError)
-                | ParseErrorType::Lexical(LexicalErrorType::TabsAfterSpaces)
-        )
-    }
-}
@@ -1,7 +1,7 @@
 // Contains functions that perform validation and parsing of arguments and parameters.
 // Checks apply both to functions and to lambdas.
 use crate::ast;
-use crate::error::{LexicalError, LexicalErrorType};
+use crate::lexer::{LexicalError, LexicalErrorType};
 use rustc_hash::FxHashSet;
 
 pub(crate) struct ArgumentList {
@@ -149,8 +149,8 @@ fn is_starred(exp: &ast::Expr) -> bool {
 
 #[cfg(test)]
 mod tests {
-    use crate::error::{LexicalErrorType, ParseErrorType};
-    use crate::parser::parse_program;
+    use crate::lexer::LexicalErrorType;
+    use crate::parser::{parse_program, ParseErrorType};
 
     macro_rules! function_and_lambda {
         ($($name:ident: $code:expr,)*) => {
@@ -35,9 +35,9 @@
 //! [Lexical analysis]: https://docs.python.org/3/reference/lexical_analysis.html
 pub use super::token::{StringKind, Tok};
 use crate::ast::Location;
-use crate::error::{LexicalError, LexicalErrorType};
 use crate::mode::Mode;
 use crate::soft_keywords::SoftKeywordTransformer;
+use crate::string::FStringErrorType;
 use num_bigint::BigInt;
 use num_traits::identities::Zero;
 use num_traits::Num;
@@ -1212,6 +1212,112 @@ where
     }
 }
 
+/// Represents an error that occurs during lexing and is
+/// returned by the iterator in the [lexer] implementation.
+///
+/// [lexer]: crate::lexer
+#[derive(Debug, PartialEq)]
+pub struct LexicalError {
+    /// The type of error that occurred.
+    pub error: LexicalErrorType,
+    /// The location of the error.
+    pub location: Location,
+}
+
+impl LexicalError {
+    /// Creates a new `LexicalError` with the given error type and location.
+    pub fn new(error: LexicalErrorType, location: Location) -> Self {
+        Self { error, location }
+    }
+}
+
+/// Represents the different types of errors that can occur during lexing.
+#[derive(Debug, PartialEq)]
+pub enum LexicalErrorType {
+    // TODO: Can probably be removed, the places it is used seem to be able
+    // to use the `UnicodeError` variant instead.
+    #[doc(hidden)]
+    StringError,
+    // TODO: Should take a start/end position to report.
+    /// Decoding of a unicode escape sequence in a string literal failed.
+    UnicodeError,
+    /// The nesting of brackets/braces/parentheses is not balanced.
+    NestingError,
+    /// The indentation is not consistent.
+    IndentationError,
+    /// Inconsistent use of tabs and spaces.
+    TabError,
+    /// Encountered a tab after a space.
+    TabsAfterSpaces,
+    /// A non-default argument follows a default argument.
+    DefaultArgumentError,
+    /// A duplicate argument was found in a function definition.
+    DuplicateArgumentError(String),
+    /// A positional argument follows a keyword argument.
+    PositionalArgumentError,
+    /// An iterable argument unpacking `*args` follows keyword argument unpacking `**kwargs`.
+    UnpackedArgumentError,
+    /// A keyword argument was repeated.
+    DuplicateKeywordArgumentError(String),
+    /// An unrecognized token was encountered.
+    UnrecognizedToken { tok: char },
+    /// An f-string error containing the [`FStringErrorType`].
+    FStringError(FStringErrorType),
+    /// An unexpected character was encountered after a line continuation.
+    LineContinuationError,
+    /// An unexpected end of file was encountered.
+    Eof,
+    /// An unexpected error occurred.
+    OtherError(String),
+}
+
+impl std::fmt::Display for LexicalErrorType {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        match self {
+            LexicalErrorType::StringError => write!(f, "Got unexpected string"),
+            LexicalErrorType::FStringError(error) => write!(f, "f-string: {error}"),
+            LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"),
+            LexicalErrorType::NestingError => write!(f, "Got unexpected nesting"),
+            LexicalErrorType::IndentationError => {
+                write!(f, "unindent does not match any outer indentation level")
+            }
+            LexicalErrorType::TabError => {
+                write!(f, "inconsistent use of tabs and spaces in indentation")
+            }
+            LexicalErrorType::TabsAfterSpaces => {
+                write!(f, "Tabs not allowed as part of indentation after spaces")
+            }
+            LexicalErrorType::DefaultArgumentError => {
+                write!(f, "non-default argument follows default argument")
+            }
+            LexicalErrorType::DuplicateArgumentError(arg_name) => {
+                write!(f, "duplicate argument '{arg_name}' in function definition")
+            }
+            LexicalErrorType::DuplicateKeywordArgumentError(arg_name) => {
+                write!(f, "keyword argument repeated: {arg_name}")
+            }
+            LexicalErrorType::PositionalArgumentError => {
+                write!(f, "positional argument follows keyword argument")
+            }
+            LexicalErrorType::UnpackedArgumentError => {
+                write!(
+                    f,
+                    "iterable argument unpacking follows keyword argument unpacking"
+                )
+            }
+            LexicalErrorType::UnrecognizedToken { tok } => {
+                write!(f, "Got unexpected token {tok}")
+            }
+            LexicalErrorType::LineContinuationError => {
+                write!(f, "unexpected character after line continuation character")
+            }
+            LexicalErrorType::Eof => write!(f, "unexpected EOF while parsing"),
+            LexicalErrorType::OtherError(msg) => write!(f, "{msg}"),
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::{make_tokenizer, StringKind, Tok};
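
A minimal sketch of constructing the relocated lexer error from outside the crate. `Location::new(row, column)` comes from rustpython_ast and is an assumption here; the `LexicalError::new` constructor and the `Display` impl are the ones added in the hunk above.

use rustpython_parser::ast::Location;
use rustpython_parser::lexer::{LexicalError, LexicalErrorType};

fn tab_error_at(row: usize, column: usize) -> LexicalError {
    // Same constructor as before the move; only the import path changed.
    LexicalError::new(LexicalErrorType::TabError, Location::new(row, column))
}

fn main() {
    let err = tab_error_at(3, 0);
    // LexicalErrorType's Display impl (above) renders the CPython-style message.
    println!("{} at {:?}", err.error, err.location);
}
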
@@ -124,7 +124,6 @@
 extern crate log;
 pub use rustpython_ast as ast;
 
-pub mod error;
 mod function;
 pub mod lexer;
 pub mod mode;
@@ -12,13 +12,15 @@
 //! [Abstract Syntax Tree]: https://en.wikipedia.org/wiki/Abstract_syntax_tree
 //! [`Mode`]: crate::mode
 
-use crate::lexer::{LexResult, Tok};
+use crate::lexer::{LexResult, LexicalError, LexicalErrorType, Tok};
 pub use crate::mode::Mode;
-use crate::{ast, error::ParseError, lexer, python};
+use crate::{ast, lexer, python};
 use ast::Location;
 use itertools::Itertools;
 use std::iter;
 
+pub(super) use lalrpop_util::ParseError as LalrpopError;
+
 /// Parse a full Python program usually consisting of multiple lines.
 ///
 /// This is a convenience function that can be used to parse a full Python program without having to
@@ -194,7 +196,124 @@ pub fn parse_tokens(
         .filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
     python::TopParser::new()
         .parse(tokenizer.into_iter())
-        .map_err(|e| crate::error::parse_error_from_lalrpop(e, source_path))
+        .map_err(|e| parse_error_from_lalrpop(e, source_path))
+}
+
+/// Represents errors that occur during parsing and are
+/// returned by the `parse_*` functions in the [parser] module.
+///
+/// [parser]: crate::parser
+pub type ParseError = rustpython_compiler_core::BaseError<ParseErrorType>;
+
+/// Represents the different types of errors that can occur during parsing.
+#[derive(Debug, PartialEq, thiserror::Error)]
+pub enum ParseErrorType {
+    /// Parser encountered an unexpected end of input
+    Eof,
+    /// Parser encountered an extra token
+    ExtraToken(Tok),
+    /// Parser encountered an invalid token
+    InvalidToken,
+    /// Parser encountered an unexpected token
+    UnrecognizedToken(Tok, Option<String>),
+    // Maps to `User` type from `lalrpop-util`
+    /// Parser encountered an error during lexing.
+    Lexical(LexicalErrorType),
+}
+
+// Convert `lalrpop_util::ParseError` to our internal type
+fn parse_error_from_lalrpop(
+    err: LalrpopError<Location, Tok, LexicalError>,
+    source_path: &str,
+) -> ParseError {
+    let source_path = source_path.to_owned();
+    match err {
+        // TODO: Are there cases where this isn't an EOF?
+        LalrpopError::InvalidToken { location } => ParseError {
+            error: ParseErrorType::Eof,
+            location,
+            source_path,
+        },
+        LalrpopError::ExtraToken { token } => ParseError {
+            error: ParseErrorType::ExtraToken(token.1),
+            location: token.0,
+            source_path,
+        },
+        LalrpopError::User { error } => ParseError {
+            error: ParseErrorType::Lexical(error.error),
+            location: error.location,
+            source_path,
+        },
+        LalrpopError::UnrecognizedToken { token, expected } => {
+            // Hacky, but it's how CPython does it. See PyParser_AddToken,
+            // in particular "Only one possible expected token" comment.
+            let expected = (expected.len() == 1).then(|| expected[0].clone());
+            ParseError {
+                error: ParseErrorType::UnrecognizedToken(token.1, expected),
+                location: token.0.with_col_offset(1),
+                source_path,
+            }
+        }
+        LalrpopError::UnrecognizedEOF { location, expected } => {
+            // This could be an initial indentation error that we should ignore
+            let indent_error = expected == ["Indent"];
+            if indent_error {
+                ParseError {
+                    error: ParseErrorType::Lexical(LexicalErrorType::IndentationError),
+                    location,
+                    source_path,
+                }
+            } else {
+                ParseError {
+                    error: ParseErrorType::Eof,
+                    location,
+                    source_path,
+                }
+            }
+        }
+    }
+}
+
+impl std::fmt::Display for ParseErrorType {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        match *self {
+            ParseErrorType::Eof => write!(f, "Got unexpected EOF"),
+            ParseErrorType::ExtraToken(ref tok) => write!(f, "Got extraneous token: {tok:?}"),
+            ParseErrorType::InvalidToken => write!(f, "Got invalid token"),
+            ParseErrorType::UnrecognizedToken(ref tok, ref expected) => {
+                if *tok == Tok::Indent {
+                    write!(f, "unexpected indent")
+                } else if expected.as_deref() == Some("Indent") {
+                    write!(f, "expected an indented block")
+                } else {
+                    write!(f, "invalid syntax. Got unexpected token {tok}")
+                }
+            }
+            ParseErrorType::Lexical(ref error) => write!(f, "{error}"),
+        }
+    }
+}
+
+impl ParseErrorType {
+    /// Returns true if the error is an indentation error.
+    pub fn is_indentation_error(&self) -> bool {
+        match self {
+            ParseErrorType::Lexical(LexicalErrorType::IndentationError) => true,
+            ParseErrorType::UnrecognizedToken(token, expected) => {
+                *token == Tok::Indent || expected.clone() == Some("Indent".to_owned())
+            }
+            _ => false,
+        }
+    }
+
+    /// Returns true if the error is a tab error.
+    pub fn is_tab_error(&self) -> bool {
+        matches!(
+            self,
+            ParseErrorType::Lexical(LexicalErrorType::TabError)
+                | ParseErrorType::Lexical(LexicalErrorType::TabsAfterSpaces)
+        )
+    }
 }
 
 #[cfg(test)]
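
A minimal sketch of consuming the relocated `ParseError` through `parse_program`. The two-argument `parse_program(source, source_path)` signature and the Vec-like return value are assumptions inferred from the test imports in this commit, not shown in the hunk itself.

use rustpython_parser::parser::parse_program;

fn main() {
    let source = "def f():\nreturn 1\n"; // body intentionally not indented
    match parse_program(source, "<example>") {
        // Assumed: Ok carries a list of top-level statements.
        Ok(program) => println!("parsed {} top-level statements", program.len()),
        Err(err) => {
            // is_indentation_error() moved here together with ParseErrorType.
            if err.error.is_indentation_error() {
                println!("indentation error at {:?}", err.location);
            } else {
                println!("parse error: {}", err.error);
            }
        }
    }
}
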
@@ -3,15 +3,14 @@
 // The lexer doesn't do any special handling of f-strings, it just treats them as
 // regular strings. Since the parser has no definition of f-string formats (Pending PEP 701)
 // we have to do the parsing here, manually.
-use itertools::Itertools;
 
 use self::FStringErrorType::*;
 use crate::{
     ast::{Constant, ConversionFlag, Expr, ExprKind, Location},
-    error::{FStringError, FStringErrorType, LexicalError, LexicalErrorType, ParseError},
-    parser::parse_expression_located,
-    token::StringKind,
+    lexer::{LexicalError, LexicalErrorType},
+    parser::{parse_expression_located, LalrpopError, ParseError, ParseErrorType},
+    token::{StringKind, Tok},
 };
+use itertools::Itertools;
 use std::{iter, str};
 
 // unicode_name2 does not expose `MAX_NAME_LENGTH`, so we replicate that constant here, fix #3798
@@ -651,6 +650,107 @@ pub(crate) fn parse_strings(
     ))
 }
 
+// TODO: consolidate these with ParseError
+/// An error that occurred during parsing of an f-string.
+#[derive(Debug, PartialEq)]
+pub struct FStringError {
+    /// The type of error that occurred.
+    pub error: FStringErrorType,
+    /// The location of the error.
+    pub location: Location,
+}
+
+impl FStringError {
+    /// Creates a new `FStringError` with the given error type and location.
+    pub fn new(error: FStringErrorType, location: Location) -> Self {
+        Self { error, location }
+    }
+}
+
+impl From<FStringError> for LexicalError {
+    fn from(err: FStringError) -> Self {
+        LexicalError {
+            error: LexicalErrorType::FStringError(err.error),
+            location: err.location,
+        }
+    }
+}
+
+/// Represents the different types of errors that can occur during parsing of an f-string.
+#[derive(Debug, PartialEq)]
+pub enum FStringErrorType {
+    /// Expected a right brace after an opened left brace.
+    UnclosedLbrace,
+    /// Expected a left brace after an ending right brace.
+    UnopenedRbrace,
+    /// Expected a right brace after a conversion flag.
+    ExpectedRbrace,
+    /// An error occurred while parsing an f-string expression.
+    InvalidExpression(Box<ParseErrorType>),
+    /// An invalid conversion flag was encountered.
+    InvalidConversionFlag,
+    /// An empty expression was encountered.
+    EmptyExpression,
+    /// An opening delimiter was not closed properly.
+    MismatchedDelimiter(char, char),
+    /// Too many nested expressions in an f-string.
+    ExpressionNestedTooDeeply,
+    /// The f-string expression cannot include the given character.
+    ExpressionCannotInclude(char),
+    /// A single right brace was encountered.
+    SingleRbrace,
+    /// A closing delimiter was not opened properly.
+    Unmatched(char),
+    // TODO: Test this case.
+    /// Unterminated string.
+    UnterminatedString,
+}
+
+impl std::fmt::Display for FStringErrorType {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        match self {
+            FStringErrorType::UnclosedLbrace => write!(f, "expecting '}}'"),
+            FStringErrorType::UnopenedRbrace => write!(f, "Unopened '}}'"),
+            FStringErrorType::ExpectedRbrace => write!(f, "Expected '}}' after conversion flag."),
+            FStringErrorType::InvalidExpression(error) => {
+                write!(f, "{error}")
+            }
+            FStringErrorType::InvalidConversionFlag => write!(f, "invalid conversion character"),
+            FStringErrorType::EmptyExpression => write!(f, "empty expression not allowed"),
+            FStringErrorType::MismatchedDelimiter(first, second) => write!(
+                f,
+                "closing parenthesis '{second}' does not match opening parenthesis '{first}'"
+            ),
+            FStringErrorType::SingleRbrace => write!(f, "single '}}' is not allowed"),
+            FStringErrorType::Unmatched(delim) => write!(f, "unmatched '{delim}'"),
+            FStringErrorType::ExpressionNestedTooDeeply => {
+                write!(f, "expressions nested too deeply")
+            }
+            FStringErrorType::UnterminatedString => {
+                write!(f, "unterminated string")
+            }
+            FStringErrorType::ExpressionCannotInclude(c) => {
+                if *c == '\\' {
+                    write!(f, "f-string expression part cannot include a backslash")
+                } else {
+                    write!(f, "f-string expression part cannot include '{c}'s")
+                }
+            }
+        }
+    }
+}
+
+impl From<FStringError> for LalrpopError<Location, Tok, LexicalError> {
+    fn from(err: FStringError) -> Self {
+        lalrpop_util::ParseError::User {
+            error: LexicalError {
+                error: LexicalErrorType::FStringError(err.error),
+                location: err.location,
+            },
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
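
A crate-internal sketch (for example a unit test inside this repository) of the conversion chain kept intact by this hunk: `FStringError` into `LexicalError` via the `From` impl above. `Location::new(row, column)` is an assumption; the `crate::string` path matches the import added to the lexer earlier in this commit.

#[cfg(test)]
mod fstring_error_conversion {
    use crate::ast::Location;
    use crate::lexer::{LexicalError, LexicalErrorType};
    use crate::string::{FStringError, FStringErrorType};

    #[test]
    fn unclosed_lbrace_becomes_lexical_error() {
        let err = FStringError::new(FStringErrorType::UnclosedLbrace, Location::new(1, 4));
        // Uses the From<FStringError> for LexicalError impl added in this hunk.
        let lexical: LexicalError = err.into();
        assert_eq!(
            lexical.error,
            LexicalErrorType::FStringError(FStringErrorType::UnclosedLbrace)
        );
        // LexicalErrorType's Display wraps the f-string message.
        assert_eq!(lexical.error.to_string(), "f-string: expecting '}'");
    }
}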