Document parser crate.

This commit is contained in:
Dimitris Fasarakis Hilliard 2023-02-07 21:42:15 +02:00
parent 4ee4fcf407
commit 66cbe3fe62
6 changed files with 429 additions and 86 deletions

View file

@ -1,40 +1,71 @@
//! Define internal parse error types
//! The goal is to provide a matching and a safe error API, maksing errors from LALR
//! Error types for the parser.
//!
//! These types are used to represent errors that occur during lexing and parsing and are
//! returned by the `parse_*` functions in the [parser] module and the iterator in the
//! [lexer] implementation.
//!
//! [parser]: crate::parser
//! [lexer]: crate::lexer
// Define internal parse error types.
// The goal is to provide a matching and a safe error API, masking errors from LALR
use crate::{ast::Location, token::Tok};
use lalrpop_util::ParseError as LalrpopError;
use std::fmt;
/// Represents an error during lexical scanning.
/// Represents an error during lexing.
#[derive(Debug, PartialEq)]
pub struct LexicalError {
/// The type of error that occurred.
pub error: LexicalErrorType,
/// The location of the error.
pub location: Location,
}
impl LexicalError {
/// Creates a new `LexicalError` with the given error type and location.
pub fn new(error: LexicalErrorType, location: Location) -> Self {
Self { error, location }
}
}
/// Represents the different types of errors that can occur during lexing.
#[derive(Debug, PartialEq)]
pub enum LexicalErrorType {
// TODO: Can probably be removed, the places it is used seem to be able
// to use the `UnicodeError` variant instead.
#[doc(hidden)]
StringError,
// TODO: Should take a start/end position to report.
/// Decoding of a unicode escape sequence in a string literal failed.
UnicodeError,
/// The nesting of brackets/braces/parentheses is not balanced.
NestingError,
/// The indentation is not consistent.
IndentationError,
/// Inconsistent use of tabs and spaces.
TabError,
/// Encountered a tab after a space.
TabsAfterSpaces,
/// A non-default argument follows a default argument.
DefaultArgumentError,
/// A duplicate argument was found in a function definition.
DuplicateArgumentError(String),
/// A positional argument follows a keyword argument.
PositionalArgumentError,
/// An iterable argument unpacking `*args` follows keyword argument unpacking `**kwargs`.
UnpackedArgumentError,
/// A keyword argument was repeated.
DuplicateKeywordArgumentError(String),
/// An unrecognized token was encountered.
UnrecognizedToken { tok: char },
/// An f-string error containing the [`FStringErrorType`].
FStringError(FStringErrorType),
/// An unexpected character was encountered after a line continuation.
LineContinuationError,
/// An unexpected end of file was encountered.
Eof,
/// An unexpected error occurred.
OtherError(String),
}
@ -85,13 +116,17 @@ impl fmt::Display for LexicalErrorType {
}
// TODO: consolidate these with ParseError
/// An error that occurred during parsing of an f-string.
#[derive(Debug, PartialEq)]
pub struct FStringError {
/// The type of error that occurred.
pub error: FStringErrorType,
/// The location of the error.
pub location: Location,
}
impl FStringError {
/// Creates a new `FStringError` with the given error type and location.
pub fn new(error: FStringErrorType, location: Location) -> Self {
Self { error, location }
}
@ -106,19 +141,33 @@ impl From<FStringError> for LexicalError {
}
}
/// Represents the different types of errors that can occur during parsing of an f-string.
#[derive(Debug, PartialEq)]
pub enum FStringErrorType {
/// Expected a right brace after an opened left brace.
UnclosedLbrace,
/// Expected a left brace after an ending right brace.
UnopenedRbrace,
/// Expected a right brace after a conversion flag.
ExpectedRbrace,
/// An error occurred while parsing an f-string expression.
InvalidExpression(Box<ParseErrorType>),
/// An invalid conversion flag was encountered.
InvalidConversionFlag,
/// An empty expression was encountered.
EmptyExpression,
/// An opening delimiter was not closed properly.
MismatchedDelimiter(char, char),
/// Too many nested expressions in an f-string.
ExpressionNestedTooDeeply,
/// The f-string expression cannot include the given character.
ExpressionCannotInclude(char),
/// A single right brace was encountered.
SingleRbrace,
/// A closing delimiter was not opened properly.
Unmatched(char),
// TODO: Test this case.
/// Unterminated string.
UnterminatedString,
}
@ -167,9 +216,10 @@ impl From<FStringError> for LalrpopError<Location, Tok, LexicalError> {
}
}
/// Represents an error during parsing
/// Represents an error during parsing.
pub type ParseError = rustpython_compiler_core::BaseError<ParseErrorType>;
/// Represents the different types of errors that can occur during parsing.
#[derive(Debug, PartialEq, thiserror::Error)]
pub enum ParseErrorType {
/// Parser encountered an unexpected end of input
@ -180,11 +230,12 @@ pub enum ParseErrorType {
InvalidToken,
/// Parser encountered an unexpected token
UnrecognizedToken(Tok, Option<String>),
/// Maps to `User` type from `lalrpop-util`
// Maps to `User` type from `lalrpop-util`
/// Parser encountered an error during lexing.
Lexical(LexicalErrorType),
}
/// Convert `lalrpop_util::ParseError` to our internal type
// Convert `lalrpop_util::ParseError` to our internal type
pub(crate) fn parse_error_from_lalrpop(
err: LalrpopError<Location, Tok, LexicalError>,
source_path: &str,
@ -258,6 +309,7 @@ impl fmt::Display for ParseErrorType {
}
impl ParseErrorType {
/// Returns true if the error is an indentation error.
pub fn is_indentation_error(&self) -> bool {
match self {
ParseErrorType::Lexical(LexicalErrorType::IndentationError) => true,
@ -267,6 +319,8 @@ impl ParseErrorType {
_ => false,
}
}
/// Returns true if the error is a tab error.
pub fn is_tab_error(&self) -> bool {
matches!(
self,