From 1511b6631b7197c9f37adbd8e9a706da2125c6d4 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Wed, 22 Feb 2023 16:16:12 +0900 Subject: [PATCH 1/7] Break down rustpython_parser::error module because it doesn't share any common errors but specific error for each sub module --- parser/python.lalrpop | 2 +- parser/src/error.rs | 331 ----------------------------------------- parser/src/function.rs | 6 +- parser/src/lexer.rs | 108 +++++++++++++- parser/src/lib.rs | 1 - parser/src/parser.rs | 125 +++++++++++++++- parser/src/string.rs | 110 +++++++++++++- 7 files changed, 338 insertions(+), 345 deletions(-) delete mode 100644 parser/src/error.rs diff --git a/parser/python.lalrpop b/parser/python.lalrpop index 9b3d226..57d5dbe 100644 --- a/parser/python.lalrpop +++ b/parser/python.lalrpop @@ -5,7 +5,7 @@ use crate::{ ast, - error::{LexicalError, LexicalErrorType}, + lexer::{LexicalError, LexicalErrorType}, function::{ArgumentList, parse_args, parse_params, validate_arguments}, lexer, context::set_context, diff --git a/parser/src/error.rs b/parser/src/error.rs deleted file mode 100644 index dab07f8..0000000 --- a/parser/src/error.rs +++ /dev/null @@ -1,331 +0,0 @@ -//! Error types for the parser. -//! -//! These types are used to represent errors that occur during lexing and parsing and are -//! returned by the `parse_*` functions in the [parser] module and the iterator in the -//! [lexer] implementation. -//! -//! [parser]: crate::parser -//! [lexer]: crate::lexer - -// Define internal parse error types. -// The goal is to provide a matching and a safe error API, masking errors from LALR -use crate::{ast::Location, token::Tok}; -use lalrpop_util::ParseError as LalrpopError; -use std::fmt; - -/// Represents an error during lexing. -#[derive(Debug, PartialEq)] -pub struct LexicalError { - /// The type of error that occurred. - pub error: LexicalErrorType, - /// The location of the error. 
- pub location: Location, -} - -impl LexicalError { - /// Creates a new `LexicalError` with the given error type and location. - pub fn new(error: LexicalErrorType, location: Location) -> Self { - Self { error, location } - } -} - -/// Represents the different types of errors that can occur during lexing. -#[derive(Debug, PartialEq)] -pub enum LexicalErrorType { - // TODO: Can probably be removed, the places it is used seem to be able - // to use the `UnicodeError` variant instead. - #[doc(hidden)] - StringError, - // TODO: Should take a start/end position to report. - /// Decoding of a unicode escape sequence in a string literal failed. - UnicodeError, - /// The nesting of brackets/braces/parentheses is not balanced. - NestingError, - /// The indentation is not consistent. - IndentationError, - /// Inconsistent use of tabs and spaces. - TabError, - /// Encountered a tab after a space. - TabsAfterSpaces, - /// A non-default argument follows a default argument. - DefaultArgumentError, - /// A duplicate argument was found in a function definition. - DuplicateArgumentError(String), - /// A positional argument follows a keyword argument. - PositionalArgumentError, - /// An iterable argument unpacking `*args` follows keyword argument unpacking `**kwargs`. - UnpackedArgumentError, - /// A keyword argument was repeated. - DuplicateKeywordArgumentError(String), - /// An unrecognized token was encountered. - UnrecognizedToken { tok: char }, - /// An f-string error containing the [`FStringErrorType`]. - FStringError(FStringErrorType), - /// An unexpected character was encountered after a line continuation. - LineContinuationError, - /// An unexpected end of file was encountered. - Eof, - /// An unexpected error occurred. 
- OtherError(String), -} - -impl fmt::Display for LexicalErrorType { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - LexicalErrorType::StringError => write!(f, "Got unexpected string"), - LexicalErrorType::FStringError(error) => write!(f, "f-string: {error}"), - LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"), - LexicalErrorType::NestingError => write!(f, "Got unexpected nesting"), - LexicalErrorType::IndentationError => { - write!(f, "unindent does not match any outer indentation level") - } - LexicalErrorType::TabError => { - write!(f, "inconsistent use of tabs and spaces in indentation") - } - LexicalErrorType::TabsAfterSpaces => { - write!(f, "Tabs not allowed as part of indentation after spaces") - } - LexicalErrorType::DefaultArgumentError => { - write!(f, "non-default argument follows default argument") - } - LexicalErrorType::DuplicateArgumentError(arg_name) => { - write!(f, "duplicate argument '{arg_name}' in function definition") - } - LexicalErrorType::DuplicateKeywordArgumentError(arg_name) => { - write!(f, "keyword argument repeated: {arg_name}") - } - LexicalErrorType::PositionalArgumentError => { - write!(f, "positional argument follows keyword argument") - } - LexicalErrorType::UnpackedArgumentError => { - write!( - f, - "iterable argument unpacking follows keyword argument unpacking" - ) - } - LexicalErrorType::UnrecognizedToken { tok } => { - write!(f, "Got unexpected token {tok}") - } - LexicalErrorType::LineContinuationError => { - write!(f, "unexpected character after line continuation character") - } - LexicalErrorType::Eof => write!(f, "unexpected EOF while parsing"), - LexicalErrorType::OtherError(msg) => write!(f, "{msg}"), - } - } -} - -// TODO: consolidate these with ParseError -/// An error that occurred during parsing of an f-string. -#[derive(Debug, PartialEq)] -pub struct FStringError { - /// The type of error that occurred. - pub error: FStringErrorType, - /// The location of the error. 
- pub location: Location, -} - -impl FStringError { - /// Creates a new `FStringError` with the given error type and location. - pub fn new(error: FStringErrorType, location: Location) -> Self { - Self { error, location } - } -} - -impl From<FStringError> for LexicalError { - fn from(err: FStringError) -> Self { - LexicalError { - error: LexicalErrorType::FStringError(err.error), - location: err.location, - } - } -} - -/// Represents the different types of errors that can occur during parsing of an f-string. -#[derive(Debug, PartialEq)] -pub enum FStringErrorType { - /// Expected a right brace after an opened left brace. - UnclosedLbrace, - /// Expected a left brace after an ending right brace. - UnopenedRbrace, - /// Expected a right brace after a conversion flag. - ExpectedRbrace, - /// An error occurred while parsing an f-string expression. - InvalidExpression(Box<ParseErrorType>), - /// An invalid conversion flag was encountered. - InvalidConversionFlag, - /// An empty expression was encountered. - EmptyExpression, - /// An opening delimiter was not closed properly. - MismatchedDelimiter(char, char), - /// Too many nested expressions in an f-string. - ExpressionNestedTooDeeply, - /// The f-string expression cannot include the given character. - ExpressionCannotInclude(char), - /// A single right brace was encountered. - SingleRbrace, - /// A closing delimiter was not opened properly. - Unmatched(char), - // TODO: Test this case. - /// Unterminated string. 
- UnterminatedString, -} - -impl fmt::Display for FStringErrorType { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - FStringErrorType::UnclosedLbrace => write!(f, "expecting '}}'"), - FStringErrorType::UnopenedRbrace => write!(f, "Unopened '}}'"), - FStringErrorType::ExpectedRbrace => write!(f, "Expected '}}' after conversion flag."), - FStringErrorType::InvalidExpression(error) => { - write!(f, "{error}") - } - FStringErrorType::InvalidConversionFlag => write!(f, "invalid conversion character"), - FStringErrorType::EmptyExpression => write!(f, "empty expression not allowed"), - FStringErrorType::MismatchedDelimiter(first, second) => write!( - f, - "closing parenthesis '{second}' does not match opening parenthesis '{first}'" - ), - FStringErrorType::SingleRbrace => write!(f, "single '}}' is not allowed"), - FStringErrorType::Unmatched(delim) => write!(f, "unmatched '{delim}'"), - FStringErrorType::ExpressionNestedTooDeeply => { - write!(f, "expressions nested too deeply") - } - FStringErrorType::UnterminatedString => { - write!(f, "unterminated string") - } - FStringErrorType::ExpressionCannotInclude(c) => { - if *c == '\\' { - write!(f, "f-string expression part cannot include a backslash") - } else { - write!(f, "f-string expression part cannot include '{c}'s") - } - } - } - } -} - -impl From<FStringError> for LalrpopError<Location, Tok, LexicalError> { - fn from(err: FStringError) -> Self { - lalrpop_util::ParseError::User { - error: LexicalError { - error: LexicalErrorType::FStringError(err.error), - location: err.location, - }, - } - } -} - -/// Represents an error during parsing. -pub type ParseError = rustpython_compiler_core::BaseError<ParseErrorType>; - -/// Represents the different types of errors that can occur during parsing. 
-#[derive(Debug, PartialEq, thiserror::Error)] -pub enum ParseErrorType { - /// Parser encountered an unexpected end of input - Eof, - /// Parser encountered an extra token - ExtraToken(Tok), - /// Parser encountered an invalid token - InvalidToken, - /// Parser encountered an unexpected token - UnrecognizedToken(Tok, Option<String>), - // Maps to `User` type from `lalrpop-util` - /// Parser encountered an error during lexing. - Lexical(LexicalErrorType), -} - -// Convert `lalrpop_util::ParseError` to our internal type -pub(crate) fn parse_error_from_lalrpop( - err: LalrpopError<Location, Tok, LexicalError>, - source_path: &str, -) -> ParseError { - let source_path = source_path.to_owned(); - match err { - // TODO: Are there cases where this isn't an EOF? - LalrpopError::InvalidToken { location } => ParseError { - error: ParseErrorType::Eof, - location, - source_path, - }, - LalrpopError::ExtraToken { token } => ParseError { - error: ParseErrorType::ExtraToken(token.1), - location: token.0, - source_path, - }, - LalrpopError::User { error } => ParseError { - error: ParseErrorType::Lexical(error.error), - location: error.location, - source_path, - }, - LalrpopError::UnrecognizedToken { token, expected } => { - // Hacky, but it's how CPython does it. See PyParser_AddToken, - // in particular "Only one possible expected token" comment. 
- let expected = (expected.len() == 1).then(|| expected[0].clone()); - ParseError { - error: ParseErrorType::UnrecognizedToken(token.1, expected), - location: token.0.with_col_offset(1), - source_path, - } - } - LalrpopError::UnrecognizedEOF { location, expected } => { - // This could be an initial indentation error that we should ignore - let indent_error = expected == ["Indent"]; - if indent_error { - ParseError { - error: ParseErrorType::Lexical(LexicalErrorType::IndentationError), - location, - source_path, - } - } else { - ParseError { - error: ParseErrorType::Eof, - location, - source_path, - } - } - } - } -} - -impl fmt::Display for ParseErrorType { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - ParseErrorType::Eof => write!(f, "Got unexpected EOF"), - ParseErrorType::ExtraToken(ref tok) => write!(f, "Got extraneous token: {tok:?}"), - ParseErrorType::InvalidToken => write!(f, "Got invalid token"), - ParseErrorType::UnrecognizedToken(ref tok, ref expected) => { - if *tok == Tok::Indent { - write!(f, "unexpected indent") - } else if expected.as_deref() == Some("Indent") { - write!(f, "expected an indented block") - } else { - write!(f, "invalid syntax. Got unexpected token {tok}") - } - } - ParseErrorType::Lexical(ref error) => write!(f, "{error}"), - } - } -} - -impl ParseErrorType { - /// Returns true if the error is an indentation error. - pub fn is_indentation_error(&self) -> bool { - match self { - ParseErrorType::Lexical(LexicalErrorType::IndentationError) => true, - ParseErrorType::UnrecognizedToken(token, expected) => { - *token == Tok::Indent || expected.clone() == Some("Indent".to_owned()) - } - _ => false, - } - } - - /// Returns true if the error is a tab error. 
- pub fn is_tab_error(&self) -> bool { - matches!( - self, - ParseErrorType::Lexical(LexicalErrorType::TabError) - | ParseErrorType::Lexical(LexicalErrorType::TabsAfterSpaces) - ) - } -} diff --git a/parser/src/function.rs b/parser/src/function.rs index 35c3ad2..dc95039 100644 --- a/parser/src/function.rs +++ b/parser/src/function.rs @@ -1,7 +1,7 @@ // Contains functions that perform validation and parsing of arguments and parameters. // Checks apply both to functions and to lambdas. use crate::ast; -use crate::error::{LexicalError, LexicalErrorType}; +use crate::lexer::{LexicalError, LexicalErrorType}; use rustc_hash::FxHashSet; pub(crate) struct ArgumentList { @@ -149,8 +149,8 @@ fn is_starred(exp: &ast::Expr) -> bool { #[cfg(test)] mod tests { - use crate::error::{LexicalErrorType, ParseErrorType}; - use crate::parser::parse_program; + use crate::lexer::LexicalErrorType; + use crate::parser::{parse_program, ParseErrorType}; macro_rules! function_and_lambda { ($($name:ident: $code:expr,)*) => { diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs index 2ea7867..7da3b8c 100644 --- a/parser/src/lexer.rs +++ b/parser/src/lexer.rs @@ -35,9 +35,9 @@ //! [Lexical analysis]: https://docs.python.org/3/reference/lexical_analysis.html pub use super::token::{StringKind, Tok}; use crate::ast::Location; -use crate::error::{LexicalError, LexicalErrorType}; use crate::mode::Mode; use crate::soft_keywords::SoftKeywordTransformer; +use crate::string::FStringErrorType; use num_bigint::BigInt; use num_traits::identities::Zero; use num_traits::Num; @@ -1212,6 +1212,112 @@ where } } +/// Represents an error that occur during lexing and are +/// returned by the `parse_*` functions in the iterator in the +/// [lexer] implementation. +/// +/// [lexer]: crate::lexer +#[derive(Debug, PartialEq)] +pub struct LexicalError { + /// The type of error that occurred. + pub error: LexicalErrorType, + /// The location of the error. 
+ pub location: Location, +} + +impl LexicalError { + /// Creates a new `LexicalError` with the given error type and location. + pub fn new(error: LexicalErrorType, location: Location) -> Self { + Self { error, location } + } +} + +/// Represents the different types of errors that can occur during lexing. +#[derive(Debug, PartialEq)] +pub enum LexicalErrorType { + // TODO: Can probably be removed, the places it is used seem to be able + // to use the `UnicodeError` variant instead. + #[doc(hidden)] + StringError, + // TODO: Should take a start/end position to report. + /// Decoding of a unicode escape sequence in a string literal failed. + UnicodeError, + /// The nesting of brackets/braces/parentheses is not balanced. + NestingError, + /// The indentation is not consistent. + IndentationError, + /// Inconsistent use of tabs and spaces. + TabError, + /// Encountered a tab after a space. + TabsAfterSpaces, + /// A non-default argument follows a default argument. + DefaultArgumentError, + /// A duplicate argument was found in a function definition. + DuplicateArgumentError(String), + /// A positional argument follows a keyword argument. + PositionalArgumentError, + /// An iterable argument unpacking `*args` follows keyword argument unpacking `**kwargs`. + UnpackedArgumentError, + /// A keyword argument was repeated. + DuplicateKeywordArgumentError(String), + /// An unrecognized token was encountered. + UnrecognizedToken { tok: char }, + /// An f-string error containing the [`FStringErrorType`]. + FStringError(FStringErrorType), + /// An unexpected character was encountered after a line continuation. + LineContinuationError, + /// An unexpected end of file was encountered. + Eof, + /// An unexpected error occurred. 
+ OtherError(String), +} + +impl std::fmt::Display for LexicalErrorType { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + LexicalErrorType::StringError => write!(f, "Got unexpected string"), + LexicalErrorType::FStringError(error) => write!(f, "f-string: {error}"), + LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"), + LexicalErrorType::NestingError => write!(f, "Got unexpected nesting"), + LexicalErrorType::IndentationError => { + write!(f, "unindent does not match any outer indentation level") + } + LexicalErrorType::TabError => { + write!(f, "inconsistent use of tabs and spaces in indentation") + } + LexicalErrorType::TabsAfterSpaces => { + write!(f, "Tabs not allowed as part of indentation after spaces") + } + LexicalErrorType::DefaultArgumentError => { + write!(f, "non-default argument follows default argument") + } + LexicalErrorType::DuplicateArgumentError(arg_name) => { + write!(f, "duplicate argument '{arg_name}' in function definition") + } + LexicalErrorType::DuplicateKeywordArgumentError(arg_name) => { + write!(f, "keyword argument repeated: {arg_name}") + } + LexicalErrorType::PositionalArgumentError => { + write!(f, "positional argument follows keyword argument") + } + LexicalErrorType::UnpackedArgumentError => { + write!( + f, + "iterable argument unpacking follows keyword argument unpacking" + ) + } + LexicalErrorType::UnrecognizedToken { tok } => { + write!(f, "Got unexpected token {tok}") + } + LexicalErrorType::LineContinuationError => { + write!(f, "unexpected character after line continuation character") + } + LexicalErrorType::Eof => write!(f, "unexpected EOF while parsing"), + LexicalErrorType::OtherError(msg) => write!(f, "{msg}"), + } + } +} + #[cfg(test)] mod tests { use super::{make_tokenizer, StringKind, Tok}; diff --git a/parser/src/lib.rs b/parser/src/lib.rs index bfb56d7..440b6f6 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -124,7 +124,6 @@ extern crate log; pub use 
rustpython_ast as ast; -pub mod error; mod function; pub mod lexer; pub mod mode; diff --git a/parser/src/parser.rs b/parser/src/parser.rs index e0865b2..1854c25 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -12,13 +12,15 @@ //! [Abstract Syntax Tree]: https://en.wikipedia.org/wiki/Abstract_syntax_tree //! [`Mode`]: crate::mode -use crate::lexer::{LexResult, Tok}; +use crate::lexer::{LexResult, LexicalError, LexicalErrorType, Tok}; pub use crate::mode::Mode; -use crate::{ast, error::ParseError, lexer, python}; +use crate::{ast, lexer, python}; use ast::Location; use itertools::Itertools; use std::iter; +pub(super) use lalrpop_util::ParseError as LalrpopError; + /// Parse a full Python program usually consisting of multiple lines. /// /// This is a convenience function that can be used to parse a full Python program without having to @@ -194,7 +196,124 @@ pub fn parse_tokens( .filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline)); python::TopParser::new() .parse(tokenizer.into_iter()) - .map_err(|e| crate::error::parse_error_from_lalrpop(e, source_path)) + .map_err(|e| parse_error_from_lalrpop(e, source_path)) +} + +/// Represents represent errors that occur during parsing and are +/// returned by the `parse_*` functions in the [parser] module. +/// +/// [parser]: crate::parser +pub type ParseError = rustpython_compiler_core::BaseError<ParseErrorType>; + +/// Represents the different types of errors that can occur during parsing. +#[derive(Debug, PartialEq, thiserror::Error)] +pub enum ParseErrorType { + /// Parser encountered an unexpected end of input + Eof, + /// Parser encountered an extra token + ExtraToken(Tok), + /// Parser encountered an invalid token + InvalidToken, + /// Parser encountered an unexpected token + UnrecognizedToken(Tok, Option<String>), + // Maps to `User` type from `lalrpop-util` + /// Parser encountered an error during lexing. 
+ Lexical(LexicalErrorType), +} + +// Convert `lalrpop_util::ParseError` to our internal type +fn parse_error_from_lalrpop( + err: LalrpopError<Location, Tok, LexicalError>, + source_path: &str, +) -> ParseError { + let source_path = source_path.to_owned(); + match err { + // TODO: Are there cases where this isn't an EOF? + LalrpopError::InvalidToken { location } => ParseError { + error: ParseErrorType::Eof, + location, + source_path, + }, + LalrpopError::ExtraToken { token } => ParseError { + error: ParseErrorType::ExtraToken(token.1), + location: token.0, + source_path, + }, + LalrpopError::User { error } => ParseError { + error: ParseErrorType::Lexical(error.error), + location: error.location, + source_path, + }, + LalrpopError::UnrecognizedToken { token, expected } => { + // Hacky, but it's how CPython does it. See PyParser_AddToken, + // in particular "Only one possible expected token" comment. + let expected = (expected.len() == 1).then(|| expected[0].clone()); + ParseError { + error: ParseErrorType::UnrecognizedToken(token.1, expected), + location: token.0.with_col_offset(1), + source_path, + } + } + LalrpopError::UnrecognizedEOF { location, expected } => { + // This could be an initial indentation error that we should ignore + let indent_error = expected == ["Indent"]; + if indent_error { + ParseError { + error: ParseErrorType::Lexical(LexicalErrorType::IndentationError), + location, + source_path, + } + } else { + ParseError { + error: ParseErrorType::Eof, + location, + source_path, + } + } + } + } +} + +impl std::fmt::Display for ParseErrorType { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match *self { + ParseErrorType::Eof => write!(f, "Got unexpected EOF"), + ParseErrorType::ExtraToken(ref tok) => write!(f, "Got extraneous token: {tok:?}"), + ParseErrorType::InvalidToken => write!(f, "Got invalid token"), + ParseErrorType::UnrecognizedToken(ref tok, ref expected) => { + if *tok == Tok::Indent { + write!(f, "unexpected indent") + } else if 
expected.as_deref() == Some("Indent") { + write!(f, "expected an indented block") + } else { + write!(f, "invalid syntax. Got unexpected token {tok}") + } + } + ParseErrorType::Lexical(ref error) => write!(f, "{error}"), + } + } +} + +impl ParseErrorType { + /// Returns true if the error is an indentation error. + pub fn is_indentation_error(&self) -> bool { + match self { + ParseErrorType::Lexical(LexicalErrorType::IndentationError) => true, + ParseErrorType::UnrecognizedToken(token, expected) => { + *token == Tok::Indent || expected.clone() == Some("Indent".to_owned()) + } + _ => false, + } + } + + /// Returns true if the error is a tab error. + pub fn is_tab_error(&self) -> bool { + matches!( + self, + ParseErrorType::Lexical(LexicalErrorType::TabError) + | ParseErrorType::Lexical(LexicalErrorType::TabsAfterSpaces) + ) + } } #[cfg(test)] diff --git a/parser/src/string.rs b/parser/src/string.rs index 84c6fe2..0143177 100644 --- a/parser/src/string.rs +++ b/parser/src/string.rs @@ -3,15 +3,14 @@ // The lexer doesn't do any special handling of f-strings, it just treats them as // regular strings. Since the parser has no definition of f-string formats (Pending PEP 701) // we have to do the parsing here, manually. -use itertools::Itertools; - use self::FStringErrorType::*; use crate::{ ast::{Constant, ConversionFlag, Expr, ExprKind, Location}, - error::{FStringError, FStringErrorType, LexicalError, LexicalErrorType, ParseError}, - parser::parse_expression_located, - token::StringKind, + lexer::{LexicalError, LexicalErrorType}, + parser::{parse_expression_located, LalrpopError, ParseError, ParseErrorType}, + token::{StringKind, Tok}, }; +use itertools::Itertools; use std::{iter, str}; // unicode_name2 does not expose `MAX_NAME_LENGTH`, so we replicate that constant here, fix #3798 @@ -651,6 +650,107 @@ pub(crate) fn parse_strings( )) } +// TODO: consolidate these with ParseError +/// An error that occurred during parsing of an f-string. 
+#[derive(Debug, PartialEq)] +pub struct FStringError { + /// The type of error that occurred. + pub error: FStringErrorType, + /// The location of the error. + pub location: Location, +} + +impl FStringError { + /// Creates a new `FStringError` with the given error type and location. + pub fn new(error: FStringErrorType, location: Location) -> Self { + Self { error, location } + } +} + +impl From<FStringError> for LexicalError { + fn from(err: FStringError) -> Self { + LexicalError { + error: LexicalErrorType::FStringError(err.error), + location: err.location, + } + } +} + +/// Represents the different types of errors that can occur during parsing of an f-string. +#[derive(Debug, PartialEq)] +pub enum FStringErrorType { + /// Expected a right brace after an opened left brace. + UnclosedLbrace, + /// Expected a left brace after an ending right brace. + UnopenedRbrace, + /// Expected a right brace after a conversion flag. + ExpectedRbrace, + /// An error occurred while parsing an f-string expression. + InvalidExpression(Box<ParseErrorType>), + /// An invalid conversion flag was encountered. + InvalidConversionFlag, + /// An empty expression was encountered. + EmptyExpression, + /// An opening delimiter was not closed properly. + MismatchedDelimiter(char, char), + /// Too many nested expressions in an f-string. + ExpressionNestedTooDeeply, + /// The f-string expression cannot include the given character. + ExpressionCannotInclude(char), + /// A single right brace was encountered. + SingleRbrace, + /// A closing delimiter was not opened properly. + Unmatched(char), + // TODO: Test this case. + /// Unterminated string. 
+ UnterminatedString, +} + +impl std::fmt::Display for FStringErrorType { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + FStringErrorType::UnclosedLbrace => write!(f, "expecting '}}'"), + FStringErrorType::UnopenedRbrace => write!(f, "Unopened '}}'"), + FStringErrorType::ExpectedRbrace => write!(f, "Expected '}}' after conversion flag."), + FStringErrorType::InvalidExpression(error) => { + write!(f, "{error}") + } + FStringErrorType::InvalidConversionFlag => write!(f, "invalid conversion character"), + FStringErrorType::EmptyExpression => write!(f, "empty expression not allowed"), + FStringErrorType::MismatchedDelimiter(first, second) => write!( + f, + "closing parenthesis '{second}' does not match opening parenthesis '{first}'" + ), + FStringErrorType::SingleRbrace => write!(f, "single '}}' is not allowed"), + FStringErrorType::Unmatched(delim) => write!(f, "unmatched '{delim}'"), + FStringErrorType::ExpressionNestedTooDeeply => { + write!(f, "expressions nested too deeply") + } + FStringErrorType::UnterminatedString => { + write!(f, "unterminated string") + } + FStringErrorType::ExpressionCannotInclude(c) => { + if *c == '\\' { + write!(f, "f-string expression part cannot include a backslash") + } else { + write!(f, "f-string expression part cannot include '{c}'s") + } + } + } + } +} + +impl From<FStringError> for LalrpopError<Location, Tok, LexicalError> { + fn from(err: FStringError) -> Self { + lalrpop_util::ParseError::User { + error: LexicalError { + error: LexicalErrorType::FStringError(err.error), + location: err.location, + }, + } + } +} + #[cfg(test)] mod tests { use super::*; From 66e3080173cdc996a6548004eb46a13ae35f0dca Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Wed, 22 Feb 2023 16:52:40 +0900 Subject: [PATCH 2/7] Fix ModeParseError message --- core/src/mode.rs | 8 +++----- parser/src/mode.rs | 8 +++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/core/src/mode.rs b/core/src/mode.rs index b56f226..ca0d599 100644 --- 
a/core/src/mode.rs +++ b/core/src/mode.rs @@ -15,18 +15,16 @@ impl std::str::FromStr for Mode { "exec" => Ok(Mode::Exec), "eval" => Ok(Mode::Eval), "single" => Ok(Mode::Single), - _ => Err(ModeParseError { _priv: () }), + _ => Err(ModeParseError(())), } } } #[derive(Debug)] -pub struct ModeParseError { - _priv: (), -} +pub struct ModeParseError(()); impl std::fmt::Display for ModeParseError { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, r#"mode should be "exec", "eval", or "single""#) + write!(f, r#"mode must be "exec", "eval", or "single""#) } } diff --git a/parser/src/mode.rs b/parser/src/mode.rs index 4403fbe..9e9f1e1 100644 --- a/parser/src/mode.rs +++ b/parser/src/mode.rs @@ -39,19 +39,17 @@ impl std::str::FromStr for Mode { match s { "exec" | "single" => Ok(Mode::Module), "eval" => Ok(Mode::Expression), - _ => Err(ModeParseError { _priv: () }), + _ => Err(ModeParseError(())), } } } /// Returned when a given mode is not valid. #[derive(Debug)] -pub struct ModeParseError { - _priv: (), -} +pub struct ModeParseError(()); impl std::fmt::Display for ModeParseError { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, r#"mode should be "exec", "eval", or "single""#) + write!(f, r#"mode must be "exec", "eval", or "single""#) } } From 39fc23cf9290904283670b36b272863fa3479358 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Wed, 22 Feb 2023 17:28:49 +0900 Subject: [PATCH 3/7] relocate feature-independent use --- ast/src/constant.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ast/src/constant.rs b/ast/src/constant.rs index 54aa759..55ee09a 100644 --- a/ast/src/constant.rs +++ b/ast/src/constant.rs @@ -1,5 +1,3 @@ -use std::fmt::Error; - use num_bigint::BigInt; pub use rustpython_compiler_core::ConversionFlag; @@ -44,7 +42,9 @@ impl std::fmt::Display for Constant { Constant::None => f.pad("None"), Constant::Bool(b) => f.pad(if *b { "True" } else { "False" }), Constant::Str(s) => 
rustpython_common::str::repr(s).fmt(f), - Constant::Bytes(b) => f.pad(&rustpython_common::bytes::repr(b).map_err(|_err| Error)?), + Constant::Bytes(b) => { + f.pad(&rustpython_common::bytes::repr(b).map_err(|_err| std::fmt::Error)?) + } Constant::Int(i) => i.fmt(f), Constant::Tuple(tup) => { if let [elt] = &**tup { From 8580e4ebb5203e3fc61a6ec4505cc50011a3094a Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Wed, 22 Feb 2023 15:58:51 +0900 Subject: [PATCH 4/7] make_tokenizer -> lex to integrate terms we don't distinguish scanner or tokenizer from lexer --- parser/src/lexer.rs | 30 +++++++++++++++--------------- parser/src/lib.rs | 8 ++++---- parser/src/parser.rs | 12 ++++++------ parser/src/soft_keywords.rs | 4 ++-- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs index 7da3b8c..fb0a909 100644 --- a/parser/src/lexer.rs +++ b/parser/src/lexer.rs @@ -4,7 +4,7 @@ //! governing what is and is not a valid token are defined in the Python reference //! guide section on [Lexical analysis]. //! -//! The primary function in this module is [`make_tokenizer`], which takes a string slice +//! The primary function in this module is [`lex`], which takes a string slice //! and returns an iterator over the tokens in the source code. The tokens are currently returned //! as a `Result<Spanned, LexicalError>`, where [`Spanned`] is a tuple containing the //! start and end [`Location`] and a [`Tok`] denoting the token. @@ -12,12 +12,12 @@ //! # Example //! //! ``` -//! use rustpython_parser::lexer::{make_tokenizer, Tok}; +//! use rustpython_parser::lexer::{lex, Tok}; //! use rustpython_parser::mode::Mode; //! use rustpython_parser::token::StringKind; //! //! let source = "x = 'RustPython'"; -//! let tokens = make_tokenizer(source, Mode::Module) +//! let tokens = lex(source, Mode::Module) //! .map(|tok| tok.expect("Failed to lex")) //! .collect::<Vec<_>>(); //! 
@@ -195,29 +195,29 @@ pub type Spanned = (Location, Tok, Location); /// The result of lexing a token. pub type LexResult = Result; -/// Create a new tokenizer from a source string. +/// Create a new lexer from a source string. /// /// # Examples /// /// ``` /// use rustpython_parser::mode::Mode; -/// use rustpython_parser::lexer::{make_tokenizer}; +/// use rustpython_parser::lexer::{lex}; /// /// let source = "def hello(): return 'world'"; -/// let tokenizer = make_tokenizer(source, Mode::Module); +/// let lexer = lex(source, Mode::Module); /// -/// for token in tokenizer { +/// for token in lexer { /// println!("{:?}", token); /// } /// ``` #[inline] -pub fn make_tokenizer(source: &str, mode: Mode) -> impl Iterator + '_ { - make_tokenizer_located(source, mode, Location::default()) +pub fn lex(source: &str, mode: Mode) -> impl Iterator + '_ { + lex_located(source, mode, Location::default()) } -/// Create a new tokenizer from a source string, starting at a given location. -/// You probably want to use [`make_tokenizer`] instead. -pub fn make_tokenizer_located( +/// Create a new lexer from a source string, starting at a given location. +/// You probably want to use [`lex`] instead. +pub fn lex_located( source: &str, mode: Mode, start_location: Location, @@ -230,7 +230,7 @@ where T: Iterator, { /// Create a new lexer from T and a starting location. You probably want to use - /// [`make_tokenizer`] instead. + /// [`lex`] instead. 
pub fn new(input: T, start: Location) -> Self { let mut lxr = Lexer { at_begin_of_line: true, @@ -1320,7 +1320,7 @@ impl std::fmt::Display for LexicalErrorType { #[cfg(test)] mod tests { - use super::{make_tokenizer, StringKind, Tok}; + use super::{lex, StringKind, Tok}; use crate::mode::Mode; use num_bigint::BigInt; @@ -1329,7 +1329,7 @@ mod tests { const UNIX_EOL: &str = "\n"; pub fn lex_source(source: &str) -> Vec { - let lexer = make_tokenizer(source, Mode::Module); + let lexer = lex(source, Mode::Module); lexer.map(|x| x.unwrap().1).collect() } diff --git a/parser/src/lib.rs b/parser/src/lib.rs index 440b6f6..6769246 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -67,20 +67,20 @@ //! //! ``` //! use rustpython_parser::mode::Mode; -//! use rustpython_parser::lexer::make_tokenizer; +//! use rustpython_parser::lexer::lex; //! //! let python_source = r#" //! def is_odd(i): //! return bool(i & 1) //! "#; -//! let mut tokens = make_tokenizer(python_source, Mode::Module); +//! let mut tokens = lex(python_source, Mode::Module); //! assert!(tokens.all(|t| t.is_ok())); //! ``` //! //! These tokens can be directly fed into the parser to generate an AST: //! //! ``` -//! use rustpython_parser::lexer::make_tokenizer; +//! use rustpython_parser::lexer::lex; //! use rustpython_parser::mode::Mode; //! use rustpython_parser::parser::parse_tokens; //! @@ -88,7 +88,7 @@ //! def is_odd(i): //! return bool(i & 1) //! "#; -//! let tokens = make_tokenizer(python_source, Mode::Module); +//! let tokens = lex(python_source, Mode::Module); //! let ast = parse_tokens(tokens, Mode::Module, ""); //! //! 
assert!(ast.is_ok()); diff --git a/parser/src/parser.rs b/parser/src/parser.rs index 1854c25..2cdb7c2 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -164,7 +164,7 @@ pub fn parse_located( source_path: &str, location: Location, ) -> Result { - let lxr = lexer::make_tokenizer_located(source, mode, location); + let lxr = lexer::lex_located(source, mode, location); parse_tokens(lxr, mode, source_path) } @@ -175,14 +175,14 @@ pub fn parse_located( /// # Example /// /// As an example, instead of parsing a string, we can parse a list of tokens after we generate -/// them using the [`lexer::make_tokenizer`] function: +/// them using the [`lexer::lex`] function: /// /// ``` -/// use rustpython_parser::lexer::make_tokenizer; +/// use rustpython_parser::lexer::lex; /// use rustpython_parser::mode::Mode; /// use rustpython_parser::parser::parse_tokens; /// -/// let expr = parse_tokens(make_tokenizer("1 + 2", Mode::Expression), Mode::Expression, ""); +/// let expr = parse_tokens(lex("1 + 2", Mode::Expression), Mode::Expression, ""); /// assert!(expr.is_ok()); /// ``` pub fn parse_tokens( @@ -191,11 +191,11 @@ pub fn parse_tokens( source_path: &str, ) -> Result { let marker_token = (Default::default(), mode.to_marker(), Default::default()); - let tokenizer = iter::once(Ok(marker_token)) + let lexer = iter::once(Ok(marker_token)) .chain(lxr) .filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. 
} | Tok::NonLogicalNewline)); python::TopParser::new() - .parse(tokenizer.into_iter()) + .parse(lexer.into_iter()) .map_err(|e| parse_error_from_lalrpop(e, source_path)) } diff --git a/parser/src/soft_keywords.rs b/parser/src/soft_keywords.rs index a029ccd..7011f3a 100644 --- a/parser/src/soft_keywords.rs +++ b/parser/src/soft_keywords.rs @@ -27,9 +27,9 @@ impl SoftKeywordTransformer where I: Iterator, { - pub fn new(tokenizer: I, mode: Mode) -> Self { + pub fn new(lexer: I, mode: Mode) -> Self { Self { - underlying: tokenizer.multipeek(), + underlying: lexer.multipeek(), start_of_line: matches!(mode, Mode::Interactive | Mode::Module), } } From cb8c6fb78dd9ba224e12b4b4f23e12ae1b9eeae5 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Wed, 22 Feb 2023 17:14:20 +0900 Subject: [PATCH 5/7] Flatten rustpython_parser interface --- parser/python.lalrpop | 197 ++++++++++++++++++------------------ parser/src/function.rs | 6 +- parser/src/lexer.rs | 31 +++--- parser/src/lib.rs | 35 +++---- parser/src/parser.rs | 36 +++---- parser/src/soft_keywords.rs | 4 +- parser/src/string.rs | 36 ++++--- 7 files changed, 168 insertions(+), 177 deletions(-) diff --git a/parser/python.lalrpop b/parser/python.lalrpop index 57d5dbe..3ca06b5 100644 --- a/parser/python.lalrpop +++ b/parser/python.lalrpop @@ -7,10 +7,9 @@ use crate::{ ast, lexer::{LexicalError, LexicalErrorType}, function::{ArgumentList, parse_args, parse_params, validate_arguments}, - lexer, context::set_context, string::parse_strings, - token::StringKind, + token::{self, StringKind}, }; use num_bigint::BigInt; @@ -1937,106 +1936,106 @@ extern { type Location = ast::Location; type Error = LexicalError; - enum lexer::Tok { - Indent => lexer::Tok::Indent, - Dedent => lexer::Tok::Dedent, - StartModule => lexer::Tok::StartModule, - StartInteractive => lexer::Tok::StartInteractive, - StartExpression => lexer::Tok::StartExpression, - "+" => lexer::Tok::Plus, - "-" => lexer::Tok::Minus, - "~" => lexer::Tok::Tilde, - ":" => 
lexer::Tok::Colon, - "." => lexer::Tok::Dot, - "..." => lexer::Tok::Ellipsis, - "," => lexer::Tok::Comma, - "*" => lexer::Tok::Star, - "**" => lexer::Tok::DoubleStar, - "&" => lexer::Tok::Amper, - "@" => lexer::Tok::At, - "%" => lexer::Tok::Percent, - "//" => lexer::Tok::DoubleSlash, - "^" => lexer::Tok::CircumFlex, - "|" => lexer::Tok::Vbar, - "<<" => lexer::Tok::LeftShift, - ">>" => lexer::Tok::RightShift, - "/" => lexer::Tok::Slash, - "(" => lexer::Tok::Lpar, - ")" => lexer::Tok::Rpar, - "[" => lexer::Tok::Lsqb, - "]" => lexer::Tok::Rsqb, - "{" => lexer::Tok::Lbrace, - "}" => lexer::Tok::Rbrace, - "=" => lexer::Tok::Equal, - "+=" => lexer::Tok::PlusEqual, - "-=" => lexer::Tok::MinusEqual, - "*=" => lexer::Tok::StarEqual, - "@=" => lexer::Tok::AtEqual, - "/=" => lexer::Tok::SlashEqual, - "%=" => lexer::Tok::PercentEqual, - "&=" => lexer::Tok::AmperEqual, - "|=" => lexer::Tok::VbarEqual, - "^=" => lexer::Tok::CircumflexEqual, - "<<=" => lexer::Tok::LeftShiftEqual, - ">>=" => lexer::Tok::RightShiftEqual, - "**=" => lexer::Tok::DoubleStarEqual, - "//=" => lexer::Tok::DoubleSlashEqual, - ":=" => lexer::Tok::ColonEqual, - "==" => lexer::Tok::EqEqual, - "!=" => lexer::Tok::NotEqual, - "<" => lexer::Tok::Less, - "<=" => lexer::Tok::LessEqual, - ">" => lexer::Tok::Greater, - ">=" => lexer::Tok::GreaterEqual, - "->" => lexer::Tok::Rarrow, - "and" => lexer::Tok::And, - "as" => lexer::Tok::As, - "assert" => lexer::Tok::Assert, - "async" => lexer::Tok::Async, - "await" => lexer::Tok::Await, - "break" => lexer::Tok::Break, - "class" => lexer::Tok::Class, - "continue" => lexer::Tok::Continue, - "def" => lexer::Tok::Def, - "del" => lexer::Tok::Del, - "elif" => lexer::Tok::Elif, - "else" => lexer::Tok::Else, - "except" => lexer::Tok::Except, - "finally" => lexer::Tok::Finally, - "for" => lexer::Tok::For, - "from" => lexer::Tok::From, - "global" => lexer::Tok::Global, - "if" => lexer::Tok::If, - "import" => lexer::Tok::Import, - "in" => lexer::Tok::In, - "is" => lexer::Tok::Is, - 
"lambda" => lexer::Tok::Lambda, - "nonlocal" => lexer::Tok::Nonlocal, - "not" => lexer::Tok::Not, - "or" => lexer::Tok::Or, - "pass" => lexer::Tok::Pass, - "raise" => lexer::Tok::Raise, - "return" => lexer::Tok::Return, - "try" => lexer::Tok::Try, - "while" => lexer::Tok::While, - "match" => lexer::Tok::Match, - "case" => lexer::Tok::Case, - "with" => lexer::Tok::With, - "yield" => lexer::Tok::Yield, - "True" => lexer::Tok::True, - "False" => lexer::Tok::False, - "None" => lexer::Tok::None, - int => lexer::Tok::Int { value: }, - float => lexer::Tok::Float { value: }, - complex => lexer::Tok::Complex { real: , imag: }, - string => lexer::Tok::String { + enum token::Tok { + Indent => token::Tok::Indent, + Dedent => token::Tok::Dedent, + StartModule => token::Tok::StartModule, + StartInteractive => token::Tok::StartInteractive, + StartExpression => token::Tok::StartExpression, + "+" => token::Tok::Plus, + "-" => token::Tok::Minus, + "~" => token::Tok::Tilde, + ":" => token::Tok::Colon, + "." => token::Tok::Dot, + "..." 
=> token::Tok::Ellipsis, + "," => token::Tok::Comma, + "*" => token::Tok::Star, + "**" => token::Tok::DoubleStar, + "&" => token::Tok::Amper, + "@" => token::Tok::At, + "%" => token::Tok::Percent, + "//" => token::Tok::DoubleSlash, + "^" => token::Tok::CircumFlex, + "|" => token::Tok::Vbar, + "<<" => token::Tok::LeftShift, + ">>" => token::Tok::RightShift, + "/" => token::Tok::Slash, + "(" => token::Tok::Lpar, + ")" => token::Tok::Rpar, + "[" => token::Tok::Lsqb, + "]" => token::Tok::Rsqb, + "{" => token::Tok::Lbrace, + "}" => token::Tok::Rbrace, + "=" => token::Tok::Equal, + "+=" => token::Tok::PlusEqual, + "-=" => token::Tok::MinusEqual, + "*=" => token::Tok::StarEqual, + "@=" => token::Tok::AtEqual, + "/=" => token::Tok::SlashEqual, + "%=" => token::Tok::PercentEqual, + "&=" => token::Tok::AmperEqual, + "|=" => token::Tok::VbarEqual, + "^=" => token::Tok::CircumflexEqual, + "<<=" => token::Tok::LeftShiftEqual, + ">>=" => token::Tok::RightShiftEqual, + "**=" => token::Tok::DoubleStarEqual, + "//=" => token::Tok::DoubleSlashEqual, + ":=" => token::Tok::ColonEqual, + "==" => token::Tok::EqEqual, + "!=" => token::Tok::NotEqual, + "<" => token::Tok::Less, + "<=" => token::Tok::LessEqual, + ">" => token::Tok::Greater, + ">=" => token::Tok::GreaterEqual, + "->" => token::Tok::Rarrow, + "and" => token::Tok::And, + "as" => token::Tok::As, + "assert" => token::Tok::Assert, + "async" => token::Tok::Async, + "await" => token::Tok::Await, + "break" => token::Tok::Break, + "class" => token::Tok::Class, + "continue" => token::Tok::Continue, + "def" => token::Tok::Def, + "del" => token::Tok::Del, + "elif" => token::Tok::Elif, + "else" => token::Tok::Else, + "except" => token::Tok::Except, + "finally" => token::Tok::Finally, + "for" => token::Tok::For, + "from" => token::Tok::From, + "global" => token::Tok::Global, + "if" => token::Tok::If, + "import" => token::Tok::Import, + "in" => token::Tok::In, + "is" => token::Tok::Is, + "lambda" => token::Tok::Lambda, + "nonlocal" => 
token::Tok::Nonlocal, + "not" => token::Tok::Not, + "or" => token::Tok::Or, + "pass" => token::Tok::Pass, + "raise" => token::Tok::Raise, + "return" => token::Tok::Return, + "try" => token::Tok::Try, + "while" => token::Tok::While, + "match" => token::Tok::Match, + "case" => token::Tok::Case, + "with" => token::Tok::With, + "yield" => token::Tok::Yield, + "True" => token::Tok::True, + "False" => token::Tok::False, + "None" => token::Tok::None, + int => token::Tok::Int { value: }, + float => token::Tok::Float { value: }, + complex => token::Tok::Complex { real: , imag: }, + string => token::Tok::String { value: , kind: , triple_quoted: }, - name => lexer::Tok::Name { name: }, - "\n" => lexer::Tok::Newline, - ";" => lexer::Tok::Semi, - "#" => lexer::Tok::Comment(_), + name => token::Tok::Name { name: }, + "\n" => token::Tok::Newline, + ";" => token::Tok::Semi, + "#" => token::Tok::Comment(_), } } diff --git a/parser/src/function.rs b/parser/src/function.rs index dc95039..cfda1c5 100644 --- a/parser/src/function.rs +++ b/parser/src/function.rs @@ -1,7 +1,9 @@ // Contains functions that perform validation and parsing of arguments and parameters. // Checks apply both to functions and to lambdas. -use crate::ast; -use crate::lexer::{LexicalError, LexicalErrorType}; +use crate::{ + ast, + lexer::{LexicalError, LexicalErrorType}, +}; use rustc_hash::FxHashSet; pub(crate) struct ArgumentList { diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs index fb0a909..f4862b3 100644 --- a/parser/src/lexer.rs +++ b/parser/src/lexer.rs @@ -12,9 +12,7 @@ //! # Example //! //! ``` -//! use rustpython_parser::lexer::{lex, Tok}; -//! use rustpython_parser::mode::Mode; -//! use rustpython_parser::token::StringKind; +//! use rustpython_parser::{lexer::lex, Tok, Mode, StringKind}; //! //! let source = "x = 'RustPython'"; //! let tokens = lex(source, Mode::Module) @@ -33,19 +31,16 @@ //! ``` //! //! 
[Lexical analysis]: https://docs.python.org/3/reference/lexical_analysis.html -pub use super::token::{StringKind, Tok}; -use crate::ast::Location; -use crate::mode::Mode; -use crate::soft_keywords::SoftKeywordTransformer; -use crate::string::FStringErrorType; +use crate::{ + ast::Location, + mode::Mode, + soft_keywords::SoftKeywordTransformer, + string::FStringErrorType, + token::{StringKind, Tok}, +}; use num_bigint::BigInt; -use num_traits::identities::Zero; -use num_traits::Num; -use std::char; -use std::cmp::Ordering; -use std::ops::Index; -use std::slice::SliceIndex; -use std::str::FromStr; +use num_traits::{Num, Zero}; +use std::{char, cmp::Ordering, ops::Index, slice::SliceIndex, str::FromStr}; use unic_emoji_char::is_emoji_presentation; use unic_ucd_ident::{is_xid_continue, is_xid_start}; @@ -200,8 +195,7 @@ pub type LexResult = Result; /// # Examples /// /// ``` -/// use rustpython_parser::mode::Mode; -/// use rustpython_parser::lexer::{lex}; +/// use rustpython_parser::{Mode, lexer::lex}; /// /// let source = "def hello(): return 'world'"; /// let lexer = lex(source, Mode::Module); @@ -1320,8 +1314,7 @@ impl std::fmt::Display for LexicalErrorType { #[cfg(test)] mod tests { - use super::{lex, StringKind, Tok}; - use crate::mode::Mode; + use super::*; use num_bigint::BigInt; const WINDOWS_EOL: &str = "\r\n"; diff --git a/parser/src/lib.rs b/parser/src/lib.rs index 6769246..bc97074 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -54,20 +54,18 @@ //! //! The functionality of this crate is split into several modules: //! -//! - [token]: This module contains the definition of the tokens that are generated by the lexer. +//! - token: This module contains the definition of the tokens that are generated by the lexer. //! - [lexer]: This module contains the lexer and is responsible for generating the tokens. -//! - [parser]: This module contains an interface to the parser and is responsible for generating the AST. +//! 
- parser: This module contains an interface to the parser and is responsible for generating the AST. //! - Functions and strings have special parsing requirements that are handled in additional files. -//! - [mode]: This module contains the definition of the different modes that the parser can be in. -//! - [error]: This module contains the definition of the errors that can be returned by the parser. +//! - mode: This module contains the definition of the different modes that the parser can be in. //! //! # Examples //! //! For example, to get a stream of tokens from a given string, one could do this: //! //! ``` -//! use rustpython_parser::mode::Mode; -//! use rustpython_parser::lexer::lex; +//! use rustpython_parser::{lexer::lex, Mode}; //! //! let python_source = r#" //! def is_odd(i): @@ -80,9 +78,7 @@ //! These tokens can be directly fed into the parser to generate an AST: //! //! ``` -//! use rustpython_parser::lexer::lex; -//! use rustpython_parser::mode::Mode; -//! use rustpython_parser::parser::parse_tokens; +//! use rustpython_parser::{lexer::lex, Mode, parse_tokens}; //! //! let python_source = r#" //! def is_odd(i): @@ -98,7 +94,7 @@ //! mode or tokenizing the source beforehand: //! //! ``` -//! use rustpython_parser::parser::parse_program; +//! use rustpython_parser::parse_program; //! //! let python_source = r#" //! def is_odd(i): @@ -111,11 +107,7 @@ //! //! [lexical analysis]: https://en.wikipedia.org/wiki/Lexical_analysis //! [parsing]: https://en.wikipedia.org/wiki/Parsing -//! [token]: crate::token //! [lexer]: crate::lexer -//! [parser]: crate::parser -//! [mode]: crate::mode -//! 
[error]: crate::error #![doc(html_logo_url = "https://raw.githubusercontent.com/RustPython/RustPython/main/logo.png")] #![doc(html_root_url = "https://docs.rs/rustpython-parser/")] @@ -125,12 +117,21 @@ extern crate log; pub use rustpython_ast as ast; mod function; +// Skip flattening lexer to distinguish from full parser pub mod lexer; -pub mod mode; -pub mod parser; +mod mode; +mod parser; mod string; #[rustfmt::skip] mod python; mod context; mod soft_keywords; -pub mod token; +mod token; + +pub use mode::Mode; +pub use parser::{ + parse, parse_expression, parse_expression_located, parse_located, parse_program, parse_tokens, + ParseError, ParseErrorType, +}; +pub use string::FStringErrorType; +pub use token::{StringKind, Tok}; diff --git a/parser/src/parser.rs b/parser/src/parser.rs index 2cdb7c2..1241848 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -12,10 +12,13 @@ //! [Abstract Syntax Tree]: https://en.wikipedia.org/wiki/Abstract_syntax_tree //! [`Mode`]: crate::mode -use crate::lexer::{LexResult, LexicalError, LexicalErrorType, Tok}; -pub use crate::mode::Mode; -use crate::{ast, lexer, python}; -use ast::Location; +use crate::{ + ast::{self, Location}, + lexer::{self, LexResult, LexicalError, LexicalErrorType}, + mode::Mode, + python, + token::Tok, +}; use itertools::Itertools; use std::iter; @@ -31,7 +34,7 @@ pub(super) use lalrpop_util::ParseError as LalrpopError; /// For example, parsing a simple function definition and a call to that function: /// /// ``` -/// use rustpython_parser::parser; +/// use rustpython_parser as parser; /// let source = r#" /// def foo(): /// return 42 @@ -59,7 +62,7 @@ pub fn parse_program(source: &str, source_path: &str) -> Result"); /// /// assert!(expr.is_ok()); @@ -80,8 +83,7 @@ pub fn parse_expression(source: &str, path: &str) -> Result", Location::new(5, 20)); /// assert!(expr.is_ok()); @@ -108,8 +110,7 @@ pub fn parse_expression_located( /// parsing: /// /// ``` -/// use rustpython_parser::mode::Mode; 
-/// use rustpython_parser::parser::parse; +/// use rustpython_parser::{Mode, parse}; /// /// let expr = parse("1 + 2", Mode::Expression, ""); /// assert!(expr.is_ok()); @@ -118,8 +119,7 @@ pub fn parse_expression_located( /// Alternatively, we can parse a full Python program consisting of multiple lines: /// /// ``` -/// use rustpython_parser::mode::Mode; -/// use rustpython_parser::parser::parse; +/// use rustpython_parser::{Mode, parse}; /// /// let source = r#" /// class Greeter: @@ -142,9 +142,7 @@ pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result"); /// assert!(expr.is_ok()); @@ -200,9 +196,7 @@ pub fn parse_tokens( } /// Represents represent errors that occur during parsing and are -/// returned by the `parse_*` functions in the [parser] module. -/// -/// [parser]: crate::parser +/// returned by the `parse_*` functions. pub type ParseError = rustpython_compiler_core::BaseError; /// Represents the different types of errors that can occur during parsing. diff --git a/parser/src/soft_keywords.rs b/parser/src/soft_keywords.rs index 7011f3a..2613914 100644 --- a/parser/src/soft_keywords.rs +++ b/parser/src/soft_keywords.rs @@ -1,8 +1,6 @@ +use crate::{lexer::LexResult, mode::Mode, token::Tok}; use itertools::{Itertools, MultiPeek}; -use crate::lexer::{LexResult, Tok}; -pub use crate::mode::Mode; - /// An [`Iterator`] that transforms a token stream to accommodate soft keywords (namely, `match` /// and `case`). /// diff --git a/parser/src/string.rs b/parser/src/string.rs index 0143177..a8be777 100644 --- a/parser/src/string.rs +++ b/parser/src/string.rs @@ -3,7 +3,6 @@ // The lexer doesn't do any special handling of f-strings, it just treats them as // regular strings. Since the parser has no definition of f-string formats (Pending PEP 701) // we have to do the parsing here, manually. 
-use self::FStringErrorType::*; use crate::{ ast::{Constant, ConversionFlag, Expr, ExprKind, Location}, lexer::{LexicalError, LexicalErrorType}, @@ -11,13 +10,12 @@ use crate::{ token::{StringKind, Tok}, }; use itertools::Itertools; -use std::{iter, str}; // unicode_name2 does not expose `MAX_NAME_LENGTH`, so we replicate that constant here, fix #3798 const MAX_UNICODE_NAME: usize = 88; struct StringParser<'a> { - chars: iter::Peekable>, + chars: std::iter::Peekable>, kind: StringKind, start: Location, end: Location, @@ -177,6 +175,8 @@ impl<'a> StringParser<'a> { } fn parse_formatted_value(&mut self, nested: u8) -> Result, LexicalError> { + use FStringErrorType::*; + let mut expression = String::new(); let mut spec = None; let mut delims = Vec::new(); @@ -402,6 +402,8 @@ impl<'a> StringParser<'a> { } fn parse_fstring(&mut self, nested: u8) -> Result, LexicalError> { + use FStringErrorType::*; + if nested >= 2 { return Err(FStringError::new(ExpressionNestedTooDeeply, self.get_pos()).into()); } @@ -653,7 +655,7 @@ pub(crate) fn parse_strings( // TODO: consolidate these with ParseError /// An error that occurred during parsing of an f-string. #[derive(Debug, PartialEq)] -pub struct FStringError { +struct FStringError { /// The type of error that occurred. pub error: FStringErrorType, /// The location of the error. 
@@ -708,28 +710,29 @@ pub enum FStringErrorType { impl std::fmt::Display for FStringErrorType { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + use FStringErrorType::*; match self { - FStringErrorType::UnclosedLbrace => write!(f, "expecting '}}'"), - FStringErrorType::UnopenedRbrace => write!(f, "Unopened '}}'"), - FStringErrorType::ExpectedRbrace => write!(f, "Expected '}}' after conversion flag."), - FStringErrorType::InvalidExpression(error) => { + UnclosedLbrace => write!(f, "expecting '}}'"), + UnopenedRbrace => write!(f, "Unopened '}}'"), + ExpectedRbrace => write!(f, "Expected '}}' after conversion flag."), + InvalidExpression(error) => { write!(f, "{error}") } - FStringErrorType::InvalidConversionFlag => write!(f, "invalid conversion character"), - FStringErrorType::EmptyExpression => write!(f, "empty expression not allowed"), - FStringErrorType::MismatchedDelimiter(first, second) => write!( + InvalidConversionFlag => write!(f, "invalid conversion character"), + EmptyExpression => write!(f, "empty expression not allowed"), + MismatchedDelimiter(first, second) => write!( f, "closing parenthesis '{second}' does not match opening parenthesis '{first}'" ), - FStringErrorType::SingleRbrace => write!(f, "single '}}' is not allowed"), - FStringErrorType::Unmatched(delim) => write!(f, "unmatched '{delim}'"), - FStringErrorType::ExpressionNestedTooDeeply => { + SingleRbrace => write!(f, "single '}}' is not allowed"), + Unmatched(delim) => write!(f, "unmatched '{delim}'"), + ExpressionNestedTooDeeply => { write!(f, "expressions nested too deeply") } - FStringErrorType::UnterminatedString => { + UnterminatedString => { write!(f, "unterminated string") } - FStringErrorType::ExpressionCannotInclude(c) => { + ExpressionCannotInclude(c) => { if *c == '\\' { write!(f, "f-string expression part cannot include a backslash") } else { @@ -832,6 +835,7 @@ mod tests { #[test] fn test_parse_invalid_fstring() { + use FStringErrorType::*; 
assert_eq!(parse_fstring_error("{5!a"), UnclosedLbrace); assert_eq!(parse_fstring_error("{5!a1}"), UnclosedLbrace); assert_eq!(parse_fstring_error("{5!"), UnclosedLbrace); From 97a08ee77ba74c1f402046bcc9dc4f2bf8d4af2b Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Wed, 22 Feb 2023 20:38:56 +0900 Subject: [PATCH 6/7] remove #[macro_use] --- parser/src/lexer.rs | 1 + parser/src/lib.rs | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs index f4862b3..45f5ca8 100644 --- a/parser/src/lexer.rs +++ b/parser/src/lexer.rs @@ -38,6 +38,7 @@ use crate::{ string::FStringErrorType, token::{StringKind, Tok}, }; +use log::trace; use num_bigint::BigInt; use num_traits::{Num, Zero}; use std::{char, cmp::Ordering, ops::Index, slice::SliceIndex, str::FromStr}; diff --git a/parser/src/lib.rs b/parser/src/lib.rs index bc97074..9516932 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -112,8 +112,6 @@ #![doc(html_logo_url = "https://raw.githubusercontent.com/RustPython/RustPython/main/logo.png")] #![doc(html_root_url = "https://docs.rs/rustpython-parser/")] -#[macro_use] -extern crate log; pub use rustpython_ast as ast; mod function; From e26369a34e723524ea1996c49255e9a17ad22f28 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Wed, 22 Feb 2023 21:01:39 +0900 Subject: [PATCH 7/7] use super::* from tests submodules --- ast/src/constant.rs | 6 +++--- core/src/location.rs | 2 +- parser/src/function.rs | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ast/src/constant.rs b/ast/src/constant.rs index 55ee09a..6099272 100644 --- a/ast/src/constant.rs +++ b/ast/src/constant.rs @@ -133,12 +133,12 @@ impl crate::fold::Fold for ConstantOptimizer { #[cfg(test)] mod tests { + use super::*; + #[cfg(feature = "constant-optimization")] #[test] fn test_constant_opt() { - use super::*; - use crate::fold::Fold; - use crate::*; + use crate::{fold::Fold, *}; let start = Default::default(); let end = None; diff --git 
a/core/src/location.rs b/core/src/location.rs index 9d32b54..d9cd59b 100644 --- a/core/src/location.rs +++ b/core/src/location.rs @@ -96,7 +96,7 @@ impl Location { #[cfg(test)] mod tests { - use crate::Location; + use super::*; #[test] fn test_gt() { diff --git a/parser/src/function.rs b/parser/src/function.rs index cfda1c5..0f580e7 100644 --- a/parser/src/function.rs +++ b/parser/src/function.rs @@ -151,7 +151,7 @@ fn is_starred(exp: &ast::Expr) -> bool { #[cfg(test)] mod tests { - use crate::lexer::LexicalErrorType; + use super::*; use crate::parser::{parse_program, ParseErrorType}; macro_rules! function_and_lambda {