diff --git a/parser/python.lalrpop b/parser/python.lalrpop
index 57d5dbe..3ca06b5 100644
--- a/parser/python.lalrpop
+++ b/parser/python.lalrpop
@@ -7,10 +7,9 @@ use crate::{
     ast,
     lexer::{LexicalError, LexicalErrorType},
     function::{ArgumentList, parse_args, parse_params, validate_arguments},
-    lexer,
     context::set_context,
     string::parse_strings,
-    token::StringKind,
+    token::{self, StringKind},
 };
 
 use num_bigint::BigInt;
@@ -1937,106 +1936,106 @@ extern {
     type Location = ast::Location;
     type Error = LexicalError;
 
-    enum lexer::Tok {
-        Indent => lexer::Tok::Indent,
-        Dedent => lexer::Tok::Dedent,
-        StartModule => lexer::Tok::StartModule,
-        StartInteractive => lexer::Tok::StartInteractive,
-        StartExpression => lexer::Tok::StartExpression,
-        "+" => lexer::Tok::Plus,
-        "-" => lexer::Tok::Minus,
-        "~" => lexer::Tok::Tilde,
-        ":" => lexer::Tok::Colon,
-        "." => lexer::Tok::Dot,
-        "..." => lexer::Tok::Ellipsis,
-        "," => lexer::Tok::Comma,
-        "*" => lexer::Tok::Star,
-        "**" => lexer::Tok::DoubleStar,
-        "&" => lexer::Tok::Amper,
-        "@" => lexer::Tok::At,
-        "%" => lexer::Tok::Percent,
-        "//" => lexer::Tok::DoubleSlash,
-        "^" => lexer::Tok::CircumFlex,
-        "|" => lexer::Tok::Vbar,
-        "<<" => lexer::Tok::LeftShift,
-        ">>" => lexer::Tok::RightShift,
-        "/" => lexer::Tok::Slash,
-        "(" => lexer::Tok::Lpar,
-        ")" => lexer::Tok::Rpar,
-        "[" => lexer::Tok::Lsqb,
-        "]" => lexer::Tok::Rsqb,
-        "{" => lexer::Tok::Lbrace,
-        "}" => lexer::Tok::Rbrace,
-        "=" => lexer::Tok::Equal,
-        "+=" => lexer::Tok::PlusEqual,
-        "-=" => lexer::Tok::MinusEqual,
-        "*=" => lexer::Tok::StarEqual,
-        "@=" => lexer::Tok::AtEqual,
-        "/=" => lexer::Tok::SlashEqual,
-        "%=" => lexer::Tok::PercentEqual,
-        "&=" => lexer::Tok::AmperEqual,
-        "|=" => lexer::Tok::VbarEqual,
-        "^=" => lexer::Tok::CircumflexEqual,
-        "<<=" => lexer::Tok::LeftShiftEqual,
-        ">>=" => lexer::Tok::RightShiftEqual,
-        "**=" => lexer::Tok::DoubleStarEqual,
-        "//=" => lexer::Tok::DoubleSlashEqual,
-        ":=" => lexer::Tok::ColonEqual,
-        "==" => lexer::Tok::EqEqual,
-        "!=" => lexer::Tok::NotEqual,
-        "<" => lexer::Tok::Less,
-        "<=" => lexer::Tok::LessEqual,
-        ">" => lexer::Tok::Greater,
-        ">=" => lexer::Tok::GreaterEqual,
-        "->" => lexer::Tok::Rarrow,
-        "and" => lexer::Tok::And,
-        "as" => lexer::Tok::As,
-        "assert" => lexer::Tok::Assert,
-        "async" => lexer::Tok::Async,
-        "await" => lexer::Tok::Await,
-        "break" => lexer::Tok::Break,
-        "class" => lexer::Tok::Class,
-        "continue" => lexer::Tok::Continue,
-        "def" => lexer::Tok::Def,
-        "del" => lexer::Tok::Del,
-        "elif" => lexer::Tok::Elif,
-        "else" => lexer::Tok::Else,
-        "except" => lexer::Tok::Except,
-        "finally" => lexer::Tok::Finally,
-        "for" => lexer::Tok::For,
-        "from" => lexer::Tok::From,
-        "global" => lexer::Tok::Global,
-        "if" => lexer::Tok::If,
-        "import" => lexer::Tok::Import,
-        "in" => lexer::Tok::In,
-        "is" => lexer::Tok::Is,
-        "lambda" => lexer::Tok::Lambda,
-        "nonlocal" => lexer::Tok::Nonlocal,
-        "not" => lexer::Tok::Not,
-        "or" => lexer::Tok::Or,
-        "pass" => lexer::Tok::Pass,
-        "raise" => lexer::Tok::Raise,
-        "return" => lexer::Tok::Return,
-        "try" => lexer::Tok::Try,
-        "while" => lexer::Tok::While,
-        "match" => lexer::Tok::Match,
-        "case" => lexer::Tok::Case,
-        "with" => lexer::Tok::With,
-        "yield" => lexer::Tok::Yield,
-        "True" => lexer::Tok::True,
-        "False" => lexer::Tok::False,
-        "None" => lexer::Tok::None,
-        int => lexer::Tok::Int { value: <BigInt> },
-        float => lexer::Tok::Float { value: <f64> },
-        complex => lexer::Tok::Complex { real: <f64>, imag: <f64> },
-        string => lexer::Tok::String {
+    enum token::Tok {
+        Indent => token::Tok::Indent,
+        Dedent => token::Tok::Dedent,
+        StartModule => token::Tok::StartModule,
+        StartInteractive => token::Tok::StartInteractive,
+        StartExpression => token::Tok::StartExpression,
+        "+" => token::Tok::Plus,
+        "-" => token::Tok::Minus,
+        "~" => token::Tok::Tilde,
+        ":" => token::Tok::Colon,
+        "." => token::Tok::Dot,
+        "..." => token::Tok::Ellipsis,
+        "," => token::Tok::Comma,
+        "*" => token::Tok::Star,
+        "**" => token::Tok::DoubleStar,
+        "&" => token::Tok::Amper,
+        "@" => token::Tok::At,
+        "%" => token::Tok::Percent,
+        "//" => token::Tok::DoubleSlash,
+        "^" => token::Tok::CircumFlex,
+        "|" => token::Tok::Vbar,
+        "<<" => token::Tok::LeftShift,
+        ">>" => token::Tok::RightShift,
+        "/" => token::Tok::Slash,
+        "(" => token::Tok::Lpar,
+        ")" => token::Tok::Rpar,
+        "[" => token::Tok::Lsqb,
+        "]" => token::Tok::Rsqb,
+        "{" => token::Tok::Lbrace,
+        "}" => token::Tok::Rbrace,
+        "=" => token::Tok::Equal,
+        "+=" => token::Tok::PlusEqual,
+        "-=" => token::Tok::MinusEqual,
+        "*=" => token::Tok::StarEqual,
+        "@=" => token::Tok::AtEqual,
+        "/=" => token::Tok::SlashEqual,
+        "%=" => token::Tok::PercentEqual,
+        "&=" => token::Tok::AmperEqual,
+        "|=" => token::Tok::VbarEqual,
+        "^=" => token::Tok::CircumflexEqual,
+        "<<=" => token::Tok::LeftShiftEqual,
+        ">>=" => token::Tok::RightShiftEqual,
+        "**=" => token::Tok::DoubleStarEqual,
+        "//=" => token::Tok::DoubleSlashEqual,
+        ":=" => token::Tok::ColonEqual,
+        "==" => token::Tok::EqEqual,
+        "!=" => token::Tok::NotEqual,
+        "<" => token::Tok::Less,
+        "<=" => token::Tok::LessEqual,
+        ">" => token::Tok::Greater,
+        ">=" => token::Tok::GreaterEqual,
+        "->" => token::Tok::Rarrow,
+        "and" => token::Tok::And,
+        "as" => token::Tok::As,
+        "assert" => token::Tok::Assert,
+        "async" => token::Tok::Async,
+        "await" => token::Tok::Await,
+        "break" => token::Tok::Break,
+        "class" => token::Tok::Class,
+        "continue" => token::Tok::Continue,
+        "def" => token::Tok::Def,
+        "del" => token::Tok::Del,
+        "elif" => token::Tok::Elif,
+        "else" => token::Tok::Else,
+        "except" => token::Tok::Except,
+        "finally" => token::Tok::Finally,
+        "for" => token::Tok::For,
+        "from" => token::Tok::From,
+        "global" => token::Tok::Global,
+        "if" => token::Tok::If,
+        "import" => token::Tok::Import,
+        "in" => token::Tok::In,
+        "is" => token::Tok::Is,
+        "lambda" => token::Tok::Lambda,
+        "nonlocal" => token::Tok::Nonlocal,
+        "not" => token::Tok::Not,
+        "or" => token::Tok::Or,
+        "pass" => token::Tok::Pass,
+        "raise" => token::Tok::Raise,
+        "return" => token::Tok::Return,
+        "try" => token::Tok::Try,
+        "while" => token::Tok::While,
+        "match" => token::Tok::Match,
+        "case" => token::Tok::Case,
+        "with" => token::Tok::With,
+        "yield" => token::Tok::Yield,
+        "True" => token::Tok::True,
+        "False" => token::Tok::False,
+        "None" => token::Tok::None,
+        int => token::Tok::Int { value: <BigInt> },
+        float => token::Tok::Float { value: <f64> },
+        complex => token::Tok::Complex { real: <f64>, imag: <f64> },
+        string => token::Tok::String {
             value: <String>,
             kind: <StringKind>,
             triple_quoted: <bool>
         },
-        name => lexer::Tok::Name { name: <String> },
-        "\n" => lexer::Tok::Newline,
-        ";" => lexer::Tok::Semi,
-        "#" => lexer::Tok::Comment(_),
+        name => token::Tok::Name { name: <String> },
+        "\n" => token::Tok::Newline,
+        ";" => token::Tok::Semi,
+        "#" => token::Tok::Comment(_),
     }
 }
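The `extern` block above is LALRPOP's terminal declaration: each quoted literal or bare name on the left becomes a grammar terminal, and the `<T>` markers capture the named fields of the matching `Tok` variant. A shape-only sketch of the Rust side this binds against, assuming variant names and field types exactly as they appear in the grammar (the real enum lives in `parser/src/token.rs` and has many more variants):

```rust
// Shape-only sketch of the token enum the grammar binds against.
// Variant names and fields mirror the extern block above; nothing
// else about the real `token::Tok` is implied here.
use num_bigint::BigInt;

#[derive(Debug, PartialEq)]
pub enum Tok {
    Indent,
    Dedent,
    Plus,                  // bound to the literal "+"
    Int { value: BigInt }, // bound via `int => token::Tok::Int { value: <BigInt> }`
    Name { name: String }, // bound via `name => token::Tok::Name { name: <String> }`
    Newline,               // bound to "\n"
    // ... one variant per terminal declared above
}
```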
diff --git a/parser/src/function.rs b/parser/src/function.rs
index dc95039..cfda1c5 100644
--- a/parser/src/function.rs
+++ b/parser/src/function.rs
@@ -1,7 +1,9 @@
 // Contains functions that perform validation and parsing of arguments and parameters.
 // Checks apply both to functions and to lambdas.
-use crate::ast;
-use crate::lexer::{LexicalError, LexicalErrorType};
+use crate::{
+    ast,
+    lexer::{LexicalError, LexicalErrorType},
+};
 use rustc_hash::FxHashSet;
 
 pub(crate) struct ArgumentList {
diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs
index fb0a909..f4862b3 100644
--- a/parser/src/lexer.rs
+++ b/parser/src/lexer.rs
@@ -12,9 +12,7 @@
 //! # Example
 //!
 //! ```
-//! use rustpython_parser::lexer::{lex, Tok};
-//! use rustpython_parser::mode::Mode;
-//! use rustpython_parser::token::StringKind;
+//! use rustpython_parser::{lexer::lex, Tok, Mode, StringKind};
 //!
 //! let source = "x = 'RustPython'";
 //! let tokens = lex(source, Mode::Module)
@@ -33,19 +31,16 @@
 //! ```
 //!
 //! [Lexical analysis]: https://docs.python.org/3/reference/lexical_analysis.html
 
-pub use super::token::{StringKind, Tok};
-use crate::ast::Location;
-use crate::mode::Mode;
-use crate::soft_keywords::SoftKeywordTransformer;
-use crate::string::FStringErrorType;
+use crate::{
+    ast::Location,
+    mode::Mode,
+    soft_keywords::SoftKeywordTransformer,
+    string::FStringErrorType,
+    token::{StringKind, Tok},
+};
 use num_bigint::BigInt;
-use num_traits::identities::Zero;
-use num_traits::Num;
-use std::char;
-use std::cmp::Ordering;
-use std::ops::Index;
-use std::slice::SliceIndex;
-use std::str::FromStr;
+use num_traits::{Num, Zero};
+use std::{char, cmp::Ordering, ops::Index, slice::SliceIndex, str::FromStr};
 use unic_emoji_char::is_emoji_presentation;
 use unic_ucd_ident::{is_xid_continue, is_xid_start};
@@ -200,8 +195,7 @@ pub type LexResult = Result<Spanned, LexicalError>;
 /// # Examples
 ///
 /// ```
-/// use rustpython_parser::mode::Mode;
-/// use rustpython_parser::lexer::{lex};
+/// use rustpython_parser::{Mode, lexer::lex};
 ///
 /// let source = "def hello(): return 'world'";
 /// let lexer = lex(source, Mode::Module);
@@ -1320,8 +1314,7 @@ impl std::fmt::Display for LexicalErrorType {
 
 #[cfg(test)]
 mod tests {
-    use super::{lex, StringKind, Tok};
-    use crate::mode::Mode;
+    use super::*;
    use num_bigint::BigInt;
 
     const WINDOWS_EOL: &str = "\r\n";
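With the lexer's imports flattened, the module doc's example pulls everything it needs through one `use`. For reference, a minimal standalone version of that doctest (same calls as the example above, assuming the crate-root re-exports introduced later in this patch):

```rust
use rustpython_parser::{lexer::lex, Mode};

fn main() {
    let source = "x = 'RustPython'";
    // `lex` yields `LexResult` items, i.e. Result<(Location, Tok, Location), LexicalError>.
    let tokens = lex(source, Mode::Module)
        .map(|tok| tok.expect("failed to lex"))
        .collect::<Vec<_>>();
    for (_, tok, _) in &tokens {
        println!("{tok:?}");
    }
}
```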
diff --git a/parser/src/lib.rs b/parser/src/lib.rs
index 6769246..bc97074 100644
--- a/parser/src/lib.rs
+++ b/parser/src/lib.rs
@@ -54,20 +54,18 @@
 //!
 //! The functionality of this crate is split into several modules:
 //!
-//! - [token]: This module contains the definition of the tokens that are generated by the lexer.
+//! - token: This module contains the definition of the tokens that are generated by the lexer.
 //! - [lexer]: This module contains the lexer and is responsible for generating the tokens.
-//! - [parser]: This module contains an interface to the parser and is responsible for generating the AST.
+//! - parser: This module contains an interface to the parser and is responsible for generating the AST.
 //!   - Functions and strings have special parsing requirements that are handled in additional files.
-//! - [mode]: This module contains the definition of the different modes that the parser can be in.
-//! - [error]: This module contains the definition of the errors that can be returned by the parser.
+//! - mode: This module contains the definition of the different modes that the parser can be in.
 //!
 //! # Examples
 //!
 //! For example, to get a stream of tokens from a given string, one could do this:
 //!
 //! ```
-//! use rustpython_parser::mode::Mode;
-//! use rustpython_parser::lexer::lex;
+//! use rustpython_parser::{lexer::lex, Mode};
 //!
 //! let python_source = r#"
 //! def is_odd(i):
@@ -80,9 +78,7 @@
 //! These tokens can be directly fed into the parser to generate an AST:
 //!
 //! ```
-//! use rustpython_parser::lexer::lex;
-//! use rustpython_parser::mode::Mode;
-//! use rustpython_parser::parser::parse_tokens;
+//! use rustpython_parser::{lexer::lex, Mode, parse_tokens};
 //!
 //! let python_source = r#"
 //! def is_odd(i):
@@ -98,7 +94,7 @@
 //! mode or tokenizing the source beforehand:
 //!
 //! ```
-//! use rustpython_parser::parser::parse_program;
+//! use rustpython_parser::parse_program;
 //!
 //! let python_source = r#"
 //! def is_odd(i):
@@ -111,11 +107,7 @@
 //!
 //! [lexical analysis]: https://en.wikipedia.org/wiki/Lexical_analysis
 //! [parsing]: https://en.wikipedia.org/wiki/Parsing
-//! [token]: crate::token
 //! [lexer]: crate::lexer
-//! [parser]: crate::parser
-//! [mode]: crate::mode
-//! [error]: crate::error
 
 #![doc(html_logo_url = "https://raw.githubusercontent.com/RustPython/RustPython/main/logo.png")]
 #![doc(html_root_url = "https://docs.rs/rustpython-parser/")]
@@ -125,12 +117,21 @@ extern crate log;
 
 pub use rustpython_ast as ast;
 
 mod function;
+// Skip flattening lexer to distinguish from full parser
 pub mod lexer;
-pub mod mode;
-pub mod parser;
+mod mode;
+mod parser;
 mod string;
 #[rustfmt::skip]
 mod python;
 mod context;
 mod soft_keywords;
-pub mod token;
+mod token;
+
+pub use mode::Mode;
+pub use parser::{
+    parse, parse_expression, parse_expression_located, parse_located, parse_program, parse_tokens,
+    ParseError, ParseErrorType,
+};
+pub use string::FStringErrorType;
+pub use token::{StringKind, Tok};
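The net effect of the lib.rs change: `mode`, `parser`, and `token` become private modules, and their useful names are re-exported from the crate root. Downstream code drops the module paths entirely — a minimal before/after sketch, assuming only the re-exports listed above:

```rust
// Before this patch:
//   use rustpython_parser::mode::Mode;
//   use rustpython_parser::parser::parse_program;
// After it, everything comes from the crate root:
use rustpython_parser::{parse, parse_program, Mode};

fn main() {
    // Convenience entry point for whole programs (Mode::Module implied).
    let program = parse_program("print('Hello world')", "<embedded>");
    assert!(program.is_ok());

    // General entry point with an explicit Mode.
    let expr = parse("1 + 2", Mode::Expression, "<embedded>");
    assert!(expr.is_ok());
}
```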
diff --git a/parser/src/parser.rs b/parser/src/parser.rs
index 2cdb7c2..1241848 100644
--- a/parser/src/parser.rs
+++ b/parser/src/parser.rs
@@ -12,10 +12,13 @@
 //! [Abstract Syntax Tree]: https://en.wikipedia.org/wiki/Abstract_syntax_tree
 //! [`Mode`]: crate::mode
 
-use crate::lexer::{LexResult, LexicalError, LexicalErrorType, Tok};
-pub use crate::mode::Mode;
-use crate::{ast, lexer, python};
-use ast::Location;
+use crate::{
+    ast::{self, Location},
+    lexer::{self, LexResult, LexicalError, LexicalErrorType},
+    mode::Mode,
+    python,
+    token::Tok,
+};
 use itertools::Itertools;
 use std::iter;
 
@@ -31,7 +34,7 @@ pub(super) use lalrpop_util::ParseError as LalrpopError;
 /// For example, parsing a simple function definition and a call to that function:
 ///
 /// ```
-/// use rustpython_parser::parser;
+/// use rustpython_parser as parser;
 /// let source = r#"
 /// def foo():
 ///    return 42
@@ -59,7 +62,7 @@ pub fn parse_program(source: &str, source_path: &str) -> Result<ast::Suite, ParseError> {
 /// # Example
 ///
 /// ```
-/// use rustpython_parser::parser;
+/// use rustpython_parser as parser;
 /// let expr = parser::parse_expression("1 + 2", "<embedded>");
 ///
 /// assert!(expr.is_ok());
@@ -80,8 +83,7 @@ pub fn parse_expression(source: &str, path: &str) -> Result<ast::Expr, ParseError> {
 /// somewhat silly, location:
 ///
 /// ```
-/// use rustpython_parser::parser::parse_expression_located;
-/// use rustpython_parser::ast::Location;
+/// use rustpython_parser::{ast::Location, parse_expression_located};
 ///
 /// let expr = parse_expression_located("1 + 2", "<embedded>", Location::new(5, 20));
 /// assert!(expr.is_ok());
@@ -108,8 +110,7 @@ pub fn parse_expression_located(
 /// parsing:
 ///
 /// ```
-/// use rustpython_parser::mode::Mode;
-/// use rustpython_parser::parser::parse;
+/// use rustpython_parser::{Mode, parse};
 ///
 /// let expr = parse("1 + 2", Mode::Expression, "<embedded>");
 /// assert!(expr.is_ok());
@@ -118,8 +119,7 @@
 /// Alternatively, we can parse a full Python program consisting of multiple lines:
 ///
 /// ```
-/// use rustpython_parser::mode::Mode;
-/// use rustpython_parser::parser::parse;
+/// use rustpython_parser::{Mode, parse};
 ///
 /// let source = r#"
 /// class Greeter:
@@ -142,9 +142,7 @@ pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<ast::Mod, ParseError> {
 /// them using the [`lexer::lex`] function:
 ///
 /// ```
-/// use rustpython_parser::lexer::lex;
-/// use rustpython_parser::mode::Mode;
-/// use rustpython_parser::parser::parse_tokens;
+/// use rustpython_parser::{lexer::lex, Mode, parse_tokens};
 ///
 /// let expr = parse_tokens(lex("1 + 2", Mode::Expression), Mode::Expression, "<embedded>");
 /// assert!(expr.is_ok());
@@ -200,9 +196,7 @@
 }
 
 /// Represents errors that occur during parsing and are
-/// returned by the `parse_*` functions in the [parser] module.
-///
-/// [parser]: crate::parser
+/// returned by the `parse_*` functions.
 pub type ParseError = rustpython_compiler_core::BaseError<ParseErrorType>;
 
 /// Represents the different types of errors that can occur during parsing.
diff --git a/parser/src/soft_keywords.rs b/parser/src/soft_keywords.rs
index 7011f3a..2613914 100644
--- a/parser/src/soft_keywords.rs
+++ b/parser/src/soft_keywords.rs
@@ -1,8 +1,6 @@
+use crate::{lexer::LexResult, mode::Mode, token::Tok};
 use itertools::{Itertools, MultiPeek};
 
-use crate::lexer::{LexResult, Tok};
-pub use crate::mode::Mode;
-
 /// An [`Iterator`] that transforms a token stream to accommodate soft keywords (namely, `match`
 /// and `case`).
 ///
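The parser-side doctests above get the same import flattening. Written out against the new paths, the lexer-to-parser pipeline looks like this (a sketch using the same calls as the `parse_tokens` doctest; note that `lex` already wraps its output in the `SoftKeywordTransformer`, so `match`/`case` are handled before the tokens reach the parser):

```rust
use rustpython_parser::{lexer::lex, parse_tokens, Mode};

fn main() {
    // Lex first, then hand the token stream to the parser.
    let tokens = lex("match x:\n    case 1:\n        pass\n", Mode::Module);
    let ast = parse_tokens(tokens, Mode::Module, "<embedded>");
    assert!(ast.is_ok());
}
```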
diff --git a/parser/src/string.rs b/parser/src/string.rs
index 0143177..a8be777 100644
--- a/parser/src/string.rs
+++ b/parser/src/string.rs
@@ -3,7 +3,6 @@
 // The lexer doesn't do any special handling of f-strings, it just treats them as
 // regular strings. Since the parser has no definition of f-string formats (Pending PEP 701)
 // we have to do the parsing here, manually.
-use self::FStringErrorType::*;
 use crate::{
     ast::{Constant, ConversionFlag, Expr, ExprKind, Location},
     lexer::{LexicalError, LexicalErrorType},
@@ -11,13 +10,12 @@ use crate::{
     token::{StringKind, Tok},
 };
 use itertools::Itertools;
-use std::{iter, str};
 
 // unicode_name2 does not expose `MAX_NAME_LENGTH`, so we replicate that constant here, fix #3798
 const MAX_UNICODE_NAME: usize = 88;
 
 struct StringParser<'a> {
-    chars: iter::Peekable<str::Chars<'a>>,
+    chars: std::iter::Peekable<std::str::Chars<'a>>,
     kind: StringKind,
     start: Location,
     end: Location,
@@ -177,6 +175,8 @@ impl<'a> StringParser<'a> {
     }
 
     fn parse_formatted_value(&mut self, nested: u8) -> Result<Vec<Expr>, LexicalError> {
+        use FStringErrorType::*;
+
         let mut expression = String::new();
         let mut spec = None;
         let mut delims = Vec::new();
@@ -402,6 +402,8 @@
     }
 
     fn parse_fstring(&mut self, nested: u8) -> Result<Vec<Expr>, LexicalError> {
+        use FStringErrorType::*;
+
         if nested >= 2 {
             return Err(FStringError::new(ExpressionNestedTooDeeply, self.get_pos()).into());
         }
@@ -653,7 +655,7 @@ pub(crate) fn parse_strings(
 // TODO: consolidate these with ParseError
 /// An error that occurred during parsing of an f-string.
 #[derive(Debug, PartialEq)]
-pub struct FStringError {
+struct FStringError {
     /// The type of error that occurred.
     pub error: FStringErrorType,
     /// The location of the error.
@@ -708,28 +710,29 @@ pub enum FStringErrorType {
 impl std::fmt::Display for FStringErrorType {
     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        use FStringErrorType::*;
         match self {
-            FStringErrorType::UnclosedLbrace => write!(f, "expecting '}}'"),
-            FStringErrorType::UnopenedRbrace => write!(f, "Unopened '}}'"),
-            FStringErrorType::ExpectedRbrace => write!(f, "Expected '}}' after conversion flag."),
-            FStringErrorType::InvalidExpression(error) => {
+            UnclosedLbrace => write!(f, "expecting '}}'"),
+            UnopenedRbrace => write!(f, "Unopened '}}'"),
+            ExpectedRbrace => write!(f, "Expected '}}' after conversion flag."),
+            InvalidExpression(error) => {
                 write!(f, "{error}")
             }
-            FStringErrorType::InvalidConversionFlag => write!(f, "invalid conversion character"),
-            FStringErrorType::EmptyExpression => write!(f, "empty expression not allowed"),
-            FStringErrorType::MismatchedDelimiter(first, second) => write!(
+            InvalidConversionFlag => write!(f, "invalid conversion character"),
+            EmptyExpression => write!(f, "empty expression not allowed"),
+            MismatchedDelimiter(first, second) => write!(
                 f,
                 "closing parenthesis '{second}' does not match opening parenthesis '{first}'"
             ),
-            FStringErrorType::SingleRbrace => write!(f, "single '}}' is not allowed"),
-            FStringErrorType::Unmatched(delim) => write!(f, "unmatched '{delim}'"),
-            FStringErrorType::ExpressionNestedTooDeeply => {
+            SingleRbrace => write!(f, "single '}}' is not allowed"),
+            Unmatched(delim) => write!(f, "unmatched '{delim}'"),
+            ExpressionNestedTooDeeply => {
                 write!(f, "expressions nested too deeply")
             }
-            FStringErrorType::UnterminatedString => {
+            UnterminatedString => {
                 write!(f, "unterminated string")
             }
-            FStringErrorType::ExpressionCannotInclude(c) => {
+            ExpressionCannotInclude(c) => {
                 if *c == '\\' {
                     write!(f, "f-string expression part cannot include a backslash")
                 } else {
@@ -832,6 +835,7 @@ mod tests {
 
     #[test]
     fn test_parse_invalid_fstring() {
+        use FStringErrorType::*;
         assert_eq!(parse_fstring_error("{5!a"), UnclosedLbrace);
        assert_eq!(parse_fstring_error("{5!a1}"), UnclosedLbrace);
         assert_eq!(parse_fstring_error("{5!"), UnclosedLbrace);
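The repeated `FStringErrorType::` prefixes disappear from the `Display` impl because the variant glob-import is now scoped to each function body instead of sitting at module level. A cut-down, self-contained illustration of the pattern (two stand-in variants only, not the real enum):

```rust
// Illustration of the scoping change above: glob-import enum variants
// inside the one function that matches on them, rather than module-wide.
#[derive(Debug, PartialEq)]
enum FStringErrorType {
    UnclosedLbrace,
    EmptyExpression,
}

impl std::fmt::Display for FStringErrorType {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        use FStringErrorType::*; // variants are visible only in this body
        match self {
            UnclosedLbrace => write!(f, "expecting '}}'"),
            EmptyExpression => write!(f, "empty expression not allowed"),
        }
    }
}

fn main() {
    println!("{}", FStringErrorType::UnclosedLbrace);
}
```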