Flatten rustpython_parser interface

Jeong YunWon 2023-02-22 17:14:20 +09:00
parent 8580e4ebb5
commit cb8c6fb78d
7 changed files with 168 additions and 177 deletions
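
In practice, items that previously had to be imported from individual public submodules (`parser`, `mode`, `token`) are now re-exported from the crate root. A minimal before/after sketch of a downstream caller, pieced together from the doc-example changes in this commit:

```
// Before: each item lived in its own public submodule.
use rustpython_parser::mode::Mode;
use rustpython_parser::parser::parse_program;
use rustpython_parser::token::StringKind;

// After: the crate root re-exports the flattened interface.
use rustpython_parser::{parse_program, Mode, StringKind};
```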

View file

@@ -7,10 +7,9 @@ use crate::{
ast,
lexer::{LexicalError, LexicalErrorType},
function::{ArgumentList, parse_args, parse_params, validate_arguments},
lexer,
context::set_context,
string::parse_strings,
token::StringKind,
token::{self, StringKind},
};
use num_bigint::BigInt;
@@ -1937,106 +1936,106 @@ extern {
type Location = ast::Location;
type Error = LexicalError;
enum lexer::Tok {
Indent => lexer::Tok::Indent,
Dedent => lexer::Tok::Dedent,
StartModule => lexer::Tok::StartModule,
StartInteractive => lexer::Tok::StartInteractive,
StartExpression => lexer::Tok::StartExpression,
"+" => lexer::Tok::Plus,
"-" => lexer::Tok::Minus,
"~" => lexer::Tok::Tilde,
":" => lexer::Tok::Colon,
"." => lexer::Tok::Dot,
"..." => lexer::Tok::Ellipsis,
"," => lexer::Tok::Comma,
"*" => lexer::Tok::Star,
"**" => lexer::Tok::DoubleStar,
"&" => lexer::Tok::Amper,
"@" => lexer::Tok::At,
"%" => lexer::Tok::Percent,
"//" => lexer::Tok::DoubleSlash,
"^" => lexer::Tok::CircumFlex,
"|" => lexer::Tok::Vbar,
"<<" => lexer::Tok::LeftShift,
">>" => lexer::Tok::RightShift,
"/" => lexer::Tok::Slash,
"(" => lexer::Tok::Lpar,
")" => lexer::Tok::Rpar,
"[" => lexer::Tok::Lsqb,
"]" => lexer::Tok::Rsqb,
"{" => lexer::Tok::Lbrace,
"}" => lexer::Tok::Rbrace,
"=" => lexer::Tok::Equal,
"+=" => lexer::Tok::PlusEqual,
"-=" => lexer::Tok::MinusEqual,
"*=" => lexer::Tok::StarEqual,
"@=" => lexer::Tok::AtEqual,
"/=" => lexer::Tok::SlashEqual,
"%=" => lexer::Tok::PercentEqual,
"&=" => lexer::Tok::AmperEqual,
"|=" => lexer::Tok::VbarEqual,
"^=" => lexer::Tok::CircumflexEqual,
"<<=" => lexer::Tok::LeftShiftEqual,
">>=" => lexer::Tok::RightShiftEqual,
"**=" => lexer::Tok::DoubleStarEqual,
"//=" => lexer::Tok::DoubleSlashEqual,
":=" => lexer::Tok::ColonEqual,
"==" => lexer::Tok::EqEqual,
"!=" => lexer::Tok::NotEqual,
"<" => lexer::Tok::Less,
"<=" => lexer::Tok::LessEqual,
">" => lexer::Tok::Greater,
">=" => lexer::Tok::GreaterEqual,
"->" => lexer::Tok::Rarrow,
"and" => lexer::Tok::And,
"as" => lexer::Tok::As,
"assert" => lexer::Tok::Assert,
"async" => lexer::Tok::Async,
"await" => lexer::Tok::Await,
"break" => lexer::Tok::Break,
"class" => lexer::Tok::Class,
"continue" => lexer::Tok::Continue,
"def" => lexer::Tok::Def,
"del" => lexer::Tok::Del,
"elif" => lexer::Tok::Elif,
"else" => lexer::Tok::Else,
"except" => lexer::Tok::Except,
"finally" => lexer::Tok::Finally,
"for" => lexer::Tok::For,
"from" => lexer::Tok::From,
"global" => lexer::Tok::Global,
"if" => lexer::Tok::If,
"import" => lexer::Tok::Import,
"in" => lexer::Tok::In,
"is" => lexer::Tok::Is,
"lambda" => lexer::Tok::Lambda,
"nonlocal" => lexer::Tok::Nonlocal,
"not" => lexer::Tok::Not,
"or" => lexer::Tok::Or,
"pass" => lexer::Tok::Pass,
"raise" => lexer::Tok::Raise,
"return" => lexer::Tok::Return,
"try" => lexer::Tok::Try,
"while" => lexer::Tok::While,
"match" => lexer::Tok::Match,
"case" => lexer::Tok::Case,
"with" => lexer::Tok::With,
"yield" => lexer::Tok::Yield,
"True" => lexer::Tok::True,
"False" => lexer::Tok::False,
"None" => lexer::Tok::None,
int => lexer::Tok::Int { value: <BigInt> },
float => lexer::Tok::Float { value: <f64> },
complex => lexer::Tok::Complex { real: <f64>, imag: <f64> },
string => lexer::Tok::String {
enum token::Tok {
Indent => token::Tok::Indent,
Dedent => token::Tok::Dedent,
StartModule => token::Tok::StartModule,
StartInteractive => token::Tok::StartInteractive,
StartExpression => token::Tok::StartExpression,
"+" => token::Tok::Plus,
"-" => token::Tok::Minus,
"~" => token::Tok::Tilde,
":" => token::Tok::Colon,
"." => token::Tok::Dot,
"..." => token::Tok::Ellipsis,
"," => token::Tok::Comma,
"*" => token::Tok::Star,
"**" => token::Tok::DoubleStar,
"&" => token::Tok::Amper,
"@" => token::Tok::At,
"%" => token::Tok::Percent,
"//" => token::Tok::DoubleSlash,
"^" => token::Tok::CircumFlex,
"|" => token::Tok::Vbar,
"<<" => token::Tok::LeftShift,
">>" => token::Tok::RightShift,
"/" => token::Tok::Slash,
"(" => token::Tok::Lpar,
")" => token::Tok::Rpar,
"[" => token::Tok::Lsqb,
"]" => token::Tok::Rsqb,
"{" => token::Tok::Lbrace,
"}" => token::Tok::Rbrace,
"=" => token::Tok::Equal,
"+=" => token::Tok::PlusEqual,
"-=" => token::Tok::MinusEqual,
"*=" => token::Tok::StarEqual,
"@=" => token::Tok::AtEqual,
"/=" => token::Tok::SlashEqual,
"%=" => token::Tok::PercentEqual,
"&=" => token::Tok::AmperEqual,
"|=" => token::Tok::VbarEqual,
"^=" => token::Tok::CircumflexEqual,
"<<=" => token::Tok::LeftShiftEqual,
">>=" => token::Tok::RightShiftEqual,
"**=" => token::Tok::DoubleStarEqual,
"//=" => token::Tok::DoubleSlashEqual,
":=" => token::Tok::ColonEqual,
"==" => token::Tok::EqEqual,
"!=" => token::Tok::NotEqual,
"<" => token::Tok::Less,
"<=" => token::Tok::LessEqual,
">" => token::Tok::Greater,
">=" => token::Tok::GreaterEqual,
"->" => token::Tok::Rarrow,
"and" => token::Tok::And,
"as" => token::Tok::As,
"assert" => token::Tok::Assert,
"async" => token::Tok::Async,
"await" => token::Tok::Await,
"break" => token::Tok::Break,
"class" => token::Tok::Class,
"continue" => token::Tok::Continue,
"def" => token::Tok::Def,
"del" => token::Tok::Del,
"elif" => token::Tok::Elif,
"else" => token::Tok::Else,
"except" => token::Tok::Except,
"finally" => token::Tok::Finally,
"for" => token::Tok::For,
"from" => token::Tok::From,
"global" => token::Tok::Global,
"if" => token::Tok::If,
"import" => token::Tok::Import,
"in" => token::Tok::In,
"is" => token::Tok::Is,
"lambda" => token::Tok::Lambda,
"nonlocal" => token::Tok::Nonlocal,
"not" => token::Tok::Not,
"or" => token::Tok::Or,
"pass" => token::Tok::Pass,
"raise" => token::Tok::Raise,
"return" => token::Tok::Return,
"try" => token::Tok::Try,
"while" => token::Tok::While,
"match" => token::Tok::Match,
"case" => token::Tok::Case,
"with" => token::Tok::With,
"yield" => token::Tok::Yield,
"True" => token::Tok::True,
"False" => token::Tok::False,
"None" => token::Tok::None,
int => token::Tok::Int { value: <BigInt> },
float => token::Tok::Float { value: <f64> },
complex => token::Tok::Complex { real: <f64>, imag: <f64> },
string => token::Tok::String {
value: <String>,
kind: <StringKind>,
triple_quoted: <bool>
},
name => lexer::Tok::Name { name: <String> },
"\n" => lexer::Tok::Newline,
";" => lexer::Tok::Semi,
"#" => lexer::Tok::Comment(_),
name => token::Tok::Name { name: <String> },
"\n" => token::Tok::Newline,
";" => token::Tok::Semi,
"#" => token::Tok::Comment(_),
}
}

View file

@@ -1,7 +1,9 @@
// Contains functions that perform validation and parsing of arguments and parameters.
// Checks apply both to functions and to lambdas.
use crate::ast;
use crate::lexer::{LexicalError, LexicalErrorType};
use crate::{
ast,
lexer::{LexicalError, LexicalErrorType},
};
use rustc_hash::FxHashSet;
pub(crate) struct ArgumentList {

View file

@@ -12,9 +12,7 @@
//! # Example
//!
//! ```
//! use rustpython_parser::lexer::{lex, Tok};
//! use rustpython_parser::mode::Mode;
//! use rustpython_parser::token::StringKind;
//! use rustpython_parser::{lexer::lex, Tok, Mode, StringKind};
//!
//! let source = "x = 'RustPython'";
//! let tokens = lex(source, Mode::Module)
@@ -33,19 +31,16 @@
//! ```
//!
//! [Lexical analysis]: https://docs.python.org/3/reference/lexical_analysis.html
pub use super::token::{StringKind, Tok};
use crate::ast::Location;
use crate::mode::Mode;
use crate::soft_keywords::SoftKeywordTransformer;
use crate::string::FStringErrorType;
use crate::{
ast::Location,
mode::Mode,
soft_keywords::SoftKeywordTransformer,
string::FStringErrorType,
token::{StringKind, Tok},
};
use num_bigint::BigInt;
use num_traits::identities::Zero;
use num_traits::Num;
use std::char;
use std::cmp::Ordering;
use std::ops::Index;
use std::slice::SliceIndex;
use std::str::FromStr;
use num_traits::{Num, Zero};
use std::{char, cmp::Ordering, ops::Index, slice::SliceIndex, str::FromStr};
use unic_emoji_char::is_emoji_presentation;
use unic_ucd_ident::{is_xid_continue, is_xid_start};
@@ -200,8 +195,7 @@ pub type LexResult = Result<Spanned, LexicalError>;
/// # Examples
///
/// ```
/// use rustpython_parser::mode::Mode;
/// use rustpython_parser::lexer::{lex};
/// use rustpython_parser::{Mode, lexer::lex};
///
/// let source = "def hello(): return 'world'";
/// let lexer = lex(source, Mode::Module);
@@ -1320,8 +1314,7 @@ impl std::fmt::Display for LexicalErrorType {
#[cfg(test)]
mod tests {
use super::{lex, StringKind, Tok};
use crate::mode::Mode;
use super::*;
use num_bigint::BigInt;
const WINDOWS_EOL: &str = "\r\n";
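
For orientation, the flattened lexer entry point is driven like the doc example above; a small sketch (assuming, per the surrounding examples, that each lexed item is a `(Location, Tok, Location)` triple):

```
use rustpython_parser::{lexer::lex, Mode, Tok};

// Collect just the tokens, dropping the start/end locations.
let tokens: Vec<Tok> = lex("x = 'RustPython'", Mode::Module)
    .map(|result| result.expect("lexing failed").1)
    .collect();
assert!(matches!(tokens[0], Tok::Name { .. }));
```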

View file

@@ -54,20 +54,18 @@
//!
//! The functionality of this crate is split into several modules:
//!
//! - [token]: This module contains the definition of the tokens that are generated by the lexer.
//! - token: This module contains the definition of the tokens that are generated by the lexer.
//! - [lexer]: This module contains the lexer and is responsible for generating the tokens.
//! - [parser]: This module contains an interface to the parser and is responsible for generating the AST.
//! - parser: This module contains an interface to the parser and is responsible for generating the AST.
//! - Functions and strings have special parsing requirements that are handled in additional files.
//! - [mode]: This module contains the definition of the different modes that the parser can be in.
//! - [error]: This module contains the definition of the errors that can be returned by the parser.
//! - mode: This module contains the definition of the different modes that the parser can be in.
//!
//! # Examples
//!
//! For example, to get a stream of tokens from a given string, one could do this:
//!
//! ```
//! use rustpython_parser::mode::Mode;
//! use rustpython_parser::lexer::lex;
//! use rustpython_parser::{lexer::lex, Mode};
//!
//! let python_source = r#"
//! def is_odd(i):
@@ -80,9 +78,7 @@
//! These tokens can be directly fed into the parser to generate an AST:
//!
//! ```
//! use rustpython_parser::lexer::lex;
//! use rustpython_parser::mode::Mode;
//! use rustpython_parser::parser::parse_tokens;
//! use rustpython_parser::{lexer::lex, Mode, parse_tokens};
//!
//! let python_source = r#"
//! def is_odd(i):
@@ -98,7 +94,7 @@
//! mode or tokenizing the source beforehand:
//!
//! ```
//! use rustpython_parser::parser::parse_program;
//! use rustpython_parser::parse_program;
//!
//! let python_source = r#"
//! def is_odd(i):
@@ -111,11 +107,7 @@
//!
//! [lexical analysis]: https://en.wikipedia.org/wiki/Lexical_analysis
//! [parsing]: https://en.wikipedia.org/wiki/Parsing
//! [token]: crate::token
//! [lexer]: crate::lexer
//! [parser]: crate::parser
//! [mode]: crate::mode
//! [error]: crate::error
#![doc(html_logo_url = "https://raw.githubusercontent.com/RustPython/RustPython/main/logo.png")]
#![doc(html_root_url = "https://docs.rs/rustpython-parser/")]
@@ -125,12 +117,21 @@ extern crate log;
pub use rustpython_ast as ast;
mod function;
// Skip flattening lexer to distinguish from full parser
pub mod lexer;
pub mod mode;
pub mod parser;
mod mode;
mod parser;
mod string;
#[rustfmt::skip]
mod python;
mod context;
mod soft_keywords;
pub mod token;
mod token;
pub use mode::Mode;
pub use parser::{
parse, parse_expression, parse_expression_located, parse_located, parse_program, parse_tokens,
ParseError, ParseErrorType,
};
pub use string::FStringErrorType;
pub use token::{StringKind, Tok};
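
With the re-exports above, both parsing paths are reachable from the crate root; a hedged sketch built only from functions shown in this commit:

```
use rustpython_parser::{lexer::lex, parse, parse_tokens, Mode};

// Parse straight from source...
let direct = parse("1 + 2", Mode::Expression, "<embedded>");
assert!(direct.is_ok());

// ...or lex first and hand the token stream to the parser.
let via_tokens = parse_tokens(lex("1 + 2", Mode::Expression), Mode::Expression, "<embedded>");
assert!(via_tokens.is_ok());
```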

View file

@@ -12,10 +12,13 @@
//! [Abstract Syntax Tree]: https://en.wikipedia.org/wiki/Abstract_syntax_tree
//! [`Mode`]: crate::mode
use crate::lexer::{LexResult, LexicalError, LexicalErrorType, Tok};
pub use crate::mode::Mode;
use crate::{ast, lexer, python};
use ast::Location;
use crate::{
ast::{self, Location},
lexer::{self, LexResult, LexicalError, LexicalErrorType},
mode::Mode,
python,
token::Tok,
};
use itertools::Itertools;
use std::iter;
@@ -31,7 +34,7 @@ pub(super) use lalrpop_util::ParseError as LalrpopError;
/// For example, parsing a simple function definition and a call to that function:
///
/// ```
/// use rustpython_parser::parser;
/// use rustpython_parser as parser;
/// let source = r#"
/// def foo():
/// return 42
@@ -59,7 +62,7 @@ pub fn parse_program(source: &str, source_path: &str) -> Result<ast::Suite, Pars
///
/// ```
/// extern crate num_bigint;
/// use rustpython_parser::{parser, ast};
/// use rustpython_parser as parser;
/// let expr = parser::parse_expression("1 + 2", "<embedded>");
///
/// assert!(expr.is_ok());
@@ -80,8 +83,7 @@ pub fn parse_expression(source: &str, path: &str) -> Result<ast::Expr, ParseErro
/// somewhat silly, location:
///
/// ```
/// use rustpython_parser::parser::parse_expression_located;
/// use rustpython_parser::ast::Location;
/// use rustpython_parser::{ast::Location, parse_expression_located};
///
/// let expr = parse_expression_located("1 + 2", "<embedded>", Location::new(5, 20));
/// assert!(expr.is_ok());
@@ -108,8 +110,7 @@ pub fn parse_expression_located(
/// parsing:
///
/// ```
/// use rustpython_parser::mode::Mode;
/// use rustpython_parser::parser::parse;
/// use rustpython_parser::{Mode, parse};
///
/// let expr = parse("1 + 2", Mode::Expression, "<embedded>");
/// assert!(expr.is_ok());
@@ -118,8 +119,7 @@ pub fn parse_expression_located(
/// Alternatively, we can parse a full Python program consisting of multiple lines:
///
/// ```
/// use rustpython_parser::mode::Mode;
/// use rustpython_parser::parser::parse;
/// use rustpython_parser::{Mode, parse};
///
/// let source = r#"
/// class Greeter:
@@ -142,9 +142,7 @@ pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<ast::Mod, Pa
/// # Example
///
/// ```
/// use rustpython_parser::ast::Location;
/// use rustpython_parser::mode::Mode;
/// use rustpython_parser::parser::parse_located;
/// use rustpython_parser::{ast::Location, Mode, parse_located};
///
/// let source = r#"
/// def fib(i):
@@ -178,9 +176,7 @@ pub fn parse_located(
/// them using the [`lexer::lex`] function:
///
/// ```
/// use rustpython_parser::lexer::lex;
/// use rustpython_parser::mode::Mode;
/// use rustpython_parser::parser::parse_tokens;
/// use rustpython_parser::{lexer::lex, Mode, parse_tokens};
///
/// let expr = parse_tokens(lex("1 + 2", Mode::Expression), Mode::Expression, "<embedded>");
/// assert!(expr.is_ok());
@@ -200,9 +196,7 @@ pub fn parse_tokens(
}
/// Represents errors that occur during parsing and are
/// returned by the `parse_*` functions in the [parser] module.
///
/// [parser]: crate::parser
/// returned by the `parse_*` functions.
pub type ParseError = rustpython_compiler_core::BaseError<ParseErrorType>;
/// Represents the different types of errors that can occur during parsing.
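
Since `ParseError` is an alias for `rustpython_compiler_core::BaseError<ParseErrorType>`, callers can inspect failures directly; a sketch, assuming `BaseError` exposes public `error` and `location` fields:

```
use rustpython_parser::{parse_expression, ParseError};

// "1 +" is incomplete, so this must fail.
let err: ParseError = parse_expression("1 +", "<embedded>").unwrap_err();
eprintln!("parse error at {:?}: {}", err.location, err.error);
```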

View file

@@ -1,8 +1,6 @@
use crate::{lexer::LexResult, mode::Mode, token::Tok};
use itertools::{Itertools, MultiPeek};
use crate::lexer::{LexResult, Tok};
pub use crate::mode::Mode;
/// An [`Iterator`] that transforms a token stream to accommodate soft keywords (namely, `match`
/// and `case`).
///
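
The transformer is needed because `match` and `case` are soft keywords: they act as keywords only at the head of a (potential) match statement. A sketch exercising both readings through the public parser (both snippets are valid Python 3.10+):

```
use rustpython_parser::parse_program;

// `match` as a plain identifier still parses...
assert!(parse_program("match = 1", "<embedded>").is_ok());

// ...and as the head of a match statement.
assert!(parse_program("match x:\n    case _:\n        pass", "<embedded>").is_ok());
```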

View file

@@ -3,7 +3,6 @@
// The lexer doesn't do any special handling of f-strings, it just treats them as
// regular strings. Since the parser has no definition of f-string formats (Pending PEP 701)
// we have to do the parsing here, manually.
use self::FStringErrorType::*;
use crate::{
ast::{Constant, ConversionFlag, Expr, ExprKind, Location},
lexer::{LexicalError, LexicalErrorType},
@@ -11,13 +10,12 @@ use crate::{
token::{StringKind, Tok},
};
use itertools::Itertools;
use std::{iter, str};
// unicode_name2 does not expose `MAX_NAME_LENGTH`, so we replicate that constant here, fix #3798
const MAX_UNICODE_NAME: usize = 88;
struct StringParser<'a> {
chars: iter::Peekable<str::Chars<'a>>,
chars: std::iter::Peekable<std::str::Chars<'a>>,
kind: StringKind,
start: Location,
end: Location,
@@ -177,6 +175,8 @@ impl<'a> StringParser<'a> {
}
fn parse_formatted_value(&mut self, nested: u8) -> Result<Vec<Expr>, LexicalError> {
use FStringErrorType::*;
let mut expression = String::new();
let mut spec = None;
let mut delims = Vec::new();
@@ -402,6 +402,8 @@ impl<'a> StringParser<'a> {
}
fn parse_fstring(&mut self, nested: u8) -> Result<Vec<Expr>, LexicalError> {
use FStringErrorType::*;
if nested >= 2 {
return Err(FStringError::new(ExpressionNestedTooDeeply, self.get_pos()).into());
}
@@ -653,7 +655,7 @@ pub(crate) fn parse_strings(
// TODO: consolidate these with ParseError
/// An error that occurred during parsing of an f-string.
#[derive(Debug, PartialEq)]
pub struct FStringError {
struct FStringError {
/// The type of error that occurred.
pub error: FStringErrorType,
/// The location of the error.
@@ -708,28 +710,29 @@ pub enum FStringErrorType {
impl std::fmt::Display for FStringErrorType {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
use FStringErrorType::*;
match self {
FStringErrorType::UnclosedLbrace => write!(f, "expecting '}}'"),
FStringErrorType::UnopenedRbrace => write!(f, "Unopened '}}'"),
FStringErrorType::ExpectedRbrace => write!(f, "Expected '}}' after conversion flag."),
FStringErrorType::InvalidExpression(error) => {
UnclosedLbrace => write!(f, "expecting '}}'"),
UnopenedRbrace => write!(f, "Unopened '}}'"),
ExpectedRbrace => write!(f, "Expected '}}' after conversion flag."),
InvalidExpression(error) => {
write!(f, "{error}")
}
FStringErrorType::InvalidConversionFlag => write!(f, "invalid conversion character"),
FStringErrorType::EmptyExpression => write!(f, "empty expression not allowed"),
FStringErrorType::MismatchedDelimiter(first, second) => write!(
InvalidConversionFlag => write!(f, "invalid conversion character"),
EmptyExpression => write!(f, "empty expression not allowed"),
MismatchedDelimiter(first, second) => write!(
f,
"closing parenthesis '{second}' does not match opening parenthesis '{first}'"
),
FStringErrorType::SingleRbrace => write!(f, "single '}}' is not allowed"),
FStringErrorType::Unmatched(delim) => write!(f, "unmatched '{delim}'"),
FStringErrorType::ExpressionNestedTooDeeply => {
SingleRbrace => write!(f, "single '}}' is not allowed"),
Unmatched(delim) => write!(f, "unmatched '{delim}'"),
ExpressionNestedTooDeeply => {
write!(f, "expressions nested too deeply")
}
FStringErrorType::UnterminatedString => {
UnterminatedString => {
write!(f, "unterminated string")
}
FStringErrorType::ExpressionCannotInclude(c) => {
ExpressionCannotInclude(c) => {
if *c == '\\' {
write!(f, "f-string expression part cannot include a backslash")
} else {
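
An aside on the arms above: since `FStringErrorType` is now re-exported from the crate root (see the lib.rs hunk earlier), its rendered messages can be asserted against directly; a minimal sketch using the `SingleRbrace` arm:

```
use rustpython_parser::FStringErrorType;

// The Display impl above renders this variant's message verbatim.
assert_eq!(
    FStringErrorType::SingleRbrace.to_string(),
    "single '}' is not allowed"
);
```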
@@ -832,6 +835,7 @@ mod tests {
#[test]
fn test_parse_invalid_fstring() {
use FStringErrorType::*;
assert_eq!(parse_fstring_error("{5!a"), UnclosedLbrace);
assert_eq!(parse_fstring_error("{5!a1}"), UnclosedLbrace);
assert_eq!(parse_fstring_error("{5!"), UnclosedLbrace);