mirror of
https://github.com/RustPython/Parser.git
synced 2025-07-19 11:05:45 +00:00
Flatten rustpython_parser interface
This commit is contained in:
parent
8580e4ebb5
commit
cb8c6fb78d
7 changed files with 168 additions and 177 deletions
|
@ -1,7 +1,9 @@
|
|||
// Contains functions that perform validation and parsing of arguments and parameters.
|
||||
// Checks apply both to functions and to lambdas.
|
||||
use crate::ast;
|
||||
use crate::lexer::{LexicalError, LexicalErrorType};
|
||||
use crate::{
|
||||
ast,
|
||||
lexer::{LexicalError, LexicalErrorType},
|
||||
};
|
||||
use rustc_hash::FxHashSet;
|
||||
|
||||
pub(crate) struct ArgumentList {
|
||||
|
|
|
@ -12,9 +12,7 @@
|
|||
//! # Example
|
||||
//!
|
||||
//! ```
|
||||
//! use rustpython_parser::lexer::{lex, Tok};
|
||||
//! use rustpython_parser::mode::Mode;
|
||||
//! use rustpython_parser::token::StringKind;
|
||||
//! use rustpython_parser::{lexer::lex, Tok, Mode, StringKind};
|
||||
//!
|
||||
//! let source = "x = 'RustPython'";
|
||||
//! let tokens = lex(source, Mode::Module)
|
||||
|
@ -33,19 +31,16 @@
|
|||
//! ```
|
||||
//!
|
||||
//! [Lexical analysis]: https://docs.python.org/3/reference/lexical_analysis.html
|
||||
pub use super::token::{StringKind, Tok};
|
||||
use crate::ast::Location;
|
||||
use crate::mode::Mode;
|
||||
use crate::soft_keywords::SoftKeywordTransformer;
|
||||
use crate::string::FStringErrorType;
|
||||
use crate::{
|
||||
ast::Location,
|
||||
mode::Mode,
|
||||
soft_keywords::SoftKeywordTransformer,
|
||||
string::FStringErrorType,
|
||||
token::{StringKind, Tok},
|
||||
};
|
||||
use num_bigint::BigInt;
|
||||
use num_traits::identities::Zero;
|
||||
use num_traits::Num;
|
||||
use std::char;
|
||||
use std::cmp::Ordering;
|
||||
use std::ops::Index;
|
||||
use std::slice::SliceIndex;
|
||||
use std::str::FromStr;
|
||||
use num_traits::{Num, Zero};
|
||||
use std::{char, cmp::Ordering, ops::Index, slice::SliceIndex, str::FromStr};
|
||||
use unic_emoji_char::is_emoji_presentation;
|
||||
use unic_ucd_ident::{is_xid_continue, is_xid_start};
|
||||
|
||||
|
@ -200,8 +195,7 @@ pub type LexResult = Result<Spanned, LexicalError>;
|
|||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use rustpython_parser::mode::Mode;
|
||||
/// use rustpython_parser::lexer::{lex};
|
||||
/// use rustpython_parser::{Mode, lexer::lex};
|
||||
///
|
||||
/// let source = "def hello(): return 'world'";
|
||||
/// let lexer = lex(source, Mode::Module);
|
||||
|
@ -1320,8 +1314,7 @@ impl std::fmt::Display for LexicalErrorType {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{lex, StringKind, Tok};
|
||||
use crate::mode::Mode;
|
||||
use super::*;
|
||||
use num_bigint::BigInt;
|
||||
|
||||
const WINDOWS_EOL: &str = "\r\n";
|
||||
|
|
|
@ -54,20 +54,18 @@
|
|||
//!
|
||||
//! The functionality of this crate is split into several modules:
|
||||
//!
|
||||
//! - [token]: This module contains the definition of the tokens that are generated by the lexer.
|
||||
//! - token: This module contains the definition of the tokens that are generated by the lexer.
|
||||
//! - [lexer]: This module contains the lexer and is responsible for generating the tokens.
|
||||
//! - [parser]: This module contains an interface to the parser and is responsible for generating the AST.
|
||||
//! - parser: This module contains an interface to the parser and is responsible for generating the AST.
|
||||
//! - Functions and strings have special parsing requirements that are handled in additional files.
|
||||
//! - [mode]: This module contains the definition of the different modes that the parser can be in.
|
||||
//! - [error]: This module contains the definition of the errors that can be returned by the parser.
|
||||
//! - mode: This module contains the definition of the different modes that the parser can be in.
|
||||
//!
|
||||
//! # Examples
|
||||
//!
|
||||
//! For example, to get a stream of tokens from a given string, one could do this:
|
||||
//!
|
||||
//! ```
|
||||
//! use rustpython_parser::mode::Mode;
|
||||
//! use rustpython_parser::lexer::lex;
|
||||
//! use rustpython_parser::{lexer::lex, Mode};
|
||||
//!
|
||||
//! let python_source = r#"
|
||||
//! def is_odd(i):
|
||||
|
@ -80,9 +78,7 @@
|
|||
//! These tokens can be directly fed into the parser to generate an AST:
|
||||
//!
|
||||
//! ```
|
||||
//! use rustpython_parser::lexer::lex;
|
||||
//! use rustpython_parser::mode::Mode;
|
||||
//! use rustpython_parser::parser::parse_tokens;
|
||||
//! use rustpython_parser::{lexer::lex, Mode, parse_tokens};
|
||||
//!
|
||||
//! let python_source = r#"
|
||||
//! def is_odd(i):
|
||||
|
@ -98,7 +94,7 @@
|
|||
//! mode or tokenizing the source beforehand:
|
||||
//!
|
||||
//! ```
|
||||
//! use rustpython_parser::parser::parse_program;
|
||||
//! use rustpython_parser::parse_program;
|
||||
//!
|
||||
//! let python_source = r#"
|
||||
//! def is_odd(i):
|
||||
|
@ -111,11 +107,7 @@
|
|||
//!
|
||||
//! [lexical analysis]: https://en.wikipedia.org/wiki/Lexical_analysis
|
||||
//! [parsing]: https://en.wikipedia.org/wiki/Parsing
|
||||
//! [token]: crate::token
|
||||
//! [lexer]: crate::lexer
|
||||
//! [parser]: crate::parser
|
||||
//! [mode]: crate::mode
|
||||
//! [error]: crate::error
|
||||
|
||||
#![doc(html_logo_url = "https://raw.githubusercontent.com/RustPython/RustPython/main/logo.png")]
|
||||
#![doc(html_root_url = "https://docs.rs/rustpython-parser/")]
|
||||
|
@ -125,12 +117,21 @@ extern crate log;
|
|||
pub use rustpython_ast as ast;
|
||||
|
||||
mod function;
|
||||
// Skip flattening lexer to distinguish from full parser
|
||||
pub mod lexer;
|
||||
pub mod mode;
|
||||
pub mod parser;
|
||||
mod mode;
|
||||
mod parser;
|
||||
mod string;
|
||||
#[rustfmt::skip]
|
||||
mod python;
|
||||
mod context;
|
||||
mod soft_keywords;
|
||||
pub mod token;
|
||||
mod token;
|
||||
|
||||
pub use mode::Mode;
|
||||
pub use parser::{
|
||||
parse, parse_expression, parse_expression_located, parse_located, parse_program, parse_tokens,
|
||||
ParseError, ParseErrorType,
|
||||
};
|
||||
pub use string::FStringErrorType;
|
||||
pub use token::{StringKind, Tok};
|
||||
|
|
|
@ -12,10 +12,13 @@
|
|||
//! [Abstract Syntax Tree]: https://en.wikipedia.org/wiki/Abstract_syntax_tree
|
||||
//! [`Mode`]: crate::mode
|
||||
|
||||
use crate::lexer::{LexResult, LexicalError, LexicalErrorType, Tok};
|
||||
pub use crate::mode::Mode;
|
||||
use crate::{ast, lexer, python};
|
||||
use ast::Location;
|
||||
use crate::{
|
||||
ast::{self, Location},
|
||||
lexer::{self, LexResult, LexicalError, LexicalErrorType},
|
||||
mode::Mode,
|
||||
python,
|
||||
token::Tok,
|
||||
};
|
||||
use itertools::Itertools;
|
||||
use std::iter;
|
||||
|
||||
|
@ -31,7 +34,7 @@ pub(super) use lalrpop_util::ParseError as LalrpopError;
|
|||
/// For example, parsing a simple function definition and a call to that function:
|
||||
///
|
||||
/// ```
|
||||
/// use rustpython_parser::parser;
|
||||
/// use rustpython_parser as parser;
|
||||
/// let source = r#"
|
||||
/// def foo():
|
||||
/// return 42
|
||||
|
@ -59,7 +62,7 @@ pub fn parse_program(source: &str, source_path: &str) -> Result<ast::Suite, Pars
|
|||
///
|
||||
/// ```
|
||||
/// extern crate num_bigint;
|
||||
/// use rustpython_parser::{parser, ast};
|
||||
/// use rustpython_parser as parser;
|
||||
/// let expr = parser::parse_expression("1 + 2", "<embedded>");
|
||||
///
|
||||
/// assert!(expr.is_ok());
|
||||
|
@ -80,8 +83,7 @@ pub fn parse_expression(source: &str, path: &str) -> Result<ast::Expr, ParseErro
|
|||
/// somewhat silly, location:
|
||||
///
|
||||
/// ```
|
||||
/// use rustpython_parser::parser::parse_expression_located;
|
||||
/// use rustpython_parser::ast::Location;
|
||||
/// use rustpython_parser::{ast::Location, parse_expression_located};
|
||||
///
|
||||
/// let expr = parse_expression_located("1 + 2", "<embedded>", Location::new(5, 20));
|
||||
/// assert!(expr.is_ok());
|
||||
|
@ -108,8 +110,7 @@ pub fn parse_expression_located(
|
|||
/// parsing:
|
||||
///
|
||||
/// ```
|
||||
/// use rustpython_parser::mode::Mode;
|
||||
/// use rustpython_parser::parser::parse;
|
||||
/// use rustpython_parser::{Mode, parse};
|
||||
///
|
||||
/// let expr = parse("1 + 2", Mode::Expression, "<embedded>");
|
||||
/// assert!(expr.is_ok());
|
||||
|
@ -118,8 +119,7 @@ pub fn parse_expression_located(
|
|||
/// Alternatively, we can parse a full Python program consisting of multiple lines:
|
||||
///
|
||||
/// ```
|
||||
/// use rustpython_parser::mode::Mode;
|
||||
/// use rustpython_parser::parser::parse;
|
||||
/// use rustpython_parser::{Mode, parse};
|
||||
///
|
||||
/// let source = r#"
|
||||
/// class Greeter:
|
||||
|
@ -142,9 +142,7 @@ pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<ast::Mod, Pa
|
|||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// use rustpython_parser::ast::Location;
|
||||
/// use rustpython_parser::mode::Mode;
|
||||
/// use rustpython_parser::parser::parse_located;
|
||||
/// use rustpython_parser::{ast::Location, Mode, parse_located};
|
||||
///
|
||||
/// let source = r#"
|
||||
/// def fib(i):
|
||||
|
@ -178,9 +176,7 @@ pub fn parse_located(
|
|||
/// them using the [`lexer::lex`] function:
|
||||
///
|
||||
/// ```
|
||||
/// use rustpython_parser::lexer::lex;
|
||||
/// use rustpython_parser::mode::Mode;
|
||||
/// use rustpython_parser::parser::parse_tokens;
|
||||
/// use rustpython_parser::{lexer::lex, Mode, parse_tokens};
|
||||
///
|
||||
/// let expr = parse_tokens(lex("1 + 2", Mode::Expression), Mode::Expression, "<embedded>");
|
||||
/// assert!(expr.is_ok());
|
||||
|
@ -200,9 +196,7 @@ pub fn parse_tokens(
|
|||
}
|
||||
|
||||
/// Represents errors that occur during parsing and are
|
||||
/// returned by the `parse_*` functions in the [parser] module.
|
||||
///
|
||||
/// [parser]: crate::parser
|
||||
/// returned by the `parse_*` functions.
|
||||
pub type ParseError = rustpython_compiler_core::BaseError<ParseErrorType>;
|
||||
|
||||
/// Represents the different types of errors that can occur during parsing.
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
use crate::{lexer::LexResult, mode::Mode, token::Tok};
|
||||
use itertools::{Itertools, MultiPeek};
|
||||
|
||||
use crate::lexer::{LexResult, Tok};
|
||||
pub use crate::mode::Mode;
|
||||
|
||||
/// An [`Iterator`] that transforms a token stream to accommodate soft keywords (namely, `match`
|
||||
/// and `case`).
|
||||
///
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
// The lexer doesn't do any special handling of f-strings, it just treats them as
|
||||
// regular strings. Since the parser has no definition of f-string formats (Pending PEP 701)
|
||||
// we have to do the parsing here, manually.
|
||||
use self::FStringErrorType::*;
|
||||
use crate::{
|
||||
ast::{Constant, ConversionFlag, Expr, ExprKind, Location},
|
||||
lexer::{LexicalError, LexicalErrorType},
|
||||
|
@ -11,13 +10,12 @@ use crate::{
|
|||
token::{StringKind, Tok},
|
||||
};
|
||||
use itertools::Itertools;
|
||||
use std::{iter, str};
|
||||
|
||||
// unicode_name2 does not expose `MAX_NAME_LENGTH`, so we replicate that constant here, fix #3798
|
||||
const MAX_UNICODE_NAME: usize = 88;
|
||||
|
||||
struct StringParser<'a> {
|
||||
chars: iter::Peekable<str::Chars<'a>>,
|
||||
chars: std::iter::Peekable<std::str::Chars<'a>>,
|
||||
kind: StringKind,
|
||||
start: Location,
|
||||
end: Location,
|
||||
|
@ -177,6 +175,8 @@ impl<'a> StringParser<'a> {
|
|||
}
|
||||
|
||||
fn parse_formatted_value(&mut self, nested: u8) -> Result<Vec<Expr>, LexicalError> {
|
||||
use FStringErrorType::*;
|
||||
|
||||
let mut expression = String::new();
|
||||
let mut spec = None;
|
||||
let mut delims = Vec::new();
|
||||
|
@ -402,6 +402,8 @@ impl<'a> StringParser<'a> {
|
|||
}
|
||||
|
||||
fn parse_fstring(&mut self, nested: u8) -> Result<Vec<Expr>, LexicalError> {
|
||||
use FStringErrorType::*;
|
||||
|
||||
if nested >= 2 {
|
||||
return Err(FStringError::new(ExpressionNestedTooDeeply, self.get_pos()).into());
|
||||
}
|
||||
|
@ -653,7 +655,7 @@ pub(crate) fn parse_strings(
|
|||
// TODO: consolidate these with ParseError
|
||||
/// An error that occurred during parsing of an f-string.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct FStringError {
|
||||
struct FStringError {
|
||||
/// The type of error that occurred.
|
||||
pub error: FStringErrorType,
|
||||
/// The location of the error.
|
||||
|
@ -708,28 +710,29 @@ pub enum FStringErrorType {
|
|||
|
||||
impl std::fmt::Display for FStringErrorType {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
use FStringErrorType::*;
|
||||
match self {
|
||||
FStringErrorType::UnclosedLbrace => write!(f, "expecting '}}'"),
|
||||
FStringErrorType::UnopenedRbrace => write!(f, "Unopened '}}'"),
|
||||
FStringErrorType::ExpectedRbrace => write!(f, "Expected '}}' after conversion flag."),
|
||||
FStringErrorType::InvalidExpression(error) => {
|
||||
UnclosedLbrace => write!(f, "expecting '}}'"),
|
||||
UnopenedRbrace => write!(f, "Unopened '}}'"),
|
||||
ExpectedRbrace => write!(f, "Expected '}}' after conversion flag."),
|
||||
InvalidExpression(error) => {
|
||||
write!(f, "{error}")
|
||||
}
|
||||
FStringErrorType::InvalidConversionFlag => write!(f, "invalid conversion character"),
|
||||
FStringErrorType::EmptyExpression => write!(f, "empty expression not allowed"),
|
||||
FStringErrorType::MismatchedDelimiter(first, second) => write!(
|
||||
InvalidConversionFlag => write!(f, "invalid conversion character"),
|
||||
EmptyExpression => write!(f, "empty expression not allowed"),
|
||||
MismatchedDelimiter(first, second) => write!(
|
||||
f,
|
||||
"closing parenthesis '{second}' does not match opening parenthesis '{first}'"
|
||||
),
|
||||
FStringErrorType::SingleRbrace => write!(f, "single '}}' is not allowed"),
|
||||
FStringErrorType::Unmatched(delim) => write!(f, "unmatched '{delim}'"),
|
||||
FStringErrorType::ExpressionNestedTooDeeply => {
|
||||
SingleRbrace => write!(f, "single '}}' is not allowed"),
|
||||
Unmatched(delim) => write!(f, "unmatched '{delim}'"),
|
||||
ExpressionNestedTooDeeply => {
|
||||
write!(f, "expressions nested too deeply")
|
||||
}
|
||||
FStringErrorType::UnterminatedString => {
|
||||
UnterminatedString => {
|
||||
write!(f, "unterminated string")
|
||||
}
|
||||
FStringErrorType::ExpressionCannotInclude(c) => {
|
||||
ExpressionCannotInclude(c) => {
|
||||
if *c == '\\' {
|
||||
write!(f, "f-string expression part cannot include a backslash")
|
||||
} else {
|
||||
|
@ -832,6 +835,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_parse_invalid_fstring() {
|
||||
use FStringErrorType::*;
|
||||
assert_eq!(parse_fstring_error("{5!a"), UnclosedLbrace);
|
||||
assert_eq!(parse_fstring_error("{5!a1}"), UnclosedLbrace);
|
||||
assert_eq!(parse_fstring_error("{5!"), UnclosedLbrace);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue