Mirror of https://github.com/RustPython/Parser.git, synced 2025-07-09 22:25:23 +00:00

Merge pull request #4543 from youknowone/flatten-parser

Flatten parser interface

This commit is contained in commit 822f0936ca
14 changed files with 528 additions and 552 deletions
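The pull request removes the separate `error` module, folds its types into `lexer` and `parser`, renames `make_tokenizer`/`make_tokenizer_located` to `lex`/`lex_located`, and re-exports the common entry points from the crate root. A minimal before/after sketch of caller code, based only on the imports and doc examples visible in the hunks below (illustrative, not part of the commit itself):

```rust
// Before this commit the entry points lived in nested public modules:
//     use rustpython_parser::parser::{parse_program, parse_tokens};
//     use rustpython_parser::mode::Mode;
//     use rustpython_parser::lexer::make_tokenizer;
// After it, the same items are reachable from the crate root, and the
// lexer entry point is named `lex`.
use rustpython_parser::{lexer::lex, parse_program, parse_tokens, Mode};

fn demo() {
    let source = "def is_odd(i):\n    return bool(i & 1)\n";

    // Parse straight from source text...
    let program = parse_program(source, "<embedded>");
    assert!(program.is_ok());

    // ...or lex first and hand the token stream to the parser.
    let tokens = lex(source, Mode::Module);
    let ast = parse_tokens(tokens, Mode::Module, "<embedded>");
    assert!(ast.is_ok());
}
```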
@@ -1,5 +1,3 @@
-use std::fmt::Error;
-
 use num_bigint::BigInt;
 pub use rustpython_compiler_core::ConversionFlag;

@@ -44,7 +42,9 @@ impl std::fmt::Display for Constant {
             Constant::None => f.pad("None"),
             Constant::Bool(b) => f.pad(if *b { "True" } else { "False" }),
             Constant::Str(s) => rustpython_common::str::repr(s).fmt(f),
-            Constant::Bytes(b) => f.pad(&rustpython_common::bytes::repr(b).map_err(|_err| Error)?),
+            Constant::Bytes(b) => {
+                f.pad(&rustpython_common::bytes::repr(b).map_err(|_err| std::fmt::Error)?)
+            }
             Constant::Int(i) => i.fmt(f),
             Constant::Tuple(tup) => {
                 if let [elt] = &**tup {
@@ -133,12 +133,12 @@ impl<U> crate::fold::Fold<U> for ConstantOptimizer {

 #[cfg(test)]
 mod tests {
+    use super::*;

     #[cfg(feature = "constant-optimization")]
     #[test]
     fn test_constant_opt() {
-        use super::*;
-        use crate::fold::Fold;
-        use crate::*;
+        use crate::{fold::Fold, *};

         let start = Default::default();
         let end = None;
@@ -86,7 +86,7 @@ impl CompileContext {
     }
 }

-/// Compile an ast::Mod produced from rustpython_parser::parser::parse()
+/// Compile an ast::Mod produced from rustpython_parser::parse()
 pub fn compile_top(
     ast: &ast::Mod,
     source_path: String,
@@ -2843,10 +2843,8 @@ fn compile_constant(value: &ast::Constant) -> ConstantData {

 #[cfg(test)]
 mod tests {
-    use super::{CompileOpts, Compiler};
-    use crate::symboltable::SymbolTable;
-    use rustpython_compiler_core::CodeObject;
-    use rustpython_parser::parser;
+    use super::*;
+    use rustpython_parser as parser;

     fn compile_exec(source: &str) -> CodeObject {
         let mut compiler: Compiler = Compiler::new(
@@ -96,7 +96,7 @@ impl Location {

 #[cfg(test)]
 mod tests {
-    use crate::Location;
+    use super::*;

     #[test]
     fn test_gt() {
@@ -15,18 +15,16 @@ impl std::str::FromStr for Mode {
             "exec" => Ok(Mode::Exec),
             "eval" => Ok(Mode::Eval),
             "single" => Ok(Mode::Single),
-            _ => Err(ModeParseError { _priv: () }),
+            _ => Err(ModeParseError(())),
         }
     }
 }

 #[derive(Debug)]
-pub struct ModeParseError {
-    _priv: (),
-}
+pub struct ModeParseError(());

 impl std::fmt::Display for ModeParseError {
     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, r#"mode should be "exec", "eval", or "single""#)
+        write!(f, r#"mode must be "exec", "eval", or "single""#)
     }
 }
@@ -5,12 +5,11 @@

 use crate::{
     ast,
-    error::{LexicalError, LexicalErrorType},
+    lexer::{LexicalError, LexicalErrorType},
     function::{ArgumentList, parse_args, parse_params, validate_arguments},
-    lexer,
     context::set_context,
     string::parse_strings,
-    token::StringKind,
+    token::{self, StringKind},
 };
 use num_bigint::BigInt;

@@ -1937,106 +1936,106 @@ extern {
     type Location = ast::Location;
     type Error = LexicalError;

-    enum lexer::Tok {
+    enum token::Tok {
-        Indent => lexer::Tok::Indent,
+        Indent => token::Tok::Indent,
-        Dedent => lexer::Tok::Dedent,
+        Dedent => token::Tok::Dedent,
-        StartModule => lexer::Tok::StartModule,
+        StartModule => token::Tok::StartModule,
-        StartInteractive => lexer::Tok::StartInteractive,
+        StartInteractive => token::Tok::StartInteractive,
-        StartExpression => lexer::Tok::StartExpression,
+        StartExpression => token::Tok::StartExpression,
-        "+" => lexer::Tok::Plus,
+        "+" => token::Tok::Plus,
-        "-" => lexer::Tok::Minus,
+        "-" => token::Tok::Minus,
-        "~" => lexer::Tok::Tilde,
+        "~" => token::Tok::Tilde,
-        ":" => lexer::Tok::Colon,
+        ":" => token::Tok::Colon,
-        "." => lexer::Tok::Dot,
+        "." => token::Tok::Dot,
-        "..." => lexer::Tok::Ellipsis,
+        "..." => token::Tok::Ellipsis,
-        "," => lexer::Tok::Comma,
+        "," => token::Tok::Comma,
-        "*" => lexer::Tok::Star,
+        "*" => token::Tok::Star,
-        "**" => lexer::Tok::DoubleStar,
+        "**" => token::Tok::DoubleStar,
-        "&" => lexer::Tok::Amper,
+        "&" => token::Tok::Amper,
-        "@" => lexer::Tok::At,
+        "@" => token::Tok::At,
-        "%" => lexer::Tok::Percent,
+        "%" => token::Tok::Percent,
-        "//" => lexer::Tok::DoubleSlash,
+        "//" => token::Tok::DoubleSlash,
-        "^" => lexer::Tok::CircumFlex,
+        "^" => token::Tok::CircumFlex,
-        "|" => lexer::Tok::Vbar,
+        "|" => token::Tok::Vbar,
-        "<<" => lexer::Tok::LeftShift,
+        "<<" => token::Tok::LeftShift,
-        ">>" => lexer::Tok::RightShift,
+        ">>" => token::Tok::RightShift,
-        "/" => lexer::Tok::Slash,
+        "/" => token::Tok::Slash,
-        "(" => lexer::Tok::Lpar,
+        "(" => token::Tok::Lpar,
-        ")" => lexer::Tok::Rpar,
+        ")" => token::Tok::Rpar,
-        "[" => lexer::Tok::Lsqb,
+        "[" => token::Tok::Lsqb,
-        "]" => lexer::Tok::Rsqb,
+        "]" => token::Tok::Rsqb,
-        "{" => lexer::Tok::Lbrace,
+        "{" => token::Tok::Lbrace,
-        "}" => lexer::Tok::Rbrace,
+        "}" => token::Tok::Rbrace,
-        "=" => lexer::Tok::Equal,
+        "=" => token::Tok::Equal,
-        "+=" => lexer::Tok::PlusEqual,
+        "+=" => token::Tok::PlusEqual,
-        "-=" => lexer::Tok::MinusEqual,
+        "-=" => token::Tok::MinusEqual,
-        "*=" => lexer::Tok::StarEqual,
+        "*=" => token::Tok::StarEqual,
-        "@=" => lexer::Tok::AtEqual,
+        "@=" => token::Tok::AtEqual,
-        "/=" => lexer::Tok::SlashEqual,
+        "/=" => token::Tok::SlashEqual,
-        "%=" => lexer::Tok::PercentEqual,
+        "%=" => token::Tok::PercentEqual,
-        "&=" => lexer::Tok::AmperEqual,
+        "&=" => token::Tok::AmperEqual,
-        "|=" => lexer::Tok::VbarEqual,
+        "|=" => token::Tok::VbarEqual,
-        "^=" => lexer::Tok::CircumflexEqual,
+        "^=" => token::Tok::CircumflexEqual,
-        "<<=" => lexer::Tok::LeftShiftEqual,
+        "<<=" => token::Tok::LeftShiftEqual,
-        ">>=" => lexer::Tok::RightShiftEqual,
+        ">>=" => token::Tok::RightShiftEqual,
-        "**=" => lexer::Tok::DoubleStarEqual,
+        "**=" => token::Tok::DoubleStarEqual,
-        "//=" => lexer::Tok::DoubleSlashEqual,
+        "//=" => token::Tok::DoubleSlashEqual,
-        ":=" => lexer::Tok::ColonEqual,
+        ":=" => token::Tok::ColonEqual,
-        "==" => lexer::Tok::EqEqual,
+        "==" => token::Tok::EqEqual,
-        "!=" => lexer::Tok::NotEqual,
+        "!=" => token::Tok::NotEqual,
-        "<" => lexer::Tok::Less,
+        "<" => token::Tok::Less,
-        "<=" => lexer::Tok::LessEqual,
+        "<=" => token::Tok::LessEqual,
-        ">" => lexer::Tok::Greater,
+        ">" => token::Tok::Greater,
-        ">=" => lexer::Tok::GreaterEqual,
+        ">=" => token::Tok::GreaterEqual,
-        "->" => lexer::Tok::Rarrow,
+        "->" => token::Tok::Rarrow,
-        "and" => lexer::Tok::And,
+        "and" => token::Tok::And,
-        "as" => lexer::Tok::As,
+        "as" => token::Tok::As,
-        "assert" => lexer::Tok::Assert,
+        "assert" => token::Tok::Assert,
-        "async" => lexer::Tok::Async,
+        "async" => token::Tok::Async,
-        "await" => lexer::Tok::Await,
+        "await" => token::Tok::Await,
-        "break" => lexer::Tok::Break,
+        "break" => token::Tok::Break,
-        "class" => lexer::Tok::Class,
+        "class" => token::Tok::Class,
-        "continue" => lexer::Tok::Continue,
+        "continue" => token::Tok::Continue,
-        "def" => lexer::Tok::Def,
+        "def" => token::Tok::Def,
-        "del" => lexer::Tok::Del,
+        "del" => token::Tok::Del,
-        "elif" => lexer::Tok::Elif,
+        "elif" => token::Tok::Elif,
-        "else" => lexer::Tok::Else,
+        "else" => token::Tok::Else,
-        "except" => lexer::Tok::Except,
+        "except" => token::Tok::Except,
-        "finally" => lexer::Tok::Finally,
+        "finally" => token::Tok::Finally,
-        "for" => lexer::Tok::For,
+        "for" => token::Tok::For,
-        "from" => lexer::Tok::From,
+        "from" => token::Tok::From,
-        "global" => lexer::Tok::Global,
+        "global" => token::Tok::Global,
-        "if" => lexer::Tok::If,
+        "if" => token::Tok::If,
-        "import" => lexer::Tok::Import,
+        "import" => token::Tok::Import,
-        "in" => lexer::Tok::In,
+        "in" => token::Tok::In,
-        "is" => lexer::Tok::Is,
+        "is" => token::Tok::Is,
-        "lambda" => lexer::Tok::Lambda,
+        "lambda" => token::Tok::Lambda,
-        "nonlocal" => lexer::Tok::Nonlocal,
+        "nonlocal" => token::Tok::Nonlocal,
-        "not" => lexer::Tok::Not,
+        "not" => token::Tok::Not,
-        "or" => lexer::Tok::Or,
+        "or" => token::Tok::Or,
-        "pass" => lexer::Tok::Pass,
+        "pass" => token::Tok::Pass,
-        "raise" => lexer::Tok::Raise,
+        "raise" => token::Tok::Raise,
-        "return" => lexer::Tok::Return,
+        "return" => token::Tok::Return,
-        "try" => lexer::Tok::Try,
+        "try" => token::Tok::Try,
-        "while" => lexer::Tok::While,
+        "while" => token::Tok::While,
-        "match" => lexer::Tok::Match,
+        "match" => token::Tok::Match,
-        "case" => lexer::Tok::Case,
+        "case" => token::Tok::Case,
-        "with" => lexer::Tok::With,
+        "with" => token::Tok::With,
-        "yield" => lexer::Tok::Yield,
+        "yield" => token::Tok::Yield,
-        "True" => lexer::Tok::True,
+        "True" => token::Tok::True,
-        "False" => lexer::Tok::False,
+        "False" => token::Tok::False,
-        "None" => lexer::Tok::None,
+        "None" => token::Tok::None,
-        int => lexer::Tok::Int { value: <BigInt> },
+        int => token::Tok::Int { value: <BigInt> },
-        float => lexer::Tok::Float { value: <f64> },
+        float => token::Tok::Float { value: <f64> },
-        complex => lexer::Tok::Complex { real: <f64>, imag: <f64> },
+        complex => token::Tok::Complex { real: <f64>, imag: <f64> },
-        string => lexer::Tok::String {
+        string => token::Tok::String {
             value: <String>,
             kind: <StringKind>,
             triple_quoted: <bool>
         },
-        name => lexer::Tok::Name { name: <String> },
+        name => token::Tok::Name { name: <String> },
-        "\n" => lexer::Tok::Newline,
+        "\n" => token::Tok::Newline,
-        ";" => lexer::Tok::Semi,
+        ";" => token::Tok::Semi,
-        "#" => lexer::Tok::Comment(_),
+        "#" => token::Tok::Comment(_),
     }
 }

@@ -1,331 +0,0 @@
-//! Error types for the parser.
-//!
-//! These types are used to represent errors that occur during lexing and parsing and are
-//! returned by the `parse_*` functions in the [parser] module and the iterator in the
-//! [lexer] implementation.
-//!
-//! [parser]: crate::parser
-//! [lexer]: crate::lexer
-
-// Define internal parse error types.
-// The goal is to provide a matching and a safe error API, masking errors from LALR
-use crate::{ast::Location, token::Tok};
-use lalrpop_util::ParseError as LalrpopError;
-use std::fmt;
-
-/// Represents an error during lexing.
-#[derive(Debug, PartialEq)]
-pub struct LexicalError {
-    /// The type of error that occurred.
-    pub error: LexicalErrorType,
-    /// The location of the error.
-    pub location: Location,
-}
-
-impl LexicalError {
-    /// Creates a new `LexicalError` with the given error type and location.
-    pub fn new(error: LexicalErrorType, location: Location) -> Self {
-        Self { error, location }
-    }
-}
-
-/// Represents the different types of errors that can occur during lexing.
-#[derive(Debug, PartialEq)]
-pub enum LexicalErrorType {
-    // TODO: Can probably be removed, the places it is used seem to be able
-    // to use the `UnicodeError` variant instead.
-    #[doc(hidden)]
-    StringError,
-    // TODO: Should take a start/end position to report.
-    /// Decoding of a unicode escape sequence in a string literal failed.
-    UnicodeError,
-    /// The nesting of brackets/braces/parentheses is not balanced.
-    NestingError,
-    /// The indentation is not consistent.
-    IndentationError,
-    /// Inconsistent use of tabs and spaces.
-    TabError,
-    /// Encountered a tab after a space.
-    TabsAfterSpaces,
-    /// A non-default argument follows a default argument.
-    DefaultArgumentError,
-    /// A duplicate argument was found in a function definition.
-    DuplicateArgumentError(String),
-    /// A positional argument follows a keyword argument.
-    PositionalArgumentError,
-    /// An iterable argument unpacking `*args` follows keyword argument unpacking `**kwargs`.
-    UnpackedArgumentError,
-    /// A keyword argument was repeated.
-    DuplicateKeywordArgumentError(String),
-    /// An unrecognized token was encountered.
-    UnrecognizedToken { tok: char },
-    /// An f-string error containing the [`FStringErrorType`].
-    FStringError(FStringErrorType),
-    /// An unexpected character was encountered after a line continuation.
-    LineContinuationError,
-    /// An unexpected end of file was encountered.
-    Eof,
-    /// An unexpected error occurred.
-    OtherError(String),
-}
-
-impl fmt::Display for LexicalErrorType {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match self {
-            LexicalErrorType::StringError => write!(f, "Got unexpected string"),
-            LexicalErrorType::FStringError(error) => write!(f, "f-string: {error}"),
-            LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"),
-            LexicalErrorType::NestingError => write!(f, "Got unexpected nesting"),
-            LexicalErrorType::IndentationError => {
-                write!(f, "unindent does not match any outer indentation level")
-            }
-            LexicalErrorType::TabError => {
-                write!(f, "inconsistent use of tabs and spaces in indentation")
-            }
-            LexicalErrorType::TabsAfterSpaces => {
-                write!(f, "Tabs not allowed as part of indentation after spaces")
-            }
-            LexicalErrorType::DefaultArgumentError => {
-                write!(f, "non-default argument follows default argument")
-            }
-            LexicalErrorType::DuplicateArgumentError(arg_name) => {
-                write!(f, "duplicate argument '{arg_name}' in function definition")
-            }
-            LexicalErrorType::DuplicateKeywordArgumentError(arg_name) => {
-                write!(f, "keyword argument repeated: {arg_name}")
-            }
-            LexicalErrorType::PositionalArgumentError => {
-                write!(f, "positional argument follows keyword argument")
-            }
-            LexicalErrorType::UnpackedArgumentError => {
-                write!(
-                    f,
-                    "iterable argument unpacking follows keyword argument unpacking"
-                )
-            }
-            LexicalErrorType::UnrecognizedToken { tok } => {
-                write!(f, "Got unexpected token {tok}")
-            }
-            LexicalErrorType::LineContinuationError => {
-                write!(f, "unexpected character after line continuation character")
-            }
-            LexicalErrorType::Eof => write!(f, "unexpected EOF while parsing"),
-            LexicalErrorType::OtherError(msg) => write!(f, "{msg}"),
-        }
-    }
-}
-
-// TODO: consolidate these with ParseError
-/// An error that occurred during parsing of an f-string.
-#[derive(Debug, PartialEq)]
-pub struct FStringError {
-    /// The type of error that occurred.
-    pub error: FStringErrorType,
-    /// The location of the error.
-    pub location: Location,
-}
-
-impl FStringError {
-    /// Creates a new `FStringError` with the given error type and location.
-    pub fn new(error: FStringErrorType, location: Location) -> Self {
-        Self { error, location }
-    }
-}
-
-impl From<FStringError> for LexicalError {
-    fn from(err: FStringError) -> Self {
-        LexicalError {
-            error: LexicalErrorType::FStringError(err.error),
-            location: err.location,
-        }
-    }
-}
-
-/// Represents the different types of errors that can occur during parsing of an f-string.
-#[derive(Debug, PartialEq)]
-pub enum FStringErrorType {
-    /// Expected a right brace after an opened left brace.
-    UnclosedLbrace,
-    /// Expected a left brace after an ending right brace.
-    UnopenedRbrace,
-    /// Expected a right brace after a conversion flag.
-    ExpectedRbrace,
-    /// An error occurred while parsing an f-string expression.
-    InvalidExpression(Box<ParseErrorType>),
-    /// An invalid conversion flag was encountered.
-    InvalidConversionFlag,
-    /// An empty expression was encountered.
-    EmptyExpression,
-    /// An opening delimiter was not closed properly.
-    MismatchedDelimiter(char, char),
-    /// Too many nested expressions in an f-string.
-    ExpressionNestedTooDeeply,
-    /// The f-string expression cannot include the given character.
-    ExpressionCannotInclude(char),
-    /// A single right brace was encountered.
-    SingleRbrace,
-    /// A closing delimiter was not opened properly.
-    Unmatched(char),
-    // TODO: Test this case.
-    /// Unterminated string.
-    UnterminatedString,
-}
-
-impl fmt::Display for FStringErrorType {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match self {
-            FStringErrorType::UnclosedLbrace => write!(f, "expecting '}}'"),
-            FStringErrorType::UnopenedRbrace => write!(f, "Unopened '}}'"),
-            FStringErrorType::ExpectedRbrace => write!(f, "Expected '}}' after conversion flag."),
-            FStringErrorType::InvalidExpression(error) => {
-                write!(f, "{error}")
-            }
-            FStringErrorType::InvalidConversionFlag => write!(f, "invalid conversion character"),
-            FStringErrorType::EmptyExpression => write!(f, "empty expression not allowed"),
-            FStringErrorType::MismatchedDelimiter(first, second) => write!(
-                f,
-                "closing parenthesis '{second}' does not match opening parenthesis '{first}'"
-            ),
-            FStringErrorType::SingleRbrace => write!(f, "single '}}' is not allowed"),
-            FStringErrorType::Unmatched(delim) => write!(f, "unmatched '{delim}'"),
-            FStringErrorType::ExpressionNestedTooDeeply => {
-                write!(f, "expressions nested too deeply")
-            }
-            FStringErrorType::UnterminatedString => {
-                write!(f, "unterminated string")
-            }
-            FStringErrorType::ExpressionCannotInclude(c) => {
-                if *c == '\\' {
-                    write!(f, "f-string expression part cannot include a backslash")
-                } else {
-                    write!(f, "f-string expression part cannot include '{c}'s")
-                }
-            }
-        }
-    }
-}
-
-impl From<FStringError> for LalrpopError<Location, Tok, LexicalError> {
-    fn from(err: FStringError) -> Self {
-        lalrpop_util::ParseError::User {
-            error: LexicalError {
-                error: LexicalErrorType::FStringError(err.error),
-                location: err.location,
-            },
-        }
-    }
-}
-
-/// Represents an error during parsing.
-pub type ParseError = rustpython_compiler_core::BaseError<ParseErrorType>;
-
-/// Represents the different types of errors that can occur during parsing.
-#[derive(Debug, PartialEq, thiserror::Error)]
-pub enum ParseErrorType {
-    /// Parser encountered an unexpected end of input
-    Eof,
-    /// Parser encountered an extra token
-    ExtraToken(Tok),
-    /// Parser encountered an invalid token
-    InvalidToken,
-    /// Parser encountered an unexpected token
-    UnrecognizedToken(Tok, Option<String>),
-    // Maps to `User` type from `lalrpop-util`
-    /// Parser encountered an error during lexing.
-    Lexical(LexicalErrorType),
-}
-
-// Convert `lalrpop_util::ParseError` to our internal type
-pub(crate) fn parse_error_from_lalrpop(
-    err: LalrpopError<Location, Tok, LexicalError>,
-    source_path: &str,
-) -> ParseError {
-    let source_path = source_path.to_owned();
-    match err {
-        // TODO: Are there cases where this isn't an EOF?
-        LalrpopError::InvalidToken { location } => ParseError {
-            error: ParseErrorType::Eof,
-            location,
-            source_path,
-        },
-        LalrpopError::ExtraToken { token } => ParseError {
-            error: ParseErrorType::ExtraToken(token.1),
-            location: token.0,
-            source_path,
-        },
-        LalrpopError::User { error } => ParseError {
-            error: ParseErrorType::Lexical(error.error),
-            location: error.location,
-            source_path,
-        },
-        LalrpopError::UnrecognizedToken { token, expected } => {
-            // Hacky, but it's how CPython does it. See PyParser_AddToken,
-            // in particular "Only one possible expected token" comment.
-            let expected = (expected.len() == 1).then(|| expected[0].clone());
-            ParseError {
-                error: ParseErrorType::UnrecognizedToken(token.1, expected),
-                location: token.0.with_col_offset(1),
-                source_path,
-            }
-        }
-        LalrpopError::UnrecognizedEOF { location, expected } => {
-            // This could be an initial indentation error that we should ignore
-            let indent_error = expected == ["Indent"];
-            if indent_error {
-                ParseError {
-                    error: ParseErrorType::Lexical(LexicalErrorType::IndentationError),
-                    location,
-                    source_path,
-                }
-            } else {
-                ParseError {
-                    error: ParseErrorType::Eof,
-                    location,
-                    source_path,
-                }
-            }
-        }
-    }
-}
-
-impl fmt::Display for ParseErrorType {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match *self {
-            ParseErrorType::Eof => write!(f, "Got unexpected EOF"),
-            ParseErrorType::ExtraToken(ref tok) => write!(f, "Got extraneous token: {tok:?}"),
-            ParseErrorType::InvalidToken => write!(f, "Got invalid token"),
-            ParseErrorType::UnrecognizedToken(ref tok, ref expected) => {
-                if *tok == Tok::Indent {
-                    write!(f, "unexpected indent")
-                } else if expected.as_deref() == Some("Indent") {
-                    write!(f, "expected an indented block")
-                } else {
-                    write!(f, "invalid syntax. Got unexpected token {tok}")
-                }
-            }
-            ParseErrorType::Lexical(ref error) => write!(f, "{error}"),
-        }
-    }
-}
-
-impl ParseErrorType {
-    /// Returns true if the error is an indentation error.
-    pub fn is_indentation_error(&self) -> bool {
-        match self {
-            ParseErrorType::Lexical(LexicalErrorType::IndentationError) => true,
-            ParseErrorType::UnrecognizedToken(token, expected) => {
-                *token == Tok::Indent || expected.clone() == Some("Indent".to_owned())
-            }
-            _ => false,
-        }
-    }
-
-    /// Returns true if the error is a tab error.
-    pub fn is_tab_error(&self) -> bool {
-        matches!(
-            self,
-            ParseErrorType::Lexical(LexicalErrorType::TabError)
-                | ParseErrorType::Lexical(LexicalErrorType::TabsAfterSpaces)
-        )
-    }
-}
@@ -1,7 +1,9 @@
 // Contains functions that perform validation and parsing of arguments and parameters.
 // Checks apply both to functions and to lambdas.
-use crate::ast;
-use crate::error::{LexicalError, LexicalErrorType};
+use crate::{
+    ast,
+    lexer::{LexicalError, LexicalErrorType},
+};
 use rustc_hash::FxHashSet;

 pub(crate) struct ArgumentList {
@@ -149,8 +151,8 @@ fn is_starred(exp: &ast::Expr) -> bool {

 #[cfg(test)]
 mod tests {
-    use crate::error::{LexicalErrorType, ParseErrorType};
-    use crate::parser::parse_program;
+    use super::*;
+    use crate::parser::{parse_program, ParseErrorType};

     macro_rules! function_and_lambda {
         ($($name:ident: $code:expr,)*) => {
@@ -4,7 +4,7 @@
 //! governing what is and is not a valid token are defined in the Python reference
 //! guide section on [Lexical analysis].
 //!
-//! The primary function in this module is [`make_tokenizer`], which takes a string slice
+//! The primary function in this module is [`lex`], which takes a string slice
 //! and returns an iterator over the tokens in the source code. The tokens are currently returned
 //! as a `Result<Spanned, LexicalError>`, where [`Spanned`] is a tuple containing the
 //! start and end [`Location`] and a [`Tok`] denoting the token.
@@ -12,12 +12,10 @@
 //! # Example
 //!
 //! ```
-//! use rustpython_parser::lexer::{make_tokenizer, Tok};
-//! use rustpython_parser::mode::Mode;
-//! use rustpython_parser::token::StringKind;
+//! use rustpython_parser::{lexer::lex, Tok, Mode, StringKind};
 //!
 //! let source = "x = 'RustPython'";
-//! let tokens = make_tokenizer(source, Mode::Module)
+//! let tokens = lex(source, Mode::Module)
 //!     .map(|tok| tok.expect("Failed to lex"))
 //!     .collect::<Vec<_>>();
 //!
@@ -33,19 +31,17 @@
 //! ```
 //!
 //! [Lexical analysis]: https://docs.python.org/3/reference/lexical_analysis.html
-pub use super::token::{StringKind, Tok};
-use crate::ast::Location;
-use crate::error::{LexicalError, LexicalErrorType};
-use crate::mode::Mode;
-use crate::soft_keywords::SoftKeywordTransformer;
+use crate::{
+    ast::Location,
+    mode::Mode,
+    soft_keywords::SoftKeywordTransformer,
+    string::FStringErrorType,
+    token::{StringKind, Tok},
+};
+use log::trace;
 use num_bigint::BigInt;
-use num_traits::identities::Zero;
-use num_traits::Num;
-use std::char;
-use std::cmp::Ordering;
-use std::ops::Index;
-use std::slice::SliceIndex;
-use std::str::FromStr;
+use num_traits::{Num, Zero};
+use std::{char, cmp::Ordering, ops::Index, slice::SliceIndex, str::FromStr};
 use unic_emoji_char::is_emoji_presentation;
 use unic_ucd_ident::{is_xid_continue, is_xid_start};

@@ -195,29 +191,28 @@ pub type Spanned = (Location, Tok, Location);
 /// The result of lexing a token.
 pub type LexResult = Result<Spanned, LexicalError>;

-/// Create a new tokenizer from a source string.
+/// Create a new lexer from a source string.
 ///
 /// # Examples
 ///
 /// ```
-/// use rustpython_parser::mode::Mode;
-/// use rustpython_parser::lexer::{make_tokenizer};
+/// use rustpython_parser::{Mode, lexer::lex};
 ///
 /// let source = "def hello(): return 'world'";
-/// let tokenizer = make_tokenizer(source, Mode::Module);
+/// let lexer = lex(source, Mode::Module);
 ///
-/// for token in tokenizer {
+/// for token in lexer {
 ///     println!("{:?}", token);
 /// }
 /// ```
 #[inline]
-pub fn make_tokenizer(source: &str, mode: Mode) -> impl Iterator<Item = LexResult> + '_ {
-    make_tokenizer_located(source, mode, Location::default())
+pub fn lex(source: &str, mode: Mode) -> impl Iterator<Item = LexResult> + '_ {
+    lex_located(source, mode, Location::default())
 }

-/// Create a new tokenizer from a source string, starting at a given location.
-/// You probably want to use [`make_tokenizer`] instead.
-pub fn make_tokenizer_located(
+/// Create a new lexer from a source string, starting at a given location.
+/// You probably want to use [`lex`] instead.
+pub fn lex_located(
     source: &str,
     mode: Mode,
     start_location: Location,
@@ -230,7 +225,7 @@ where
     T: Iterator<Item = char>,
 {
     /// Create a new lexer from T and a starting location. You probably want to use
-    /// [`make_tokenizer`] instead.
+    /// [`lex`] instead.
     pub fn new(input: T, start: Location) -> Self {
         let mut lxr = Lexer {
             at_begin_of_line: true,
@@ -1212,10 +1207,115 @@ where
     }
 }

+/// Represents an error that occur during lexing and are
+/// returned by the `parse_*` functions in the iterator in the
+/// [lexer] implementation.
+///
+/// [lexer]: crate::lexer
+#[derive(Debug, PartialEq)]
+pub struct LexicalError {
+    /// The type of error that occurred.
+    pub error: LexicalErrorType,
+    /// The location of the error.
+    pub location: Location,
+}
+
+impl LexicalError {
+    /// Creates a new `LexicalError` with the given error type and location.
+    pub fn new(error: LexicalErrorType, location: Location) -> Self {
+        Self { error, location }
+    }
+}
+
+/// Represents the different types of errors that can occur during lexing.
+#[derive(Debug, PartialEq)]
+pub enum LexicalErrorType {
+    // TODO: Can probably be removed, the places it is used seem to be able
+    // to use the `UnicodeError` variant instead.
+    #[doc(hidden)]
+    StringError,
+    // TODO: Should take a start/end position to report.
+    /// Decoding of a unicode escape sequence in a string literal failed.
+    UnicodeError,
+    /// The nesting of brackets/braces/parentheses is not balanced.
+    NestingError,
+    /// The indentation is not consistent.
+    IndentationError,
+    /// Inconsistent use of tabs and spaces.
+    TabError,
+    /// Encountered a tab after a space.
+    TabsAfterSpaces,
+    /// A non-default argument follows a default argument.
+    DefaultArgumentError,
+    /// A duplicate argument was found in a function definition.
+    DuplicateArgumentError(String),
+    /// A positional argument follows a keyword argument.
+    PositionalArgumentError,
+    /// An iterable argument unpacking `*args` follows keyword argument unpacking `**kwargs`.
+    UnpackedArgumentError,
+    /// A keyword argument was repeated.
+    DuplicateKeywordArgumentError(String),
+    /// An unrecognized token was encountered.
+    UnrecognizedToken { tok: char },
+    /// An f-string error containing the [`FStringErrorType`].
+    FStringError(FStringErrorType),
+    /// An unexpected character was encountered after a line continuation.
+    LineContinuationError,
+    /// An unexpected end of file was encountered.
+    Eof,
+    /// An unexpected error occurred.
+    OtherError(String),
+}
+
+impl std::fmt::Display for LexicalErrorType {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        match self {
+            LexicalErrorType::StringError => write!(f, "Got unexpected string"),
+            LexicalErrorType::FStringError(error) => write!(f, "f-string: {error}"),
+            LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"),
+            LexicalErrorType::NestingError => write!(f, "Got unexpected nesting"),
+            LexicalErrorType::IndentationError => {
+                write!(f, "unindent does not match any outer indentation level")
+            }
+            LexicalErrorType::TabError => {
+                write!(f, "inconsistent use of tabs and spaces in indentation")
+            }
+            LexicalErrorType::TabsAfterSpaces => {
+                write!(f, "Tabs not allowed as part of indentation after spaces")
+            }
+            LexicalErrorType::DefaultArgumentError => {
+                write!(f, "non-default argument follows default argument")
+            }
+            LexicalErrorType::DuplicateArgumentError(arg_name) => {
+                write!(f, "duplicate argument '{arg_name}' in function definition")
+            }
+            LexicalErrorType::DuplicateKeywordArgumentError(arg_name) => {
+                write!(f, "keyword argument repeated: {arg_name}")
+            }
+            LexicalErrorType::PositionalArgumentError => {
+                write!(f, "positional argument follows keyword argument")
+            }
+            LexicalErrorType::UnpackedArgumentError => {
+                write!(
+                    f,
+                    "iterable argument unpacking follows keyword argument unpacking"
+                )
+            }
+            LexicalErrorType::UnrecognizedToken { tok } => {
+                write!(f, "Got unexpected token {tok}")
+            }
+            LexicalErrorType::LineContinuationError => {
+                write!(f, "unexpected character after line continuation character")
+            }
+            LexicalErrorType::Eof => write!(f, "unexpected EOF while parsing"),
+            LexicalErrorType::OtherError(msg) => write!(f, "{msg}"),
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
-    use super::{make_tokenizer, StringKind, Tok};
-    use crate::mode::Mode;
+    use super::*;
     use num_bigint::BigInt;

     const WINDOWS_EOL: &str = "\r\n";
@@ -1223,7 +1323,7 @@ mod tests {
     const UNIX_EOL: &str = "\n";

     pub fn lex_source(source: &str) -> Vec<Tok> {
-        let lexer = make_tokenizer(source, Mode::Module);
+        let lexer = lex(source, Mode::Module);
         lexer.map(|x| x.unwrap().1).collect()
     }

@@ -54,41 +54,37 @@
 //!
 //! The functionality of this crate is split into several modules:
 //!
-//! - [token]: This module contains the definition of the tokens that are generated by the lexer.
+//! - token: This module contains the definition of the tokens that are generated by the lexer.
 //! - [lexer]: This module contains the lexer and is responsible for generating the tokens.
-//! - [parser]: This module contains an interface to the parser and is responsible for generating the AST.
+//! - parser: This module contains an interface to the parser and is responsible for generating the AST.
 //!   - Functions and strings have special parsing requirements that are handled in additional files.
-//! - [mode]: This module contains the definition of the different modes that the parser can be in.
-//! - [error]: This module contains the definition of the errors that can be returned by the parser.
+//! - mode: This module contains the definition of the different modes that the parser can be in.
 //!
 //! # Examples
 //!
 //! For example, to get a stream of tokens from a given string, one could do this:
 //!
 //! ```
-//! use rustpython_parser::mode::Mode;
-//! use rustpython_parser::lexer::make_tokenizer;
+//! use rustpython_parser::{lexer::lex, Mode};
 //!
 //! let python_source = r#"
 //! def is_odd(i):
 //!     return bool(i & 1)
 //! "#;
-//! let mut tokens = make_tokenizer(python_source, Mode::Module);
+//! let mut tokens = lex(python_source, Mode::Module);
 //! assert!(tokens.all(|t| t.is_ok()));
 //! ```
 //!
 //! These tokens can be directly fed into the parser to generate an AST:
 //!
 //! ```
-//! use rustpython_parser::lexer::make_tokenizer;
-//! use rustpython_parser::mode::Mode;
-//! use rustpython_parser::parser::parse_tokens;
+//! use rustpython_parser::{lexer::lex, Mode, parse_tokens};
 //!
 //! let python_source = r#"
 //! def is_odd(i):
 //!     return bool(i & 1)
 //! "#;
-//! let tokens = make_tokenizer(python_source, Mode::Module);
+//! let tokens = lex(python_source, Mode::Module);
 //! let ast = parse_tokens(tokens, Mode::Module, "<embedded>");
 //!
 //! assert!(ast.is_ok());
@@ -98,7 +94,7 @@
 //! mode or tokenizing the source beforehand:
 //!
 //! ```
-//! use rustpython_parser::parser::parse_program;
+//! use rustpython_parser::parse_program;
 //!
 //! let python_source = r#"
 //! def is_odd(i):
@@ -111,27 +107,29 @@
 //!
 //! [lexical analysis]: https://en.wikipedia.org/wiki/Lexical_analysis
 //! [parsing]: https://en.wikipedia.org/wiki/Parsing
-//! [token]: crate::token
 //! [lexer]: crate::lexer
-//! [parser]: crate::parser
-//! [mode]: crate::mode
-//! [error]: crate::error

 #![doc(html_logo_url = "https://raw.githubusercontent.com/RustPython/RustPython/main/logo.png")]
 #![doc(html_root_url = "https://docs.rs/rustpython-parser/")]

-#[macro_use]
-extern crate log;
 pub use rustpython_ast as ast;

-pub mod error;
 mod function;
+// Skip flattening lexer to distinguish from full parser
 pub mod lexer;
-pub mod mode;
-pub mod parser;
+mod mode;
+mod parser;
 mod string;
 #[rustfmt::skip]
 mod python;
 mod context;
 mod soft_keywords;
-pub mod token;
+mod token;

+pub use mode::Mode;
+pub use parser::{
+    parse, parse_expression, parse_expression_located, parse_located, parse_program, parse_tokens,
+    ParseError, ParseErrorType,
+};
+pub use string::FStringErrorType;
+pub use token::{StringKind, Tok};

@@ -39,19 +39,17 @@ impl std::str::FromStr for Mode {
         match s {
             "exec" | "single" => Ok(Mode::Module),
             "eval" => Ok(Mode::Expression),
-            _ => Err(ModeParseError { _priv: () }),
+            _ => Err(ModeParseError(())),
         }
     }
 }

 /// Returned when a given mode is not valid.
 #[derive(Debug)]
-pub struct ModeParseError {
-    _priv: (),
-}
+pub struct ModeParseError(());

 impl std::fmt::Display for ModeParseError {
     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, r#"mode should be "exec", "eval", or "single""#)
+        write!(f, r#"mode must be "exec", "eval", or "single""#)
     }
 }
@@ -12,13 +12,18 @@
 //! [Abstract Syntax Tree]: https://en.wikipedia.org/wiki/Abstract_syntax_tree
 //! [`Mode`]: crate::mode

-use crate::lexer::{LexResult, Tok};
-pub use crate::mode::Mode;
-use crate::{ast, error::ParseError, lexer, python};
-use ast::Location;
+use crate::{
+    ast::{self, Location},
+    lexer::{self, LexResult, LexicalError, LexicalErrorType},
+    mode::Mode,
+    python,
+    token::Tok,
+};
 use itertools::Itertools;
 use std::iter;

+pub(super) use lalrpop_util::ParseError as LalrpopError;
+
 /// Parse a full Python program usually consisting of multiple lines.
 ///
 /// This is a convenience function that can be used to parse a full Python program without having to
@@ -29,7 +34,7 @@ use std::iter;
 /// For example, parsing a simple function definition and a call to that function:
 ///
 /// ```
-/// use rustpython_parser::parser;
+/// use rustpython_parser as parser;
 /// let source = r#"
 /// def foo():
 ///     return 42
@@ -57,7 +62,7 @@ pub fn parse_program(source: &str, source_path: &str) -> Result<ast::Suite, ParseError> {
 ///
 /// ```
 /// extern crate num_bigint;
-/// use rustpython_parser::{parser, ast};
+/// use rustpython_parser as parser;
 /// let expr = parser::parse_expression("1 + 2", "<embedded>");
 ///
 /// assert!(expr.is_ok());
@@ -78,8 +83,7 @@ pub fn parse_expression(source: &str, path: &str) -> Result<ast::Expr, ParseError> {
 /// somewhat silly, location:
 ///
 /// ```
-/// use rustpython_parser::parser::parse_expression_located;
-/// use rustpython_parser::ast::Location;
+/// use rustpython_parser::{ast::Location, parse_expression_located};
 ///
 /// let expr = parse_expression_located("1 + 2", "<embedded>", Location::new(5, 20));
 /// assert!(expr.is_ok());
@@ -106,8 +110,7 @@ pub fn parse_expression_located(
 /// parsing:
 ///
 /// ```
-/// use rustpython_parser::mode::Mode;
-/// use rustpython_parser::parser::parse;
+/// use rustpython_parser::{Mode, parse};
 ///
 /// let expr = parse("1 + 2", Mode::Expression, "<embedded>");
 /// assert!(expr.is_ok());
@@ -116,8 +119,7 @@ pub fn parse_expression_located(
 /// Alternatively, we can parse a full Python program consisting of multiple lines:
 ///
 /// ```
-/// use rustpython_parser::mode::Mode;
-/// use rustpython_parser::parser::parse;
+/// use rustpython_parser::{Mode, parse};
 ///
 /// let source = r#"
 /// class Greeter:
@@ -140,9 +142,7 @@ pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<ast::Mod, ParseError> {
 /// # Example
 ///
 /// ```
-/// use rustpython_parser::ast::Location;
-/// use rustpython_parser::mode::Mode;
-/// use rustpython_parser::parser::parse_located;
+/// use rustpython_parser::{ast::Location, Mode, parse_located};
 ///
 /// let source = r#"
 /// def fib(i):
@@ -162,7 +162,7 @@ pub fn parse_located(
     source_path: &str,
     location: Location,
 ) -> Result<ast::Mod, ParseError> {
-    let lxr = lexer::make_tokenizer_located(source, mode, location);
+    let lxr = lexer::lex_located(source, mode, location);
     parse_tokens(lxr, mode, source_path)
 }

@@ -173,14 +173,12 @@ pub fn parse_located(
 /// # Example
 ///
 /// As an example, instead of parsing a string, we can parse a list of tokens after we generate
-/// them using the [`lexer::make_tokenizer`] function:
+/// them using the [`lexer::lex`] function:
 ///
 /// ```
-/// use rustpython_parser::lexer::make_tokenizer;
-/// use rustpython_parser::mode::Mode;
-/// use rustpython_parser::parser::parse_tokens;
+/// use rustpython_parser::{lexer::lex, Mode, parse_tokens};
 ///
-/// let expr = parse_tokens(make_tokenizer("1 + 2", Mode::Expression), Mode::Expression, "<embedded>");
+/// let expr = parse_tokens(lex("1 + 2", Mode::Expression), Mode::Expression, "<embedded>");
 /// assert!(expr.is_ok());
 /// ```
 pub fn parse_tokens(
@ -189,12 +187,127 @@ pub fn parse_tokens(
|
||||||
source_path: &str,
|
source_path: &str,
|
||||||
) -> Result<ast::Mod, ParseError> {
|
) -> Result<ast::Mod, ParseError> {
|
||||||
let marker_token = (Default::default(), mode.to_marker(), Default::default());
|
let marker_token = (Default::default(), mode.to_marker(), Default::default());
|
||||||
let tokenizer = iter::once(Ok(marker_token))
|
let lexer = iter::once(Ok(marker_token))
|
||||||
.chain(lxr)
|
.chain(lxr)
|
||||||
.filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
|
.filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
|
||||||
python::TopParser::new()
|
python::TopParser::new()
|
||||||
.parse(tokenizer.into_iter())
|
.parse(lexer.into_iter())
|
||||||
.map_err(|e| crate::error::parse_error_from_lalrpop(e, source_path))
|
.map_err(|e| parse_error_from_lalrpop(e, source_path))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Represents represent errors that occur during parsing and are
|
||||||
|
/// returned by the `parse_*` functions.
|
||||||
|
pub type ParseError = rustpython_compiler_core::BaseError<ParseErrorType>;
|
||||||
|
|
||||||
|
/// Represents the different types of errors that can occur during parsing.
|
||||||
|
#[derive(Debug, PartialEq, thiserror::Error)]
|
||||||
|
pub enum ParseErrorType {
|
||||||
|
/// Parser encountered an unexpected end of input
|
||||||
|
Eof,
|
||||||
|
/// Parser encountered an extra token
|
||||||
|
ExtraToken(Tok),
|
||||||
|
/// Parser encountered an invalid token
|
||||||
|
InvalidToken,
|
||||||
|
/// Parser encountered an unexpected token
|
||||||
|
UnrecognizedToken(Tok, Option<String>),
|
||||||
|
// Maps to `User` type from `lalrpop-util`
|
||||||
|
/// Parser encountered an error during lexing.
|
||||||
|
Lexical(LexicalErrorType),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert `lalrpop_util::ParseError` to our internal type
|
||||||
|
fn parse_error_from_lalrpop(
    err: LalrpopError<Location, Tok, LexicalError>,
    source_path: &str,
) -> ParseError {
    let source_path = source_path.to_owned();
    match err {
        // TODO: Are there cases where this isn't an EOF?
        LalrpopError::InvalidToken { location } => ParseError {
            error: ParseErrorType::Eof,
            location,
            source_path,
        },
        LalrpopError::ExtraToken { token } => ParseError {
            error: ParseErrorType::ExtraToken(token.1),
            location: token.0,
            source_path,
        },
        LalrpopError::User { error } => ParseError {
            error: ParseErrorType::Lexical(error.error),
            location: error.location,
            source_path,
        },
        LalrpopError::UnrecognizedToken { token, expected } => {
            // Hacky, but it's how CPython does it. See PyParser_AddToken,
            // in particular "Only one possible expected token" comment.
            let expected = (expected.len() == 1).then(|| expected[0].clone());
            ParseError {
                error: ParseErrorType::UnrecognizedToken(token.1, expected),
                location: token.0.with_col_offset(1),
                source_path,
            }
        }
        LalrpopError::UnrecognizedEOF { location, expected } => {
            // This could be an initial indentation error that we should ignore
            let indent_error = expected == ["Indent"];
            if indent_error {
                ParseError {
                    error: ParseErrorType::Lexical(LexicalErrorType::IndentationError),
                    location,
                    source_path,
                }
            } else {
                ParseError {
                    error: ParseErrorType::Eof,
                    location,
                    source_path,
                }
            }
        }
    }
}

impl std::fmt::Display for ParseErrorType {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match *self {
            ParseErrorType::Eof => write!(f, "Got unexpected EOF"),
            ParseErrorType::ExtraToken(ref tok) => write!(f, "Got extraneous token: {tok:?}"),
            ParseErrorType::InvalidToken => write!(f, "Got invalid token"),
            ParseErrorType::UnrecognizedToken(ref tok, ref expected) => {
                if *tok == Tok::Indent {
                    write!(f, "unexpected indent")
                } else if expected.as_deref() == Some("Indent") {
                    write!(f, "expected an indented block")
                } else {
                    write!(f, "invalid syntax. Got unexpected token {tok}")
                }
            }
            ParseErrorType::Lexical(ref error) => write!(f, "{error}"),
        }
    }
}

impl ParseErrorType {
    /// Returns true if the error is an indentation error.
    pub fn is_indentation_error(&self) -> bool {
        match self {
            ParseErrorType::Lexical(LexicalErrorType::IndentationError) => true,
            ParseErrorType::UnrecognizedToken(token, expected) => {
                *token == Tok::Indent || expected.clone() == Some("Indent".to_owned())
            }
            _ => false,
        }
    }

    /// Returns true if the error is a tab error.
    pub fn is_tab_error(&self) -> bool {
        matches!(
            self,
            ParseErrorType::Lexical(LexicalErrorType::TabError)
                | ParseErrorType::Lexical(LexicalErrorType::TabsAfterSpaces)
        )
    }
}
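For orientation, a minimal sketch of how these error helpers are typically consumed through the flattened interface. This is not part of the commit; it assumes the crate-level `parse(source, mode, source_path)` entry point and a crate-root `Mode` re-export, so treat the exact signature as an assumption.

// Hedged sketch, not part of this change. Assumes `rustpython_parser::parse`
// returns Result<ast::Mod, ParseError> and that `Mode` is exposed at the crate root.
use rustpython_parser::{parse, Mode};

fn describe_failure(source: &str) {
    match parse(source, Mode::Module, "<embedded>") {
        Ok(_) => println!("parsed successfully"),
        Err(err) => {
            // `err.error` is the ParseErrorType defined above; Display gives the message.
            println!("{}", err.error);
            if err.error.is_indentation_error() {
                println!("indentation problem");
            }
        }
    }
}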
#[cfg(test)]
@ -1,8 +1,6 @@
use crate::{lexer::LexResult, mode::Mode, token::Tok};
use itertools::{Itertools, MultiPeek};
use crate::lexer::{LexResult, Tok};
pub use crate::mode::Mode;

/// An [`Iterator`] that transforms a token stream to accommodate soft keywords (namely, `match`
/// and `case`).
///
@ -27,9 +25,9 @@ impl<I> SoftKeywordTransformer<I>
where
    I: Iterator<Item = LexResult>,
{
    pub fn new(tokenizer: I, mode: Mode) -> Self {
    pub fn new(lexer: I, mode: Mode) -> Self {
        Self {
            underlying: tokenizer.multipeek(),
            underlying: lexer.multipeek(),
            start_of_line: matches!(mode, Mode::Interactive | Mode::Module),
        }
    }
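A small usage sketch (not part of this diff) of how the transformer wraps a lexer-like iterator; the only items assumed are the `SoftKeywordTransformer::new` signature and the `Mode` variants shown above.

// Hedged sketch: wraps an arbitrary iterator of LexResult items so that
// `match` / `case` can be re-interpreted as soft keywords where the grammar allows.
fn soft_keyword_tokens<I>(lexer: I) -> SoftKeywordTransformer<I>
where
    I: Iterator<Item = LexResult>,
{
    SoftKeywordTransformer::new(lexer, Mode::Module)
}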
@ -3,22 +3,19 @@
// The lexer doesn't do any special handling of f-strings, it just treats them as
// regular strings. Since the parser has no definition of f-string formats (Pending PEP 701)
// we have to do the parsing here, manually.
use itertools::Itertools;
use self::FStringErrorType::*;
use crate::{
    ast::{Constant, ConversionFlag, Expr, ExprKind, Location},
    error::{FStringError, FStringErrorType, LexicalError, LexicalErrorType, ParseError},
    lexer::{LexicalError, LexicalErrorType},
    parser::parse_expression_located,
    parser::{parse_expression_located, LalrpopError, ParseError, ParseErrorType},
    token::StringKind,
    token::{StringKind, Tok},
};
use std::{iter, str};
use itertools::Itertools;

// unicode_name2 does not expose `MAX_NAME_LENGTH`, so we replicate that constant here, fix #3798
const MAX_UNICODE_NAME: usize = 88;

struct StringParser<'a> {
    chars: iter::Peekable<str::Chars<'a>>,
    chars: std::iter::Peekable<std::str::Chars<'a>>,
    kind: StringKind,
    start: Location,
    end: Location,
@ -178,6 +175,8 @@ impl<'a> StringParser<'a> {
    }

    fn parse_formatted_value(&mut self, nested: u8) -> Result<Vec<Expr>, LexicalError> {
        use FStringErrorType::*;

        let mut expression = String::new();
        let mut spec = None;
        let mut delims = Vec::new();
@ -403,6 +402,8 @@ impl<'a> StringParser<'a> {
    }

    fn parse_fstring(&mut self, nested: u8) -> Result<Vec<Expr>, LexicalError> {
        use FStringErrorType::*;

        if nested >= 2 {
            return Err(FStringError::new(ExpressionNestedTooDeeply, self.get_pos()).into());
        }
@ -651,6 +652,108 @@ pub(crate) fn parse_strings(
    ))
}

// TODO: consolidate these with ParseError
/// An error that occurred during parsing of an f-string.
#[derive(Debug, PartialEq)]
struct FStringError {
    /// The type of error that occurred.
    pub error: FStringErrorType,
    /// The location of the error.
    pub location: Location,
}

impl FStringError {
    /// Creates a new `FStringError` with the given error type and location.
    pub fn new(error: FStringErrorType, location: Location) -> Self {
        Self { error, location }
    }
}

impl From<FStringError> for LexicalError {
    fn from(err: FStringError) -> Self {
        LexicalError {
            error: LexicalErrorType::FStringError(err.error),
            location: err.location,
        }
    }
}

/// Represents the different types of errors that can occur during parsing of an f-string.
#[derive(Debug, PartialEq)]
pub enum FStringErrorType {
    /// Expected a right brace after an opened left brace.
    UnclosedLbrace,
    /// Expected a left brace after an ending right brace.
    UnopenedRbrace,
    /// Expected a right brace after a conversion flag.
    ExpectedRbrace,
    /// An error occurred while parsing an f-string expression.
    InvalidExpression(Box<ParseErrorType>),
    /// An invalid conversion flag was encountered.
    InvalidConversionFlag,
    /// An empty expression was encountered.
    EmptyExpression,
    /// An opening delimiter was not closed properly.
    MismatchedDelimiter(char, char),
    /// Too many nested expressions in an f-string.
    ExpressionNestedTooDeeply,
    /// The f-string expression cannot include the given character.
    ExpressionCannotInclude(char),
    /// A single right brace was encountered.
    SingleRbrace,
    /// A closing delimiter was not opened properly.
    Unmatched(char),
    // TODO: Test this case.
    /// Unterminated string.
    UnterminatedString,
}

impl std::fmt::Display for FStringErrorType {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        use FStringErrorType::*;
        match self {
            UnclosedLbrace => write!(f, "expecting '}}'"),
            UnopenedRbrace => write!(f, "Unopened '}}'"),
            ExpectedRbrace => write!(f, "Expected '}}' after conversion flag."),
            InvalidExpression(error) => {
                write!(f, "{error}")
            }
            InvalidConversionFlag => write!(f, "invalid conversion character"),
            EmptyExpression => write!(f, "empty expression not allowed"),
            MismatchedDelimiter(first, second) => write!(
                f,
                "closing parenthesis '{second}' does not match opening parenthesis '{first}'"
            ),
            SingleRbrace => write!(f, "single '}}' is not allowed"),
            Unmatched(delim) => write!(f, "unmatched '{delim}'"),
            ExpressionNestedTooDeeply => {
                write!(f, "expressions nested too deeply")
            }
            UnterminatedString => {
                write!(f, "unterminated string")
            }
            ExpressionCannotInclude(c) => {
                if *c == '\\' {
                    write!(f, "f-string expression part cannot include a backslash")
                } else {
                    write!(f, "f-string expression part cannot include '{c}'s")
                }
            }
        }
    }
}

impl From<FStringError> for LalrpopError<Location, Tok, LexicalError> {
    fn from(err: FStringError) -> Self {
        lalrpop_util::ParseError::User {
            error: LexicalError {
                error: LexicalErrorType::FStringError(err.error),
                location: err.location,
            },
        }
    }
}
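As a quick illustration of the error plumbing moved into this file (a hedged sketch, not code from this commit), an f-string failure can be built and lifted into the lexer's error type; the `Display` impl supplies the user-facing message.

// Hedged sketch: exercises only items defined above. `Location::default()` is
// assumed to be available, as in other tests in this repository.
fn fstring_error_roundtrip() {
    let err = FStringError::new(FStringErrorType::UnclosedLbrace, Location::default());
    // Display for FStringErrorType renders the CPython-style message.
    assert_eq!(err.error.to_string(), "expecting '}'");
    // The From impl lifts the error into the lexer's error type.
    let lexical: LexicalError = err.into();
    assert!(matches!(lexical.error, LexicalErrorType::FStringError(_)));
}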

#[cfg(test)]
mod tests {
    use super::*;

@ -732,6 +835,7 @@ mod tests {

    #[test]
    fn test_parse_invalid_fstring() {
        use FStringErrorType::*;
        assert_eq!(parse_fstring_error("{5!a"), UnclosedLbrace);
        assert_eq!(parse_fstring_error("{5!a1}"), UnclosedLbrace);
        assert_eq!(parse_fstring_error("{5!"), UnclosedLbrace);
@ -1,8 +1,7 @@
use rustpython_codegen::{compile, symboltable};
use rustpython_parser::{
    self as parser,
    ast::{fold::Fold, ConstantOptimizer},
    error::ParseErrorType,
    parser,
};

pub use rustpython_codegen::compile::CompileOpts;

@ -13,13 +12,13 @@ pub enum CompileErrorType {
    #[error(transparent)]
    Codegen(#[from] rustpython_codegen::error::CodegenErrorType),
    #[error(transparent)]
    Parse(#[from] rustpython_parser::error::ParseErrorType),
    Parse(#[from] parser::ParseErrorType),
}

pub type CompileError = rustpython_compiler_core::CompileError<CompileErrorType>;

fn error_from_parse(error: rustpython_parser::error::ParseError, source: &str) -> CompileError {
fn error_from_parse(error: parser::ParseError, source: &str) -> CompileError {
    let error: CompileErrorBody<ParseErrorType> = error.into();
    let error: CompileErrorBody<parser::ParseErrorType> = error.into();
    CompileError::from(error, source)
}
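The `#[from]` attribute on the `Parse` variant means thiserror derives the conversion from the parser's error type, which is what `error_from_parse` relies on indirectly. A hedged sketch, not part of this change:

// Hedged sketch: relies only on the #[from] conversion derived above.
fn lift(err: parser::ParseErrorType) -> CompileErrorType {
    // Equivalent to CompileErrorType::Parse(err).
    CompileErrorType::from(err)
}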