reorganize compiler crates

This commit is contained in:
Jeong YunWon 2022-08-22 04:02:00 +09:00
parent 3351b4408b
commit 060d153bb3
82 changed files with 12368 additions and 164 deletions

235
parser/src/error.rs Normal file
View file

@ -0,0 +1,235 @@
//! Define internal parse error types
//! The goal is to provide a matching and safe error API, masking errors from LALR
use lalrpop_util::ParseError as LalrpopError;
use crate::ast::Location;
use crate::token::Tok;
use std::error::Error;
use std::fmt;
/// Represents an error during lexical scanning.
///
/// Pairs the kind of failure with the source location it is attributed to.
#[derive(Debug, PartialEq)]
pub struct LexicalError {
/// What went wrong.
pub error: LexicalErrorType,
/// Source location attached to the error.
pub location: Location,
}
/// The kinds of failure that can be carried by a [`LexicalError`].
///
/// The user-facing message for each variant is produced by this type's
/// `Display` implementation.
#[derive(Debug, PartialEq)]
pub enum LexicalErrorType {
StringError,
UnicodeError,
NestingError,
IndentationError,
TabError,
TabsAfterSpaces,
DefaultArgumentError,
PositionalArgumentError,
DuplicateKeywordArgumentError,
/// Carries the offending character, which is echoed in the message.
UnrecognizedToken { tok: char },
/// Wraps an error raised while parsing the body of an f-string.
FStringError(FStringErrorType),
LineContinuationError,
Eof,
/// Free-form message, displayed verbatim.
OtherError(String),
}
impl fmt::Display for LexicalErrorType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
LexicalErrorType::StringError => write!(f, "Got unexpected string"),
LexicalErrorType::FStringError(error) => write!(f, "Got error in f-string: {}", error),
LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"),
LexicalErrorType::NestingError => write!(f, "Got unexpected nesting"),
LexicalErrorType::IndentationError => {
write!(f, "unindent does not match any outer indentation level")
}
LexicalErrorType::TabError => {
write!(f, "inconsistent use of tabs and spaces in indentation")
}
LexicalErrorType::TabsAfterSpaces => {
write!(f, "Tabs not allowed as part of indentation after spaces")
}
LexicalErrorType::DefaultArgumentError => {
write!(f, "non-default argument follows default argument")
}
LexicalErrorType::DuplicateKeywordArgumentError => {
write!(f, "keyword argument repeated")
}
LexicalErrorType::PositionalArgumentError => {
write!(f, "positional argument follows keyword argument")
}
LexicalErrorType::UnrecognizedToken { tok } => {
write!(f, "Got unexpected token {}", tok)
}
LexicalErrorType::LineContinuationError => {
write!(f, "unexpected character after line continuation character")
}
LexicalErrorType::Eof => write!(f, "unexpected EOF while parsing"),
LexicalErrorType::OtherError(msg) => write!(f, "{}", msg),
}
}
}
// TODO: consolidate these with ParseError
/// An error raised while parsing an f-string, together with the location it
/// is attributed to.
#[derive(Debug, PartialEq)]
pub struct FStringError {
/// What went wrong inside the f-string.
pub error: FStringErrorType,
/// Source location attached to the error.
pub location: Location,
}
/// The kinds of failure that can occur while parsing an f-string body.
#[derive(Debug, PartialEq)]
pub enum FStringErrorType {
UnclosedLbrace,
UnopenedRbrace,
ExpectedRbrace,
/// The embedded expression failed to parse; carries the underlying parse
/// error kind (boxed, since `ParseErrorType` can in turn contain this type).
InvalidExpression(Box<ParseErrorType>),
InvalidConversionFlag,
EmptyExpression,
MismatchedDelimiter,
ExpressionNestedTooDeeply,
}
impl fmt::Display for FStringErrorType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
FStringErrorType::UnclosedLbrace => write!(f, "Unclosed '{{'"),
FStringErrorType::UnopenedRbrace => write!(f, "Unopened '}}'"),
FStringErrorType::ExpectedRbrace => write!(f, "Expected '}}' after conversion flag."),
FStringErrorType::InvalidExpression(error) => {
write!(f, "Invalid expression: {}", error)
}
FStringErrorType::InvalidConversionFlag => write!(f, "Invalid conversion flag"),
FStringErrorType::EmptyExpression => write!(f, "Empty expression"),
FStringErrorType::MismatchedDelimiter => write!(f, "Mismatched delimiter"),
FStringErrorType::ExpressionNestedTooDeeply => {
write!(f, "expressions nested too deeply")
}
}
}
}
impl From<FStringError> for LalrpopError<Location, Tok, LexicalError> {
fn from(err: FStringError) -> Self {
lalrpop_util::ParseError::User {
error: LexicalError {
error: LexicalErrorType::FStringError(err.error),
location: err.location,
},
}
}
}
/// Represents an error during parsing
///
/// Pairs the kind of failure with the source location it is attributed to.
#[derive(Debug, PartialEq)]
pub struct ParseError {
/// What went wrong.
pub error: ParseErrorType,
/// Source location attached to the error.
pub location: Location,
}
/// The kinds of failure that can be carried by a [`ParseError`].
#[derive(Debug, PartialEq)]
pub enum ParseErrorType {
/// Parser encountered an unexpected end of input
Eof,
/// Parser encountered an extra token
ExtraToken(Tok),
/// Parser encountered an invalid token
// Note: the conversion from lalrpop errors in this file currently reports
// lalrpop's `InvalidToken` as `Eof`, so it does not produce this variant.
InvalidToken,
/// Parser encountered an unexpected token
// The second field is the single expected token name, when exactly one
// token was expected; otherwise `None`.
UnrecognizedToken(Tok, Option<String>),
/// Maps to `User` type from `lalrpop-util`
Lexical(LexicalErrorType),
}
/// Convert `lalrpop_util::ParseError` to our internal type
impl From<LalrpopError<Location, Tok, LexicalError>> for ParseError {
fn from(err: LalrpopError<Location, Tok, LexicalError>) -> Self {
match err {
// TODO: Are there cases where this isn't an EOF?
LalrpopError::InvalidToken { location } => ParseError {
error: ParseErrorType::Eof,
location,
},
LalrpopError::ExtraToken { token } => ParseError {
error: ParseErrorType::ExtraToken(token.1),
location: token.0,
},
LalrpopError::User { error } => ParseError {
error: ParseErrorType::Lexical(error.error),
location: error.location,
},
LalrpopError::UnrecognizedToken { token, expected } => {
// Hacky, but it's how CPython does it. See PyParser_AddToken,
// in particular "Only one possible expected token" comment.
let expected = (expected.len() == 1).then(|| expected[0].clone());
ParseError {
error: ParseErrorType::UnrecognizedToken(token.1, expected),
location: token.0,
}
}
LalrpopError::UnrecognizedEOF { location, .. } => ParseError {
error: ParseErrorType::Eof,
location,
},
}
}
}
/// Formats as `<error> at <location>`.
impl fmt::Display for ParseError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Self { error, location } = self;
        write!(f, "{} at {}", error, location)
    }
}
impl fmt::Display for ParseErrorType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
ParseErrorType::Eof => write!(f, "Got unexpected EOF"),
ParseErrorType::ExtraToken(ref tok) => write!(f, "Got extraneous token: {:?}", tok),
ParseErrorType::InvalidToken => write!(f, "Got invalid token"),
ParseErrorType::UnrecognizedToken(ref tok, ref expected) => {
if *tok == Tok::Indent {
write!(f, "unexpected indent")
} else if expected.as_deref() == Some("Indent") {
write!(f, "expected an indented block")
} else {
write!(f, "invalid syntax. Got unexpected token {}", tok)
}
}
ParseErrorType::Lexical(ref error) => write!(f, "{}", error),
}
}
}
// Marks `ParseErrorType` as a standard error; no methods are overridden here.
impl Error for ParseErrorType {}
impl ParseErrorType {
    /// Returns `true` when this error is indentation-related.
    ///
    /// That is the case for a lexical `IndentationError`, for an unexpected
    /// `Indent` token, and for an unrecognized token where the only expected
    /// token was `Indent`.
    pub fn is_indentation_error(&self) -> bool {
        match self {
            ParseErrorType::Lexical(LexicalErrorType::IndentationError) => true,
            ParseErrorType::UnrecognizedToken(token, expected) => {
                // Compare through a borrowed `&str` rather than cloning the
                // option and allocating a fresh `String` for the comparison.
                *token == Tok::Indent || expected.as_deref() == Some("Indent")
            }
            _ => false,
        }
    }

    /// Returns `true` when this error is a lexical `TabError` or
    /// `TabsAfterSpaces`.
    pub fn is_tab_error(&self) -> bool {
        matches!(
            self,
            ParseErrorType::Lexical(LexicalErrorType::TabError)
                | ParseErrorType::Lexical(LexicalErrorType::TabsAfterSpaces)
        )
    }
}
/// Lets a `ParseError` be used where a `&ParseErrorType` is expected by
/// dereferencing to its `error` field.
impl std::ops::Deref for ParseError {
    type Target = ParseErrorType;

    fn deref(&self) -> &ParseErrorType {
        let Self { error, .. } = self;
        error
    }
}
impl Error for ParseError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
None
}
}

427
parser/src/fstring.rs Normal file
View file

@ -0,0 +1,427 @@
use self::FStringErrorType::*;
use crate::{
ast::{Constant, ConversionFlag, Expr, ExprKind, Location},
error::{FStringError, FStringErrorType, ParseError},
parser::parse_expression,
};
use std::{iter, mem, str};
/// Character-level parser for the contents of an f-string.
struct FStringParser<'a> {
// Remaining characters of the f-string body, with one-character lookahead.
chars: iter::Peekable<str::Chars<'a>>,
// Location stamped onto every node built through `expr`.
str_location: Location,
// Nesting depth of this parser; `parse` refuses to run at depth 2 or more.
recurse_lvl: u8,
}
impl<'a> FStringParser<'a> {
/// Creates a parser over `source` whose nodes are stamped with
/// `str_location`, starting at nesting depth `recurse_lvl`.
fn new(source: &'a str, str_location: Location, recurse_lvl: u8) -> Self {
    let chars = source.chars().peekable();
    Self {
        chars,
        str_location,
        recurse_lvl,
    }
}
/// Wraps `node` in an `Expr` located at this parser's string location.
#[inline]
fn expr(&self, node: ExprKind) -> Expr {
    let location = self.str_location;
    Expr::new(location, node)
}
/// Parses one replacement field. The caller has already consumed the
/// opening `{`; this consumes everything up to and including the matching `}`.
///
/// Returns the expressions the field expands to: a single `FormattedValue`,
/// or, for the self-documenting `{expr=}` form, two `Constant`s (the
/// `expr=` text and any trailing spaces) followed by the `FormattedValue`.
///
/// # Errors
///
/// * `EmptyExpression` - the expression part is empty or whitespace only.
/// * `InvalidConversionFlag` / `ExpectedRbrace` - malformed `!s`/`!a`/`!r`
///   conversion, or unexpected text after `=` in a self-documenting field.
/// * `MismatchedDelimiter` - brackets inside the expression do not pair up.
/// * `UnclosedLbrace` - input ended before the field (or a nested field in
///   the format spec) was closed.
/// * `InvalidExpression` - the collected expression text failed to parse.
/// * Any error produced while parsing a nested field in the format spec.
fn parse_formatted_value(&mut self) -> Result<Vec<Expr>, FStringErrorType> {
    let mut expression = String::new();
    let mut spec = None;
    let mut delims = Vec::new();
    let mut conversion = ConversionFlag::None;
    let mut self_documenting = false;
    let mut trailing_seq = String::new();

    while let Some(ch) = self.chars.next() {
        match ch {
            // can be integrated better with the remaining code, but as a starting point ok
            // in general I would do here a tokenizing of the fstrings to omit this peeking.
            '!' if self.chars.peek() == Some(&'=') => {
                expression.push_str("!=");
                self.chars.next();
            }
            '=' if self.chars.peek() == Some(&'=') => {
                expression.push_str("==");
                self.chars.next();
            }
            '>' if self.chars.peek() == Some(&'=') => {
                expression.push_str(">=");
                self.chars.next();
            }
            '<' if self.chars.peek() == Some(&'=') => {
                expression.push_str("<=");
                self.chars.next();
            }
            // Conversion flag: `!s`, `!a` or `!r`, which must be followed by
            // `}` or `:`.
            '!' if delims.is_empty() && self.chars.peek() != Some(&'=') => {
                if expression.trim().is_empty() {
                    return Err(EmptyExpression);
                }
                conversion = match self.chars.next() {
                    Some('s') => ConversionFlag::Str,
                    Some('a') => ConversionFlag::Ascii,
                    Some('r') => ConversionFlag::Repr,
                    Some(_) => return Err(InvalidConversionFlag),
                    None => return Err(ExpectedRbrace),
                };
                if !matches!(self.chars.peek(), Some('}') | Some(':')) {
                    return Err(ExpectedRbrace);
                }
            }
            // match a python 3.8 self documenting expression
            // format '{' PYTHON_EXPRESSION '=' FORMAT_SPECIFIER? '}'
            '=' if self.chars.peek() != Some(&'=') && delims.is_empty() => {
                self_documenting = true;
            }
            // Format spec: literal text interleaved with nested `{...}`
            // fields, collected up to (but not including) the closing `}`.
            ':' if delims.is_empty() => {
                let mut nested = 0;
                let mut spec_constructor = Vec::new();
                let mut constant_piece = String::new();
                let mut formatted_value_piece = String::new();
                while let Some(&next) = self.chars.peek() {
                    match next {
                        '{' if nested > 0 => {
                            nested += 1;
                            formatted_value_piece.push(next);
                        }
                        '}' if nested > 0 => {
                            nested -= 1;
                            formatted_value_piece.push(next);
                            if nested == 0 {
                                // A complete nested field: parse it with a
                                // child parser one nesting level deeper.
                                let value = FStringParser::new(
                                    &formatted_value_piece,
                                    Location::default(),
                                    self.recurse_lvl + 1,
                                )
                                .parse()?;
                                spec_constructor.push(self.expr(ExprKind::FormattedValue {
                                    value: Box::new(value),
                                    conversion: ConversionFlag::None as _,
                                    format_spec: None,
                                }));
                                formatted_value_piece.clear();
                            }
                        }
                        _ if nested > 0 => {
                            formatted_value_piece.push(next);
                        }
                        '{' => {
                            nested += 1;
                            // Flush the literal text gathered so far.
                            spec_constructor.push(self.expr(ExprKind::Constant {
                                value: mem::take(&mut constant_piece).into(),
                                kind: None,
                            }));
                            formatted_value_piece.push(next);
                            formatted_value_piece.push(' ');
                        }
                        '}' => break,
                        _ => {
                            constant_piece.push(next);
                        }
                    }
                    self.chars.next();
                }
                spec_constructor.push(self.expr(ExprKind::Constant {
                    value: constant_piece.into(),
                    kind: None,
                }));
                if nested > 0 {
                    return Err(UnclosedLbrace);
                }
                spec = Some(Box::new(self.expr(ExprKind::JoinedStr {
                    values: spec_constructor,
                })))
            }
            '(' | '{' | '[' => {
                expression.push(ch);
                delims.push(ch);
            }
            ')' => {
                if delims.pop() != Some('(') {
                    return Err(MismatchedDelimiter);
                }
                expression.push(ch);
            }
            ']' => {
                if delims.pop() != Some('[') {
                    return Err(MismatchedDelimiter);
                }
                expression.push(ch);
            }
            '}' if !delims.is_empty() => {
                if delims.pop() != Some('{') {
                    return Err(MismatchedDelimiter);
                }
                expression.push(ch);
            }
            // End of the replacement field.
            '}' => {
                // Whitespace-only text must be rejected as well: the
                // expression is wrapped in parentheses before parsing, so
                // `( )` would otherwise be accepted as an empty tuple.
                if expression.trim().is_empty() {
                    return Err(EmptyExpression);
                }
                let value = Box::new(
                    parse_fstring_expr(&expression)
                        .map_err(|e| InvalidExpression(Box::new(e.error)))?,
                );
                if !self_documenting {
                    return Ok(vec![self.expr(ExprKind::FormattedValue {
                        value,
                        conversion: conversion as _,
                        format_spec: spec,
                    })]);
                }
                // A self-documenting field with neither a conversion nor a
                // format spec falls back to the `Repr` conversion.
                if conversion == ConversionFlag::None && spec.is_none() {
                    conversion = ConversionFlag::Repr;
                }
                expression.push('=');
                return Ok(vec![
                    self.expr(ExprKind::Constant {
                        value: Constant::Str(expression),
                        kind: None,
                    }),
                    self.expr(ExprKind::Constant {
                        value: trailing_seq.into(),
                        kind: None,
                    }),
                    self.expr(ExprKind::FormattedValue {
                        value,
                        conversion: conversion as _,
                        format_spec: spec,
                    }),
                ]);
            }
            // Copy a quoted string verbatim so that braces and other special
            // characters inside it are not interpreted.
            '"' | '\'' => {
                expression.push(ch);
                for next in &mut self.chars {
                    expression.push(next);
                    if next == ch {
                        break;
                    }
                }
            }
            ' ' if self_documenting => {
                trailing_seq.push(ch);
            }
            _ => {
                // After the `=` of a self-documenting field only spaces, a
                // conversion, a format spec or the closing brace may follow.
                if self_documenting {
                    return Err(ExpectedRbrace);
                }
                expression.push(ch);
            }
        }
    }
    Err(UnclosedLbrace)
}
/// Consumes the parser and turns the whole f-string body into a `JoinedStr`.
///
/// Literal text becomes `Constant` pieces, `{{` / `}}` are unescaped to a
/// single brace, and each `{...}` field is delegated to
/// `parse_formatted_value`. Fails with `ExpressionNestedTooDeeply` when the
/// parser was created at nesting level 2 or more, and with `UnopenedRbrace`
/// on a lone `}`.
fn parse(mut self) -> Result<Expr, FStringErrorType> {
    if self.recurse_lvl >= 2 {
        return Err(ExpressionNestedTooDeeply);
    }

    let mut literal = String::new();
    let mut pieces = Vec::new();
    while let Some(ch) = self.chars.next() {
        match ch {
            // A doubled brace is an escaped literal brace.
            '{' | '}' if self.chars.peek() == Some(&ch) => {
                self.chars.next();
                literal.push(ch);
            }
            '{' => {
                // Flush pending literal text before the replacement field.
                if !literal.is_empty() {
                    let text = mem::take(&mut literal);
                    pieces.push(self.expr(ExprKind::Constant {
                        value: text.into(),
                        kind: None,
                    }));
                }
                pieces.extend(self.parse_formatted_value()?);
            }
            '}' => return Err(UnopenedRbrace),
            other => literal.push(other),
        }
    }

    if !literal.is_empty() {
        pieces.push(self.expr(ExprKind::Constant {
            value: literal.into(),
            kind: None,
        }))
    }
    Ok(self.expr(ExprKind::JoinedStr { values: pieces }))
}
}
/// Parses the expression text of a replacement field, wrapping it in
/// parentheses before handing it to `parse_expression`.
fn parse_fstring_expr(source: &str) -> Result<Expr, ParseError> {
    parse_expression(&format!("({})", source))
}
/// Parse an fstring from a string, located at a certain position in the sourcecode.
/// In case of errors, we will get the location and the error returned.
pub fn parse_located_fstring(source: &str, location: Location) -> Result<Expr, FStringError> {
FStringParser::new(source, location, 0)
.parse()
.map_err(|error| FStringError { error, location })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source` as an f-string body at the default location.
    fn parse_fstring(source: &str) -> Result<Expr, FStringErrorType> {
        FStringParser::new(source, Location::default(), 0).parse()
    }

    #[test]
    fn test_parse_fstring() {
        let source = "{a}{ b }{{foo}}";
        let parse_ast = parse_fstring(source).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }

    #[test]
    fn test_parse_fstring_nested_spec() {
        let source = "{foo:{spec}}";
        let parse_ast = parse_fstring(source).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }

    #[test]
    fn test_parse_fstring_not_nested_spec() {
        let source = "{foo:spec}";
        let parse_ast = parse_fstring(source).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }

    #[test]
    fn test_parse_empty_fstring() {
        insta::assert_debug_snapshot!(parse_fstring("").unwrap());
    }

    #[test]
    fn test_fstring_parse_selfdocumenting_base() {
        let src = "{user=}";
        let parse_ast = parse_fstring(src).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }

    #[test]
    fn test_fstring_parse_selfdocumenting_base_more() {
        let src = "mix {user=} with text and {second=}";
        let parse_ast = parse_fstring(src).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }

    #[test]
    fn test_fstring_parse_selfdocumenting_format() {
        let src = "{user=:>10}";
        let parse_ast = parse_fstring(src).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }

    #[test]
    fn test_parse_invalid_fstring() {
        assert_eq!(parse_fstring("{5!a"), Err(ExpectedRbrace));
        assert_eq!(parse_fstring("{5!a1}"), Err(ExpectedRbrace));
        assert_eq!(parse_fstring("{5!"), Err(ExpectedRbrace));
        assert_eq!(parse_fstring("abc{!a 'cat'}"), Err(EmptyExpression));
        assert_eq!(parse_fstring("{!a"), Err(EmptyExpression));
        assert_eq!(parse_fstring("{ !a}"), Err(EmptyExpression));

        assert_eq!(parse_fstring("{5!}"), Err(InvalidConversionFlag));
        assert_eq!(parse_fstring("{5!x}"), Err(InvalidConversionFlag));

        assert_eq!(parse_fstring("{a:{a:{b}}}"), Err(ExpressionNestedTooDeeply));

        assert_eq!(parse_fstring("{a:b}}"), Err(UnopenedRbrace));
        assert_eq!(parse_fstring("}"), Err(UnopenedRbrace));
        assert_eq!(parse_fstring("{a:{b}"), Err(UnclosedLbrace));
        assert_eq!(parse_fstring("{"), Err(UnclosedLbrace));

        assert_eq!(parse_fstring("{}"), Err(EmptyExpression));

        // A keyword is not an expression: the failure from the expression
        // parser must surface as `InvalidExpression`, not as any other error.
        assert!(matches!(
            parse_fstring("{class}"),
            Err(InvalidExpression(_))
        ));
    }

    #[test]
    fn test_parse_fstring_not_equals() {
        let source = "{1 != 2}";
        let parse_ast = parse_fstring(source).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }

    #[test]
    fn test_parse_fstring_equals() {
        let source = "{42 == 42}";
        let parse_ast = parse_fstring(source).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }

    #[test]
    fn test_parse_fstring_selfdoc_prec_space() {
        let source = "{x =}";
        let parse_ast = parse_fstring(source).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }

    #[test]
    fn test_parse_fstring_selfdoc_trailing_space() {
        let source = "{x= }";
        let parse_ast = parse_fstring(source).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }

    #[test]
    fn test_parse_fstring_yield_expr() {
        let source = "{yield}";
        let parse_ast = parse_fstring(source).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }
}

96
parser/src/function.rs Normal file
View file

@ -0,0 +1,96 @@
use ahash::RandomState;
use std::collections::HashSet;
use crate::ast;
use crate::error::{LexicalError, LexicalErrorType};
/// Call arguments split into positional and keyword parts, as returned by
/// `parse_args`.
pub struct ArgumentList {
/// Positional arguments, in source order.
pub args: Vec<ast::Expr>,
/// Keyword arguments, in source order.
pub keywords: Vec<ast::Keyword>,
}
/// Result of `parse_params`: (positional-only args, regular args, defaults).
type ParameterDefs = (Vec<ast::Arg>, Vec<ast::Arg>, Vec<ast::Expr>);
/// One declared parameter together with its optional default value.
type ParameterDef = (ast::Arg, Option<ast::Expr>);

/// Splits parameter definitions into their names and a flat list of defaults.
///
/// `params.0` holds the positional-only parameters and `params.1` the regular
/// ones. Defaults are collected across both groups in order. Once a default
/// has been seen, every later parameter must have one too; otherwise a
/// `DefaultArgumentError` located at the offending parameter is returned.
pub fn parse_params(
    params: (Vec<ParameterDef>, Vec<ParameterDef>),
) -> Result<ParameterDefs, LexicalError> {
    let (posonly_defs, regular_defs) = params;
    let mut defaults = vec![];

    // Strips the defaults off one group of parameters, enforcing that no
    // parameter without a default follows one that has a default.
    let mut split_group = |defs: Vec<ParameterDef>| -> Result<Vec<ast::Arg>, LexicalError> {
        let mut args = Vec::with_capacity(defs.len());
        for (arg, default) in defs {
            match default {
                Some(value) => defaults.push(value),
                None if !defaults.is_empty() => {
                    return Err(LexicalError {
                        error: LexicalErrorType::DefaultArgumentError,
                        location: arg.location,
                    });
                }
                None => {}
            }
            args.push(arg);
        }
        Ok(args)
    };

    let posonly = split_group(posonly_defs)?;
    let names = split_group(regular_defs)?;

    Ok((posonly, names, defaults))
}
type FunctionArgument = (Option<(ast::Location, Option<String>)>, ast::Expr);
pub fn parse_args(func_args: Vec<FunctionArgument>) -> Result<ArgumentList, LexicalError> {
let mut args = vec![];
let mut keywords = vec![];
let mut keyword_names = HashSet::with_capacity_and_hasher(func_args.len(), RandomState::new());
for (name, value) in func_args {
match name {
Some((location, name)) => {
if let Some(keyword_name) = &name {
if keyword_names.contains(keyword_name) {
return Err(LexicalError {
error: LexicalErrorType::DuplicateKeywordArgumentError,
location,
});
}
keyword_names.insert(keyword_name.clone());
}
keywords.push(ast::Keyword::new(
location,
ast::KeywordData {
arg: name,
value: Box::new(value),
},
));
}
None => {
// Allow starred args after keyword arguments.
if !keywords.is_empty() && !is_starred(&value) {
return Err(LexicalError {
error: LexicalErrorType::PositionalArgumentError,
location: value.location,
});
}
args.push(value);
}
}
}
Ok(ArgumentList { args, keywords })
}
/// Reports whether `exp` is a starred expression.
fn is_starred(exp: &ast::Expr) -> bool {
    let node = &exp.node;
    matches!(node, ast::ExprKind::Starred { .. })
}

1712
parser/src/lexer.rs Normal file

File diff suppressed because it is too large Load diff

34
parser/src/lib.rs Normal file
View file

@ -0,0 +1,34 @@
//! This crate can be used to parse Python source code into a
//! so-called AST (abstract syntax tree).
//!
//! The stages involved in this process are lexical analysis and
//! parsing. The lexical analysis splits the sourcecode into
//! tokens, and the parsing transforms those tokens into an AST.
//!
//! For example, one could do this:
//!
//! ```
//! use rustpython_parser::{parser, ast};
//!
//! let python_source = "print('Hello world')";
//! let python_ast = parser::parse_expression(python_source).unwrap();
//!
//! ```
#![doc(html_logo_url = "https://raw.githubusercontent.com/RustPython/RustPython/main/logo.png")]
#![doc(html_root_url = "https://docs.rs/rustpython-parser/")]
// Make the `log` crate's macros available throughout this crate.
#[macro_use]
extern crate log;
// Re-export the AST crate under the name `ast`, as used by the crate-level example.
pub use rustpython_ast as ast;
pub mod error;
mod fstring;
mod function;
pub mod lexer;
pub mod mode;
pub mod parser;
// NOTE(review): presumably holds the generated grammar, hence excluded from rustfmt — confirm.
#[rustfmt::skip]
mod python;
mod string;
pub mod token;

40
parser/src/mode.rs Normal file
View file

@ -0,0 +1,40 @@
use crate::token::Tok;
/// Selects what kind of input the parser should expect.
///
/// Each mode corresponds to a dedicated start-marker token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Mode {
    /// A whole module.
    Module,
    /// Interactive input.
    Interactive,
    /// A single expression.
    Expression,
}
impl Mode {
pub(crate) fn to_marker(self) -> Tok {
match self {
Self::Module => Tok::StartModule,
Self::Interactive => Tok::StartInteractive,
Self::Expression => Tok::StartExpression,
}
}
}
/// Parses a mode name: `"exec"`, `"eval"` or `"single"`.
impl std::str::FromStr for Mode {
    type Err = ModeParseError;

    fn from_str(s: &str) -> Result<Self, ModeParseError> {
        let mode = match s {
            "eval" => Mode::Expression,
            // Note that "single" is mapped to `Module`, the same as "exec".
            "exec" | "single" => Mode::Module,
            _ => return Err(ModeParseError { _priv: () }),
        };
        Ok(mode)
    }
}
/// Error returned when a string does not name a known parser mode.
#[derive(Debug)]
pub struct ModeParseError {
    // Private field so the type cannot be constructed outside this module.
    _priv: (),
}

impl std::fmt::Display for ModeParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        f.write_str(r#"mode should be "exec", "eval", or "single""#)
    }
}

// Lets the error be used with `?` into `Box<dyn Error>` and similar
// error-handling code; it has no underlying source.
impl std::error::Error for ModeParseError {}

202
parser/src/parser.rs Normal file
View file

@ -0,0 +1,202 @@
//! Python parsing.
//!
//! Use this module to parse python code into an AST.
//! There are three ways to parse python code. You could
//! parse a whole program, a single statement, or a single
//! expression.
use std::iter;
use crate::ast;
use crate::error::ParseError;
use crate::lexer;
pub use crate::mode::Mode;
use crate::python;
/*
* Parse python code.
* Grammar may be inspired by antlr grammar for python:
* https://github.com/antlr/grammars-v4/tree/master/python3
*/
/// Parse a full Python program, usually spanning multiple lines, and return
/// the statements of its module body.
pub fn parse_program(source: &str) -> Result<ast::Suite, ParseError> {
    match parse(source, Mode::Module)? {
        ast::Mod::Module { body, .. } => Ok(body),
        _ => unreachable!(),
    }
}
/// Parses a single Python expression.
///
/// # Example
/// ```
/// extern crate num_bigint;
/// use rustpython_parser::{parser, ast};
/// let expr = parser::parse_expression("1 + 2").unwrap();
///
/// assert_eq!(
///     expr,
///     ast::Expr {
///         location: ast::Location::new(1, 3),
///         custom: (),
///         node: ast::ExprKind::BinOp {
///             left: Box::new(ast::Expr {
///                 location: ast::Location::new(1, 1),
///                 custom: (),
///                 node: ast::ExprKind::Constant {
///                     value: ast::Constant::Int(1.into()),
///                     kind: None,
///                 }
///             }),
///             op: ast::Operator::Add,
///             right: Box::new(ast::Expr {
///                 location: ast::Location::new(1, 5),
///                 custom: (),
///                 node: ast::ExprKind::Constant {
///                     value: ast::Constant::Int(2.into()),
///                     kind: None,
///                 }
///             })
///         }
///     },
/// );
///
/// ```
pub fn parse_expression(source: &str) -> Result<ast::Expr, ParseError> {
    match parse(source, Mode::Expression)? {
        ast::Mod::Expression { body } => Ok(*body),
        _ => unreachable!(),
    }
}
/// Parse the given source code in the requested `mode`.
///
/// The token stream handed to the generated parser is the mode's marker
/// token followed by the tokens produced by the lexer.
pub fn parse(source: &str, mode: Mode) -> Result<ast::Mod, ParseError> {
    let source_tokens = lexer::make_tokenizer(source);
    // The marker is placed at default start/end locations.
    let marker = Ok((Default::default(), mode.to_marker(), Default::default()));
    let tokens = iter::once(marker).chain(source_tokens);

    let parsed = python::TopParser::new().parse(tokens);
    parsed.map_err(ParseError::from)
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_empty() {
        let parse_ast = parse_program("").unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }

    #[test]
    fn test_parse_string() {
        let source = "'Hello world'";
        let parse_ast = parse_program(source).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }

    #[test]
    fn test_parse_f_string() {
        let source = "f'Hello world'";
        let parse_ast = parse_program(source).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }

    #[test]
    fn test_parse_print_hello() {
        let source = "print('Hello world')";
        let parse_ast = parse_program(source).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }

    #[test]
    fn test_parse_print_2() {
        let source = "print('Hello world', 2)";
        let parse_ast = parse_program(source).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }

    #[test]
    fn test_parse_kwargs() {
        let source = "my_func('positional', keyword=2)";
        let parse_ast = parse_program(source).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }

    #[test]
    fn test_parse_if_elif_else() {
        let source = "if 1: 10\nelif 2: 20\nelse: 30";
        let parse_ast = parse_program(source).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }

    #[test]
    fn test_parse_lambda() {
        let source = "lambda x, y: x * y"; // lambda(x, y): x * y";
        let parse_ast = parse_program(source).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }

    #[test]
    fn test_parse_tuples() {
        let source = "a, b = 4, 5";

        insta::assert_debug_snapshot!(parse_program(source).unwrap());
    }

    #[test]
    fn test_parse_class() {
        // The class and method bodies must be indented for the source to be
        // valid Python.
        // NOTE(review): the indentation width inside this literal affects the
        // locations recorded in the stored snapshot — confirm against it.
        let source = "\
class Foo(A, B):
 def __init__(self):
  pass
 def method_with_default(self, arg='default'):
  pass";
        insta::assert_debug_snapshot!(parse_program(source).unwrap());
    }

    #[test]
    fn test_parse_dict_comprehension() {
        let source = "{x1: x2 for y in z}";
        let parse_ast = parse_expression(source).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }

    #[test]
    fn test_parse_list_comprehension() {
        let source = "[x for y in z]";
        let parse_ast = parse_expression(source).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }

    #[test]
    fn test_parse_double_list_comprehension() {
        let source = "[x for y, y2 in z for a in b if a < 5 if a > 10]";
        let parse_ast = parse_expression(source).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }

    #[test]
    fn test_parse_generator_comprehension() {
        let source = "(x for y in z)";
        let parse_ast = parse_expression(source).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }

    #[test]
    fn test_parse_named_expression_generator_comprehension() {
        let source = "(x := y + 1 for y in z)";
        let parse_ast = parse_expression(source).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }

    #[test]
    fn test_parse_if_else_generator_comprehension() {
        let source = "(x if y else y for y in z)";
        let parse_ast = parse_expression(source).unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }
}

3
parser/src/python.rs Normal file
View file

@ -0,0 +1,3 @@
// NOTE(review): lints are silenced wholesale here, presumably because the included file is generated parser code — confirm.
#![allow(clippy::all)]
#![allow(unused)]
// Textually includes `python.rs` from the parent directory into this module.
include!("../python.rs");

View file

@ -0,0 +1,63 @@
---
source: parser/src/fstring.rs
expression: parse_ast
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"user=",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "user",
ctx: Load,
},
},
conversion: 114,
format_spec: None,
},
},
],
},
}

View file

@ -0,0 +1,137 @@
---
source: parser/src/fstring.rs
expression: parse_ast
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"mix ",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"user=",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "user",
ctx: Load,
},
},
conversion: 114,
format_spec: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
" with text and ",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"second=",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "second",
ctx: Load,
},
},
conversion: 114,
format_spec: None,
},
},
],
},
}

View file

@ -0,0 +1,88 @@
---
source: parser/src/fstring.rs
expression: parse_ast
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"user=",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "user",
ctx: Load,
},
},
conversion: 0,
format_spec: Some(
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
">10",
),
kind: None,
},
},
],
},
},
),
},
},
],
},
}

View file

@ -0,0 +1,15 @@
---
source: parser/src/fstring.rs
expression: "parse_fstring(\"\").unwrap()"
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [],
},
}

View file

@ -0,0 +1,72 @@
---
source: parser/src/fstring.rs
expression: parse_ast
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "a",
ctx: Load,
},
},
conversion: 0,
format_spec: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Name {
id: "b",
ctx: Load,
},
},
conversion: 0,
format_spec: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"{foo}",
),
kind: None,
},
},
],
},
}

View file

@ -0,0 +1,66 @@
---
source: parser/src/fstring.rs
expression: parse_ast
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 5,
},
custom: (),
node: Compare {
left: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Constant {
value: Int(
42,
),
kind: None,
},
},
ops: [
Eq,
],
comparators: [
Located {
location: Location {
row: 1,
column: 8,
},
custom: (),
node: Constant {
value: Int(
42,
),
kind: None,
},
},
],
},
},
conversion: 0,
format_spec: None,
},
},
],
},
}

View file

@ -0,0 +1,119 @@
---
source: parser/src/fstring.rs
expression: parse_ast
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "foo",
ctx: Load,
},
},
conversion: 0,
format_spec: Some(
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Name {
id: "spec",
ctx: Load,
},
},
conversion: 0,
format_spec: None,
},
},
],
},
},
conversion: 0,
format_spec: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"",
),
kind: None,
},
},
],
},
},
),
},
},
],
},
}

View file

@ -0,0 +1,66 @@
---
source: parser/src/fstring.rs
expression: parse_ast
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 4,
},
custom: (),
node: Compare {
left: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Constant {
value: Int(
1,
),
kind: None,
},
},
ops: [
NotEq,
],
comparators: [
Located {
location: Location {
row: 1,
column: 7,
},
custom: (),
node: Constant {
value: Int(
2,
),
kind: None,
},
},
],
},
},
conversion: 0,
format_spec: None,
},
},
],
},
}

View file

@ -0,0 +1,62 @@
---
source: parser/src/fstring.rs
expression: parse_ast
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "foo",
ctx: Load,
},
},
conversion: 0,
format_spec: Some(
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"spec",
),
kind: None,
},
},
],
},
},
),
},
},
],
},
}

View file

@ -0,0 +1,63 @@
---
source: parser/src/fstring.rs
expression: parse_ast
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"x =",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "x",
ctx: Load,
},
},
conversion: 114,
format_spec: None,
},
},
],
},
}

View file

@ -0,0 +1,63 @@
---
source: parser/src/fstring.rs
expression: parse_ast
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"x=",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
" ",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "x",
ctx: Load,
},
},
conversion: 114,
format_spec: None,
},
},
],
},
}

View file

@ -0,0 +1,36 @@
---
source: parser/src/fstring.rs
expression: parse_ast
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Yield {
value: None,
},
},
conversion: 0,
format_spec: None,
},
},
],
},
}

View file

@ -0,0 +1,160 @@
---
source: parser/src/parser.rs
expression: parse_program(&source).unwrap()
---
[
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: ClassDef {
name: "Foo",
bases: [
Located {
location: Location {
row: 1,
column: 11,
},
custom: (),
node: Name {
id: "A",
ctx: Load,
},
},
Located {
location: Location {
row: 1,
column: 14,
},
custom: (),
node: Name {
id: "B",
ctx: Load,
},
},
],
keywords: [],
body: [
Located {
location: Location {
row: 2,
column: 2,
},
custom: (),
node: FunctionDef {
name: "__init__",
args: Arguments {
posonlyargs: [],
args: [
Located {
location: Location {
row: 2,
column: 15,
},
custom: (),
node: ArgData {
arg: "self",
annotation: None,
type_comment: None,
},
},
],
vararg: None,
kwonlyargs: [],
kw_defaults: [],
kwarg: None,
defaults: [],
},
body: [
Located {
location: Location {
row: 3,
column: 3,
},
custom: (),
node: Pass,
},
],
decorator_list: [],
returns: None,
type_comment: None,
},
},
Located {
location: Location {
row: 4,
column: 2,
},
custom: (),
node: FunctionDef {
name: "method_with_default",
args: Arguments {
posonlyargs: [],
args: [
Located {
location: Location {
row: 4,
column: 26,
},
custom: (),
node: ArgData {
arg: "self",
annotation: None,
type_comment: None,
},
},
Located {
location: Location {
row: 4,
column: 32,
},
custom: (),
node: ArgData {
arg: "arg",
annotation: None,
type_comment: None,
},
},
],
vararg: None,
kwonlyargs: [],
kw_defaults: [],
kwarg: None,
defaults: [
Located {
location: Location {
row: 4,
column: 37,
},
custom: (),
node: Constant {
value: Str(
"default",
),
kind: None,
},
},
],
},
body: [
Located {
location: Location {
row: 5,
column: 3,
},
custom: (),
node: Pass,
},
],
decorator_list: [],
returns: None,
type_comment: None,
},
},
],
decorator_list: [],
},
},
]

View file

@ -0,0 +1,63 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: DictComp {
key: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "x1",
ctx: Load,
},
},
value: Located {
location: Location {
row: 1,
column: 6,
},
custom: (),
node: Name {
id: "x2",
ctx: Load,
},
},
generators: [
Comprehension {
target: Located {
location: Location {
row: 1,
column: 13,
},
custom: (),
node: Name {
id: "y",
ctx: Load,
},
},
iter: Located {
location: Location {
row: 1,
column: 18,
},
custom: (),
node: Name {
id: "z",
ctx: Load,
},
},
ifs: [],
is_async: 0,
},
],
},
}

View file

@ -0,0 +1,178 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: ListComp {
elt: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "x",
ctx: Load,
},
},
generators: [
Comprehension {
target: Located {
location: Location {
row: 1,
column: 8,
},
custom: (),
node: Tuple {
elts: [
Located {
location: Location {
row: 1,
column: 8,
},
custom: (),
node: Name {
id: "y",
ctx: Load,
},
},
Located {
location: Location {
row: 1,
column: 11,
},
custom: (),
node: Name {
id: "y2",
ctx: Load,
},
},
],
ctx: Load,
},
},
iter: Located {
location: Location {
row: 1,
column: 17,
},
custom: (),
node: Name {
id: "z",
ctx: Load,
},
},
ifs: [],
is_async: 0,
},
Comprehension {
target: Located {
location: Location {
row: 1,
column: 23,
},
custom: (),
node: Name {
id: "a",
ctx: Load,
},
},
iter: Located {
location: Location {
row: 1,
column: 28,
},
custom: (),
node: Name {
id: "b",
ctx: Load,
},
},
ifs: [
Located {
location: Location {
row: 1,
column: 35,
},
custom: (),
node: Compare {
left: Located {
location: Location {
row: 1,
column: 33,
},
custom: (),
node: Name {
id: "a",
ctx: Load,
},
},
ops: [
Lt,
],
comparators: [
Located {
location: Location {
row: 1,
column: 37,
},
custom: (),
node: Constant {
value: Int(
5,
),
kind: None,
},
},
],
},
},
Located {
location: Location {
row: 1,
column: 44,
},
custom: (),
node: Compare {
left: Located {
location: Location {
row: 1,
column: 42,
},
custom: (),
node: Name {
id: "a",
ctx: Load,
},
},
ops: [
Gt,
],
comparators: [
Located {
location: Location {
row: 1,
column: 46,
},
custom: (),
node: Constant {
value: Int(
10,
),
kind: None,
},
},
],
},
},
],
is_async: 0,
},
],
},
}

View file

@ -0,0 +1,5 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
[]

View file

@ -0,0 +1,39 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
],
},
},
},
},
]

View file

@ -0,0 +1,52 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: GeneratorExp {
elt: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "x",
ctx: Load,
},
},
generators: [
Comprehension {
target: Located {
location: Location {
row: 1,
column: 8,
},
custom: (),
node: Name {
id: "y",
ctx: Load,
},
},
iter: Located {
location: Location {
row: 1,
column: 13,
},
custom: (),
node: Name {
id: "z",
ctx: Load,
},
},
ifs: [],
is_async: 0,
},
],
},
}

View file

@ -0,0 +1,125 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: If {
test: Located {
location: Location {
row: 1,
column: 4,
},
custom: (),
node: Constant {
value: Int(
1,
),
kind: None,
},
},
body: [
Located {
location: Location {
row: 1,
column: 7,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 7,
},
custom: (),
node: Constant {
value: Int(
10,
),
kind: None,
},
},
},
},
],
orelse: [
Located {
location: Location {
row: 2,
column: 1,
},
custom: (),
node: If {
test: Located {
location: Location {
row: 2,
column: 6,
},
custom: (),
node: Constant {
value: Int(
2,
),
kind: None,
},
},
body: [
Located {
location: Location {
row: 2,
column: 9,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 2,
column: 9,
},
custom: (),
node: Constant {
value: Int(
20,
),
kind: None,
},
},
},
},
],
orelse: [
Located {
location: Location {
row: 3,
column: 7,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 3,
column: 7,
},
custom: (),
node: Constant {
value: Int(
30,
),
kind: None,
},
},
},
},
],
},
},
],
},
},
]

View file

@ -0,0 +1,83 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: GeneratorExp {
elt: Located {
location: Location {
row: 1,
column: 4,
},
custom: (),
node: IfExp {
test: Located {
location: Location {
row: 1,
column: 7,
},
custom: (),
node: Name {
id: "y",
ctx: Load,
},
},
body: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "x",
ctx: Load,
},
},
orelse: Located {
location: Location {
row: 1,
column: 14,
},
custom: (),
node: Name {
id: "y",
ctx: Load,
},
},
},
},
generators: [
Comprehension {
target: Located {
location: Location {
row: 1,
column: 20,
},
custom: (),
node: Name {
id: "y",
ctx: Load,
},
},
iter: Located {
location: Location {
row: 1,
column: 25,
},
custom: (),
node: Name {
id: "z",
ctx: Load,
},
},
ifs: [],
is_async: 0,
},
],
},
}

View file

@ -0,0 +1,78 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 8,
},
custom: (),
node: Call {
func: Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: Name {
id: "my_func",
ctx: Load,
},
},
args: [
Located {
location: Location {
row: 1,
column: 10,
},
custom: (),
node: Constant {
value: Str(
"positional",
),
kind: None,
},
},
],
keywords: [
Located {
location: Location {
row: 1,
column: 23,
},
custom: (),
node: KeywordData {
arg: Some(
"keyword",
),
value: Located {
location: Location {
row: 1,
column: 31,
},
custom: (),
node: Constant {
value: Int(
2,
),
kind: None,
},
},
},
},
],
},
},
},
},
]

View file

@ -0,0 +1,90 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: Lambda {
args: Arguments {
posonlyargs: [],
args: [
Located {
location: Location {
row: 1,
column: 8,
},
custom: (),
node: ArgData {
arg: "x",
annotation: None,
type_comment: None,
},
},
Located {
location: Location {
row: 1,
column: 11,
},
custom: (),
node: ArgData {
arg: "y",
annotation: None,
type_comment: None,
},
},
],
vararg: None,
kwonlyargs: [],
kw_defaults: [],
kwarg: None,
defaults: [],
},
body: Located {
location: Location {
row: 1,
column: 16,
},
custom: (),
node: BinOp {
left: Located {
location: Location {
row: 1,
column: 14,
},
custom: (),
node: Name {
id: "x",
ctx: Load,
},
},
op: Mult,
right: Located {
location: Location {
row: 1,
column: 18,
},
custom: (),
node: Name {
id: "y",
ctx: Load,
},
},
},
},
},
},
},
},
]

View file

@ -0,0 +1,52 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: ListComp {
elt: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "x",
ctx: Load,
},
},
generators: [
Comprehension {
target: Located {
location: Location {
row: 1,
column: 8,
},
custom: (),
node: Name {
id: "y",
ctx: Load,
},
},
iter: Located {
location: Location {
row: 1,
column: 13,
},
custom: (),
node: Name {
id: "z",
ctx: Load,
},
},
ifs: [],
is_async: 0,
},
],
},
}

View file

@ -0,0 +1,95 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: GeneratorExp {
elt: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: NamedExpr {
target: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "x",
ctx: Store,
},
},
value: Located {
location: Location {
row: 1,
column: 9,
},
custom: (),
node: BinOp {
left: Located {
location: Location {
row: 1,
column: 7,
},
custom: (),
node: Name {
id: "y",
ctx: Load,
},
},
op: Add,
right: Located {
location: Location {
row: 1,
column: 11,
},
custom: (),
node: Constant {
value: Int(
1,
),
kind: None,
},
},
},
},
},
},
generators: [
Comprehension {
target: Located {
location: Location {
row: 1,
column: 17,
},
custom: (),
node: Name {
id: "y",
ctx: Load,
},
},
iter: Located {
location: Location {
row: 1,
column: 22,
},
custom: (),
node: Name {
id: "z",
ctx: Load,
},
},
ifs: [],
is_async: 0,
},
],
},
}

View file

@ -0,0 +1,65 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 6,
},
custom: (),
node: Call {
func: Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: Name {
id: "print",
ctx: Load,
},
},
args: [
Located {
location: Location {
row: 1,
column: 8,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
Located {
location: Location {
row: 1,
column: 22,
},
custom: (),
node: Constant {
value: Int(
2,
),
kind: None,
},
},
],
keywords: [],
},
},
},
},
]

View file

@ -0,0 +1,51 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 6,
},
custom: (),
node: Call {
func: Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: Name {
id: "print",
ctx: Load,
},
},
args: [
Located {
location: Location {
row: 1,
column: 8,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
],
keywords: [],
},
},
},
},
]

View file

@ -0,0 +1,28 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
},
},
]

View file

@ -0,0 +1,91 @@
---
source: parser/src/parser.rs
expression: parse_program(&source).unwrap()
---
[
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: Assign {
targets: [
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: Tuple {
elts: [
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: Name {
id: "a",
ctx: Load,
},
},
Located {
location: Location {
row: 1,
column: 4,
},
custom: (),
node: Name {
id: "b",
ctx: Load,
},
},
],
ctx: Load,
},
},
],
value: Located {
location: Location {
row: 1,
column: 8,
},
custom: (),
node: Tuple {
elts: [
Located {
location: Location {
row: 1,
column: 8,
},
custom: (),
node: Constant {
value: Int(
4,
),
kind: None,
},
},
Located {
location: Location {
row: 1,
column: 11,
},
custom: (),
node: Constant {
value: Int(
5,
),
kind: None,
},
},
],
ctx: Load,
},
},
type_comment: None,
},
},
]

View file

@ -0,0 +1,39 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
],
},
},
},
},
]

View file

@ -0,0 +1,39 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
],
},
},
},
},
]

View file

@ -0,0 +1,63 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
Located {
location: Location {
row: 1,
column: 12,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Constant {
value: Str(
"!",
),
kind: None,
},
},
conversion: 0,
format_spec: None,
},
},
],
},
},
},
},
]

View file

@ -0,0 +1,28 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
},
},
]

View file

@ -0,0 +1,41 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: Some(
"u",
),
},
},
],
},
},
},
},
]

View file

@ -0,0 +1,41 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Constant {
value: Str(
"Hello world!",
),
kind: Some(
"u",
),
},
},
],
},
},
},
},
]

View file

@ -0,0 +1,28 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
},
},
]

View file

@ -0,0 +1,30 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: Some(
"u",
),
},
},
},
},
]

143
parser/src/string.rs Normal file
View file

@ -0,0 +1,143 @@
use crate::{
ast::{Constant, Expr, ExprKind, Location},
error::{LexicalError, LexicalErrorType},
fstring::parse_located_fstring,
token::StringKind,
};
use itertools::Itertools;
/// Merge a run of adjacent string literals into a single expression.
///
/// Each entry of `values` pairs a literal's location with its text and its `StringKind`.
/// Plain and `u`-prefixed literals contribute their text unchanged. `f`-prefixed literals are
/// parsed with `parse_located_fstring`: their constant pieces are folded into the surrounding
/// text, and each `FormattedValue` piece is kept as its own element.
///
/// When no f-string is involved the result is one `Constant` string; otherwise it is a
/// `JoinedStr`. The merged constants and the enclosing `JoinedStr` are located at the first
/// literal, and the merged constants carry the kind `Some("u")` only when that first literal
/// is `u`-prefixed.
///
/// # Errors
///
/// Returns a `LexicalError` of type `FStringError`, located at the offending literal, when an
/// f-string fails to parse.
///
/// # Panics
///
/// Panics if `values` is empty.
pub fn parse_strings(values: Vec<(Location, (String, StringKind))>) -> Result<Expr, LexicalError> {
    // The first literal decides the location and the `u` kind of everything merged below.
    let initial_location = values[0].0;
    let initial_kind = if values[0].1 .1 == StringKind::U {
        Some("u".to_owned())
    } else {
        None
    };

    // Turns the buffered text fragments (if any) into one constant and appends it to `merged`.
    let flush_pending = |pending: &mut Vec<String>, merged: &mut Vec<Expr>| {
        if pending.is_empty() {
            return;
        }
        let text: String = pending.drain(..).collect();
        merged.push(Expr::new(
            initial_location,
            ExprKind::Constant {
                value: Constant::Str(text),
                kind: initial_kind.clone(),
            },
        ));
    };

    // Whether any literal was an f-string; decides between `Constant` and `JoinedStr` below.
    let mut has_fstring = false;
    // Finished elements, with adjacent constants already merged.
    let mut merged: Vec<Expr> = vec![];
    // Text fragments waiting to be merged into the next constant.
    let mut pending: Vec<String> = vec![];

    for (location, (string, string_kind)) in values {
        match string_kind {
            StringKind::Normal | StringKind::U => pending.push(string),
            StringKind::F => {
                has_fstring = true;
                let parsed =
                    parse_located_fstring(&string, location).map_err(|e| LexicalError {
                        location,
                        error: LexicalErrorType::FStringError(e.error),
                    })?;
                let pieces = match parsed.node {
                    ExprKind::JoinedStr { values } => values,
                    _ => unreachable!("parse_located_fstring returned a non-JoinedStr."),
                };
                for piece in pieces {
                    match piece.node {
                        // A formatted value ends the current run of constant text.
                        ExprKind::FormattedValue { .. } => {
                            flush_pending(&mut pending, &mut merged);
                            merged.push(piece)
                        }
                        ExprKind::Constant {
                            value: Constant::Str(text),
                            ..
                        } => pending.push(text),
                        ExprKind::Constant { .. } => {
                            unreachable!("Unexpected non-string constant.")
                        }
                        _ => unreachable!("Unexpected non-string expression."),
                    }
                }
            }
        }
    }
    flush_pending(&mut pending, &mut merged);

    if !has_fstring {
        return Ok(merged
            .into_iter()
            .exactly_one()
            .expect("String must be concatenated to a single element."));
    }
    Ok(Expr::new(
        initial_location,
        ExprKind::JoinedStr { values: merged },
    ))
}
#[cfg(test)]
mod tests {
// Snapshot tests for implicit concatenation of adjacent string literals.
// Each test parses a one-line program with `parse_program` and hands the Debug rendering of
// the resulting AST to `insta::assert_debug_snapshot!`.
use crate::parser::parse_program;
// Two plain literals.
#[test]
fn test_parse_string_concat() {
let source = String::from("'Hello ' 'world'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// Plain literal followed by a `u`-prefixed literal.
#[test]
fn test_parse_u_string_concat_1() {
let source = String::from("'Hello ' u'world'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// `u`-prefixed literal followed by a plain literal.
#[test]
fn test_parse_u_string_concat_2() {
let source = String::from("u'Hello ' 'world'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// Plain literal followed by an f-string without replacement fields.
#[test]
fn test_parse_f_string_concat_1() {
let source = String::from("'Hello ' f'world'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// NOTE(review): this source is identical to test_parse_f_string_concat_1 — possibly a
// different input was intended here; confirm before changing, since the stored snapshot
// would need to be regenerated.
#[test]
fn test_parse_f_string_concat_2() {
let source = String::from("'Hello ' f'world'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// Plain literal followed by an f-string that contains a replacement field (`{"!"}`).
#[test]
fn test_parse_f_string_concat_3() {
let source = String::from("'Hello ' f'world{\"!\"}'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// `u`-prefixed literal followed by an f-string.
#[test]
fn test_parse_u_f_string_concat_1() {
let source = String::from("u'Hello ' f'world'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// `u`-prefixed literal, an f-string, then a trailing plain literal.
#[test]
fn test_parse_u_f_string_concat_2() {
let source = String::from("u'Hello ' f'world' '!'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
}

236
parser/src/token.rs Normal file
View file

@ -0,0 +1,236 @@
//! Different token definitions.
//! Loosely based on `token.h` from the CPython source.
use num_bigint::BigInt;
use std::fmt::{self, Write};
/// A single lexical token; Python source code can be tokenized into a sequence of these.
///
/// Variants that carry data hold the token's text or value; all other variants stand for one
/// fixed operator, delimiter, keyword, or marker.
#[derive(Clone, Debug, PartialEq)]
pub enum Tok {
/// A name token; `name` holds its text.
Name { name: String },
/// An integer token; the value is stored as a `BigInt`.
Int { value: BigInt },
/// A floating-point token.
Float { value: f64 },
/// A complex-number token, split into real and imaginary parts.
Complex { real: f64, imag: f64 },
/// A string token: its text plus the prefix kind (see `StringKind`).
String { value: String, kind: StringKind },
/// A bytes token holding the raw bytes.
Bytes { value: Vec<u8> },
// Layout and marker tokens.
// NOTE(review): presumably Newline/Indent/Dedent are produced by the lexer for line ends and
// indentation changes, and the Start* variants select what kind of input is parsed —
// confirm against the lexer and grammar.
Newline,
Indent,
Dedent,
StartModule,
StartInteractive,
StartExpression,
EndOfFile,
// Operators and delimiters.
Lpar, // '('
Rpar, // ')'
Lsqb, // '['
Rsqb, // ']'
Colon, // ':'
Comma, // ','
Semi, // ';'
Plus, // '+'
Minus, // '-'
Star, // '*'
Slash, // '/'
Vbar, // '|'
Amper, // '&'
Less, // '<'
Greater, // '>'
Equal, // '='
Dot, // '.'
Percent, // '%'
Lbrace, // '{'
Rbrace, // '}'
EqEqual, // '=='
NotEqual, // '!='
LessEqual, // '<='
GreaterEqual, // '>='
Tilde, // '~'
CircumFlex, // '^'
LeftShift, // '<<'
RightShift, // '>>'
DoubleStar, // '**'
DoubleStarEqual, // '**='
PlusEqual, // '+='
MinusEqual, // '-='
StarEqual, // '*='
SlashEqual, // '/='
PercentEqual, // '%='
AmperEqual, // '&='
VbarEqual, // '|='
CircumflexEqual, // '^='
LeftShiftEqual, // '<<='
RightShiftEqual, // '>>='
DoubleSlash, // '//'
DoubleSlashEqual, // '//='
ColonEqual, // ':='
At, // '@'
AtEqual, // '@='
Rarrow, // '->'
Ellipsis, // '...'
// Keywords (alphabetically):
False,
None,
True,
And,
As,
Assert,
Async,
Await,
Break,
Class,
Continue,
Def,
Del,
Elif,
Else,
Except,
Finally,
For,
From,
Global,
If,
Import,
In,
Is,
Lambda,
Nonlocal,
Not,
Or,
Pass,
Raise,
Return,
Try,
While,
With,
Yield,
}
/// The prefix kind attached to a `Tok::String` token.
#[derive(PartialEq, Eq, Debug, Clone)]
pub enum StringKind {
/// A string without an `f` or `u` prefix.
Normal,
/// An `f`-prefixed string (f-string).
F,
/// A `u`-prefixed string.
U,
}
/// Human-readable rendering of a token.
///
/// * Names and numeric tokens are wrapped in single quotes (`'foo'`, `'42'`, `'0+2j'`).
/// * Strings are written with their `f`/`u` prefix followed by the Debug (escaped, double-quoted)
///   form of the text.
/// * Bytes are written as `b"…"`; tab, newline and carriage return are escaped, bytes in the
///   printable ASCII range 32..=126 are written verbatim (this includes `"` and `\`), and every
///   other byte is written as `\xNN`.
/// * Marker tokens are written as a bare word, and operators and keywords as their quoted
///   spelling.
impl fmt::Display for Tok {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use Tok::*;
        // Tokens that carry data are written out directly and return early; every other token
        // maps to a fixed piece of text that is written once at the end.
        let text = match self {
            Name { name } => return write!(f, "'{}'", name),
            Int { value } => return write!(f, "'{}'", value),
            Float { value } => return write!(f, "'{}'", value),
            // Real part first, then the explicitly signed imaginary part and the `j` suffix.
            // The previous "{}j{}" format placed `j` between the two parts (e.g. `0j2`).
            Complex { real, imag } => return write!(f, "'{}{:+}j'", real, imag),
            String { value, kind } => {
                match kind {
                    StringKind::F => f.write_str("f")?,
                    StringKind::U => f.write_str("u")?,
                    StringKind::Normal => {}
                }
                return write!(f, "{:?}", value);
            }
            Bytes { value } => {
                f.write_str("b\"")?;
                for &byte in value {
                    match byte {
                        b'\t' => f.write_str("\\t")?,
                        b'\n' => f.write_str("\\n")?,
                        b'\r' => f.write_str("\\r")?,
                        // Printable ASCII is emitted as-is.
                        b' '..=b'~' => f.write_char(byte as char)?,
                        _ => write!(f, "\\x{:02x}", byte)?,
                    }
                }
                return f.write_str("\"");
            }

            // Marker tokens.
            Newline => "Newline",
            Indent => "Indent",
            Dedent => "Dedent",
            // NOTE(review): the text does not match the variant name (`StartModule`); kept
            // unchanged because callers may rely on the existing output — confirm.
            StartModule => "StartProgram",
            StartInteractive => "StartInteractive",
            StartExpression => "StartExpression",
            EndOfFile => "EOF",

            // Operators and delimiters.
            Lpar => "'('",
            Rpar => "')'",
            Lsqb => "'['",
            Rsqb => "']'",
            Colon => "':'",
            Comma => "','",
            Semi => "';'",
            Plus => "'+'",
            Minus => "'-'",
            Star => "'*'",
            Slash => "'/'",
            Vbar => "'|'",
            Amper => "'&'",
            Less => "'<'",
            Greater => "'>'",
            Equal => "'='",
            Dot => "'.'",
            Percent => "'%'",
            Lbrace => "'{'",
            Rbrace => "'}'",
            EqEqual => "'=='",
            NotEqual => "'!='",
            LessEqual => "'<='",
            GreaterEqual => "'>='",
            Tilde => "'~'",
            CircumFlex => "'^'",
            LeftShift => "'<<'",
            RightShift => "'>>'",
            DoubleStar => "'**'",
            DoubleStarEqual => "'**='",
            PlusEqual => "'+='",
            MinusEqual => "'-='",
            StarEqual => "'*='",
            SlashEqual => "'/='",
            PercentEqual => "'%='",
            AmperEqual => "'&='",
            VbarEqual => "'|='",
            CircumflexEqual => "'^='",
            LeftShiftEqual => "'<<='",
            RightShiftEqual => "'>>='",
            DoubleSlash => "'//'",
            DoubleSlashEqual => "'//='",
            ColonEqual => "':='",
            At => "'@'",
            AtEqual => "'@='",
            Rarrow => "'->'",
            Ellipsis => "'...'",

            // Keywords.
            False => "'False'",
            None => "'None'",
            True => "'True'",
            And => "'and'",
            As => "'as'",
            Assert => "'assert'",
            Async => "'async'",
            Await => "'await'",
            Break => "'break'",
            Class => "'class'",
            Continue => "'continue'",
            Def => "'def'",
            Del => "'del'",
            Elif => "'elif'",
            Else => "'else'",
            Except => "'except'",
            Finally => "'finally'",
            For => "'for'",
            From => "'from'",
            Global => "'global'",
            If => "'if'",
            Import => "'import'",
            In => "'in'",
            Is => "'is'",
            Lambda => "'lambda'",
            Nonlocal => "'nonlocal'",
            Not => "'not'",
            Or => "'or'",
            Pass => "'pass'",
            Raise => "'raise'",
            Return => "'return'",
            Try => "'try'",
            While => "'while'",
            With => "'with'",
            Yield => "'yield'",
        };
        f.write_str(text)
    }
}