Parse closures

This commit is contained in:
Richard Feldman 2019-09-21 20:31:24 -04:00
parent 5d6057bfc3
commit 2a80702a21
8 changed files with 224 additions and 52 deletions

View file

@ -4,7 +4,6 @@ use can::problem::Problem;
use can::procedure::{Procedure, References};
use can::symbol::Symbol;
use collections::{ImMap, MutMap};
use parse::ast;
use region::{Located, Region};
/// The canonicalization environment for a particular module.
@ -17,17 +16,14 @@ pub struct Env {
pub problems: Vec<Problem>,
/// Variants either declared in this module, or imported.
pub variants: ImMap<Symbol, Located<Box<ast::VariantName>>>,
pub variants: ImMap<Symbol, Located<Box<str>>>,
/// Former closures converted to top-level procedures.
pub procedures: MutMap<Symbol, Procedure>,
}
impl Env {
pub fn new(
home: String,
declared_variants: ImMap<Symbol, Located<Box<ast::VariantName>>>,
) -> Env {
pub fn new(home: String, declared_variants: ImMap<Symbol, Located<Box<str>>>) -> Env {
Env {
home,
variants: declared_variants,

View file

@ -30,7 +30,7 @@ pub fn canonicalize_declaration<'a>(
name: &str,
loc_expr: &'a Located<ast::Expr<'a>>,
declared_idents: &ImMap<Ident, (Symbol, Region)>,
declared_variants: &ImMap<Symbol, Located<Box<ast::VariantName>>>,
declared_variants: &ImMap<Symbol, Located<Box<str>>>,
) -> (
Located<Expr>,
Output,

View file

@ -4,8 +4,11 @@ use bumpalo::Bump;
use operator::Operator;
use region::{Loc, Region};
pub type VariantName = str;
/// The top-level kind of a parsed module.
///
/// NOTE(review): the payload fields are positional and unnamed here. The leading
/// `&[&str]` / `&str` pair presumably holds a qualified module name (the same shape
/// is used for qualified variants in `Pattern::Variant`) — confirm against the parser.
#[derive(Clone, Debug, PartialEq)]
pub enum Module<'a> {
/// An `api` module. In addition to the two name-like fields, it carries a list of `Def`s.
Api(&'a [&'a str], &'a str, Vec<'a, Def<'a>>),
/// An `app` module. Carries only the two name-like fields.
App(&'a [&'a str], &'a str),
}
/// A parsed expression. This uses lifetimes extensively for two reasons:
///
/// 1. It uses Bump::alloc for all allocations, which returns a reference.
@ -51,8 +54,8 @@ pub enum Expr<'a> {
// Pattern Matching
When(&'a [(Loc<Pattern<'a>>, Loc<Expr<'a>>)]),
Closure(&'a (Vec<'a, Loc<Pattern<'a>>>, Loc<Expr<'a>>)),
// /// basically Assign(Vec<(Loc<Pattern>, Loc<Expr>)>, Loc<Expr>)
// Assign(&'a (&'a [(Loc<Pattern<'a>>, Loc<Expr<'a>>)], Loc<Expr<'a>>)),
/// Multiple defs in a row
Defs(&'a (Vec<'a, Def<'a>>, Loc<Expr<'a>>)),
// Application
/// To apply by name, do Apply(Var(...), ...)
@ -68,15 +71,22 @@ pub enum Expr<'a> {
// Blank Space (e.g. comments, spaces, newlines) before or after an expression.
// We preserve this for the formatter; canonicalization ignores it.
SpaceBefore(&'a Expr<'a>, &'a [Space<'a>]),
SpaceAfter(&'a Expr<'a>, &'a [Space<'a>]),
SpaceBefore(&'a Expr<'a>, &'a [CommentOrNewline<'a>]),
SpaceAfter(&'a Expr<'a>, &'a [CommentOrNewline<'a>]),
// Problems
MalformedIdent(&'a str),
}
#[derive(Debug, PartialEq, Eq)]
pub enum Space<'a> {
/// A single definition, as stored in `Expr::Defs` and `Module::Api`.
#[derive(Debug, Clone, PartialEq)]
pub enum Def<'a> {
/// NOTE(review): carries no data yet; presumably a type annotation without a
/// body — confirm once annotations are actually parsed.
AnnotationOnly,
/// A located pattern together with a reference to the located expression that
/// forms its body.
BodyOnly(Loc<Pattern<'a>>, &'a Loc<Expr<'a>>),
/// Same payload shape as `BodyOnly`.
/// NOTE(review): presumably a def that has both a type annotation and a body; the
/// annotation itself is not stored in this variant — confirm.
AnnotatedBody(Loc<Pattern<'a>>, &'a Loc<Expr<'a>>),
}
#[derive(Debug, PartialEq)]
pub enum CommentOrNewline<'a> {
Newline,
LineComment(&'a str),
BlockComment(&'a [&'a str]),
@ -88,7 +98,7 @@ pub enum Pattern<'a> {
Identifier(&'a str),
// Variant, optionally qualified
Variant(&'a [&'a str], &'a VariantName),
Variant(&'a [&'a str], &'a str),
Apply(&'a (Loc<&'a Pattern<'a>>, [Loc<Pattern<'a>>])),
/// This is Loc<Pattern> rather than Loc<str> so we can record comments
/// around the destructured names, e.g. { x ### x does stuff ###, y }
@ -103,15 +113,15 @@ pub enum Pattern<'a> {
Underscore,
// Space
SpaceBefore(&'a Pattern<'a>, &'a [Space<'a>]),
SpaceAfter(&'a Pattern<'a>, &'a [Space<'a>]),
SpaceBefore(&'a Pattern<'a>, &'a [CommentOrNewline<'a>]),
SpaceAfter(&'a Pattern<'a>, &'a [CommentOrNewline<'a>]),
}
pub trait Spaceable<'a> {
fn before(&'a self, &'a [Space<'a>]) -> Self;
fn after(&'a self, &'a [Space<'a>]) -> Self;
fn before(&'a self, &'a [CommentOrNewline<'a>]) -> Self;
fn after(&'a self, &'a [CommentOrNewline<'a>]) -> Self;
fn with_spaces_before(&'a self, spaces: &'a [Space<'a>], region: Region) -> Loc<Self>
fn with_spaces_before(&'a self, spaces: &'a [CommentOrNewline<'a>], region: Region) -> Loc<Self>
where
Self: Sized,
{
@ -121,7 +131,7 @@ pub trait Spaceable<'a> {
}
}
fn with_spaces_after(&'a self, spaces: &'a [Space<'a>], region: Region) -> Loc<Self>
fn with_spaces_after(&'a self, spaces: &'a [CommentOrNewline<'a>], region: Region) -> Loc<Self>
where
Self: Sized,
{
@ -133,19 +143,19 @@ pub trait Spaceable<'a> {
}
impl<'a> Spaceable<'a> for Expr<'a> {
fn before(&'a self, spaces: &'a [Space<'a>]) -> Self {
fn before(&'a self, spaces: &'a [CommentOrNewline<'a>]) -> Self {
Expr::SpaceBefore(self, spaces)
}
fn after(&'a self, spaces: &'a [Space<'a>]) -> Self {
fn after(&'a self, spaces: &'a [CommentOrNewline<'a>]) -> Self {
Expr::SpaceAfter(self, spaces)
}
}
impl<'a> Spaceable<'a> for Pattern<'a> {
fn before(&'a self, spaces: &'a [Space<'a>]) -> Self {
fn before(&'a self, spaces: &'a [CommentOrNewline<'a>]) -> Self {
Pattern::SpaceBefore(self, spaces)
}
fn after(&'a self, spaces: &'a [Space<'a>]) -> Self {
fn after(&'a self, spaces: &'a [CommentOrNewline<'a>]) -> Self {
Pattern::SpaceAfter(self, spaces)
}
}

View file

@ -1,7 +1,8 @@
use bumpalo::collections::string::String;
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use parse::ast::{Space, Spaceable};
use parse::ast::CommentOrNewline::{self, *};
use parse::ast::Spaceable;
use parse::parser::{and, map_with_arena, unexpected, unexpected_eof, Parser, State};
use region::Located;
@ -55,7 +56,8 @@ where
)
}
/// Parses the given expression with 1 or more (spaces/comments/newlines) before and/or after it.
/// Parses the given expression with 1 or more (spaces/comments/newlines) before it,
/// and also 1 or more spaces after it.
/// Returns a Located<Expr> where the location is around the Expr, ignoring the spaces.
/// If any newlines or comments were found, the Expr will be wrapped in a SpaceBefore and/or
/// SpaceAfter as appropriate.
@ -193,12 +195,12 @@ where
}
/// Zero or more (spaces/comments/newlines).
pub fn space0<'a>(min_indent: u16) -> impl Parser<'a, &'a [Space<'a>]> {
pub fn space0<'a>(min_indent: u16) -> impl Parser<'a, &'a [CommentOrNewline<'a>]> {
spaces(false, min_indent)
}
/// One or more (spaces/comments/newlines).
pub fn space1<'a>(min_indent: u16) -> impl Parser<'a, &'a [Space<'a>]> {
pub fn space1<'a>(min_indent: u16) -> impl Parser<'a, &'a [CommentOrNewline<'a>]> {
// TODO try benchmarking a short-circuit for the typical case: see if there is
// exactly one space followed by char that isn't [' ', '\n', or '#'], and
// if so, return empty slice. The case where there's exactly 1 space should
@ -207,7 +209,10 @@ pub fn space1<'a>(min_indent: u16) -> impl Parser<'a, &'a [Space<'a>]> {
}
#[inline(always)]
fn spaces<'a>(require_at_least_one: bool, _min_indent: u16) -> impl Parser<'a, &'a [Space<'a>]> {
fn spaces<'a>(
require_at_least_one: bool,
_min_indent: u16,
) -> impl Parser<'a, &'a [CommentOrNewline<'a>]> {
move |arena: &'a Bump, state: State<'a>| {
let mut chars = state.input.chars().peekable();
let mut space_list = Vec::new_in(arena);
@ -229,7 +234,7 @@ fn spaces<'a>(require_at_least_one: bool, _min_indent: u16) -> impl Parser<'a, &
state = state.newline()?;
// Newlines only get added to the list when they're outside comments.
space_list.push(Space::Newline);
space_list.push(Newline);
}
'#' => {
state = state.advance_without_indenting(1)?;
@ -258,7 +263,7 @@ fn spaces<'a>(require_at_least_one: bool, _min_indent: u16) -> impl Parser<'a, &
state = state.newline()?;
// This was a newline, so end this line comment.
space_list.push(Space::LineComment(comment_line_buf.into_bump_str()));
space_list.push(LineComment(comment_line_buf.into_bump_str()));
comment_line_buf = String::new_in(arena);
comment_parsing = CommentParsing::No;
@ -339,7 +344,7 @@ fn spaces<'a>(require_at_least_one: bool, _min_indent: u16) -> impl Parser<'a, &
comment_line_buf = String::new_in(arena);
// Add the block comment to the list.
space_list.push(Space::BlockComment(
space_list.push(BlockComment(
comment_lines.into_bump_slice(),
));

View file

@ -8,22 +8,52 @@ pub mod parser;
pub mod problems;
pub mod string_literal;
/// All module definitions begin with one of these:
///
/// app
/// api
/// api bridge
///
/// We parse these to guard against mistakes; in general, the build tool
/// is responsible for determining the root module (either an `app` or `api bridge`
/// module), and then all `api` modules should only ever be imported from
/// another module.
///
/// parsing the file
use bumpalo::collections::String;
use bumpalo::collections::Vec;
use bumpalo::Bump;
use operator::Operator;
use parse::ast::{Attempting, Expr, Pattern, Spaceable};
use parse::ast::{Attempting, Def, Expr, Pattern, Spaceable};
use parse::blankspace::{space0, space0_around, space0_before, space1_before};
use parse::ident::{ident, Ident};
use parse::number_literal::number_literal;
use parse::parser::{
and, attempt, between, char, either, loc, map, map_with_arena, one_of3, one_of4, one_of9,
one_or_more, optional, sep_by0, skip_first, skip_second, string, unexpected, unexpected_eof,
Either, ParseResult, Parser, State,
one_or_more, optional, sep_by0, skip_first, skip_second, string, then, unexpected,
unexpected_eof, zero_or_more, Either, ParseResult, Parser, State,
};
use parse::string_literal::string_literal;
use region::Located;
// pub fn api<'a>() -> impl Parser<'a, Module<'a>> {
// and(
// skip_first(string("api"), space1_around(ident())),
// skip_first(string("exposes"), space1_around(ident())),
// )
// }
// pub fn app<'a>() -> impl Parser<'a, Module<'a>> {
// skip_first(string("app using Echo"))
// }
// pub fn api_bridge<'a>() -> impl Parser<'a, Module<'a>> {
// and(
// skip_first(string("api bridge"), space1_around(ident())),
// skip_first(string("exposes"), space1_around(ident())),
// )
// }
pub fn expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
// Recursive parsers must not directly invoke functions which return (impl Parser),
// as this causes rustc to stack overflow. Thus, parse_expr must be a
@ -125,7 +155,6 @@ pub fn loc_parenthetical_expr<'a>(min_indent: u16) -> impl Parser<'a, Located<Ex
// We parse the parenthetical expression *and* the arguments after it
// in one region, so that (for example) the region for Apply includes its args.
let (loc_expr, opt_extras) = loc_expr_with_extras.value;
match opt_extras {
Some(Either::First(loc_args)) => Located {
region: loc_expr_with_extras.region,
@ -150,6 +179,106 @@ pub fn loc_parenthetical_expr<'a>(min_indent: u16) -> impl Parser<'a, Located<Ex
)
}
/// A definition, consisting of one of these:
///
/// * A pattern followed by '=' and then an expression
/// * A type annotation
/// * Both
// Not implemented yet: the returned parser panics as soon as it is run, and
// `min_indent` is not consulted.
pub fn def<'a>(min_indent: u16) -> impl Parser<'a, Def<'a>> {
    move |_arena, _state| {
        panic!("TODO parse a single def")
    }
}
/// Same as def(), but meant to require leading space before each def, because each
/// nested def must have space separating it from the previous def.
///
/// Not implemented yet: after `def(min_indent)` succeeds, the continuation panics
/// instead of handling the leading space.
/// NOTE(review): the TODO message says `space_before1`; the combinator imported from
/// `parse::blankspace` is named `space1_before` — presumably that is the one intended.
pub fn nested_def<'a>(min_indent: u16) -> impl Parser<'a, Def<'a>> {
then(def(min_indent), move |arena: &'a Bump, state, def_val| {
panic!("TODO actually parse the def with space_before1");
// Unreachable after the panic above; presumably kept so the closure's return
// type is still spelled out while this is a stub — confirm before removing.
Ok((def_val, state))
})
}
/// Parses the remainder of a defs expression once the first pattern (and its '=')
/// have already been consumed, producing an `Expr::Defs`.
///
/// * `min_indent` - the indentation column recorded in `state` must not be less than this.
/// * `equals_sign_indent` - must not be less than the indentation column recorded in `state`.
/// * `loc_first_pattern` - the pattern of the first def; it is paired with the first
///   parsed body to build a `Def::BodyOnly`.
///
/// All three indentation violations currently `panic!` with a TODO message rather than
/// returning a parse failure.
///
/// NOTE(review): the type parameter `S` does not appear in any parameter or in the return
/// type, so callers cannot have it inferred — presumably a leftover; confirm and remove.
fn parse_def_expr<'a, S>(
min_indent: u16,
equals_sign_indent: u16,
arena: &'a Bump,
state: State<'a>,
loc_first_pattern: Located<Pattern<'a>>,
) -> ParseResult<'a, Expr<'a>> {
// Remember the indentation we started at; the nested defs and the final
// indentation check below are all measured against it.
let original_indent = state.indent_col;
if original_indent < min_indent {
panic!("TODO this declaration is outdented too far");
// `<` because '=' should be same indent or greater
} else if equals_sign_indent < original_indent {
panic!("TODO the = in this declaration seems outdented");
} else {
then(
and(
// Parse the body of the first def. It doesn't need any spaces
// around it parsed, because both the subsequent defs and the
// final body will have space1_before on them.
loc(move |arena, state| parse_expr(original_indent + 1, arena, state)),
and(
// Optionally parse additional defs.
zero_or_more(nested_def(original_indent)),
// Parse the final expression, which follows all the defs and is
// stored next to them in Expr::Defs.
loc(move |arena, state| parse_expr(original_indent + 1, arena, state)),
),
),
move |arena, state, (loc_first_body, (mut defs, loc_ret))| {
// After everything has been parsed, the state must be back at the
// indentation column we started from.
if state.indent_col != original_indent {
panic!("TODO return expr was indented differently from original def",);
} else {
let first_def: Def<'a> =
// TODO if Parser were FnOnce instead of Fn, this might not need .clone()?
Def::BodyOnly(loc_first_pattern.clone(), arena.alloc(loc_first_body));
// Add the first def to the end of the defs. (It's fine that we
// reorder the first one to the end, because canonicalize will
// re-sort all of these based on dependencies anyway. Only
// their regions will ever be visible to the user.)
defs.push(first_def);
Ok((Expr::Defs(arena.alloc((defs, loc_ret))), state))
}
},
)
.parse(arena, state)
}
}
/// Parses the body expression of a nested def and returns it with its location.
///
/// * `min_indent` - the indentation column recorded in `state` must not be less than this.
/// * `equals_sign_indent` - must not be less than the indentation column recorded in `state`.
/// * `loc_pattern` - currently not read anywhere in the body.
///
/// The body is parsed with `parse_expr` at one column deeper than the starting
/// indentation. All three indentation violations currently `panic!` with a TODO
/// message rather than returning a parse failure.
///
/// NOTE(review): the type parameter `S` does not appear in any parameter or in the return
/// type, so callers cannot have it inferred — presumably a leftover; confirm and remove.
fn parse_nested_def_body<'a, S>(
min_indent: u16,
equals_sign_indent: u16,
arena: &'a Bump,
state: State<'a>,
loc_pattern: Located<Pattern<'a>>,
) -> ParseResult<'a, Located<Expr<'a>>> {
// Indentation at entry; the check after parsing compares against this.
let original_indent = state.indent_col;
if original_indent < min_indent {
panic!("TODO this declaration is outdented too far");
// `<` because '=' should be same indent or greater
} else if equals_sign_indent < original_indent {
panic!("TODO the = in this declaration seems outdented");
} else {
then(
loc(move |arena, state| {
parse_expr(original_indent + 1, arena, state)
}),
move |arena, state, loc_expr| {
// Once the body has been parsed, the state must be back at the
// indentation column we started from.
if state.indent_col != original_indent {
panic!(
"TODO the return expression was indented differently from the original assignment",
);
} else {
Ok((loc_expr, state))
}
},
).parse(arena, state)
}
}
fn loc_function_arg<'a>(min_indent: u16) -> impl Parser<'a, Located<Expr<'a>>> {
// Don't parse operators, because they have a higher precedence than function application.
// If we encounter one, we're done parsing function args!
@ -296,6 +425,9 @@ pub fn ident_etc<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
optional(either(
// There may optionally be function args after this ident
loc_function_args(min_indent),
// TODO make sure '=' is not_followed_by(one_of2(char('='), char('>'))) b/c
// otherwise it's an == or => and not a def!
//
// If there aren't any args, there may be a '=' or ':' after it.
// (It's a syntax error to write e.g. `foo bar =` - so if there
// were any args, there is definitely no need to parse '=' or ':'!)

View file

@ -174,6 +174,19 @@ where
}
}
/// Runs `parser`, then hands its output and the resulting state to `transform`.
///
/// If `parser` fails, that failure is returned unchanged and `transform` is never
/// called. Otherwise whatever `transform` returns becomes the result of the combined
/// parser, so `transform` may itself fail or return a different state.
pub fn then<'a, P1, F, Before, After>(parser: P1, transform: F) -> impl Parser<'a, After>
where
    P1: Parser<'a, Before>,
    After: 'a,
    F: Fn(&'a Bump, State<'a>, Before) -> ParseResult<'a, After>,
{
    move |arena, state| match parser.parse(arena, state) {
        Ok((output, next_state)) => transform(arena, next_state, output),
        Err(failure) => Err(failure),
    }
}
#[cfg(not(debug_assertions))]
pub fn map<'a, P, F, Before, After>(parser: P, transform: F) -> impl Parser<'a, After>
where

View file

@ -40,7 +40,7 @@ pub fn can_expr_with(
name: &str,
expr_str: &str,
declared_idents: &ImMap<Ident, (Symbol, Region)>,
declared_variants: &ImMap<Symbol, Located<Box<ast::VariantName>>>,
declared_variants: &ImMap<Symbol, Located<Box<str>>>,
) -> (Expr, Output, Vec<Problem>, MutMap<Symbol, Procedure>) {
let expr = parse_with(&arena, expr_str).unwrap_or_else(|_| {
panic!(

View file

@ -18,8 +18,10 @@ mod test_parse {
use bumpalo::{self, Bump};
use helpers::parse_with;
use roc::operator::Operator::*;
use roc::parse::ast::CommentOrNewline::*;
use roc::parse::ast::Expr::{self, *};
use roc::parse::ast::{Attempting, Space, Spaceable};
use roc::parse::ast::Pattern::*;
use roc::parse::ast::{Attempting, Spaceable};
use roc::parse::parser::{Fail, FailReason};
use roc::region::{Located, Region};
use std::{f64, i64};
@ -276,9 +278,9 @@ mod test_parse {
#[test]
fn newline_before_op() {
let arena = Bump::new();
let spaced_int = SpaceAfter(
let spaced_int = Expr::SpaceAfter(
arena.alloc(Int("3")),
bumpalo::vec![in &arena; Space::Newline].into_bump_slice(),
bumpalo::vec![in &arena; Newline].into_bump_slice(),
);
let tuple = arena.alloc((
Located::new(0, 0, 0, 1, spaced_int),
@ -296,7 +298,7 @@ mod test_parse {
let arena = Bump::new();
let spaced_int = arena
.alloc(Int("4"))
.before(bumpalo::vec![in &arena; Space::Newline].into_bump_slice());
.before(bumpalo::vec![in &arena; Newline].into_bump_slice());
let tuple = arena.alloc((
Located::new(0, 0, 0, 1, Int("3")),
Located::new(0, 0, 3, 4, Star),
@ -313,7 +315,7 @@ mod test_parse {
let arena = Bump::new();
let spaced_int = arena
.alloc(Int("3"))
.after(bumpalo::vec![in &arena; Space::LineComment(" test!")].into_bump_slice());
.after(bumpalo::vec![in &arena; LineComment(" test!")].into_bump_slice());
let tuple = arena.alloc((
Located::new(0, 0, 0, 1, spaced_int),
Located::new(1, 1, 0, 1, Plus),
@ -330,7 +332,7 @@ mod test_parse {
let arena = Bump::new();
let spaced_int = arena
.alloc(Int("92"))
.before(bumpalo::vec![in &arena; Space::LineComment(" test!")].into_bump_slice());
.before(bumpalo::vec![in &arena; LineComment(" test!")].into_bump_slice());
let tuple = arena.alloc((
Located::new(0, 0, 0, 2, Int("12")),
Located::new(0, 0, 4, 5, Star),
@ -347,10 +349,10 @@ mod test_parse {
let arena = Bump::new();
let spaced_int1 = arena
.alloc(Int("3"))
.after(bumpalo::vec![in &arena; Space::Newline].into_bump_slice());
.after(bumpalo::vec![in &arena; Newline].into_bump_slice());
let spaced_int2 = arena
.alloc(Int("4"))
.before(bumpalo::vec![in &arena; Space::Newline, Space::Newline].into_bump_slice());
.before(bumpalo::vec![in &arena; Newline, Newline].into_bump_slice());
let tuple = arena.alloc((
Located::new(0, 0, 0, 1, spaced_int1),
Located::new(1, 1, 0, 1, Plus),
@ -447,7 +449,7 @@ mod test_parse {
fn basic_variant() {
let arena = Bump::new();
let module_parts = Vec::new_in(&arena).into_bump_slice();
let expected = Variant(module_parts, "Whee");
let expected = Expr::Variant(module_parts, "Whee");
let actual = parse_with(&arena, "Whee");
assert_eq!(Ok(expected), actual);
@ -457,7 +459,7 @@ mod test_parse {
fn qualified_variant() {
let arena = Bump::new();
let module_parts = bumpalo::vec![in &arena; "One", "Two"].into_bump_slice();
let expected = Variant(module_parts, "Whee");
let expected = Expr::Variant(module_parts, "Whee");
let actual = parse_with(&arena, "One.Two.Whee");
assert_eq!(Ok(expected), actual);
@ -556,7 +558,7 @@ mod test_parse {
let arg = Located::new(0, 0, 5, 6, Int("1"));
let args = bumpalo::vec![in &arena; arg];
let tuple = arena.alloc((Located::new(0, 0, 0, 4, Var(module_parts, "whee")), args));
let expected = Apply(tuple);
let expected = Expr::Apply(tuple);
let actual = parse_with(&arena, "whee 1");
assert_eq!(Ok(expected), actual);
@ -570,7 +572,7 @@ mod test_parse {
let arg2 = Located::new(0, 0, 10, 12, Int("34"));
let args = bumpalo::vec![in &arena; arg1, arg2];
let tuple = arena.alloc((Located::new(0, 0, 0, 4, Var(module_parts, "whee")), args));
let expected = Apply(tuple);
let expected = Expr::Apply(tuple);
let actual = parse_with(&arena, "whee 12 34");
assert_eq!(Ok(expected), actual);
@ -583,12 +585,26 @@ mod test_parse {
let arg = Located::new(0, 0, 7, 8, Int("1"));
let args = bumpalo::vec![in &arena; arg];
let tuple = arena.alloc((Located::new(0, 0, 1, 5, Var(module_parts, "whee")), args));
let expected = Apply(tuple);
let expected = Expr::Apply(tuple);
let actual = parse_with(&arena, "(whee) 1");
assert_eq!(Ok(expected), actual);
}
// CLOSURE
// A one-argument closure should parse to `Closure`, holding the located argument
// pattern and the located body expression.
#[test]
fn basic_closure() {
    let arena = Bump::new();
    let arg_pattern = Located::new(0, 0, 1, 2, Identifier("a"));
    let body = Located::new(0, 0, 6, 8, Int("42"));
    let closure_parts = arena.alloc((bumpalo::vec![in &arena; arg_pattern], body));

    assert_eq!(
        Ok(Expr::Closure(closure_parts)),
        parse_with(&arena, "\\a -> 42")
    );
}
// TODO test hex/oct/binary parsing
//
// TODO test for \t \r and \n in string literals *outside* unicode escape sequence!