diff --git a/src/parse/blankspace.rs b/src/parse/blankspace.rs index bf2f17112d..fa04744258 100644 --- a/src/parse/blankspace.rs +++ b/src/parse/blankspace.rs @@ -3,6 +3,7 @@ use bumpalo::collections::vec::Vec; use bumpalo::Bump; use parse::ast::{Expr, Space}; use parse::parser::{and, loc, map_with_arena, unexpected, unexpected_eof, Parser, State}; +use region::Located; /// What type of comment (if any) are we currently parsing? #[derive(Debug, PartialEq, Eq)] @@ -12,7 +13,10 @@ enum CommentParsing { No, } -pub fn space0_before<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Expr<'a>> +/// Parse the given expression with 0 or more spaces before it. +/// Returns a Located where the location is around the Expr, ignoring the spaces. +/// The Expr will be wrapped in a SpaceBefore if there were any interesting spaces found. +pub fn space0_before<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Located>> where P: Parser<'a, Expr<'a>>, { @@ -20,15 +24,21 @@ where and(space0(min_indent), loc(parser)), |arena, (space_list, loc_expr)| { if space_list.is_empty() { - loc_expr.value + loc_expr } else { - Expr::SpaceBefore(space_list, arena.alloc(loc_expr)) + Located { + region: loc_expr.region.clone(), + value: Expr::SpaceBefore(space_list, arena.alloc(loc_expr)), + } } }, ) } -pub fn space1_before<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Expr<'a>> +/// Parse the given expression with 1 or more spaces before it. +/// Returns a Located where the location is around the Expr, ignoring the spaces. +/// The Expr will be wrapped in a SpaceBefore if there were any interesting spaces found. +pub fn space1_before<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Located>> where P: Parser<'a, Expr<'a>>, { @@ -36,15 +46,21 @@ where and(space1(min_indent), loc(parser)), |arena, (space_list, loc_expr)| { if space_list.is_empty() { - loc_expr.value + loc_expr } else { - Expr::SpaceBefore(space_list, arena.alloc(loc_expr)) + Located { + region: loc_expr.region.clone(), + value: Expr::SpaceBefore(space_list, arena.alloc(loc_expr)), + } } }, ) } -pub fn space0_after<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Expr<'a>> +/// Parse the given expression with 0 or more spaces after it. +/// Returns a Located where the location is around the Expr, ignoring the spaces. +/// The Expr will be wrapped in a SpaceAfter if there were any interesting spaces found. +pub fn space0_after<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Located>> where P: Parser<'a, Expr<'a>>, { @@ -52,15 +68,21 @@ where and(space0(min_indent), loc(parser)), |arena, (space_list, loc_expr)| { if space_list.is_empty() { - loc_expr.value + loc_expr } else { - Expr::SpaceAfter(arena.alloc(loc_expr), space_list) + Located { + region: loc_expr.region.clone(), + value: Expr::SpaceAfter(arena.alloc(loc_expr), space_list), + } } }, ) } -pub fn space1_after<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Expr<'a>> +/// Parse the given expression with 1 or more spaces after it. +/// Returns a Located where the location is around the Expr, ignoring the spaces. +/// The Expr will be wrapped in a SpaceAfter if there were any interesting spaces found. +pub fn space1_after<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Located>> where P: Parser<'a, Expr<'a>>, { @@ -68,9 +90,12 @@ where and(space1(min_indent), loc(parser)), |arena, (space_list, loc_expr)| { if space_list.is_empty() { - loc_expr.value + loc_expr } else { - Expr::SpaceAfter(arena.alloc(loc_expr), space_list) + Located { + region: loc_expr.region.clone(), + value: Expr::SpaceAfter(arena.alloc(loc_expr), space_list), + } } }, ) @@ -118,7 +143,9 @@ fn spaces<'a>(require_at_least_one: bool, _min_indent: u16) -> impl Parser<'a, & comment_parsing = CommentParsing::Line; } nonblank => { - return if space_list.is_empty() && require_at_least_one { + return if require_at_least_one && chars_parsed <= 1 { + // We've parsed 1 char and it was not a space, + // but we require parsing at least one space! Err(unexpected( nonblank, chars_parsed, @@ -259,7 +286,7 @@ fn spaces<'a>(require_at_least_one: bool, _min_indent: u16) -> impl Parser<'a, & } } - if space_list.is_empty() && require_at_least_one { + if require_at_least_one && chars_parsed == 0 { Err(unexpected_eof(chars_parsed, state.attempting, state)) } else { Ok((space_list.into_bump_slice(), state)) diff --git a/src/parse/mod.rs b/src/parse/mod.rs index e294e0a128..f20af56958 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -8,6 +8,7 @@ pub mod parser; pub mod problems; pub mod string_literal; +use bumpalo::collections::Vec; use bumpalo::Bump; use operator::Operator; use parse::ast::{Attempting, Expr}; @@ -15,8 +16,8 @@ use parse::blankspace::{space0, space1_before}; use parse::ident::{ident, Ident}; use parse::number_literal::number_literal; use parse::parser::{ - and, attempt, ch, either, loc, map, map_with_arena, one_of3, one_of4, one_of6, optional, - skip_first, string, unexpected, unexpected_eof, Either, ParseResult, Parser, State, + and, attempt, char, either, loc, map, map_with_arena, one_of3, one_of4, one_of6, one_or_more, + optional, skip_first, string, unexpected, unexpected_eof, Either, ParseResult, Parser, State, }; use parse::string_literal::string_literal; use region::Located; @@ -27,9 +28,26 @@ pub fn expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> { move |arena, state| parse_expr(min_indent, arena, state) } +fn parse_expr_body_without_operators<'a>( + min_indent: u16, + arena: &'a Bump, + state: State<'a>, +) -> ParseResult<'a, Expr<'a>> { + one_of6( + string_literal(), + record_literal(), + number_literal(), + when(min_indent), + conditional(min_indent), + ident_etc(min_indent), + ) + .parse(arena, state) +} + fn parse_expr<'a>(min_indent: u16, arena: &'a Bump, state: State<'a>) -> ParseResult<'a, Expr<'a>> { let expr_parser = map_with_arena( and( + // First parse the body without operators, then try to parse possible operators after. loc(move |arena, state| parse_expr_body_without_operators(min_indent, arena, state)), optional(and( and(space0(min_indent), and(loc(operator()), space0(min_indent))), @@ -61,27 +79,10 @@ fn parse_expr<'a>(min_indent: u16, arena: &'a Bump, state: State<'a>) -> ParseRe attempt(Attempting::Expression, expr_parser).parse(arena, state) } -fn parse_expr_body_without_operators<'a>( - min_indent: u16, - arena: &'a Bump, - state: State<'a>, -) -> ParseResult<'a, Expr<'a>> { - one_of6( - string_literal(), - record_literal(), - number_literal(), - when(min_indent), - conditional(min_indent), - ident_etc(min_indent), - ) - .parse(arena, state) -} - -pub fn loc_function_args<'a>(_min_indent: u16) -> impl Parser<'a, &'a [Located>]> { - move |_arena, _state| { - panic!("TODO stop early if we see an operator after the whitespace - precedence!"); - // zero_or_more(after(one_or_more(whitespace(min_indent)), function_arg())) - } +fn function_arg<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> { + // Don't parse operators, because they have a higher precedence than function application. + // If we encounter one, we're done parsing function args! + move |arena, state| parse_expr_body_without_operators(min_indent, arena, state) } pub fn when<'a>(_min_indent: u16) -> impl Parser<'a, Expr<'a>> { @@ -98,33 +99,36 @@ pub fn conditional<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> { map_with_arena( skip_first( string(keyword::IF), - loc(space1_before(expr(min_indent), min_indent)), + space1_before(expr(min_indent), min_indent), ), |arena, loc_expr| Expr::If(arena.alloc(loc_expr)), ), map_with_arena( skip_first( string(keyword::THEN), - loc(space1_before(expr(min_indent), min_indent)), + space1_before(expr(min_indent), min_indent), ), |arena, loc_expr| Expr::Then(arena.alloc(loc_expr)), ), map_with_arena( skip_first( string(keyword::ELSE), - loc(space1_before(expr(min_indent), min_indent)), + space1_before(expr(min_indent), min_indent), ), |arena, loc_expr| Expr::Else(arena.alloc(loc_expr)), ), map_with_arena( skip_first( string(keyword::CASE), - loc(space1_before(expr(min_indent), min_indent)), + space1_before(expr(min_indent), min_indent), ), |arena, loc_expr| Expr::Case(arena.alloc(loc_expr)), ), ) } +pub fn loc_function_args<'a>(min_indent: u16) -> impl Parser<'a, Vec<'a, Located>>> { + one_or_more(space1_before(function_arg(min_indent), min_indent)) +} /// When we parse an ident like `foo ` it could be any of these: /// @@ -137,49 +141,49 @@ pub fn ident_etc<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> { map_with_arena( and( loc(ident()), - either( - // Check if this is either a def or type annotation - and(space0(min_indent), either(ch('='), ch(':'))), - // Check if this is function application + optional(either( + // There may optionally be function args after this ident loc_function_args(min_indent), - ), + // If there aren't any args, there may be a '=' or ':' after it. + // (It's a syntax error to write e.g. `foo bar =` - so if there + // were any args, there is definitely no need to parse '=' or ':'!) + and(space0(min_indent), either(char('='), char(':'))), + )), ), - |arena, (loc_ident, equals_or_loc_args)| { - match equals_or_loc_args { - Either::First((_space_list, Either::First(()))) => { - // We have now parsed the beginning of a def (e.g. `foo =`) - panic!("TODO parse def, making sure not to drop comments!"); - } - Either::First((_space_list, Either::Second(()))) => { - // We have now parsed the beginning of a type annotation (e.g. `foo :`) - panic!("TODO parse type annotation, making sure not to drop comments!"); - } - Either::Second(loc_args) => { - // This appears to be a var, keyword, or function application. - let name_expr = match loc_ident.value { - Ident::Var(info) => Expr::Var(info.module_parts, info.value), - Ident::Variant(info) => Expr::Variant(info.module_parts, info.value), - Ident::Field(info) => Expr::QualifiedField(info.module_parts, info.value), - Ident::AccessorFunction(string) => Expr::AccessorFunction(string), - Ident::Malformed(string) => Expr::MalformedIdent(string), + |arena, (loc_ident, opt_extras)| { + // This appears to be a var, keyword, or function application. + + match opt_extras { + Some(Either::First(loc_args)) => { + let loc_expr = Located { + region: loc_ident.region, + value: ident_to_expr(loc_ident.value), }; - if loc_args.is_empty() { - name_expr - } else { - let loc_expr = Located { - region: loc_ident.region, - value: name_expr, - }; - - Expr::Apply(arena.alloc((loc_expr, loc_args))) - } + Expr::Apply(arena.alloc((loc_expr, loc_args.into_bump_slice()))) } + Some(Either::Second((_space_list, Either::First(())))) => { + panic!("TODO handle def, making sure not to drop comments!"); + } + Some(Either::Second((_space_list, Either::Second(())))) => { + panic!("TODO handle annotation, making sure not to drop comments!"); + } + None => ident_to_expr(loc_ident.value), } }, ) } +fn ident_to_expr<'a>(src: Ident<'a>) -> Expr<'a> { + match src { + Ident::Var(info) => Expr::Var(info.module_parts, info.value), + Ident::Variant(info) => Expr::Variant(info.module_parts, info.value), + Ident::Field(info) => Expr::QualifiedField(info.module_parts, info.value), + Ident::AccessorFunction(string) => Expr::AccessorFunction(string), + Ident::Malformed(string) => Expr::MalformedIdent(string), + } +} + pub fn operator<'a>() -> impl Parser<'a, Operator> { one_of3( map(string("+"), |_| Operator::Plus), diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 621d7400ab..1b93496d62 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -177,6 +177,19 @@ where move |_, state| Ok((value.clone(), state)) } +pub fn and_then<'a, P1, P2, F, Before, After>(parser: P1, transform: F) -> impl Parser<'a, After> +where + P1: Parser<'a, Before>, + P2: Parser<'a, After>, + F: Fn(Before) -> P2, +{ + move |arena, state| { + parser + .parse(arena, state) + .and_then(|(output, next_state)| transform(output).parse(arena, next_state)) + } +} + pub fn map<'a, P, F, Before, After>(parser: P, transform: F) -> impl Parser<'a, After> where P: Parser<'a, Before>, @@ -371,10 +384,11 @@ fn line_too_long<'a>(attempting: Attempting, state: State<'a>) -> (Fail, State<' } /// A single char. -pub fn ch<'a>(expected: char) -> impl Parser<'a, ()> { +pub fn char<'a>(expected: char) -> impl Parser<'a, ()> { move |_arena, state: State<'a>| match state.input.chars().next() { Some(actual) if expected == actual => Ok(((), state.advance_without_indenting(1)?)), - _ => Err(unexpected_eof(1, Attempting::Keyword, state)), + Some(other_ch) => Err(unexpected(other_ch, 0, state, Attempting::Keyword)), + _ => Err(unexpected_eof(0, Attempting::Keyword, state)), } } @@ -391,7 +405,7 @@ pub fn string<'a>(string: &'static str) -> impl Parser<'a, ()> { // TODO do this comparison in one SIMD instruction (on supported systems) match input.get(0..len) { Some(next_str) if next_str == string => Ok(((), state.advance_without_indenting(len)?)), - _ => Err(unexpected_eof(len, Attempting::Keyword, state)), + _ => Err(unexpected_eof(0, Attempting::Keyword, state)), } } } diff --git a/tests/test_parse.rs b/tests/test_parse.rs index 02f4943449..f641980bbb 100644 --- a/tests/test_parse.rs +++ b/tests/test_parse.rs @@ -14,7 +14,8 @@ mod helpers; #[cfg(test)] mod test_parse { - use bumpalo::Bump; + use bumpalo::collections::vec::Vec; + use bumpalo::{self, Bump}; use helpers::parse_with; use roc::operator::Operator::*; use roc::parse::ast::Attempting; @@ -357,6 +358,36 @@ mod test_parse { assert_eq!(Ok(expected), actual); } + // VAR + + #[test] + fn basic_var() { + let arena = Bump::new(); + let module_parts = Vec::new_in(&arena).into_bump_slice(); + let expected = Var(module_parts, "whee"); + let actual = parse_with(&arena, "whee"); + + assert_eq!(Ok(expected), actual); + } + + // APPLY + + #[test] + fn basic_apply() { + let arena = Bump::new(); + let module_parts = Vec::new_in(&arena).into_bump_slice(); + let arg = Located::new(0, 5, 0, 6, Int("1")); + let args = bumpalo::vec![in &arena; arg]; + let tuple = arena.alloc(( + Located::new(0, 0, 0, 4, Var(module_parts, "whee")), + args.into_bump_slice(), + )); + let expected = Apply(tuple); + let actual = parse_with(&arena, "whee 1"); + + assert_eq!(Ok(expected), actual); + } + // TODO test hex/oct/binary parsing // // TODO test for \t \r and \n in string literals *outside* unicode escape sequence!