diff --git a/compiler/can/src/expr.rs b/compiler/can/src/expr.rs index d1718703e6..2eafe5b2d4 100644 --- a/compiler/can/src/expr.rs +++ b/compiler/can/src/expr.rs @@ -737,10 +737,10 @@ pub fn canonicalize_expr<'a>( use roc_problem::can::RuntimeError::*; (RuntimeError(MalformedClosure(region)), Output::default()) } - ast::Expr::MalformedIdent(name) => { + ast::Expr::MalformedIdent(name, bad_ident) => { use roc_problem::can::RuntimeError::*; - let problem = MalformedIdentifier((*name).into(), region); + let problem = MalformedIdentifier((*name).into(), *bad_ident, region); env.problem(Problem::RuntimeError(problem.clone())); (RuntimeError(problem), Output::default()) diff --git a/compiler/can/src/operator.rs b/compiler/can/src/operator.rs index 048c2b9d83..2572bc11ec 100644 --- a/compiler/can/src/operator.rs +++ b/compiler/can/src/operator.rs @@ -88,8 +88,8 @@ pub fn desugar_expr<'a>(arena: &'a Bump, loc_expr: &'a Located>) -> &'a | Nested(AccessorFunction(_)) | Var { .. } | Nested(Var { .. }) - | MalformedIdent(_) - | Nested(MalformedIdent(_)) + | MalformedIdent(_, _) + | Nested(MalformedIdent(_, _)) | MalformedClosure | Nested(MalformedClosure) | PrecedenceConflict(_, _, _, _) diff --git a/compiler/can/src/pattern.rs b/compiler/can/src/pattern.rs index a26e9ddd90..d95965da52 100644 --- a/compiler/can/src/pattern.rs +++ b/compiler/can/src/pattern.rs @@ -379,6 +379,11 @@ pub fn canonicalize_pattern<'a>( malformed_pattern(env, problem, region) } + MalformedIdent(_str, problem) => { + let problem = MalformedPatternProblem::BadIdent(*problem); + malformed_pattern(env, problem, region) + } + QualifiedIdentifier { .. } => { let problem = MalformedPatternProblem::QualifiedIdentifier; malformed_pattern(env, problem, region) diff --git a/compiler/fmt/src/expr.rs b/compiler/fmt/src/expr.rs index 80056cce7f..cbed249bca 100644 --- a/compiler/fmt/src/expr.rs +++ b/compiler/fmt/src/expr.rs @@ -30,7 +30,7 @@ impl<'a> Formattable<'a> for Expr<'a> { | Access(_, _) | AccessorFunction(_) | Var { .. } - | MalformedIdent(_) + | MalformedIdent(_, _) | MalformedClosure | GlobalTag(_) | PrivateTag(_) => false, @@ -303,7 +303,7 @@ impl<'a> Formattable<'a> for Expr<'a> { buf.push('.'); buf.push_str(key); } - MalformedIdent(_) => {} + MalformedIdent(_, _) => {} MalformedClosure => {} PrecedenceConflict(_, _, _, _) => {} } diff --git a/compiler/fmt/src/pattern.rs b/compiler/fmt/src/pattern.rs index 3f4fb2667f..f768b26813 100644 --- a/compiler/fmt/src/pattern.rs +++ b/compiler/fmt/src/pattern.rs @@ -39,6 +39,7 @@ impl<'a> Formattable<'a> for Pattern<'a> { | Pattern::StrLiteral(_) | Pattern::Underscore(_) | Pattern::Malformed(_) + | Pattern::MalformedIdent(_, _) | Pattern::QualifiedIdentifier { .. } => false, } } @@ -157,7 +158,7 @@ impl<'a> Formattable<'a> for Pattern<'a> { } // Malformed - Malformed(string) => buf.push_str(string), + Malformed(string) | MalformedIdent(string, _) => buf.push_str(string), QualifiedIdentifier { module_name, ident } => { if !module_name.is_empty() { buf.push_str(module_name); diff --git a/compiler/parse/src/ast.rs b/compiler/parse/src/ast.rs index f33ba8149b..6d63379acc 100644 --- a/compiler/parse/src/ast.rs +++ b/compiler/parse/src/ast.rs @@ -150,7 +150,7 @@ pub enum Expr<'a> { Nested(&'a Expr<'a>), // Problems - MalformedIdent(&'a str), + MalformedIdent(&'a str, crate::ident::BadIdent), MalformedClosure, // Both operators were non-associative, e.g. (True == False == False). // We should tell the author to disambiguate by grouping them with parens. @@ -356,6 +356,7 @@ pub enum Pattern<'a> { // Malformed Malformed(&'a str), + MalformedIdent(&'a str, crate::ident::BadIdent), QualifiedIdentifier { module_name: &'a str, ident: &'a str, @@ -411,7 +412,7 @@ impl<'a> Pattern<'a> { } } Ident::AccessorFunction(string) => Pattern::Malformed(string), - Ident::Malformed(string) => Pattern::Malformed(string), + Ident::Malformed(string, _problem) => Pattern::Malformed(string), } } diff --git a/compiler/parse/src/blankspace.rs b/compiler/parse/src/blankspace.rs index 029ddda571..93e6653a3b 100644 --- a/compiler/parse/src/blankspace.rs +++ b/compiler/parse/src/blankspace.rs @@ -337,6 +337,28 @@ where }) } +pub fn space1_e<'a, E>( + min_indent: u16, + space_problem: fn(BadInputError, Row, Col) -> E, + indent_problem: fn(Row, Col) -> E, + no_parse_problem: fn(Row, Col) -> E, +) -> impl Parser<'a, &'a [CommentOrNewline<'a>], E> +where + E: 'a, +{ + move |arena, state| match space0_e(min_indent, space_problem, indent_problem) + .parse(arena, state) + { + Ok((NoProgress, _, state)) => Err(( + NoProgress, + no_parse_problem(state.line, state.column), + state, + )), + Ok((MadeProgress, spaces, state)) => Ok((MadeProgress, spaces, state)), + Err(bad) => Err(bad), + } +} + /// One or more (spaces/comments/newlines). pub fn space1<'a>(min_indent: u16) -> impl Parser<'a, &'a [CommentOrNewline<'a>], SyntaxError<'a>> { // TODO try benchmarking a short-circuit for the typical case: see if there is @@ -434,6 +456,62 @@ pub fn spaces_exactly<'a>(spaces_expected: u16) -> impl Parser<'a, (), SyntaxErr } } +#[inline(always)] +pub fn spaces_exactly_e<'a>(spaces_expected: u16) -> impl Parser<'a, (), parser::EExpr<'a>> { + use parser::EExpr; + + move |arena: &'a Bump, state: State<'a>| { + if spaces_expected == 0 { + return Ok((NoProgress, (), state)); + } + + let mut state = state; + let mut spaces_seen: u16 = 0; + + while !state.bytes.is_empty() { + match peek_utf8_char(&state) { + Ok((' ', _)) => { + spaces_seen += 1; + state = state.advance_spaces_e(arena, 1, EExpr::IndentStart)?; + if spaces_seen == spaces_expected { + return Ok((MadeProgress, (), state)); + } + } + Ok(_) => { + return Err(( + NoProgress, + EExpr::IndentStart(state.line, state.column), + state, + )) + } + + Err(SyntaxError::BadUtf8) => { + // If we hit an invalid UTF-8 character, bail out immediately. + let progress = Progress::progress_when(spaces_seen != 0); + return Err(( + progress, + EExpr::Space(BadInputError::BadUtf8, state.line, state.column), + state, + )); + } + Err(_) => { + return Err(( + NoProgress, + EExpr::IndentStart(state.line, state.column), + state, + )) + } + } + } + + Err(( + NoProgress, + EExpr::IndentStart(state.line, state.column), + state, + )) + } +} + #[inline(always)] fn spaces<'a>( require_at_least_one: bool, diff --git a/compiler/parse/src/expr.rs b/compiler/parse/src/expr.rs index 80cca11cca..7e2baa1ab7 100644 --- a/compiler/parse/src/expr.rs +++ b/compiler/parse/src/expr.rs @@ -1,18 +1,15 @@ -use crate::ast::{ - AssignedField, Attempting, CommentOrNewline, Def, Expr, Pattern, Spaceable, TypeAnnotation, -}; +use crate::ast::{AssignedField, CommentOrNewline, Def, Expr, Pattern, Spaceable, TypeAnnotation}; use crate::blankspace::{ - line_comment, space0, space0_after, space0_after_e, space0_around_ee, space0_before, - space0_before_e, space0_e, space1, space1_before, spaces_exactly, + line_comment, space0_after_e, space0_around_ee, space0_before_e, space0_e, space1_e, + spaces_exactly_e, }; use crate::ident::{ident, lowercase_ident, Ident}; use crate::keyword; use crate::parser::{ - self, allocated, and_then_with_indent_level, ascii_char, ascii_string, attempt, backtrackable, - map, newline_char, not, not_followed_by, optional, sep_by1, sep_by1_e, specialize, - specialize_ref, then, trailing_sep_by0, unexpected, unexpected_eof, word1, word2, EExpr, - EInParens, ELambda, ERecord, EString, Either, If, List, Number, ParseResult, Parser, State, - SyntaxError, When, + self, allocated, and_then_with_indent_level, ascii_char, backtrackable, map, newline_char, + optional, sep_by1, sep_by1_e, specialize, specialize_ref, then, trailing_sep_by0, word1, word2, + EExpr, EInParens, ELambda, EPattern, ERecord, EString, Either, If, List, Number, ParseResult, + Parser, State, SyntaxError, Type, When, }; use crate::pattern::loc_closure_param; use crate::type_annotation; @@ -22,11 +19,15 @@ use roc_module::operator::{BinOp, CalledVia, UnaryOp}; use roc_region::all::{Located, Region}; use crate::parser::Progress::{self, *}; + pub fn expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> { // Recursive parsers must not directly invoke functions which return (impl Parser), // as this causes rustc to stack overflow. Thus, parse_expr must be a // separate function which recurses by calling itself directly. - move |arena, state: State<'a>| parse_expr(min_indent, arena, state) + specialize( + |e, _, _| SyntaxError::Expr(e), + move |arena, state: State<'a>| parse_expr_help(min_indent, arena, state), + ) } fn loc_expr_in_parens_help<'a>( @@ -53,8 +54,8 @@ fn loc_expr_in_parens_help_help<'a>( word1(b'(', EInParens::Open), space0_around_ee( specialize_ref( - EInParens::Syntax, - loc!(move |arena, state| parse_expr(min_indent, arena, state)) + EInParens::Expr, + loc!(move |arena, state| parse_expr_help(min_indent, arena, state)) ), min_indent, EInParens::Space, @@ -65,15 +66,6 @@ fn loc_expr_in_parens_help_help<'a>( ) } -fn loc_function_arg_in_parens_etc<'a>( - min_indent: u16, -) -> impl Parser<'a, Located>, SyntaxError<'a>> { - specialize( - |e, _, _| SyntaxError::Expr(e), - loc_function_arg_in_parens_etc_help(min_indent), - ) -} - fn loc_function_arg_in_parens_etc_help<'a>( min_indent: u16, ) -> impl Parser<'a, Located>, EExpr<'a>> { @@ -100,42 +92,37 @@ fn loc_function_arg_in_parens_etc_help<'a>( ) } -fn loc_expr_in_parens_etc<'a>( - min_indent: u16, -) -> impl Parser<'a, Located>, SyntaxError<'a>> { - specialize( - |e, _, _| SyntaxError::Expr(e), - loc_expr_in_parens_etc_help(min_indent), - ) -} - fn loc_expr_in_parens_etc_help<'a>( min_indent: u16, ) -> impl Parser<'a, Located>, EExpr<'a>> { then( loc!(and!( specialize(EExpr::InParens, loc_expr_in_parens_help(min_indent)), - optional(either!( - // There may optionally be function args after the ')' - // e.g. ((foo bar) baz) - loc_function_args_help(min_indent), - // If there aren't any args, there may be a '=' or ':' after it. - // - // (It's a syntax error to write e.g. `foo bar =` - so if there - // were any args, there is definitely no need to parse '=' or ':'!) - // - // Also, there may be a '.' for field access (e.g. `(foo).bar`), - // but we only want to look for that if there weren't any args, - // as if there were any args they'd have consumed it anyway - // e.g. in `((foo bar) baz.blah)` the `.blah` will be consumed by the `baz` parser - either!( - record_field_access_chain(), + and!( + one_of![record_field_access_chain(), |a, s| Ok(( + NoProgress, + Vec::new_in(a), + s + ))], + optional(either!( + // There may optionally be function args after the ')' + // e.g. ((foo bar) baz) + loc_function_args_help(min_indent), + // If there aren't any args, there may be a '=' or ':' after it. + // + // (It's a syntax error to write e.g. `foo bar =` - so if there + // were any args, there is definitely no need to parse '=' or ':'!) + // + // Also, there may be a '.' for field access (e.g. `(foo).bar`), + // but we only want to look for that if there weren't any args, + // as if there were any args they'd have consumed it anyway + // e.g. in `((foo bar) baz.blah)` the `.blah` will be consumed by the `baz` parser and!( space0_e(min_indent, EExpr::Space, EExpr::IndentEquals), equals_with_indent_help() ) - ) - )) + )) + ) )), move |arena, state, _progress, parsed| helper_help(arena, state, parsed, min_indent), ) @@ -176,12 +163,10 @@ fn record_field_access<'a>() -> impl Parser<'a, &'a str, EExpr<'a>> { type Extras<'a> = Located<( Located>, - Option< - Either< - Vec<'a, Located>>, - Either, (&'a [CommentOrNewline<'a>], u16)>, - >, - >, + ( + Vec<'a, &'a str>, + Option>>, (&'a [CommentOrNewline<'a>], u16)>>, + ), )>; fn helper_help<'a>( @@ -192,7 +177,21 @@ fn helper_help<'a>( ) -> ParseResult<'a, Located>, EExpr<'a>> { // We parse the parenthetical expression *and* the arguments after it // in one region, so that (for example) the region for Apply includes its args. - let (loc_expr, opt_extras) = loc_expr_with_extras.value; + let (mut loc_expr, (accesses, opt_extras)) = loc_expr_with_extras.value; + + let mut value = loc_expr.value; + + for field in accesses { + // Wrap the previous answer in the new one, so we end up + // with a nested Expr. That way, `foo.bar.baz` gets represented + // in the AST as if it had been written (foo.bar).baz all along. + value = Expr::Access(arena.alloc(value), field); + } + + loc_expr = Located { + region: loc_expr.region, + value, + }; match opt_extras { Some(Either::First(loc_args)) => Ok(( @@ -200,7 +199,7 @@ fn helper_help<'a>( expr_in_parens_then_arguments(arena, loc_expr, loc_args, loc_expr_with_extras.region), state, )), - Some(Either::Second(Either::Second((spaces_before_equals, equals_indent)))) => { + Some(Either::Second((spaces_before_equals, equals_indent))) => { // '=' after optional spaces expr_in_parens_then_equals_help( min_indent, @@ -211,14 +210,6 @@ fn helper_help<'a>( ) .parse(arena, state) } - Some(Either::Second(Either::First(fields))) => { - // '.' and a record field immediately after ')', no optional spaces - Ok(( - MadeProgress, - expr_in_parens_then_access(arena, loc_expr, fields), - state, - )) - } None => Ok((MadeProgress, loc_expr, state)), } } @@ -234,12 +225,12 @@ fn expr_in_parens_then_equals_help<'a>( let region = loc_expr.region; // Re-parse the Expr as a Pattern. - let pattern = match expr_to_pattern(arena, &loc_expr.value) { + let pattern = match expr_to_pattern_help(arena, &loc_expr.value) { Ok(valid) => valid, - Err(fail) => { + Err(_) => { return Err(( MadeProgress, - EExpr::Syntax(arena.alloc(fail), state.line, state.column), + EExpr::MalformedPattern(state.line, state.column), state, )) } @@ -322,36 +313,106 @@ fn expr_in_parens_then_access<'a>( } } -fn loc_parse_expr_body_without_operators<'a>( +fn loc_parse_expr_body_without_operators_help<'a>( min_indent: u16, arena: &'a Bump, state: State<'a>, -) -> ParseResult<'a, Located>, SyntaxError<'a>> { +) -> ParseResult<'a, Located>, EExpr<'a>> { one_of!( - loc_expr_in_parens_etc(min_indent), - loc!(string_literal()), - loc!(number_literal()), - loc!(closure(min_indent)), - loc!(record_literal(min_indent)), - loc!(list_literal(min_indent)), - loc!(unary_op(min_indent)), - loc!(when::expr(min_indent)), - loc!(if_expr(min_indent)), - loc!(ident_etc(min_indent)) + loc_expr_in_parens_etc_help(min_indent), + loc!(specialize(EExpr::Str, string_literal_help())), + loc!(specialize(EExpr::Number, number_literal_help())), + loc!(specialize(EExpr::Lambda, closure_help(min_indent))), + loc!(record_literal_help(min_indent)), + loc!(specialize(EExpr::List, list_literal_help(min_indent))), + loc!(unary_op_help(min_indent)), + loc!(specialize(EExpr::When, when::expr_help(min_indent))), + loc!(specialize(EExpr::If, if_expr_help(min_indent))), + loc!(ident_etc_help(min_indent)), + fail_expr_start_e() ) .parse(arena, state) } +fn fail_expr_start_e<'a, T>() -> impl Parser<'a, T, EExpr<'a>> +where + T: 'a, +{ + |_arena, state: State<'a>| Err((NoProgress, EExpr::Start(state.line, state.column), state)) +} + +fn unary_not<'a>() -> impl Parser<'a, (), EExpr<'a>> { + move |_arena: &'a Bump, state: State<'a>| { + if state.bytes.starts_with(b"!") && state.bytes.get(1) != Some(&b'=') { + // don't parse the `!` if it's followed by a `=` + Ok(( + MadeProgress, + (), + State { + bytes: &state.bytes[1..], + column: state.column + 1, + ..state + }, + )) + } else { + // this is not a negated expression + Err((NoProgress, EExpr::UnaryNot(state.line, state.column), state)) + } + } +} + +fn unary_negate<'a>() -> impl Parser<'a, (), EExpr<'a>> { + move |_arena: &'a Bump, state: State<'a>| { + // a minus is unary iff + // + // - it is preceded by whitespace (spaces, newlines, comments) + // - it is not followed by whitespace + // - it is not followed by a number literal + // + // The last condition is because of overflow, this would overflow + // + // Num.negate 125 + // + // while + // + // -125 + // + // is perfectly fine (assuming I8 here). So it is vital the minus is + // parses as part of the number literal, and not as a unary minus + let followed_by_whitespace = state + .bytes + .get(1) + .map(|c| c.is_ascii_whitespace() || *c == b'#' || c.is_ascii_digit()) + .unwrap_or(false); + + if state.bytes.starts_with(b"-") && !followed_by_whitespace { + // the negate is only unary if it is not followed by whitespace + Ok(( + MadeProgress, + (), + State { + bytes: &state.bytes[1..], + column: state.column + 1, + ..state + }, + )) + } else { + // this is not a negated expression + Err((NoProgress, EExpr::UnaryNot(state.line, state.column), state)) + } + } +} + /// Unary (!) or (-) /// /// e.g. `!x` or `-x` -pub fn unary_op<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> { +fn unary_op_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, EExpr<'a>> { one_of!( map_with_arena!( // must backtrack to distinguish `!x` from `!= y` and!( - loc!(backtrackable(ascii_char(b'!'))), - loc!(move |arena, state| parse_expr(min_indent, arena, state)) + loc!(unary_not()), + loc!(move |arena, state| parse_expr_help(min_indent, arena, state)) ), |arena: &'a Bump, (loc_op, loc_expr): (Located<()>, Located>)| { Expr::UnaryOp(arena.alloc(loc_expr), loc_op.map(|_| UnaryOp::Not)) @@ -360,8 +421,8 @@ pub fn unary_op<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a map_with_arena!( and!( // must backtrack to distinguish `x - 1` from `-1` - loc!(backtrackable(ascii_char(b'-'))), - loc!(move |arena, state| parse_expr(min_indent, arena, state)) + loc!(unary_negate()), + loc!(move |arena, state| parse_expr_help(min_indent, arena, state)) ), |arena: &'a Bump, (loc_op, loc_expr): (Located<()>, Located>)| { Expr::UnaryOp(arena.alloc(loc_expr), loc_op.map(|_| UnaryOp::Negate)) @@ -370,27 +431,34 @@ pub fn unary_op<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a ) } -fn parse_expr<'a>( +fn parse_expr_help<'a>( min_indent: u16, arena: &'a Bump, state: State<'a>, -) -> ParseResult<'a, Expr<'a>, SyntaxError<'a>> { +) -> ParseResult<'a, Expr<'a>, EExpr<'a>> { let expr_parser = crate::parser::map_with_arena( and!( // First parse the body without operators, then try to parse possible operators after. - move |arena, state| loc_parse_expr_body_without_operators(min_indent, arena, state), + move |arena, state| loc_parse_expr_body_without_operators_help( + min_indent, arena, state + ), // Parse the operator, with optional spaces before it. // // Since spaces can only wrap an Expr, not an BinOp, we have to first // parse the spaces and then attach them retroactively to the expression // preceding the operator (the one we parsed before considering operators). optional(and!( - and!(space0(min_indent), loc!(binop())), + and!( + space0_e(min_indent, EExpr::Space, EExpr::IndentEnd), + loc!(binop_help()) + ), // The spaces *after* the operator can be attached directly to // the expression following the operator. - space0_before( - loc!(move |arena, state| parse_expr(min_indent, arena, state)), + space0_before_e( + loc!(move |arena, state| parse_expr_help(min_indent, arena, state)), min_indent, + EExpr::Space, + EExpr::IndentEnd, ) )) ), @@ -417,10 +485,7 @@ fn parse_expr<'a>( /// If the given Expr would parse the same way as a valid Pattern, convert it. /// Example: (foo) could be either an Expr::Var("foo") or Pattern::Identifier("foo") -pub fn expr_to_pattern<'a>( - arena: &'a Bump, - expr: &Expr<'a>, -) -> Result, SyntaxError<'a>> { +fn expr_to_pattern_help<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result, ()> { match expr { Expr::Var { module_name, ident } => { if module_name.is_empty() { @@ -433,14 +498,14 @@ pub fn expr_to_pattern<'a>( Expr::PrivateTag(value) => Ok(Pattern::PrivateTag(value)), Expr::Apply(loc_val, loc_args, _) => { let region = loc_val.region; - let value = expr_to_pattern(arena, &loc_val.value)?; + let value = expr_to_pattern_help(arena, &loc_val.value)?; let val_pattern = arena.alloc(Located { region, value }); let mut arg_patterns = Vec::with_capacity_in(loc_args.len(), arena); for loc_arg in loc_args.iter() { let region = loc_arg.region; - let value = expr_to_pattern(arena, &loc_arg.value)?; + let value = expr_to_pattern_help(arena, &loc_arg.value)?; arg_patterns.push(Located { region, value }); } @@ -451,15 +516,17 @@ pub fn expr_to_pattern<'a>( } Expr::SpaceBefore(sub_expr, spaces) => Ok(Pattern::SpaceBefore( - arena.alloc(expr_to_pattern(arena, sub_expr)?), + arena.alloc(expr_to_pattern_help(arena, sub_expr)?), spaces, )), Expr::SpaceAfter(sub_expr, spaces) => Ok(Pattern::SpaceAfter( - arena.alloc(expr_to_pattern(arena, sub_expr)?), + arena.alloc(expr_to_pattern_help(arena, sub_expr)?), spaces, )), - Expr::ParensAround(sub_expr) | Expr::Nested(sub_expr) => expr_to_pattern(arena, sub_expr), + Expr::ParensAround(sub_expr) | Expr::Nested(sub_expr) => { + expr_to_pattern_help(arena, sub_expr) + } Expr::Record { fields, @@ -470,7 +537,7 @@ pub fn expr_to_pattern<'a>( for loc_assigned_field in fields.iter() { let region = loc_assigned_field.region; - let value = assigned_expr_field_to_pattern(arena, &loc_assigned_field.value)?; + let value = assigned_expr_field_to_pattern_help(arena, &loc_assigned_field.value)?; loc_patterns.push(Located { region, value }); } @@ -503,22 +570,22 @@ pub fn expr_to_pattern<'a>( | Expr::Record { update: Some(_), .. } - | Expr::UnaryOp(_, _) => Err(SyntaxError::InvalidPattern), + | Expr::UnaryOp(_, _) => Err(()), Expr::Str(string) => Ok(Pattern::StrLiteral(string.clone())), - Expr::MalformedIdent(string) => Ok(Pattern::Malformed(string)), + Expr::MalformedIdent(string, _problem) => Ok(Pattern::Malformed(string)), } } /// use for expressions like { x: a + b } -pub fn assigned_expr_field_to_pattern<'a>( +fn assigned_expr_field_to_pattern<'a>( arena: &'a Bump, assigned_field: &AssignedField<'a, Expr<'a>>, -) -> Result, SyntaxError<'a>> { +) -> Result, ()> { // the assigned fields always store spaces, but this slice is often empty Ok(match assigned_field { AssignedField::RequiredValue(name, spaces, value) => { - let pattern = expr_to_pattern(arena, &value.value)?; + let pattern = expr_to_pattern_help(arena, &value.value)?; let result = arena.alloc(Located { region: value.region, value: pattern, @@ -559,68 +626,51 @@ pub fn assigned_expr_field_to_pattern<'a>( }) } -/// Used for patterns like { x: Just _ } -pub fn assigned_pattern_field_to_pattern<'a>( +fn assigned_expr_field_to_pattern_help<'a>( arena: &'a Bump, assigned_field: &AssignedField<'a, Expr<'a>>, - backup_region: Region, -) -> Result>, SyntaxError<'a>> { +) -> Result, ()> { // the assigned fields always store spaces, but this slice is often empty Ok(match assigned_field { AssignedField::RequiredValue(name, spaces, value) => { - let pattern = expr_to_pattern(arena, &value.value)?; - let region = Region::span_across(&value.region, &value.region); + let pattern = expr_to_pattern_help(arena, &value.value)?; let result = arena.alloc(Located { region: value.region, value: pattern, }); if spaces.is_empty() { - Located::at(region, Pattern::RequiredField(name.value, result)) + Pattern::RequiredField(name.value, result) } else { - Located::at( - region, - Pattern::SpaceAfter( - arena.alloc(Pattern::RequiredField(name.value, result)), - spaces, - ), + Pattern::SpaceAfter( + arena.alloc(Pattern::RequiredField(name.value, result)), + spaces, ) } } AssignedField::OptionalValue(name, spaces, value) => { - let pattern = value.value.clone(); - let region = Region::span_across(&value.region, &value.region); let result = arena.alloc(Located { region: value.region, - value: pattern, + value: value.value.clone(), }); if spaces.is_empty() { - Located::at(region, Pattern::OptionalField(name.value, result)) + Pattern::OptionalField(name.value, result) } else { - Located::at( - region, - Pattern::SpaceAfter( - arena.alloc(Pattern::OptionalField(name.value, result)), - spaces, - ), + Pattern::SpaceAfter( + arena.alloc(Pattern::OptionalField(name.value, result)), + spaces, ) } } - AssignedField::LabelOnly(name) => Located::at(name.region, Pattern::Identifier(name.value)), - AssignedField::SpaceBefore(nested, spaces) => { - let can_nested = assigned_pattern_field_to_pattern(arena, nested, backup_region)?; - Located::at( - can_nested.region, - Pattern::SpaceBefore(arena.alloc(can_nested.value), spaces), - ) - } - AssignedField::SpaceAfter(nested, spaces) => { - let can_nested = assigned_pattern_field_to_pattern(arena, nested, backup_region)?; - Located::at( - can_nested.region, - Pattern::SpaceAfter(arena.alloc(can_nested.value), spaces), - ) - } - AssignedField::Malformed(string) => Located::at(backup_region, Pattern::Malformed(string)), + AssignedField::LabelOnly(name) => Pattern::Identifier(name.value), + AssignedField::SpaceBefore(nested, spaces) => Pattern::SpaceBefore( + arena.alloc(assigned_expr_field_to_pattern_help(arena, nested)?), + spaces, + ), + AssignedField::SpaceAfter(nested, spaces) => Pattern::SpaceAfter( + arena.alloc(assigned_expr_field_to_pattern_help(arena, nested)?), + spaces, + ), + AssignedField::Malformed(string) => Pattern::Malformed(string), }) } @@ -667,20 +717,50 @@ pub fn assigned_pattern_field_to_pattern<'a>( /// The '=' used in a def can't be followed by another '=' (or else it's actually /// an "==") and also it can't be followed by '>' (or else it's actually an "=>") -fn equals_for_def<'a>() -> impl Parser<'a, (), SyntaxError<'a>> { +fn equals_for_def_help<'a>() -> impl Parser<'a, (), EExpr<'a>> { |_arena, state: State<'a>| match state.bytes.get(0) { Some(b'=') => match state.bytes.get(1) { - Some(b'=') | Some(b'>') => Err((NoProgress, SyntaxError::ConditionFailed, state)), + Some(b'=') | Some(b'>') => { + Err((NoProgress, EExpr::Equals(state.line, state.column), state)) + } _ => { - let state = state.advance_without_indenting(1)?; + let state = state.advance_without_indenting_ee(1, |r, c| { + EExpr::Space(parser::BadInputError::LineTooLong, r, c) + })?; Ok((MadeProgress, (), state)) } }, - _ => Err((NoProgress, SyntaxError::ConditionFailed, state)), + _ => Err((NoProgress, EExpr::Equals(state.line, state.column), state)), } } +fn parse_defs_help<'a>( + min_indent: u16, +) -> impl Parser<'a, Vec<'a, &'a Located>>, EExpr<'a>> { + let parse_def = move |arena, state| { + let (_, (spaces, def), state) = and!( + backtrackable(space0_e(min_indent, EExpr::Space, EExpr::IndentStart)), + loc!(def_help(min_indent)) + ) + .parse(arena, state)?; + + let result = if spaces.is_empty() { + &*arena.alloc(def) + } else { + &*arena.alloc( + arena + .alloc(def.value) + .with_spaces_before(spaces, def.region), + ) + }; + + Ok((MadeProgress, result, state)) + }; + + zero_or_more!(parse_def) +} + /// A definition, consisting of one of these: /// /// * A type alias using `:` @@ -688,6 +768,10 @@ fn equals_for_def<'a>() -> impl Parser<'a, (), SyntaxError<'a>> { /// * A type annotation /// * A type annotation followed on the next line by a pattern, an `=`, and an expression pub fn def<'a>(min_indent: u16) -> impl Parser<'a, Def<'a>, SyntaxError<'a>> { + specialize(|e, _, _| SyntaxError::Expr(e), def_help(min_indent)) +} + +fn def_help<'a>(min_indent: u16) -> impl Parser<'a, Def<'a>, EExpr<'a>> { let indented_more = min_indent + 1; enum DefKind { @@ -696,113 +780,123 @@ pub fn def<'a>(min_indent: u16) -> impl Parser<'a, Def<'a>, SyntaxError<'a>> { } let def_colon_or_equals = one_of![ - map!(equals_for_def(), |_| DefKind::DefEqual), - map!(ascii_char(b':'), |_| DefKind::DefColon) + map!(equals_for_def_help(), |_| DefKind::DefEqual), + map!(word1(b':', EExpr::Colon), |_| DefKind::DefColon) ]; - attempt( - Attempting::Def, - then( - // backtrackable because - // - // i = 0 - // i - // - // on the last line, we parse a pattern `i`, but it's not actually a def, so need to - // backtrack - and!(backtrackable(pattern(min_indent)), def_colon_or_equals), - move |arena, state, _progress, (loc_pattern, def_kind)| match def_kind { - DefKind::DefColon => { - // Spaces after the ':' (at a normal indentation level) and then the type. - // The type itself must be indented more than the pattern and ':' - let (_, ann_type, state) = - space0_before(type_annotation::located(indented_more), min_indent) - .parse(arena, state)?; - - // see if there is a definition (assuming the preceding characters were a type - // annotation - let (_, opt_rest, state) = optional(and!( - spaces_then_comment_or_newline(), - body_at_indent(min_indent) - )) - .parse(arena, state)?; - - let def = match opt_rest { - None => annotation_or_alias( - arena, - &loc_pattern.value, - loc_pattern.region, - ann_type, - ), - Some((opt_comment, (body_pattern, body_expr))) => Def::AnnotatedBody { - ann_pattern: arena.alloc(loc_pattern), - ann_type: arena.alloc(ann_type), - comment: opt_comment, - body_pattern: arena.alloc(body_pattern), - body_expr: arena.alloc(body_expr), - }, - }; - - Ok((MadeProgress, def, state)) - } - DefKind::DefEqual => { - // Spaces after the '=' (at a normal indentation level) and then the expr. - // The expr itself must be indented more than the pattern and '=' - let (_, body_expr, state) = space0_before( - loc!(move |arena, state| { parse_expr(indented_more, arena, state) }), + then( + // backtrackable because + // + // i = 0 + // i + // + // on the last line, we parse a pattern `i`, but it's not actually a def, so need to + // backtrack + and!(backtrackable(pattern_help(min_indent)), def_colon_or_equals), + move |arena, state, _progress, (loc_pattern, def_kind)| match def_kind { + DefKind::DefColon => { + // Spaces after the ':' (at a normal indentation level) and then the type. + // The type itself must be indented more than the pattern and ':' + let (_, ann_type, state) = specialize( + EExpr::Type, + space0_before_e( + type_annotation::located_help(indented_more), min_indent, - ) - .parse(arena, state)?; + Type::TSpace, + Type::TIndentStart, + ), + ) + .parse(arena, state)?; - Ok(( - MadeProgress, - Def::Body(arena.alloc(loc_pattern), arena.alloc(body_expr)), - state, - )) - } - }, - ), + // see if there is a definition (assuming the preceding characters were a type + // annotation + let (_, opt_rest, state) = optional(and!( + spaces_then_comment_or_newline_help(), + body_at_indent_help(min_indent) + )) + .parse(arena, state)?; + + let def = match opt_rest { + None => { + annotation_or_alias(arena, &loc_pattern.value, loc_pattern.region, ann_type) + } + Some((opt_comment, (body_pattern, body_expr))) => Def::AnnotatedBody { + ann_pattern: arena.alloc(loc_pattern), + ann_type: arena.alloc(ann_type), + comment: opt_comment, + body_pattern: arena.alloc(body_pattern), + body_expr: arena.alloc(body_expr), + }, + }; + + Ok((MadeProgress, def, state)) + } + DefKind::DefEqual => { + // Spaces after the '=' (at a normal indentation level) and then the expr. + // The expr itself must be indented more than the pattern and '=' + let (_, body_expr, state) = space0_before_e( + loc!(move |arena, state| parse_expr_help(indented_more, arena, state)), + min_indent, + EExpr::Space, + EExpr::IndentStart, + ) + .parse(arena, state)?; + + Ok(( + MadeProgress, + Def::Body(arena.alloc(loc_pattern), arena.alloc(body_expr)), + state, + )) + } + }, ) } // PARSER HELPERS -fn pattern<'a>(min_indent: u16) -> impl Parser<'a, Located>, SyntaxError<'a>> { - space0_after( - specialize( - |e, _, _| SyntaxError::Pattern(e), +fn pattern_help<'a>(min_indent: u16) -> impl Parser<'a, Located>, EExpr<'a>> { + specialize_ref( + EExpr::Pattern, + space0_after_e( loc_closure_param(min_indent), + min_indent, + EPattern::Space, + EPattern::IndentStart, ), - min_indent, ) } -fn spaces_then_comment_or_newline<'a>() -> impl Parser<'a, Option<&'a str>, SyntaxError<'a>> { - skip_first!( - zero_or_more!(ascii_char(b' ')), - map!( - either!(newline_char(), line_comment()), - |either_comment_or_newline| match either_comment_or_newline { - Either::First(_) => None, - Either::Second(comment) => Some(comment), - } - ) +fn spaces_then_comment_or_newline_help<'a>() -> impl Parser<'a, Option<&'a str>, EExpr<'a>> { + specialize_ref( + EExpr::Syntax, + skip_first!( + zero_or_more!(ascii_char(b' ')), + map!( + either!(newline_char(), line_comment()), + |either_comment_or_newline| match either_comment_or_newline { + Either::First(_) => None, + Either::Second(comment) => Some(comment), + } + ) + ), ) } type Body<'a> = (Located>, Located>); -fn body_at_indent<'a>(indent_level: u16) -> impl Parser<'a, Body<'a>, SyntaxError<'a>> { +fn body_at_indent_help<'a>(indent_level: u16) -> impl Parser<'a, Body<'a>, EExpr<'a>> { let indented_more = indent_level + 1; and!( - skip_first!(spaces_exactly(indent_level), pattern(indent_level)), + skip_first!(spaces_exactly_e(indent_level), pattern_help(indent_level)), skip_first!( - equals_for_def(), + equals_for_def_help(), // Spaces after the '=' (at a normal indentation level) and then the expr. // The expr itself must be indented more than the pattern and '=' - space0_before( - loc!(move |arena, state| parse_expr(indented_more, arena, state)), + space0_before_e( + loc!(move |arena, state| parse_expr_help(indented_more, arena, state)), indent_level, + EExpr::Space, + EExpr::IndentStart, ) ) ) @@ -869,6 +963,9 @@ fn annotation_or_alias<'a>( Malformed(_) => { Def::NotYetImplemented("TODO translate a malformed pattern into a malformed annotation") } + MalformedIdent(_, _) => { + Def::NotYetImplemented("TODO translate a malformed pattern into a malformed annotation") + } Identifier(ident) => { // This is a regular Annotation Def::Annotation( @@ -895,14 +992,6 @@ fn annotation_or_alias<'a>( } } -fn parse_defs<'a>( - min_indent: u16, -) -> impl Parser<'a, Vec<'a, &'a Located>>, SyntaxError<'a>> { - let parse_def = move |a, s| space1_before(loc!(def(min_indent)), min_indent).parse(a, s); - - zero_or_more!(allocated(parse_def)) -} - fn parse_def_expr_help<'a>( min_indent: u16, def_start_col: u16, @@ -912,70 +1001,37 @@ fn parse_def_expr_help<'a>( loc_first_pattern: Located>, spaces_after_equals: &'a [CommentOrNewline<'a>], ) -> ParseResult<'a, Expr<'a>, EExpr<'a>> { - let result = parse_def_expr( - min_indent, - def_start_col, - equals_sign_indent, - arena, - state, - loc_first_pattern, - spaces_after_equals, - ); - - match result { - Ok(good) => Ok(good), - Err((progress, fail, state)) => { - let row = state.line; - let col = state.column; - Err((progress, EExpr::Def(arena.alloc(fail), row, col), state)) - } - } -} - -fn parse_def_expr<'a>( - min_indent: u16, - def_start_col: u16, - equals_sign_indent: u16, - arena: &'a Bump, - state: State<'a>, - loc_first_pattern: Located>, - spaces_after_equals: &'a [CommentOrNewline<'a>], -) -> ParseResult<'a, Expr<'a>, SyntaxError<'a>> { - if def_start_col < min_indent { - Err((NoProgress, SyntaxError::OutdentedTooFar, state)) - // `<` because '=' should be same indent (or greater) as the entire def-expr - } else if equals_sign_indent < def_start_col { - let msg = format!( - r"TODO the = in this declaration seems outdented. equals_sign_indent was {} and def_start_col was {}", - equals_sign_indent, def_start_col - ); - Err((NoProgress, SyntaxError::NotYetImplemented(msg), state)) + if def_start_col < min_indent || equals_sign_indent < def_start_col { + Err(( + NoProgress, + EExpr::IndentDefBody(state.line, state.column), + state, + )) } else { // Indented more beyond the original indent of the entire def-expr. let indented_more = def_start_col + 1; then( - attempt!( - Attempting::Def, + and!( + // Parse the body of the first def. It doesn't need any spaces + // around it parsed, because both the subsquent defs and the + // final body will have space1_before on them. + // + // It should be indented more than the original, and it will + // end when outdented again. + loc!(move |arena, state| parse_expr_help(indented_more, arena, state)), and!( - // Parse the body of the first def. It doesn't need any spaces - // around it parsed, because both the subsquent defs and the - // final body will have space1_before on them. - // - // It should be indented more than the original, and it will - // end when outdented again. - loc!(move |arena, state| parse_expr(indented_more, arena, state)), - and!( - // Optionally parse additional defs. - parse_defs(def_start_col), - // Parse the final expression that will be returned. - // It should be indented the same amount as the original. - space1_before( - loc!(move |arena, state: State<'a>| { - parse_expr(def_start_col, arena, state) - }), - def_start_col, - ) + // Optionally parse additional defs. + parse_defs_help(def_start_col), + // Parse the final expression that will be returned. + // It should be indented the same amount as the original. + space0_before_e( + loc!(move |arena, state: State<'a>| { + parse_expr_help(def_start_col, arena, state) + }), + def_start_col, + EExpr::Space, + EExpr::IndentStart, ) ) ), @@ -1017,66 +1073,66 @@ fn parse_def_expr<'a>( } } -fn parse_def_signature<'a>( +fn parse_def_signature_help<'a>( min_indent: u16, colon_indent: u16, arena: &'a Bump, state: State<'a>, loc_first_pattern: Located>, -) -> ParseResult<'a, Expr<'a>, SyntaxError<'a>> { +) -> ParseResult<'a, Expr<'a>, EExpr<'a>> { let original_indent = state.indent_col; - if original_indent < min_indent { - Err((NoProgress, SyntaxError::OutdentedTooFar, state)) - // `<` because ':' should be same indent or greater - } else if colon_indent < original_indent { + if original_indent < min_indent || colon_indent < original_indent { + // `colon_indent < original_indent` because ':' should be same indent or greater Err(( NoProgress, - SyntaxError::NotYetImplemented( - "TODO the : in this declaration seems outdented".to_string(), - ), + EExpr::IndentDefBody(state.line, state.column), state, )) } else { // Indented more beyond the original indent. let indented_more = original_indent + 1; - attempt!( - Attempting::Def, - and!( - // Parse the first annotation. It doesn't need any spaces - // around it parsed, because both the subsquent defs and the - // final body will have space1_before on them. - // - // It should be indented more than the original, and it will - // end when outdented again. - and_then_with_indent_level( - space0_before(type_annotation::located(indented_more), min_indent), - // The first annotation may be immediately (spaces_then_comment_or_newline()) - // followed by a body at the exact same indent_level - // leading to an AnnotatedBody in this case - |_progress, type_ann, indent_level| map( - optional(and!( - backtrackable(spaces_then_comment_or_newline()), - body_at_indent(indent_level) - )), - move |opt_body| (type_ann.clone(), opt_body) - ) + and!( + // Parse the first annotation. It doesn't need any spaces + // around it parsed, because both the subsquent defs and the + // final body will have space1_before on them. + // + // It should be indented more than the original, and it will + // end when outdented again. + and_then_with_indent_level( + space0_before_e( + specialize(EExpr::Type, type_annotation::located_help(indented_more)), + min_indent, + EExpr::Space, + EExpr::IndentAnnotation ), - and!( - // Optionally parse additional defs. - zero_or_more!(allocated(space1_before( - loc!(def(original_indent)), - original_indent, - ))), - // Parse the final expression that will be returned. - // It should be indented the same amount as the original. - space1_before( - loc!(|arena, state: State<'a>| { - parse_expr(original_indent, arena, state) - }), - original_indent, - ) + // The first annotation may be immediately (spaces_then_comment_or_newline()) + // followed by a body at the exact same indent_level + // leading to an AnnotatedBody in this case + |_progress, type_ann, indent_level| map( + optional(and!( + backtrackable(spaces_then_comment_or_newline_help()), + body_at_indent_help(indent_level) + )), + move |opt_body| (type_ann.clone(), opt_body) + ) + ), + and!( + // Optionally parse additional defs. + zero_or_more!(backtrackable(allocated(space0_before_e( + loc!(specialize_ref(EExpr::Syntax, def(original_indent))), + original_indent, + EExpr::Space, + EExpr::IndentStart, + )))), + // Parse the final expression that will be returned. + // It should be indented the same amount as the original. + space0_before_e( + loc!(|arena, state| parse_expr_help(original_indent, arena, state)), + original_indent, + EExpr::Space, + EExpr::IndentEnd, ) ) ) @@ -1127,63 +1183,26 @@ fn parse_def_signature<'a>( } } -// fn to_expr<'a>(arena, state, ((loc_first_annotation, opt_body), (mut defs, loc_ret))-> ParseResult<'a, Expr<'a>, SyntaxError<'a>>{ - -// } - -fn loc_function_arg<'a>(min_indent: u16) -> impl Parser<'a, Located>, SyntaxError<'a>> { - skip_first!( - // If this is a reserved keyword ("if", "then", "case, "when"), - // followed by a blank space, then it is not a function argument! - // - // (The space is necessary because otherwise we'll get a false - // positive on function arguments beginning with keywords, - // e.g. `ifBlah` or `isSomething` will register as `if`/`is` keywords) - not(and!(reserved_keyword(), space1(min_indent))), - // Don't parse operators, because they have a higher precedence than function application. - // If we encounter one, we're done parsing function args! - move |arena, state| loc_parse_function_arg(min_indent, arena, state) - ) -} - -fn loc_parse_function_arg<'a>( +fn loc_parse_function_arg_help<'a>( min_indent: u16, arena: &'a Bump, state: State<'a>, -) -> ParseResult<'a, Located>, SyntaxError<'a>> { +) -> ParseResult<'a, Located>, EExpr<'a>> { one_of!( - loc_function_arg_in_parens_etc(min_indent), - loc!(string_literal()), - loc!(number_literal()), - loc!(closure(min_indent)), - loc!(record_literal(min_indent)), - loc!(list_literal(min_indent)), - loc!(unary_op(min_indent)), - loc!(when::expr(min_indent)), - loc!(if_expr(min_indent)), - loc!(ident_without_apply()) + loc_function_arg_in_parens_etc_help(min_indent), + loc!(specialize(EExpr::Str, string_literal_help())), + loc!(specialize(EExpr::Number, number_literal_help())), + loc!(specialize(EExpr::Lambda, closure_help(min_indent))), + loc!(record_literal_help(min_indent)), + loc!(specialize(EExpr::List, list_literal_help(min_indent))), + loc!(unary_op_help(min_indent)), + loc!(specialize(EExpr::When, when::expr_help(min_indent))), + loc!(specialize(EExpr::If, if_expr_help(min_indent))), + loc!(ident_without_apply_help()) ) .parse(arena, state) } -fn reserved_keyword<'a>() -> impl Parser<'a, (), SyntaxError<'a>> { - one_of!( - ascii_string(keyword::IF), - ascii_string(keyword::THEN), - ascii_string(keyword::ELSE), - ascii_string(keyword::WHEN), - ascii_string(keyword::IS), - ascii_string(keyword::AS) - ) -} - -fn closure<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> { - specialize( - |e, r, c| SyntaxError::Expr(EExpr::Lambda(e, r, c)), - closure_help(min_indent), - ) -} - fn closure_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, ELambda<'a>> { map_with_arena!( skip_first!( @@ -1211,8 +1230,8 @@ fn closure_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, ELambda<'a>> { // Parse the body space0_before_e( specialize_ref( - ELambda::Syntax, - loc!(move |arena, state| parse_expr(min_indent, arena, state)) + ELambda::Body, + loc!(move |arena, state| parse_expr_help(min_indent, arena, state)) ), min_indent, ELambda::Space, @@ -1235,22 +1254,15 @@ mod when { use crate::ast::WhenBranch; /// Parser for when expressions. - pub fn expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> { - specialize( - |e, r, c| SyntaxError::Expr(EExpr::When(e, r, c)), - expr_help(min_indent), - ) - } pub fn expr_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, When<'a>> { then( and!( when_with_indent(), skip_second!( space0_around_ee( - loc!(specialize_ref( - When::Syntax, - move |arena, state| parse_expr(min_indent, arena, state) - )), + loc!(specialize_ref(When::Condition, move |arena, state| { + parse_expr_help(min_indent, arena, state) + })), min_indent, When::Space, When::IndentCondition, @@ -1402,7 +1414,7 @@ mod when { // TODO we should require space before the expression but not after space0_around_ee( loc!(specialize_ref(When::IfGuard, move |arena, state| { - parse_expr(min_indent, arena, state) + parse_expr_help(min_indent, arena, state) })), min_indent, When::Space, @@ -1442,8 +1454,8 @@ mod when { word2(b'-', b'>', When::Arrow), space0_before_e( specialize_ref( - When::Syntax, - loc!(move |arena, state| parse_expr(indent, arena, state)) + When::Branch, + loc!(move |arena, state| parse_expr_help(indent, arena, state)) ), indent, When::Space, @@ -1460,8 +1472,8 @@ fn if_branch<'a>( // NOTE: only parse spaces before the expression let (_, cond, state) = space0_around_ee( specialize_ref( - If::Syntax, - loc!(move |arena, state| parse_expr(min_indent, arena, state)), + If::Condition, + loc!(move |arena, state| parse_expr_help(min_indent, arena, state)), ), min_indent, If::Space, @@ -1477,8 +1489,8 @@ fn if_branch<'a>( let (_, then_branch, state) = space0_around_ee( specialize_ref( - If::Syntax, - loc!(move |arena, state| parse_expr(min_indent, arena, state)), + If::ThenBranch, + loc!(move |arena, state| parse_expr_help(min_indent, arena, state)), ), min_indent, If::Space, @@ -1496,7 +1508,7 @@ fn if_branch<'a>( } } -pub fn if_expr_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, If<'a>> { +fn if_expr_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, If<'a>> { move |arena: &'a Bump, state| { let (_, _, state) = parser::keyword_e(keyword::IF, If::If).parse(arena, state)?; @@ -1527,8 +1539,8 @@ pub fn if_expr_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, If<'a>> { let (_, else_branch, state) = space0_before_e( specialize_ref( - If::Syntax, - loc!(move |arena, state| parse_expr(min_indent, arena, state)), + If::ElseBranch, + loc!(move |arena, state| parse_expr_help(min_indent, arena, state)), ), min_indent, If::Space, @@ -1543,13 +1555,6 @@ pub fn if_expr_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, If<'a>> { } } -pub fn if_expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> { - specialize( - |e, r, c| SyntaxError::Expr(EExpr::If(e, r, c)), - if_expr_help(min_indent), - ) -} - /// This is a helper function for parsing function args. /// The rules for (-) are special-cased, and they come up in function args. /// @@ -1564,101 +1569,80 @@ pub fn if_expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a> /// any time we encounter a '-' it is unary iff it is both preceded by spaces /// and is *not* followed by a whitespace character. #[inline(always)] -fn unary_negate_function_arg<'a>( +fn unary_negate_function_arg_help<'a>( min_indent: u16, -) -> impl Parser<'a, Located>, SyntaxError<'a>> { - then( - // Spaces, then '-', then *not* more spaces. - not_followed_by( - either!( - // Try to parse a number literal *before* trying to parse unary negate, - // because otherwise (foo -1) will parse as (foo (Num.neg 1)) - loc!(number_literal()), - loc!(ascii_char(b'-')) - ), - one_of!( - ascii_char(b' '), - ascii_char(b'#'), - newline_char(), - ascii_char(b'>') - ), - ), - move |arena, state, progress, num_or_minus_char| { - debug_assert_eq!(progress, MadeProgress); +) -> impl Parser<'a, Located>, EExpr<'a>> { + move |arena, state: State<'a>| { + let (_, Located { region, .. }, state) = loc!(unary_negate()).parse(arena, state)?; - match num_or_minus_char { - Either::First(loc_num_literal) => Ok((progress, loc_num_literal, state)), - Either::Second(Located { region, .. }) => { - let loc_op = Located { - region, - value: UnaryOp::Negate, - }; + let loc_op = Located { + region, + value: UnaryOp::Negate, + }; - // Continue parsing the function arg as normal. - let (_, loc_expr, state) = loc_function_arg(min_indent).parse(arena, state)?; - let region = Region { - start_col: loc_op.region.start_col, - start_line: loc_op.region.start_line, - end_col: loc_expr.region.end_col, - end_line: loc_expr.region.end_line, - }; - let value = Expr::UnaryOp(arena.alloc(loc_expr), loc_op); - let loc_expr = Located { - // Start from where the unary op started, - // and end where its argument expr ended. - // This is relevant in case (for example) - // we have an expression involving parens, - // for example `-(foo bar)` - region, - value, - }; + // Continue parsing the function arg as normal. + let (_, loc_expr, state) = loc_parse_function_arg_help(min_indent, arena, state)?; + let region = Region { + start_col: loc_op.region.start_col, + start_line: loc_op.region.start_line, + end_col: loc_expr.region.end_col, + end_line: loc_expr.region.end_line, + }; + let value = Expr::UnaryOp(arena.alloc(loc_expr), loc_op); + let loc_expr = Located { + // Start from where the unary op started, + // and end where its argument expr ended. + // This is relevant in case (for example) + // we have an expression involving parens, + // for example `-(foo bar)` + region, + value, + }; - let value = loc_expr.value; + let value = loc_expr.value; - Ok(( - MadeProgress, - Located { - region: loc_expr.region, - value, - }, - state, - )) - } - } - }, - ) + Ok(( + MadeProgress, + Located { + region: loc_expr.region, + value, + }, + state, + )) + } } fn loc_function_args_help<'a>( min_indent: u16, ) -> impl Parser<'a, Vec<'a, Located>>, EExpr<'a>> { - specialize_ref(EExpr::Syntax, loc_function_args(min_indent)) -} - -fn loc_function_args<'a>( - min_indent: u16, -) -> impl Parser<'a, Vec<'a, Located>>, SyntaxError<'a>> { - one_or_more!(move |arena: &'a Bump, s| { - map!( - and!( - backtrackable(space1(min_indent)), - one_of!( - unary_negate_function_arg(min_indent), - loc_function_arg(min_indent) - ) - ), - |(spaces, loc_expr): (&'a [_], Located>)| { - if spaces.is_empty() { - loc_expr - } else { - arena - .alloc(loc_expr.value) - .with_spaces_before(spaces, loc_expr.region) + one_or_more_e!( + move |arena: &'a Bump, s| { + map!( + and!( + backtrackable(space1_e( + min_indent, + EExpr::Space, + EExpr::IndentStart, + EExpr::Start + )), + one_of!(unary_negate_function_arg_help(min_indent), |a, s| { + loc_parse_function_arg_help(min_indent, a, s) + }) + ), + |(spaces, loc_expr): (&'a [_], Located>)| { + if spaces.is_empty() { + loc_expr + } else { + arena + .alloc(loc_expr.value) + .with_spaces_before(spaces, loc_expr.region) + } } - } - ) - .parse(arena, s) - }) + ) + .parse(arena, s) + }, + EExpr::Start + ) } /// When we parse an ident like `foo ` it could be any of these: @@ -1668,20 +1652,25 @@ fn loc_function_args<'a>( /// 3. The beginning of a definition (e.g. `foo =`) /// 4. The beginning of a type annotation (e.g. `foo :`) /// 5. A reserved keyword (e.g. `if ` or `case `), meaning we should do something else. -fn ident_etc<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> { + +fn assign_or_destructure_identifier<'a>() -> impl Parser<'a, Ident<'a>, EExpr<'a>> { + crate::ident::parse_ident_help +} + +fn ident_etc_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, EExpr<'a>> { then( and!( - loc!(ident()), + loc!(assign_or_destructure_identifier()), and!( // There may optionally be function args after this ident - optional(loc_function_args(min_indent)), + optional(loc_function_args_help(min_indent)), // There may also be a '=' or ':' after it. // The : might be because this is a type alias, e.g. (List a : ...` // The = might be because someone is trying to use Elm or Haskell // syntax for defining functions, e.g. `foo a b = ...` - so give a nice error! optional(and!( - backtrackable(space0(min_indent)), - either!(equals_with_indent(), colon_with_indent()) + backtrackable(space0_e(min_indent, EExpr::Space, EExpr::IndentEquals,)), + either!(equals_with_indent_help(), colon_with_indent_help()) )) ) ), @@ -1693,7 +1682,7 @@ fn ident_etc<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> (Some(loc_args), Some((_spaces_before_equals, Either::First(_equals_indent)))) => { // We got args with an '=' after them, e.g. `foo a b = ...` This is a syntax error! let region = Region::across_all(loc_args.iter().map(|v| &v.region)); - let fail = SyntaxError::ArgumentsBeforeEquals(region); + let fail = EExpr::ElmStyleFunction(region, state.line, state.column); Err((MadeProgress, fail, state)) } (None, Some((spaces_before_equals, Either::First(equals_indent)))) => { @@ -1708,8 +1697,11 @@ fn ident_etc<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> let def_start_col = state.indent_col; let loc_pattern = Located { region, value }; // TODO use equals_indent below? - let (_, spaces_after_equals, state) = space0(min_indent).parse(arena, state)?; - let (_, parsed_expr, state) = parse_def_expr( + let (_, spaces_after_equals, state) = + space0_e(min_indent, EExpr::Space, EExpr::IndentDefBody) + .parse(arena, state)?; + + let (_, parsed_expr, state) = parse_def_expr_help( min_indent, def_start_col, equals_indent, @@ -1758,20 +1750,17 @@ fn ident_etc<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> let mut arg_patterns = Vec::with_capacity_in(loc_args.len(), arena); for loc_arg in loc_args { - match expr_to_pattern(arena, &loc_arg.value) { + match expr_to_pattern_help(arena, &loc_arg.value) { Ok(arg_pat) => { arg_patterns.push(Located { value: arg_pat, region: loc_arg.region, }); } - Err(malformed) => { + Err(_malformed) => { return Err(( MadeProgress, - SyntaxError::NotYetImplemented(format!( - "TODO early return malformed pattern {:?}", - malformed - )), + EExpr::MalformedPattern(state.line, state.column), state, )); } @@ -1799,7 +1788,7 @@ fn ident_etc<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> let region = loc_ident.region; let loc_pattern = Located { region, value }; - parse_def_signature(min_indent, colon_indent, arena, state, loc_pattern) + parse_def_signature_help(min_indent, colon_indent, arena, state, loc_pattern) } (None, None) => { // We got nothin' @@ -1812,14 +1801,17 @@ fn ident_etc<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> ) } -pub fn ident_without_apply<'a>() -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> { - then(loc!(ident()), move |arena, state, progress, loc_ident| { - Ok((progress, ident_to_expr(arena, loc_ident.value), state)) - }) +fn ident_without_apply_help<'a>() -> impl Parser<'a, Expr<'a>, EExpr<'a>> { + specialize_ref( + EExpr::Syntax, + then(loc!(ident()), move |arena, state, progress, loc_ident| { + Ok((progress, ident_to_expr(arena, loc_ident.value), state)) + }), + ) } /// Like equals_for_def(), except it produces the indent_col of the state rather than () -pub fn equals_with_indent_help<'a>() -> impl Parser<'a, u16, EExpr<'a>> { +fn equals_with_indent_help<'a>() -> impl Parser<'a, u16, EExpr<'a>> { move |_arena, state: State<'a>| { let equals = EExpr::Equals(state.line, state.column); let indent_col = state.indent_col; @@ -1844,45 +1836,23 @@ pub fn equals_with_indent_help<'a>() -> impl Parser<'a, u16, EExpr<'a>> { } /// Like equals_for_def(), except it produces the indent_col of the state rather than () -pub fn equals_with_indent<'a>() -> impl Parser<'a, u16, SyntaxError<'a>> { - move |arena, state: State<'a>| { +fn colon_with_indent_help<'a>() -> impl Parser<'a, u16, EExpr<'a>> { + move |_arena, state: State<'a>| { + let equals = EExpr::Colon(state.line, state.column); + let indent_col = state.indent_col; + match state.bytes.first() { - Some(b'=') => { - match state.bytes.get(1) { - // The '=' must not be followed by another `=` or `>` - // (See equals_for_def() for explanation) - Some(b'=') | Some(b'>') => Err(unexpected(0, Attempting::Def, state)), - Some(_) => Ok(( - MadeProgress, - state.indent_col, - state.advance_without_indenting(1)?, - )), - None => Err(unexpected_eof( - arena, - state.advance_without_indenting(1)?, - 1, - )), - } - } - Some(_) => Err(unexpected(0, Attempting::Def, state)), - None => Err(unexpected_eof(arena, state, 0)), + Some(b':') => match state.advance_without_indenting_e(1, EExpr::Space) { + Err(bad) => Err(bad), + Ok(good) => Ok((MadeProgress, indent_col, good)), + }, + Some(_) => Err((NoProgress, equals, state)), + None => Err((NoProgress, equals, state)), } } } -pub fn colon_with_indent<'a>() -> impl Parser<'a, u16, SyntaxError<'a>> { - move |arena, state: State<'a>| match state.bytes.first() { - Some(&byte) if byte == b':' => Ok(( - MadeProgress, - state.indent_col, - state.advance_without_indenting(1)?, - )), - Some(_) => Err(unexpected(0, Attempting::Def, state)), - None => Err(unexpected_eof(arena, state, 0)), - } -} - -pub fn ident_to_expr<'a>(arena: &'a Bump, src: Ident<'a>) -> Expr<'a> { +fn ident_to_expr<'a>(arena: &'a Bump, src: Ident<'a>) -> Expr<'a> { match src { Ident::GlobalTag(string) => Expr::GlobalTag(string), Ident::PrivateTag(string) => Expr::PrivateTag(string), @@ -1910,11 +1880,26 @@ pub fn ident_to_expr<'a>(arena: &'a Bump, src: Ident<'a>) -> Expr<'a> { answer } Ident::AccessorFunction(string) => Expr::AccessorFunction(string), - Ident::Malformed(string) => Expr::MalformedIdent(string), + Ident::Malformed(string, problem) => Expr::MalformedIdent(string, problem), } } -fn binop<'a>() -> impl Parser<'a, BinOp, SyntaxError<'a>> { +fn binop_help<'a>() -> impl Parser<'a, BinOp, EExpr<'a>> { + macro_rules! binop { + ($word1:expr, $op:expr) => { + map!( + word1($word1, |row, col| EExpr::BinOp($op, row, col)), + |_| $op + ) + }; + ($word1:expr, $word2:expr, $op:expr) => { + map!( + word2($word1, $word2, |row, col| EExpr::BinOp($op, row, col)), + |_| $op + ) + }; + } + one_of!( // Sorted from highest to lowest predicted usage in practice, // so that successful matches short-circuit as early as possible. @@ -1922,29 +1907,23 @@ fn binop<'a>() -> impl Parser<'a, BinOp, SyntaxError<'a>> { // with other valid operators (e.g. "<=" begins with "<") must // come before the shorter ones; otherwise, they will never // be reached because the shorter one will pass and consume! - map!(ascii_string("|>"), |_| BinOp::Pizza), - map!(ascii_string("=="), |_| BinOp::Equals), - map!(ascii_string("!="), |_| BinOp::NotEquals), - map!(ascii_string("&&"), |_| BinOp::And), - map!(ascii_string("||"), |_| BinOp::Or), - map!(ascii_char(b'+'), |_| BinOp::Plus), - map!(ascii_char(b'*'), |_| BinOp::Star), - map!(ascii_char(b'-'), |_| BinOp::Minus), - map!(ascii_string("//"), |_| BinOp::DoubleSlash), - map!(ascii_char(b'/'), |_| BinOp::Slash), - map!(ascii_string("<="), |_| BinOp::LessThanOrEq), - map!(ascii_char(b'<'), |_| BinOp::LessThan), - map!(ascii_string(">="), |_| BinOp::GreaterThanOrEq), - map!(ascii_char(b'>'), |_| BinOp::GreaterThan), - map!(ascii_char(b'^'), |_| BinOp::Caret), - map!(ascii_string("%%"), |_| BinOp::DoublePercent), - map!(ascii_char(b'%'), |_| BinOp::Percent) - ) -} -fn list_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> { - specialize( - |e, r, c| SyntaxError::Expr(EExpr::List(e, r, c)), - list_literal_help(min_indent), + binop!(b'|', b'>', BinOp::Pizza), + binop!(b'=', b'=', BinOp::Equals), + binop!(b'!', b'=', BinOp::NotEquals), + binop!(b'&', b'&', BinOp::And), + binop!(b'|', b'|', BinOp::Or), + binop!(b'+', BinOp::Plus), + binop!(b'*', BinOp::Star), + binop!(b'-', BinOp::Minus), + binop!(b'/', b'/', BinOp::DoubleSlash), + binop!(b'/', BinOp::Slash), + binop!(b'<', b'=', BinOp::LessThanOrEq), + binop!(b'<', BinOp::LessThan), + binop!(b'>', b'=', BinOp::GreaterThanOrEq), + binop!(b'>', BinOp::GreaterThan), + binop!(b'^', BinOp::Caret), + binop!(b'%', b'%', BinOp::DoublePercent), + binop!(b'%', BinOp::Percent) ) } @@ -2039,25 +2018,6 @@ fn record_updateable_identifier<'a>() -> impl Parser<'a, Expr<'a>, ERecord<'a>> ) } -fn record_literal_wrapper<'a>( - min_indent: u16, -) -> impl Parser< - 'a, - ( - Option>>, - Located<( - Vec<'a, Located>>>, - &'a [CommentOrNewline<'a>], - )>, - ), - SyntaxError<'a>, -> { - specialize( - |e, r, c| SyntaxError::Expr(EExpr::Record(e, r, c)), - record_help(min_indent), - ) -} - fn record_help<'a>( min_indent: u16, ) -> impl Parser< @@ -2116,13 +2076,13 @@ fn record_help<'a>( ) } -fn record_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> { +fn record_literal_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, EExpr<'a>> { then( and!( - attempt!(Attempting::Record, loc!(record_literal_wrapper(min_indent))), + loc!(specialize(EExpr::Record, record_help(min_indent))), optional(and!( - space0(min_indent), - either!(equals_with_indent(), colon_with_indent()) + space0_e(min_indent, EExpr::Space, EExpr::IndentEquals), + either!(equals_with_indent_help(), colon_with_indent_help()) )) ), move |arena, state, progress, (loc_record, opt_def)| { @@ -2137,11 +2097,8 @@ fn record_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError< }; // there can be field access, e.g. `{ x : 4 }.x` - let (_, accesses, state) = optional(one_or_more!(skip_first!( - ascii_char(b'.'), - lowercase_ident() - ))) - .parse(arena, state)?; + let (_, accesses, state) = + optional(record_field_access_chain()).parse(arena, state)?; if let Some(fields) = accesses { for field in fields { @@ -2165,7 +2122,13 @@ fn record_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError< match assigned_expr_field_to_pattern(arena, &loc_assigned_field.value) { Ok(value) => loc_patterns.push(Located { region, value }), // an Expr became a pattern that should not be. - Err(fail) => return Err((progress, fail, state)), + Err(_fail) => { + return Err(( + progress, + EExpr::MalformedPattern(state.line, state.column), + state, + )) + } } } @@ -2176,11 +2139,13 @@ fn record_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError< Pattern::SpaceAfter(arena.alloc(pattern), spaces_before_equals) }; let loc_pattern = Located { region, value }; - let (_, spaces_after_equals, state) = space0(min_indent).parse(arena, state)?; + let (_, spaces_after_equals, state) = + space0_e(min_indent, EExpr::Space, EExpr::IndentDefBody) + .parse(arena, state)?; // The def's starting column is the '{' char in the record literal. let def_start_col = loc_record.region.start_col; - let (_, parsed_expr, state) = parse_def_expr( + let (_, parsed_expr, state) = parse_def_expr_help( min_indent, def_start_col, equals_indent, @@ -2203,7 +2168,13 @@ fn record_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError< match assigned_expr_field_to_pattern(arena, &loc_assigned_field.value) { Ok(value) => loc_patterns.push(Located { region, value }), // an Expr became a pattern that should not be. - Err(fail) => return Err((progress, fail, state)), + Err(_fail) => { + return Err(( + progress, + EExpr::MalformedPattern(state.line, state.column), + state, + )) + } } } @@ -2215,35 +2186,33 @@ fn record_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError< }; let loc_pattern = Located { region, value }; - parse_def_signature(min_indent, colon_indent, arena, state, loc_pattern) + parse_def_signature_help(min_indent, colon_indent, arena, state, loc_pattern) } } }, ) } -fn string_literal<'a>() -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> { - specialize( - |e, r, c| SyntaxError::Expr(EExpr::Str(e, r, c)), - map!(crate::string_literal::parse(), Expr::Str), - ) -} - -#[allow(dead_code)] fn string_literal_help<'a>() -> impl Parser<'a, Expr<'a>, EString<'a>> { map!(crate::string_literal::parse(), Expr::Str) } -#[allow(dead_code)] -fn number_literal<'a>() -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> { - // use crate::number_literal::number_literal; - specialize( - |e, r, c| SyntaxError::Expr(EExpr::Number(e, r, c)), - crate::number_literal::number_literal(), - ) -} - -#[allow(dead_code)] fn number_literal_help<'a>() -> impl Parser<'a, Expr<'a>, Number> { - crate::number_literal::number_literal() + map!(crate::number_literal::number_literal(), |literal| { + use crate::number_literal::NumLiteral::*; + + match literal { + Num(s) => Expr::Num(s), + Float(s) => Expr::Float(s), + NonBase10Int { + string, + base, + is_negative, + } => Expr::NonBase10Int { + string, + base, + is_negative, + }, + } + }) } diff --git a/compiler/parse/src/ident.rs b/compiler/parse/src/ident.rs index 09db1275f3..bb3143b53d 100644 --- a/compiler/parse/src/ident.rs +++ b/compiler/parse/src/ident.rs @@ -1,11 +1,13 @@ use crate::ast::Attempting; use crate::keyword; use crate::parser::Progress::{self, *}; -use crate::parser::{peek_utf8_char, unexpected, ParseResult, Parser, State, SyntaxError}; +use crate::parser::{ + peek_utf8_char, unexpected, BadInputError, Col, EExpr, ParseResult, Parser, Row, State, + SyntaxError, +}; use bumpalo::collections::string::String; use bumpalo::collections::vec::Vec; use bumpalo::Bump; -use roc_collections::all::arena_join; use roc_region::all::Region; /// The parser accepts all of these in any position where any one of them could @@ -26,7 +28,7 @@ pub enum Ident<'a> { /// .foo AccessorFunction(&'a str), /// .Foo or foo. or something like foo.Bar - Malformed(&'a str), + Malformed(&'a str, BadIdent), } impl<'a> Ident<'a> { @@ -50,7 +52,7 @@ impl<'a> Ident<'a> { len - 1 } AccessorFunction(string) => string.len(), - Malformed(string) => string.len(), + Malformed(string, _) => string.len(), } } @@ -59,274 +61,8 @@ impl<'a> Ident<'a> { } } -/// Parse an identifier into a string. -/// -/// This is separate from the `ident` Parser because string interpolation -/// wants to use it this way. -/// -/// By design, this does not check for reserved keywords like "if", "else", etc. -/// Sometimes we may want to check for those later in the process, and give -/// more contextually-aware error messages than "unexpected `if`" or the like. -#[inline(always)] -pub fn parse_ident<'a>( - arena: &'a Bump, - mut state: State<'a>, -) -> ParseResult<'a, (Ident<'a>, Option), SyntaxError<'a>> { - let mut part_buf = String::new_in(arena); // The current "part" (parts are dot-separated.) - let mut capitalized_parts: Vec<&'a str> = Vec::new_in(arena); - let mut noncapitalized_parts: Vec<&'a str> = Vec::new_in(arena); - let mut is_capitalized; - let is_accessor_fn; - let mut is_private_tag = false; - - let start_bytes_len = state.bytes.len(); - - // Identifiers and accessor functions must start with either a letter or a dot. - // If this starts with neither, it must be something else! - match peek_utf8_char(&state) { - Ok((first_ch, bytes_parsed)) => { - if first_ch.is_alphabetic() { - part_buf.push(first_ch); - - is_capitalized = first_ch.is_uppercase(); - is_accessor_fn = false; - - state = state.advance_without_indenting(bytes_parsed)?; - } else if first_ch == '.' { - is_capitalized = false; - is_accessor_fn = true; - - state = state.advance_without_indenting(bytes_parsed)?; - } else if first_ch == '@' { - state = state.advance_without_indenting(bytes_parsed)?; - - // '@' must always be followed by a capital letter! - match peek_utf8_char(&state) { - Ok((next_ch, next_bytes_parsed)) => { - if next_ch.is_uppercase() { - state = state.advance_without_indenting(next_bytes_parsed)?; - - part_buf.push('@'); - part_buf.push(next_ch); - - is_private_tag = true; - is_capitalized = true; - is_accessor_fn = false; - } else { - return Err(unexpected( - bytes_parsed + next_bytes_parsed, - Attempting::Identifier, - state, - )); - } - } - Err(reason) => { - let progress = Progress::from_lengths(start_bytes_len, state.bytes.len()); - return state.fail(arena, progress, reason); - } - } - } else { - return Err(unexpected(0, Attempting::Identifier, state)); - } - } - Err(reason) => { - let progress = Progress::from_lengths(start_bytes_len, state.bytes.len()); - return state.fail(arena, progress, reason); - } - } - - while !state.bytes.is_empty() { - match peek_utf8_char(&state) { - Ok((ch, bytes_parsed)) => { - // After the first character, only these are allowed: - // - // * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers - // * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric() - // * A dot ('.') - if ch.is_alphabetic() { - if part_buf.is_empty() { - // Capitalization is determined by the first character in the part. - is_capitalized = ch.is_uppercase(); - } - - part_buf.push(ch); - } else if ch.is_ascii_digit() { - // Parts may not start with numbers! - if part_buf.is_empty() { - return malformed( - Some(ch), - arena, - state, - capitalized_parts, - noncapitalized_parts, - ); - } - - part_buf.push(ch); - } else if ch == '.' { - // There are two posssible errors here: - // - // 1. Having two consecutive dots is an error. - // 2. Having capitalized parts after noncapitalized (e.g. `foo.Bar`) is an error. - if part_buf.is_empty() || (is_capitalized && !noncapitalized_parts.is_empty()) { - return malformed( - Some(ch), - arena, - state, - capitalized_parts, - noncapitalized_parts, - ); - } - - if is_capitalized { - capitalized_parts.push(part_buf.into_bump_str()); - } else { - noncapitalized_parts.push(part_buf.into_bump_str()); - } - - // Now that we've recorded the contents of the current buffer, reset it. - part_buf = String::new_in(arena); - } else { - // This must be the end of the identifier. We're done! - - break; - } - - state = state.advance_without_indenting(bytes_parsed)?; - } - Err(reason) => { - let progress = Progress::from_lengths(start_bytes_len, state.bytes.len()); - return state.fail(arena, progress, reason); - } - } - } - - if part_buf.is_empty() { - // We probably had a trailing dot, e.g. `Foo.bar.` - this is malformed! - // - // This condition might also occur if we encounter a malformed accessor like `.|` - // - // If we made it this far and don't have a next_char, then necessarily - // we have consumed a '.' char previously. - return malformed( - Some('.'), - arena, - state, - capitalized_parts, - noncapitalized_parts, - ); - } - - // Record the final parts. - if is_capitalized { - capitalized_parts.push(part_buf.into_bump_str()); - } else { - noncapitalized_parts.push(part_buf.into_bump_str()); - } - - let answer = if is_accessor_fn { - // Handle accessor functions first because they have the strictest requirements. - // Accessor functions may have exactly 1 noncapitalized part, and no capitalzed parts. - if capitalized_parts.is_empty() && noncapitalized_parts.len() == 1 && !is_private_tag { - let value = noncapitalized_parts.iter().next().unwrap(); - - Ident::AccessorFunction(value) - } else { - return malformed(None, arena, state, capitalized_parts, noncapitalized_parts); - } - } else if noncapitalized_parts.is_empty() { - // We have capitalized parts only, so this must be a tag. - match capitalized_parts.first() { - Some(value) => { - if capitalized_parts.len() == 1 { - if is_private_tag { - Ident::PrivateTag(value) - } else { - Ident::GlobalTag(value) - } - } else { - // This is a qualified tag, which is not allowed! - return malformed(None, arena, state, capitalized_parts, noncapitalized_parts); - } - } - None => { - // We had neither capitalized nor noncapitalized parts, - // yet we made it this far. The only explanation is that this was - // a stray '.' drifting through the cosmos. - return Err(unexpected(1, Attempting::Identifier, state)); - } - } - } else if is_private_tag { - // This is qualified field access with an '@' in front, which does not make sense! - return malformed(None, arena, state, capitalized_parts, noncapitalized_parts); - } else { - // We have multiple noncapitalized parts, so this must be field access. - Ident::Access { - module_name: join_module_parts(arena, capitalized_parts.into_bump_slice()), - parts: noncapitalized_parts.into_bump_slice(), - } - }; - - let progress = Progress::from_lengths(start_bytes_len, state.bytes.len()); - debug_assert_eq!(progress, Progress::MadeProgress,); - Ok((Progress::MadeProgress, (answer, None), state)) -} - -fn malformed<'a>( - opt_bad_char: Option, - arena: &'a Bump, - mut state: State<'a>, - capitalized_parts: Vec<&'a str>, - noncapitalized_parts: Vec<&'a str>, -) -> ParseResult<'a, (Ident<'a>, Option), SyntaxError<'a>> { - // Reconstruct the original string that we've been parsing. - let mut full_string = String::new_in(arena); - - full_string - .push_str(arena_join(arena, &mut capitalized_parts.into_iter(), ".").into_bump_str()); - full_string - .push_str(arena_join(arena, &mut noncapitalized_parts.into_iter(), ".").into_bump_str()); - - if let Some(bad_char) = opt_bad_char { - full_string.push(bad_char); - } - - // Consume the remaining chars in the identifier. - let mut next_char = None; - - while !state.bytes.is_empty() { - match peek_utf8_char(&state) { - Ok((ch, bytes_parsed)) => { - // We can't use ch.is_alphanumeric() here because that passes for - // things that are "numeric" but not ASCII digits, like `¾` - if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() { - full_string.push(ch); - } else { - next_char = Some(ch); - - break; - } - - state = state.advance_without_indenting(bytes_parsed)?; - } - Err(reason) => return state.fail(arena, MadeProgress, reason), - } - } - - Ok(( - MadeProgress, - (Ident::Malformed(full_string.into_bump_str()), next_char), - state, - )) -} - pub fn ident<'a>() -> impl Parser<'a, Ident<'a>, SyntaxError<'a>> { - move |arena: &'a Bump, state: State<'a>| { - // Discard next_char; we don't need it. - let (progress, (string, _), state) = parse_ident(arena, state)?; - - Ok((progress, string, state)) - } + crate::parser::specialize(|e, _, _| SyntaxError::Expr(e), parse_ident_help) } pub fn global_tag_or_ident<'a, F>(pred: F) -> impl Parser<'a, &'a str, SyntaxError<'a>> @@ -435,3 +171,343 @@ pub fn join_module_parts<'a>(arena: &'a Bump, module_parts: &[&str]) -> &'a str buf.into_bump_str() } + +macro_rules! advance_state { + ($state:expr, $n:expr) => { + $state.advance_without_indenting_ee($n, |r, c| { + BadIdent::Space(crate::parser::BadInputError::LineTooLong, r, c) + }) + }; +} + +pub fn parse_ident_help<'a>( + arena: &'a Bump, + state: State<'a>, +) -> ParseResult<'a, Ident<'a>, EExpr<'a>> { + let initial = state.clone(); + + match parse_ident_help_help(arena, state) { + Ok((progress, (ident, _), state)) => { + if let Ident::Access { module_name, parts } = ident { + if module_name.is_empty() { + if let Some(first) = parts.first() { + for keyword in crate::keyword::KEYWORDS.iter() { + if first == keyword { + return Err(( + NoProgress, + EExpr::Start(initial.line, initial.column), + initial, + )); + } + } + } + } + } + + Ok((progress, ident, state)) + } + Err((NoProgress, _, state)) => { + Err((NoProgress, EExpr::Start(state.line, state.column), state)) + } + Err((MadeProgress, fail, state)) => match fail { + BadIdent::Start(r, c) => Err((NoProgress, EExpr::Start(r, c), state)), + BadIdent::Space(e, r, c) => Err((NoProgress, EExpr::Space(e, r, c), state)), + _ => malformed_identifier(initial.bytes, fail, arena, state), + }, + } +} + +fn malformed_identifier<'a>( + initial_bytes: &'a [u8], + problem: BadIdent, + _arena: &'a Bump, + mut state: State<'a>, +) -> ParseResult<'a, Ident<'a>, EExpr<'a>> { + // skip forward to the next non-identifier character + while !state.bytes.is_empty() { + match peek_utf8_char(&state) { + Ok((ch, bytes_parsed)) => { + // We can't use ch.is_alphanumeric() here because that passes for + // things that are "numeric" but not ASCII digits, like `¾` + if ch == '.' || ch == '_' || ch.is_alphabetic() || ch.is_ascii_digit() { + state = state.advance_without_indenting_ee(bytes_parsed, |r, c| { + EExpr::Space(crate::parser::BadInputError::LineTooLong, r, c) + })?; + continue; + } else { + break; + } + } + Err(_reason) => { + break; + } + } + } + + let parsed = &initial_bytes[..(initial_bytes.len() - state.bytes.len())]; + + let parsed_str = unsafe { std::str::from_utf8_unchecked(parsed) }; + + Ok((MadeProgress, Ident::Malformed(parsed_str, problem), state)) +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BadIdent { + Start(Row, Col), + Space(BadInputError, Row, Col), + Underscore(Row, Col), + QualifiedTag(Row, Col), + PrivateTagNotUppercase(Row, Col), + PartStartsWithNumber(Row, Col), + WeirdAccessor(Row, Col), + PrivateTagFieldAccess(Row, Col), + + WeirdDotAccess(Row, Col), + WeirdDotQualified(Row, Col), + DoubleDot(Row, Col), + StrayDot(Row, Col), +} + +/// Parse an identifier into a string. +/// +/// This is separate from the `ident` Parser because string interpolation +/// wants to use it this way. +pub fn parse_ident_help_help<'a>( + arena: &'a Bump, + mut state: State<'a>, +) -> ParseResult<'a, (Ident<'a>, Option), BadIdent> { + let mut part_buf = String::new_in(arena); // The current "part" (parts are dot-separated.) + let mut capitalized_parts: Vec<&'a str> = Vec::new_in(arena); + let mut noncapitalized_parts: Vec<&'a str> = Vec::new_in(arena); + let mut is_capitalized; + let is_accessor_fn; + let mut is_private_tag = false; + + // Identifiers and accessor functions must start with either a letter or a dot. + // If this starts with neither, it must be something else! + match peek_utf8_char(&state) { + Ok((first_ch, bytes_parsed)) => { + if first_ch.is_alphabetic() { + part_buf.push(first_ch); + + is_capitalized = first_ch.is_uppercase(); + is_accessor_fn = false; + + state = advance_state!(state, bytes_parsed)?; + } else if first_ch == '.' { + is_capitalized = false; + is_accessor_fn = true; + + state = advance_state!(state, bytes_parsed)?; + } else if first_ch == '@' { + state = advance_state!(state, bytes_parsed)?; + + // '@' must always be followed by a capital letter! + match peek_utf8_char(&state) { + Ok((next_ch, next_bytes_parsed)) => { + if next_ch.is_uppercase() { + state = advance_state!(state, next_bytes_parsed)?; + + part_buf.push('@'); + part_buf.push(next_ch); + + is_private_tag = true; + is_capitalized = true; + is_accessor_fn = false; + } else { + return Err(( + MadeProgress, + BadIdent::PrivateTagNotUppercase(state.line, state.column), + state, + )); + } + } + Err(_reason) => { + return Err(( + MadeProgress, + BadIdent::PrivateTagNotUppercase(state.line, state.column), + state, + )); + } + } + } else { + return Err((NoProgress, BadIdent::Start(state.line, state.column), state)); + } + } + Err(_reason) => { + return Err((NoProgress, BadIdent::Start(state.line, state.column), state)); + } + } + + while !state.bytes.is_empty() { + match peek_utf8_char(&state) { + Ok((ch, bytes_parsed)) => { + // After the first character, only these are allowed: + // + // * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers + // * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric() + // * A dot ('.') + if ch.is_alphabetic() { + if part_buf.is_empty() { + // Capitalization is determined by the first character in the part. + is_capitalized = ch.is_uppercase(); + } + + part_buf.push(ch); + } else if ch.is_ascii_digit() { + // Parts may not start with numbers! + if part_buf.is_empty() { + return Err(( + MadeProgress, + BadIdent::PartStartsWithNumber(state.line, state.column), + state, + )); + } + + part_buf.push(ch); + } else if ch == '.' { + // There are two posssible errors here: + // + // 1. Having two consecutive dots is an error. + // 2. Having capitalized parts after noncapitalized (e.g. `foo.Bar`) is an error. + if part_buf.is_empty() { + return Err(( + MadeProgress, + BadIdent::DoubleDot(state.line, state.column), + state, + )); + } + + if is_capitalized && !noncapitalized_parts.is_empty() { + return Err(( + MadeProgress, + BadIdent::WeirdDotQualified(state.line, state.column), + state, + )); + } + + if is_capitalized { + capitalized_parts.push(part_buf.into_bump_str()); + } else { + noncapitalized_parts.push(part_buf.into_bump_str()); + } + + // Now that we've recorded the contents of the current buffer, reset it. + part_buf = String::new_in(arena); + } else if ch == '_' { + // we don't allow underscores in the middle of an identifier + // but still parse them (and generate a malformed identifier) + // to give good error messages for this case + state = advance_state!(state, bytes_parsed)?; + return Err(( + MadeProgress, + BadIdent::Underscore(state.line, state.column), + state, + )); + } else { + // This must be the end of the identifier. We're done! + + break; + } + + state = advance_state!(state, bytes_parsed)?; + } + Err(_reason) => { + // + return Err(( + MadeProgress, + BadIdent::Start(state.line, state.column), + state, + )); + } + } + } + + if part_buf.is_empty() { + // We probably had a trailing dot, e.g. `Foo.bar.` - this is malformed! + // + // This condition might also occur if we encounter a malformed accessor like `.|` + // + // If we made it this far and don't have a next_char, then necessarily + // we have consumed a '.' char previously. + let fail = if noncapitalized_parts.is_empty() { + if capitalized_parts.is_empty() { + BadIdent::StrayDot(state.line, state.column) + } else { + BadIdent::WeirdDotQualified(state.line, state.column) + } + } else { + BadIdent::WeirdDotAccess(state.line, state.column) + }; + + return Err((MadeProgress, fail, state)); + } + + // Record the final parts. + if is_capitalized { + capitalized_parts.push(part_buf.into_bump_str()); + } else { + noncapitalized_parts.push(part_buf.into_bump_str()); + } + + let answer = if is_accessor_fn { + // Handle accessor functions first because they have the strictest requirements. + // Accessor functions may have exactly 1 noncapitalized part, and no capitalzed parts. + if capitalized_parts.is_empty() && noncapitalized_parts.len() == 1 && !is_private_tag { + let value = noncapitalized_parts.iter().next().unwrap(); + + Ident::AccessorFunction(value) + } else { + return Err(( + MadeProgress, + BadIdent::WeirdAccessor(state.line, state.column), + state, + )); + } + } else if noncapitalized_parts.is_empty() { + // We have capitalized parts only, so this must be a tag. + match capitalized_parts.first() { + Some(value) => { + if capitalized_parts.len() == 1 { + if is_private_tag { + Ident::PrivateTag(value) + } else { + Ident::GlobalTag(value) + } + } else { + // This is a qualified tag, which is not allowed! + return Err(( + MadeProgress, + BadIdent::QualifiedTag(state.line, state.column), + state, + )); + } + } + None => { + // We had neither capitalized nor noncapitalized parts, + // yet we made it this far. The only explanation is that this was + // a stray '.' drifting through the cosmos. + return Err(( + MadeProgress, + BadIdent::StrayDot(state.line, state.column), + state, + )); + } + } + } else if is_private_tag { + // This is qualified field access with an '@' in front, which does not make sense! + return Err(( + MadeProgress, + BadIdent::PrivateTagFieldAccess(state.line, state.column), + state, + )); + } else { + // We have multiple noncapitalized parts, so this must be field access. + Ident::Access { + module_name: join_module_parts(arena, capitalized_parts.into_bump_slice()), + parts: noncapitalized_parts.into_bump_slice(), + } + }; + + Ok((Progress::MadeProgress, (answer, None), state)) +} diff --git a/compiler/parse/src/number_literal.rs b/compiler/parse/src/number_literal.rs index 139a9df1a3..e9f3ed8a9c 100644 --- a/compiler/parse/src/number_literal.rs +++ b/compiler/parse/src/number_literal.rs @@ -1,9 +1,19 @@ -use crate::ast::{Base, Expr}; +use crate::ast::Base; use crate::parser::{parse_utf8, Number, ParseResult, Parser, Progress, State, SyntaxError}; use std::char; use std::str::from_utf8_unchecked; -pub fn number_literal<'a>() -> impl Parser<'a, Expr<'a>, Number> { +pub enum NumLiteral<'a> { + Float(&'a str), + Num(&'a str), + NonBase10Int { + string: &'a str, + base: Base, + is_negative: bool, + }, +} + +pub fn number_literal<'a>() -> impl Parser<'a, NumLiteral<'a>, Number> { move |_arena, state: State<'a>| { match state.bytes.get(0) { Some(first_byte) if *first_byte == b'-' => { @@ -25,7 +35,7 @@ fn parse_number_base<'a>( is_negated: bool, bytes: &'a [u8], state: State<'a>, -) -> ParseResult<'a, Expr<'a>, Number> { +) -> ParseResult<'a, NumLiteral<'a>, Number> { match bytes.get(0..2) { Some(b"0b") => chomp_number_base(Base::Binary, is_negated, &bytes[2..], state), Some(b"0o") => chomp_number_base(Base::Octal, is_negated, &bytes[2..], state), @@ -39,7 +49,7 @@ fn chomp_number_base<'a>( is_negative: bool, bytes: &'a [u8], state: State<'a>, -) -> ParseResult<'a, Expr<'a>, Number> { +) -> ParseResult<'a, NumLiteral<'a>, Number> { let (_is_float, chomped) = chomp_number(bytes); match parse_utf8(&bytes[0..chomped]) { @@ -48,7 +58,7 @@ fn chomp_number_base<'a>( // all is well Ok(( Progress::MadeProgress, - Expr::NonBase10Int { + NumLiteral::NonBase10Int { is_negative, string, base, @@ -71,7 +81,7 @@ fn chomp_number_dec<'a>( is_negative: bool, bytes: &'a [u8], state: State<'a>, -) -> ParseResult<'a, Expr<'a>, Number> { +) -> ParseResult<'a, NumLiteral<'a>, Number> { let (is_float, chomped) = chomp_number(bytes); if is_negative && chomped == 0 { @@ -92,9 +102,9 @@ fn chomp_number_dec<'a>( Ok(( Progress::MadeProgress, if is_float { - Expr::Float(string) + NumLiteral::Float(string) } else { - Expr::Num(string) + NumLiteral::Num(string) }, new, )) diff --git a/compiler/parse/src/parser.rs b/compiler/parse/src/parser.rs index 322f95a974..fe74a7d8a9 100644 --- a/compiler/parse/src/parser.rs +++ b/compiler/parse/src/parser.rs @@ -389,11 +389,22 @@ pub enum EExpr<'a> { Dot(Row, Col), Access(Row, Col), + UnaryNot(Row, Col), + UnaryNegate(Row, Col), + BinOp(roc_module::operator::BinOp, Row, Col), Def(&'a SyntaxError<'a>, Row, Col), + Type(Type<'a>, Row, Col), + Pattern(&'a EPattern<'a>, Row, Col), IndentDefBody(Row, Col), IndentEquals(Row, Col), + IndentAnnotation(Row, Col), Equals(Row, Col), + Colon(Row, Col), + Ident(Row, Col), + ElmStyleFunction(Region, Row, Col), + MalformedPattern(Row, Col), + QualifiedTag(Row, Col), Syntax(&'a SyntaxError<'a>, Row, Col), @@ -469,8 +480,7 @@ pub enum EInParens<'a> { End(Row, Col), Open(Row, Col), /// - // TODO remove - Syntax(&'a SyntaxError<'a>, Row, Col), + Expr(&'a EExpr<'a>, Row, Col), /// Space(BadInputError, Row, Col), @@ -488,8 +498,7 @@ pub enum ELambda<'a> { Arg(Row, Col), // TODO make EEXpr Pattern(EPattern<'a>, Row, Col), - Syntax(&'a SyntaxError<'a>, Row, Col), - + Body(&'a EExpr<'a>, Row, Col), IndentArrow(Row, Col), IndentBody(Row, Col), IndentArg(Row, Col), @@ -516,9 +525,10 @@ pub enum When<'a> { Pattern(EPattern<'a>, Row, Col), Arrow(Row, Col), Bar(Row, Col), + IfToken(Row, Col), - // TODO make EEXpr - IfGuard(&'a SyntaxError<'a>, Row, Col), + IfGuard(&'a EExpr<'a>, Row, Col), + Condition(&'a EExpr<'a>, Row, Col), Branch(&'a EExpr<'a>, Row, Col), Syntax(&'a SyntaxError<'a>, Row, Col), @@ -562,6 +572,7 @@ pub enum EPattern<'a> { Space(BadInputError, Row, Col), PInParens(PInParens<'a>, Row, Col), + NumLiteral(Number, Row, Col), IndentStart(Row, Col), IndentEnd(Row, Col), @@ -1958,6 +1969,44 @@ macro_rules! one_or_more { }; } +#[macro_export] +macro_rules! one_or_more_e { + ($parser:expr, $to_error:expr) => { + move |arena, state: State<'a>| { + use bumpalo::collections::Vec; + + match $parser.parse(arena, state) { + Ok((_, first_output, next_state)) => { + let mut state = next_state; + let mut buf = Vec::with_capacity_in(1, arena); + + buf.push(first_output); + + loop { + match $parser.parse(arena, state) { + Ok((_, next_output, next_state)) => { + state = next_state; + buf.push(next_output); + } + Err((NoProgress, _, old_state)) => { + return Ok((MadeProgress, buf, old_state)); + } + Err((MadeProgress, fail, old_state)) => { + return Err((MadeProgress, fail, old_state)); + } + } + } + } + Err((progress, _, new_state)) => Err(( + progress, + $to_error(new_state.line, new_state.column), + new_state, + )), + } + } + }; +} + #[macro_export] macro_rules! debug { ($parser:expr) => { diff --git a/compiler/parse/src/pattern.rs b/compiler/parse/src/pattern.rs index a26193aab6..5e073623d8 100644 --- a/compiler/parse/src/pattern.rs +++ b/compiler/parse/src/pattern.rs @@ -1,7 +1,6 @@ use crate::ast::Pattern; use crate::blankspace::{space0_around_ee, space0_before_e, space0_e}; use crate::ident::{ident, lowercase_ident, Ident}; -use crate::number_literal::number_literal; use crate::parser::Progress::{self, *}; use crate::parser::{ backtrackable, optional, specialize, specialize_ref, word1, EPattern, PInParens, PRecord, @@ -144,9 +143,23 @@ fn loc_pattern_in_parens_help<'a>( fn number_pattern_help<'a>() -> impl Parser<'a, Pattern<'a>, EPattern<'a>> { specialize( - |_, r, c| EPattern::Start(r, c), - map_with_arena!(number_literal(), |arena, expr| { - crate::expr::expr_to_pattern(arena, &expr).unwrap() + EPattern::NumLiteral, + map!(crate::number_literal::number_literal(), |literal| { + use crate::number_literal::NumLiteral::*; + + match literal { + Num(s) => Pattern::NumLiteral(s), + Float(s) => Pattern::FloatLiteral(s), + NonBase10Int { + string, + base, + is_negative, + } => Pattern::NonBase10Literal { + string, + base, + is_negative, + }, + } }), ) } @@ -267,12 +280,15 @@ fn loc_ident_pattern_help<'a>( }, state, )), - Ident::Malformed(malformed) => { + Ident::Malformed(malformed, problem) => { debug_assert!(!malformed.is_empty()); - Err(( + Ok(( MadeProgress, - EPattern::Start(state.line, state.column), + Located { + region: loc_ident.region, + value: Pattern::MalformedIdent(malformed, problem), + }, state, )) } diff --git a/compiler/parse/src/type_annotation.rs b/compiler/parse/src/type_annotation.rs index a096bde51e..5f7b6fb5f1 100644 --- a/compiler/parse/src/type_annotation.rs +++ b/compiler/parse/src/type_annotation.rs @@ -19,6 +19,10 @@ pub fn located<'a>( specialize(|x, _, _| SyntaxError::Type(x), expression(min_indent)) } +pub fn located_help<'a>(min_indent: u16) -> impl Parser<'a, Located>, Type<'a>> { + expression(min_indent) +} + #[inline(always)] fn tag_union_type<'a>(min_indent: u16) -> impl Parser<'a, TypeAnnotation<'a>, TTagUnion<'a>> { move |arena, state| { diff --git a/compiler/parse/tests/test_parse.rs b/compiler/parse/tests/test_parse.rs index c65ce1fe8c..fec5017711 100644 --- a/compiler/parse/tests/test_parse.rs +++ b/compiler/parse/tests/test_parse.rs @@ -975,22 +975,25 @@ mod test_parse { #[test] fn qualified_global_tag() { + use roc_parse::ident::BadIdent; + let arena = Bump::new(); - let expected = Expr::MalformedIdent("One.Two.Whee"); + let expected = Expr::MalformedIdent("One.Two.Whee", BadIdent::QualifiedTag(0, 12)); let actual = parse_expr_with(&arena, "One.Two.Whee"); assert_eq!(Ok(expected), actual); } - // TODO restore this test - it fails, but is not worth fixing right now. - // #[test] - // fn qualified_private_tag() { - // let arena = Bump::new(); - // let expected = Expr::MalformedIdent("One.Two.@Whee"); - // let actual = parse_expr_with(&arena, "One.Two.@Whee"); + #[test] + fn private_qualified_tag() { + use roc_parse::ident::BadIdent; - // assert_eq!(Ok(expected), actual); - // } + let arena = Bump::new(); + let expected = Expr::MalformedIdent("@One.Two.Whee", BadIdent::QualifiedTag(0, 13)); + let actual = parse_expr_with(&arena, "@One.Two.Whee"); + + assert_eq!(Ok(expected), actual); + } #[test] fn tag_pattern() { @@ -1003,15 +1006,6 @@ mod test_parse { assert_eq!(Ok(expected), actual); } - #[test] - fn private_qualified_tag() { - let arena = Bump::new(); - let expected = Expr::MalformedIdent("@One.Two.Whee"); - let actual = parse_expr_with(&arena, "@One.Two.Whee"); - - assert_eq!(Ok(expected), actual); - } - // LISTS #[test] @@ -1501,15 +1495,26 @@ mod test_parse { } #[test] - #[ignore] fn malformed_ident_due_to_underscore() { // This is a regression test against a bug where if you included an // underscore in an argument name, it would parse as three arguments // (and would ignore the underscore as if it had been blank space). let arena = Bump::new(); + + let pattern = Located::new( + 0, + 0, + 1, + 11, + Pattern::MalformedIdent(&"the_answer", roc_parse::ident::BadIdent::Underscore(0, 5)), + ); + let patterns = &[pattern]; + let expr = Located::new(0, 0, 15, 17, Expr::Num("42")); + + let expected = Closure(patterns, &expr); let actual = parse_expr_with(&arena, "\\the_answer -> 42"); - assert_eq!(Ok(MalformedClosure), actual); + assert_eq!(Ok(expected), actual); } #[test] diff --git a/compiler/problem/src/can.rs b/compiler/problem/src/can.rs index 5eee364411..f28d3b1c5e 100644 --- a/compiler/problem/src/can.rs +++ b/compiler/problem/src/can.rs @@ -133,7 +133,7 @@ pub enum RuntimeError { region: Region, }, InvalidPrecedence(PrecedenceProblem, Region), - MalformedIdentifier(Box, Region), + MalformedIdentifier(Box, roc_parse::ident::BadIdent, Region), MalformedClosure(Region), InvalidRecordUpdate { region: Region, @@ -167,4 +167,5 @@ pub enum MalformedPatternProblem { MalformedBase(Base), Unknown, QualifiedIdentifier, + BadIdent(roc_parse::ident::BadIdent), } diff --git a/compiler/reporting/src/error/canonicalize.rs b/compiler/reporting/src/error/canonicalize.rs index e7202b154e..85fc4da8d1 100644 --- a/compiler/reporting/src/error/canonicalize.rs +++ b/compiler/reporting/src/error/canonicalize.rs @@ -344,6 +344,253 @@ pub fn can_problem<'b>( } } +fn to_bad_ident_expr_report<'b>( + alloc: &'b RocDocAllocator<'b>, + bad_ident: roc_parse::ident::BadIdent, + surroundings: Region, +) -> RocDocBuilder<'b> { + use roc_parse::ident::BadIdent::*; + + match bad_ident { + Start(_, _) | Space(_, _, _) => unreachable!("these are handled in the parser"), + WeirdDotAccess(row, col) | StrayDot(row, col) => { + let region = Region::from_row_col(row, col); + + alloc.stack(vec![ + alloc.reflow(r"I trying to parse a record field accessor here:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow("Something like "), + alloc.parser_suggestion(".name"), + alloc.reflow(" or "), + alloc.parser_suggestion(".height"), + alloc.reflow(" that accesses a value from a record."), + ]), + ]) + } + + PartStartsWithNumber(row, col) => { + let region = Region::from_row_col(row, col); + + alloc.stack(vec![ + alloc.reflow("I trying to parse a record field access here:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow("So I expect to see a lowercase letter next, like "), + alloc.parser_suggestion(".name"), + alloc.reflow(" or "), + alloc.parser_suggestion(".height"), + alloc.reflow("."), + ]), + ]) + } + + WeirdAccessor(_row, _col) => alloc.stack(vec![ + alloc.reflow("I am very confused by this field access"), + alloc.region(surroundings), + alloc.concat(vec![ + alloc.reflow("It looks like a field access on an accessor. I parse"), + alloc.parser_suggestion(".client.name"), + alloc.reflow(" as "), + alloc.parser_suggestion("(.client).name"), + alloc.reflow(". Maybe use an anonymous function like "), + alloc.parser_suggestion("(\\r -> r.client.name)"), + alloc.reflow(" instead"), + alloc.reflow("?"), + ]), + ]), + + WeirdDotQualified(row, col) => { + let region = Region::from_row_col(row, col); + + alloc.stack(vec![ + alloc.reflow("I am trying to parse a qualified name here:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow("I was expecting to see an identifier next, like "), + alloc.parser_suggestion("height"), + alloc.reflow(". A complete qualified name looks something like "), + alloc.parser_suggestion("Json.Decode.string"), + alloc.text("."), + ]), + ]) + } + QualifiedTag(row, col) => { + let region = Region::from_row_col(row, col); + + alloc.stack(vec![ + alloc.reflow("I am trying to parse a qualified name here:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow(r"This looks like a qualified tag name to me, "), + alloc.reflow(r"but tags cannot be qualified! "), + alloc.reflow(r"Maybe you wanted a qualified name, something like "), + alloc.parser_suggestion("Json.Decode.string"), + alloc.text("?"), + ]), + ]) + } + PrivateTagNotUppercase(row, col) => { + let region = Region::from_row_col(row, col); + + alloc.stack(vec![ + alloc.reflow("I am trying to parse a private tag here:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow(r"But after the "), + alloc.keyword("@"), + alloc.reflow(r" symbol I found a lowercase letter. "), + alloc.reflow(r"All tag names (global and private)"), + alloc.reflow(r" must start with an uppercase letter, like "), + alloc.parser_suggestion("@UUID"), + alloc.reflow(" or "), + alloc.parser_suggestion("@Secrets"), + alloc.reflow("."), + ]), + ]) + } + + PrivateTagFieldAccess(_row, _col) => alloc.stack(vec![ + alloc.reflow("I am very confused by this field access:"), + alloc.region(surroundings), + alloc.concat(vec![ + alloc.reflow(r"It looks like a record field access on a private tag.") + ]), + ]), + _ => todo!(), + } +} + +fn to_bad_ident_pattern_report<'b>( + alloc: &'b RocDocAllocator<'b>, + bad_ident: roc_parse::ident::BadIdent, + surroundings: Region, +) -> RocDocBuilder<'b> { + use roc_parse::ident::BadIdent::*; + + match bad_ident { + Start(_, _) | Space(_, _, _) => unreachable!("these are handled in the parser"), + WeirdDotAccess(row, col) | StrayDot(row, col) => { + let region = Region::from_row_col(row, col); + + alloc.stack(vec![ + alloc.reflow(r"I trying to parse a record field accessor here:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow("Something like "), + alloc.parser_suggestion(".name"), + alloc.reflow(" or "), + alloc.parser_suggestion(".height"), + alloc.reflow(" that accesses a value from a record."), + ]), + ]) + } + + PartStartsWithNumber(row, col) => { + let region = Region::from_row_col(row, col); + + alloc.stack(vec![ + alloc.reflow("I trying to parse a record field access here:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow("So I expect to see a lowercase letter next, like "), + alloc.parser_suggestion(".name"), + alloc.reflow(" or "), + alloc.parser_suggestion(".height"), + alloc.reflow("."), + ]), + ]) + } + + WeirdAccessor(_row, _col) => alloc.stack(vec![ + alloc.reflow("I am very confused by this field access"), + alloc.region(surroundings), + alloc.concat(vec![ + alloc.reflow("It looks like a field access on an accessor. I parse"), + alloc.parser_suggestion(".client.name"), + alloc.reflow(" as "), + alloc.parser_suggestion("(.client).name"), + alloc.reflow(". Maybe use an anonymous function like "), + alloc.parser_suggestion("(\\r -> r.client.name)"), + alloc.reflow(" instead"), + alloc.reflow("?"), + ]), + ]), + + WeirdDotQualified(row, col) => { + let region = Region::from_row_col(row, col); + + alloc.stack(vec![ + alloc.reflow("I am trying to parse a qualified name here:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow("I was expecting to see an identifier next, like "), + alloc.parser_suggestion("height"), + alloc.reflow(". A complete qualified name looks something like "), + alloc.parser_suggestion("Json.Decode.string"), + alloc.text("."), + ]), + ]) + } + QualifiedTag(row, col) => { + let region = Region::from_row_col(row, col); + + alloc.stack(vec![ + alloc.reflow("I am trying to parse a qualified name here:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow(r"This looks like a qualified tag name to me, "), + alloc.reflow(r"but tags cannot be qualified! "), + alloc.reflow(r"Maybe you wanted a qualified name, something like "), + alloc.parser_suggestion("Json.Decode.string"), + alloc.text("?"), + ]), + ]) + } + PrivateTagNotUppercase(row, col) => { + let region = Region::from_row_col(row, col); + + alloc.stack(vec![ + alloc.reflow("I am trying to parse a private tag here:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow(r"But after the "), + alloc.keyword("@"), + alloc.reflow(r" symbol I found a lowercase letter. "), + alloc.reflow(r"All tag names (global and private)"), + alloc.reflow(r" must start with an uppercase letter, like "), + alloc.parser_suggestion("@UUID"), + alloc.reflow(" or "), + alloc.parser_suggestion("@Secrets"), + alloc.reflow("."), + ]), + ]) + } + + PrivateTagFieldAccess(_row, _col) => alloc.stack(vec![ + alloc.reflow("I am very confused by this field access:"), + alloc.region(surroundings), + alloc.concat(vec![ + alloc.reflow(r"It looks like a record field access on a private tag.") + ]), + ]), + + Underscore(row, col) => { + let region = Region::from_row_col(row, col - 1); + + alloc.stack(vec![ + alloc.reflow("I am trying to parse an identifier here:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![alloc.reflow( + r"Underscores are not allowed in identifiers. Use camelCase instead!", + )]), + ]) + } + + _ => todo!(), + } +} + fn pretty_runtime_error<'b>( alloc: &'b RocDocAllocator<'b>, runtime_error: RuntimeError, @@ -432,6 +679,7 @@ fn pretty_runtime_error<'b>( MalformedBase(Base::Binary) => " binary integer ", MalformedBase(Base::Octal) => " octal integer ", MalformedBase(Base::Decimal) => " integer ", + BadIdent(bad_ident) => return to_bad_ident_pattern_report(alloc, bad_ident, region), Unknown => " ", QualifiedIdentifier => " qualified ", }; @@ -440,7 +688,7 @@ fn pretty_runtime_error<'b>( MalformedInt | MalformedFloat | MalformedBase(_) => alloc .tip() .append(alloc.reflow("Learn more about number literals at TODO")), - Unknown => alloc.nil(), + Unknown | BadIdent(_) => alloc.nil(), QualifiedIdentifier => alloc.tip().append( alloc.reflow("In patterns, only private and global tags can be qualified"), ), @@ -482,15 +730,10 @@ fn pretty_runtime_error<'b>( // do nothing, reported with PrecedenceProblem unreachable!() } - RuntimeError::MalformedIdentifier(box_str, region) => { - alloc.stack(vec![ - alloc.concat(vec![ - alloc.reflow("The ") - .append(format!("`{}`", box_str)) - .append(alloc.reflow(" identifier is malformed:")), - ]), - alloc.region(region), - ]) + RuntimeError::MalformedIdentifier(_box_str, bad_ident, surroundings) => { + to_bad_ident_expr_report(alloc, bad_ident, surroundings) + + } RuntimeError::MalformedClosure(_) => todo!(""), RuntimeError::InvalidFloat(sign @ FloatErrorKind::PositiveInfinity, region, _raw_str) diff --git a/compiler/reporting/src/error/parse.rs b/compiler/reporting/src/error/parse.rs index d62d59f3ce..7b7e1850e6 100644 --- a/compiler/reporting/src/error/parse.rs +++ b/compiler/reporting/src/error/parse.rs @@ -145,23 +145,32 @@ fn to_syntax_report<'a>( } Type(typ) => to_type_report(alloc, filename, &typ, 0, 0), Pattern(pat) => to_pattern_report(alloc, filename, &pat, 0, 0), - Expr(expr) => to_expr_report(alloc, filename, Context::InDef, &expr, 0, 0), + Expr(expr) => to_expr_report( + alloc, + filename, + Context::InDef(start_row, start_col), + &expr, + 0, + 0, + ), _ => todo!("unhandled parse error: {:?}", parse_problem), } } enum Context { InNode(Node, Row, Col, Box), - InDef, + InDef(Row, Col), } enum Node { WhenCondition, WhenBranch, + WhenIfGuard, IfCondition, IfThenBranch, IfElseBranch, ListElement, + InsideParens, } fn to_expr_report<'a>( @@ -169,8 +178,8 @@ fn to_expr_report<'a>( filename: PathBuf, context: Context, parse_problem: &roc_parse::parser::EExpr<'a>, - _start_row: Row, - _start_col: Col, + start_row: Row, + start_col: Col, ) -> Report<'a> { use roc_parse::parser::EExpr; @@ -184,6 +193,134 @@ fn to_expr_report<'a>( EExpr::Str(string, row, col) => { to_str_report(alloc, filename, context, &string, *row, *col) } + EExpr::InParens(expr, row, col) => { + to_expr_in_parens_report(alloc, filename, context, &expr, *row, *col) + } + EExpr::Type(tipe, row, col) => to_type_report(alloc, filename, &tipe, *row, *col), + EExpr::Def(syntax, row, col) => to_syntax_report(alloc, filename, syntax, *row, *col), + + EExpr::ElmStyleFunction(region, row, col) => { + let surroundings = Region::from_rows_cols(start_row, start_col, *row, *col); + let region = *region; + + let doc = alloc.stack(vec![ + alloc.reflow(r"I am in the middle of parsing a definition, but I got stuck here:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow("Looks like you are trying to define a function. "), + alloc.reflow("In roc, functions are always written as a lambda, like "), + alloc.parser_suggestion("increment = \\n -> n + 1"), + alloc.reflow("."), + ]), + ]); + + Report { + filename, + doc, + title: "ARGUMENTS BEFORE EQUALS".to_string(), + } + } + + EExpr::Ident(_row, _col) => unreachable!("another branch would be taken"), + + EExpr::QualifiedTag(row, col) => { + let surroundings = Region::from_rows_cols(start_row, start_col, *row, *col); + let region = Region::from_row_col(*row, *col); + + let doc = alloc.stack(vec![ + alloc.reflow(r"I am very confused by this identifier:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow("Are you trying to qualify a name? I am execting something like "), + alloc.parser_suggestion("Json.Decode.string"), + alloc.reflow(". Maybe you are trying to qualify a tag? Tags like "), + alloc.parser_suggestion("Err"), + alloc.reflow(" are globally scoped in roc, and cannot be qualified."), + ]), + ]); + + Report { + filename, + doc, + title: "WEIRD IDENTIFIER".to_string(), + } + } + + EExpr::Start(row, col) => { + let (context_row, context_col, a_thing) = match context { + Context::InNode(node, r, c, _) => match node { + Node::WhenCondition | Node::WhenBranch | Node::WhenIfGuard => ( + r, + c, + alloc.concat(vec![ + alloc.text("an "), + alloc.keyword("when"), + alloc.text(" expression"), + ]), + ), + Node::IfCondition | Node::IfThenBranch | Node::IfElseBranch => ( + r, + c, + alloc.concat(vec![ + alloc.text("an "), + alloc.keyword("if"), + alloc.text(" expression"), + ]), + ), + Node::ListElement => (r, c, alloc.text("a list")), + Node::InsideParens => (r, c, alloc.text("some parentheses")), + }, + Context::InDef(r, c) => (r, c, alloc.text("a definition")), + }; + + let surroundings = Region::from_rows_cols(context_row, context_col, *row, *col); + let region = Region::from_row_col(*row, *col); + + let doc = alloc.stack(vec![ + alloc.concat(vec![ + alloc.reflow(r"I am partway through parsing "), + a_thing, + alloc.reflow(", but I got stuck here:"), + ]), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow("I was expecting to see an expression like "), + alloc.parser_suggestion("42"), + alloc.reflow(" or "), + alloc.parser_suggestion("\"hello\""), + alloc.text("."), + ]), + ]); + + Report { + filename, + doc, + title: "MISSING EXPRESSION".to_string(), + } + } + + EExpr::Colon(row, col) => { + let surroundings = Region::from_rows_cols(start_row, start_col, *row, *col); + let region = Region::from_row_col(*row, *col); + + let doc = alloc.stack(vec![ + alloc.reflow(r"I am in the middle of parsing a definition, but I got stuck here:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow("Looks like you are trying to define a function. "), + alloc.reflow("In roc, functions are always written as a lambda, like "), + alloc.parser_suggestion("increment = \\n -> n + 1"), + alloc.reflow("."), + ]), + ]); + + Report { + filename, + doc, + title: "ARGUMENTS BEFORE EQUALS".to_string(), + } + } + _ => todo!("unhandled parse error: {:?}", parse_problem), } } @@ -334,7 +471,14 @@ fn to_lambda_report<'a>( ELambda::Start(_row, _col) => unreachable!("another branch would have been taken"), - ELambda::Syntax(syntax, row, col) => to_syntax_report(alloc, filename, syntax, row, col), + ELambda::Body(expr, row, col) => to_expr_report( + alloc, + filename, + Context::InDef(start_row, start_col), + expr, + row, + col, + ), ELambda::Pattern(ref pattern, row, col) => { to_pattern_report(alloc, filename, pattern, row, col) } @@ -538,6 +682,75 @@ fn to_str_report<'a>( } } } +fn to_expr_in_parens_report<'a>( + alloc: &'a RocDocAllocator<'a>, + filename: PathBuf, + context: Context, + parse_problem: &roc_parse::parser::EInParens<'a>, + start_row: Row, + start_col: Col, +) -> Report<'a> { + use roc_parse::parser::EInParens; + + match *parse_problem { + EInParens::Space(error, row, col) => to_space_report(alloc, filename, &error, row, col), + EInParens::Expr(expr, row, col) => to_expr_report( + alloc, + filename, + Context::InNode(Node::InsideParens, start_row, start_col, Box::new(context)), + expr, + row, + col, + ), + EInParens::End(row, col) | EInParens::IndentEnd(row, col) => { + let surroundings = Region::from_rows_cols(start_row, start_col, row, col); + let region = Region::from_row_col(row, col); + + let doc = alloc.stack(vec![ + alloc + .reflow("I am partway through parsing a record pattern, but I got stuck here:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow( + r"I was expecting to see a closing parenthesis next, so try adding a ", + ), + alloc.parser_suggestion(")"), + alloc.reflow(" and see if that helps?"), + ]), + ]); + + Report { + filename, + doc, + title: "UNFINISHED PARENTHESES".to_string(), + } + } + EInParens::Open(row, col) | EInParens::IndentOpen(row, col) => { + let surroundings = Region::from_rows_cols(start_row, start_col, row, col); + let region = Region::from_row_col(row, col); + + let doc = alloc.stack(vec![ + alloc.reflow( + r"I just started parsing an expression in parentheses, but I got stuck here:", + ), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow(r"An expression in parentheses looks like "), + alloc.parser_suggestion("(32)"), + alloc.reflow(r" or "), + alloc.parser_suggestion("(\"hello\")"), + alloc.reflow(" so I was expecting to see an expression next."), + ]), + ]); + + Report { + filename, + doc, + title: "UNFINISHED PARENTHESES".to_string(), + } + } + } +} fn to_list_report<'a>( alloc: &'a RocDocAllocator<'a>, @@ -799,18 +1012,14 @@ fn to_when_report<'a>( title: "IF GUARD NO CONDITION".to_string(), } } - _ => { - // to_expr_report( - // alloc, - // filename, - // Context::InNode(Node::WhenIfGuard, start_row, start_col, Box::new(context)), - // expr, - // row, - // col, - // ) - - to_syntax_report(alloc, filename, nested, row, col) - } + _ => to_expr_report( + alloc, + filename, + Context::InNode(Node::WhenIfGuard, start_row, start_col, Box::new(context)), + nested, + row, + col, + ), }, When::Arrow(row, col) => { let surroundings = Region::from_rows_cols(start_row, start_col, row, col); diff --git a/compiler/reporting/tests/test_reporting.rs b/compiler/reporting/tests/test_reporting.rs index 93f9ea6cc2..a904d86526 100644 --- a/compiler/reporting/tests/test_reporting.rs +++ b/compiler/reporting/tests/test_reporting.rs @@ -3158,12 +3158,15 @@ mod test_reporting { ), indoc!( r#" - ── PARSE PROBLEM ─────────────────────────────────────────────────────────────── - - Unexpected tokens in front of the `=` symbol: - + ── ARGUMENTS BEFORE EQUALS ───────────────────────────────────────────────────── + + I am in the middle of parsing a definition, but I got stuck here: + 1│ f x y = x ^^^ + + Looks like you are trying to define a function. In roc, functions are + always written as a lambda, like increment = \n -> n + 1. "# ), ) @@ -4017,11 +4020,87 @@ mod test_reporting { indoc!( r#" ── SYNTAX PROBLEM ────────────────────────────────────────────────────────────── - - The `Foo.Bar` identifier is malformed: - + + I am trying to parse a qualified name here: + 1│ Foo.Bar - ^^^^^^^ + ^ + + This looks like a qualified tag name to me, but tags cannot be + qualified! Maybe you wanted a qualified name, something like + Json.Decode.string? + "# + ), + ) + } + + #[test] + fn module_ident_ends_with_dot() { + report_problem_as( + indoc!( + r#" + Foo.Bar. + "# + ), + indoc!( + r#" + ── SYNTAX PROBLEM ────────────────────────────────────────────────────────────── + + I am trying to parse a qualified name here: + + 1│ Foo.Bar. + ^ + + I was expecting to see an identifier next, like height. A complete + qualified name looks something like Json.Decode.string. + "# + ), + ) + } + + #[test] + fn record_access_ends_with_dot() { + report_problem_as( + indoc!( + r#" + foo.bar. + "# + ), + indoc!( + r#" + ── SYNTAX PROBLEM ────────────────────────────────────────────────────────────── + + I trying to parse a record field accessor here: + + 1│ foo.bar. + ^ + + Something like .name or .height that accesses a value from a record. + "# + ), + ) + } + + #[test] + fn qualified_private_tag() { + report_problem_as( + indoc!( + r#" + @Foo.Bar + "# + ), + indoc!( + r#" + ── SYNTAX PROBLEM ────────────────────────────────────────────────────────────── + + I am trying to parse a qualified name here: + + 1│ @Foo.Bar + ^ + + This looks like a qualified tag name to me, but tags cannot be + qualified! Maybe you wanted a qualified name, something like + Json.Decode.string? "# ), ) @@ -4096,12 +4175,15 @@ mod test_reporting { ), indoc!( r#" - ── PARSE PROBLEM ─────────────────────────────────────────────────────────────── - - Unexpected token : - + ── MISSING EXPRESSION ────────────────────────────────────────────────────────── + + I am partway through parsing a definition, but I got stuck here: + + 1│ main = 2│ 5 ** 3 ^ + + I was expecting to see an expression like 42 or "hello". "# ), ) @@ -4822,12 +4904,15 @@ mod test_reporting { ), indoc!( r#" - ── PARSE PROBLEM ─────────────────────────────────────────────────────────────── + ── MISSING EXPRESSION ────────────────────────────────────────────────────────── - Unexpected token : + I am partway through parsing a definition, but I got stuck here: + 1│ when Just 4 is 2│ Just 4 | -> ^ + + I was expecting to see an expression like 42 or "hello". "# ), // indoc!( @@ -5291,4 +5376,200 @@ mod test_reporting { ), ) } + + #[test] + fn keyword_record_field_access() { + report_problem_as( + indoc!( + r#" + foo = {} + + foo.if + "# + ), + indoc!( + r#" + ── TYPE MISMATCH ─────────────────────────────────────────────────────────────── + + This expression is used in an unexpected way: + + 3│ foo.if + ^^^^^^ + + This `foo` value is a: + + {} + + But you are trying to use it as: + + { if : a }b + + + "# + ), + ) + } + + #[test] + fn keyword_qualified_import() { + report_problem_as( + indoc!( + r#" + Num.if + "# + ), + indoc!( + r#" + ── SYNTAX PROBLEM ────────────────────────────────────────────────────────────── + + The Num module does not expose a if value: + + 1│ Num.if + ^^^^^^ + "# + ), + ) + } + + #[test] + fn stray_dot_expr() { + report_problem_as( + indoc!( + r#" + Num.add . 23 + "# + ), + indoc!( + r#" + ── SYNTAX PROBLEM ────────────────────────────────────────────────────────────── + + I trying to parse a record field accessor here: + + 1│ Num.add . 23 + ^ + + Something like .name or .height that accesses a value from a record. + "# + ), + ) + } + + #[test] + fn private_tag_not_uppercase() { + report_problem_as( + indoc!( + r#" + Num.add @foo 23 + "# + ), + indoc!( + r#" + ── SYNTAX PROBLEM ────────────────────────────────────────────────────────────── + + I am trying to parse a private tag here: + + 1│ Num.add @foo 23 + ^ + + But after the `@` symbol I found a lowercase letter. All tag names + (global and private) must start with an uppercase letter, like @UUID + or @Secrets. + "# + ), + ) + } + + #[test] + fn private_tag_field_access() { + report_problem_as( + indoc!( + r#" + @UUID.bar + "# + ), + indoc!( + r#" + ── SYNTAX PROBLEM ────────────────────────────────────────────────────────────── + + I am very confused by this field access: + + 1│ @UUID.bar + ^^^^^^^^^ + + It looks like a record field access on a private tag. + "# + ), + ) + } + + #[test] + fn weird_accessor() { + report_problem_as( + indoc!( + r#" + .foo.bar + "# + ), + indoc!( + r#" + ── SYNTAX PROBLEM ────────────────────────────────────────────────────────────── + + I am very confused by this field access + + 1│ .foo.bar + ^^^^^^^^ + + It looks like a field access on an accessor. I parse.client.name as + (.client).name. Maybe use an anonymous function like + (\r -> r.client.name) instead? + "# + ), + ) + } + + #[test] + fn part_starts_with_number() { + report_problem_as( + indoc!( + r#" + foo.100 + "# + ), + indoc!( + r#" + ── SYNTAX PROBLEM ────────────────────────────────────────────────────────────── + + I trying to parse a record field access here: + + 1│ foo.100 + ^ + + So I expect to see a lowercase letter next, like .name or .height. + "# + ), + ) + } + + #[test] + fn closure_underscore_ident() { + report_problem_as( + indoc!( + r#" + \the_answer -> 100 + "# + ), + indoc!( + r#" + ── SYNTAX PROBLEM ────────────────────────────────────────────────────────────── + + I am trying to parse an identifier here: + + 1│ \the_answer -> 100 + ^ + + Underscores are not allowed in identifiers. Use camelCase instead! + "# + ), + ) + } } diff --git a/editor/src/lang/expr.rs b/editor/src/lang/expr.rs index 3cc1a4f912..061fc21495 100644 --- a/editor/src/lang/expr.rs +++ b/editor/src/lang/expr.rs @@ -807,7 +807,7 @@ pub fn to_expr2<'a>( // (RuntimeError(MalformedClosure(region)), Output::default()) todo!() } - MalformedIdent(_name) => { + MalformedIdent(_name, _problem) => { // use roc_problem::can::RuntimeError::*; // // let problem = MalformedIdentifier((*name).into(), region); diff --git a/editor/src/lang/pattern.rs b/editor/src/lang/pattern.rs index c1c6f2d0e9..317c871622 100644 --- a/editor/src/lang/pattern.rs +++ b/editor/src/lang/pattern.rs @@ -409,6 +409,11 @@ pub fn to_pattern2<'a>( malformed_pattern(env, problem, region) } + MalformedIdent(_str, bad_ident) => { + let problem = MalformedPatternProblem::BadIdent(*bad_ident); + malformed_pattern(env, problem, region) + } + SpaceBefore(sub_pattern, _) | SpaceAfter(sub_pattern, _) | Nested(sub_pattern) => { return to_pattern2(env, scope, pattern_type, sub_pattern, region) }