diff --git a/compiler/can/src/expr.rs b/compiler/can/src/expr.rs index d1718703e6..2eafe5b2d4 100644 --- a/compiler/can/src/expr.rs +++ b/compiler/can/src/expr.rs @@ -737,10 +737,10 @@ pub fn canonicalize_expr<'a>( use roc_problem::can::RuntimeError::*; (RuntimeError(MalformedClosure(region)), Output::default()) } - ast::Expr::MalformedIdent(name) => { + ast::Expr::MalformedIdent(name, bad_ident) => { use roc_problem::can::RuntimeError::*; - let problem = MalformedIdentifier((*name).into(), region); + let problem = MalformedIdentifier((*name).into(), *bad_ident, region); env.problem(Problem::RuntimeError(problem.clone())); (RuntimeError(problem), Output::default()) diff --git a/compiler/can/src/operator.rs b/compiler/can/src/operator.rs index 048c2b9d83..2572bc11ec 100644 --- a/compiler/can/src/operator.rs +++ b/compiler/can/src/operator.rs @@ -88,8 +88,8 @@ pub fn desugar_expr<'a>(arena: &'a Bump, loc_expr: &'a Located>) -> &'a | Nested(AccessorFunction(_)) | Var { .. } | Nested(Var { .. }) - | MalformedIdent(_) - | Nested(MalformedIdent(_)) + | MalformedIdent(_, _) + | Nested(MalformedIdent(_, _)) | MalformedClosure | Nested(MalformedClosure) | PrecedenceConflict(_, _, _, _) diff --git a/compiler/fmt/src/expr.rs b/compiler/fmt/src/expr.rs index 80056cce7f..cbed249bca 100644 --- a/compiler/fmt/src/expr.rs +++ b/compiler/fmt/src/expr.rs @@ -30,7 +30,7 @@ impl<'a> Formattable<'a> for Expr<'a> { | Access(_, _) | AccessorFunction(_) | Var { .. } - | MalformedIdent(_) + | MalformedIdent(_, _) | MalformedClosure | GlobalTag(_) | PrivateTag(_) => false, @@ -303,7 +303,7 @@ impl<'a> Formattable<'a> for Expr<'a> { buf.push('.'); buf.push_str(key); } - MalformedIdent(_) => {} + MalformedIdent(_, _) => {} MalformedClosure => {} PrecedenceConflict(_, _, _, _) => {} } diff --git a/compiler/parse/src/ast.rs b/compiler/parse/src/ast.rs index f33ba8149b..9cff30eaed 100644 --- a/compiler/parse/src/ast.rs +++ b/compiler/parse/src/ast.rs @@ -150,7 +150,7 @@ pub enum Expr<'a> { Nested(&'a Expr<'a>), // Problems - MalformedIdent(&'a str), + MalformedIdent(&'a str, crate::ident::BadIdent), MalformedClosure, // Both operators were non-associative, e.g. (True == False == False). // We should tell the author to disambiguate by grouping them with parens. @@ -411,7 +411,7 @@ impl<'a> Pattern<'a> { } } Ident::AccessorFunction(string) => Pattern::Malformed(string), - Ident::Malformed(string) => Pattern::Malformed(string), + Ident::Malformed(string, _problem) => Pattern::Malformed(string), } } diff --git a/compiler/parse/src/expr.rs b/compiler/parse/src/expr.rs index dda6227ddc..b1adecba21 100644 --- a/compiler/parse/src/expr.rs +++ b/compiler/parse/src/expr.rs @@ -7,9 +7,9 @@ use crate::ident::{ident, lowercase_ident, Ident}; use crate::keyword; use crate::parser::{ self, allocated, and_then_with_indent_level, ascii_char, ascii_string, backtrackable, map, - newline_char, not, optional, sep_by1, sep_by1_e, specialize, specialize_ref, then, - trailing_sep_by0, word1, word2, EExpr, EInParens, ELambda, EPattern, ERecord, EString, Either, - If, List, Number, ParseResult, Parser, State, SyntaxError, Type, When, + newline_char, optional, sep_by1, sep_by1_e, specialize, specialize_ref, then, trailing_sep_by0, + word1, word2, EExpr, EInParens, ELambda, EPattern, ERecord, EString, Either, If, List, Number, + ParseResult, Parser, State, SyntaxError, Type, When, }; use crate::pattern::loc_closure_param; use crate::type_annotation; @@ -597,7 +597,7 @@ pub fn expr_to_pattern<'a>( | Expr::UnaryOp(_, _) => Err(SyntaxError::InvalidPattern), Expr::Str(string) => Ok(Pattern::StrLiteral(string.clone())), - Expr::MalformedIdent(string) => Ok(Pattern::Malformed(string)), + Expr::MalformedIdent(string, _problem) => Ok(Pattern::Malformed(string)), } } @@ -1235,20 +1235,24 @@ fn parse_def_signature_help<'a>( } fn loc_function_arg<'a>(min_indent: u16) -> impl Parser<'a, Located>, SyntaxError<'a>> { - skip_first!( - // If this is a reserved keyword ("if", "then", "case, "when"), - // followed by a blank space, then it is not a function argument! - // - // (The space is necessary because otherwise we'll get a false - // positive on function arguments beginning with keywords, - // e.g. `ifBlah` or `isSomething` will register as `if`/`is` keywords) - not(and!(reserved_keyword(), space1(min_indent))), - // Don't parse operators, because they have a higher precedence than function application. - // If we encounter one, we're done parsing function args! - specialize( - |e, _, _| SyntaxError::Expr(e), - move |arena, state| loc_parse_function_arg_help(min_indent, arena, state) - ) + // skip_first!( + // // If this is a reserved keyword ("if", "then", "case, "when"), + // // followed by a blank space, then it is not a function argument! + // // + // // (The space is necessary because otherwise we'll get a false + // // positive on function arguments beginning with keywords, + // // e.g. `ifBlah` or `isSomething` will register as `if`/`is` keywords) + // not(and!(reserved_keyword(), space1(min_indent))), + // // Don't parse operators, because they have a higher precedence than function application. + // // If we encounter one, we're done parsing function args! + // specialize( + // |e, _, _| SyntaxError::Expr(e), + // move |arena, state| loc_parse_function_arg_help(min_indent, arena, state) + // ) + // ) + specialize( + |e, _, _| SyntaxError::Expr(e), + move |arena, state| loc_parse_function_arg_help(min_indent, arena, state), ) } @@ -1272,17 +1276,6 @@ fn loc_parse_function_arg_help<'a>( .parse(arena, state) } -fn reserved_keyword<'a>() -> impl Parser<'a, (), SyntaxError<'a>> { - one_of!( - ascii_string(keyword::IF), - ascii_string(keyword::THEN), - ascii_string(keyword::ELSE), - ascii_string(keyword::WHEN), - ascii_string(keyword::IS), - ascii_string(keyword::AS) - ) -} - fn closure_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, ELambda<'a>> { map_with_arena!( skip_first!( @@ -1742,7 +1735,7 @@ fn loc_function_args<'a>( /// 5. A reserved keyword (e.g. `if ` or `case `), meaning we should do something else. fn assign_or_destructure_identifier<'a>() -> impl Parser<'a, Ident<'a>, EExpr<'a>> { - specialize(|_, r, c| EExpr::Ident(r, c), ident()) + crate::ident::parse_ident_help } fn ident_etc_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, EExpr<'a>> { @@ -1968,7 +1961,7 @@ fn ident_to_expr<'a>(arena: &'a Bump, src: Ident<'a>) -> Expr<'a> { answer } Ident::AccessorFunction(string) => Expr::AccessorFunction(string), - Ident::Malformed(string) => Expr::MalformedIdent(string), + Ident::Malformed(string, problem) => Expr::MalformedIdent(string, problem), } } diff --git a/compiler/parse/src/ident.rs b/compiler/parse/src/ident.rs index 09db1275f3..3101d27744 100644 --- a/compiler/parse/src/ident.rs +++ b/compiler/parse/src/ident.rs @@ -1,11 +1,13 @@ use crate::ast::Attempting; use crate::keyword; use crate::parser::Progress::{self, *}; -use crate::parser::{peek_utf8_char, unexpected, ParseResult, Parser, State, SyntaxError}; +use crate::parser::{ + peek_utf8_char, unexpected, BadInputError, Col, EExpr, ParseResult, Parser, Row, State, + SyntaxError, +}; use bumpalo::collections::string::String; use bumpalo::collections::vec::Vec; use bumpalo::Bump; -use roc_collections::all::arena_join; use roc_region::all::Region; /// The parser accepts all of these in any position where any one of them could @@ -26,7 +28,7 @@ pub enum Ident<'a> { /// .foo AccessorFunction(&'a str), /// .Foo or foo. or something like foo.Bar - Malformed(&'a str), + Malformed(&'a str, BadIdent), } impl<'a> Ident<'a> { @@ -50,7 +52,7 @@ impl<'a> Ident<'a> { len - 1 } AccessorFunction(string) => string.len(), - Malformed(string) => string.len(), + Malformed(string, _) => string.len(), } } @@ -59,274 +61,8 @@ impl<'a> Ident<'a> { } } -/// Parse an identifier into a string. -/// -/// This is separate from the `ident` Parser because string interpolation -/// wants to use it this way. -/// -/// By design, this does not check for reserved keywords like "if", "else", etc. -/// Sometimes we may want to check for those later in the process, and give -/// more contextually-aware error messages than "unexpected `if`" or the like. -#[inline(always)] -pub fn parse_ident<'a>( - arena: &'a Bump, - mut state: State<'a>, -) -> ParseResult<'a, (Ident<'a>, Option), SyntaxError<'a>> { - let mut part_buf = String::new_in(arena); // The current "part" (parts are dot-separated.) - let mut capitalized_parts: Vec<&'a str> = Vec::new_in(arena); - let mut noncapitalized_parts: Vec<&'a str> = Vec::new_in(arena); - let mut is_capitalized; - let is_accessor_fn; - let mut is_private_tag = false; - - let start_bytes_len = state.bytes.len(); - - // Identifiers and accessor functions must start with either a letter or a dot. - // If this starts with neither, it must be something else! - match peek_utf8_char(&state) { - Ok((first_ch, bytes_parsed)) => { - if first_ch.is_alphabetic() { - part_buf.push(first_ch); - - is_capitalized = first_ch.is_uppercase(); - is_accessor_fn = false; - - state = state.advance_without_indenting(bytes_parsed)?; - } else if first_ch == '.' { - is_capitalized = false; - is_accessor_fn = true; - - state = state.advance_without_indenting(bytes_parsed)?; - } else if first_ch == '@' { - state = state.advance_without_indenting(bytes_parsed)?; - - // '@' must always be followed by a capital letter! - match peek_utf8_char(&state) { - Ok((next_ch, next_bytes_parsed)) => { - if next_ch.is_uppercase() { - state = state.advance_without_indenting(next_bytes_parsed)?; - - part_buf.push('@'); - part_buf.push(next_ch); - - is_private_tag = true; - is_capitalized = true; - is_accessor_fn = false; - } else { - return Err(unexpected( - bytes_parsed + next_bytes_parsed, - Attempting::Identifier, - state, - )); - } - } - Err(reason) => { - let progress = Progress::from_lengths(start_bytes_len, state.bytes.len()); - return state.fail(arena, progress, reason); - } - } - } else { - return Err(unexpected(0, Attempting::Identifier, state)); - } - } - Err(reason) => { - let progress = Progress::from_lengths(start_bytes_len, state.bytes.len()); - return state.fail(arena, progress, reason); - } - } - - while !state.bytes.is_empty() { - match peek_utf8_char(&state) { - Ok((ch, bytes_parsed)) => { - // After the first character, only these are allowed: - // - // * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers - // * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric() - // * A dot ('.') - if ch.is_alphabetic() { - if part_buf.is_empty() { - // Capitalization is determined by the first character in the part. - is_capitalized = ch.is_uppercase(); - } - - part_buf.push(ch); - } else if ch.is_ascii_digit() { - // Parts may not start with numbers! - if part_buf.is_empty() { - return malformed( - Some(ch), - arena, - state, - capitalized_parts, - noncapitalized_parts, - ); - } - - part_buf.push(ch); - } else if ch == '.' { - // There are two posssible errors here: - // - // 1. Having two consecutive dots is an error. - // 2. Having capitalized parts after noncapitalized (e.g. `foo.Bar`) is an error. - if part_buf.is_empty() || (is_capitalized && !noncapitalized_parts.is_empty()) { - return malformed( - Some(ch), - arena, - state, - capitalized_parts, - noncapitalized_parts, - ); - } - - if is_capitalized { - capitalized_parts.push(part_buf.into_bump_str()); - } else { - noncapitalized_parts.push(part_buf.into_bump_str()); - } - - // Now that we've recorded the contents of the current buffer, reset it. - part_buf = String::new_in(arena); - } else { - // This must be the end of the identifier. We're done! - - break; - } - - state = state.advance_without_indenting(bytes_parsed)?; - } - Err(reason) => { - let progress = Progress::from_lengths(start_bytes_len, state.bytes.len()); - return state.fail(arena, progress, reason); - } - } - } - - if part_buf.is_empty() { - // We probably had a trailing dot, e.g. `Foo.bar.` - this is malformed! - // - // This condition might also occur if we encounter a malformed accessor like `.|` - // - // If we made it this far and don't have a next_char, then necessarily - // we have consumed a '.' char previously. - return malformed( - Some('.'), - arena, - state, - capitalized_parts, - noncapitalized_parts, - ); - } - - // Record the final parts. - if is_capitalized { - capitalized_parts.push(part_buf.into_bump_str()); - } else { - noncapitalized_parts.push(part_buf.into_bump_str()); - } - - let answer = if is_accessor_fn { - // Handle accessor functions first because they have the strictest requirements. - // Accessor functions may have exactly 1 noncapitalized part, and no capitalzed parts. - if capitalized_parts.is_empty() && noncapitalized_parts.len() == 1 && !is_private_tag { - let value = noncapitalized_parts.iter().next().unwrap(); - - Ident::AccessorFunction(value) - } else { - return malformed(None, arena, state, capitalized_parts, noncapitalized_parts); - } - } else if noncapitalized_parts.is_empty() { - // We have capitalized parts only, so this must be a tag. - match capitalized_parts.first() { - Some(value) => { - if capitalized_parts.len() == 1 { - if is_private_tag { - Ident::PrivateTag(value) - } else { - Ident::GlobalTag(value) - } - } else { - // This is a qualified tag, which is not allowed! - return malformed(None, arena, state, capitalized_parts, noncapitalized_parts); - } - } - None => { - // We had neither capitalized nor noncapitalized parts, - // yet we made it this far. The only explanation is that this was - // a stray '.' drifting through the cosmos. - return Err(unexpected(1, Attempting::Identifier, state)); - } - } - } else if is_private_tag { - // This is qualified field access with an '@' in front, which does not make sense! - return malformed(None, arena, state, capitalized_parts, noncapitalized_parts); - } else { - // We have multiple noncapitalized parts, so this must be field access. - Ident::Access { - module_name: join_module_parts(arena, capitalized_parts.into_bump_slice()), - parts: noncapitalized_parts.into_bump_slice(), - } - }; - - let progress = Progress::from_lengths(start_bytes_len, state.bytes.len()); - debug_assert_eq!(progress, Progress::MadeProgress,); - Ok((Progress::MadeProgress, (answer, None), state)) -} - -fn malformed<'a>( - opt_bad_char: Option, - arena: &'a Bump, - mut state: State<'a>, - capitalized_parts: Vec<&'a str>, - noncapitalized_parts: Vec<&'a str>, -) -> ParseResult<'a, (Ident<'a>, Option), SyntaxError<'a>> { - // Reconstruct the original string that we've been parsing. - let mut full_string = String::new_in(arena); - - full_string - .push_str(arena_join(arena, &mut capitalized_parts.into_iter(), ".").into_bump_str()); - full_string - .push_str(arena_join(arena, &mut noncapitalized_parts.into_iter(), ".").into_bump_str()); - - if let Some(bad_char) = opt_bad_char { - full_string.push(bad_char); - } - - // Consume the remaining chars in the identifier. - let mut next_char = None; - - while !state.bytes.is_empty() { - match peek_utf8_char(&state) { - Ok((ch, bytes_parsed)) => { - // We can't use ch.is_alphanumeric() here because that passes for - // things that are "numeric" but not ASCII digits, like `¾` - if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() { - full_string.push(ch); - } else { - next_char = Some(ch); - - break; - } - - state = state.advance_without_indenting(bytes_parsed)?; - } - Err(reason) => return state.fail(arena, MadeProgress, reason), - } - } - - Ok(( - MadeProgress, - (Ident::Malformed(full_string.into_bump_str()), next_char), - state, - )) -} - pub fn ident<'a>() -> impl Parser<'a, Ident<'a>, SyntaxError<'a>> { - move |arena: &'a Bump, state: State<'a>| { - // Discard next_char; we don't need it. - let (progress, (string, _), state) = parse_ident(arena, state)?; - - Ok((progress, string, state)) - } + crate::parser::specialize(|e, _, _| SyntaxError::Expr(e), parse_ident_help) } pub fn global_tag_or_ident<'a, F>(pred: F) -> impl Parser<'a, &'a str, SyntaxError<'a>> @@ -435,3 +171,332 @@ pub fn join_module_parts<'a>(arena: &'a Bump, module_parts: &[&str]) -> &'a str buf.into_bump_str() } + +macro_rules! advance_state { + ($state:expr, $n:expr) => { + $state.advance_without_indenting_ee($n, |r, c| { + BadIdent::Space(crate::parser::BadInputError::LineTooLong, r, c) + }) + }; +} + +pub fn parse_ident_help<'a>( + arena: &'a Bump, + state: State<'a>, +) -> ParseResult<'a, Ident<'a>, EExpr<'a>> { + let initial = state.clone(); + + match parse_ident_help_help(arena, state) { + Ok((progress, (ident, _), state)) => { + if let Ident::Access { module_name, parts } = ident { + if module_name.is_empty() { + if let Some(first) = parts.first() { + for keyword in crate::keyword::KEYWORDS.iter() { + if first == keyword { + return Err(( + NoProgress, + EExpr::Start(initial.line, initial.column), + initial, + )); + } + } + } + } + } + + Ok((progress, ident, state)) + } + Err((NoProgress, _, state)) => { + Err((NoProgress, EExpr::Start(state.line, state.column), state)) + } + Err((MadeProgress, fail, state)) => match fail { + BadIdent::Start(r, c) => Err((NoProgress, EExpr::Start(r, c), state)), + BadIdent::Space(e, r, c) => Err((NoProgress, EExpr::Space(e, r, c), state)), + _ => malformed_identifier(initial.bytes, fail, arena, state), + }, + } +} + +fn malformed_identifier<'a>( + initial_bytes: &'a [u8], + problem: BadIdent, + _arena: &'a Bump, + mut state: State<'a>, +) -> ParseResult<'a, Ident<'a>, EExpr<'a>> { + // skip forward to the next non-identifier character + while !state.bytes.is_empty() { + match peek_utf8_char(&state) { + Ok((ch, bytes_parsed)) => { + // We can't use ch.is_alphanumeric() here because that passes for + // things that are "numeric" but not ASCII digits, like `¾` + if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() { + state = state.advance_without_indenting_ee(bytes_parsed, |r, c| { + EExpr::Space(crate::parser::BadInputError::LineTooLong, r, c) + })?; + continue; + } else { + break; + } + } + Err(_reason) => { + break; + } + } + } + + let parsed = &initial_bytes[..(initial_bytes.len() - state.bytes.len())]; + + let parsed_str = unsafe { std::str::from_utf8_unchecked(parsed) }; + + Ok((MadeProgress, Ident::Malformed(parsed_str, problem), state)) +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BadIdent { + Start(Row, Col), + Space(BadInputError, Row, Col), + QualifiedTag(Row, Col), + PrivateTagNotUppercase(Row, Col), + PartStartsWithNumber(Row, Col), + WeirdAccessor(Row, Col), + PrivateTagFieldAccess(Row, Col), + + WeirdDotAccess(Row, Col), + WeirdDotQualified(Row, Col), + DoubleDot(Row, Col), + StrayDot(Row, Col), +} + +/// Parse an identifier into a string. +/// +/// This is separate from the `ident` Parser because string interpolation +/// wants to use it this way. +pub fn parse_ident_help_help<'a>( + arena: &'a Bump, + mut state: State<'a>, +) -> ParseResult<'a, (Ident<'a>, Option), BadIdent> { + let mut part_buf = String::new_in(arena); // The current "part" (parts are dot-separated.) + let mut capitalized_parts: Vec<&'a str> = Vec::new_in(arena); + let mut noncapitalized_parts: Vec<&'a str> = Vec::new_in(arena); + let mut is_capitalized; + let is_accessor_fn; + let mut is_private_tag = false; + + // Identifiers and accessor functions must start with either a letter or a dot. + // If this starts with neither, it must be something else! + match peek_utf8_char(&state) { + Ok((first_ch, bytes_parsed)) => { + if first_ch.is_alphabetic() { + part_buf.push(first_ch); + + is_capitalized = first_ch.is_uppercase(); + is_accessor_fn = false; + + state = advance_state!(state, bytes_parsed)?; + } else if first_ch == '.' { + is_capitalized = false; + is_accessor_fn = true; + + state = advance_state!(state, bytes_parsed)?; + } else if first_ch == '@' { + state = advance_state!(state, bytes_parsed)?; + + // '@' must always be followed by a capital letter! + match peek_utf8_char(&state) { + Ok((next_ch, next_bytes_parsed)) => { + if next_ch.is_uppercase() { + state = advance_state!(state, next_bytes_parsed)?; + + part_buf.push('@'); + part_buf.push(next_ch); + + is_private_tag = true; + is_capitalized = true; + is_accessor_fn = false; + } else { + return Err(( + MadeProgress, + BadIdent::PrivateTagNotUppercase(state.line, state.column), + state, + )); + } + } + Err(_reason) => { + return Err(( + MadeProgress, + BadIdent::PrivateTagNotUppercase(state.line, state.column), + state, + )); + } + } + } else { + return Err((NoProgress, BadIdent::Start(state.line, state.column), state)); + } + } + Err(_reason) => { + return Err((NoProgress, BadIdent::Start(state.line, state.column), state)); + } + } + + while !state.bytes.is_empty() { + match peek_utf8_char(&state) { + Ok((ch, bytes_parsed)) => { + // After the first character, only these are allowed: + // + // * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers + // * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric() + // * A dot ('.') + if ch.is_alphabetic() { + if part_buf.is_empty() { + // Capitalization is determined by the first character in the part. + is_capitalized = ch.is_uppercase(); + } + + part_buf.push(ch); + } else if ch.is_ascii_digit() { + // Parts may not start with numbers! + if part_buf.is_empty() { + return Err(( + MadeProgress, + BadIdent::PartStartsWithNumber(state.line, state.column), + state, + )); + } + + part_buf.push(ch); + } else if ch == '.' { + // There are two posssible errors here: + // + // 1. Having two consecutive dots is an error. + // 2. Having capitalized parts after noncapitalized (e.g. `foo.Bar`) is an error. + if part_buf.is_empty() { + return Err(( + MadeProgress, + BadIdent::DoubleDot(state.line, state.column), + state, + )); + } + + if is_capitalized && !noncapitalized_parts.is_empty() { + return Err(( + MadeProgress, + BadIdent::WeirdDotQualified(state.line, state.column), + state, + )); + } + + if is_capitalized { + capitalized_parts.push(part_buf.into_bump_str()); + } else { + noncapitalized_parts.push(part_buf.into_bump_str()); + } + + // Now that we've recorded the contents of the current buffer, reset it. + part_buf = String::new_in(arena); + } else { + // This must be the end of the identifier. We're done! + + break; + } + + state = advance_state!(state, bytes_parsed)?; + } + Err(_reason) => { + // + return Err(( + MadeProgress, + BadIdent::Start(state.line, state.column), + state, + )); + } + } + } + + if part_buf.is_empty() { + // We probably had a trailing dot, e.g. `Foo.bar.` - this is malformed! + // + // This condition might also occur if we encounter a malformed accessor like `.|` + // + // If we made it this far and don't have a next_char, then necessarily + // we have consumed a '.' char previously. + let fail = if noncapitalized_parts.is_empty() { + if capitalized_parts.is_empty() { + BadIdent::StrayDot(state.line, state.column) + } else { + BadIdent::WeirdDotQualified(state.line, state.column) + } + } else { + BadIdent::WeirdDotAccess(state.line, state.column) + }; + + return Err((MadeProgress, fail, state)); + } + + // Record the final parts. + if is_capitalized { + capitalized_parts.push(part_buf.into_bump_str()); + } else { + noncapitalized_parts.push(part_buf.into_bump_str()); + } + + let answer = if is_accessor_fn { + // Handle accessor functions first because they have the strictest requirements. + // Accessor functions may have exactly 1 noncapitalized part, and no capitalzed parts. + if capitalized_parts.is_empty() && noncapitalized_parts.len() == 1 && !is_private_tag { + let value = noncapitalized_parts.iter().next().unwrap(); + + Ident::AccessorFunction(value) + } else { + return Err(( + MadeProgress, + BadIdent::WeirdAccessor(state.line, state.column), + state, + )); + } + } else if noncapitalized_parts.is_empty() { + // We have capitalized parts only, so this must be a tag. + match capitalized_parts.first() { + Some(value) => { + if capitalized_parts.len() == 1 { + if is_private_tag { + Ident::PrivateTag(value) + } else { + Ident::GlobalTag(value) + } + } else { + // This is a qualified tag, which is not allowed! + return Err(( + MadeProgress, + BadIdent::QualifiedTag(state.line, state.column), + state, + )); + } + } + None => { + // We had neither capitalized nor noncapitalized parts, + // yet we made it this far. The only explanation is that this was + // a stray '.' drifting through the cosmos. + return Err(( + MadeProgress, + BadIdent::StrayDot(state.line, state.column), + state, + )); + } + } + } else if is_private_tag { + // This is qualified field access with an '@' in front, which does not make sense! + return Err(( + MadeProgress, + BadIdent::PrivateTagFieldAccess(state.line, state.column), + state, + )); + } else { + // We have multiple noncapitalized parts, so this must be field access. + Ident::Access { + module_name: join_module_parts(arena, capitalized_parts.into_bump_slice()), + parts: noncapitalized_parts.into_bump_slice(), + } + }; + + Ok((Progress::MadeProgress, (answer, None), state)) +} diff --git a/compiler/parse/src/parser.rs b/compiler/parse/src/parser.rs index b7c604c896..2fa2ef70ea 100644 --- a/compiler/parse/src/parser.rs +++ b/compiler/parse/src/parser.rs @@ -404,6 +404,7 @@ pub enum EExpr<'a> { Ident(Row, Col), ElmStyleFunction(Region, Row, Col), MalformedPattern(Row, Col), + QualifiedTag(Row, Col), Syntax(&'a SyntaxError<'a>, Row, Col), diff --git a/compiler/parse/src/pattern.rs b/compiler/parse/src/pattern.rs index a26193aab6..76fc15c65d 100644 --- a/compiler/parse/src/pattern.rs +++ b/compiler/parse/src/pattern.rs @@ -267,7 +267,7 @@ fn loc_ident_pattern_help<'a>( }, state, )), - Ident::Malformed(malformed) => { + Ident::Malformed(malformed, _problem) => { debug_assert!(!malformed.is_empty()); Err(( diff --git a/compiler/parse/tests/test_parse.rs b/compiler/parse/tests/test_parse.rs index c65ce1fe8c..7a42a77575 100644 --- a/compiler/parse/tests/test_parse.rs +++ b/compiler/parse/tests/test_parse.rs @@ -982,15 +982,14 @@ mod test_parse { assert_eq!(Ok(expected), actual); } - // TODO restore this test - it fails, but is not worth fixing right now. - // #[test] - // fn qualified_private_tag() { - // let arena = Bump::new(); - // let expected = Expr::MalformedIdent("One.Two.@Whee"); - // let actual = parse_expr_with(&arena, "One.Two.@Whee"); + #[test] + fn private_qualified_tag() { + let arena = Bump::new(); + let expected = Expr::MalformedIdent("@One.Two.Whee"); + let actual = parse_expr_with(&arena, "@One.Two.Whee"); - // assert_eq!(Ok(expected), actual); - // } + assert_eq!(Ok(expected), actual); + } #[test] fn tag_pattern() { @@ -1003,15 +1002,6 @@ mod test_parse { assert_eq!(Ok(expected), actual); } - #[test] - fn private_qualified_tag() { - let arena = Bump::new(); - let expected = Expr::MalformedIdent("@One.Two.Whee"); - let actual = parse_expr_with(&arena, "@One.Two.Whee"); - - assert_eq!(Ok(expected), actual); - } - // LISTS #[test] diff --git a/compiler/problem/src/can.rs b/compiler/problem/src/can.rs index 5eee364411..e6ec70e65c 100644 --- a/compiler/problem/src/can.rs +++ b/compiler/problem/src/can.rs @@ -133,7 +133,7 @@ pub enum RuntimeError { region: Region, }, InvalidPrecedence(PrecedenceProblem, Region), - MalformedIdentifier(Box, Region), + MalformedIdentifier(Box, roc_parse::ident::BadIdent, Region), MalformedClosure(Region), InvalidRecordUpdate { region: Region, diff --git a/compiler/reporting/src/error/canonicalize.rs b/compiler/reporting/src/error/canonicalize.rs index e7202b154e..f8dc2f5a39 100644 --- a/compiler/reporting/src/error/canonicalize.rs +++ b/compiler/reporting/src/error/canonicalize.rs @@ -482,15 +482,127 @@ fn pretty_runtime_error<'b>( // do nothing, reported with PrecedenceProblem unreachable!() } - RuntimeError::MalformedIdentifier(box_str, region) => { - alloc.stack(vec![ - alloc.concat(vec![ - alloc.reflow("The ") - .append(format!("`{}`", box_str)) - .append(alloc.reflow(" identifier is malformed:")), - ]), - alloc.region(region), - ]) + RuntimeError::MalformedIdentifier(_box_str, bad_ident, surroundings) => { + // we re-parse the identifier, to learn exactly what is going on + use roc_parse::ident::BadIdent::*; + + + + match bad_ident { + Start(_,_) | Space(_,_,_) => unreachable!("these are handled in the parser"), + WeirdDotAccess(row, col) | StrayDot(row, col) => { + let region = Region::from_row_col(row, col); + + alloc.stack(vec![ + alloc.reflow("I trying to parse a record field accessor here:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow("Something like "), + alloc.parser_suggestion(".name"), + alloc.reflow(" or "), + alloc.parser_suggestion(".height"), + alloc.reflow(" that accesses a value from a record."), + ]) + ]) + } + + PartStartsWithNumber(row, col) => { + let region = Region::from_row_col(row, col); + + alloc.stack(vec![ + alloc.reflow("I trying to parse a record field access here:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow("So I expect to see a lowercase letter next, like "), + alloc.parser_suggestion(".name"), + alloc.reflow(" or "), + alloc.parser_suggestion(".height"), + alloc.reflow("."), + ]) + ]) + } + + WeirdAccessor(_row, _col) => { + + alloc.stack(vec![ + alloc.reflow("I am very confused by this field access"), + alloc.region(surroundings), + alloc.concat(vec![ + alloc.reflow("It looks like a field access on an accessor. I parse"), + alloc.parser_suggestion(".client.name"), + alloc.reflow(" as "), + alloc.parser_suggestion("(.client).name"), + alloc.reflow(". Maybe use an anonymous function like "), + alloc.parser_suggestion("(\\r -> r.client.name)"), + alloc.reflow(" instead"), + alloc.reflow("?"), + ]) + ]) + } + + + WeirdDotQualified(row, col) => { + let region = Region::from_row_col(row, col); + + alloc.stack(vec![ + alloc.reflow("I am trying to parse a qualified name here:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow("I was expecting to see an identifier next, like "), + alloc.parser_suggestion("height"), + alloc.reflow(". A complete qualified name looks something like "), + alloc.parser_suggestion("Json.Decode.string"), + alloc.text(".") + ]) + ]) + } + QualifiedTag(row, col) => { + let region = Region::from_row_col(row, col); + + alloc.stack(vec![ + alloc.reflow("I am trying to parse a qualified name here:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow("This looks like a qualified tag name to me, but tags cannot be qualified! "), + alloc.reflow("Maybe you wanted a qualified name, something like "), + alloc.parser_suggestion("Json.Decode.string"), + alloc.text("?") + ]) + ]) + } + PrivateTagNotUppercase(row, col) => { + let region = Region::from_row_col(row, col); + + alloc.stack(vec![ + alloc.reflow("I am trying to parse a private tag here:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow(r"But after the "), + alloc.keyword("@"), + alloc.reflow(r" symbol I found a lowercase letter. "), + alloc.reflow(r"All tag names (global and private) must start with an uppercase letter, like "), + alloc.parser_suggestion("@UUID"), + alloc.reflow(" or "), + alloc.parser_suggestion("@Secrets"), + alloc.reflow("."), + ]) + ]) + } + + PrivateTagFieldAccess(_row, _col) => { + + alloc.stack(vec![ + alloc.reflow("I am very confused by this field access:"), + alloc.region(surroundings), + alloc.concat(vec![ + alloc.reflow(r"It looks like a record field access on a private tag."), + ]) + ]) + } + _ => todo!(), + } + + } RuntimeError::MalformedClosure(_) => todo!(""), RuntimeError::InvalidFloat(sign @ FloatErrorKind::PositiveInfinity, region, _raw_str) diff --git a/compiler/reporting/src/error/parse.rs b/compiler/reporting/src/error/parse.rs index 0cb157a5c1..7b7e1850e6 100644 --- a/compiler/reporting/src/error/parse.rs +++ b/compiler/reporting/src/error/parse.rs @@ -221,7 +221,30 @@ fn to_expr_report<'a>( } } - EExpr::Ident(_row, _col) => unreachable!("another branch would have been chosen"), + EExpr::Ident(_row, _col) => unreachable!("another branch would be taken"), + + EExpr::QualifiedTag(row, col) => { + let surroundings = Region::from_rows_cols(start_row, start_col, *row, *col); + let region = Region::from_row_col(*row, *col); + + let doc = alloc.stack(vec![ + alloc.reflow(r"I am very confused by this identifier:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow("Are you trying to qualify a name? I am execting something like "), + alloc.parser_suggestion("Json.Decode.string"), + alloc.reflow(". Maybe you are trying to qualify a tag? Tags like "), + alloc.parser_suggestion("Err"), + alloc.reflow(" are globally scoped in roc, and cannot be qualified."), + ]), + ]); + + Report { + filename, + doc, + title: "WEIRD IDENTIFIER".to_string(), + } + } EExpr::Start(row, col) => { let (context_row, context_col, a_thing) = match context { diff --git a/compiler/reporting/tests/test_reporting.rs b/compiler/reporting/tests/test_reporting.rs index d24c680159..f9c30413af 100644 --- a/compiler/reporting/tests/test_reporting.rs +++ b/compiler/reporting/tests/test_reporting.rs @@ -4020,11 +4020,87 @@ mod test_reporting { indoc!( r#" ── SYNTAX PROBLEM ────────────────────────────────────────────────────────────── - - The `Foo.Bar` identifier is malformed: - + + I am trying to parse a qualified name here: + 1│ Foo.Bar - ^^^^^^^ + ^ + + This looks like a qualified tag name to me, but tags cannot be + qualified! Maybe you wanted a qualified name, something like + Json.Decode.string? + "# + ), + ) + } + + #[test] + fn module_ident_ends_with_dot() { + report_problem_as( + indoc!( + r#" + Foo.Bar. + "# + ), + indoc!( + r#" + ── SYNTAX PROBLEM ────────────────────────────────────────────────────────────── + + I am trying to parse a qualified name here: + + 1│ Foo.Bar. + ^ + + I was expecting to see an identifier next, like height. A complete + qualified name looks something like Json.Decode.string. + "# + ), + ) + } + + #[test] + fn record_access_ends_with_dot() { + report_problem_as( + indoc!( + r#" + foo.bar. + "# + ), + indoc!( + r#" + ── SYNTAX PROBLEM ────────────────────────────────────────────────────────────── + + I trying to parse a record field accessor here: + + 1│ foo.bar. + ^ + + Something like .name or .height that accesses a value from a record. + "# + ), + ) + } + + #[test] + fn qualified_private_tag() { + report_problem_as( + indoc!( + r#" + @Foo.Bar + "# + ), + indoc!( + r#" + ── SYNTAX PROBLEM ────────────────────────────────────────────────────────────── + + I am trying to parse a qualified name here: + + 1│ @Foo.Bar + ^ + + This looks like a qualified tag name to me, but tags cannot be + qualified! Maybe you wanted a qualified name, something like + Json.Decode.string? "# ), ) @@ -5300,4 +5376,177 @@ mod test_reporting { ), ) } + + #[test] + fn keyword_record_field_access() { + report_problem_as( + indoc!( + r#" + foo = {} + + foo.if + "# + ), + indoc!( + r#" + ── TYPE MISMATCH ─────────────────────────────────────────────────────────────── + + This expression is used in an unexpected way: + + 3│ foo.if + ^^^^^^ + + This `foo` value is a: + + {} + + But you are trying to use it as: + + { if : a }b + + + "# + ), + ) + } + + #[test] + fn keyword_qualified_import() { + report_problem_as( + indoc!( + r#" + Num.if + "# + ), + indoc!( + r#" + ── SYNTAX PROBLEM ────────────────────────────────────────────────────────────── + + The Num module does not expose a if value: + + 1│ Num.if + ^^^^^^ + "# + ), + ) + } + + #[test] + fn stray_dot_expr() { + report_problem_as( + indoc!( + r#" + Num.add . 23 + "# + ), + indoc!( + r#" + ── SYNTAX PROBLEM ────────────────────────────────────────────────────────────── + + I trying to parse a record field accessor here: + + 1│ Num.add . 23 + ^ + + Something like .name or .height that accesses a value from a record. + "# + ), + ) + } + + #[test] + fn private_tag_not_uppercase() { + report_problem_as( + indoc!( + r#" + Num.add @foo 23 + "# + ), + indoc!( + r#" + ── SYNTAX PROBLEM ────────────────────────────────────────────────────────────── + + I am trying to parse a private tag here: + + 1│ Num.add @foo 23 + ^ + + But after the `@` symbol I found a lowercase letter. All tag names + (global and private) must start with an uppercase letter, like @UUID + or @Secrets. + "# + ), + ) + } + + #[test] + fn private_tag_field_access() { + report_problem_as( + indoc!( + r#" + @UUID.bar + "# + ), + indoc!( + r#" + ── SYNTAX PROBLEM ────────────────────────────────────────────────────────────── + + I am very confused by this field access: + + 1│ @UUID.bar + ^^^^^^^^^ + + It looks like a record field access on a private tag. + "# + ), + ) + } + + #[test] + fn weird_accessor() { + report_problem_as( + indoc!( + r#" + .foo.bar + "# + ), + indoc!( + r#" + ── SYNTAX PROBLEM ────────────────────────────────────────────────────────────── + + I am very confused by this field access + + 1│ .foo.bar + ^^^^^^^^ + + It looks like a field access on an accessor. I parse.client.name as + (.client).name. Maybe use an anonymous function like + (\r -> r.client.name) instead? + "# + ), + ) + } + + #[test] + fn part_starts_with_number() { + report_problem_as( + indoc!( + r#" + foo.100 + "# + ), + indoc!( + r#" + ── SYNTAX PROBLEM ────────────────────────────────────────────────────────────── + + I trying to parse a record field access here: + + 1│ foo.100 + ^ + + So I expect to see a lowercase letter next, like .name or .height. + "# + ), + ) + } }