diff --git a/compiler/parse/src/expr.rs b/compiler/parse/src/expr.rs
index 5780967d54..1f69407eca 100644
--- a/compiler/parse/src/expr.rs
+++ b/compiler/parse/src/expr.rs
@@ -3,7 +3,7 @@ use crate::blankspace::{
     line_comment, space0_after_e, space0_around_ee, space0_before_e, space0_e, space1_e,
     spaces_exactly_e,
 };
-use crate::ident::{ident, lowercase_ident, Ident};
+use crate::ident::{lowercase_ident, parse_ident_help, Ident};
 use crate::keyword;
 use crate::parser::{
     self, allocated, and_then_with_indent_level, ascii_char, backtrackable, map, newline_char,
@@ -1967,11 +1967,11 @@ fn ident_then_args<'a>(
 }
 
 fn ident_without_apply_help<'a>() -> impl Parser<'a, Expr<'a>, EExpr<'a>> {
-    specialize_ref(
-        EExpr::Syntax,
-        then(loc!(ident()), move |arena, state, progress, loc_ident| {
+    then(
+        loc!(parse_ident_help),
+        move |arena, state, progress, loc_ident| {
             Ok((progress, ident_to_expr(arena, loc_ident.value), state))
-        }),
+        },
     )
 }
 
@@ -2163,7 +2163,7 @@ fn record_field_help<'a>(
 fn record_updateable_identifier<'a>() -> impl Parser<'a, Expr<'a>, ERecord<'a>> {
     specialize(
         |_, r, c| ERecord::Updateable(r, c),
-        map_with_arena!(ident(), ident_to_expr),
+        map_with_arena!(parse_ident_help, ident_to_expr),
     )
 }
 
diff --git a/compiler/parse/src/ident.rs b/compiler/parse/src/ident.rs
index bb3143b53d..d1819f7f36 100644
--- a/compiler/parse/src/ident.rs
+++ b/compiler/parse/src/ident.rs
@@ -61,11 +61,56 @@ impl<'a> Ident<'a> {
     }
 }
 
-pub fn ident<'a>() -> impl Parser<'a, Ident<'a>, SyntaxError<'a>> {
-    crate::parser::specialize(|e, _, _| SyntaxError::Expr(e), parse_ident_help)
+/// Chomps one identifier off the front of `buffer` and returns it as a `&str`.
+///
+/// The first character must satisfy `pred`. Every following character must be
+/// alphabetic (per `char::is_alphabetic`) or an ASCII digit; scanning stops at the
+/// first character that is neither, or as soon as `from_utf8_slice_start` fails.
+///
+/// Returns `Err(Progress::NoProgress)` if `buffer` does not start with a character
+/// accepted by `pred`.
+fn chomp_identifier<'a, F>(pred: F, buffer: &[u8]) -> Result<&str, Progress>
+where
+    F: Fn(char) -> bool,
+{
+    use encode_unicode::CharExt;
+
+    let mut chomped = 0;
+
+    match char::from_utf8_slice_start(&buffer[chomped..]) {
+        Ok((ch, width)) if pred(ch) => {
+            chomped += width;
+        }
+        _ => {
+            // no parse
+            return Err(Progress::NoProgress);
+        }
+    }
+
+    while let Ok((ch, width)) = char::from_utf8_slice_start(&buffer[chomped..]) {
+        // After the first character, only these are allowed:
+        //
+        // * Unicode alphabetic chars - you might include `鹏` if that's clear to your readers
+        // * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
+        //
+        // Any other character (a ':' for example) ends the identifier and is not consumed.
+        if ch.is_alphabetic() || ch.is_ascii_digit() {
+            chomped += width;
+        } else {
+            // we're done
+            break;
+        }
+    }
+
+    // SAFETY: `chomped` only ever advances by the `width` of a char that
+    // `from_utf8_slice_start` decoded successfully starting at the previous value of
+    // `chomped`, so `buffer[..chomped]` consists of whole, valid UTF-8 sequences.
+    let name = unsafe { std::str::from_utf8_unchecked(&buffer[..chomped]) };
+
+    Ok(name)
 }
 
-pub fn global_tag_or_ident<'a, F>(pred: F) -> impl Parser<'a, &'a str, SyntaxError<'a>>
+fn global_tag_or_ident<'a, F>(pred: F) -> impl Parser<'a, &'a str, SyntaxError<'a>>
 where
     F: Fn(char) -> bool,
 {
diff --git a/compiler/parse/src/pattern.rs b/compiler/parse/src/pattern.rs
index 5e073623d8..146940d790 100644
--- a/compiler/parse/src/pattern.rs
+++ b/compiler/parse/src/pattern.rs
@@ -1,6 +1,6 @@
 use crate::ast::Pattern;
 use crate::blankspace::{space0_around_ee, space0_before_e, space0_e};
-use crate::ident::{ident, lowercase_ident, Ident};
+use crate::ident::{lowercase_ident, parse_ident_help, Ident};
 use crate::parser::Progress::{self, *};
 use crate::parser::{
     backtrackable, optional, specialize, specialize_ref, word1, EPattern, PInParens, PRecord,
@@ -179,7 +179,8 @@ fn loc_ident_pattern_help<'a>(
         let original_state = state.clone();
 
         let (_, loc_ident, state) =
-            specialize(|_, r, c| EPattern::Start(r, c), loc!(ident())).parse(arena, state)?;
+            specialize(|_, r, c| EPattern::Start(r, c), loc!(parse_ident_help))
+                .parse(arena, state)?;
 
         match loc_ident.value {
             Ident::GlobalTag(tag) => {