mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-29 06:44:46 +00:00
specialize accessor parsing
This commit is contained in:
parent
058551a224
commit
edd54ab4ab
1 changed files with 80 additions and 13 deletions
|
@ -305,7 +305,44 @@ pub enum BadIdent {
|
||||||
///
|
///
|
||||||
/// This is separate from the `ident` Parser because string interpolation
|
/// This is separate from the `ident` Parser because string interpolation
|
||||||
/// wants to use it this way.
|
/// wants to use it this way.
|
||||||
pub fn parse_ident_help_help<'a>(
|
|
||||||
|
/// a `.foo` accessor function
|
||||||
|
fn chomp_accessor(buffer: &[u8], row: Row, col: Col) -> Result<&str, BadIdent> {
|
||||||
|
// assumes the leading `.` has been chomped already
|
||||||
|
use encode_unicode::CharExt;
|
||||||
|
|
||||||
|
let mut chomped = 0;
|
||||||
|
|
||||||
|
if let Ok((ch, width)) = char::from_utf8_slice_start(&buffer[chomped..]) {
|
||||||
|
if ch.is_lowercase() {
|
||||||
|
chomped += width;
|
||||||
|
} else {
|
||||||
|
return Err(BadIdent::StrayDot(row, col + 1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
while let Ok((ch, width)) = char::from_utf8_slice_start(&buffer[chomped..]) {
|
||||||
|
if ch.is_alphabetic() || ch.is_ascii_digit() {
|
||||||
|
chomped += width;
|
||||||
|
} else {
|
||||||
|
// we're done
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if chomped == 0 {
|
||||||
|
Err(BadIdent::StrayDot(row, col + 1))
|
||||||
|
} else if let Ok(('.', _)) = char::from_utf8_slice_start(&buffer[chomped..]) {
|
||||||
|
Err(BadIdent::WeirdAccessor(row, col))
|
||||||
|
} else {
|
||||||
|
let name = unsafe { std::str::from_utf8_unchecked(&buffer[..chomped]) };
|
||||||
|
|
||||||
|
dbg!(name);
|
||||||
|
Ok(name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_ident_help_help<'a>(
|
||||||
arena: &'a Bump,
|
arena: &'a Bump,
|
||||||
mut state: State<'a>,
|
mut state: State<'a>,
|
||||||
) -> ParseResult<'a, (Ident<'a>, Option<char>), BadIdent> {
|
) -> ParseResult<'a, (Ident<'a>, Option<char>), BadIdent> {
|
||||||
|
@ -319,22 +356,36 @@ pub fn parse_ident_help_help<'a>(
|
||||||
let mut chomped_capitalized = 0;
|
let mut chomped_capitalized = 0;
|
||||||
let mut cparts = 0;
|
let mut cparts = 0;
|
||||||
|
|
||||||
|
let mut chomped_part_buf = 0;
|
||||||
|
let mut chomped = 0;
|
||||||
|
|
||||||
// Identifiers and accessor functions must start with either a letter or a dot.
|
// Identifiers and accessor functions must start with either a letter or a dot.
|
||||||
// If this starts with neither, it must be something else!
|
// If this starts with neither, it must be something else!
|
||||||
match peek_utf8_char(&state) {
|
match peek_utf8_char(&state) {
|
||||||
Ok((first_ch, bytes_parsed)) => {
|
Ok((first_ch, bytes_parsed)) => {
|
||||||
if first_ch.is_alphabetic() {
|
if first_ch.is_alphabetic() {
|
||||||
part_buf.push(first_ch);
|
part_buf.push(first_ch);
|
||||||
|
chomped_part_buf += bytes_parsed;
|
||||||
|
|
||||||
is_capitalized = first_ch.is_uppercase();
|
is_capitalized = first_ch.is_uppercase();
|
||||||
is_accessor_fn = false;
|
is_accessor_fn = false;
|
||||||
|
|
||||||
state = advance_state!(state, bytes_parsed)?;
|
state = advance_state!(state, bytes_parsed)?;
|
||||||
} else if first_ch == '.' {
|
} else if first_ch == '.' {
|
||||||
is_capitalized = false;
|
match chomp_accessor(&state.bytes[1..], state.line, state.column) {
|
||||||
is_accessor_fn = true;
|
Ok(accessor) => {
|
||||||
|
let bytes_parsed = 1 + accessor.len();
|
||||||
|
|
||||||
state = advance_state!(state, bytes_parsed)?;
|
state = advance_state!(state, bytes_parsed)?;
|
||||||
|
|
||||||
|
return Ok((
|
||||||
|
MadeProgress,
|
||||||
|
(Ident::AccessorFunction(accessor), None),
|
||||||
|
state,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
Err(fail) => return Err((MadeProgress, fail, state)),
|
||||||
|
}
|
||||||
} else if first_ch == '@' {
|
} else if first_ch == '@' {
|
||||||
state = advance_state!(state, bytes_parsed)?;
|
state = advance_state!(state, bytes_parsed)?;
|
||||||
|
|
||||||
|
@ -346,6 +397,7 @@ pub fn parse_ident_help_help<'a>(
|
||||||
|
|
||||||
part_buf.push('@');
|
part_buf.push('@');
|
||||||
part_buf.push(next_ch);
|
part_buf.push(next_ch);
|
||||||
|
chomped_part_buf += 1 + next_bytes_parsed;
|
||||||
|
|
||||||
is_private_tag = true;
|
is_private_tag = true;
|
||||||
is_capitalized = true;
|
is_capitalized = true;
|
||||||
|
@ -377,7 +429,7 @@ pub fn parse_ident_help_help<'a>(
|
||||||
|
|
||||||
while !state.bytes.is_empty() {
|
while !state.bytes.is_empty() {
|
||||||
match peek_utf8_char(&state) {
|
match peek_utf8_char(&state) {
|
||||||
Ok((ch, bytes_parsed)) => {
|
Ok((ch, width)) => {
|
||||||
// After the first character, only these are allowed:
|
// After the first character, only these are allowed:
|
||||||
//
|
//
|
||||||
// * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers
|
// * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers
|
||||||
|
@ -390,6 +442,7 @@ pub fn parse_ident_help_help<'a>(
|
||||||
}
|
}
|
||||||
|
|
||||||
part_buf.push(ch);
|
part_buf.push(ch);
|
||||||
|
chomped_part_buf += width;
|
||||||
} else if ch.is_ascii_digit() {
|
} else if ch.is_ascii_digit() {
|
||||||
// Parts may not start with numbers!
|
// Parts may not start with numbers!
|
||||||
if part_buf.is_empty() {
|
if part_buf.is_empty() {
|
||||||
|
@ -401,6 +454,7 @@ pub fn parse_ident_help_help<'a>(
|
||||||
}
|
}
|
||||||
|
|
||||||
part_buf.push(ch);
|
part_buf.push(ch);
|
||||||
|
chomped_part_buf += width;
|
||||||
} else if ch == '.' {
|
} else if ch == '.' {
|
||||||
// There are two possible errors here:
|
// There are two possible errors here:
|
||||||
//
|
//
|
||||||
|
@ -423,19 +477,27 @@ pub fn parse_ident_help_help<'a>(
|
||||||
}
|
}
|
||||||
|
|
||||||
if is_capitalized {
|
if is_capitalized {
|
||||||
chomped_capitalized += part_buf.len() + (chomped_capitalized != 0) as usize;
|
chomped_capitalized +=
|
||||||
|
chomped_part_buf + (chomped_capitalized != 0) as usize;
|
||||||
cparts += 1;
|
cparts += 1;
|
||||||
} else {
|
} else {
|
||||||
noncapitalized_parts.push(part_buf.into_bump_str());
|
let value = unsafe {
|
||||||
|
std::str::from_utf8_unchecked(
|
||||||
|
&bytes[chomped..chomped + chomped_part_buf],
|
||||||
|
)
|
||||||
|
};
|
||||||
|
noncapitalized_parts.push(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now that we've recorded the contents of the current buffer, reset it.
|
// Now that we've recorded the contents of the current buffer, reset it.
|
||||||
part_buf = String::new_in(arena);
|
part_buf = String::new_in(arena);
|
||||||
|
chomped += chomped_part_buf + 1;
|
||||||
|
chomped_part_buf = 0;
|
||||||
} else if ch == '_' {
|
} else if ch == '_' {
|
||||||
// we don't allow underscores in the middle of an identifier
|
// we don't allow underscores in the middle of an identifier
|
||||||
// but still parse them (and generate a malformed identifier)
|
// but still parse them (and generate a malformed identifier)
|
||||||
// to give good error messages for this case
|
// to give good error messages for this case
|
||||||
state = advance_state!(state, bytes_parsed)?;
|
state = advance_state!(state, width)?;
|
||||||
return Err((
|
return Err((
|
||||||
MadeProgress,
|
MadeProgress,
|
||||||
BadIdent::Underscore(state.line, state.column),
|
BadIdent::Underscore(state.line, state.column),
|
||||||
|
@ -447,7 +509,7 @@ pub fn parse_ident_help_help<'a>(
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
state = advance_state!(state, bytes_parsed)?;
|
state = advance_state!(state, width)?;
|
||||||
}
|
}
|
||||||
Err(_reason) => {
|
Err(_reason) => {
|
||||||
//
|
//
|
||||||
|
@ -460,7 +522,7 @@ pub fn parse_ident_help_help<'a>(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if part_buf.is_empty() {
|
if chomped_part_buf == 0 {
|
||||||
// We probably had a trailing dot, e.g. `Foo.bar.` - this is malformed!
|
// We probably had a trailing dot, e.g. `Foo.bar.` - this is malformed!
|
||||||
//
|
//
|
||||||
// This condition might also occur if we encounter a malformed accessor like `.|`
|
// This condition might also occur if we encounter a malformed accessor like `.|`
|
||||||
|
@ -482,17 +544,22 @@ pub fn parse_ident_help_help<'a>(
|
||||||
|
|
||||||
// Record the final parts.
|
// Record the final parts.
|
||||||
if is_capitalized {
|
if is_capitalized {
|
||||||
chomped_capitalized += part_buf.len() + (chomped_capitalized != 0) as usize;
|
chomped_capitalized += chomped_part_buf + (chomped_capitalized != 0) as usize;
|
||||||
cparts += 1;
|
cparts += 1;
|
||||||
} else {
|
} else {
|
||||||
noncapitalized_parts.push(part_buf.into_bump_str());
|
let value =
|
||||||
|
unsafe { std::str::from_utf8_unchecked(&bytes[chomped..chomped + chomped_part_buf]) };
|
||||||
|
noncapitalized_parts.push(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
let answer = if is_accessor_fn {
|
let answer = if is_accessor_fn {
|
||||||
// Handle accessor functions first because they have the strictest requirements.
|
// Handle accessor functions first because they have the strictest requirements.
|
||||||
// Accessor functions may have exactly 1 noncapitalized part, and no capitalized parts.
|
// Accessor functions may have exactly 1 noncapitalized part, and no capitalized parts.
|
||||||
if cparts == 0 && noncapitalized_parts.len() == 1 && !is_private_tag {
|
if cparts == 0 && noncapitalized_parts.len() == 1 && !is_private_tag {
|
||||||
let value = noncapitalized_parts.iter().next().unwrap();
|
// an accessor starts with a `.`, but we drop that from the name
|
||||||
|
let value = unsafe {
|
||||||
|
std::str::from_utf8_unchecked(&bytes[1 + chomped..1 + chomped + chomped_part_buf])
|
||||||
|
};
|
||||||
|
|
||||||
Ident::AccessorFunction(value)
|
Ident::AccessorFunction(value)
|
||||||
} else {
|
} else {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue