use new ident parser

2025-09-29 06:44:46 +00:00 · 2021-03-10 21:56:19 +01:00 · 2021-03-10 21:56:19 +01:00 · 864390a89a
commit 864390a89a
parent e7bbfe96db
1 changed files with 3 additions and 255 deletions
--- a/compiler/parse/src/ident.rs
+++ b/compiler/parse/src/ident.rs
@ -553,12 +553,6 @@ fn chomp_access_chain<'a>(buffer: &'a [u8], parts: &mut Vec<'a, &'a str>) -> Res
        }
    }
    dbg!(&parts);
    let value = unsafe { std::str::from_utf8_unchecked(&buffer[chomped..]) };
    dbg!(value);
    if chomped == 0 {
        Err(0)
    } else {
@ -573,258 +567,12 @@ fn parse_ident_help_help<'a>(
    match chomp_identifier_chain(arena, state.bytes, state.line, state.column) {
        Ok((width, ident)) => {
            state = advance_state!(state, width as usize)?;
-            return Ok((MadeProgress, ident, state));
+            Ok((MadeProgress, ident, state))
        }
        Err((0, fail)) => {
            return Err((NoProgress, fail, state));
        }
        Err((0, fail)) => Err((NoProgress, fail, state)),
        Err((width, fail)) => {
            state = advance_state!(state, width as usize)?;
-            return Err((MadeProgress, fail, state));
+            Err((MadeProgress, fail, state))
        }
    }
    let mut part_buf = String::new_in(arena); // The current "part" (parts are dot-separated.)
    let mut noncapitalized_parts: Vec<&'a str> = Vec::new_in(arena);
    let mut is_capitalized;
    let is_accessor_fn;
    let bytes = state.bytes;
    let mut chomped_capitalized = 0;
    let mut cparts = 0;
    let mut chomped_part_buf = 0;
    let mut chomped = 0;
    // Identifiers and accessor functions must start with either a letter or a dot.
    // If this starts with neither, it must be something else!
    match peek_utf8_char(&state) {
        Ok((first_ch, bytes_parsed)) => {
            if first_ch.is_alphabetic() {
                part_buf.push(first_ch);
                chomped_part_buf += bytes_parsed;
                is_capitalized = first_ch.is_uppercase();
                is_accessor_fn = false;
                state = advance_state!(state, bytes_parsed)?;
            } else if first_ch == '.' {
                match chomp_accessor(&state.bytes[1..], state.line, state.column) {
                    Ok(accessor) => {
                        let bytes_parsed = 1 + accessor.len();
                        state = advance_state!(state, bytes_parsed)?;
                        return Ok((MadeProgress, Ident::AccessorFunction(accessor), state));
                    }
                    Err(fail) => return Err((MadeProgress, fail, state)),
                }
            } else if first_ch == '@' {
                match chomp_private_tag(state.bytes, state.line, state.column) {
                    Ok(tagname) => {
                        let bytes_parsed = 1 + tagname.len();
                        state = advance_state!(state, bytes_parsed)?;
                        return Ok((MadeProgress, Ident::PrivateTag(tagname), state));
                    }
                    Err(fail) => {
                        state = advance_state!(state, 1)?;
                        return Err((MadeProgress, fail, state));
                    }
                }
            } else {
                return Err((NoProgress, BadIdent::Start(state.line, state.column), state));
            }
        }
        Err(_reason) => {
            return Err((NoProgress, BadIdent::Start(state.line, state.column), state));
        }
    }
    while !state.bytes.is_empty() {
        match peek_utf8_char(&state) {
            Ok((ch, width)) => {
                // After the first character, only these are allowed:
                //
                // * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers
                // * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
                // * A dot ('.')
                if ch.is_alphabetic() {
                    if part_buf.is_empty() {
                        // Capitalization is determined by the first character in the part.
                        is_capitalized = ch.is_uppercase();
                    }
                    part_buf.push(ch);
                    chomped_part_buf += width;
                } else if ch.is_ascii_digit() {
                    // Parts may not start with numbers!
                    if part_buf.is_empty() {
                        return Err((
                            MadeProgress,
                            BadIdent::PartStartsWithNumber(state.line, state.column),
                            state,
                        ));
                    }
                    part_buf.push(ch);
                    chomped_part_buf += width;
                } else if ch == '.' {
                    // There are two possible errors here:
                    //
                    // 1. Having two consecutive dots is an error.
                    // 2. Having capitalized parts after noncapitalized (e.g. `foo.Bar`) is an error.
                    if part_buf.is_empty() {
                        return Err((
                            MadeProgress,
                            BadIdent::DoubleDot(state.line, state.column),
                            state,
                        ));
                    }
                    if is_capitalized && !noncapitalized_parts.is_empty() {
                        return Err((
                            MadeProgress,
                            BadIdent::WeirdDotQualified(state.line, state.column),
                            state,
                        ));
                    }
                    if is_capitalized {
                        chomped_capitalized +=
                            chomped_part_buf + (chomped_capitalized != 0) as usize;
                        cparts += 1;
                    } else {
                        let value = unsafe {
                            std::str::from_utf8_unchecked(
                                &bytes[chomped..chomped + chomped_part_buf],
                            )
                        };
                        noncapitalized_parts.push(value);
                    }
                    // Now that we've recorded the contents of the current buffer, reset it.
                    part_buf = String::new_in(arena);
                    chomped += chomped_part_buf + 1;
                    chomped_part_buf = 0;
                } else if ch == '_' {
                    // we don't allow underscores in the middle of an identifier
                    // but still parse them (and generate a malformed identifier)
                    // to give good error messages for this case
                    state = advance_state!(state, width)?;
                    return Err((
                        MadeProgress,
                        BadIdent::Underscore(state.line, state.column),
                        state,
                    ));
                } else {
                    // This must be the end of the identifier. We're done!
                    break;
                }
                state = advance_state!(state, width)?;
            }
            Err(_reason) => {
                //
                return Err((
                    MadeProgress,
                    BadIdent::Start(state.line, state.column),
                    state,
                ));
            }
        }
    }
    if chomped_part_buf == 0 {
        // We probably had a trailing dot, e.g. `Foo.bar.` - this is malformed!
        //
        // This condition might also occur if we encounter a malformed accessor like `.|`
        //
        // If we made it this far and don't have a next_char, then necessarily
        // we have consumed a '.' char previously.
        let fail = if noncapitalized_parts.is_empty() {
            if cparts == 0 {
                BadIdent::StrayDot(state.line, state.column)
            } else {
                BadIdent::WeirdDotQualified(state.line, state.column)
            }
        } else {
            BadIdent::WeirdDotAccess(state.line, state.column)
        };
        return Err((MadeProgress, fail, state));
    }
    // Record the final parts.
    if is_capitalized {
        chomped_capitalized += chomped_part_buf + (chomped_capitalized != 0) as usize;
        cparts += 1;
    } else {
        let value =
            unsafe { std::str::from_utf8_unchecked(&bytes[chomped..chomped + chomped_part_buf]) };
        noncapitalized_parts.push(value);
    }
    let answer = if is_accessor_fn {
        // Handle accessor functions first because they have the strictest requirements.
        // Accessor functions may have exactly 1 noncapitalized part, and no capitalized parts.
        if cparts == 0 && noncapitalized_parts.len() == 1 {
            // an accessor starts with a `.`, but we drop that from the name
            let value = unsafe {
                std::str::from_utf8_unchecked(&bytes[1 + chomped..1 + chomped + chomped_part_buf])
            };
            Ident::AccessorFunction(value)
        } else {
            return Err((
                MadeProgress,
                BadIdent::WeirdAccessor(state.line, state.column),
                state,
            ));
        }
    } else if noncapitalized_parts.is_empty() {
        // We have capitalized parts only, so this must be a tag.
        match cparts {
            0 => {
                // We had neither capitalized nor noncapitalized parts,
                // yet we made it this far. The only explanation is that this was
                // a stray '.' drifting through the cosmos.
                return Err((
                    MadeProgress,
                    BadIdent::StrayDot(state.line, state.column),
                    state,
                ));
            }
            1 => {
                let chomped = chomped_capitalized;
                let value = unsafe { std::str::from_utf8_unchecked(&bytes[..chomped]) };
                Ident::GlobalTag(value)
            }
            _ => {
                // This is a qualified tag, which is not allowed!
                return Err((
                    MadeProgress,
                    BadIdent::QualifiedTag(state.line, state.column),
                    state,
                ));
            }
        }
    } else {
        // We have multiple noncapitalized parts, so this must be field access.
        let module_name = if cparts == 0 {
            ""
        } else {
            let chomped = chomped_capitalized;
            unsafe { std::str::from_utf8_unchecked(&bytes[..chomped]) }
        };
        Ident::Access {
            module_name,
            parts: noncapitalized_parts.into_bump_slice(),
        }
    };
    Ok((Progress::MadeProgress, answer, state))
 }