mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-19 01:59:48 +00:00
Parsing support for snake_case identifiers
In this initial commit, I have done the following: - Added unit tests to roc_parse's ident.rs file to cover at least the simplest Ident enum cases (Tag, OpaqueRef, and simple Access) - Added '_' as a valid "rest" character in both uppercase and lowercase identifier parts - Updated the test_syntax snapshots appropriately There is still a lot left to do here. Such as: - Do we want to allow multiple '_'s to parse successfully? - Handle qualified access - Handle accessor functions - Handle record update functions - Remove the UnderscoreInMiddle case from BadIdent - Write unit tests for Malformed Idents I am not a "Rustacean" by any means, but have been through the Book in years past. Any feedback on the way I wrote the tests or any other part of the implementation would be very appreciated.
This commit is contained in:
parent
d7825428df
commit
a2083cec30
31 changed files with 1214 additions and 460 deletions
|
@ -231,6 +231,7 @@ pub enum BadIdent {
|
|||
|
||||
UnderscoreAlone(Position),
|
||||
UnderscoreInMiddle(Position),
|
||||
TooManyUnderscores(Position),
|
||||
UnderscoreAtStart {
|
||||
position: Position,
|
||||
/// If this variable was already declared in a pattern (e.g. \_x -> _x),
|
||||
|
@ -252,11 +253,21 @@ fn is_alnum(ch: char) -> bool {
|
|||
}
|
||||
|
||||
fn chomp_lowercase_part(buffer: &[u8]) -> Result<&str, Progress> {
|
||||
chomp_part(char::is_lowercase, is_alnum, true, buffer)
|
||||
chomp_part(
|
||||
char::is_lowercase,
|
||||
is_plausible_ident_continue,
|
||||
true,
|
||||
buffer,
|
||||
)
|
||||
}
|
||||
|
||||
fn chomp_uppercase_part(buffer: &[u8]) -> Result<&str, Progress> {
|
||||
chomp_part(char::is_uppercase, is_alnum, false, buffer)
|
||||
chomp_part(
|
||||
char::is_uppercase,
|
||||
is_plausible_ident_continue,
|
||||
false,
|
||||
buffer,
|
||||
)
|
||||
}
|
||||
|
||||
fn chomp_anycase_part(buffer: &[u8]) -> Result<&str, Progress> {
|
||||
|
@ -265,7 +276,12 @@ fn chomp_anycase_part(buffer: &[u8]) -> Result<&str, Progress> {
|
|||
let allow_bang =
|
||||
char::from_utf8_slice_start(buffer).map_or(false, |(leading, _)| leading.is_lowercase());
|
||||
|
||||
chomp_part(char::is_alphabetic, is_alnum, allow_bang, buffer)
|
||||
chomp_part(
|
||||
char::is_alphabetic,
|
||||
is_plausible_ident_continue,
|
||||
allow_bang,
|
||||
buffer,
|
||||
)
|
||||
}
|
||||
|
||||
fn chomp_integer_part(buffer: &[u8]) -> Result<&str, Progress> {
|
||||
|
@ -429,7 +445,14 @@ fn chomp_opaque_ref(buffer: &[u8], pos: Position) -> Result<&str, BadIdent> {
|
|||
Err(bad_ident(pos.bump_column(width as u32)))
|
||||
} else {
|
||||
let value = unsafe { std::str::from_utf8_unchecked(&buffer[..width]) };
|
||||
Ok(value)
|
||||
if value.contains('_') {
|
||||
// we don't allow underscores in the middle of an identifier
|
||||
// but still parse them (and generate a malformed identifier)
|
||||
// to give good error messages for this case
|
||||
Err(BadIdent::UnderscoreInMiddle(pos.bump_column(width as u32)))
|
||||
} else {
|
||||
Ok(value)
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(_) => Err(bad_ident(pos.bump_column(1))),
|
||||
|
@ -486,7 +509,7 @@ fn chomp_identifier_chain<'a>(
|
|||
}
|
||||
|
||||
while let Ok((ch, width)) = char::from_utf8_slice_start(&buffer[chomped..]) {
|
||||
if ch.is_alphabetic() || ch.is_ascii_digit() {
|
||||
if ch.is_alphabetic() || ch.is_ascii_digit() || ch == '_' {
|
||||
chomped += width;
|
||||
} else if ch == '!' && !first_is_uppercase {
|
||||
chomped += width;
|
||||
|
@ -556,19 +579,20 @@ fn chomp_identifier_chain<'a>(
|
|||
BadIdent::WeirdDotAccess(pos.bump_column(chomped as u32 + width)),
|
||||
)),
|
||||
}
|
||||
} else if let Ok(('_', _)) = char::from_utf8_slice_start(&buffer[chomped..]) {
|
||||
// we don't allow underscores in the middle of an identifier
|
||||
// but still parse them (and generate a malformed identifier)
|
||||
// to give good error messages for this case
|
||||
Err((
|
||||
chomped as u32 + 1,
|
||||
BadIdent::UnderscoreInMiddle(pos.bump_column(chomped as u32 + 1)),
|
||||
))
|
||||
} else if first_is_uppercase {
|
||||
// just one segment, starting with an uppercase letter; that's a tag
|
||||
let value = unsafe { std::str::from_utf8_unchecked(&buffer[..chomped]) };
|
||||
|
||||
Ok((chomped as u32, Ident::Tag(value)))
|
||||
if value.contains('_') {
|
||||
// we don't allow underscores in the middle of an identifier
|
||||
// but still parse them (and generate a malformed identifier)
|
||||
// to give good error messages for this case
|
||||
Err((
|
||||
chomped as u32,
|
||||
BadIdent::UnderscoreInMiddle(pos.bump_column(chomped as u32)),
|
||||
))
|
||||
} else {
|
||||
Ok((chomped as u32, Ident::Tag(value)))
|
||||
}
|
||||
} else {
|
||||
// just one segment, starting with a lowercase letter; that's a normal identifier
|
||||
let value = unsafe { std::str::from_utf8_unchecked(&buffer[..chomped]) };
|
||||
|
@ -689,3 +713,121 @@ fn chomp_access_chain<'a>(buffer: &'a [u8], parts: &mut Vec<'a, Accessor<'a>>) -
|
|||
Ok(chomped as u32)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn assert_ident_parses<'a>(arena: &'a Bump, ident: &str, expected: Ident<'a>) {
|
||||
let s = State::new(ident.as_bytes());
|
||||
let (_, id, _) = parse_ident(arena, s, 0).unwrap();
|
||||
assert_eq!(id, expected);
|
||||
}
|
||||
|
||||
fn assert_ident_parses_tag(arena: &Bump, ident: &str) {
|
||||
assert_ident_parses(arena, ident, Ident::Tag(ident));
|
||||
}
|
||||
fn assert_ident_parses_opaque(arena: &Bump, ident: &str) {
|
||||
assert_ident_parses(arena, ident, Ident::OpaqueRef(ident));
|
||||
}
|
||||
fn assert_ident_parses_simple_access(arena: &Bump, ident: &str) {
|
||||
assert_ident_parses(
|
||||
arena,
|
||||
ident,
|
||||
Ident::Access {
|
||||
module_name: "",
|
||||
parts: arena.alloc([Accessor::RecordField(ident)]),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
fn assert_ident_parses_malformed(arena: &Bump, ident: &str, pos: Position) {
|
||||
assert_ident_parses(
|
||||
arena,
|
||||
ident,
|
||||
Ident::Malformed(ident, BadIdent::UnderscoreInMiddle(pos)),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ident_lowercase_camel() {
|
||||
let arena = Bump::new();
|
||||
assert_ident_parses_simple_access(&arena, "hello");
|
||||
assert_ident_parses_simple_access(&arena, "hello23");
|
||||
assert_ident_parses_simple_access(&arena, "helloWorld");
|
||||
assert_ident_parses_simple_access(&arena, "helloWorld23");
|
||||
assert_ident_parses_simple_access(&arena, "helloWorldThisIsQuiteATag");
|
||||
assert_ident_parses_simple_access(&arena, "helloWorldThisIsQuiteATag_");
|
||||
assert_ident_parses_simple_access(&arena, "helloworldthisisquiteatag_");
|
||||
assert_ident_parses_simple_access(&arena, "helloWorldThisIsQuiteATag23");
|
||||
assert_ident_parses_simple_access(&arena, "helloWorldThisIsQuiteATag23_");
|
||||
assert_ident_parses_simple_access(&arena, "helloworldthisisquiteatag23_");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ident_lowercase_snake() {
|
||||
let arena = Bump::new();
|
||||
assert_ident_parses_simple_access(&arena, "hello_world");
|
||||
assert_ident_parses_simple_access(&arena, "hello_world23");
|
||||
assert_ident_parses_simple_access(&arena, "hello_world_this_is_quite_a_tag");
|
||||
assert_ident_parses_simple_access(&arena, "hello_world_this_is_quite_a_tag_");
|
||||
assert_ident_parses_simple_access(&arena, "hello_world_this_is_quite_a_tag23");
|
||||
assert_ident_parses_simple_access(&arena, "hello_world_this_is_quite_a_tag23_");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_tag_camel() {
|
||||
let arena = Bump::new();
|
||||
assert_ident_parses_tag(&arena, "Hello");
|
||||
assert_ident_parses_tag(&arena, "Hello23");
|
||||
assert_ident_parses_tag(&arena, "HelloWorld");
|
||||
assert_ident_parses_tag(&arena, "HelloWorld23");
|
||||
assert_ident_parses_tag(&arena, "HelloWorldThisIsQuiteATag");
|
||||
assert_ident_parses_tag(&arena, "HelloWorldThisIsQuiteATag23");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_tag_snake_is_malformed() {
|
||||
let arena = Bump::new();
|
||||
assert_ident_parses_malformed(&arena, "Hello_World", Position { offset: 11 });
|
||||
assert_ident_parses_malformed(&arena, "Hello_World23", Position { offset: 13 });
|
||||
assert_ident_parses_malformed(
|
||||
&arena,
|
||||
"Hello_World_This_Is_Quite_A_Tag",
|
||||
Position { offset: 31 },
|
||||
);
|
||||
assert_ident_parses_malformed(
|
||||
&arena,
|
||||
"Hello_World_This_Is_Quite_A_Tag23",
|
||||
Position { offset: 33 },
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_opaque_ref_camel() {
|
||||
let arena = Bump::new();
|
||||
assert_ident_parses_opaque(&arena, "@Hello");
|
||||
assert_ident_parses_opaque(&arena, "@Hello23");
|
||||
assert_ident_parses_opaque(&arena, "@HelloWorld");
|
||||
assert_ident_parses_opaque(&arena, "@HelloWorld23");
|
||||
assert_ident_parses_opaque(&arena, "@HelloWorldThisIsQuiteARef");
|
||||
assert_ident_parses_opaque(&arena, "@HelloWorldThisIsQuiteARef23");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_opaque_ref_snake_is_malformed() {
|
||||
let arena = Bump::new();
|
||||
assert_ident_parses_malformed(&arena, "@Hello_World", Position { offset: 12 });
|
||||
assert_ident_parses_malformed(&arena, "@Hello_World23", Position { offset: 14 });
|
||||
assert_ident_parses_malformed(
|
||||
&arena,
|
||||
"@Hello_World_This_Is_Quite_A_Ref",
|
||||
Position { offset: 32 },
|
||||
);
|
||||
assert_ident_parses_malformed(
|
||||
&arena,
|
||||
"@Hello_World_This_Is_Quite_A_Ref23",
|
||||
Position { offset: 34 },
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -855,6 +855,7 @@ fn remove_spaces_bad_ident(ident: BadIdent) -> BadIdent {
|
|||
position: Position::zero(),
|
||||
declaration_region,
|
||||
},
|
||||
BadIdent::TooManyUnderscores(_) => BadIdent::TooManyUnderscores(Position::zero()),
|
||||
BadIdent::QualifiedTag(_) => BadIdent::QualifiedTag(Position::zero()),
|
||||
BadIdent::WeirdAccessor(_) => BadIdent::WeirdAccessor(Position::zero()),
|
||||
BadIdent::WeirdDotAccess(_) => BadIdent::WeirdDotAccess(Position::zero()),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue