mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-26 13:29:12 +00:00
moved all crates into separate folder + related path fixes
This commit is contained in:
parent
12ef03bb86
commit
eee85fa45d
1063 changed files with 92 additions and 93 deletions
546
crates/compiler/parse/src/ident.rs
Normal file
546
crates/compiler/parse/src/ident.rs
Normal file
|
@ -0,0 +1,546 @@
|
|||
use crate::parser::Progress::{self, *};
|
||||
use crate::parser::{BadInputError, EExpr, ParseResult, Parser};
|
||||
use crate::state::State;
|
||||
use bumpalo::collections::vec::Vec;
|
||||
use bumpalo::Bump;
|
||||
use roc_region::all::Position;
|
||||
|
||||
/// An identifier that begins with an uppercase letter, such as a tag name.
///
/// After the leading uppercase letter only letters and digits may follow;
/// dots are not allowed.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct UppercaseIdent<'a>(&'a str);

/// Wraps a string slice as-is; the contents are not validated here.
impl<'a> From<&'a str> for UppercaseIdent<'a> {
    fn from(string: &'a str) -> Self {
        Self(string)
    }
}

/// Unwraps an owned identifier back into the string slice it holds.
impl<'a> From<UppercaseIdent<'a>> for &'a str {
    fn from(ident: UppercaseIdent<'a>) -> Self {
        let UppercaseIdent(text) = ident;
        text
    }
}

/// Borrows the string slice held by a referenced identifier.
impl<'a> From<&'a UppercaseIdent<'a>> for &'a str {
    fn from(ident: &'a UppercaseIdent<'a>) -> Self {
        let &UppercaseIdent(text) = ident;
        text
    }
}
|
||||
|
||||
/// The parser accepts all of these in any position where any one of them could
|
||||
/// appear. This way, canonicalization can give more helpful error messages like
|
||||
/// "you can't redefine this tag!" if you wrote `Foo = ...` or
|
||||
/// "you can only define unqualified constants" if you wrote `Foo.bar = ...`
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum Ident<'a> {
|
||||
/// Foo or Bar
|
||||
Tag(&'a str),
|
||||
/// @Foo or @Bar
|
||||
OpaqueRef(&'a str),
|
||||
/// foo or foo.bar or Foo.Bar.baz.qux
|
||||
Access {
|
||||
module_name: &'a str,
|
||||
parts: &'a [&'a str],
|
||||
},
|
||||
/// .foo { foo: 42 }
|
||||
AccessorFunction(&'a str),
|
||||
/// .Foo or foo. or something like foo.Bar
|
||||
Malformed(&'a str, BadIdent),
|
||||
}
|
||||
|
||||
impl<'a> Ident<'a> {
|
||||
pub fn len(&self) -> usize {
|
||||
use self::Ident::*;
|
||||
|
||||
match self {
|
||||
Tag(string) | OpaqueRef(string) => string.len(),
|
||||
Access { module_name, parts } => {
|
||||
let mut len = if module_name.is_empty() {
|
||||
0
|
||||
} else {
|
||||
module_name.len() + 1
|
||||
// +1 for the dot
|
||||
};
|
||||
|
||||
for part in parts.iter() {
|
||||
len += part.len() + 1 // +1 for the dot
|
||||
}
|
||||
|
||||
len - 1
|
||||
}
|
||||
AccessorFunction(string) => string.len(),
|
||||
Malformed(string, _) => string.len(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
}
|
||||
|
||||
/// This could be:
|
||||
///
|
||||
/// * A record field, e.g. "email" in `.email` or in `email:`
|
||||
/// * A named pattern match, e.g. "foo" in `foo =` or `foo ->` or `\foo ->`
|
||||
pub fn lowercase_ident<'a>() -> impl Parser<'a, &'a str, ()> {
|
||||
move |_, state: State<'a>| match chomp_lowercase_part(state.bytes()) {
|
||||
Err(progress) => Err((progress, (), state)),
|
||||
Ok(ident) => {
|
||||
if crate::keyword::KEYWORDS.iter().any(|kw| &ident == kw) {
|
||||
Err((NoProgress, (), state))
|
||||
} else {
|
||||
let width = ident.len();
|
||||
Ok((MadeProgress, ident, state.advance(width)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn tag_name<'a>() -> impl Parser<'a, &'a str, ()> {
|
||||
move |arena, state: State<'a>| uppercase_ident().parse(arena, state)
|
||||
}
|
||||
|
||||
/// This could be:
|
||||
///
|
||||
/// * A module name
|
||||
/// * A type name
|
||||
/// * A tag
|
||||
pub fn uppercase<'a>() -> impl Parser<'a, UppercaseIdent<'a>, ()> {
|
||||
move |_, state: State<'a>| match chomp_uppercase_part(state.bytes()) {
|
||||
Err(progress) => Err((progress, (), state)),
|
||||
Ok(ident) => {
|
||||
let width = ident.len();
|
||||
Ok((MadeProgress, ident.into(), state.advance(width)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// This could be:
|
||||
///
|
||||
/// * A module name
|
||||
/// * A type name
|
||||
/// * A tag
|
||||
pub fn uppercase_ident<'a>() -> impl Parser<'a, &'a str, ()> {
|
||||
move |_, state: State<'a>| match chomp_uppercase_part(state.bytes()) {
|
||||
Err(progress) => Err((progress, (), state)),
|
||||
Ok(ident) => {
|
||||
let width = ident.len();
|
||||
Ok((MadeProgress, ident, state.advance(width)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn unqualified_ident<'a>() -> impl Parser<'a, &'a str, ()> {
|
||||
move |_, state: State<'a>| match chomp_part(|c| c.is_alphabetic(), state.bytes()) {
|
||||
Err(progress) => Err((progress, (), state)),
|
||||
Ok(ident) => {
|
||||
if crate::keyword::KEYWORDS.iter().any(|kw| &ident == kw) {
|
||||
Err((MadeProgress, (), state))
|
||||
} else {
|
||||
let width = ident.len();
|
||||
Ok((MadeProgress, ident, state.advance(width)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Advances `$cursor` by `$width` via its `advance` method and wraps the
/// result in `Ok`, so that call sites can apply `?` to it.
macro_rules! advance_state {
    ($cursor:expr, $width:expr) => {
        Ok($cursor.advance($width))
    };
}
|
||||
|
||||
pub fn parse_ident<'a>(arena: &'a Bump, state: State<'a>) -> ParseResult<'a, Ident<'a>, EExpr<'a>> {
|
||||
let initial = state.clone();
|
||||
|
||||
match parse_ident_help(arena, state) {
|
||||
Ok((progress, ident, state)) => {
|
||||
if let Ident::Access { module_name, parts } = ident {
|
||||
if module_name.is_empty() {
|
||||
if let Some(first) = parts.first() {
|
||||
for keyword in crate::keyword::KEYWORDS.iter() {
|
||||
if first == keyword {
|
||||
return Err((NoProgress, EExpr::Start(initial.pos()), initial));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok((progress, ident, state))
|
||||
}
|
||||
Err((NoProgress, _, state)) => Err((NoProgress, EExpr::Start(state.pos()), state)),
|
||||
Err((MadeProgress, fail, state)) => match fail {
|
||||
BadIdent::Start(pos) => Err((NoProgress, EExpr::Start(pos), state)),
|
||||
BadIdent::Space(e, pos) => Err((NoProgress, EExpr::Space(e, pos), state)),
|
||||
_ => malformed_identifier(initial.bytes(), fail, state),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn malformed_identifier<'a>(
|
||||
initial_bytes: &'a [u8],
|
||||
problem: BadIdent,
|
||||
mut state: State<'a>,
|
||||
) -> ParseResult<'a, Ident<'a>, EExpr<'a>> {
|
||||
let chomped = chomp_malformed(state.bytes());
|
||||
let delta = initial_bytes.len() - state.bytes().len();
|
||||
let parsed_str = unsafe { std::str::from_utf8_unchecked(&initial_bytes[..chomped + delta]) };
|
||||
|
||||
state = state.advance(chomped);
|
||||
|
||||
Ok((MadeProgress, Ident::Malformed(parsed_str, problem), state))
|
||||
}
|
||||
|
||||
/// skip forward to the next non-identifier character
|
||||
pub fn chomp_malformed(bytes: &[u8]) -> usize {
|
||||
use encode_unicode::CharExt;
|
||||
let mut chomped = 0;
|
||||
while let Ok((ch, width)) = char::from_utf8_slice_start(&bytes[chomped..]) {
|
||||
// We can't use ch.is_alphanumeric() here because that passes for
|
||||
// things that are "numeric" but not ASCII digits, like `¾`
|
||||
if ch == '.' || ch == '_' || ch.is_alphabetic() || ch.is_ascii_digit() {
|
||||
chomped += width;
|
||||
continue;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
chomped
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum BadIdent {
|
||||
Start(Position),
|
||||
Space(BadInputError, Position),
|
||||
|
||||
Underscore(Position),
|
||||
QualifiedTag(Position),
|
||||
WeirdAccessor(Position),
|
||||
WeirdDotAccess(Position),
|
||||
WeirdDotQualified(Position),
|
||||
StrayDot(Position),
|
||||
BadOpaqueRef(Position),
|
||||
}
|
||||
|
||||
fn chomp_lowercase_part(buffer: &[u8]) -> Result<&str, Progress> {
|
||||
chomp_part(|c: char| c.is_lowercase(), buffer)
|
||||
}
|
||||
|
||||
fn chomp_uppercase_part(buffer: &[u8]) -> Result<&str, Progress> {
|
||||
chomp_part(|c: char| c.is_uppercase(), buffer)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn chomp_part<F>(leading_is_good: F, buffer: &[u8]) -> Result<&str, Progress>
|
||||
where
|
||||
F: Fn(char) -> bool,
|
||||
{
|
||||
use encode_unicode::CharExt;
|
||||
|
||||
let mut chomped = 0;
|
||||
|
||||
if let Ok((ch, width)) = char::from_utf8_slice_start(&buffer[chomped..]) {
|
||||
if leading_is_good(ch) {
|
||||
chomped += width;
|
||||
} else {
|
||||
return Err(NoProgress);
|
||||
}
|
||||
}
|
||||
|
||||
while let Ok((ch, width)) = char::from_utf8_slice_start(&buffer[chomped..]) {
|
||||
if ch.is_alphabetic() || ch.is_ascii_digit() {
|
||||
chomped += width;
|
||||
} else {
|
||||
// we're done
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if chomped == 0 {
|
||||
Err(NoProgress)
|
||||
} else {
|
||||
let name = unsafe { std::str::from_utf8_unchecked(&buffer[..chomped]) };
|
||||
|
||||
Ok(name)
|
||||
}
|
||||
}
|
||||
|
||||
/// a `.foo` accessor function
|
||||
fn chomp_accessor(buffer: &[u8], pos: Position) -> Result<&str, BadIdent> {
|
||||
// assumes the leading `.` has been chomped already
|
||||
use encode_unicode::CharExt;
|
||||
|
||||
match chomp_lowercase_part(buffer) {
|
||||
Ok(name) => {
|
||||
let chomped = name.len();
|
||||
|
||||
if let Ok(('.', _)) = char::from_utf8_slice_start(&buffer[chomped..]) {
|
||||
Err(BadIdent::WeirdAccessor(pos))
|
||||
} else {
|
||||
Ok(name)
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
// we've already made progress with the initial `.`
|
||||
Err(BadIdent::StrayDot(pos.bump_column(1)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// a `@Token` opaque
|
||||
fn chomp_opaque_ref(buffer: &[u8], pos: Position) -> Result<&str, BadIdent> {
|
||||
// assumes the leading `@` has NOT been chomped already
|
||||
debug_assert_eq!(buffer.get(0), Some(&b'@'));
|
||||
use encode_unicode::CharExt;
|
||||
|
||||
let bad_ident = BadIdent::BadOpaqueRef;
|
||||
|
||||
match chomp_uppercase_part(&buffer[1..]) {
|
||||
Ok(name) => {
|
||||
let width = 1 + name.len();
|
||||
|
||||
if let Ok(('.', _)) = char::from_utf8_slice_start(&buffer[width..]) {
|
||||
Err(bad_ident(pos.bump_column(width as u32)))
|
||||
} else {
|
||||
let value = unsafe { std::str::from_utf8_unchecked(&buffer[..width]) };
|
||||
Ok(value)
|
||||
}
|
||||
}
|
||||
Err(_) => Err(bad_ident(pos.bump_column(1))),
|
||||
}
|
||||
}
|
||||
|
||||
fn chomp_identifier_chain<'a>(
|
||||
arena: &'a Bump,
|
||||
buffer: &'a [u8],
|
||||
pos: Position,
|
||||
) -> Result<(u32, Ident<'a>), (u32, BadIdent)> {
|
||||
use encode_unicode::CharExt;
|
||||
|
||||
let first_is_uppercase;
|
||||
let mut chomped = 0;
|
||||
|
||||
match char::from_utf8_slice_start(&buffer[chomped..]) {
|
||||
Ok((ch, width)) => match ch {
|
||||
'.' => match chomp_accessor(&buffer[1..], pos) {
|
||||
Ok(accessor) => {
|
||||
let bytes_parsed = 1 + accessor.len();
|
||||
|
||||
return Ok((bytes_parsed as u32, Ident::AccessorFunction(accessor)));
|
||||
}
|
||||
Err(fail) => return Err((1, fail)),
|
||||
},
|
||||
'@' => match chomp_opaque_ref(buffer, pos) {
|
||||
Ok(tagname) => {
|
||||
let bytes_parsed = tagname.len();
|
||||
|
||||
let ident = Ident::OpaqueRef;
|
||||
|
||||
return Ok((bytes_parsed as u32, ident(tagname)));
|
||||
}
|
||||
Err(fail) => return Err((1, fail)),
|
||||
},
|
||||
c if c.is_alphabetic() => {
|
||||
// fall through
|
||||
chomped += width;
|
||||
first_is_uppercase = c.is_uppercase();
|
||||
}
|
||||
_ => {
|
||||
return Err((0, BadIdent::Start(pos)));
|
||||
}
|
||||
},
|
||||
Err(_) => return Err((0, BadIdent::Start(pos))),
|
||||
}
|
||||
|
||||
while let Ok((ch, width)) = char::from_utf8_slice_start(&buffer[chomped..]) {
|
||||
if ch.is_alphabetic() || ch.is_ascii_digit() {
|
||||
chomped += width;
|
||||
} else {
|
||||
// we're done
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if let Ok(('.', _)) = char::from_utf8_slice_start(&buffer[chomped..]) {
|
||||
let module_name = if first_is_uppercase {
|
||||
match chomp_module_chain(&buffer[chomped..]) {
|
||||
Ok(width) => {
|
||||
chomped += width as usize;
|
||||
unsafe { std::str::from_utf8_unchecked(&buffer[..chomped]) }
|
||||
}
|
||||
Err(MadeProgress) => todo!(),
|
||||
Err(NoProgress) => unsafe { std::str::from_utf8_unchecked(&buffer[..chomped]) },
|
||||
}
|
||||
} else {
|
||||
""
|
||||
};
|
||||
|
||||
let mut parts = Vec::with_capacity_in(4, arena);
|
||||
|
||||
if !first_is_uppercase {
|
||||
let first_part = unsafe { std::str::from_utf8_unchecked(&buffer[..chomped]) };
|
||||
parts.push(first_part);
|
||||
}
|
||||
|
||||
match chomp_access_chain(&buffer[chomped..], &mut parts) {
|
||||
Ok(width) => {
|
||||
chomped += width as usize;
|
||||
|
||||
let ident = Ident::Access {
|
||||
module_name,
|
||||
parts: parts.into_bump_slice(),
|
||||
};
|
||||
|
||||
Ok((chomped as u32, ident))
|
||||
}
|
||||
Err(0) if !module_name.is_empty() => Err((
|
||||
chomped as u32,
|
||||
BadIdent::QualifiedTag(pos.bump_column(chomped as u32)),
|
||||
)),
|
||||
Err(1) if parts.is_empty() => Err((
|
||||
chomped as u32 + 1,
|
||||
BadIdent::WeirdDotQualified(pos.bump_column(chomped as u32 + 1)),
|
||||
)),
|
||||
Err(width) => Err((
|
||||
chomped as u32 + width,
|
||||
BadIdent::WeirdDotAccess(pos.bump_column(chomped as u32 + width)),
|
||||
)),
|
||||
}
|
||||
} else if let Ok(('_', _)) = char::from_utf8_slice_start(&buffer[chomped..]) {
|
||||
// we don't allow underscores in the middle of an identifier
|
||||
// but still parse them (and generate a malformed identifier)
|
||||
// to give good error messages for this case
|
||||
Err((
|
||||
chomped as u32 + 1,
|
||||
BadIdent::Underscore(pos.bump_column(chomped as u32 + 1)),
|
||||
))
|
||||
} else if first_is_uppercase {
|
||||
// just one segment, starting with an uppercase letter; that's a tag
|
||||
let value = unsafe { std::str::from_utf8_unchecked(&buffer[..chomped]) };
|
||||
Ok((chomped as u32, Ident::Tag(value)))
|
||||
} else {
|
||||
// just one segment, starting with a lowercase letter; that's a normal identifier
|
||||
let value = unsafe { std::str::from_utf8_unchecked(&buffer[..chomped]) };
|
||||
let ident = Ident::Access {
|
||||
module_name: "",
|
||||
parts: arena.alloc([value]),
|
||||
};
|
||||
Ok((chomped as u32, ident))
|
||||
}
|
||||
}
|
||||
|
||||
fn chomp_module_chain(buffer: &[u8]) -> Result<u32, Progress> {
|
||||
let mut chomped = 0;
|
||||
|
||||
while let Some(b'.') = buffer.get(chomped) {
|
||||
match &buffer.get(chomped + 1..) {
|
||||
Some(slice) => match chomp_uppercase_part(slice) {
|
||||
Ok(name) => {
|
||||
chomped += name.len() + 1;
|
||||
}
|
||||
Err(MadeProgress) => return Err(MadeProgress),
|
||||
Err(NoProgress) => break,
|
||||
},
|
||||
None => return Err(MadeProgress),
|
||||
}
|
||||
}
|
||||
|
||||
if chomped == 0 {
|
||||
Err(NoProgress)
|
||||
} else {
|
||||
Ok(chomped as u32)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn concrete_type<'a>() -> impl Parser<'a, (&'a str, &'a str), ()> {
|
||||
move |_, state: State<'a>| match chomp_concrete_type(state.bytes()) {
|
||||
Err(progress) => Err((progress, (), state)),
|
||||
Ok((module_name, type_name, width)) => {
|
||||
Ok((MadeProgress, (module_name, type_name), state.advance(width)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// parse a type name like `Result` or `Result.Result`
|
||||
fn chomp_concrete_type(buffer: &[u8]) -> Result<(&str, &str, usize), Progress> {
|
||||
let first = crate::ident::chomp_uppercase_part(buffer)?;
|
||||
|
||||
if let Some(b'.') = buffer.get(first.len()) {
|
||||
match crate::ident::chomp_module_chain(&buffer[first.len()..]) {
|
||||
Err(_) => Err(MadeProgress),
|
||||
Ok(rest) => {
|
||||
let width = first.len() + rest as usize;
|
||||
|
||||
// we must explicitly check here for a trailing `.`
|
||||
if let Some(b'.') = buffer.get(width) {
|
||||
return Err(MadeProgress);
|
||||
}
|
||||
|
||||
let slice = &buffer[..width];
|
||||
|
||||
match slice.iter().rev().position(|c| *c == b'.') {
|
||||
None => Ok(("", first, first.len())),
|
||||
Some(rev_index) => {
|
||||
let index = slice.len() - rev_index;
|
||||
let module_name =
|
||||
unsafe { std::str::from_utf8_unchecked(&slice[..index - 1]) };
|
||||
let type_name = unsafe { std::str::from_utf8_unchecked(&slice[index..]) };
|
||||
|
||||
Ok((module_name, type_name, width))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Ok(("", first, first.len()))
|
||||
}
|
||||
}
|
||||
|
||||
fn chomp_access_chain<'a>(buffer: &'a [u8], parts: &mut Vec<'a, &'a str>) -> Result<u32, u32> {
|
||||
let mut chomped = 0;
|
||||
|
||||
while let Some(b'.') = buffer.get(chomped) {
|
||||
match &buffer.get(chomped + 1..) {
|
||||
Some(slice) => match chomp_lowercase_part(slice) {
|
||||
Ok(name) => {
|
||||
let value = unsafe {
|
||||
std::str::from_utf8_unchecked(
|
||||
&buffer[chomped + 1..chomped + 1 + name.len()],
|
||||
)
|
||||
};
|
||||
parts.push(value);
|
||||
|
||||
chomped += name.len() + 1;
|
||||
}
|
||||
Err(_) => return Err(chomped as u32 + 1),
|
||||
},
|
||||
None => return Err(chomped as u32 + 1),
|
||||
}
|
||||
}
|
||||
|
||||
if chomped == 0 {
|
||||
Err(0)
|
||||
} else {
|
||||
Ok(chomped as u32)
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_ident_help<'a>(
|
||||
arena: &'a Bump,
|
||||
mut state: State<'a>,
|
||||
) -> ParseResult<'a, Ident<'a>, BadIdent> {
|
||||
match chomp_identifier_chain(arena, state.bytes(), state.pos()) {
|
||||
Ok((width, ident)) => {
|
||||
state = advance_state!(state, width as usize)?;
|
||||
Ok((MadeProgress, ident, state))
|
||||
}
|
||||
Err((0, fail)) => Err((NoProgress, fail, state)),
|
||||
Err((width, fail)) => {
|
||||
state = advance_state!(state, width as usize)?;
|
||||
Err((MadeProgress, fail, state))
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue