mirror of
https://github.com/roc-lang/roc.git
synced 2025-10-01 07:41:12 +00:00
wip add more stuff
This commit is contained in:
parent
9863268793
commit
4713087bb2
7 changed files with 422 additions and 78 deletions
|
@ -36,25 +36,39 @@ pub enum Expr<'a> {
|
||||||
// List literals
|
// List literals
|
||||||
EmptyList,
|
EmptyList,
|
||||||
List(Vec<'a, Loc<Expr<'a>>>),
|
List(Vec<'a, Loc<Expr<'a>>>),
|
||||||
// // Lookups
|
// Lookups
|
||||||
// Var(&'a str),
|
Var(&'a [&'a str], &'a str),
|
||||||
|
Variant(&'a [&'a str], &'a str),
|
||||||
|
|
||||||
// // Pattern Matching
|
// // Pattern Matching
|
||||||
// Case(&'a (Loc<Expr<'a>>, [(Loc<Pattern<'a>>, Loc<Expr<'a>>)])),
|
When(&'a [(Loc<Pattern<'a>>, Loc<Expr<'a>>)]),
|
||||||
// Closure(&'a (&'a [Loc<Pattern<'a>>], Loc<Expr<'a>>)),
|
// Closure(&'a (&'a [Loc<Pattern<'a>>], Loc<Expr<'a>>)),
|
||||||
// /// basically Assign(Vec<(Loc<Pattern>, Loc<Expr>)>, Loc<Expr>)
|
// /// basically Assign(Vec<(Loc<Pattern>, Loc<Expr>)>, Loc<Expr>)
|
||||||
// Assign(&'a (&'a [(Loc<Pattern<'a>>, Loc<Expr<'a>>)], Loc<Expr<'a>>)),
|
// Assign(&'a (&'a [(Loc<Pattern<'a>>, Loc<Expr<'a>>)], Loc<Expr<'a>>)),
|
||||||
|
|
||||||
// // Application
|
// Application
|
||||||
// Call(&'a (Loc<Expr<'a>>, [Loc<Expr<'a>>])),
|
/// To apply by name, do Apply(Var(...), ...)
|
||||||
// ApplyVariant(&'a (&'a VariantName, [Loc<Expr<'a>>])),
|
/// To apply a variant by name, do Apply(Variant(...), ...)
|
||||||
// Variant(&'a VariantName),
|
Apply(&'a (Loc<Expr<'a>>, &'a [Loc<Expr<'a>>])),
|
||||||
|
Operator(&'a (Loc<Expr<'a>>, Loc<Operator>, Loc<Expr<'a>>)),
|
||||||
|
|
||||||
// Product Types
|
// Product Types
|
||||||
EmptyRecord,
|
EmptyRecord,
|
||||||
// // Sugar
|
/// e.g. `(expr).foo.bar`
|
||||||
// If(&'a (Loc<Expr<'a>>, Loc<Expr<'a>>, Loc<Expr<'a>>)),
|
Field(&'a Expr<'a>, &'a [&'a str]),
|
||||||
Operator(&'a (Loc<Expr<'a>>, Loc<Operator>, Loc<Expr<'a>>)),
|
/// e.g. `Foo.Bar.baz.qux`
|
||||||
|
QualifiedField(&'a [&'a str], &'a [&'a str]),
|
||||||
|
/// e.g. `.foo`
|
||||||
|
AccessorFunction(&'a str),
|
||||||
|
|
||||||
|
// Conditionals
|
||||||
|
If(&'a Loc<Expr<'a>>),
|
||||||
|
Then(&'a Loc<Expr<'a>>),
|
||||||
|
Else(&'a Loc<Expr<'a>>),
|
||||||
|
Case(&'a Loc<Expr<'a>>),
|
||||||
|
|
||||||
|
// Problems
|
||||||
|
MalformedIdent(&'a str),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq)]
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
|
|
|
@ -1,27 +1,102 @@
|
||||||
use bumpalo::collections::string::String;
|
use bumpalo::collections::string::String;
|
||||||
|
use bumpalo::collections::vec::Vec;
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
use parse::ast::Attempting;
|
use parse::ast::Attempting;
|
||||||
use parse::parser::{
|
use parse::parser::{unexpected, unexpected_eof, Fail, ParseResult, Parser, State};
|
||||||
unexpected, unexpected_eof, Fail, FailReason, Keyword, ParseResult, Parser, State,
|
|
||||||
};
|
|
||||||
|
|
||||||
|
/// The parser accepts all of these in any position where any one of them could
|
||||||
|
/// appear. This way, canonicalization can give more helpful error messages like
|
||||||
|
/// "you can't redefine this variant!" if you wrote `Foo = ...` or
|
||||||
|
/// "you can only define unqualified constants" if you wrote `Foo.bar = ...`
|
||||||
|
#[derive(Debug, PartialEq, Eq)]
|
||||||
|
pub enum Ident<'a> {
|
||||||
|
/// foo or Bar.Baz.foo
|
||||||
|
Var(MaybeQualified<'a, &'a str>),
|
||||||
|
/// Foo or Bar.Baz.Foo
|
||||||
|
Variant(MaybeQualified<'a, &'a str>),
|
||||||
|
/// foo.bar or Foo.Bar.baz.qux
|
||||||
|
Field(MaybeQualified<'a, &'a [&'a str]>),
|
||||||
|
/// .foo
|
||||||
|
AccessorFunction(&'a str),
|
||||||
|
/// .Foo or foo. or something like foo.Bar
|
||||||
|
Malformed(&'a str),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An optional qualifier (the `Foo.Bar` in `Foo.Bar.baz`).
|
||||||
|
/// If module_parts is empty, this is unqualified.
|
||||||
|
#[derive(Debug, PartialEq, Eq)]
|
||||||
|
pub struct MaybeQualified<'a, Val> {
|
||||||
|
pub module_parts: &'a [&'a str],
|
||||||
|
pub value: Val,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse an identifier into a string.
|
||||||
|
///
|
||||||
|
/// This is separate from the `ident` Parser because string interpolation
|
||||||
|
/// wants to use it this way.
|
||||||
|
///
|
||||||
|
/// By design, this does not check for reserved keywords like "if", "else", etc.
|
||||||
|
/// Sometimes we may want to check for those later in the process, and give
|
||||||
|
/// more contextually-aware error messages than "unexpected `if`" or the like.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
pub fn parse_into<'a, I>(
|
pub fn parse_into<'a, I>(
|
||||||
arena: &'a Bump,
|
arena: &'a Bump,
|
||||||
chars: &mut I,
|
chars: &mut I,
|
||||||
state: State<'a>,
|
state: State<'a>,
|
||||||
) -> ParseResult<'a, (&'a str, Option<char>)>
|
) -> ParseResult<'a, (Ident<'a>, Option<char>)>
|
||||||
where
|
where
|
||||||
I: Iterator<Item = char>,
|
I: Iterator<Item = char>,
|
||||||
{
|
{
|
||||||
let mut buf = String::new_in(arena);
|
let mut part_buf = String::new_in(arena); // The current "part" (parts are dot-separated.)
|
||||||
|
let mut capitalized_parts: Vec<&'a str> = Vec::new_in(arena);
|
||||||
|
let mut noncapitalized_parts: Vec<&'a str> = Vec::new_in(arena);
|
||||||
|
let mut is_accessor_fn;
|
||||||
|
let mut is_capitalized;
|
||||||
|
|
||||||
// Identifiers must start with an ASCII letter.
|
let malformed = |opt_bad_char: Option<char>| {
|
||||||
// If this doesn't, it must not be an identifier!
|
// Reconstruct the original string that we've been parsing.
|
||||||
|
let mut full_string = String::new_in(arena);
|
||||||
|
|
||||||
|
full_string.push_str(&capitalized_parts.join("."));
|
||||||
|
full_string.push_str(&noncapitalized_parts.join("."));
|
||||||
|
|
||||||
|
if let Some(bad_char) = opt_bad_char {
|
||||||
|
full_string.push(bad_char);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Consume the remaining chars in the identifier.
|
||||||
|
let mut next_char = None;
|
||||||
|
|
||||||
|
while let Some(ch) = chars.next() {
|
||||||
|
// We can't use ch.is_alphanumeric() here because that passes for
|
||||||
|
// things that are "numeric" but not ASCII digits, like `¾`
|
||||||
|
if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() {
|
||||||
|
full_string.push(ch);
|
||||||
|
} else {
|
||||||
|
next_char = Some(ch);
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
(Ident::Malformed(&full_string), next_char),
|
||||||
|
state.advance_without_indenting(full_string.len())?,
|
||||||
|
))
|
||||||
|
};
|
||||||
|
|
||||||
|
// Identifiers and accessor functions must start with either a letter or a dot.
|
||||||
|
// If this starts with neither, it must be something else!
|
||||||
match chars.next() {
|
match chars.next() {
|
||||||
Some(ch) => {
|
Some(ch) => {
|
||||||
if ch.is_ascii_alphabetic() {
|
if ch.is_alphabetic() {
|
||||||
buf.push(ch);
|
part_buf.push(ch);
|
||||||
|
|
||||||
|
is_capitalized = ch.is_uppercase();
|
||||||
|
is_accessor_fn = false;
|
||||||
|
} else if ch == '.' {
|
||||||
|
is_capitalized = false;
|
||||||
|
is_accessor_fn = true;
|
||||||
} else {
|
} else {
|
||||||
return Err(unexpected(ch, 0, state, Attempting::Identifier));
|
return Err(unexpected(ch, 0, state, Attempting::Identifier));
|
||||||
}
|
}
|
||||||
|
@ -29,44 +104,128 @@ where
|
||||||
None => {
|
None => {
|
||||||
return Err(unexpected_eof(0, Attempting::Identifier, state));
|
return Err(unexpected_eof(0, Attempting::Identifier, state));
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
|
|
||||||
|
let mut chars_parsed = 1;
|
||||||
let mut next_char = None;
|
let mut next_char = None;
|
||||||
|
|
||||||
while let Some(ch) = chars.next() {
|
while let Some(ch) = chars.next() {
|
||||||
// After the first character, letters, numbers, and '.' are allowed.
|
// After the first character, only these are allowed:
|
||||||
if ch.is_ascii_alphanumeric() {
|
//
|
||||||
buf.push(ch);
|
// * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers
|
||||||
|
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
|
||||||
|
// * A dot ('.')
|
||||||
|
if ch.is_alphabetic() {
|
||||||
|
if part_buf.is_empty() {
|
||||||
|
// Capitalization is determined by the first character in the part.
|
||||||
|
is_capitalized = ch.is_uppercase();
|
||||||
|
}
|
||||||
|
|
||||||
|
part_buf.push(ch);
|
||||||
|
} else if ch.is_ascii_digit() {
|
||||||
|
// Parts may not start with numbers!
|
||||||
|
if part_buf.is_empty() {
|
||||||
|
return malformed(Some(ch));
|
||||||
|
}
|
||||||
|
|
||||||
|
part_buf.push(ch);
|
||||||
} else if ch == '.' {
|
} else if ch == '.' {
|
||||||
panic!("TODO support qualified identifiers. Make sure we don't have consecutive dots, and that module names are capitalized but post-module nothing is capitalized.");
|
// There are two possible errors here:
|
||||||
|
//
|
||||||
|
// 1. Having two consecutive dots is an error.
|
||||||
|
// 2. Having capitalized parts after noncapitalized (e.g. `foo.Bar`) is an error.
|
||||||
|
if part_buf.is_empty() || (is_capitalized && !noncapitalized_parts.is_empty()) {
|
||||||
|
return malformed(Some(ch));
|
||||||
|
}
|
||||||
|
|
||||||
|
if is_capitalized {
|
||||||
|
capitalized_parts.push(&part_buf);
|
||||||
|
} else {
|
||||||
|
noncapitalized_parts.push(&part_buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now that we've recorded the contents of the current buffer, reset it.
|
||||||
|
part_buf = String::new_in(arena);
|
||||||
} else {
|
} else {
|
||||||
// This must be the end of the identifier. We're done!
|
// This must be the end of the identifier. We're done!
|
||||||
|
|
||||||
next_char = Some(ch);
|
next_char = Some(ch);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
chars_parsed += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
let ident_str = buf.as_str();
|
if part_buf.is_empty() {
|
||||||
|
// We probably had a trailing dot, e.g. `Foo.bar.` - this is malformed!
|
||||||
|
//
|
||||||
|
// This condition might also occur if we encounter a malformed accessor like `.|`
|
||||||
|
//
|
||||||
|
// If we made it this far and don't have a next_char, then necessarily
|
||||||
|
// we have consumed a '.' char previously.
|
||||||
|
return malformed(next_char.or_else(|| Some('.')));
|
||||||
|
}
|
||||||
|
|
||||||
// Make sure we aren't trying to use a reserved keyword as an identifier
|
// Record the final parts.
|
||||||
match Keyword::from_str(ident_str) {
|
if is_capitalized {
|
||||||
Some(keyword) => Err((
|
capitalized_parts.push(&part_buf);
|
||||||
Fail {
|
} else {
|
||||||
reason: FailReason::UnexpectedKeyword(keyword),
|
noncapitalized_parts.push(&part_buf);
|
||||||
attempting: Attempting::Identifier,
|
}
|
||||||
},
|
|
||||||
state,
|
let answer = if is_accessor_fn {
|
||||||
)),
|
// Handle accessor functions first because they have the strictest requirements.
|
||||||
|
// Accessor functions may have exactly 1 noncapitalized part, and no capitalized parts.
|
||||||
|
if capitalized_parts.is_empty() && noncapitalized_parts.len() == 1 {
|
||||||
|
let value = noncapitalized_parts.iter().next().unwrap();
|
||||||
|
|
||||||
|
Ident::AccessorFunction(value)
|
||||||
|
} else {
|
||||||
|
return malformed(None);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
match noncapitalized_parts.len() {
|
||||||
|
0 => {
|
||||||
|
// We have capitalized parts only, so this must be a variant.
|
||||||
|
match capitalized_parts.pop() {
|
||||||
|
Some(value) => Ident::Variant(MaybeQualified {
|
||||||
|
module_parts: capitalized_parts.into_bump_slice(),
|
||||||
|
value,
|
||||||
|
}),
|
||||||
None => {
|
None => {
|
||||||
let state = state.advance_without_indenting(buf.len())?;
|
// We had neither capitalized nor noncapitalized parts,
|
||||||
|
// yet we made it this far. The only explanation is that this was
|
||||||
|
// a stray '.' drifting through the cosmos.
|
||||||
|
return Err(unexpected('.', 1, state, Attempting::Identifier));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
1 => {
|
||||||
|
// We have exactly one noncapitalized part, so this must be a var.
|
||||||
|
let value = noncapitalized_parts.iter().next().unwrap();
|
||||||
|
|
||||||
Ok(((buf.into_bump_str(), next_char), state))
|
Ident::Var(MaybeQualified {
|
||||||
|
module_parts: capitalized_parts.into_bump_slice(),
|
||||||
|
value,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
// We have multiple noncapitalized parts, so this must be a field.
|
||||||
|
Ident::Field(MaybeQualified {
|
||||||
|
module_parts: capitalized_parts.into_bump_slice(),
|
||||||
|
value: noncapitalized_parts.into_bump_slice(),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let state = state.advance_without_indenting(chars_parsed)?;
|
||||||
|
|
||||||
|
Ok(((answer, next_char), state))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn ident<'a>() -> impl Parser<'a, &'a str> {
|
pub fn ident<'a>() -> impl Parser<'a, Ident<'a>> {
|
||||||
move |arena: &'a Bump, state: State<'a>| {
|
move |arena: &'a Bump, state: State<'a>| {
|
||||||
// Discard next_char; we don't need it.
|
// Discard next_char; we don't need it.
|
||||||
let ((string, _), state) = parse_into(arena, &mut state.input.chars(), state)?;
|
let ((string, _), state) = parse_into(arena, &mut state.input.chars(), state)?;
|
||||||
|
@ -74,3 +233,49 @@ pub fn ident<'a>() -> impl Parser<'a, &'a str> {
|
||||||
Ok((string, state))
|
Ok((string, state))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TESTS
|
||||||
|
|
||||||
|
fn test_parse<'a>(input: &'a str) -> Result<Ident<'a>, Fail> {
|
||||||
|
let arena = Bump::new();
|
||||||
|
let state = State::new(input, Attempting::Expression);
|
||||||
|
|
||||||
|
ident()
|
||||||
|
.parse(&arena, state)
|
||||||
|
.map(|(answer, _)| answer)
|
||||||
|
.map_err(|(err, _)| err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn var<'a>(module_parts: std::vec::Vec<&'a str>, value: &'a str) -> Ident<'a> {
|
||||||
|
Ident::Var(MaybeQualified {
|
||||||
|
module_parts: module_parts.as_slice(),
|
||||||
|
value,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn variant<'a>(module_parts: std::vec::Vec<&'a str>, value: &'a str) -> Ident<'a> {
|
||||||
|
Ident::Variant(MaybeQualified {
|
||||||
|
module_parts: module_parts.as_slice(),
|
||||||
|
value,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn field<'a>(module_parts: std::vec::Vec<&'a str>, value: std::vec::Vec<&'a str>) -> Ident<'a> {
|
||||||
|
Ident::Field(MaybeQualified {
|
||||||
|
module_parts: module_parts.as_slice(),
|
||||||
|
value: value.as_slice(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn accessor_fn<'a>(value: &'a str) -> Ident<'a> {
|
||||||
|
Ident::AccessorFunction(value)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn malformed<'a>(value: &'a str) -> Ident<'a> {
|
||||||
|
Ident::Malformed(value)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_var() {
|
||||||
|
assert_eq!(test_parse("foo"), Ok(var("foo")))
|
||||||
|
}
|
||||||
|
|
5
src/parse/keyword.rs
Normal file
5
src/parse/keyword.rs
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
pub static IF: &'static str = "if";
|
||||||
|
pub static THEN: &'static str = "then";
|
||||||
|
pub static ELSE: &'static str = "else";
|
||||||
|
pub static CASE: &'static str = "case";
|
||||||
|
pub static WHEN: &'static str = "when";
|
112
src/parse/mod.rs
112
src/parse/mod.rs
|
@ -1,39 +1,46 @@
|
||||||
pub mod ast;
|
pub mod ast;
|
||||||
pub mod ident;
|
pub mod ident;
|
||||||
|
pub mod keyword;
|
||||||
pub mod module;
|
pub mod module;
|
||||||
pub mod number_literal;
|
pub mod number_literal;
|
||||||
pub mod parser;
|
pub mod parser;
|
||||||
pub mod problems;
|
pub mod problems;
|
||||||
pub mod string_literal;
|
pub mod string_literal;
|
||||||
|
|
||||||
|
use bumpalo::collections::vec::Vec;
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
use operator::Operator;
|
use operator::Operator;
|
||||||
use parse::ast::{Attempting, Expr};
|
use parse::ast::{Attempting, Expr};
|
||||||
|
use parse::ident::{ident, Ident};
|
||||||
use parse::number_literal::number_literal;
|
use parse::number_literal::number_literal;
|
||||||
use parse::parser::{
|
use parse::parser::{
|
||||||
and, attempt, loc, map, map_with_arena, one_of3, optional, string, unexpected, unexpected_eof,
|
and, attempt, loc, map, map_with_arena, one_of3, one_of4, one_of6, optional, string,
|
||||||
ParseResult, Parser, State,
|
unexpected, unexpected_eof, Either, ParseResult, Parser, State,
|
||||||
};
|
};
|
||||||
use parse::string_literal::string_literal;
|
use parse::string_literal::string_literal;
|
||||||
|
use region::Located;
|
||||||
|
|
||||||
pub fn expr<'a>() -> impl Parser<'a, Expr<'a>> {
|
pub fn expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
||||||
// Recursive parsers must not directly invoke functions which return (impl Parser),
|
// Recursive parsers must not directly invoke functions which return (impl Parser),
|
||||||
// as this causes rustc to stack overflow.
|
// as this causes rustc to stack overflow.
|
||||||
parse_expr
|
move |arena, state| parse_expr(min_indent, arena, state)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_expr<'a>(arena: &'a Bump, state: State<'a>) -> ParseResult<'a, Expr<'a>> {
|
fn parse_expr<'a>(min_indent: u16, arena: &'a Bump, state: State<'a>) -> ParseResult<'a, Expr<'a>> {
|
||||||
map_with_arena(
|
let expr_parser = map_with_arena(
|
||||||
and(
|
and(
|
||||||
attempt(
|
loc(one_of6(
|
||||||
Attempting::Expression,
|
|
||||||
loc(one_of3(
|
|
||||||
record_literal(),
|
record_literal(),
|
||||||
number_literal(),
|
number_literal(),
|
||||||
string_literal(),
|
string_literal(),
|
||||||
|
when(min_indent),
|
||||||
|
conditional(min_indent),
|
||||||
|
ident_etc(min_indent),
|
||||||
|
)),
|
||||||
|
optional(and(
|
||||||
|
loc(operator()),
|
||||||
|
loc(move |arena, state| parse_expr(min_indent, arena, state)),
|
||||||
)),
|
)),
|
||||||
),
|
|
||||||
optional(and(loc(operator()), loc(parse_expr))),
|
|
||||||
),
|
),
|
||||||
|arena, (loc_expr1, opt_operator)| match opt_operator {
|
|arena, (loc_expr1, opt_operator)| match opt_operator {
|
||||||
Some((loc_op, loc_expr2)) => {
|
Some((loc_op, loc_expr2)) => {
|
||||||
|
@ -43,8 +50,89 @@ fn parse_expr<'a>(arena: &'a Bump, state: State<'a>) -> ParseResult<'a, Expr<'a>
|
||||||
}
|
}
|
||||||
None => loc_expr1.value,
|
None => loc_expr1.value,
|
||||||
},
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
attempt(Attempting::Expression, expr_parser).parse(arena, state)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn loc_function_args<'a>(min_indent: u16) -> impl Parser<'a, &'a [Located<Expr<'a>>]> {
|
||||||
|
move |arena, state| {
|
||||||
|
panic!("TODO stop early if we see an operator after the whitespace - precedence!");
|
||||||
|
// zero_or_more(after(one_or_more(whitespace(min_indent)), function_arg()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn when<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
||||||
|
map(string(keyword::WHEN), |_| {
|
||||||
|
panic!("TODO implement WHEN");
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn conditional<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
||||||
|
one_of4(
|
||||||
|
cond_help(keyword::IF, Expr::If, min_indent),
|
||||||
|
cond_help(keyword::THEN, Expr::Then, min_indent),
|
||||||
|
cond_help(keyword::ELSE, Expr::Else, min_indent),
|
||||||
|
cond_help(keyword::CASE, Expr::Case, min_indent),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn cond_help<'a, F>(name: &str, wrap_expr: F, min_indent: u16) -> impl Parser<'a, Expr<'a>>
|
||||||
|
where
|
||||||
|
F: Fn(&'a Located<Expr<'a>>) -> Expr<'a>,
|
||||||
|
{
|
||||||
|
map(
|
||||||
|
after(
|
||||||
|
after(string(name), skip1_whitespace(min_indent)),
|
||||||
|
loc(expr(min_indent)),
|
||||||
|
),
|
||||||
|
wrap_expr,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// When we parse an ident like `foo ` it could be any of these:
|
||||||
|
///
|
||||||
|
/// 1. A standalone variable with trailing whitespace (e.g. because an operator is next)
|
||||||
|
/// 2. The beginning of a function call (e.g. `foo bar baz`)
|
||||||
|
/// 3. The beginning of a definition (e.g. `foo =`)
|
||||||
|
/// 4. A reserved keyword (e.g. `if ` or `case `), meaning we should do something else.
|
||||||
|
pub fn ident_etc<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
||||||
|
let followed_by_equals = after(zero_or_more(whitespace(min_indent), char('=')));
|
||||||
|
|
||||||
|
map_with_arena(
|
||||||
|
and(
|
||||||
|
loc(ident()),
|
||||||
|
either(followed_by_equals, loc_function_args(min_indent)),
|
||||||
|
),
|
||||||
|
|arena, (loc_ident, equals_or_loc_args)| {
|
||||||
|
match equals_or_loc_args {
|
||||||
|
Either::First(()) => {
|
||||||
|
// We have now parsed the beginning of a def (e.g. `foo =`)
|
||||||
|
}
|
||||||
|
Either::Second(loc_args) => {
|
||||||
|
// This appears to be a var, keyword, or function application.
|
||||||
|
let name_expr = match loc_ident.value {
|
||||||
|
Ident::Var(info) => Expr::Var(info.module_parts, info.value),
|
||||||
|
Ident::Variant(info) => Expr::Variant(info.module_parts, info.value),
|
||||||
|
Ident::Field(info) => Expr::QualifiedField(info.module_parts, info.value),
|
||||||
|
Ident::AccessorFunction(string) => Expr::AccessorFunction(string),
|
||||||
|
Ident::Malformed(string) => Expr::MalformedIdent(string),
|
||||||
|
};
|
||||||
|
|
||||||
|
if loc_args.is_empty() {
|
||||||
|
name_expr
|
||||||
|
} else {
|
||||||
|
let loc_expr = Located {
|
||||||
|
region: loc_ident.region,
|
||||||
|
value: name_expr,
|
||||||
|
};
|
||||||
|
|
||||||
|
Expr::Apply(arena.alloc((loc_expr, loc_args)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
)
|
)
|
||||||
.parse(arena, state)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn operator<'a>() -> impl Parser<'a, Operator> {
|
pub fn operator<'a>() -> impl Parser<'a, Operator> {
|
||||||
|
|
|
@ -35,6 +35,12 @@ pub struct State<'a> {
|
||||||
pub attempting: Attempting,
|
pub attempting: Attempting,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Eq)]
|
||||||
|
pub enum Either<First, Second> {
|
||||||
|
First(First),
|
||||||
|
Second(Second),
|
||||||
|
}
|
||||||
|
|
||||||
impl<'a> State<'a> {
|
impl<'a> State<'a> {
|
||||||
pub fn new(input: &'a str, attempting: Attempting) -> State<'a> {
|
pub fn new(input: &'a str, attempting: Attempting) -> State<'a> {
|
||||||
State {
|
State {
|
||||||
|
@ -151,7 +157,6 @@ pub type ParseResult<'a, Output> = Result<(Output, State<'a>), (Fail, State<'a>)
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
pub enum FailReason {
|
pub enum FailReason {
|
||||||
Unexpected(char, Region),
|
Unexpected(char, Region),
|
||||||
UnexpectedKeyword(Keyword),
|
|
||||||
ConditionFailed,
|
ConditionFailed,
|
||||||
LineTooLong(u32 /* which line was too long */),
|
LineTooLong(u32 /* which line was too long */),
|
||||||
TooManyLines,
|
TooManyLines,
|
||||||
|
@ -164,28 +169,6 @@ pub struct Fail {
|
||||||
pub reason: FailReason,
|
pub reason: FailReason,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
||||||
pub enum Keyword {
|
|
||||||
If,
|
|
||||||
Then,
|
|
||||||
Else,
|
|
||||||
Case,
|
|
||||||
When,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Keyword {
|
|
||||||
pub fn from_str(kw: &str) -> Option<Keyword> {
|
|
||||||
match kw {
|
|
||||||
"if" => Some(Keyword::If),
|
|
||||||
"then" => Some(Keyword::Then),
|
|
||||||
"else" => Some(Keyword::Else),
|
|
||||||
"case" => Some(Keyword::Case),
|
|
||||||
"when" => Some(Keyword::When),
|
|
||||||
_ => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub trait Parser<'a, Output> {
|
pub trait Parser<'a, Output> {
|
||||||
fn parse(&self, &'a Bump, State<'a>) -> ParseResult<'a, Output>;
|
fn parse(&self, &'a Bump, State<'a>) -> ParseResult<'a, Output>;
|
||||||
}
|
}
|
||||||
|
@ -271,6 +254,31 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn zero_or_more<'a, P, A>(parser: P) -> impl Parser<'a, Vec<'a, A>>
|
||||||
|
where
|
||||||
|
P: Parser<'a, A>,
|
||||||
|
{
|
||||||
|
move |arena, state| match parser.parse(arena, state) {
|
||||||
|
Ok((first_output, next_state)) => {
|
||||||
|
let mut state = next_state;
|
||||||
|
let mut buf = Vec::with_capacity_in(1, arena);
|
||||||
|
|
||||||
|
buf.push(first_output);
|
||||||
|
|
||||||
|
loop {
|
||||||
|
match parser.parse(arena, state) {
|
||||||
|
Ok((next_output, next_state)) => {
|
||||||
|
state = next_state;
|
||||||
|
buf.push(next_output);
|
||||||
|
}
|
||||||
|
Err((_, old_state)) => return Ok((buf, old_state)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err((_, new_state)) => return Ok((Vec::new_in(arena), new_state)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn one_or_more<'a, P, A>(parser: P) -> impl Parser<'a, Vec<'a, A>>
|
pub fn one_or_more<'a, P, A>(parser: P) -> impl Parser<'a, Vec<'a, A>>
|
||||||
where
|
where
|
||||||
P: Parser<'a, A>,
|
P: Parser<'a, A>,
|
||||||
|
|
|
@ -898,5 +898,8 @@ mod test_canonicalize {
|
||||||
// }
|
// }
|
||||||
//
|
//
|
||||||
// TODO test what happens when interpolated strings contain 1+ malformed idents
|
// TODO test what happens when interpolated strings contain 1+ malformed idents
|
||||||
|
//
|
||||||
// TODO test hex/oct/binary conversion to numbers
|
// TODO test hex/oct/binary conversion to numbers
|
||||||
|
//
|
||||||
|
// TODO test for \t \r and \n in string literals *outside* unicode escape sequence!
|
||||||
}
|
}
|
||||||
|
|
|
@ -282,10 +282,31 @@ mod test_parse {
|
||||||
assert_eq!(Ok(expected), actual);
|
assert_eq!(Ok(expected), actual);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn multiple_operators() {
|
||||||
|
let arena = Bump::new();
|
||||||
|
let inner = arena.alloc((
|
||||||
|
Located::new(0, 3, 0, 5, Int("42")),
|
||||||
|
Located::new(0, 5, 0, 6, Plus),
|
||||||
|
Located::new(0, 6, 0, 9, Int("534")),
|
||||||
|
));
|
||||||
|
let outer = arena.alloc((
|
||||||
|
Located::new(0, 0, 0, 2, Int("31")),
|
||||||
|
Located::new(0, 2, 0, 3, Star),
|
||||||
|
Located::new(0, 3, 0, 9, Operator(inner)),
|
||||||
|
));
|
||||||
|
let expected = Operator(outer);
|
||||||
|
let actual = parse_with(&arena, "31*42+534");
|
||||||
|
|
||||||
|
assert_eq!(Ok(expected), actual);
|
||||||
|
}
|
||||||
|
|
||||||
// TODO test hex/oct/binary parsing
|
// TODO test hex/oct/binary parsing
|
||||||
//
|
//
|
||||||
// TODO test for \t \r and \n in string literals *outside* unicode escape sequence!
|
// TODO test for \t \r and \n in string literals *outside* unicode escape sequence!
|
||||||
//
|
//
|
||||||
|
// TODO test for non-ASCII variables
|
||||||
|
//
|
||||||
// TODO verify that when a string literal contains a newline before the
|
// TODO verify that when a string literal contains a newline before the
|
||||||
// closing " it correctly updates both the line *and* column in the State.
|
// closing " it correctly updates both the line *and* column in the State.
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue