mirror of
https://github.com/roc-lang/roc.git
synced 2025-10-01 07:41:12 +00:00
Initial pass at parsing spaces/comments
This commit is contained in:
parent
4713087bb2
commit
df305e4cc8
8 changed files with 529 additions and 137 deletions
|
@ -1,3 +1,5 @@
|
|||
use bumpalo::collections::String;
|
||||
use bumpalo::Bump;
|
||||
use std::hash::BuildHasherDefault;
|
||||
|
||||
pub use fxhash::FxHasher;
|
||||
|
@ -21,3 +23,23 @@ pub type MutSet<K> = std::collections::HashSet<K, BuildHasher>;
|
|||
pub type ImMap<K, V> = im_rc::hashmap::HashMap<K, V, BuildHasher>;
|
||||
|
||||
pub type ImSet<K> = im_rc::hashset::HashSet<K, BuildHasher>;
|
||||
|
||||
pub fn arena_join<'a, I>(arena: &'a Bump, strings: &mut I, join_str: &str) -> String<'a>
|
||||
where
|
||||
I: Iterator<Item = &'a str>,
|
||||
{
|
||||
let mut buf = String::new_in(arena);
|
||||
|
||||
if let Some(first) = strings.next() {
|
||||
buf.push_str(&first);
|
||||
|
||||
while let Some(string) = strings.next() {
|
||||
buf.reserve(join_str.len() + string.len());
|
||||
|
||||
buf.push_str(join_str);
|
||||
buf.push_str(string);
|
||||
}
|
||||
}
|
||||
|
||||
buf
|
||||
}
|
||||
|
|
|
@ -67,10 +67,22 @@ pub enum Expr<'a> {
|
|||
Else(&'a Loc<Expr<'a>>),
|
||||
Case(&'a Loc<Expr<'a>>),
|
||||
|
||||
// Blank Space (e.g. comments, spaces, newlines) before or after an expression.
|
||||
// We preserve this for the formatter; canonicalization ignores it.
|
||||
SpaceBefore(&'a [Space<'a>], &'a Loc<Expr<'a>>),
|
||||
SpaceAfter(&'a Loc<Expr<'a>>, &'a [Space<'a>]),
|
||||
|
||||
// Problems
|
||||
MalformedIdent(&'a str),
|
||||
}
|
||||
|
||||
/// One piece of blank space found before or after an expression.
#[derive(Debug, PartialEq, Eq)]
pub enum Space<'a> {
    /// A line break.
    Newline,
    /// The text of a single-line comment.
    LineComment(&'a str),
    /// The text of a block comment, one entry per line.
    BlockComment(&'a [&'a str]),
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum Pattern<'a> {
|
||||
// Identifier
|
||||
|
|
268
src/parse/blankspace.rs
Normal file
268
src/parse/blankspace.rs
Normal file
|
@ -0,0 +1,268 @@
|
|||
use bumpalo::collections::string::String;
|
||||
use bumpalo::collections::vec::Vec;
|
||||
use bumpalo::Bump;
|
||||
use parse::ast::{Expr, Space};
|
||||
use parse::parser::{and, loc, map_with_arena, unexpected, unexpected_eof, Parser, State};
|
||||
|
||||
/// Which kind of comment, if any, the blank-space parser is currently inside.
#[derive(Debug, PartialEq, Eq)]
enum CommentParsing {
    /// Inside a line comment.
    Line,
    /// Inside a block comment.
    Block,
    /// Not inside any comment.
    No,
}
|
||||
|
||||
pub fn space0_before<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Expr<'a>>
|
||||
where
|
||||
P: Parser<'a, Expr<'a>>,
|
||||
{
|
||||
map_with_arena(
|
||||
and(space0(min_indent), loc(parser)),
|
||||
|arena, (space_list, loc_expr)| {
|
||||
if space_list.is_empty() {
|
||||
loc_expr.value
|
||||
} else {
|
||||
Expr::SpaceBefore(space_list, arena.alloc(loc_expr))
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
pub fn space1_before<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Expr<'a>>
|
||||
where
|
||||
P: Parser<'a, Expr<'a>>,
|
||||
{
|
||||
map_with_arena(
|
||||
and(space1(min_indent), loc(parser)),
|
||||
|arena, (space_list, loc_expr)| {
|
||||
if space_list.is_empty() {
|
||||
loc_expr.value
|
||||
} else {
|
||||
Expr::SpaceBefore(space_list, arena.alloc(loc_expr))
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
pub fn space0_after<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Expr<'a>>
|
||||
where
|
||||
P: Parser<'a, Expr<'a>>,
|
||||
{
|
||||
map_with_arena(
|
||||
and(space0(min_indent), loc(parser)),
|
||||
|arena, (space_list, loc_expr)| {
|
||||
if space_list.is_empty() {
|
||||
loc_expr.value
|
||||
} else {
|
||||
Expr::SpaceAfter(arena.alloc(loc_expr), space_list)
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
pub fn space1_after<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Expr<'a>>
|
||||
where
|
||||
P: Parser<'a, Expr<'a>>,
|
||||
{
|
||||
map_with_arena(
|
||||
and(space1(min_indent), loc(parser)),
|
||||
|arena, (space_list, loc_expr)| {
|
||||
if space_list.is_empty() {
|
||||
loc_expr.value
|
||||
} else {
|
||||
Expr::SpaceAfter(arena.alloc(loc_expr), space_list)
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
pub fn space0<'a>(min_indent: u16) -> impl Parser<'a, &'a [Space<'a>]> {
|
||||
spaces(false, min_indent)
|
||||
}
|
||||
|
||||
pub fn space1<'a>(min_indent: u16) -> impl Parser<'a, &'a [Space<'a>]> {
|
||||
// TODO try doing a short-circuit for the typical case: see if there is
|
||||
// exactly one space followed by char that isn't [' ', '\n', or '#'], and
|
||||
// if so, return empty slice. The case where there's exactly 1 space should
|
||||
// be by far the most common.
|
||||
spaces(true, min_indent)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn spaces<'a>(require_at_least_one: bool, min_indent: u16) -> impl Parser<'a, &'a [Space<'a>]> {
|
||||
move |arena: &'a Bump, state: State<'a>| {
|
||||
let mut chars = state.input.chars().peekable();
|
||||
let mut space_list = Vec::new_in(arena);
|
||||
let mut chars_parsed = 0;
|
||||
let mut comment_lines: Vec<'a, &'a str> = Vec::new_in(arena);
|
||||
let mut comment_line_buf = String::new_in(arena);
|
||||
let mut comment_parsing = CommentParsing::No;
|
||||
let mut state = state;
|
||||
|
||||
while let Some(ch) = chars.next() {
|
||||
chars_parsed += 1;
|
||||
|
||||
match comment_parsing {
|
||||
CommentParsing::No => match ch {
|
||||
' ' => {
|
||||
state = state.advance_spaces(1)?;
|
||||
}
|
||||
'\n' => {
|
||||
state = state.newline()?;
|
||||
|
||||
// Newlines only get added to the list when they're outside comments.
|
||||
space_list.push(Space::Newline);
|
||||
}
|
||||
'#' => {
|
||||
// We're now parsing a line comment!
|
||||
comment_parsing = CommentParsing::Line;
|
||||
}
|
||||
nonblank => {
|
||||
return if space_list.is_empty() && require_at_least_one {
|
||||
Err(unexpected(
|
||||
nonblank,
|
||||
chars_parsed,
|
||||
state.clone(),
|
||||
state.attempting,
|
||||
))
|
||||
} else {
|
||||
Ok((space_list.into_bump_slice(), state))
|
||||
};
|
||||
}
|
||||
},
|
||||
CommentParsing::Line => {
|
||||
match ch {
|
||||
' ' => {
|
||||
state = state.advance_spaces(1)?;
|
||||
|
||||
comment_line_buf.push(ch);
|
||||
}
|
||||
'\n' => {
|
||||
state = state.newline()?;
|
||||
|
||||
// This was a newline, so end this line comment.
|
||||
space_list.push(Space::LineComment(comment_line_buf.into_bump_str()));
|
||||
comment_line_buf = String::new_in(arena);
|
||||
|
||||
comment_parsing = CommentParsing::No;
|
||||
}
|
||||
'#' if comment_line_buf.is_empty() => {
|
||||
if chars.peek() == Some(&'#') {
|
||||
// Consume the '#' we peeked in the conditional.
|
||||
chars.next();
|
||||
|
||||
// Advance past the '#' we parsed and the one
|
||||
// we peeked (and then consumed manually).
|
||||
state = state.advance_without_indenting(2)?;
|
||||
|
||||
// This must be the start of a block comment,
|
||||
// since we are parsing a LineComment with an empty buffer
|
||||
// (meaning the previous char must have been '#'),
|
||||
// then we parsed a '#' right after it, and finally
|
||||
// we peeked and saw a third '#' after that.
|
||||
// "###" begins a block comment!
|
||||
comment_parsing = CommentParsing::Block;
|
||||
} else {
|
||||
state = state.advance_without_indenting(1)?;
|
||||
|
||||
comment_line_buf.push('#');
|
||||
}
|
||||
}
|
||||
nonblank => {
|
||||
state = state.advance_without_indenting(1)?;
|
||||
|
||||
comment_line_buf.push(nonblank);
|
||||
}
|
||||
}
|
||||
}
|
||||
CommentParsing::Block => {
|
||||
match ch {
|
||||
' ' => {
|
||||
state = state.advance_spaces(1)?;
|
||||
|
||||
comment_line_buf.push(ch);
|
||||
}
|
||||
'\n' => {
|
||||
state = state.newline()?;
|
||||
|
||||
// End the current line and start a fresh one.
|
||||
comment_lines.push(comment_line_buf.into_bump_str());
|
||||
|
||||
comment_line_buf = String::new_in(arena);
|
||||
}
|
||||
'#' => {
|
||||
// Three '#' in a row means the comment is finished.
|
||||
//
|
||||
// We want to peek ahead two characters to see if there
|
||||
// are another two '#' there. If so, this comment is done.
|
||||
// Otherwise, we want to proceed as normal.
|
||||
//
|
||||
// Since we can only peek one character at a time,
|
||||
// we need to be careful with how we use peek() and next()
|
||||
// here to avoid accidentally recording extraneous '#' characters
|
||||
// while also making sure not to drop them if we don't
|
||||
// encounter the full "###" after all.
|
||||
match chars.peek() {
|
||||
Some('#') => {
|
||||
// Consume the second '#'.
|
||||
chars.next();
|
||||
|
||||
// We've now seen two '#' in a row. Is a third next?
|
||||
match chars.peek() {
|
||||
Some('#') => {
|
||||
// Consume the third '#'.
|
||||
chars.next();
|
||||
|
||||
// We're done! This is the end of the block comment.
|
||||
state = state.advance_without_indenting(3)?;
|
||||
|
||||
// End the current line and start a fresh one.
|
||||
comment_lines.push(comment_line_buf.into_bump_str());
|
||||
|
||||
comment_line_buf = String::new_in(arena);
|
||||
|
||||
// Add the block comment to the list.
|
||||
space_list.push(Space::BlockComment(
|
||||
comment_lines.into_bump_slice(),
|
||||
));
|
||||
|
||||
// Start a fresh comment line list.
|
||||
comment_lines = Vec::new_in(arena);
|
||||
|
||||
comment_parsing = CommentParsing::No;
|
||||
}
|
||||
_ => {
|
||||
// It was only two '#' in a row, so record them
|
||||
// and move on as normal.
|
||||
state = state.advance_without_indenting(2)?;
|
||||
|
||||
comment_line_buf.push_str("##");
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
// This was a standalone '#' not followed by a second '#',
|
||||
// so record it and move on as normal.
|
||||
state = state.advance_without_indenting(1)?;
|
||||
|
||||
comment_line_buf.push('#');
|
||||
}
|
||||
}
|
||||
}
|
||||
nonblank => {
|
||||
state = state.advance_without_indenting(1)?;
|
||||
|
||||
comment_line_buf.push(nonblank);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if space_list.is_empty() && require_at_least_one {
|
||||
Err(unexpected_eof(chars_parsed, state.attempting, state))
|
||||
} else {
|
||||
Ok((space_list.into_bump_slice(), state))
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,8 +1,9 @@
|
|||
use bumpalo::collections::string::String;
|
||||
use bumpalo::collections::vec::Vec;
|
||||
use bumpalo::Bump;
|
||||
use collections::arena_join;
|
||||
use parse::ast::Attempting;
|
||||
use parse::parser::{unexpected, unexpected_eof, Fail, ParseResult, Parser, State};
|
||||
use parse::parser::{unexpected, unexpected_eof, ParseResult, Parser, State};
|
||||
|
||||
/// The parser accepts all of these in any position where any one of them could
|
||||
/// appear. This way, canonicalization can give more helpful error messages like
|
||||
|
@ -50,40 +51,8 @@ where
|
|||
let mut part_buf = String::new_in(arena); // The current "part" (parts are dot-separated.)
|
||||
let mut capitalized_parts: Vec<&'a str> = Vec::new_in(arena);
|
||||
let mut noncapitalized_parts: Vec<&'a str> = Vec::new_in(arena);
|
||||
let mut is_accessor_fn;
|
||||
let mut is_capitalized;
|
||||
|
||||
let malformed = |opt_bad_char: Option<char>| {
|
||||
// Reconstruct the original string that we've been parsing.
|
||||
let mut full_string = String::new_in(arena);
|
||||
|
||||
full_string.push_str(&capitalized_parts.join("."));
|
||||
full_string.push_str(&noncapitalized_parts.join("."));
|
||||
|
||||
if let Some(bad_char) = opt_bad_char {
|
||||
full_string.push(bad_char);
|
||||
}
|
||||
|
||||
// Consume the remaining chars in the identifier.
|
||||
let mut next_char = None;
|
||||
|
||||
while let Some(ch) = chars.next() {
|
||||
// We can't use ch.is_alphanumeric() here because that passes for
|
||||
// things that are "numeric" but not ASCII digits, like `¾`
|
||||
if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() {
|
||||
full_string.push(ch);
|
||||
} else {
|
||||
next_char = Some(ch);
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok((
|
||||
(Ident::Malformed(&full_string), next_char),
|
||||
state.advance_without_indenting(full_string.len())?,
|
||||
))
|
||||
};
|
||||
let is_accessor_fn;
|
||||
|
||||
// Identifiers and accessor functions must start with either a letter or a dot.
|
||||
// If this starts with neither, it must be something else!
|
||||
|
@ -125,7 +94,14 @@ where
|
|||
} else if ch.is_ascii_digit() {
|
||||
// Parts may not start with numbers!
|
||||
if part_buf.is_empty() {
|
||||
return malformed(Some(ch));
|
||||
return malformed(
|
||||
Some(ch),
|
||||
arena,
|
||||
state,
|
||||
chars,
|
||||
capitalized_parts,
|
||||
noncapitalized_parts,
|
||||
);
|
||||
}
|
||||
|
||||
part_buf.push(ch);
|
||||
|
@ -135,13 +111,20 @@ where
|
|||
// 1. Having two consecutive dots is an error.
|
||||
// 2. Having capitalized parts after noncapitalized (e.g. `foo.Bar`) is an error.
|
||||
if part_buf.is_empty() || (is_capitalized && !noncapitalized_parts.is_empty()) {
|
||||
return malformed(Some(ch));
|
||||
return malformed(
|
||||
Some(ch),
|
||||
arena,
|
||||
state,
|
||||
chars,
|
||||
capitalized_parts,
|
||||
noncapitalized_parts,
|
||||
);
|
||||
}
|
||||
|
||||
if is_capitalized {
|
||||
capitalized_parts.push(&part_buf);
|
||||
capitalized_parts.push(part_buf.into_bump_str());
|
||||
} else {
|
||||
noncapitalized_parts.push(&part_buf);
|
||||
noncapitalized_parts.push(part_buf.into_bump_str());
|
||||
}
|
||||
|
||||
// Now that we've recorded the contents of the current buffer, reset it.
|
||||
|
@ -164,14 +147,21 @@ where
|
|||
//
|
||||
// If we made it this far and don't have a next_char, then necessarily
|
||||
// we have consumed a '.' char previously.
|
||||
return malformed(next_char.or_else(|| Some('.')));
|
||||
return malformed(
|
||||
next_char.or_else(|| Some('.')),
|
||||
arena,
|
||||
state,
|
||||
chars,
|
||||
capitalized_parts,
|
||||
noncapitalized_parts,
|
||||
);
|
||||
}
|
||||
|
||||
// Record the final parts.
|
||||
if is_capitalized {
|
||||
capitalized_parts.push(&part_buf);
|
||||
capitalized_parts.push(part_buf.into_bump_str());
|
||||
} else {
|
||||
noncapitalized_parts.push(&part_buf);
|
||||
noncapitalized_parts.push(part_buf.into_bump_str());
|
||||
}
|
||||
|
||||
let answer = if is_accessor_fn {
|
||||
|
@ -182,7 +172,14 @@ where
|
|||
|
||||
Ident::AccessorFunction(value)
|
||||
} else {
|
||||
return malformed(None);
|
||||
return malformed(
|
||||
None,
|
||||
arena,
|
||||
state,
|
||||
chars,
|
||||
capitalized_parts,
|
||||
noncapitalized_parts,
|
||||
);
|
||||
}
|
||||
} else {
|
||||
match noncapitalized_parts.len() {
|
||||
|
@ -225,6 +222,52 @@ where
|
|||
Ok(((answer, next_char), state))
|
||||
}
|
||||
|
||||
fn malformed<'a, I>(
|
||||
opt_bad_char: Option<char>,
|
||||
arena: &'a Bump,
|
||||
state: State<'a>,
|
||||
chars: &mut I,
|
||||
capitalized_parts: Vec<&'a str>,
|
||||
noncapitalized_parts: Vec<&'a str>,
|
||||
) -> ParseResult<'a, (Ident<'a>, Option<char>)>
|
||||
where
|
||||
I: Iterator<Item = char>,
|
||||
{
|
||||
// Reconstruct the original string that we've been parsing.
|
||||
let mut full_string = String::new_in(arena);
|
||||
|
||||
full_string
|
||||
.push_str(arena_join(arena, &mut capitalized_parts.into_iter(), ".").into_bump_str());
|
||||
full_string
|
||||
.push_str(arena_join(arena, &mut noncapitalized_parts.into_iter(), ".").into_bump_str());
|
||||
|
||||
if let Some(bad_char) = opt_bad_char {
|
||||
full_string.push(bad_char);
|
||||
}
|
||||
|
||||
// Consume the remaining chars in the identifier.
|
||||
let mut next_char = None;
|
||||
|
||||
while let Some(ch) = chars.next() {
|
||||
// We can't use ch.is_alphanumeric() here because that passes for
|
||||
// things that are "numeric" but not ASCII digits, like `¾`
|
||||
if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() {
|
||||
full_string.push(ch);
|
||||
} else {
|
||||
next_char = Some(ch);
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let chars_parsed = full_string.len();
|
||||
|
||||
Ok((
|
||||
(Ident::Malformed(full_string.into_bump_str()), next_char),
|
||||
state.advance_without_indenting(chars_parsed)?,
|
||||
))
|
||||
}
|
||||
|
||||
pub fn ident<'a>() -> impl Parser<'a, Ident<'a>> {
|
||||
move |arena: &'a Bump, state: State<'a>| {
|
||||
// Discard next_char; we don't need it.
|
||||
|
@ -236,46 +279,46 @@ pub fn ident<'a>() -> impl Parser<'a, Ident<'a>> {
|
|||
|
||||
// TESTS
|
||||
|
||||
fn test_parse<'a>(input: &'a str) -> Result<Ident<'a>, Fail> {
|
||||
let arena = Bump::new();
|
||||
let state = State::new(input, Attempting::Expression);
|
||||
// fn test_parse<'a>(input: &'a str) -> Result<Ident<'a>, Fail> {
|
||||
// let arena = Bump::new();
|
||||
// let state = State::new(input, Attempting::Expression);
|
||||
|
||||
ident()
|
||||
.parse(&arena, state)
|
||||
.map(|(answer, _)| answer)
|
||||
.map_err(|(err, _)| err)
|
||||
}
|
||||
// ident()
|
||||
// .parse(&arena, state)
|
||||
// .map(|(answer, _)| answer)
|
||||
// .map_err(|(err, _)| err)
|
||||
// }
|
||||
|
||||
fn var<'a>(module_parts: std::vec::Vec<&'a str>, value: &'a str) -> Ident<'a> {
|
||||
Ident::Var(MaybeQualified {
|
||||
module_parts: module_parts.as_slice(),
|
||||
value,
|
||||
})
|
||||
}
|
||||
// fn var<'a>(module_parts: std::vec::Vec<&'a str>, value: &'a str) -> Ident<'a> {
|
||||
// Ident::Var(MaybeQualified {
|
||||
// module_parts: module_parts.as_slice(),
|
||||
// value,
|
||||
// })
|
||||
// }
|
||||
|
||||
fn variant<'a>(module_parts: std::vec::Vec<&'a str>, value: &'a str) -> Ident<'a> {
|
||||
Ident::Variant(MaybeQualified {
|
||||
module_parts: module_parts.as_slice(),
|
||||
value,
|
||||
})
|
||||
}
|
||||
// fn variant<'a>(module_parts: std::vec::Vec<&'a str>, value: &'a str) -> Ident<'a> {
|
||||
// Ident::Variant(MaybeQualified {
|
||||
// module_parts: module_parts.as_slice(),
|
||||
// value,
|
||||
// })
|
||||
// }
|
||||
|
||||
fn field<'a>(module_parts: std::vec::Vec<&'a str>, value: std::vec::Vec<&'a str>) -> Ident<'a> {
|
||||
Ident::Field(MaybeQualified {
|
||||
module_parts: module_parts.as_slice(),
|
||||
value: value.as_slice(),
|
||||
})
|
||||
}
|
||||
// fn field<'a>(module_parts: std::vec::Vec<&'a str>, value: std::vec::Vec<&'a str>) -> Ident<'a> {
|
||||
// Ident::Field(MaybeQualified {
|
||||
// module_parts: module_parts.as_slice(),
|
||||
// value: value.as_slice(),
|
||||
// })
|
||||
// }
|
||||
|
||||
fn accessor_fn<'a>(value: &'a str) -> Ident<'a> {
|
||||
Ident::AccessorFunction(value)
|
||||
}
|
||||
// fn accessor_fn<'a>(value: &'a str) -> Ident<'a> {
|
||||
// Ident::AccessorFunction(value)
|
||||
// }
|
||||
|
||||
fn malformed<'a>(value: &'a str) -> Ident<'a> {
|
||||
Ident::Malformed(value)
|
||||
}
|
||||
// fn malformed<'a>(value: &'a str) -> Ident<'a> {
|
||||
// Ident::Malformed(value)
|
||||
// }
|
||||
|
||||
#[test]
|
||||
fn parse_var() {
|
||||
assert_eq!(test_parse("foo"), Ok(var("foo")))
|
||||
}
|
||||
// #[test]
|
||||
// fn parse_var() {
|
||||
// assert_eq!(test_parse("foo"), Ok(var(vec![], "foo")))
|
||||
// }
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
pub mod ast;
|
||||
pub mod blankspace;
|
||||
pub mod ident;
|
||||
pub mod keyword;
|
||||
pub mod module;
|
||||
|
@ -7,15 +8,15 @@ pub mod parser;
|
|||
pub mod problems;
|
||||
pub mod string_literal;
|
||||
|
||||
use bumpalo::collections::vec::Vec;
|
||||
use bumpalo::Bump;
|
||||
use operator::Operator;
|
||||
use parse::ast::{Attempting, Expr};
|
||||
use parse::blankspace::{space0, space1_before};
|
||||
use parse::ident::{ident, Ident};
|
||||
use parse::number_literal::number_literal;
|
||||
use parse::parser::{
|
||||
and, attempt, loc, map, map_with_arena, one_of3, one_of4, one_of6, optional, string,
|
||||
unexpected, unexpected_eof, Either, ParseResult, Parser, State,
|
||||
and, attempt, ch, either, loc, map, map_with_arena, one_of3, one_of4, one_of6, optional,
|
||||
skip_first, string, unexpected, unexpected_eof, Either, ParseResult, Parser, State,
|
||||
};
|
||||
use parse::string_literal::string_literal;
|
||||
use region::Located;
|
||||
|
@ -55,38 +56,52 @@ fn parse_expr<'a>(min_indent: u16, arena: &'a Bump, state: State<'a>) -> ParseRe
|
|||
attempt(Attempting::Expression, expr_parser).parse(arena, state)
|
||||
}
|
||||
|
||||
pub fn loc_function_args<'a>(min_indent: u16) -> impl Parser<'a, &'a [Located<Expr<'a>>]> {
|
||||
move |arena, state| {
|
||||
pub fn loc_function_args<'a>(_min_indent: u16) -> impl Parser<'a, &'a [Located<Expr<'a>>]> {
|
||||
move |_arena, _state| {
|
||||
panic!("TODO stop early if we see an operator after the whitespace - precedence!");
|
||||
// zero_or_more(after(one_or_more(whitespace(min_indent)), function_arg()))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn when<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
||||
pub fn when<'a>(_min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
||||
map(string(keyword::WHEN), |_| {
|
||||
panic!("TODO implement WHEN");
|
||||
})
|
||||
}
|
||||
|
||||
pub fn conditional<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
||||
// TODO figure out how to remove this code duplication in a way rustc
|
||||
// accepts. I tried making a helper functions and couldn't resolve the
|
||||
// lifetime errors, so I manually inlined them and moved on.
|
||||
one_of4(
|
||||
cond_help(keyword::IF, Expr::If, min_indent),
|
||||
cond_help(keyword::THEN, Expr::Then, min_indent),
|
||||
cond_help(keyword::ELSE, Expr::Else, min_indent),
|
||||
cond_help(keyword::CASE, Expr::Case, min_indent),
|
||||
)
|
||||
}
|
||||
|
||||
fn cond_help<'a, F>(name: &str, wrap_expr: F, min_indent: u16) -> impl Parser<'a, Expr<'a>>
|
||||
where
|
||||
F: Fn(&'a Located<Expr<'a>>) -> Expr<'a>,
|
||||
{
|
||||
map(
|
||||
after(
|
||||
after(string(name), skip1_whitespace(min_indent)),
|
||||
loc(expr(min_indent)),
|
||||
map_with_arena(
|
||||
skip_first(
|
||||
string(keyword::IF),
|
||||
loc(space1_before(expr(min_indent), min_indent)),
|
||||
),
|
||||
|arena, loc_expr| Expr::If(arena.alloc(loc_expr)),
|
||||
),
|
||||
map_with_arena(
|
||||
skip_first(
|
||||
string(keyword::THEN),
|
||||
loc(space1_before(expr(min_indent), min_indent)),
|
||||
),
|
||||
|arena, loc_expr| Expr::Then(arena.alloc(loc_expr)),
|
||||
),
|
||||
map_with_arena(
|
||||
skip_first(
|
||||
string(keyword::ELSE),
|
||||
loc(space1_before(expr(min_indent), min_indent)),
|
||||
),
|
||||
|arena, loc_expr| Expr::Else(arena.alloc(loc_expr)),
|
||||
),
|
||||
map_with_arena(
|
||||
skip_first(
|
||||
string(keyword::CASE),
|
||||
loc(space1_before(expr(min_indent), min_indent)),
|
||||
),
|
||||
|arena, loc_expr| Expr::Case(arena.alloc(loc_expr)),
|
||||
),
|
||||
wrap_expr,
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -97,7 +112,7 @@ where
|
|||
/// 3. The beginning of a definition (e.g. `foo =`)
|
||||
/// 4. A reserved keyword (e.g. `if ` or `case `), meaning we should do something else.
|
||||
pub fn ident_etc<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
||||
let followed_by_equals = after(zero_or_more(whitespace(min_indent), char('=')));
|
||||
let followed_by_equals = and(space0(min_indent), ch('='));
|
||||
|
||||
map_with_arena(
|
||||
and(
|
||||
|
@ -106,8 +121,9 @@ pub fn ident_etc<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
|||
),
|
||||
|arena, (loc_ident, equals_or_loc_args)| {
|
||||
match equals_or_loc_args {
|
||||
Either::First(()) => {
|
||||
Either::First((_space_list, ())) => {
|
||||
// We have now parsed the beginning of a def (e.g. `foo =`)
|
||||
panic!("TODO parse def, making sure to use the space_list we got - don't drop comments!");
|
||||
}
|
||||
Either::Second(loc_args) => {
|
||||
// This appears to be a var, keyword, or function application.
|
||||
|
|
|
@ -360,13 +360,21 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
/// A single char.
|
||||
pub fn ch<'a>(expected: char) -> impl Parser<'a, ()> {
|
||||
move |_arena, state: State<'a>| match state.input.chars().next() {
|
||||
Some(actual) if expected == actual => Ok(((), state.advance_without_indenting(1)?)),
|
||||
_ => Err(unexpected_eof(1, Attempting::Keyword, state)),
|
||||
}
|
||||
}
|
||||
|
||||
/// A string with no newlines in it.
|
||||
pub fn string<'a>(string: &'static str) -> impl Parser<'a, ()> {
|
||||
// We can't have newlines because we don't attempt to advance the row
|
||||
// in the state, only the column.
|
||||
debug_assert!(!string.contains("\n"));
|
||||
|
||||
move |_arena: &'a Bump, state: State<'a>| {
|
||||
move |_arena, state: State<'a>| {
|
||||
let input = state.input;
|
||||
let len = string.len();
|
||||
|
||||
|
@ -400,38 +408,6 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
// pub fn any<'a>(
|
||||
// _arena: &'a Bump,
|
||||
// state: State<'a>,
|
||||
// attempting: Attempting,
|
||||
// ) -> ParseResult<'a, char> {
|
||||
// let input = state.input;
|
||||
|
||||
// match input.chars().next() {
|
||||
// Some(ch) => {
|
||||
// let len = ch.len_utf8();
|
||||
// let mut new_state = State {
|
||||
// input: &input[len..],
|
||||
|
||||
// ..state.clone()
|
||||
// };
|
||||
|
||||
// if ch == '\n' {
|
||||
// new_state.line = new_state.line + 1;
|
||||
// new_state.column = 0;
|
||||
// }
|
||||
|
||||
// Ok((new_state, ch))
|
||||
// }
|
||||
// _ => Err((state.clone(), attempting)),
|
||||
// }
|
||||
// }
|
||||
|
||||
// fn whitespace<'a>() -> impl Parser<'a, char> {
|
||||
// // TODO advance the state appropriately, in terms of line, col, indenting, etc.
|
||||
// satisfies(any, |ch| ch.is_whitespace())
|
||||
// }
|
||||
|
||||
pub fn and<'a, P1, P2, A, B>(p1: P1, p2: P2) -> impl Parser<'a, (A, B)>
|
||||
where
|
||||
P1: Parser<'a, A>,
|
||||
|
@ -462,6 +438,61 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
pub fn either<'a, P1, P2, A, B>(p1: P1, p2: P2) -> impl Parser<'a, Either<A, B>>
|
||||
where
|
||||
P1: Parser<'a, A>,
|
||||
P2: Parser<'a, B>,
|
||||
{
|
||||
move |arena: &'a Bump, state: State<'a>| {
|
||||
let original_attempting = state.attempting;
|
||||
|
||||
match p1.parse(arena, state) {
|
||||
Ok((output, state)) => Ok((Either::First(output), state)),
|
||||
Err((_, state)) => match p2.parse(arena, state) {
|
||||
Ok((output, state)) => Ok((Either::Second(output), state)),
|
||||
Err((fail, state)) => Err((
|
||||
Fail {
|
||||
attempting: original_attempting,
|
||||
..fail
|
||||
},
|
||||
state,
|
||||
)),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// If the first one parses, ignore its output and move on to parse with the second one.
|
||||
pub fn skip_first<'a, P1, P2, A, B>(p1: P1, p2: P2) -> impl Parser<'a, B>
|
||||
where
|
||||
P1: Parser<'a, A>,
|
||||
P2: Parser<'a, B>,
|
||||
{
|
||||
move |arena: &'a Bump, state: State<'a>| {
|
||||
let original_attempting = state.attempting;
|
||||
|
||||
match p1.parse(arena, state) {
|
||||
Ok((_, state)) => match p2.parse(arena, state) {
|
||||
Ok((out2, state)) => Ok((out2, state)),
|
||||
Err((fail, state)) => Err((
|
||||
Fail {
|
||||
attempting: original_attempting,
|
||||
..fail
|
||||
},
|
||||
state,
|
||||
)),
|
||||
},
|
||||
Err((fail, state)) => Err((
|
||||
Fail {
|
||||
attempting: original_attempting,
|
||||
..fail
|
||||
},
|
||||
state,
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn optional<'a, P, T>(parser: P) -> impl Parser<'a, Option<T>>
|
||||
where
|
||||
P: Parser<'a, T>,
|
||||
|
|
|
@ -17,7 +17,7 @@ use roc::region::{Located, Region};
|
|||
|
||||
pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>, Fail> {
|
||||
let state = State::new(&input, Attempting::Module);
|
||||
let parser = parse::expr();
|
||||
let parser = parse::expr(0);
|
||||
let answer = parser.parse(&arena, state);
|
||||
|
||||
answer.map(|(expr, _)| expr).map_err(|(fail, _)| fail)
|
||||
|
|
|
@ -14,7 +14,7 @@ mod test_format {
|
|||
|
||||
fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Expr<'a>, Fail> {
|
||||
let state = State::new(&input, Attempting::Module);
|
||||
let parser = parse::expr();
|
||||
let parser = parse::expr(0);
|
||||
let answer = parser.parse(&arena, state);
|
||||
|
||||
answer.map(|(expr, _)| expr).map_err(|(fail, _)| fail)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue