mirror of
https://github.com/roc-lang/roc.git
synced 2025-10-01 07:41:12 +00:00
Initial pass at parsing spaces/comments
This commit is contained in:
parent
4713087bb2
commit
df305e4cc8
8 changed files with 529 additions and 137 deletions
|
@ -1,3 +1,5 @@
|
||||||
|
use bumpalo::collections::String;
|
||||||
|
use bumpalo::Bump;
|
||||||
use std::hash::BuildHasherDefault;
|
use std::hash::BuildHasherDefault;
|
||||||
|
|
||||||
pub use fxhash::FxHasher;
|
pub use fxhash::FxHasher;
|
||||||
|
@ -21,3 +23,23 @@ pub type MutSet<K> = std::collections::HashSet<K, BuildHasher>;
|
||||||
pub type ImMap<K, V> = im_rc::hashmap::HashMap<K, V, BuildHasher>;
|
pub type ImMap<K, V> = im_rc::hashmap::HashMap<K, V, BuildHasher>;
|
||||||
|
|
||||||
pub type ImSet<K> = im_rc::hashset::HashSet<K, BuildHasher>;
|
pub type ImSet<K> = im_rc::hashset::HashSet<K, BuildHasher>;
|
||||||
|
|
||||||
|
pub fn arena_join<'a, I>(arena: &'a Bump, strings: &mut I, join_str: &str) -> String<'a>
|
||||||
|
where
|
||||||
|
I: Iterator<Item = &'a str>,
|
||||||
|
{
|
||||||
|
let mut buf = String::new_in(arena);
|
||||||
|
|
||||||
|
if let Some(first) = strings.next() {
|
||||||
|
buf.push_str(&first);
|
||||||
|
|
||||||
|
while let Some(string) = strings.next() {
|
||||||
|
buf.reserve(join_str.len() + string.len());
|
||||||
|
|
||||||
|
buf.push_str(join_str);
|
||||||
|
buf.push_str(string);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
buf
|
||||||
|
}
|
||||||
|
|
|
@ -67,10 +67,22 @@ pub enum Expr<'a> {
|
||||||
Else(&'a Loc<Expr<'a>>),
|
Else(&'a Loc<Expr<'a>>),
|
||||||
Case(&'a Loc<Expr<'a>>),
|
Case(&'a Loc<Expr<'a>>),
|
||||||
|
|
||||||
|
// Blank Space (e.g. comments, spaces, newlines) before or after an expression.
|
||||||
|
// We preserve this for the formatter; canonicalization ignores it.
|
||||||
|
SpaceBefore(&'a [Space<'a>], &'a Loc<Expr<'a>>),
|
||||||
|
SpaceAfter(&'a Loc<Expr<'a>>, &'a [Space<'a>]),
|
||||||
|
|
||||||
// Problems
|
// Problems
|
||||||
MalformedIdent(&'a str),
|
MalformedIdent(&'a str),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// One piece of blank space recorded by the parser so that the formatter can
/// reproduce it later.
///
/// Every variant holds only shared references, so values are cheap to copy.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Space<'a> {
    /// A newline that appeared outside of any comment.
    Newline,
    /// A `#` line comment: the text following the `#`, without the newline
    /// that ends it.
    LineComment(&'a str),
    /// A `###` block comment: one entry per line of text found between the
    /// opening and closing `###`.
    BlockComment(&'a [&'a str]),
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq)]
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
pub enum Pattern<'a> {
|
pub enum Pattern<'a> {
|
||||||
// Identifier
|
// Identifier
|
||||||
|
|
268
src/parse/blankspace.rs
Normal file
268
src/parse/blankspace.rs
Normal file
|
@ -0,0 +1,268 @@
|
||||||
|
use bumpalo::collections::string::String;
|
||||||
|
use bumpalo::collections::vec::Vec;
|
||||||
|
use bumpalo::Bump;
|
||||||
|
use parse::ast::{Expr, Space};
|
||||||
|
use parse::parser::{and, loc, map_with_arena, unexpected, unexpected_eof, Parser, State};
|
||||||
|
|
||||||
|
/// Which kind of comment, if any, the blank-space scanner is currently inside.
#[derive(Debug, Eq, PartialEq)]
enum CommentParsing {
    /// Inside a `#` line comment.
    Line,
    /// Inside a `###` block comment.
    Block,
    /// Not inside a comment.
    No,
}
|
||||||
|
|
||||||
|
pub fn space0_before<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Expr<'a>>
|
||||||
|
where
|
||||||
|
P: Parser<'a, Expr<'a>>,
|
||||||
|
{
|
||||||
|
map_with_arena(
|
||||||
|
and(space0(min_indent), loc(parser)),
|
||||||
|
|arena, (space_list, loc_expr)| {
|
||||||
|
if space_list.is_empty() {
|
||||||
|
loc_expr.value
|
||||||
|
} else {
|
||||||
|
Expr::SpaceBefore(space_list, arena.alloc(loc_expr))
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn space1_before<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Expr<'a>>
|
||||||
|
where
|
||||||
|
P: Parser<'a, Expr<'a>>,
|
||||||
|
{
|
||||||
|
map_with_arena(
|
||||||
|
and(space1(min_indent), loc(parser)),
|
||||||
|
|arena, (space_list, loc_expr)| {
|
||||||
|
if space_list.is_empty() {
|
||||||
|
loc_expr.value
|
||||||
|
} else {
|
||||||
|
Expr::SpaceBefore(space_list, arena.alloc(loc_expr))
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn space0_after<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Expr<'a>>
|
||||||
|
where
|
||||||
|
P: Parser<'a, Expr<'a>>,
|
||||||
|
{
|
||||||
|
map_with_arena(
|
||||||
|
and(space0(min_indent), loc(parser)),
|
||||||
|
|arena, (space_list, loc_expr)| {
|
||||||
|
if space_list.is_empty() {
|
||||||
|
loc_expr.value
|
||||||
|
} else {
|
||||||
|
Expr::SpaceAfter(arena.alloc(loc_expr), space_list)
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn space1_after<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Expr<'a>>
|
||||||
|
where
|
||||||
|
P: Parser<'a, Expr<'a>>,
|
||||||
|
{
|
||||||
|
map_with_arena(
|
||||||
|
and(space1(min_indent), loc(parser)),
|
||||||
|
|arena, (space_list, loc_expr)| {
|
||||||
|
if space_list.is_empty() {
|
||||||
|
loc_expr.value
|
||||||
|
} else {
|
||||||
|
Expr::SpaceAfter(arena.alloc(loc_expr), space_list)
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn space0<'a>(min_indent: u16) -> impl Parser<'a, &'a [Space<'a>]> {
|
||||||
|
spaces(false, min_indent)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn space1<'a>(min_indent: u16) -> impl Parser<'a, &'a [Space<'a>]> {
|
||||||
|
// TODO try doing a short-circuit for the typical case: see if there is
|
||||||
|
// exactly one space followed by char that isn't [' ', '\n', or '#'], and
|
||||||
|
// if so, return empty slice. The case where there's exactly 1 space should
|
||||||
|
// be by far the most common.
|
||||||
|
spaces(true, min_indent)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn spaces<'a>(require_at_least_one: bool, min_indent: u16) -> impl Parser<'a, &'a [Space<'a>]> {
|
||||||
|
move |arena: &'a Bump, state: State<'a>| {
|
||||||
|
let mut chars = state.input.chars().peekable();
|
||||||
|
let mut space_list = Vec::new_in(arena);
|
||||||
|
let mut chars_parsed = 0;
|
||||||
|
let mut comment_lines: Vec<'a, &'a str> = Vec::new_in(arena);
|
||||||
|
let mut comment_line_buf = String::new_in(arena);
|
||||||
|
let mut comment_parsing = CommentParsing::No;
|
||||||
|
let mut state = state;
|
||||||
|
|
||||||
|
while let Some(ch) = chars.next() {
|
||||||
|
chars_parsed += 1;
|
||||||
|
|
||||||
|
match comment_parsing {
|
||||||
|
CommentParsing::No => match ch {
|
||||||
|
' ' => {
|
||||||
|
state = state.advance_spaces(1)?;
|
||||||
|
}
|
||||||
|
'\n' => {
|
||||||
|
state = state.newline()?;
|
||||||
|
|
||||||
|
// Newlines only get added to the list when they're outside comments.
|
||||||
|
space_list.push(Space::Newline);
|
||||||
|
}
|
||||||
|
'#' => {
|
||||||
|
// We're now parsing a line comment!
|
||||||
|
comment_parsing = CommentParsing::Line;
|
||||||
|
}
|
||||||
|
nonblank => {
|
||||||
|
return if space_list.is_empty() && require_at_least_one {
|
||||||
|
Err(unexpected(
|
||||||
|
nonblank,
|
||||||
|
chars_parsed,
|
||||||
|
state.clone(),
|
||||||
|
state.attempting,
|
||||||
|
))
|
||||||
|
} else {
|
||||||
|
Ok((space_list.into_bump_slice(), state))
|
||||||
|
};
|
||||||
|
}
|
||||||
|
},
|
||||||
|
CommentParsing::Line => {
|
||||||
|
match ch {
|
||||||
|
' ' => {
|
||||||
|
state = state.advance_spaces(1)?;
|
||||||
|
|
||||||
|
comment_line_buf.push(ch);
|
||||||
|
}
|
||||||
|
'\n' => {
|
||||||
|
state = state.newline()?;
|
||||||
|
|
||||||
|
// This was a newline, so end this line comment.
|
||||||
|
space_list.push(Space::LineComment(comment_line_buf.into_bump_str()));
|
||||||
|
comment_line_buf = String::new_in(arena);
|
||||||
|
|
||||||
|
comment_parsing = CommentParsing::No;
|
||||||
|
}
|
||||||
|
'#' if comment_line_buf.is_empty() => {
|
||||||
|
if chars.peek() == Some(&'#') {
|
||||||
|
// Consume the '#' we peeked in the conditional.
|
||||||
|
chars.next();
|
||||||
|
|
||||||
|
// Advance past the '#' we parsed and the one
|
||||||
|
// we peeked (and then consumed manually).
|
||||||
|
state = state.advance_without_indenting(2)?;
|
||||||
|
|
||||||
|
// This must be the start of a block comment,
|
||||||
|
// since we are parsing a LineComment with an empty buffer
|
||||||
|
// (meaning the previous char must have been '#'),
|
||||||
|
// then we parsed a '#' right after it, and finally
|
||||||
|
// we peeked and saw a third '#' after that.
|
||||||
|
// "###" begins a block comment!
|
||||||
|
comment_parsing = CommentParsing::Block;
|
||||||
|
} else {
|
||||||
|
state = state.advance_without_indenting(1)?;
|
||||||
|
|
||||||
|
comment_line_buf.push('#');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nonblank => {
|
||||||
|
state = state.advance_without_indenting(1)?;
|
||||||
|
|
||||||
|
comment_line_buf.push(nonblank);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
CommentParsing::Block => {
|
||||||
|
match ch {
|
||||||
|
' ' => {
|
||||||
|
state = state.advance_spaces(1)?;
|
||||||
|
|
||||||
|
comment_line_buf.push(ch);
|
||||||
|
}
|
||||||
|
'\n' => {
|
||||||
|
state = state.newline()?;
|
||||||
|
|
||||||
|
// End the current line and start a fresh one.
|
||||||
|
comment_lines.push(comment_line_buf.into_bump_str());
|
||||||
|
|
||||||
|
comment_line_buf = String::new_in(arena);
|
||||||
|
}
|
||||||
|
'#' => {
|
||||||
|
// Three '#' in a row means the comment is finished.
|
||||||
|
//
|
||||||
|
// We want to peek ahead two characters to see if there
|
||||||
|
// are another two '#' there. If so, this comment is done.
|
||||||
|
// Otherwise, we want to proceed as normal.
|
||||||
|
//
|
||||||
|
// Since we can only peek one character at a time,
|
||||||
|
// we need to be careful with how we use peek() and next()
|
||||||
|
// here to avoid accidentally recording extraneous '#' characters
|
||||||
|
// while also making sure not to drop them if we don't
|
||||||
|
// encounter the full "###" after all.
|
||||||
|
match chars.peek() {
|
||||||
|
Some('#') => {
|
||||||
|
// Consume the second '#'.
|
||||||
|
chars.next();
|
||||||
|
|
||||||
|
// We've now seen two '#' in a row. Is a third next?
|
||||||
|
match chars.peek() {
|
||||||
|
Some('#') => {
|
||||||
|
// Consume the third '#'.
|
||||||
|
chars.next();
|
||||||
|
|
||||||
|
// We're done! This is the end of the block comment.
|
||||||
|
state = state.advance_without_indenting(3)?;
|
||||||
|
|
||||||
|
// End the current line and start a fresh one.
|
||||||
|
comment_lines.push(comment_line_buf.into_bump_str());
|
||||||
|
|
||||||
|
comment_line_buf = String::new_in(arena);
|
||||||
|
|
||||||
|
// Add the block comment to the list.
|
||||||
|
space_list.push(Space::BlockComment(
|
||||||
|
comment_lines.into_bump_slice(),
|
||||||
|
));
|
||||||
|
|
||||||
|
// Start a fresh comment line list.
|
||||||
|
comment_lines = Vec::new_in(arena);
|
||||||
|
|
||||||
|
comment_parsing = CommentParsing::No;
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
// It was only two '#' in a row, so record them
|
||||||
|
// and move on as normal.
|
||||||
|
state = state.advance_without_indenting(2)?;
|
||||||
|
|
||||||
|
comment_line_buf.push_str("##");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
// This was a standalone '#' not followed by a second '#',
|
||||||
|
// so record it and move on as normal.
|
||||||
|
state = state.advance_without_indenting(1)?;
|
||||||
|
|
||||||
|
comment_line_buf.push('#');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nonblank => {
|
||||||
|
state = state.advance_without_indenting(1)?;
|
||||||
|
|
||||||
|
comment_line_buf.push(nonblank);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if space_list.is_empty() && require_at_least_one {
|
||||||
|
Err(unexpected_eof(chars_parsed, state.attempting, state))
|
||||||
|
} else {
|
||||||
|
Ok((space_list.into_bump_slice(), state))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,8 +1,9 @@
|
||||||
use bumpalo::collections::string::String;
|
use bumpalo::collections::string::String;
|
||||||
use bumpalo::collections::vec::Vec;
|
use bumpalo::collections::vec::Vec;
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
|
use collections::arena_join;
|
||||||
use parse::ast::Attempting;
|
use parse::ast::Attempting;
|
||||||
use parse::parser::{unexpected, unexpected_eof, Fail, ParseResult, Parser, State};
|
use parse::parser::{unexpected, unexpected_eof, ParseResult, Parser, State};
|
||||||
|
|
||||||
/// The parser accepts all of these in any position where any one of them could
|
/// The parser accepts all of these in any position where any one of them could
|
||||||
/// appear. This way, canonicalization can give more helpful error messages like
|
/// appear. This way, canonicalization can give more helpful error messages like
|
||||||
|
@ -50,40 +51,8 @@ where
|
||||||
let mut part_buf = String::new_in(arena); // The current "part" (parts are dot-separated.)
|
let mut part_buf = String::new_in(arena); // The current "part" (parts are dot-separated.)
|
||||||
let mut capitalized_parts: Vec<&'a str> = Vec::new_in(arena);
|
let mut capitalized_parts: Vec<&'a str> = Vec::new_in(arena);
|
||||||
let mut noncapitalized_parts: Vec<&'a str> = Vec::new_in(arena);
|
let mut noncapitalized_parts: Vec<&'a str> = Vec::new_in(arena);
|
||||||
let mut is_accessor_fn;
|
|
||||||
let mut is_capitalized;
|
let mut is_capitalized;
|
||||||
|
let is_accessor_fn;
|
||||||
let malformed = |opt_bad_char: Option<char>| {
|
|
||||||
// Reconstruct the original string that we've been parsing.
|
|
||||||
let mut full_string = String::new_in(arena);
|
|
||||||
|
|
||||||
full_string.push_str(&capitalized_parts.join("."));
|
|
||||||
full_string.push_str(&noncapitalized_parts.join("."));
|
|
||||||
|
|
||||||
if let Some(bad_char) = opt_bad_char {
|
|
||||||
full_string.push(bad_char);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Consume the remaining chars in the identifier.
|
|
||||||
let mut next_char = None;
|
|
||||||
|
|
||||||
while let Some(ch) = chars.next() {
|
|
||||||
// We can't use ch.is_alphanumeric() here because that passes for
|
|
||||||
// things that are "numeric" but not ASCII digits, like `¾`
|
|
||||||
if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() {
|
|
||||||
full_string.push(ch);
|
|
||||||
} else {
|
|
||||||
next_char = Some(ch);
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok((
|
|
||||||
(Ident::Malformed(&full_string), next_char),
|
|
||||||
state.advance_without_indenting(full_string.len())?,
|
|
||||||
))
|
|
||||||
};
|
|
||||||
|
|
||||||
// Identifiers and accessor functions must start with either a letter or a dot.
|
// Identifiers and accessor functions must start with either a letter or a dot.
|
||||||
// If this starts with neither, it must be something else!
|
// If this starts with neither, it must be something else!
|
||||||
|
@ -125,7 +94,14 @@ where
|
||||||
} else if ch.is_ascii_digit() {
|
} else if ch.is_ascii_digit() {
|
||||||
// Parts may not start with numbers!
|
// Parts may not start with numbers!
|
||||||
if part_buf.is_empty() {
|
if part_buf.is_empty() {
|
||||||
return malformed(Some(ch));
|
return malformed(
|
||||||
|
Some(ch),
|
||||||
|
arena,
|
||||||
|
state,
|
||||||
|
chars,
|
||||||
|
capitalized_parts,
|
||||||
|
noncapitalized_parts,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
part_buf.push(ch);
|
part_buf.push(ch);
|
||||||
|
@ -135,13 +111,20 @@ where
|
||||||
// 1. Having two consecutive dots is an error.
|
// 1. Having two consecutive dots is an error.
|
||||||
// 2. Having capitalized parts after noncapitalized (e.g. `foo.Bar`) is an error.
|
// 2. Having capitalized parts after noncapitalized (e.g. `foo.Bar`) is an error.
|
||||||
if part_buf.is_empty() || (is_capitalized && !noncapitalized_parts.is_empty()) {
|
if part_buf.is_empty() || (is_capitalized && !noncapitalized_parts.is_empty()) {
|
||||||
return malformed(Some(ch));
|
return malformed(
|
||||||
|
Some(ch),
|
||||||
|
arena,
|
||||||
|
state,
|
||||||
|
chars,
|
||||||
|
capitalized_parts,
|
||||||
|
noncapitalized_parts,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
if is_capitalized {
|
if is_capitalized {
|
||||||
capitalized_parts.push(&part_buf);
|
capitalized_parts.push(part_buf.into_bump_str());
|
||||||
} else {
|
} else {
|
||||||
noncapitalized_parts.push(&part_buf);
|
noncapitalized_parts.push(part_buf.into_bump_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now that we've recorded the contents of the current buffer, reset it.
|
// Now that we've recorded the contents of the current buffer, reset it.
|
||||||
|
@ -164,14 +147,21 @@ where
|
||||||
//
|
//
|
||||||
// If we made it this far and don't have a next_char, then necessarily
|
// If we made it this far and don't have a next_char, then necessarily
|
||||||
// we have consumed a '.' char previously.
|
// we have consumed a '.' char previously.
|
||||||
return malformed(next_char.or_else(|| Some('.')));
|
return malformed(
|
||||||
|
next_char.or_else(|| Some('.')),
|
||||||
|
arena,
|
||||||
|
state,
|
||||||
|
chars,
|
||||||
|
capitalized_parts,
|
||||||
|
noncapitalized_parts,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Record the final parts.
|
// Record the final parts.
|
||||||
if is_capitalized {
|
if is_capitalized {
|
||||||
capitalized_parts.push(&part_buf);
|
capitalized_parts.push(part_buf.into_bump_str());
|
||||||
} else {
|
} else {
|
||||||
noncapitalized_parts.push(&part_buf);
|
noncapitalized_parts.push(part_buf.into_bump_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
let answer = if is_accessor_fn {
|
let answer = if is_accessor_fn {
|
||||||
|
@ -182,7 +172,14 @@ where
|
||||||
|
|
||||||
Ident::AccessorFunction(value)
|
Ident::AccessorFunction(value)
|
||||||
} else {
|
} else {
|
||||||
return malformed(None);
|
return malformed(
|
||||||
|
None,
|
||||||
|
arena,
|
||||||
|
state,
|
||||||
|
chars,
|
||||||
|
capitalized_parts,
|
||||||
|
noncapitalized_parts,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
match noncapitalized_parts.len() {
|
match noncapitalized_parts.len() {
|
||||||
|
@ -225,6 +222,52 @@ where
|
||||||
Ok(((answer, next_char), state))
|
Ok(((answer, next_char), state))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn malformed<'a, I>(
|
||||||
|
opt_bad_char: Option<char>,
|
||||||
|
arena: &'a Bump,
|
||||||
|
state: State<'a>,
|
||||||
|
chars: &mut I,
|
||||||
|
capitalized_parts: Vec<&'a str>,
|
||||||
|
noncapitalized_parts: Vec<&'a str>,
|
||||||
|
) -> ParseResult<'a, (Ident<'a>, Option<char>)>
|
||||||
|
where
|
||||||
|
I: Iterator<Item = char>,
|
||||||
|
{
|
||||||
|
// Reconstruct the original string that we've been parsing.
|
||||||
|
let mut full_string = String::new_in(arena);
|
||||||
|
|
||||||
|
full_string
|
||||||
|
.push_str(arena_join(arena, &mut capitalized_parts.into_iter(), ".").into_bump_str());
|
||||||
|
full_string
|
||||||
|
.push_str(arena_join(arena, &mut noncapitalized_parts.into_iter(), ".").into_bump_str());
|
||||||
|
|
||||||
|
if let Some(bad_char) = opt_bad_char {
|
||||||
|
full_string.push(bad_char);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Consume the remaining chars in the identifier.
|
||||||
|
let mut next_char = None;
|
||||||
|
|
||||||
|
while let Some(ch) = chars.next() {
|
||||||
|
// We can't use ch.is_alphanumeric() here because that passes for
|
||||||
|
// things that are "numeric" but not ASCII digits, like `¾`
|
||||||
|
if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() {
|
||||||
|
full_string.push(ch);
|
||||||
|
} else {
|
||||||
|
next_char = Some(ch);
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let chars_parsed = full_string.len();
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
(Ident::Malformed(full_string.into_bump_str()), next_char),
|
||||||
|
state.advance_without_indenting(chars_parsed)?,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
pub fn ident<'a>() -> impl Parser<'a, Ident<'a>> {
|
pub fn ident<'a>() -> impl Parser<'a, Ident<'a>> {
|
||||||
move |arena: &'a Bump, state: State<'a>| {
|
move |arena: &'a Bump, state: State<'a>| {
|
||||||
// Discard next_char; we don't need it.
|
// Discard next_char; we don't need it.
|
||||||
|
@ -236,46 +279,46 @@ pub fn ident<'a>() -> impl Parser<'a, Ident<'a>> {
|
||||||
|
|
||||||
// TESTS
|
// TESTS
|
||||||
|
|
||||||
fn test_parse<'a>(input: &'a str) -> Result<Ident<'a>, Fail> {
|
// fn test_parse<'a>(input: &'a str) -> Result<Ident<'a>, Fail> {
|
||||||
let arena = Bump::new();
|
// let arena = Bump::new();
|
||||||
let state = State::new(input, Attempting::Expression);
|
// let state = State::new(input, Attempting::Expression);
|
||||||
|
|
||||||
ident()
|
// ident()
|
||||||
.parse(&arena, state)
|
// .parse(&arena, state)
|
||||||
.map(|(answer, _)| answer)
|
// .map(|(answer, _)| answer)
|
||||||
.map_err(|(err, _)| err)
|
// .map_err(|(err, _)| err)
|
||||||
}
|
// }
|
||||||
|
|
||||||
fn var<'a>(module_parts: std::vec::Vec<&'a str>, value: &'a str) -> Ident<'a> {
|
// fn var<'a>(module_parts: std::vec::Vec<&'a str>, value: &'a str) -> Ident<'a> {
|
||||||
Ident::Var(MaybeQualified {
|
// Ident::Var(MaybeQualified {
|
||||||
module_parts: module_parts.as_slice(),
|
// module_parts: module_parts.as_slice(),
|
||||||
value,
|
// value,
|
||||||
})
|
// })
|
||||||
}
|
// }
|
||||||
|
|
||||||
fn variant<'a>(module_parts: std::vec::Vec<&'a str>, value: &'a str) -> Ident<'a> {
|
// fn variant<'a>(module_parts: std::vec::Vec<&'a str>, value: &'a str) -> Ident<'a> {
|
||||||
Ident::Variant(MaybeQualified {
|
// Ident::Variant(MaybeQualified {
|
||||||
module_parts: module_parts.as_slice(),
|
// module_parts: module_parts.as_slice(),
|
||||||
value,
|
// value,
|
||||||
})
|
// })
|
||||||
}
|
// }
|
||||||
|
|
||||||
fn field<'a>(module_parts: std::vec::Vec<&'a str>, value: std::vec::Vec<&'a str>) -> Ident<'a> {
|
// fn field<'a>(module_parts: std::vec::Vec<&'a str>, value: std::vec::Vec<&'a str>) -> Ident<'a> {
|
||||||
Ident::Field(MaybeQualified {
|
// Ident::Field(MaybeQualified {
|
||||||
module_parts: module_parts.as_slice(),
|
// module_parts: module_parts.as_slice(),
|
||||||
value: value.as_slice(),
|
// value: value.as_slice(),
|
||||||
})
|
// })
|
||||||
}
|
// }
|
||||||
|
|
||||||
fn accessor_fn<'a>(value: &'a str) -> Ident<'a> {
|
// fn accessor_fn<'a>(value: &'a str) -> Ident<'a> {
|
||||||
Ident::AccessorFunction(value)
|
// Ident::AccessorFunction(value)
|
||||||
}
|
// }
|
||||||
|
|
||||||
fn malformed<'a>(value: &'a str) -> Ident<'a> {
|
// fn malformed<'a>(value: &'a str) -> Ident<'a> {
|
||||||
Ident::Malformed(value)
|
// Ident::Malformed(value)
|
||||||
}
|
// }
|
||||||
|
|
||||||
#[test]
|
// #[test]
|
||||||
fn parse_var() {
|
// fn parse_var() {
|
||||||
assert_eq!(test_parse("foo"), Ok(var("foo")))
|
// assert_eq!(test_parse("foo"), Ok(var(vec![], "foo")))
|
||||||
}
|
// }
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
pub mod ast;
|
pub mod ast;
|
||||||
|
pub mod blankspace;
|
||||||
pub mod ident;
|
pub mod ident;
|
||||||
pub mod keyword;
|
pub mod keyword;
|
||||||
pub mod module;
|
pub mod module;
|
||||||
|
@ -7,15 +8,15 @@ pub mod parser;
|
||||||
pub mod problems;
|
pub mod problems;
|
||||||
pub mod string_literal;
|
pub mod string_literal;
|
||||||
|
|
||||||
use bumpalo::collections::vec::Vec;
|
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
use operator::Operator;
|
use operator::Operator;
|
||||||
use parse::ast::{Attempting, Expr};
|
use parse::ast::{Attempting, Expr};
|
||||||
|
use parse::blankspace::{space0, space1_before};
|
||||||
use parse::ident::{ident, Ident};
|
use parse::ident::{ident, Ident};
|
||||||
use parse::number_literal::number_literal;
|
use parse::number_literal::number_literal;
|
||||||
use parse::parser::{
|
use parse::parser::{
|
||||||
and, attempt, loc, map, map_with_arena, one_of3, one_of4, one_of6, optional, string,
|
and, attempt, ch, either, loc, map, map_with_arena, one_of3, one_of4, one_of6, optional,
|
||||||
unexpected, unexpected_eof, Either, ParseResult, Parser, State,
|
skip_first, string, unexpected, unexpected_eof, Either, ParseResult, Parser, State,
|
||||||
};
|
};
|
||||||
use parse::string_literal::string_literal;
|
use parse::string_literal::string_literal;
|
||||||
use region::Located;
|
use region::Located;
|
||||||
|
@ -55,38 +56,52 @@ fn parse_expr<'a>(min_indent: u16, arena: &'a Bump, state: State<'a>) -> ParseRe
|
||||||
attempt(Attempting::Expression, expr_parser).parse(arena, state)
|
attempt(Attempting::Expression, expr_parser).parse(arena, state)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn loc_function_args<'a>(min_indent: u16) -> impl Parser<'a, &'a [Located<Expr<'a>>]> {
|
pub fn loc_function_args<'a>(_min_indent: u16) -> impl Parser<'a, &'a [Located<Expr<'a>>]> {
|
||||||
move |arena, state| {
|
move |_arena, _state| {
|
||||||
panic!("TODO stop early if we see an operator after the whitespace - precedence!");
|
panic!("TODO stop early if we see an operator after the whitespace - precedence!");
|
||||||
// zero_or_more(after(one_or_more(whitespace(min_indent)), function_arg()))
|
// zero_or_more(after(one_or_more(whitespace(min_indent)), function_arg()))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn when<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
pub fn when<'a>(_min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
||||||
map(string(keyword::WHEN), |_| {
|
map(string(keyword::WHEN), |_| {
|
||||||
panic!("TODO implement WHEN");
|
panic!("TODO implement WHEN");
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn conditional<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
pub fn conditional<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
||||||
|
// TODO figure out how to remove this code duplication in a way rustc
|
||||||
|
// accepts. I tried making a helper functions and couldn't resolve the
|
||||||
|
// lifetime errors, so I manually inlined them and moved on.
|
||||||
one_of4(
|
one_of4(
|
||||||
cond_help(keyword::IF, Expr::If, min_indent),
|
map_with_arena(
|
||||||
cond_help(keyword::THEN, Expr::Then, min_indent),
|
skip_first(
|
||||||
cond_help(keyword::ELSE, Expr::Else, min_indent),
|
string(keyword::IF),
|
||||||
cond_help(keyword::CASE, Expr::Case, min_indent),
|
loc(space1_before(expr(min_indent), min_indent)),
|
||||||
)
|
),
|
||||||
}
|
|arena, loc_expr| Expr::If(arena.alloc(loc_expr)),
|
||||||
|
),
|
||||||
fn cond_help<'a, F>(name: &str, wrap_expr: F, min_indent: u16) -> impl Parser<'a, Expr<'a>>
|
map_with_arena(
|
||||||
where
|
skip_first(
|
||||||
F: Fn(&'a Located<Expr<'a>>) -> Expr<'a>,
|
string(keyword::THEN),
|
||||||
{
|
loc(space1_before(expr(min_indent), min_indent)),
|
||||||
map(
|
),
|
||||||
after(
|
|arena, loc_expr| Expr::Then(arena.alloc(loc_expr)),
|
||||||
after(string(name), skip1_whitespace(min_indent)),
|
),
|
||||||
loc(expr(min_indent)),
|
map_with_arena(
|
||||||
|
skip_first(
|
||||||
|
string(keyword::ELSE),
|
||||||
|
loc(space1_before(expr(min_indent), min_indent)),
|
||||||
|
),
|
||||||
|
|arena, loc_expr| Expr::Else(arena.alloc(loc_expr)),
|
||||||
|
),
|
||||||
|
map_with_arena(
|
||||||
|
skip_first(
|
||||||
|
string(keyword::CASE),
|
||||||
|
loc(space1_before(expr(min_indent), min_indent)),
|
||||||
|
),
|
||||||
|
|arena, loc_expr| Expr::Case(arena.alloc(loc_expr)),
|
||||||
),
|
),
|
||||||
wrap_expr,
|
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -97,7 +112,7 @@ where
|
||||||
/// 3. The beginning of a definition (e.g. `foo =`)
|
/// 3. The beginning of a definition (e.g. `foo =`)
|
||||||
/// 4. A reserved keyword (e.g. `if ` or `case `), meaning we should do something else.
|
/// 4. A reserved keyword (e.g. `if ` or `case `), meaning we should do something else.
|
||||||
pub fn ident_etc<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
pub fn ident_etc<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
||||||
let followed_by_equals = after(zero_or_more(whitespace(min_indent), char('=')));
|
let followed_by_equals = and(space0(min_indent), ch('='));
|
||||||
|
|
||||||
map_with_arena(
|
map_with_arena(
|
||||||
and(
|
and(
|
||||||
|
@ -106,8 +121,9 @@ pub fn ident_etc<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
||||||
),
|
),
|
||||||
|arena, (loc_ident, equals_or_loc_args)| {
|
|arena, (loc_ident, equals_or_loc_args)| {
|
||||||
match equals_or_loc_args {
|
match equals_or_loc_args {
|
||||||
Either::First(()) => {
|
Either::First((_space_list, ())) => {
|
||||||
// We have now parsed the beginning of a def (e.g. `foo =`)
|
// We have now parsed the beginning of a def (e.g. `foo =`)
|
||||||
|
panic!("TODO parse def, making sure to use the space_list we got - don't drop comments!");
|
||||||
}
|
}
|
||||||
Either::Second(loc_args) => {
|
Either::Second(loc_args) => {
|
||||||
// This appears to be a var, keyword, or function application.
|
// This appears to be a var, keyword, or function application.
|
||||||
|
|
|
@ -360,13 +360,21 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A single char.
|
||||||
|
pub fn ch<'a>(expected: char) -> impl Parser<'a, ()> {
|
||||||
|
move |_arena, state: State<'a>| match state.input.chars().next() {
|
||||||
|
Some(actual) if expected == actual => Ok(((), state.advance_without_indenting(1)?)),
|
||||||
|
_ => Err(unexpected_eof(1, Attempting::Keyword, state)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A string with no newlines in it.
|
/// A string with no newlines in it.
|
||||||
pub fn string<'a>(string: &'static str) -> impl Parser<'a, ()> {
|
pub fn string<'a>(string: &'static str) -> impl Parser<'a, ()> {
|
||||||
// We can't have newlines because we don't attempt to advance the row
|
// We can't have newlines because we don't attempt to advance the row
|
||||||
// in the state, only the column.
|
// in the state, only the column.
|
||||||
debug_assert!(!string.contains("\n"));
|
debug_assert!(!string.contains("\n"));
|
||||||
|
|
||||||
move |_arena: &'a Bump, state: State<'a>| {
|
move |_arena, state: State<'a>| {
|
||||||
let input = state.input;
|
let input = state.input;
|
||||||
let len = string.len();
|
let len = string.len();
|
||||||
|
|
||||||
|
@ -400,38 +408,6 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// pub fn any<'a>(
|
|
||||||
// _arena: &'a Bump,
|
|
||||||
// state: State<'a>,
|
|
||||||
// attempting: Attempting,
|
|
||||||
// ) -> ParseResult<'a, char> {
|
|
||||||
// let input = state.input;
|
|
||||||
|
|
||||||
// match input.chars().next() {
|
|
||||||
// Some(ch) => {
|
|
||||||
// let len = ch.len_utf8();
|
|
||||||
// let mut new_state = State {
|
|
||||||
// input: &input[len..],
|
|
||||||
|
|
||||||
// ..state.clone()
|
|
||||||
// };
|
|
||||||
|
|
||||||
// if ch == '\n' {
|
|
||||||
// new_state.line = new_state.line + 1;
|
|
||||||
// new_state.column = 0;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// Ok((new_state, ch))
|
|
||||||
// }
|
|
||||||
// _ => Err((state.clone(), attempting)),
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// fn whitespace<'a>() -> impl Parser<'a, char> {
|
|
||||||
// // TODO advance the state appropriately, in terms of line, col, indenting, etc.
|
|
||||||
// satisfies(any, |ch| ch.is_whitespace())
|
|
||||||
// }
|
|
||||||
|
|
||||||
pub fn and<'a, P1, P2, A, B>(p1: P1, p2: P2) -> impl Parser<'a, (A, B)>
|
pub fn and<'a, P1, P2, A, B>(p1: P1, p2: P2) -> impl Parser<'a, (A, B)>
|
||||||
where
|
where
|
||||||
P1: Parser<'a, A>,
|
P1: Parser<'a, A>,
|
||||||
|
@ -462,6 +438,61 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn either<'a, P1, P2, A, B>(p1: P1, p2: P2) -> impl Parser<'a, Either<A, B>>
|
||||||
|
where
|
||||||
|
P1: Parser<'a, A>,
|
||||||
|
P2: Parser<'a, B>,
|
||||||
|
{
|
||||||
|
move |arena: &'a Bump, state: State<'a>| {
|
||||||
|
let original_attempting = state.attempting;
|
||||||
|
|
||||||
|
match p1.parse(arena, state) {
|
||||||
|
Ok((output, state)) => Ok((Either::First(output), state)),
|
||||||
|
Err((_, state)) => match p2.parse(arena, state) {
|
||||||
|
Ok((output, state)) => Ok((Either::Second(output), state)),
|
||||||
|
Err((fail, state)) => Err((
|
||||||
|
Fail {
|
||||||
|
attempting: original_attempting,
|
||||||
|
..fail
|
||||||
|
},
|
||||||
|
state,
|
||||||
|
)),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// If the first one parses, ignore its output and move on to parse with the second one.
|
||||||
|
pub fn skip_first<'a, P1, P2, A, B>(p1: P1, p2: P2) -> impl Parser<'a, B>
|
||||||
|
where
|
||||||
|
P1: Parser<'a, A>,
|
||||||
|
P2: Parser<'a, B>,
|
||||||
|
{
|
||||||
|
move |arena: &'a Bump, state: State<'a>| {
|
||||||
|
let original_attempting = state.attempting;
|
||||||
|
|
||||||
|
match p1.parse(arena, state) {
|
||||||
|
Ok((_, state)) => match p2.parse(arena, state) {
|
||||||
|
Ok((out2, state)) => Ok((out2, state)),
|
||||||
|
Err((fail, state)) => Err((
|
||||||
|
Fail {
|
||||||
|
attempting: original_attempting,
|
||||||
|
..fail
|
||||||
|
},
|
||||||
|
state,
|
||||||
|
)),
|
||||||
|
},
|
||||||
|
Err((fail, state)) => Err((
|
||||||
|
Fail {
|
||||||
|
attempting: original_attempting,
|
||||||
|
..fail
|
||||||
|
},
|
||||||
|
state,
|
||||||
|
)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn optional<'a, P, T>(parser: P) -> impl Parser<'a, Option<T>>
|
pub fn optional<'a, P, T>(parser: P) -> impl Parser<'a, Option<T>>
|
||||||
where
|
where
|
||||||
P: Parser<'a, T>,
|
P: Parser<'a, T>,
|
||||||
|
|
|
@ -17,7 +17,7 @@ use roc::region::{Located, Region};
|
||||||
|
|
||||||
pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>, Fail> {
|
pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>, Fail> {
|
||||||
let state = State::new(&input, Attempting::Module);
|
let state = State::new(&input, Attempting::Module);
|
||||||
let parser = parse::expr();
|
let parser = parse::expr(0);
|
||||||
let answer = parser.parse(&arena, state);
|
let answer = parser.parse(&arena, state);
|
||||||
|
|
||||||
answer.map(|(expr, _)| expr).map_err(|(fail, _)| fail)
|
answer.map(|(expr, _)| expr).map_err(|(fail, _)| fail)
|
||||||
|
|
|
@ -14,7 +14,7 @@ mod test_format {
|
||||||
|
|
||||||
fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Expr<'a>, Fail> {
|
fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Expr<'a>, Fail> {
|
||||||
let state = State::new(&input, Attempting::Module);
|
let state = State::new(&input, Attempting::Module);
|
||||||
let parser = parse::expr();
|
let parser = parse::expr(0);
|
||||||
let answer = parser.parse(&arena, state);
|
let answer = parser.parse(&arena, state);
|
||||||
|
|
||||||
answer.map(|(expr, _)| expr).map_err(|(fail, _)| fail)
|
answer.map(|(expr, _)| expr).map_err(|(fail, _)| fail)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue