Initial pass at parsing spaces/comments

This commit is contained in:
Richard Feldman 2019-09-18 00:56:59 -04:00
parent 4713087bb2
commit df305e4cc8
8 changed files with 529 additions and 137 deletions

View file

@ -1,3 +1,5 @@
use bumpalo::collections::String;
use bumpalo::Bump;
use std::hash::BuildHasherDefault; use std::hash::BuildHasherDefault;
pub use fxhash::FxHasher; pub use fxhash::FxHasher;
@ -21,3 +23,23 @@ pub type MutSet<K> = std::collections::HashSet<K, BuildHasher>;
pub type ImMap<K, V> = im_rc::hashmap::HashMap<K, V, BuildHasher>; pub type ImMap<K, V> = im_rc::hashmap::HashMap<K, V, BuildHasher>;
pub type ImSet<K> = im_rc::hashset::HashSet<K, BuildHasher>; pub type ImSet<K> = im_rc::hashset::HashSet<K, BuildHasher>;
/// Concatenates every string yielded by `strings` into a single
/// arena-allocated `String`, placing `join_str` between consecutive items.
///
/// An empty iterator produces an empty string; a single item is returned
/// without any separator.
pub fn arena_join<'a, I>(arena: &'a Bump, strings: &mut I, join_str: &str) -> String<'a>
where
    I: Iterator<Item = &'a str>,
{
    let mut joined = String::new_in(arena);

    // The first item is special: nothing precedes it, so no separator.
    let head = match strings.next() {
        Some(head) => head,
        None => return joined,
    };
    joined.push_str(head);

    // Every remaining item is preceded by the separator.
    for piece in strings {
        joined.reserve(join_str.len() + piece.len());
        joined.push_str(join_str);
        joined.push_str(piece);
    }

    joined
}

View file

@ -67,10 +67,22 @@ pub enum Expr<'a> {
Else(&'a Loc<Expr<'a>>), Else(&'a Loc<Expr<'a>>),
Case(&'a Loc<Expr<'a>>), Case(&'a Loc<Expr<'a>>),
// Blank Space (e.g. comments, spaces, newlines) before or after an expression.
// We preserve this for the formatter; canonicalization ignores it.
SpaceBefore(&'a [Space<'a>], &'a Loc<Expr<'a>>),
SpaceAfter(&'a Loc<Expr<'a>>, &'a [Space<'a>]),
// Problems // Problems
MalformedIdent(&'a str), MalformedIdent(&'a str),
} }
/// One piece of blank space recorded next to an expression
/// (see `Expr::SpaceBefore` and `Expr::SpaceAfter`).
///
/// Plain space characters are not represented here; only newlines and
/// comments are.
#[derive(Debug, PartialEq, Eq)]
pub enum Space<'a> {
/// A newline found outside of any comment.
Newline,
/// The text of a line comment.
/// NOTE(review): the blankspace parser appears to store the text that
/// follows the opening `#`, without the terminating newline - confirm.
LineComment(&'a str),
/// The lines of a block comment, one `&str` per line.
/// NOTE(review): the blankspace parser appears to delimit block comments
/// with `###` on both ends and to exclude the delimiters - confirm.
BlockComment(&'a [&'a str]),
}
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub enum Pattern<'a> { pub enum Pattern<'a> {
// Identifier // Identifier

268
src/parse/blankspace.rs Normal file
View file

@ -0,0 +1,268 @@
use bumpalo::collections::string::String;
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use parse::ast::{Expr, Space};
use parse::parser::{and, loc, map_with_arena, unexpected, unexpected_eof, Parser, State};
/// What type of comment (if any) are we currently parsing?
///
/// This is the state of the character-by-character loop in `spaces`.
#[derive(Debug, PartialEq, Eq)]
enum CommentParsing {
/// Inside a line comment; a newline ends it.
Line,
/// Inside a block comment; three consecutive '#' characters end it.
Block,
/// Not inside any comment.
No,
}
/// Runs `space0(min_indent)` and then `parser`.
///
/// When the blank space produced no `Space` entries, the parsed expression is
/// returned as-is. Otherwise the located expression is moved into the arena
/// and wrapped in `Expr::SpaceBefore` together with the collected entries.
pub fn space0_before<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Expr<'a>>
where
    P: Parser<'a, Expr<'a>>,
{
    let spaces_then_expr = and(space0(min_indent), loc(parser));

    map_with_arena(spaces_then_expr, |arena, (leading, located)| {
        if !leading.is_empty() {
            return Expr::SpaceBefore(leading, arena.alloc(located));
        }

        // Nothing worth preserving preceded the expression.
        located.value
    })
}
/// Runs `space1(min_indent)` and then `parser`.
///
/// When the blank space produced no `Space` entries, the parsed expression is
/// returned as-is. Otherwise the located expression is moved into the arena
/// and wrapped in `Expr::SpaceBefore` together with the collected entries.
pub fn space1_before<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Expr<'a>>
where
    P: Parser<'a, Expr<'a>>,
{
    let spaces_then_expr = and(space1(min_indent), loc(parser));

    map_with_arena(spaces_then_expr, |arena, (leading, located)| {
        if !leading.is_empty() {
            return Expr::SpaceBefore(leading, arena.alloc(located));
        }

        // Nothing worth preserving preceded the expression.
        located.value
    })
}
/// Runs `parser` and then `space0(min_indent)`, i.e. blank space that comes
/// *after* the expression.
///
/// When the trailing blank space produced no `Space` entries, the parsed
/// expression is returned as-is. Otherwise the located expression is moved
/// into the arena and wrapped in `Expr::SpaceAfter` together with the
/// collected entries.
pub fn space0_after<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Expr<'a>>
where
    P: Parser<'a, Expr<'a>>,
{
    map_with_arena(
        // The expression must be parsed first and the blank space second.
        // Parsing the blank space first would record *leading* space under
        // `SpaceAfter` and leave the trailing space unparsed.
        and(loc(parser), space0(min_indent)),
        |arena, (loc_expr, space_list)| {
            if space_list.is_empty() {
                loc_expr.value
            } else {
                Expr::SpaceAfter(arena.alloc(loc_expr), space_list)
            }
        },
    )
}
/// Runs `parser` and then `space1(min_indent)`, i.e. blank space that comes
/// *after* the expression.
///
/// When the trailing blank space produced no `Space` entries, the parsed
/// expression is returned as-is. Otherwise the located expression is moved
/// into the arena and wrapped in `Expr::SpaceAfter` together with the
/// collected entries.
pub fn space1_after<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Expr<'a>>
where
    P: Parser<'a, Expr<'a>>,
{
    map_with_arena(
        // The expression must be parsed first and the blank space second.
        // Parsing the blank space first would record *leading* space under
        // `SpaceAfter` and leave the trailing space unparsed.
        and(loc(parser), space1(min_indent)),
        |arena, (loc_expr, space_list)| {
            if space_list.is_empty() {
                loc_expr.value
            } else {
                Expr::SpaceAfter(arena.alloc(loc_expr), space_list)
            }
        },
    )
}
/// Parses blank space by delegating to `spaces` with `require_at_least_one`
/// set to `false`.
pub fn space0<'a>(min_indent: u16) -> impl Parser<'a, &'a [Space<'a>]> {
spaces(false, min_indent)
}
/// Parses blank space by delegating to `spaces` with `require_at_least_one`
/// set to `true`.
pub fn space1<'a>(min_indent: u16) -> impl Parser<'a, &'a [Space<'a>]> {
// TODO try doing a short-circuit for the typical case: see if there is
// exactly one space followed by char that isn't [' ', '\n', or '#'], and
// if so, return empty slice. The case where there's exactly 1 space should
// be by far the most common.
spaces(true, min_indent)
}
/// Parses a run of blank space: space characters, newlines, line comments
/// (introduced by `#`) and block comments (delimited by `###`).
///
/// Space characters only advance the parser state. Every newline found
/// outside a comment, and every comment, is recorded in order in the returned
/// arena-allocated slice.
///
/// Parsing stops, successfully, at the first character outside a comment
/// that is not a space, newline or `#`, or at the end of the input.
///
/// When `require_at_least_one` is true the parser fails unless at least one
/// character of blank space was consumed. Note that a lone space counts even
/// though it adds nothing to the returned slice.
///
/// NOTE(review): `min_indent` is accepted but not consulted here yet.
/// NOTE(review): characters are advanced one unit at a time via
/// `advance_without_indenting(1)`; confirm this is right for non-ASCII
/// characters inside comments.
#[inline(always)]
fn spaces<'a>(require_at_least_one: bool, min_indent: u16) -> impl Parser<'a, &'a [Space<'a>]> {
    move |arena: &'a Bump, state: State<'a>| {
        let mut chars = state.input.chars().peekable();
        let mut space_list = Vec::new_in(arena);
        let mut chars_parsed = 0;
        let mut comment_lines: Vec<'a, &'a str> = Vec::new_in(arena);
        let mut comment_line_buf = String::new_in(arena);
        let mut comment_parsing = CommentParsing::No;
        let mut state = state;

        while let Some(ch) = chars.next() {
            chars_parsed += 1;

            match comment_parsing {
                CommentParsing::No => match ch {
                    ' ' => {
                        state = state.advance_spaces(1)?;
                    }
                    '\n' => {
                        state = state.newline()?;

                        // Newlines only get added to the list when they're outside comments.
                        space_list.push(Space::Newline);
                    }
                    '#' => {
                        // Advance past the '#' that opens the comment; the
                        // comment branches below only account for the
                        // characters that follow it.
                        state = state.advance_without_indenting(1)?;

                        // We're now parsing a line comment!
                        comment_parsing = CommentParsing::Line;
                    }
                    nonblank => {
                        // `chars_parsed` already counts `nonblank` itself, so
                        // a value of 1 means no blank space preceded it. We
                        // must not look at `space_list` here: plain spaces
                        // are valid blank space but never appear in it.
                        return if require_at_least_one && chars_parsed <= 1 {
                            Err(unexpected(
                                nonblank,
                                chars_parsed,
                                state.clone(),
                                state.attempting,
                            ))
                        } else {
                            Ok((space_list.into_bump_slice(), state))
                        };
                    }
                },
                CommentParsing::Line => {
                    match ch {
                        ' ' => {
                            state = state.advance_spaces(1)?;

                            comment_line_buf.push(ch);
                        }
                        '\n' => {
                            state = state.newline()?;

                            // This was a newline, so end this line comment.
                            space_list.push(Space::LineComment(comment_line_buf.into_bump_str()));
                            comment_line_buf = String::new_in(arena);

                            comment_parsing = CommentParsing::No;
                        }
                        '#' if comment_line_buf.is_empty() => {
                            if chars.peek() == Some(&'#') {
                                // Consume the '#' we peeked in the conditional.
                                chars.next();
                                chars_parsed += 1;

                                // Advance past the '#' we parsed and the one
                                // we peeked (and then consumed manually).
                                state = state.advance_without_indenting(2)?;

                                // This must be the start of a block comment,
                                // since we are parsing a LineComment with an empty buffer
                                // (meaning the previous char must have been '#'),
                                // then we parsed a '#' right after it, and finally
                                // we peeked and saw a third '#' after that.
                                // "###" begins a block comment!
                                comment_parsing = CommentParsing::Block;
                            } else {
                                state = state.advance_without_indenting(1)?;

                                comment_line_buf.push('#');
                            }
                        }
                        nonblank => {
                            state = state.advance_without_indenting(1)?;

                            comment_line_buf.push(nonblank);
                        }
                    }
                }
                CommentParsing::Block => {
                    match ch {
                        ' ' => {
                            state = state.advance_spaces(1)?;

                            comment_line_buf.push(ch);
                        }
                        '\n' => {
                            state = state.newline()?;

                            // End the current line and start a fresh one.
                            comment_lines.push(comment_line_buf.into_bump_str());

                            comment_line_buf = String::new_in(arena);
                        }
                        '#' => {
                            // Three '#' in a row means the comment is finished.
                            //
                            // Since we can only peek one character at a time,
                            // we consume the '#' characters one by one, and
                            // record them in the buffer if it turns out they
                            // were not a full "###" after all.
                            match chars.peek() {
                                Some('#') => {
                                    // Consume the second '#'.
                                    chars.next();
                                    chars_parsed += 1;

                                    // We've now seen two '#' in a row. Is a third next?
                                    match chars.peek() {
                                        Some('#') => {
                                            // Consume the third '#'.
                                            chars.next();
                                            chars_parsed += 1;

                                            // We're done! This is the end of the block comment.
                                            state = state.advance_without_indenting(3)?;

                                            // End the current line and start a fresh one.
                                            comment_lines.push(comment_line_buf.into_bump_str());

                                            comment_line_buf = String::new_in(arena);

                                            // Add the block comment to the list.
                                            space_list.push(Space::BlockComment(
                                                comment_lines.into_bump_slice(),
                                            ));

                                            // Start a fresh comment line list.
                                            comment_lines = Vec::new_in(arena);

                                            comment_parsing = CommentParsing::No;
                                        }
                                        _ => {
                                            // It was only two '#' in a row, so record them
                                            // and move on as normal.
                                            state = state.advance_without_indenting(2)?;

                                            comment_line_buf.push_str("##");
                                        }
                                    }
                                }
                                _ => {
                                    // This was a standalone '#' not followed by a second '#',
                                    // so record it and move on as normal.
                                    state = state.advance_without_indenting(1)?;

                                    comment_line_buf.push('#');
                                }
                            }
                        }
                        nonblank => {
                            state = state.advance_without_indenting(1)?;

                            comment_line_buf.push(nonblank);
                        }
                    }
                }
            }
        }

        // We ran out of input. A line comment is terminated by the end of
        // the input just as it is by a newline, so record it rather than
        // silently dropping it.
        //
        // TODO decide what an unterminated block comment at the end of the
        // input should do; for now its contents are discarded.
        if let CommentParsing::Line = comment_parsing {
            space_list.push(Space::LineComment(comment_line_buf.into_bump_str()));
        }

        if require_at_least_one && chars_parsed == 0 {
            Err(unexpected_eof(chars_parsed, state.attempting, state))
        } else {
            Ok((space_list.into_bump_slice(), state))
        }
    }
}

View file

@ -1,8 +1,9 @@
use bumpalo::collections::string::String; use bumpalo::collections::string::String;
use bumpalo::collections::vec::Vec; use bumpalo::collections::vec::Vec;
use bumpalo::Bump; use bumpalo::Bump;
use collections::arena_join;
use parse::ast::Attempting; use parse::ast::Attempting;
use parse::parser::{unexpected, unexpected_eof, Fail, ParseResult, Parser, State}; use parse::parser::{unexpected, unexpected_eof, ParseResult, Parser, State};
/// The parser accepts all of these in any position where any one of them could /// The parser accepts all of these in any position where any one of them could
/// appear. This way, canonicalization can give more helpful error messages like /// appear. This way, canonicalization can give more helpful error messages like
@ -50,40 +51,8 @@ where
let mut part_buf = String::new_in(arena); // The current "part" (parts are dot-separated.) let mut part_buf = String::new_in(arena); // The current "part" (parts are dot-separated.)
let mut capitalized_parts: Vec<&'a str> = Vec::new_in(arena); let mut capitalized_parts: Vec<&'a str> = Vec::new_in(arena);
let mut noncapitalized_parts: Vec<&'a str> = Vec::new_in(arena); let mut noncapitalized_parts: Vec<&'a str> = Vec::new_in(arena);
let mut is_accessor_fn;
let mut is_capitalized; let mut is_capitalized;
let is_accessor_fn;
let malformed = |opt_bad_char: Option<char>| {
// Reconstruct the original string that we've been parsing.
let mut full_string = String::new_in(arena);
full_string.push_str(&capitalized_parts.join("."));
full_string.push_str(&noncapitalized_parts.join("."));
if let Some(bad_char) = opt_bad_char {
full_string.push(bad_char);
}
// Consume the remaining chars in the identifier.
let mut next_char = None;
while let Some(ch) = chars.next() {
// We can't use ch.is_alphanumeric() here because that passes for
// things that are "numeric" but not ASCII digits, like `¾`
if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() {
full_string.push(ch);
} else {
next_char = Some(ch);
break;
}
}
Ok((
(Ident::Malformed(&full_string), next_char),
state.advance_without_indenting(full_string.len())?,
))
};
// Identifiers and accessor functions must start with either a letter or a dot. // Identifiers and accessor functions must start with either a letter or a dot.
// If this starts with neither, it must be something else! // If this starts with neither, it must be something else!
@ -125,7 +94,14 @@ where
} else if ch.is_ascii_digit() { } else if ch.is_ascii_digit() {
// Parts may not start with numbers! // Parts may not start with numbers!
if part_buf.is_empty() { if part_buf.is_empty() {
return malformed(Some(ch)); return malformed(
Some(ch),
arena,
state,
chars,
capitalized_parts,
noncapitalized_parts,
);
} }
part_buf.push(ch); part_buf.push(ch);
@ -135,13 +111,20 @@ where
// 1. Having two consecutive dots is an error. // 1. Having two consecutive dots is an error.
// 2. Having capitalized parts after noncapitalized (e.g. `foo.Bar`) is an error. // 2. Having capitalized parts after noncapitalized (e.g. `foo.Bar`) is an error.
if part_buf.is_empty() || (is_capitalized && !noncapitalized_parts.is_empty()) { if part_buf.is_empty() || (is_capitalized && !noncapitalized_parts.is_empty()) {
return malformed(Some(ch)); return malformed(
Some(ch),
arena,
state,
chars,
capitalized_parts,
noncapitalized_parts,
);
} }
if is_capitalized { if is_capitalized {
capitalized_parts.push(&part_buf); capitalized_parts.push(part_buf.into_bump_str());
} else { } else {
noncapitalized_parts.push(&part_buf); noncapitalized_parts.push(part_buf.into_bump_str());
} }
// Now that we've recorded the contents of the current buffer, reset it. // Now that we've recorded the contents of the current buffer, reset it.
@ -164,14 +147,21 @@ where
// //
// If we made it this far and don't have a next_char, then necessarily // If we made it this far and don't have a next_char, then necessarily
// we have consumed a '.' char previously. // we have consumed a '.' char previously.
return malformed(next_char.or_else(|| Some('.'))); return malformed(
next_char.or_else(|| Some('.')),
arena,
state,
chars,
capitalized_parts,
noncapitalized_parts,
);
} }
// Record the final parts. // Record the final parts.
if is_capitalized { if is_capitalized {
capitalized_parts.push(&part_buf); capitalized_parts.push(part_buf.into_bump_str());
} else { } else {
noncapitalized_parts.push(&part_buf); noncapitalized_parts.push(part_buf.into_bump_str());
} }
let answer = if is_accessor_fn { let answer = if is_accessor_fn {
@ -182,7 +172,14 @@ where
Ident::AccessorFunction(value) Ident::AccessorFunction(value)
} else { } else {
return malformed(None); return malformed(
None,
arena,
state,
chars,
capitalized_parts,
noncapitalized_parts,
);
} }
} else { } else {
match noncapitalized_parts.len() { match noncapitalized_parts.len() {
@ -225,6 +222,52 @@ where
Ok(((answer, next_char), state)) Ok(((answer, next_char), state))
} }
/// Builds the `Ident::Malformed` result for an identifier that failed to
/// parse.
///
/// The malformed text is assembled from the parts collected so far, the
/// offending character (if any), and every following character from `chars`
/// that is a dot, alphabetic, or an ASCII digit. The first character that is
/// none of those is handed back as the "next char" alongside the identifier.
/// The state is advanced by the byte length of the assembled text.
///
/// NOTE(review): the capitalized and noncapitalized parts are concatenated
/// with no '.' between the two groups, and any partially-read part held by
/// the caller is not passed in - confirm the reconstructed text (and thus the
/// amount advanced) matches the input actually consumed.
fn malformed<'a, I>(
    opt_bad_char: Option<char>,
    arena: &'a Bump,
    state: State<'a>,
    chars: &mut I,
    capitalized_parts: Vec<&'a str>,
    noncapitalized_parts: Vec<&'a str>,
) -> ParseResult<'a, (Ident<'a>, Option<char>)>
where
    I: Iterator<Item = char>,
{
    // Rebuild what has been parsed so far from the recorded parts.
    let capitalized = arena_join(arena, &mut capitalized_parts.into_iter(), ".").into_bump_str();
    let noncapitalized =
        arena_join(arena, &mut noncapitalized_parts.into_iter(), ".").into_bump_str();

    let mut text = String::new_in(arena);
    text.push_str(capitalized);
    text.push_str(noncapitalized);

    if let Some(bad_char) = opt_bad_char {
        text.push(bad_char);
    }

    // Swallow the rest of the identifier, stopping at the first character
    // that cannot belong to one (or at the end of the input).
    let next_char = loop {
        match chars.next() {
            // We can't use ch.is_alphanumeric() here because that passes for
            // things that are "numeric" but not ASCII digits, like `¾`
            Some(ch) if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() => text.push(ch),
            terminator => break terminator,
        }
    };

    let chars_parsed = text.len();
    let ident = Ident::Malformed(text.into_bump_str());
    let state = state.advance_without_indenting(chars_parsed)?;

    Ok(((ident, next_char), state))
}
pub fn ident<'a>() -> impl Parser<'a, Ident<'a>> { pub fn ident<'a>() -> impl Parser<'a, Ident<'a>> {
move |arena: &'a Bump, state: State<'a>| { move |arena: &'a Bump, state: State<'a>| {
// Discard next_char; we don't need it. // Discard next_char; we don't need it.
@ -236,46 +279,46 @@ pub fn ident<'a>() -> impl Parser<'a, Ident<'a>> {
// TESTS // TESTS
fn test_parse<'a>(input: &'a str) -> Result<Ident<'a>, Fail> { // fn test_parse<'a>(input: &'a str) -> Result<Ident<'a>, Fail> {
let arena = Bump::new(); // let arena = Bump::new();
let state = State::new(input, Attempting::Expression); // let state = State::new(input, Attempting::Expression);
ident() // ident()
.parse(&arena, state) // .parse(&arena, state)
.map(|(answer, _)| answer) // .map(|(answer, _)| answer)
.map_err(|(err, _)| err) // .map_err(|(err, _)| err)
} // }
fn var<'a>(module_parts: std::vec::Vec<&'a str>, value: &'a str) -> Ident<'a> { // fn var<'a>(module_parts: std::vec::Vec<&'a str>, value: &'a str) -> Ident<'a> {
Ident::Var(MaybeQualified { // Ident::Var(MaybeQualified {
module_parts: module_parts.as_slice(), // module_parts: module_parts.as_slice(),
value, // value,
}) // })
} // }
fn variant<'a>(module_parts: std::vec::Vec<&'a str>, value: &'a str) -> Ident<'a> { // fn variant<'a>(module_parts: std::vec::Vec<&'a str>, value: &'a str) -> Ident<'a> {
Ident::Variant(MaybeQualified { // Ident::Variant(MaybeQualified {
module_parts: module_parts.as_slice(), // module_parts: module_parts.as_slice(),
value, // value,
}) // })
} // }
fn field<'a>(module_parts: std::vec::Vec<&'a str>, value: std::vec::Vec<&'a str>) -> Ident<'a> { // fn field<'a>(module_parts: std::vec::Vec<&'a str>, value: std::vec::Vec<&'a str>) -> Ident<'a> {
Ident::Field(MaybeQualified { // Ident::Field(MaybeQualified {
module_parts: module_parts.as_slice(), // module_parts: module_parts.as_slice(),
value: value.as_slice(), // value: value.as_slice(),
}) // })
} // }
fn accessor_fn<'a>(value: &'a str) -> Ident<'a> { // fn accessor_fn<'a>(value: &'a str) -> Ident<'a> {
Ident::AccessorFunction(value) // Ident::AccessorFunction(value)
} // }
fn malformed<'a>(value: &'a str) -> Ident<'a> { // fn malformed<'a>(value: &'a str) -> Ident<'a> {
Ident::Malformed(value) // Ident::Malformed(value)
} // }
#[test] // #[test]
fn parse_var() { // fn parse_var() {
assert_eq!(test_parse("foo"), Ok(var("foo"))) // assert_eq!(test_parse("foo"), Ok(var(vec![], "foo")))
} // }

View file

@ -1,4 +1,5 @@
pub mod ast; pub mod ast;
pub mod blankspace;
pub mod ident; pub mod ident;
pub mod keyword; pub mod keyword;
pub mod module; pub mod module;
@ -7,15 +8,15 @@ pub mod parser;
pub mod problems; pub mod problems;
pub mod string_literal; pub mod string_literal;
use bumpalo::collections::vec::Vec;
use bumpalo::Bump; use bumpalo::Bump;
use operator::Operator; use operator::Operator;
use parse::ast::{Attempting, Expr}; use parse::ast::{Attempting, Expr};
use parse::blankspace::{space0, space1_before};
use parse::ident::{ident, Ident}; use parse::ident::{ident, Ident};
use parse::number_literal::number_literal; use parse::number_literal::number_literal;
use parse::parser::{ use parse::parser::{
and, attempt, loc, map, map_with_arena, one_of3, one_of4, one_of6, optional, string, and, attempt, ch, either, loc, map, map_with_arena, one_of3, one_of4, one_of6, optional,
unexpected, unexpected_eof, Either, ParseResult, Parser, State, skip_first, string, unexpected, unexpected_eof, Either, ParseResult, Parser, State,
}; };
use parse::string_literal::string_literal; use parse::string_literal::string_literal;
use region::Located; use region::Located;
@ -55,38 +56,52 @@ fn parse_expr<'a>(min_indent: u16, arena: &'a Bump, state: State<'a>) -> ParseRe
attempt(Attempting::Expression, expr_parser).parse(arena, state) attempt(Attempting::Expression, expr_parser).parse(arena, state)
} }
pub fn loc_function_args<'a>(min_indent: u16) -> impl Parser<'a, &'a [Located<Expr<'a>>]> { pub fn loc_function_args<'a>(_min_indent: u16) -> impl Parser<'a, &'a [Located<Expr<'a>>]> {
move |arena, state| { move |_arena, _state| {
panic!("TODO stop early if we see an operator after the whitespace - precedence!"); panic!("TODO stop early if we see an operator after the whitespace - precedence!");
// zero_or_more(after(one_or_more(whitespace(min_indent)), function_arg())) // zero_or_more(after(one_or_more(whitespace(min_indent)), function_arg()))
} }
} }
pub fn when<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> { pub fn when<'a>(_min_indent: u16) -> impl Parser<'a, Expr<'a>> {
map(string(keyword::WHEN), |_| { map(string(keyword::WHEN), |_| {
panic!("TODO implement WHEN"); panic!("TODO implement WHEN");
}) })
} }
pub fn conditional<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> { pub fn conditional<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
// TODO figure out how to remove this code duplication in a way rustc
// accepts. I tried making a helper functions and couldn't resolve the
// lifetime errors, so I manually inlined them and moved on.
one_of4( one_of4(
cond_help(keyword::IF, Expr::If, min_indent), map_with_arena(
cond_help(keyword::THEN, Expr::Then, min_indent), skip_first(
cond_help(keyword::ELSE, Expr::Else, min_indent), string(keyword::IF),
cond_help(keyword::CASE, Expr::Case, min_indent), loc(space1_before(expr(min_indent), min_indent)),
) ),
} |arena, loc_expr| Expr::If(arena.alloc(loc_expr)),
),
fn cond_help<'a, F>(name: &str, wrap_expr: F, min_indent: u16) -> impl Parser<'a, Expr<'a>> map_with_arena(
where skip_first(
F: Fn(&'a Located<Expr<'a>>) -> Expr<'a>, string(keyword::THEN),
{ loc(space1_before(expr(min_indent), min_indent)),
map( ),
after( |arena, loc_expr| Expr::Then(arena.alloc(loc_expr)),
after(string(name), skip1_whitespace(min_indent)), ),
loc(expr(min_indent)), map_with_arena(
skip_first(
string(keyword::ELSE),
loc(space1_before(expr(min_indent), min_indent)),
),
|arena, loc_expr| Expr::Else(arena.alloc(loc_expr)),
),
map_with_arena(
skip_first(
string(keyword::CASE),
loc(space1_before(expr(min_indent), min_indent)),
),
|arena, loc_expr| Expr::Case(arena.alloc(loc_expr)),
), ),
wrap_expr,
) )
} }
@ -97,7 +112,7 @@ where
/// 3. The beginning of a definition (e.g. `foo =`) /// 3. The beginning of a definition (e.g. `foo =`)
/// 4. A reserved keyword (e.g. `if ` or `case `), meaning we should do something else. /// 4. A reserved keyword (e.g. `if ` or `case `), meaning we should do something else.
pub fn ident_etc<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> { pub fn ident_etc<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
let followed_by_equals = after(zero_or_more(whitespace(min_indent), char('='))); let followed_by_equals = and(space0(min_indent), ch('='));
map_with_arena( map_with_arena(
and( and(
@ -106,8 +121,9 @@ pub fn ident_etc<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
), ),
|arena, (loc_ident, equals_or_loc_args)| { |arena, (loc_ident, equals_or_loc_args)| {
match equals_or_loc_args { match equals_or_loc_args {
Either::First(()) => { Either::First((_space_list, ())) => {
// We have now parsed the beginning of a def (e.g. `foo =`) // We have now parsed the beginning of a def (e.g. `foo =`)
panic!("TODO parse def, making sure to use the space_list we got - don't drop comments!");
} }
Either::Second(loc_args) => { Either::Second(loc_args) => {
// This appears to be a var, keyword, or function application. // This appears to be a var, keyword, or function application.

View file

@ -360,13 +360,21 @@ where
} }
} }
/// A single char.
pub fn ch<'a>(expected: char) -> impl Parser<'a, ()> {
move |_arena, state: State<'a>| match state.input.chars().next() {
Some(actual) if expected == actual => Ok(((), state.advance_without_indenting(1)?)),
_ => Err(unexpected_eof(1, Attempting::Keyword, state)),
}
}
/// A string with no newlines in it. /// A string with no newlines in it.
pub fn string<'a>(string: &'static str) -> impl Parser<'a, ()> { pub fn string<'a>(string: &'static str) -> impl Parser<'a, ()> {
// We can't have newlines because we don't attempt to advance the row // We can't have newlines because we don't attempt to advance the row
// in the state, only the column. // in the state, only the column.
debug_assert!(!string.contains("\n")); debug_assert!(!string.contains("\n"));
move |_arena: &'a Bump, state: State<'a>| { move |_arena, state: State<'a>| {
let input = state.input; let input = state.input;
let len = string.len(); let len = string.len();
@ -400,38 +408,6 @@ where
} }
} }
// pub fn any<'a>(
// _arena: &'a Bump,
// state: State<'a>,
// attempting: Attempting,
// ) -> ParseResult<'a, char> {
// let input = state.input;
// match input.chars().next() {
// Some(ch) => {
// let len = ch.len_utf8();
// let mut new_state = State {
// input: &input[len..],
// ..state.clone()
// };
// if ch == '\n' {
// new_state.line = new_state.line + 1;
// new_state.column = 0;
// }
// Ok((new_state, ch))
// }
// _ => Err((state.clone(), attempting)),
// }
// }
// fn whitespace<'a>() -> impl Parser<'a, char> {
// // TODO advance the state appropriately, in terms of line, col, indenting, etc.
// satisfies(any, |ch| ch.is_whitespace())
// }
pub fn and<'a, P1, P2, A, B>(p1: P1, p2: P2) -> impl Parser<'a, (A, B)> pub fn and<'a, P1, P2, A, B>(p1: P1, p2: P2) -> impl Parser<'a, (A, B)>
where where
P1: Parser<'a, A>, P1: Parser<'a, A>,
@ -462,6 +438,61 @@ where
} }
} }
/// Tries `p1`; if it fails, tries `p2` starting from the state `p1`'s failure
/// handed back.
///
/// The output is tagged `Either::First` or `Either::Second` according to
/// which parser succeeded. If both fail, `p2`'s failure is returned with its
/// `attempting` field reset to what the incoming state was attempting.
pub fn either<'a, P1, P2, A, B>(p1: P1, p2: P2) -> impl Parser<'a, Either<A, B>>
where
    P1: Parser<'a, A>,
    P2: Parser<'a, B>,
{
    move |arena: &'a Bump, state: State<'a>| {
        let original_attempting = state.attempting;

        // First choice: succeed immediately, or recover the state to retry from.
        let retry_state = match p1.parse(arena, state) {
            Ok((first, next_state)) => return Ok((Either::First(first), next_state)),
            Err((_, failed_state)) => failed_state,
        };

        // Second choice: its failure (if any) is the one we report.
        match p2.parse(arena, retry_state) {
            Ok((second, next_state)) => Ok((Either::Second(second), next_state)),
            Err((fail, failed_state)) => {
                let relabeled = Fail {
                    attempting: original_attempting,
                    ..fail
                };

                Err((relabeled, failed_state))
            }
        }
    }
}
/// Runs `p1` and, if it succeeds, discards its output and runs `p2` on the
/// resulting state, yielding `p2`'s output.
///
/// Whichever parser fails, the failure is returned with its `attempting`
/// field reset to what the incoming state was attempting.
pub fn skip_first<'a, P1, P2, A, B>(p1: P1, p2: P2) -> impl Parser<'a, B>
where
    P1: Parser<'a, A>,
    P2: Parser<'a, B>,
{
    move |arena: &'a Bump, state: State<'a>| {
        let original_attempting = state.attempting;

        p1.parse(arena, state)
            // Drop the first output; only the advanced state matters.
            .and_then(|(_, next_state)| p2.parse(arena, next_state))
            // Both failure paths get relabeled the same way.
            .map_err(|(fail, failed_state)| {
                let relabeled = Fail {
                    attempting: original_attempting,
                    ..fail
                };

                (relabeled, failed_state)
            })
    }
}
pub fn optional<'a, P, T>(parser: P) -> impl Parser<'a, Option<T>> pub fn optional<'a, P, T>(parser: P) -> impl Parser<'a, Option<T>>
where where
P: Parser<'a, T>, P: Parser<'a, T>,

View file

@ -17,7 +17,7 @@ use roc::region::{Located, Region};
pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>, Fail> { pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>, Fail> {
let state = State::new(&input, Attempting::Module); let state = State::new(&input, Attempting::Module);
let parser = parse::expr(); let parser = parse::expr(0);
let answer = parser.parse(&arena, state); let answer = parser.parse(&arena, state);
answer.map(|(expr, _)| expr).map_err(|(fail, _)| fail) answer.map(|(expr, _)| expr).map_err(|(fail, _)| fail)

View file

@ -14,7 +14,7 @@ mod test_format {
fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Expr<'a>, Fail> { fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Expr<'a>, Fail> {
let state = State::new(&input, Attempting::Module); let state = State::new(&input, Attempting::Module);
let parser = parse::expr(); let parser = parse::expr(0);
let answer = parser.parse(&arena, state); let answer = parser.parse(&arena, state);
answer.map(|(expr, _)| expr).map_err(|(fail, _)| fail) answer.map(|(expr, _)| expr).map_err(|(fail, _)| fail)