Initial pass at parsing spaces/comments

2025-10-01 07:41:12 +00:00 · 2019-09-18 00:56:59 -04:00 · 2019-09-18 00:56:59 -04:00 · df305e4cc8
commit df305e4cc8
parent 4713087bb2
8 changed files with 529 additions and 137 deletions
--- a/src/collections.rs
+++ b/src/collections.rs
@ -1,3 +1,5 @@
+use bumpalo::collections::String;
+use bumpalo::Bump;
 use std::hash::BuildHasherDefault;

 pub use fxhash::FxHasher;
@ -21,3 +23,23 @@ pub type MutSet<K> = std::collections::HashSet<K, BuildHasher>;
 pub type ImMap<K, V> = im_rc::hashmap::HashMap<K, V, BuildHasher>;

 pub type ImSet<K> = im_rc::hashset::HashSet<K, BuildHasher>;
+
+pub fn arena_join<'a, I>(arena: &'a Bump, strings: &mut I, join_str: &str) -> String<'a>
+where
+    I: Iterator<Item = &'a str>,
+{
+    let mut buf = String::new_in(arena);
+
+    if let Some(first) = strings.next() {
+        buf.push_str(&first);
+
+        while let Some(string) = strings.next() {
+            buf.reserve(join_str.len() + string.len());
+
+            buf.push_str(join_str);
+            buf.push_str(string);
+        }
+    }
+
+    buf
+}
--- a/src/parse/ast.rs
+++ b/src/parse/ast.rs
@ -67,10 +67,22 @@ pub enum Expr<'a> {
    Else(&'a Loc<Expr<'a>>),
    Case(&'a Loc<Expr<'a>>),

+    // Blank Space (e.g. comments, spaces, newlines) before or after an expression.
+    // We preserve this for the formatter; canonicalization ignores it.
+    SpaceBefore(&'a [Space<'a>], &'a Loc<Expr<'a>>),
+    SpaceAfter(&'a Loc<Expr<'a>>, &'a [Space<'a>]),
+
    // Problems
    MalformedIdent(&'a str),
 }

+#[derive(Debug, PartialEq, Eq)]
+pub enum Space<'a> {
+    Newline,
+    LineComment(&'a str),
+    BlockComment(&'a [&'a str]),
+}
+
 #[derive(Clone, Debug, PartialEq)]
 pub enum Pattern<'a> {
    // Identifier
--- a/src/parse/blankspace.rs
+++ b/src/parse/blankspace.rs
@ -0,0 +1,268 @@
+use bumpalo::collections::string::String;
+use bumpalo::collections::vec::Vec;
+use bumpalo::Bump;
+use parse::ast::{Expr, Space};
+use parse::parser::{and, loc, map_with_arena, unexpected, unexpected_eof, Parser, State};
+
+/// What type of comment (if any) are we currently parsing?
+#[derive(Debug, PartialEq, Eq)]
+enum CommentParsing {
+    Line,
+    Block,
+    No,
+}
+
+pub fn space0_before<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Expr<'a>>
+where
+    P: Parser<'a, Expr<'a>>,
+{
+    map_with_arena(
+        and(space0(min_indent), loc(parser)),
+        |arena, (space_list, loc_expr)| {
+            if space_list.is_empty() {
+                loc_expr.value
+            } else {
+                Expr::SpaceBefore(space_list, arena.alloc(loc_expr))
+            }
+        },
+    )
+}
+
+pub fn space1_before<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Expr<'a>>
+where
+    P: Parser<'a, Expr<'a>>,
+{
+    map_with_arena(
+        and(space1(min_indent), loc(parser)),
+        |arena, (space_list, loc_expr)| {
+            if space_list.is_empty() {
+                loc_expr.value
+            } else {
+                Expr::SpaceBefore(space_list, arena.alloc(loc_expr))
+            }
+        },
+    )
+}
+
+pub fn space0_after<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Expr<'a>>
+where
+    P: Parser<'a, Expr<'a>>,
+{
+    map_with_arena(
+        and(space0(min_indent), loc(parser)),
+        |arena, (space_list, loc_expr)| {
+            if space_list.is_empty() {
+                loc_expr.value
+            } else {
+                Expr::SpaceAfter(arena.alloc(loc_expr), space_list)
+            }
+        },
+    )
+}
+
+pub fn space1_after<'a, P>(parser: P, min_indent: u16) -> impl Parser<'a, Expr<'a>>
+where
+    P: Parser<'a, Expr<'a>>,
+{
+    map_with_arena(
+        and(space1(min_indent), loc(parser)),
+        |arena, (space_list, loc_expr)| {
+            if space_list.is_empty() {
+                loc_expr.value
+            } else {
+                Expr::SpaceAfter(arena.alloc(loc_expr), space_list)
+            }
+        },
+    )
+}
+
+pub fn space0<'a>(min_indent: u16) -> impl Parser<'a, &'a [Space<'a>]> {
+    spaces(false, min_indent)
+}
+
+pub fn space1<'a>(min_indent: u16) -> impl Parser<'a, &'a [Space<'a>]> {
+    // TODO try doing a short-circuit for the typical case: see if there is
+    // exactly one space followed by char that isn't [' ', '\n', or '#'], and
+    // if so, return empty slice. The case where there's exactly 1 space should
+    // be by far the most common.
+    spaces(true, min_indent)
+}
+
+#[inline(always)]
+fn spaces<'a>(require_at_least_one: bool, min_indent: u16) -> impl Parser<'a, &'a [Space<'a>]> {
+    move |arena: &'a Bump, state: State<'a>| {
+        let mut chars = state.input.chars().peekable();
+        let mut space_list = Vec::new_in(arena);
+        let mut chars_parsed = 0;
+        let mut comment_lines: Vec<'a, &'a str> = Vec::new_in(arena);
+        let mut comment_line_buf = String::new_in(arena);
+        let mut comment_parsing = CommentParsing::No;
+        let mut state = state;
+
+        while let Some(ch) = chars.next() {
+            chars_parsed += 1;
+
+            match comment_parsing {
+                CommentParsing::No => match ch {
+                    ' ' => {
+                        state = state.advance_spaces(1)?;
+                    }
+                    '\n' => {
+                        state = state.newline()?;
+
+                        // Newlines only get added to the list when they're outside comments.
+                        space_list.push(Space::Newline);
+                    }
+                    '#' => {
+                        // We're now parsing a line comment!
+                        comment_parsing = CommentParsing::Line;
+                    }
+                    nonblank => {
+                        return if space_list.is_empty() && require_at_least_one {
+                            Err(unexpected(
+                                nonblank,
+                                chars_parsed,
+                                state.clone(),
+                                state.attempting,
+                            ))
+                        } else {
+                            Ok((space_list.into_bump_slice(), state))
+                        };
+                    }
+                },
+                CommentParsing::Line => {
+                    match ch {
+                        ' ' => {
+                            state = state.advance_spaces(1)?;
+
+                            comment_line_buf.push(ch);
+                        }
+                        '\n' => {
+                            state = state.newline()?;
+
+                            // This was a newline, so end this line comment.
+                            space_list.push(Space::LineComment(comment_line_buf.into_bump_str()));
+                            comment_line_buf = String::new_in(arena);
+
+                            comment_parsing = CommentParsing::No;
+                        }
+                        '#' if comment_line_buf.is_empty() => {
+                            if chars.peek() == Some(&'#') {
+                                // Consume the '#' we peeked in the conditional.
+                                chars.next();
+
+                                // Advance past the '#' we parsed and the one
+                                // we peeked (and then consumed manually).
+                                state = state.advance_without_indenting(2)?;
+
+                                // This must be the start of a block comment,
+                                // since we are parsing a LineComment with an empty buffer
+                                // (meaning the previous char must have been '#'),
+                                // then we parsed a '#' right after it, and finally
+                                // we peeked and saw a third '#' after that.
+                                // "###" begins a block comment!
+                                comment_parsing = CommentParsing::Block;
+                            } else {
+                                state = state.advance_without_indenting(1)?;
+
+                                comment_line_buf.push('#');
+                            }
+                        }
+                        nonblank => {
+                            state = state.advance_without_indenting(1)?;
+
+                            comment_line_buf.push(nonblank);
+                        }
+                    }
+                }
+                CommentParsing::Block => {
+                    match ch {
+                        ' ' => {
+                            state = state.advance_spaces(1)?;
+
+                            comment_line_buf.push(ch);
+                        }
+                        '\n' => {
+                            state = state.newline()?;
+
+                            // End the current line and start a fresh one.
+                            comment_lines.push(comment_line_buf.into_bump_str());
+
+                            comment_line_buf = String::new_in(arena);
+                        }
+                        '#' => {
+                            // Three '#' in a row means the comment is finished.
+                            //
+                            // We want to peek ahead two characters to see if there
+                            // are another two '#' there. If so, this comment is done.
+                            // Otherwise, we want to proceed as normal.
+                            //
+                            // Since we can only peek one character at a time,
+                            // we need to be careful with how we use peek() and next()
+                            // here to avoid accidentally recording extraneous '#' characters
+                            // while also making sure not to drop them if we don't
+                            // encounter the full "###" after all.
+                            match chars.peek() {
+                                Some('#') => {
+                                    // Consume the second '#'.
+                                    chars.next();
+
+                                    // We've now seen two '#' in a row. Is a third next?
+                                    match chars.peek() {
+                                        Some('#') => {
+                                            // Consume the third '#'.
+                                            chars.next();
+
+                                            // We're done! This is the end of the block comment.
+                                            state = state.advance_without_indenting(3)?;
+
+                                            // End the current line and start a fresh one.
+                                            comment_lines.push(comment_line_buf.into_bump_str());
+
+                                            comment_line_buf = String::new_in(arena);
+
+                                            // Add the block comment to the list.
+                                            space_list.push(Space::BlockComment(
+                                                comment_lines.into_bump_slice(),
+                                            ));
+
+                                            // Start a fresh comment line list.
+                                            comment_lines = Vec::new_in(arena);
+
+                                            comment_parsing = CommentParsing::No;
+                                        }
+                                        _ => {
+                                            // It was only two '#' in a row, so record them
+                                            // and move on as normal.
+                                            state = state.advance_without_indenting(2)?;
+
+                                            comment_line_buf.push_str("##");
+                                        }
+                                    }
+                                }
+                                _ => {
+                                    // This was a standalone '#' not followed by a second '#',
+                                    // so record it and move on as normal.
+                                    state = state.advance_without_indenting(1)?;
+
+                                    comment_line_buf.push('#');
+                                }
+                            }
+                        }
+                        nonblank => {
+                            state = state.advance_without_indenting(1)?;
+
+                            comment_line_buf.push(nonblank);
+                        }
+                    }
+                }
+            }
+        }
+
+        if space_list.is_empty() && require_at_least_one {
+            Err(unexpected_eof(chars_parsed, state.attempting, state))
+        } else {
+            Ok((space_list.into_bump_slice(), state))
+        }
+    }
+}
--- a/src/parse/ident.rs
+++ b/src/parse/ident.rs
@ -1,8 +1,9 @@
 use bumpalo::collections::string::String;
 use bumpalo::collections::vec::Vec;
 use bumpalo::Bump;
+use collections::arena_join;
 use parse::ast::Attempting;
-use parse::parser::{unexpected, unexpected_eof, Fail, ParseResult, Parser, State};
+use parse::parser::{unexpected, unexpected_eof, ParseResult, Parser, State};

 /// The parser accepts all of these in any position where any one of them could
 /// appear. This way, canonicalization can give more helpful error messages like
@ -50,40 +51,8 @@ where
    let mut part_buf = String::new_in(arena); // The current "part" (parts are dot-separated.)
    let mut capitalized_parts: Vec<&'a str> = Vec::new_in(arena);
    let mut noncapitalized_parts: Vec<&'a str> = Vec::new_in(arena);
-    let mut is_accessor_fn;
    let mut is_capitalized;
-
-    let malformed = |opt_bad_char: Option<char>| {
-        // Reconstruct the original string that we've been parsing.
-        let mut full_string = String::new_in(arena);
-
-        full_string.push_str(&capitalized_parts.join("."));
-        full_string.push_str(&noncapitalized_parts.join("."));
-
-        if let Some(bad_char) = opt_bad_char {
-            full_string.push(bad_char);
-        }
-
-        // Consume the remaining chars in the identifier.
-        let mut next_char = None;
-
-        while let Some(ch) = chars.next() {
-            // We can't use ch.is_alphanumeric() here because that passes for
-            // things that are "numeric" but not ASCII digits, like `¾`
-            if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() {
-                full_string.push(ch);
-            } else {
-                next_char = Some(ch);
-
-                break;
-            }
-        }
-
-        Ok((
-            (Ident::Malformed(&full_string), next_char),
-            state.advance_without_indenting(full_string.len())?,
-        ))
-    };
+    let is_accessor_fn;

    // Identifiers and accessor functions must start with either a letter or a dot.
    // If this starts with neither, it must be something else!
@ -125,7 +94,14 @@ where
        } else if ch.is_ascii_digit() {
            // Parts may not start with numbers!
            if part_buf.is_empty() {
-                return malformed(Some(ch));
+                return malformed(
+                    Some(ch),
+                    arena,
+                    state,
+                    chars,
+                    capitalized_parts,
+                    noncapitalized_parts,
+                );
            }

            part_buf.push(ch);
@ -135,13 +111,20 @@ where
            // 1. Having two consecutive dots is an error.
            // 2. Having capitalized parts after noncapitalized (e.g. `foo.Bar`) is an error.
            if part_buf.is_empty() || (is_capitalized && !noncapitalized_parts.is_empty()) {
-                return malformed(Some(ch));
+                return malformed(
+                    Some(ch),
+                    arena,
+                    state,
+                    chars,
+                    capitalized_parts,
+                    noncapitalized_parts,
+                );
            }

            if is_capitalized {
-                capitalized_parts.push(&part_buf);
+                capitalized_parts.push(part_buf.into_bump_str());
            } else {
-                noncapitalized_parts.push(&part_buf);
+                noncapitalized_parts.push(part_buf.into_bump_str());
            }

            // Now that we've recorded the contents of the current buffer, reset it.
@ -164,14 +147,21 @@ where
        //
        // If we made it this far and don't have a next_char, then necessarily
        // we have consumed a '.' char previously.
-        return malformed(next_char.or_else(|| Some('.')));
+        return malformed(
+            next_char.or_else(|| Some('.')),
+            arena,
+            state,
+            chars,
+            capitalized_parts,
+            noncapitalized_parts,
+        );
    }

    // Record the final parts.
    if is_capitalized {
-        capitalized_parts.push(&part_buf);
+        capitalized_parts.push(part_buf.into_bump_str());
    } else {
-        noncapitalized_parts.push(&part_buf);
+        noncapitalized_parts.push(part_buf.into_bump_str());
    }

    let answer = if is_accessor_fn {
@ -182,7 +172,14 @@ where

            Ident::AccessorFunction(value)
        } else {
-            return malformed(None);
+            return malformed(
+                None,
+                arena,
+                state,
+                chars,
+                capitalized_parts,
+                noncapitalized_parts,
+            );
        }
    } else {
        match noncapitalized_parts.len() {
@ -225,6 +222,52 @@ where
    Ok(((answer, next_char), state))
 }

+fn malformed<'a, I>(
+    opt_bad_char: Option<char>,
+    arena: &'a Bump,
+    state: State<'a>,
+    chars: &mut I,
+    capitalized_parts: Vec<&'a str>,
+    noncapitalized_parts: Vec<&'a str>,
+) -> ParseResult<'a, (Ident<'a>, Option<char>)>
+where
+    I: Iterator<Item = char>,
+{
+    // Reconstruct the original string that we've been parsing.
+    let mut full_string = String::new_in(arena);
+
+    full_string
+        .push_str(arena_join(arena, &mut capitalized_parts.into_iter(), ".").into_bump_str());
+    full_string
+        .push_str(arena_join(arena, &mut noncapitalized_parts.into_iter(), ".").into_bump_str());
+
+    if let Some(bad_char) = opt_bad_char {
+        full_string.push(bad_char);
+    }
+
+    // Consume the remaining chars in the identifier.
+    let mut next_char = None;
+
+    while let Some(ch) = chars.next() {
+        // We can't use ch.is_alphanumeric() here because that passes for
+        // things that are "numeric" but not ASCII digits, like `¾`
+        if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() {
+            full_string.push(ch);
+        } else {
+            next_char = Some(ch);
+
+            break;
+        }
+    }
+
+    let chars_parsed = full_string.len();
+
+    Ok((
+        (Ident::Malformed(full_string.into_bump_str()), next_char),
+        state.advance_without_indenting(chars_parsed)?,
+    ))
+}
+
 pub fn ident<'a>() -> impl Parser<'a, Ident<'a>> {
    move |arena: &'a Bump, state: State<'a>| {
        // Discard next_char; we don't need it.
@ -236,46 +279,46 @@ pub fn ident<'a>() -> impl Parser<'a, Ident<'a>> {

 // TESTS

-fn test_parse<'a>(input: &'a str) -> Result<Ident<'a>, Fail> {
-    let arena = Bump::new();
-    let state = State::new(input, Attempting::Expression);
+// fn test_parse<'a>(input: &'a str) -> Result<Ident<'a>, Fail> {
+//     let arena = Bump::new();
+//     let state = State::new(input, Attempting::Expression);

-    ident()
-        .parse(&arena, state)
-        .map(|(answer, _)| answer)
-        .map_err(|(err, _)| err)
-}
+//     ident()
+//         .parse(&arena, state)
+//         .map(|(answer, _)| answer)
+//         .map_err(|(err, _)| err)
+// }

-fn var<'a>(module_parts: std::vec::Vec<&'a str>, value: &'a str) -> Ident<'a> {
-    Ident::Var(MaybeQualified {
-        module_parts: module_parts.as_slice(),
-        value,
-    })
-}
+// fn var<'a>(module_parts: std::vec::Vec<&'a str>, value: &'a str) -> Ident<'a> {
+//     Ident::Var(MaybeQualified {
+//         module_parts: module_parts.as_slice(),
+//         value,
+//     })
+// }

-fn variant<'a>(module_parts: std::vec::Vec<&'a str>, value: &'a str) -> Ident<'a> {
-    Ident::Variant(MaybeQualified {
-        module_parts: module_parts.as_slice(),
-        value,
-    })
-}
+// fn variant<'a>(module_parts: std::vec::Vec<&'a str>, value: &'a str) -> Ident<'a> {
+//     Ident::Variant(MaybeQualified {
+//         module_parts: module_parts.as_slice(),
+//         value,
+//     })
+// }

-fn field<'a>(module_parts: std::vec::Vec<&'a str>, value: std::vec::Vec<&'a str>) -> Ident<'a> {
-    Ident::Field(MaybeQualified {
-        module_parts: module_parts.as_slice(),
-        value: value.as_slice(),
-    })
-}
+// fn field<'a>(module_parts: std::vec::Vec<&'a str>, value: std::vec::Vec<&'a str>) -> Ident<'a> {
+//     Ident::Field(MaybeQualified {
+//         module_parts: module_parts.as_slice(),
+//         value: value.as_slice(),
+//     })
+// }

-fn accessor_fn<'a>(value: &'a str) -> Ident<'a> {
-    Ident::AccessorFunction(value)
-}
+// fn accessor_fn<'a>(value: &'a str) -> Ident<'a> {
+//     Ident::AccessorFunction(value)
+// }

-fn malformed<'a>(value: &'a str) -> Ident<'a> {
-    Ident::Malformed(value)
-}
+// fn malformed<'a>(value: &'a str) -> Ident<'a> {
+//     Ident::Malformed(value)
+// }

-#[test]
-fn parse_var() {
-    assert_eq!(test_parse("foo"), Ok(var("foo")))
-}
+// #[test]
+// fn parse_var() {
+//     assert_eq!(test_parse("foo"), Ok(var(vec![], "foo")))
+// }
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@ -1,4 +1,5 @@
 pub mod ast;
+pub mod blankspace;
 pub mod ident;
 pub mod keyword;
 pub mod module;
@ -7,15 +8,15 @@ pub mod parser;
 pub mod problems;
 pub mod string_literal;

-use bumpalo::collections::vec::Vec;
 use bumpalo::Bump;
 use operator::Operator;
 use parse::ast::{Attempting, Expr};
+use parse::blankspace::{space0, space1_before};
 use parse::ident::{ident, Ident};
 use parse::number_literal::number_literal;
 use parse::parser::{
-    and, attempt, loc, map, map_with_arena, one_of3, one_of4, one_of6, optional, string,
-    unexpected, unexpected_eof, Either, ParseResult, Parser, State,
+    and, attempt, ch, either, loc, map, map_with_arena, one_of3, one_of4, one_of6, optional,
+    skip_first, string, unexpected, unexpected_eof, Either, ParseResult, Parser, State,
 };
 use parse::string_literal::string_literal;
 use region::Located;
@ -55,38 +56,52 @@ fn parse_expr<'a>(min_indent: u16, arena: &'a Bump, state: State<'a>) -> ParseRe
    attempt(Attempting::Expression, expr_parser).parse(arena, state)
 }

-pub fn loc_function_args<'a>(min_indent: u16) -> impl Parser<'a, &'a [Located<Expr<'a>>]> {
-    move |arena, state| {
+pub fn loc_function_args<'a>(_min_indent: u16) -> impl Parser<'a, &'a [Located<Expr<'a>>]> {
+    move |_arena, _state| {
        panic!("TODO stop early if we see an operator after the whitespace - precedence!");
        // zero_or_more(after(one_or_more(whitespace(min_indent)), function_arg()))
    }
 }

-pub fn when<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
+pub fn when<'a>(_min_indent: u16) -> impl Parser<'a, Expr<'a>> {
    map(string(keyword::WHEN), |_| {
        panic!("TODO implement WHEN");
    })
 }

 pub fn conditional<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
+    // TODO figure out how to remove this code duplication in a way rustc
+    // accepts. I tried making a helper functions and couldn't resolve the
+    // lifetime errors, so I manually inlined them and moved on.
    one_of4(
-        cond_help(keyword::IF, Expr::If, min_indent),
-        cond_help(keyword::THEN, Expr::Then, min_indent),
-        cond_help(keyword::ELSE, Expr::Else, min_indent),
-        cond_help(keyword::CASE, Expr::Case, min_indent),
-    )
-}
-
-fn cond_help<'a, F>(name: &str, wrap_expr: F, min_indent: u16) -> impl Parser<'a, Expr<'a>>
-where
-    F: Fn(&'a Located<Expr<'a>>) -> Expr<'a>,
-{
-    map(
-        after(
-            after(string(name), skip1_whitespace(min_indent)),
-            loc(expr(min_indent)),
+        map_with_arena(
+            skip_first(
+                string(keyword::IF),
+                loc(space1_before(expr(min_indent), min_indent)),
+            ),
+            |arena, loc_expr| Expr::If(arena.alloc(loc_expr)),
+        ),
+        map_with_arena(
+            skip_first(
+                string(keyword::THEN),
+                loc(space1_before(expr(min_indent), min_indent)),
+            ),
+            |arena, loc_expr| Expr::Then(arena.alloc(loc_expr)),
+        ),
+        map_with_arena(
+            skip_first(
+                string(keyword::ELSE),
+                loc(space1_before(expr(min_indent), min_indent)),
+            ),
+            |arena, loc_expr| Expr::Else(arena.alloc(loc_expr)),
+        ),
+        map_with_arena(
+            skip_first(
+                string(keyword::CASE),
+                loc(space1_before(expr(min_indent), min_indent)),
+            ),
+            |arena, loc_expr| Expr::Case(arena.alloc(loc_expr)),
        ),
-        wrap_expr,
    )
 }

@ -97,7 +112,7 @@ where
 /// 3. The beginning of a defniition (e.g. `foo =`)
 /// 4. A reserved keyword (e.g. `if ` or `case `), meaning we should do something else.
 pub fn ident_etc<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
-    let followed_by_equals = after(zero_or_more(whitespace(min_indent), char('=')));
+    let followed_by_equals = and(space0(min_indent), ch('='));

    map_with_arena(
        and(
@ -106,8 +121,9 @@ pub fn ident_etc<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
        ),
        |arena, (loc_ident, equals_or_loc_args)| {
            match equals_or_loc_args {
-                Either::First(()) => {
+                Either::First((_space_list, ())) => {
                    // We have now parsed the beginning of a def (e.g. `foo =`)
+                    panic!("TODO parse def, making sure to use the space_list we got - don't drop comments!");
                }
                Either::Second(loc_args) => {
                    // This appears to be a var, keyword, or function application.
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@ -360,13 +360,21 @@ where
    }
 }

+/// A single char.
+pub fn ch<'a>(expected: char) -> impl Parser<'a, ()> {
+    move |_arena, state: State<'a>| match state.input.chars().next() {
+        Some(actual) if expected == actual => Ok(((), state.advance_without_indenting(1)?)),
+        _ => Err(unexpected_eof(1, Attempting::Keyword, state)),
+    }
+}
+
 /// A string with no newlines in it.
 pub fn string<'a>(string: &'static str) -> impl Parser<'a, ()> {
    // We can't have newlines because we don't attempt to advance the row
    // in the state, only the column.
    debug_assert!(!string.contains("\n"));

-    move |_arena: &'a Bump, state: State<'a>| {
+    move |_arena, state: State<'a>| {
        let input = state.input;
        let len = string.len();

@ -400,38 +408,6 @@ where
    }
 }

-// pub fn any<'a>(
-//     _arena: &'a Bump,
-//     state: State<'a>,
-//     attempting: Attempting,
-// ) -> ParseResult<'a, char> {
-//     let input = state.input;
-
-//     match input.chars().next() {
-//         Some(ch) => {
-//             let len = ch.len_utf8();
-//             let mut new_state = State {
-//                 input: &input[len..],
-
-//                 ..state.clone()
-//             };
-
-//             if ch == '\n' {
-//                 new_state.line = new_state.line + 1;
-//                 new_state.column = 0;
-//             }
-
-//             Ok((new_state, ch))
-//         }
-//         _ => Err((state.clone(), attempting)),
-//     }
-// }
-
-// fn whitespace<'a>() -> impl Parser<'a, char> {
-//     // TODO advance the state appropriately, in terms of line, col, indenting, etc.
-//     satisfies(any, |ch| ch.is_whitespace())
-// }
-
 pub fn and<'a, P1, P2, A, B>(p1: P1, p2: P2) -> impl Parser<'a, (A, B)>
 where
    P1: Parser<'a, A>,
@ -462,6 +438,61 @@ where
    }
 }

+pub fn either<'a, P1, P2, A, B>(p1: P1, p2: P2) -> impl Parser<'a, Either<A, B>>
+where
+    P1: Parser<'a, A>,
+    P2: Parser<'a, B>,
+{
+    move |arena: &'a Bump, state: State<'a>| {
+        let original_attempting = state.attempting;
+
+        match p1.parse(arena, state) {
+            Ok((output, state)) => Ok((Either::First(output), state)),
+            Err((_, state)) => match p2.parse(arena, state) {
+                Ok((output, state)) => Ok((Either::Second(output), state)),
+                Err((fail, state)) => Err((
+                    Fail {
+                        attempting: original_attempting,
+                        ..fail
+                    },
+                    state,
+                )),
+            },
+        }
+    }
+}
+
+/// If the first one parses, ignore its output and move on to parse with the second one.
+pub fn skip_first<'a, P1, P2, A, B>(p1: P1, p2: P2) -> impl Parser<'a, B>
+where
+    P1: Parser<'a, A>,
+    P2: Parser<'a, B>,
+{
+    move |arena: &'a Bump, state: State<'a>| {
+        let original_attempting = state.attempting;
+
+        match p1.parse(arena, state) {
+            Ok((_, state)) => match p2.parse(arena, state) {
+                Ok((out2, state)) => Ok((out2, state)),
+                Err((fail, state)) => Err((
+                    Fail {
+                        attempting: original_attempting,
+                        ..fail
+                    },
+                    state,
+                )),
+            },
+            Err((fail, state)) => Err((
+                Fail {
+                    attempting: original_attempting,
+                    ..fail
+                },
+                state,
+            )),
+        }
+    }
+}
+
 pub fn optional<'a, P, T>(parser: P) -> impl Parser<'a, Option<T>>
 where
    P: Parser<'a, T>,
--- a/tests/helpers/mod.rs
+++ b/tests/helpers/mod.rs
@ -17,7 +17,7 @@ use roc::region::{Located, Region};

 pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>, Fail> {
    let state = State::new(&input, Attempting::Module);
-    let parser = parse::expr();
+    let parser = parse::expr(0);
    let answer = parser.parse(&arena, state);

    answer.map(|(expr, _)| expr).map_err(|(fail, _)| fail)
--- a/tests/test_format.rs
+++ b/tests/test_format.rs
@ -14,7 +14,7 @@ mod test_format {

    fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Expr<'a>, Fail> {
        let state = State::new(&input, Attempting::Module);
-        let parser = parse::expr();
+        let parser = parse::expr(0);
        let answer = parser.parse(&arena, state);

        answer.map(|(expr, _)| expr).map_err(|(fail, _)| fail)