rcl/src/parser.rs
Ruud van Asseldonk c104f06b8a Simplify the parser by adding an Eof token
I learned about this in one of Matklad's posts, it's a nice idea that
simplifies things a bit.
2024-08-24 12:30:59 +02:00

1413 lines
50 KiB
Rust

// RCL -- A reasonable configuration language.
// Copyright 2023 Ruud van Asseldonk
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// A copy of the License has been included in the root of the repository.
//! The parser converts a sequence of tokens into a Concrete Syntax Tree.
use crate::cst::{BinOp, Chain, Expr, List, NonCode, Prefixed, Seq, Stmt, StringPart, Type, UnOp};
use crate::error::{Error, IntoError, Result};
use crate::lexer::{Lexeme, QuoteStyle, StringPrefix, Token};
use crate::pprint::{concat, Doc};
use crate::source::{DocId, Span};
/// Parse an input document into a concrete syntax tree.
pub fn parse(doc: DocId, input: &str, tokens: &[Lexeme]) -> Result<(Span, Expr)> {
    let mut parser = Parser::new(doc, input, tokens);
    // Leading comments are fine, but the document must not open with blank
    // lines; discard those before parsing the body.
    parser.skip_blanks();
    let (expr_span, expr) = parser.parse_expr()?;
    // Anything left over after the top-level expression is an error.
    parser.parse_eof()?;
    Ok((expr_span, expr))
}
/// Map a token to the prefix unary operator it denotes, if any.
fn to_unop(token: Token) -> Option<UnOp> {
    Some(match token {
        Token::KwNot => UnOp::Not,
        Token::Minus => UnOp::Neg,
        _ => return None,
    })
}
/// Map a token to the binary operator it denotes, if any.
fn to_binop(token: Token) -> Option<BinOp> {
    Some(match token {
        Token::KwAnd => BinOp::And,
        Token::KwOr => BinOp::Or,
        Token::Pipe => BinOp::Union,
        Token::Plus => BinOp::Add,
        Token::Minus => BinOp::Sub,
        Token::Star => BinOp::Mul,
        Token::Slash => BinOp::Div,
        Token::Lt => BinOp::Lt,
        Token::Gt => BinOp::Gt,
        Token::LtEq => BinOp::LtEq,
        Token::GtEq => BinOp::GtEq,
        Token::Eq2 => BinOp::Eq,
        Token::Neq => BinOp::Neq,
        _ => return None,
    })
}
/// Recursive-descent parser state over a lexed token stream.
struct Parser<'a> {
    /// Id of the document being parsed; used when constructing spans.
    doc: DocId,
    /// The source text; spans are resolved against this.
    input: &'a str,
    /// The token stream produced by the lexer, each with its span.
    tokens: &'a [(Token, Span)],
    /// Index into `tokens` of the next token to inspect.
    cursor: usize,
    /// The unclosed opening brackets (all of `()`, `[]`, `{}`) encountered.
    bracket_stack: Vec<(Token, Span)>,
    /// The last known valid location where a comment was allowed.
    ///
    /// This is used in error reporting to provide a hint for where to place the
    /// comment.
    comment_anchor: Span,
    /// The depth of parsing expressions and sequences, to prevent stack
    /// overflow.
    depth: u32,
}
impl<'a> Parser<'a> {
/// Create a parser for `tokens`, positioned at the first token.
pub fn new(doc: DocId, input: &'a str, tokens: &'a [(Token, Span)]) -> Parser<'a> {
    Parser {
        doc,
        input,
        tokens,
        cursor: 0,
        bracket_stack: Vec::new(),
        // Before anything is parsed, the start of the document is the only
        // place where a comment could be inserted.
        comment_anchor: Span::new(doc, 0, 0),
        depth: 0,
    }
}
/// Return the token under the cursor.
fn peek(&self) -> Token {
self.peek_n(0)
}
/// Return the next code token, ignoring whitespace and non-code.
fn peek_past_non_code(&self) -> Token {
    for &(token, _span) in &self.tokens[self.cursor..] {
        match token {
            // Blanks, comments, and shebangs are non-code; look past them.
            Token::Blank | Token::LineComment | Token::Shebang => continue,
            code => return code,
        }
    }
    Token::Eof
}
/// Return the token `offset` tokens after the cursor, or `Eof` if out of range.
fn peek_n(&self, offset: usize) -> Token {
    match self.tokens.get(self.cursor + offset) {
        Some(&(token, _span)) => token,
        None => Token::Eof,
    }
}
/// Return the span under the cursor, or an empty span at the end of the document.
fn peek_span(&self) -> Span {
    let end_of_doc = Span::new(self.doc, self.input.len(), self.input.len());
    self.tokens.get(self.cursor).map_or(end_of_doc, |t| t.1)
}
/// Build a parse error at the current cursor location.
fn error(&self, message: &'static str) -> Error {
    let at = self.peek_span();
    at.error(message)
}
/// Advance the cursor by one token, consuming the token under the cursor.
///
/// Returns the span of the consumed token.
fn consume(&mut self) -> Span {
    let result = self.tokens[self.cursor].1;
    self.cursor += 1;
    debug_assert!(
        self.cursor <= self.tokens.len(),
        // coverage:off -- Error not expected to be hit.
        "Cursor should not move past the end of the token stream.",
        // coverage:on
    );
    result
}
/// Bump the nesting depth, failing when the recursion limit is reached.
fn increase_depth(&mut self) -> Result<()> {
    // Keep deeply nested documents from overflowing the native stack.
    const MAX_DEPTH: u32 = 100;
    self.depth += 1;
    if self.depth < MAX_DEPTH {
        Ok(())
    } else {
        self.error("Parser recursion limit reached, please reduce nesting.")
            .err()
    }
}
/// Undo one `increase_depth`; must be balanced with a prior increase.
fn decrease_depth(&mut self) {
    debug_assert!(self.depth > 0, "Expression depth underflow.");
    self.depth -= 1;
}
/// Return the span from start until (but not including) the cursor, stripping trailing noncode.
fn span_from(&self, start: Span) -> Span {
    // Walk backwards over what we already consumed; the last token that is
    // neither a blank nor a comment marks where the meaningful content ends.
    let last_code = self.tokens[..self.cursor]
        .iter()
        .rev()
        .find(|t| !matches!(t.0, Token::Blank | Token::LineComment))
        .expect("If we pushed a start, we should find at least that.");
    Span::new(self.doc, start.start(), last_code.1.end())
}
/// Push an opening bracket onto the stack of brackets when inside a query.
///
/// Consumes the token under the cursor.
fn push_bracket(&mut self) -> Result<Span> {
    self.increase_depth()?;
    let open = self.tokens[self.cursor];
    // Only the three opening brackets are valid here; anything else is a
    // bug in the caller, not a parse error.
    match open.0 {
        Token::LBrace | Token::LParen | Token::LBracket => {}
        invalid => unreachable!("Invalid token for `push_bracket`: {:?}", invalid),
    }
    let span = self.consume();
    self.bracket_stack.push(open);
    Ok(span)
}
/// Pop a closing bracket while verifying that it is the right one.
///
/// Consumes the token under the cursor.
fn pop_bracket(&mut self) -> Result<Span> {
    self.decrease_depth();
    let top = self
        .bracket_stack
        .pop()
        .expect("If brackets were unmatched, lexing would have failed.");
    // Determine, from the opening bracket, which closer we expect and which
    // error to report if we do not find it.
    let (expected, message, note) = match top.0 {
        Token::LParen => (Token::RParen, "Expected ')'.", "Unmatched '(' opened here."),
        Token::LBrace => (Token::RBrace, "Expected '}'.", "Unmatched '{' opened here."),
        Token::LBracket => (Token::RBracket, "Expected ']'.", "Unmatched '[' opened here."),
        invalid => unreachable!("Invalid token on bracket stack: {:?}", invalid),
    };
    if self.tokens.get(self.cursor).map(|t| t.0) == Some(expected) {
        return Ok(self.consume());
    }
    // The lexer ensures matching brackets, but even in a document where
    // that is the case, we may still encounter a different token where the
    // parser expects a closing bracket. E.g. in `{1 1}`.
    self.error(message).with_note(top.1, note).err()
}
/// Eat comments and whitespace.
///
/// This may advance the cursor even if it returns `None`, when the
/// whitespace was significant enough to keep.
#[must_use]
fn parse_non_code(&mut self) -> Box<[NonCode]> {
    let mut pieces = Vec::new();
    loop {
        let piece = match self.peek() {
            Token::LineComment => NonCode::LineComment(self.consume()),
            Token::Shebang => NonCode::Shebang(self.consume()),
            Token::Blank => NonCode::Blank(self.consume()),
            _ => break,
        };
        pieces.push(piece);
    }
    // The first non-space token is the last location where a comment could
    // have been inserted. Record that, so we can suggest this place in case
    // an invalid comment is encountered.
    let anchor = self.peek_span();
    self.comment_anchor = Span::new(self.doc, anchor.start(), anchor.start());
    pieces.into_boxed_slice()
}
/// Skip over any blank line tokens.
fn skip_blanks(&mut self) {
    while matches!(self.peek(), Token::Blank) {
        self.consume();
    }
}
/// Skip over any non-code tokens.
///
/// Unlike `parse_non_code`, a comment is not allowed here; encountering one
/// is an error that points at the nearest place a comment could go.
fn skip_non_code(&mut self) -> Result<()> {
    while self.peek() == Token::Blank {
        self.consume();
    }
    if self.peek() != Token::LineComment {
        return Ok(());
    }
    self.error("A comment is not allowed here.")
        .with_note(
            self.comment_anchor,
            "Try inserting the comment above this instead.",
        )
        .err()
}
/// Expect an identifier, consuming it and returning its span.
fn parse_ident(&mut self) -> Result<Span> {
    if let Token::Ident = self.peek() {
        Ok(self.consume())
    } else {
        self.error("Expected an identifier here.").err()
    }
}
/// Consume the given token, report an error otherwise.
fn parse_token(&mut self, expected: Token, error: &'static str) -> Result<Span> {
    if self.peek() == expected {
        Ok(self.consume())
    } else {
        self.error(error).err()
    }
}
/// Consume the given token, report an error with note otherwise.
fn parse_token_with_note(
    &mut self,
    expected: Token,
    error: &'static str,
    note_span: Span,
    note: &'static str,
) -> Result<Span> {
    if self.peek() == expected {
        Ok(self.consume())
    } else {
        self.error(error).with_note(note_span, note).err()
    }
}
/// Parse non-code, then run `parse_inner`, packaging both into a `Prefixed`.
fn parse_prefixed<T, F>(&mut self, parse_inner: F) -> Result<Prefixed<T>>
where
    F: Fn(&mut Self) -> Result<T>,
{
    let prefix = self.parse_non_code();
    let inner = parse_inner(self)?;
    Ok(Prefixed { prefix, inner })
}
/// Parse a top-level expression, which may start with a list of statements.
///
/// Returns the span of the expression body (excluding any leading
/// statements; see the note at the bottom) together with the parsed
/// expression.
fn parse_expr(&mut self) -> Result<(Span, Expr)> {
    // Increase the depth once, this depth applies to all statements
    // and also the expression.
    self.increase_depth()?;
    let mut statements = Vec::new();
    loop {
        let prefix = self.parse_non_code();
        let begin = self.peek_span();
        match self.peek() {
            // A statement keyword: collect the statement and keep looping,
            // the expression body must still follow.
            Token::KwAssert | Token::KwLet | Token::KwTrace => {
                let stmt = self.parse_stmt()?;
                let prefixed = Prefixed {
                    prefix,
                    inner: stmt,
                };
                let span = self.span_from(begin);
                statements.push((span, prefixed));
            }
            _ => {
                let expr = self.parse_expr_no_stmt()?;
                let span = self.span_from(begin);
                self.decrease_depth();
                // Do not make the CST deeper than it needs to be. If there
                // are no statements, there is no need for a wrapping node.
                if statements.is_empty() && prefix.is_empty() {
                    return Ok((span, expr));
                }
                let expr = Expr::Statements {
                    stmts: statements,
                    body_span: span,
                    body: Box::new(Prefixed {
                        prefix,
                        inner: expr,
                    }),
                };
                // We have a choice of what span to return here.
                // What is the span for an expression preceded by statements?
                // Does it include the statements or not? Let's say for now
                // it does not, because the entire expression evaluates to
                // its body anyway, so that is the span that matters. If it
                // leads to confusing errors, we can re-evaluate this.
                return Ok((span, expr));
            }
        }
    }
}
/// Parse an expression that is known to not be a statement.
fn parse_expr_no_stmt(&mut self) -> Result<Expr> {
    if self.peek() == Token::KwIf {
        self.parse_expr_if()
    } else {
        let (_span, expr) = self.parse_expr_op()?;
        Ok(expr)
    }
}
/// Parse an if-then-else expression; the cursor must be on the `if` keyword.
fn parse_expr_if(&mut self) -> Result<Expr> {
    // Consume the `if` keyword.
    let if_span = self.consume();
    // We do not allow non-code between the if and the condition, and for
    // the condition, we do not allow if or statements. There is no technical
    // need to limit this, we could use `parse_expr`, but the resulting CST
    // is a royal pain to format in a pleasant way. What to do with blank
    // lines, how do you indent? And `if if` looks confusing anyway. If you
    // want an expr there you can still do it, just put parens around it.
    self.skip_non_code()?;
    let (condition_span, condition) = self.parse_expr_op()?;
    self.skip_non_code()?;
    self.parse_token(Token::Colon, "Expected ':' after the condition.")?;
    let (then_span, then_body) = self.parse_expr()?;
    self.skip_non_code()?;
    self.parse_token_with_note(
        Token::KwElse,
        "Expected 'else' here.",
        if_span,
        "To match this 'if'.",
    )?;
    // Allow a colon directly after `else` (with no tokens in between), but
    // do not demand it. I was ambivalent about it in the past, up to RCL
    // 0.5.0 the syntax was to omit the colon, but I think it makes more
    // sense to have it. It should become mandatory in some future release,
    // but for now it can be optional. TODO: Make it mandatory.
    if self.peek() == Token::Colon {
        self.consume();
    }
    let (else_span, else_body) = self.parse_expr()?;
    let result = Expr::IfThenElse {
        condition_span,
        condition: Box::new(condition),
        then_span,
        then_body: Box::new(then_body),
        else_span,
        else_body: Box::new(else_body),
    };
    Ok(result)
}
/// Parse an import expression; the cursor must be on the `import` keyword.
fn parse_expr_import(&mut self) -> Result<(Span, Expr)> {
    // Consume the `import` keyword.
    let import_span = self.consume();
    let (path_span, path) = self.parse_expr()?;
    let expr = Expr::Import {
        path_span,
        path: Box::new(path),
    };
    // The reported span covers the keyword through the path expression.
    let full_span = import_span.union(path_span);
    Ok((full_span, expr))
}
/// Parse the statement under the cursor.
///
/// The caller must have verified that the cursor is on `assert`, `let`, or
/// `trace`; any other token is a bug in the caller, not a parse error.
#[inline]
fn parse_stmt(&mut self) -> Result<Stmt> {
    match self.peek() {
        Token::KwAssert => self.parse_stmt_assert(),
        Token::KwLet => self.parse_stmt_let(),
        Token::KwTrace => self.parse_stmt_trace(),
        _ => panic!("Should only be called at 'assert', 'let', or 'trace'."),
    }
}
/// Parse an assert statement; the cursor must be on the `assert` keyword.
fn parse_stmt_assert(&mut self) -> Result<Stmt> {
    // Consume the `assert` keyword.
    let assert_span = self.consume();
    self.skip_non_code()?;
    let (condition_span, condition) = self.parse_expr()?;
    // After the condition is a comma, but if the user wrote a semicolon,
    // then explain that the message is not optional (unlike in Python).
    self.skip_non_code()?;
    match self.peek() {
        Token::Comma => self.consume(),
        Token::Semicolon => {
            return self
                .error("Expected ',' here between the assertion condition and message.")
                .with_help(
                    "An assertion has the form 'assert <condition>, <message>;'. \
                    The message is not optional.",
                )
                .err();
        }
        _ => {
            return self
                .error("Expected ',' here between the assertion condition and message.")
                .err()
        }
    };
    self.skip_non_code()?;
    let (message_span, message) = self.parse_expr()?;
    self.skip_non_code()?;
    self.parse_token_with_note(
        Token::Semicolon,
        "Expected ';' here to close the assertion.",
        assert_span,
        "Assertion opened here.",
    )?;
    let result = Stmt::Assert {
        condition_span,
        condition: Box::new(condition),
        message_span,
        message: Box::new(message),
    };
    Ok(result)
}
/// Parse a let binding; the cursor must be on the `let` keyword.
///
/// Handles both `let x = ...;` and the annotated form `let x: T = ...;`.
fn parse_stmt_let(&mut self) -> Result<Stmt> {
    // Consume the `let` keyword.
    let let_ = self.consume();
    self.skip_non_code()?;
    let ident = self.parse_ident()?;
    // Parse the optional type signature, and then the '='.
    self.skip_non_code()?;
    let type_: Option<Box<Type>> = match self.peek() {
        Token::Colon => {
            self.consume();
            self.skip_non_code()?;
            let type_ = self.parse_type_expr()?;
            // After the type annotation, only `=` is valid, but if we see
            // something that looks like it might be part of a function
            // type, educate the user about how to do that.
            match self.peek() {
                Token::Eq1 => self.consume(),
                Token::FatArrow => {
                    return self
                        .error("Expected '=' after type annotation.")
                        .with_help(
                            "Function types require parentheses \
                            and use '->' instead of '=>', e.g. '(Int) -> Bool'.",
                        )
                        .err();
                }
                Token::ThinArrow => {
                    return self
                        .error("Expected '=' after type annotation.")
                        .with_help("Function types require parentheses, e.g. '(Int) -> Bool'.")
                        .err();
                }
                _ => return self.error("Expected '=' after type annotation.").err(),
            };
            Some(Box::new(type_))
        }
        Token::Eq1 => {
            // No type annotation, just `=`.
            self.consume();
            None
        }
        _ => return self.error("Expected '=' or ':' here.").err(),
    };
    self.skip_non_code()?;
    let (value_span, value) = self.parse_expr()?;
    self.skip_non_code()?;
    self.parse_token_with_note(
        Token::Semicolon,
        "Expected ';' here to close the let-binding.",
        let_,
        "Let-binding opened here.",
    )?;
    let result = Stmt::Let {
        ident,
        type_,
        value_span,
        value: Box::new(value),
    };
    Ok(result)
}
/// Parse a trace statement; the cursor must be on the `trace` keyword.
fn parse_stmt_trace(&mut self) -> Result<Stmt> {
    // Consume the `trace` keyword, then the message expression.
    let trace_span = self.consume();
    self.skip_non_code()?;
    let (message_span, message) = self.parse_expr()?;
    self.skip_non_code()?;
    self.parse_token_with_note(
        Token::Semicolon,
        "Expected ';' here to close the trace expression.",
        trace_span,
        "Trace opened here.",
    )?;
    Ok(Stmt::Trace {
        message_span,
        message: Box::new(message),
    })
}
/// Return an error with hint if there is a known bad unary operator under the cursor.
fn check_bad_unop(&self) -> Result<()> {
    match self.peek() {
        // `!` is not an operator in RCL; point the user at `not`.
        Token::Bang => self
            .error("Invalid operator. Negation is written with keyword 'not' instead of '!'.")
            .err(),
        _ => Ok(()),
    }
}
/// Check if we should parse a function (`true`) or a different expression.
///
/// There is an ambiguity in e.g. `(x)`, which could be an identifier in
/// parens as an expression, or it could be the argument list of a lambda,
/// and we don't know that until we see the token after it. So look ahead
/// until we either see a `=>` and we know it's a lambda, or until we see
/// some violation and we know it's not a lambda.
///
/// TODO: This is getting complex. I should consider using a prefix token
/// to disambiguate lambdas after all.
///
/// TODO II: A better way to handle this: when closing a matching ), write
/// to a side buffer the index of the token next to the opening (. That way,
/// when we have a (, we can jump ahead to the closing ), and peek what
/// comes after that closing ).
fn look_ahead_is_function(&mut self) -> bool {
    let mut offset = 0;
    match self.peek() {
        // A bare identifier could be a single-argument lambda, `x => ...`.
        Token::Ident => offset = 1,
        Token::LParen => {
            // Find the next closing paren, and continue parsing from there.
            // We don't have to be exact here, because this is only used to
            // look ahead to see if we should parse a lambda or expr. We
            // don't consider unbalanced parens, and we also don't return
            // false early even if we see a token that would be invalid for
            // a lambda. We do this to get more helpful errors, e.g. if you
            // write `(x, [y]) => x + y`, then it still looks like the
            // intent was a lambda and we can error on the `[`, rather than
            // trying to parse an expression and failing on the `,`.
            for i in 1.. {
                match self.peek_n(i) {
                    Token::RParen => {
                        offset = i + 1;
                        break;
                    }
                    Token::Eof => unreachable!("The lexer returns balanced parens."),
                    _ => continue,
                }
            }
        }
        _ => return false,
    };
    // After the candidate argument list, skip non-code; only a `=>`
    // confirms that this is really a lambda.
    for i in offset.. {
        match self.peek_n(i) {
            Token::LineComment => continue,
            Token::Blank => continue,
            Token::FatArrow => return true,
            _ => return false,
        }
    }
    unreachable!("We'd run out of input before the loop ends.")
}
/// Try parsing a lambda function expression.
///
/// The cursor must be on an identifier (single-argument form) or on the
/// `(` that opens the argument list; `look_ahead_is_function` checks this.
fn parse_expr_function(&mut self) -> Result<(Span, Expr)> {
    let begin = self.peek_span();
    let args = match self.peek() {
        // Single argument without parens, e.g. `x => ...`; synthesize a
        // one-element argument list.
        Token::Ident => {
            let prefixed = Prefixed {
                prefix: [].into(),
                inner: self.consume(),
            };
            List {
                elements: [prefixed].into(),
                suffix: [].into(),
                trailing_comma: false,
            }
        }
        Token::LParen => {
            self.push_bracket()?;
            let args = self.parse_function_args()?;
            self.pop_bracket()?;
            args
        }
        _ => panic!("Should only call `parse_expr_function` on a lambda."),
    };
    self.skip_non_code()?;
    self.parse_token(Token::FatArrow, "Expected '=>' here.")?;
    self.skip_non_code()?;
    let (body_span, body) = self.parse_expr()?;
    let result = Expr::Function {
        args,
        body_span,
        body: Box::new(body),
    };
    Ok((self.span_from(begin), result))
}
/// Parse an expression at the operator level: unary and binary operator
/// chains, lambdas, and imports.
fn parse_expr_op(&mut self) -> Result<(Span, Expr)> {
    // First we check all the rules for prefix unary operators.
    self.check_bad_unop()?;
    if to_unop(self.peek()).is_some() {
        return self.parse_expr_unop();
    }
    // Instead of an operator chain, it could still be an import or lambda,
    // and those cannot be followed by operators, they return here.
    if self.look_ahead_is_function() {
        return self.parse_expr_function();
    }
    if self.peek() == Token::KwImport {
        return self.parse_expr_import();
    }
    let (mut lhs_span, mut result) = self.parse_expr_not_op()?;
    // We might have binary operators following. If we find one, then
    // all the other ones must be of the same type, to avoid unclear
    // situations like whether "a and b or c" means "(a and b) or c"
    // or "a and (b or c)".
    let mut allowed_op = None;
    let mut allowed_span = None;
    loop {
        self.skip_non_code()?;
        match to_binop(self.peek()) {
            // Either the first operator we see, or a repeat of the one
            // operator that this chain is already committed to.
            Some(op) if allowed_op.is_none() || allowed_op == Some(op) => {
                let span = self.consume();
                self.skip_non_code()?;
                let (rhs_span, rhs) = self.parse_expr_not_op()?;
                allowed_span = Some(span);
                allowed_op = Some(op);
                // Left-associative: fold the new operand into the result.
                result = Expr::BinOp {
                    op,
                    op_span: span,
                    lhs_span,
                    lhs: Box::new(result),
                    rhs_span,
                    rhs: Box::new(rhs),
                };
                lhs_span = lhs_span.union(rhs_span);
            }
            // A different operator in the same chain: demand parens.
            Some(_op) => {
                return self.error(
                    "Parentheses are needed to clarify the precedence of this operator.",
                ).with_note(
                    allowed_span.expect("If we are here, allowed_span must be set."),
                    "Without parentheses, it is not clear whether this operator should take precedence.",
                ).err();
            }
            _ => return Ok((lhs_span, result)),
        }
    }
}
/// Parse a prefix unary operator expression; a unop token must be under the cursor.
fn parse_expr_unop(&mut self) -> Result<(Span, Expr)> {
    let op = to_unop(self.peek()).expect("Should only call this with unop under cursor.");
    let span = self.consume();
    self.skip_non_code()?;
    self.check_bad_unop()?;
    // Nested unary expressions are okay.
    let (body_span, body) = if to_unop(self.peek()).is_some() {
        self.parse_expr_unop()?
    } else {
        self.parse_expr_not_op()?
    };
    let result = Expr::UnOp {
        op_span: span,
        op,
        body_span,
        body: Box::new(body),
    };
    // Check if the expression is followed by a binary operator. This is
    // not allowed in the grammar on purpose to force parens to clarify
    // precedence, but if we don't check for it, then the resulting
    // parse error is confusing, about unexpected content after the end
    // of the expression/document.
    self.skip_non_code()?;
    if to_binop(self.peek()).is_some() {
        return self
            .error("Parentheses are needed to clarify the precedence of this operator.")
            .with_note(
                span,
                "Without parentheses, it is not clear whether this operator \
                applies only to the left-hand side, or the full expression.",
            )
            .err();
    }
    let result_span = span.until(body_span);
    Ok((result_span, result))
}
/// Parse a term followed by an optional chain of calls `(..)`, indexes
/// `[..]`, and field accesses `.field`.
fn parse_expr_not_op(&mut self) -> Result<(Span, Expr)> {
    // TODO: check for operators before, and report a pretty error
    // to clarify that parens must be used to disambiguate.
    let begin = self.peek_span();
    let base_expr = self.parse_expr_term()?;
    let mut inner_span;
    let mut chain = Vec::new();
    loop {
        // The span of everything parsed so far, used as the lhs span of
        // the next chain element.
        inner_span = self.span_from(begin);
        self.skip_non_code()?;
        match self.peek() {
            // A call, e.g. `f(x)`.
            Token::LParen => {
                let open = self.push_bracket()?;
                let args = self.parse_call_args()?;
                let close = self.pop_bracket()?;
                let chain_expr = Chain::Call { open, close, args };
                chain.push((inner_span, chain_expr));
            }
            // An index, e.g. `xs[i]`.
            Token::LBracket => {
                let open = self.push_bracket()?;
                let (index_span, index) = self.parse_expr()?;
                let close = self.pop_bracket()?;
                let chain_expr = Chain::Index {
                    open,
                    close,
                    index_span,
                    index: Box::new(index),
                };
                chain.push((inner_span, chain_expr));
            }
            // A field access, e.g. `record.field`.
            Token::Dot => {
                self.consume();
                self.skip_non_code()?;
                let field = self.parse_token(Token::Ident, "Expected an identifier here.")?;
                let chain_expr = Chain::Field { field };
                chain.push((inner_span, chain_expr));
            }
            _ => {
                // If it's not any of those cases, then the chain ends here.
                // If we have no chained expressions then keep the CST small,
                // if we have then we wrap it into a Chain node.
                if chain.is_empty() {
                    return Ok((inner_span, base_expr));
                } else {
                    let expr = Expr::Chain {
                        base_expr: Box::new(base_expr),
                        chain,
                    };
                    return Ok((inner_span, expr));
                }
            }
        }
    }
}
/// Parse a term: a literal, variable, string, or bracketed expression.
fn parse_expr_term(&mut self) -> Result<Expr> {
    match self.peek() {
        // `{ ... }`: a set or record literal.
        Token::LBrace => {
            let open = self.push_bracket()?;
            let elements = self.parse_seqs()?;
            let close = self.pop_bracket()?;
            let result = Expr::BraceLit {
                open,
                close,
                elements,
            };
            Ok(result)
        }
        // `[ ... ]`: a list literal.
        Token::LBracket => {
            let open = self.push_bracket()?;
            let elements = self.parse_seqs()?;
            let close = self.pop_bracket()?;
            let result = Expr::BracketLit {
                open,
                close,
                elements,
            };
            Ok(result)
        }
        // `( ... )`: a parenthesized expression.
        Token::LParen => {
            let open = self.push_bracket()?;
            let (body_span, body) = self.parse_expr()?;
            let close = self.pop_bracket()?;
            let result = Expr::Parens {
                open,
                close,
                body_span,
                body: Box::new(body),
            };
            Ok(result)
        }
        Token::QuoteOpen(prefix, style) => self.parse_string(prefix, style),
        Token::KwNull => Ok(Expr::NullLit(self.consume())),
        Token::KwTrue => Ok(Expr::BoolLit(self.consume(), true)),
        Token::KwFalse => Ok(Expr::BoolLit(self.consume(), false)),
        Token::NumHexadecimal => Ok(Expr::NumHexadecimal(self.consume())),
        Token::NumBinary => Ok(Expr::NumBinary(self.consume())),
        Token::NumDecimal => Ok(Expr::NumDecimal(self.consume())),
        Token::Ident => Ok(Expr::Var(self.consume())),
        // Some tokens are valid starts of an expression, but just not at
        // the term level. For those, we can recommend the user to wrap
        // everything in parens, because then it would be allowed.
        Token::KwLet | Token::KwAssert | Token::KwTrace | Token::KwIf => self
            .error("Expected a term here.")
            .with_help("If this should be an expression, try wrapping it in parentheses.")
            .err(),
        _ => self.error("Expected a term here.").err(),
    }
}
/// Consume a string inner span, ensuring that multiline strings start with a newline.
///
/// If we allowed content between the opening quote and the first line break,
/// the following string would be ambiguous:
/// ```text
/// let ambiguous = """ foo
/// bar""";
/// let candidate1 = " foo\n bar";
/// let candidate2 = "foo\n bar";
/// let candidate3 = " foo\nbar";
/// ```
/// Which of the candidates do you expect the ambiguous string to be equal
/// to? To avoid such confusion, we do not allow this.
///
/// There is a form we *could* allow: strings with no line breaks at all.
/// ```text
/// """"It's too bad she won't live!", said Gaff."""
/// ```
/// would be unambiguous, and it may be nice to not have to escape the `"`.
/// We might support this at a later time, but it makes handling of the
/// string literals messy, so for now we enforce the line break.
///
/// In addition, this method ensures that if two [`StringPart::String`] are
/// consecutive, the second one starts with a line break. The lexer may break
/// up the string into multiple tokens, but here we merge them again. It
/// simplifies the formatter when it can assume that consecutive string
/// parts are really separate lines.
fn parse_string_inner(&mut self, style: QuoteStyle, into: &mut Vec<StringPart>) -> Result<()> {
    let inner = self.consume();
    let inner_str = inner.resolve(self.input);
    let is_new_line = !inner_str.is_empty() && inner_str.as_bytes()[0] == b'\n';
    // A triple-quoted string's first content part must begin with `\n`.
    if style == QuoteStyle::Triple && into.is_empty() && !is_new_line {
        return inner
            .error("Expected a line break after the \"\"\". Move this to the next line.")
            .err();
    }
    match into.last_mut() {
        // Not a new line: merge with the preceding string part by widening
        // its span, so consecutive parts always start on new lines.
        Some(StringPart::String(span)) if !is_new_line => *span = span.union(inner),
        _ => into.push(StringPart::String(inner)),
    }
    Ok(())
}
/// Parse a string literal; the cursor must be on the opening quote token.
///
/// Handles regular and format strings, including escape sequences and
/// `{..}` holes that contain full expressions.
fn parse_string(&mut self, prefix: StringPrefix, style: QuoteStyle) -> Result<Expr> {
    let open = self.consume();
    let mut parts = Vec::new();
    // Track whether we saw at least one `{..}` hole, to reject format
    // strings that do not need to be format strings.
    let mut has_hole = false;
    loop {
        match self.peek() {
            Token::StringInner => {
                self.parse_string_inner(style, &mut parts)?;
            }
            Token::Escape(esc) => {
                parts.push(StringPart::Escape(self.consume(), esc));
            }
            Token::HoleOpen => {
                // Consume the opening `{`.
                let hole_open = self.consume();
                let (span, expr) = self.parse_expr()?;
                parts.push(StringPart::Hole(span, expr));
                self.parse_token_with_note(
                    Token::HoleClose,
                    "Expected '}' here to close format string hole.",
                    hole_open,
                    "Unmatched '{' opened here.",
                )?;
                has_hole = true;
            }
            Token::QuoteClose => {
                let close = self.consume();
                if prefix == StringPrefix::Format && !has_hole {
                    let span = open.union(close);
                    return span
                        .error("This format string has no holes, it can be a regular string.")
                        .err();
                }
                let result = Expr::StringLit {
                    prefix,
                    style,
                    open,
                    close,
                    parts,
                };
                return Ok(result);
            }
            invalid => panic!("The lexer should not have produced {invalid:?} in a string."),
        }
    }
}
/// Parse arguments in a lambda function definition.
///
/// The caller has consumed the opening `(`; this stops at the matching `)`
/// without consuming it.
fn parse_function_args(&mut self) -> Result<List<Prefixed<Span>>> {
    let mut result = Vec::new();
    let mut trailing_comma = false;
    loop {
        let prefix = self.parse_non_code();
        // A `)` ends the argument list; any non-code we just parsed
        // becomes the list's suffix.
        if self.peek() == Token::RParen {
            let final_result = List {
                elements: result.into_boxed_slice(),
                suffix: prefix,
                trailing_comma,
            };
            return Ok(final_result);
        }
        let ident = self.parse_ident()?;
        let prefixed = Prefixed {
            prefix,
            inner: ident,
        };
        result.push(prefixed);
        trailing_comma = false;
        self.skip_non_code()?;
        match self.peek() {
            Token::RParen => continue,
            Token::Comma => {
                self.consume();
                trailing_comma = true;
                continue;
            }
            _ => {
                // If we don't find a separator, nor the end of the args,
                // that's an error. We can report an unmatched bracket
                // as the problem, because it is.
                self.pop_bracket()?;
                unreachable!("pop_bracket should have failed.");
            }
        }
    }
}
/// Parse arguments in a function call.
///
/// The caller has consumed the opening `(`; this stops at the matching `)`
/// without consuming it.
fn parse_call_args(&mut self) -> Result<List<(Span, Expr)>> {
    let mut result = Vec::new();
    let mut trailing_comma = false;
    loop {
        // Look past non-code first, so trailing comments end up in the
        // list's suffix rather than being rejected by `parse_expr`.
        if self.peek_past_non_code() == Token::RParen {
            let suffix = self.parse_non_code();
            let final_result = List {
                elements: result.into_boxed_slice(),
                suffix,
                trailing_comma,
            };
            return Ok(final_result);
        }
        result.push(self.parse_expr()?);
        trailing_comma = false;
        self.skip_non_code()?;
        match self.peek() {
            Token::RParen => continue,
            Token::Comma => {
                self.consume();
                trailing_comma = true;
                continue;
            }
            _ => {
                // If we don't find a separator, nor the end of the args,
                // that's an error. We can report an unmatched bracket
                // as the problem, because it is.
                self.pop_bracket()?;
                unreachable!("pop_bracket should have failed.");
            }
        }
    }
}
/// Parse sequence elements.
///
/// This corresponds to `seqs` in the grammar, but it is slightly different
/// from the rule there to be able to incorporate noncode.
///
/// The caller has consumed the opening `{` or `[`; this stops at the
/// matching closing bracket without consuming it.
fn parse_seqs(&mut self) -> Result<List<Prefixed<Seq>>> {
    let mut result = Vec::new();
    let mut trailing_comma = false;
    loop {
        let prefix = self.parse_non_code();
        // A closing bracket ends the sequence; the non-code we just parsed
        // becomes the list's suffix.
        if matches!(self.peek(), Token::RBrace | Token::RBracket) {
            let final_result = List {
                elements: result.into_boxed_slice(),
                suffix: prefix,
                trailing_comma,
            };
            return Ok(final_result);
        }
        let (_span, seq) = self.parse_seq()?;
        let prefixed = Prefixed { prefix, inner: seq };
        result.push(prefixed);
        trailing_comma = false;
        self.skip_non_code()?;
        match self.peek() {
            Token::RBrace | Token::RBracket => continue,
            Token::Comma => {
                self.consume();
                trailing_comma = true;
                continue;
            }
            // All of the next tokens are unexpected, but we add special
            // errors for them to help the user along.
            Token::Semicolon => {
                return self.error("Expected ',' instead of ';' here.").err();
            }
            Token::KwElse => {
                return self
                    .pop_bracket()
                    .expect_err("We are in a seq.")
                    .with_help(concat! {
                        "Inside a comprehension, '"
                        Doc::highlight("if")
                        "' controls the loop, there is no '" Doc::highlight("else") "' part."
                        Doc::Sep
                        "To use an if-else expression inside a comprehension, "
                        "enclose the expression in parentheses."
                    })
                    .err();
            }
            // If we don't find a separator, nor the end of the collection
            // literal, that's an error. We can report an unmatched bracket
            // as the problem, because it is. The pop will fail. If we see
            // an '=' maybe the user tried to make a key-value mapping and
            // we can report a better error.
            Token::Eq1 => {
                return self
                    .pop_bracket()
                    .expect_err("We are in a seq.")
                    .with_help(concat! {
                        "To use '"
                        Doc::highlight("key = value")
                        "' record notation, the left-hand side must be an identifier."
                        Doc::Sep
                        "When that is not possible, use json-style '"
                        Doc::highlight("\"key\": value")
                        "' instead."
                    })
                    .err();
            }
            _ => {
                self.pop_bracket()?;
                unreachable!("pop_bracket should have failed.");
            }
        }
    }
}
/// Parse a sequence element with leading non-code, returning its span alongside.
pub fn parse_prefixed_seq(&mut self) -> Result<(Span, Prefixed<Seq>)> {
    let ps = self.parse_prefixed(|s| s.parse_seq())?;
    // `parse_seq` returns (span, seq); hoist the span out of the wrapper.
    let (span, seq) = ps.inner;
    let prefixed = Prefixed {
        prefix: ps.prefix,
        inner: seq,
    };
    Ok((span, prefixed))
}
/// Parse a single sequence element: a plain element, a key-value
/// association, a statement prefix, or a for/if comprehension clause.
fn parse_seq(&mut self) -> Result<(Span, Seq)> {
    let begin = self.peek_span();
    // Here we have a lookahead of two tokens ... not great if we want to
    // keep the grammar simple, but for making the syntax prettier it is
    // worth some complications to allow { a = b; p = q } notation.
    let next1 = self.peek();
    let next2 = self.peek_n(1);
    let result = match (next1, next2) {
        // TODO: Would need to skip noncode here ... maybe it's better to
        // parse an expression, and re-interpret it later if it reads like a
        // variable access?
        (Token::Ident, Token::Eq1) => self.parse_seq_assoc_ident()?,
        (Token::KwAssert | Token::KwLet | Token::KwTrace, _) => {
            let stmt = self.parse_stmt()?;
            let (body_span, body) = self.parse_prefixed_seq()?;
            Seq::Stmt {
                stmt,
                body_span,
                body: Box::new(body),
            }
        }
        (Token::KwFor, _) => self.parse_seq_for()?,
        (Token::KwIf, _) => self.parse_seq_if()?,
        _ => {
            let (expr_span, expr) = self.parse_expr_op()?;
            self.skip_non_code()?;
            match self.peek() {
                // `expr: value` is a json-style key-value association.
                Token::Colon => {
                    let op = self.consume();
                    self.skip_non_code()?;
                    let (value_span, value) = self.parse_expr()?;
                    Seq::AssocExpr {
                        op_span: op,
                        field_span: expr_span,
                        field: Box::new(expr),
                        value_span,
                        value: Box::new(value),
                    }
                }
                // Otherwise it is a plain element.
                _ => Seq::Elem {
                    span: expr_span,
                    value: Box::new(expr),
                },
            }
        }
    };
    Ok((self.span_from(begin), result))
}
/// Parse an `ident = value` association; the cursor must be on the identifier.
fn parse_seq_assoc_ident(&mut self) -> Result<Seq> {
    let field = self.consume();
    self.skip_non_code()?;
    let op_span = self.parse_token(Token::Eq1, "Expected '=' here.")?;
    self.skip_non_code()?;
    let (value_span, value) = self.parse_expr()?;
    Ok(Seq::AssocIdent {
        op_span,
        field,
        value_span,
        value: Box::new(value),
    })
}
fn parse_seq_for(&mut self) -> Result<Seq> {
let _for = self.consume();
// Parse the loop variables. Here a trailing comma is not allowed.
let mut idents = Vec::new();
loop {
self.skip_non_code()?;
let ident = self.parse_token(Token::Ident, "Expected identifier here.")?;
idents.push(ident);
self.skip_non_code()?;
match self.peek() {
Token::Comma => {
self.consume();
continue;
}
_ => break,
}
}
self.skip_non_code()?;
self.parse_token(Token::KwIn, "Expected 'in' here.")?;
self.skip_non_code()?;
let (collection_span, collection) = self.parse_expr_op()?;
self.skip_non_code()?;
self.parse_token(Token::Colon, "Expected ':' after the collection.")?;
let (_body_span, body) = self.parse_prefixed_seq()?;
let result = Seq::For {
idents: idents.into_boxed_slice(),
collection_span,
collection: Box::new(collection),
body: Box::new(body),
};
Ok(result)
}
fn parse_seq_if(&mut self) -> Result<Seq> {
let _if = self.consume();
// See also the note in `parse_expr_if` about why we don't allow
// arbitrary expressions here.
self.skip_non_code()?;
let (condition_span, condition) = self.parse_expr_op()?;
self.skip_non_code()?;
self.parse_token(Token::Colon, "Expected ':' after the condition.")?;
let (_body_span, body) = self.parse_prefixed_seq()?;
let result = Seq::If {
condition_span,
condition: Box::new(condition),
body: Box::new(body),
};
Ok(result)
}
    /// Parse a type expression.
    ///
    /// A type expression is either a function type `(...) -> T`, a generic
    /// instantiation `Name[...]`, or a plain term.
    fn parse_type_expr(&mut self) -> Result<Type> {
        // If it starts with a `(`, then that is the start of an argument list,
        // and we are parsing a function type.
        if self.peek() == Token::LParen {
            return self.parse_type_function();
        }

        // Otherwise, we definitely start with a term.
        let begin = self.peek_span();
        let term = self.parse_type_term()?;

        // Optionally, the term can be followed by `[` to instantiate a generic
        // type.
        // NOTE(review): non-code is skipped here even when no `[` follows, so
        // comments between a term and what comes after are consumed at this
        // point — presumably intentional, as callers skip non-code anyway.
        self.skip_non_code()?;
        if let (Type::Term(name), Token::LBracket) = (&term, self.peek()) {
            self.push_bracket()?;
            let args = self.parse_types()?;
            self.pop_bracket()?;
            let type_apply = Type::Apply {
                // The span covers the term and its argument list.
                span: self.span_from(begin),
                name: *name,
                args,
            };
            return Ok(type_apply);
        }

        // When it's not followed by a `[`, then this is a regular term.
        Ok(term)
    }
/// Parse a function type that starts with a `(`.
fn parse_type_function(&mut self) -> Result<Type> {
let begin = self.peek_span();
self.push_bracket()?;
let args = self.parse_types()?;
self.pop_bracket()?;
self.skip_non_code()?;
self.parse_token(Token::ThinArrow, "Expected '->' here in function type.")?;
let result_type = self.parse_type_expr()?;
let fn_type = Type::Function {
span: self.span_from(begin),
args,
result: Box::new(result_type),
};
Ok(fn_type)
}
    /// Parse a comma-delimited list of types with optional trailing comma.
    ///
    /// Stops (without consuming) at the closing `)` or `]`; the caller is
    /// responsible for popping the bracket. Non-code before the closing
    /// delimiter becomes the list's suffix.
    fn parse_types(&mut self) -> Result<List<Prefixed<Type>>> {
        let mut result = Vec::new();
        let mut trailing_comma = false;
        loop {
            let prefix = self.parse_non_code();
            // Closing delimiter: whatever non-code we just collected belongs
            // to no element, so it becomes the list suffix.
            if matches!(self.peek(), Token::RParen | Token::RBracket) {
                let final_result = List {
                    elements: result.into_boxed_slice(),
                    suffix: prefix,
                    trailing_comma,
                };
                return Ok(final_result);
            }
            let type_ = self.parse_type_expr()?;
            let prefixed = Prefixed {
                prefix,
                inner: type_,
            };
            result.push(prefixed);
            // We just parsed an element, so any earlier comma is no longer
            // trailing; it only becomes trailing again if we see one below
            // and then hit the closing delimiter.
            trailing_comma = false;
            self.skip_non_code()?;
            match self.peek() {
                // No separator needed before the closing delimiter; the loop
                // re-checks it at the top and returns there.
                Token::RParen | Token::RBracket => continue,
                Token::Comma => {
                    self.consume();
                    trailing_comma = true;
                    continue;
                }
                _ => {
                    // If we don't find a separator, nor the end of the list,
                    // that's an error. We can report an unmatched bracket
                    // as the problem, because it is.
                    self.pop_bracket()?;
                    unreachable!("pop_bracket should have failed.");
                }
            }
        }
    }
fn parse_type_term(&mut self) -> Result<Type> {
match self.peek() {
Token::Ident => {
let span = self.consume();
Ok(Type::Term(span))
}
// TODO: Consider string literals as type terms too.
_ => self.error("Expected a type here.").err(),
}
}
/// Confirm that there is no trailing content left to parse.
fn parse_eof(&mut self) -> Result<()> {
self.skip_non_code()?;
if self.peek() != Token::Eof {
return self
.error("Unexpected content after the main expression.")
.err();
}
Ok(())
}
}