mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-28 14:24:45 +00:00
1523 lines
53 KiB
Rust
1523 lines
53 KiB
Rust
use crate::ast::Attempting;
|
|
use bumpalo::collections::vec::Vec;
|
|
use bumpalo::Bump;
|
|
use encode_unicode::CharExt;
|
|
use roc_region::all::{Located, Region};
|
|
use std::fmt;
|
|
use std::str::from_utf8;
|
|
use std::{char, u16};
|
|
use Progress::*;
|
|
|
|
/// A position in a source file.
|
|
#[derive(Clone, PartialEq, Eq)]
|
|
pub struct State<'a> {
|
|
/// The raw input bytes from the file.
|
|
pub bytes: &'a [u8],
|
|
|
|
/// Current line of the input
|
|
pub line: u32,
|
|
/// Current column of the input
|
|
pub column: u16,
|
|
|
|
/// Current indentation level, in columns
|
|
/// (so no indent is col 1 - this saves an arithmetic operation.)
|
|
pub indent_col: u16,
|
|
|
|
// true at the beginning of each line, then false after encountering
|
|
// the first nonspace char on that line.
|
|
pub is_indenting: bool,
|
|
|
|
pub context_stack: &'a ContextStack<'a>,
|
|
|
|
/// The original length of the string, before any bytes were consumed.
|
|
/// This is used internally by the State::bytes_consumed() function.
|
|
///
|
|
/// TODO make this private, in a way that doesn't break macros!
|
|
pub original_len: usize,
|
|
}
|
|
|
|
/// A value that is exactly one of two alternatives.
#[derive(Debug, PartialEq, Eq)]
pub enum Either<First, Second> {
    /// The first alternative.
    First(First),
    /// The second alternative.
    Second(Second),
}
|
|
|
|
impl<'a> State<'a> {
|
|
pub fn new_in(arena: &'a Bump, bytes: &'a [u8], _attempting: Attempting) -> State<'a> {
|
|
State {
|
|
bytes,
|
|
line: 0,
|
|
column: 0,
|
|
indent_col: 0,
|
|
is_indenting: true,
|
|
context_stack: arena.alloc(ContextStack::Nil),
|
|
original_len: bytes.len(),
|
|
}
|
|
}
|
|
|
|
pub fn check_indent(self, arena: &'a Bump, min_indent: u16) -> Result<Self, (Bag<'a>, Self)> {
|
|
if self.indent_col < min_indent {
|
|
Err((
|
|
Bag::from_state(arena, &self, FailReason::OutdentedTooFar),
|
|
self,
|
|
))
|
|
} else {
|
|
Ok(self)
|
|
}
|
|
}
|
|
|
|
/// Returns the total number of bytes consumed since the parser began parsing.
|
|
///
|
|
/// So if the parser has consumed 8 bytes, this function will return 8.
|
|
pub fn bytes_consumed(&self) -> usize {
|
|
self.original_len - self.bytes.len()
|
|
}
|
|
|
|
/// Returns whether the parser has reached the end of the input
|
|
pub fn has_reached_end(&self) -> bool {
|
|
self.bytes.is_empty()
|
|
}
|
|
|
|
/// Increments the line, then resets column, indent_col, and is_indenting.
|
|
/// Advances the input by 1, to consume the newline character.
|
|
pub fn newline(&self, arena: &'a Bump) -> Result<Self, (Progress, Bag<'a>, Self)> {
|
|
match self.line.checked_add(1) {
|
|
Some(line) => Ok(State {
|
|
bytes: &self.bytes[1..],
|
|
line,
|
|
column: 0,
|
|
indent_col: 0,
|
|
is_indenting: true,
|
|
original_len: self.original_len,
|
|
context_stack: arena.alloc(self.context_stack.clone()),
|
|
}),
|
|
None => Err((
|
|
Progress::NoProgress,
|
|
Bag::from_state(arena, &self, FailReason::TooManyLines),
|
|
self.clone(),
|
|
)),
|
|
}
|
|
}
|
|
|
|
/// Use advance_spaces to advance with indenting.
|
|
/// This assumes we are *not* advancing with spaces, or at least that
|
|
/// any spaces on the line were preceded by non-spaces - which would mean
|
|
/// they weren't eligible to indent anyway.
|
|
pub fn advance_without_indenting(
|
|
self,
|
|
arena: &'a Bump,
|
|
quantity: usize,
|
|
) -> Result<Self, (Progress, Bag<'a>, Self)> {
|
|
match (self.column as usize).checked_add(quantity) {
|
|
Some(column_usize) if column_usize <= u16::MAX as usize => {
|
|
Ok(State {
|
|
bytes: &self.bytes[quantity..],
|
|
column: column_usize as u16,
|
|
// Once we hit a nonspace character, we are no longer indenting.
|
|
is_indenting: false,
|
|
..self
|
|
})
|
|
}
|
|
_ => Err(line_too_long(arena, self.clone())),
|
|
}
|
|
}
|
|
/// Advance the parser while also indenting as appropriate.
|
|
/// This assumes we are only advancing with spaces, since they can indent.
|
|
pub fn advance_spaces(
|
|
&self,
|
|
arena: &'a Bump,
|
|
spaces: usize,
|
|
) -> Result<Self, (Progress, Bag<'a>, Self)> {
|
|
match (self.column as usize).checked_add(spaces) {
|
|
Some(column_usize) if column_usize <= u16::MAX as usize => {
|
|
// Spaces don't affect is_indenting; if we were previously indneting,
|
|
// we still are, and if we already finished indenting, we're still done.
|
|
let is_indenting = self.is_indenting;
|
|
|
|
// If we're indenting, spaces indent us further.
|
|
let indent_col = if is_indenting {
|
|
// This doesn't need to be checked_add because it's always true that
|
|
// indent_col <= col, so if this could possibly overflow, we would
|
|
// already have errored out from the column calculation.
|
|
//
|
|
// Leaving debug assertions in case this invariant someday disappers.
|
|
debug_assert!(u16::MAX - self.indent_col >= spaces as u16);
|
|
debug_assert!(spaces <= u16::MAX as usize);
|
|
|
|
self.indent_col + spaces as u16
|
|
} else {
|
|
self.indent_col
|
|
};
|
|
|
|
Ok(State {
|
|
bytes: &self.bytes[spaces..],
|
|
line: self.line,
|
|
column: column_usize as u16,
|
|
indent_col,
|
|
is_indenting,
|
|
context_stack: arena.alloc(self.context_stack.clone()),
|
|
original_len: self.original_len,
|
|
})
|
|
}
|
|
_ => Err(line_too_long(arena, self.clone())),
|
|
}
|
|
}
|
|
|
|
/// Returns a Region corresponding to the current state, but
|
|
/// with the end_col advanced by the given amount. This is
|
|
/// useful when parsing something "manually" (using input.chars())
|
|
/// and thus wanting a Region while not having access to loc().
|
|
pub fn len_region(&self, length: u16) -> Region {
|
|
Region {
|
|
start_col: self.column,
|
|
start_line: self.line,
|
|
end_col: self
|
|
.column
|
|
.checked_add(length)
|
|
.unwrap_or_else(|| panic!("len_region overflowed")),
|
|
end_line: self.line,
|
|
}
|
|
}
|
|
|
|
/// Return a failing ParseResult for the given FailReason
|
|
pub fn fail<T>(
|
|
self,
|
|
arena: &'a Bump,
|
|
progress: Progress,
|
|
reason: FailReason,
|
|
) -> Result<(Progress, T, Self), (Progress, Bag<'a>, Self)> {
|
|
Err((progress, Bag::from_state(arena, &self, reason), self))
|
|
}
|
|
}
|
|
|
|
impl<'a> fmt::Debug for State<'a> {
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
write!(f, "State {{")?;
|
|
|
|
match from_utf8(self.bytes) {
|
|
Ok(string) => write!(f, "\n\tbytes: [utf8] {:?}", string)?,
|
|
Err(_) => write!(f, "\n\tbytes: [invalid utf8] {:?}", self.bytes)?,
|
|
}
|
|
|
|
write!(f, "\n\t(line, col): ({}, {}),", self.line, self.column)?;
|
|
write!(f, "\n\tindent_col: {}", self.indent_col)?;
|
|
write!(f, "\n\tis_indenting: {:?}", self.is_indenting)?;
|
|
write!(f, "\n\toriginal_len: {}", self.original_len)?;
|
|
write!(f, "\n\tcontext stack: {:?}", self.context_stack)?;
|
|
write!(f, "\n}}")
|
|
}
|
|
}
|
|
|
|
#[test]
fn state_size() {
    // State should never exceed 8 machine words, so it fits in a typical
    // cache line.
    let word = std::mem::size_of::<usize>();
    let limit = word * 8;
    let actual = std::mem::size_of::<State>();

    assert!(actual <= limit, "{:?} <= {:?}", actual, limit);
}
|
|
|
|
pub type ParseResult<'a, Output> =
|
|
Result<(Progress, Output, State<'a>), (Progress, Bag<'a>, State<'a>)>;
|
|
|
|
/// Whether a parser consumed any input.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Progress {
    MadeProgress,
    NoProgress,
}

impl Progress {
    /// Derives progress from the number of bytes remaining `before` and
    /// `after` a parse step.
    ///
    /// If `after` is greater than `before` (which should not happen while
    /// parsing forward), this reports `NoProgress` instead of overflowing
    /// the subtraction.
    pub fn from_lengths(before: usize, after: usize) -> Self {
        Self::from_consumed(before.saturating_sub(after))
    }

    /// `MadeProgress` if at least one character was consumed, else `NoProgress`.
    pub fn from_consumed(chars_consumed: usize) -> Self {
        Self::progress_when(chars_consumed != 0)
    }

    /// Converts a boolean into a `Progress`: `true` means `MadeProgress`.
    pub fn progress_when(made_progress: bool) -> Self {
        if made_progress {
            Progress::MadeProgress
        } else {
            Progress::NoProgress
        }
    }

    /// Combines two results: `MadeProgress` if either side made progress.
    pub fn or(&self, other: Self) -> Self {
        Self::progress_when(
            *self == Progress::MadeProgress || other == Progress::MadeProgress,
        )
    }
}
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
pub enum FailReason {
|
|
Unexpected(Region),
|
|
OutdentedTooFar,
|
|
ConditionFailed,
|
|
LineTooLong(u32 /* which line was too long */),
|
|
TooManyLines,
|
|
Eof(Region),
|
|
InvalidPattern,
|
|
BadUtf8,
|
|
ReservedKeyword(Region),
|
|
ArgumentsBeforeEquals(Region),
|
|
NotYetImplemented(String),
|
|
TODO,
|
|
}
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
pub enum ContextStack<'a> {
|
|
Cons(ContextItem, &'a ContextStack<'a>),
|
|
Nil,
|
|
}
|
|
|
|
impl<'a> ContextStack<'a> {
|
|
fn into_vec(self) -> std::vec::Vec<ContextItem> {
|
|
let mut result = std::vec::Vec::new();
|
|
let mut next = &self;
|
|
|
|
while let ContextStack::Cons(item, rest) = next {
|
|
next = rest;
|
|
|
|
result.push(*item);
|
|
}
|
|
|
|
result.reverse();
|
|
|
|
result
|
|
}
|
|
|
|
pub fn uncons(&'a self) -> Option<(ContextItem, &'a Self)> {
|
|
match self {
|
|
ContextStack::Cons(item, rest) => Some((*item, rest)),
|
|
ContextStack::Nil => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
|
pub struct ContextItem {
|
|
pub line: u32,
|
|
pub column: u16,
|
|
pub context: Attempting,
|
|
}
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
pub struct DeadEnd<'a> {
|
|
pub line: u32,
|
|
pub column: u16,
|
|
pub problem: FailReason,
|
|
pub context_stack: ContextStack<'a>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
pub struct Bag<'a>(Vec<'a, DeadEnd<'a>>);
|
|
|
|
impl<'a> Bag<'a> {
|
|
pub fn new_in(arena: &'a Bump) -> Self {
|
|
Bag(Vec::new_in(arena))
|
|
}
|
|
|
|
pub fn from_state(arena: &'a Bump, state: &State<'a>, x: FailReason) -> Self {
|
|
let mut dead_ends = Vec::with_capacity_in(1, arena);
|
|
|
|
let dead_end = DeadEnd {
|
|
line: state.line,
|
|
column: state.column,
|
|
problem: x,
|
|
context_stack: state.context_stack.clone(),
|
|
};
|
|
dead_ends.push(dead_end);
|
|
|
|
Bag(dead_ends)
|
|
}
|
|
|
|
fn pop(&mut self) -> Option<DeadEnd<'a>> {
|
|
self.0.pop()
|
|
}
|
|
|
|
pub fn into_parse_problem(
|
|
mut self,
|
|
filename: std::path::PathBuf,
|
|
bytes: &[u8],
|
|
) -> ParseProblem<'_> {
|
|
match self.pop() {
|
|
None => unreachable!("there is a parse error, but no problem"),
|
|
Some(dead_end) => {
|
|
let context_stack = dead_end.context_stack.into_vec();
|
|
|
|
ParseProblem {
|
|
line: dead_end.line,
|
|
column: dead_end.column,
|
|
problem: dead_end.problem,
|
|
context_stack,
|
|
filename,
|
|
bytes,
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// use std vec to escape the arena's lifetime bound
|
|
/// since this is only used when there is in fact an error
|
|
/// I think this is fine
|
|
#[derive(Debug)]
|
|
pub struct ParseProblem<'a> {
|
|
pub line: u32,
|
|
pub column: u16,
|
|
pub problem: FailReason,
|
|
pub context_stack: std::vec::Vec<ContextItem>,
|
|
pub filename: std::path::PathBuf,
|
|
pub bytes: &'a [u8],
|
|
}
|
|
|
|
pub fn fail<'a, T>() -> impl Parser<'a, T> {
|
|
move |arena, state: State<'a>| {
|
|
Err((
|
|
NoProgress,
|
|
Bag::from_state(arena, &state, FailReason::ConditionFailed),
|
|
state,
|
|
))
|
|
}
|
|
}
|
|
|
|
pub trait Parser<'a, Output> {
|
|
fn parse(&self, _: &'a Bump, _: State<'a>) -> ParseResult<'a, Output>;
|
|
}
|
|
|
|
impl<'a, F, Output> Parser<'a, Output> for F
|
|
where
|
|
F: Fn(&'a Bump, State<'a>) -> ParseResult<'a, Output>,
|
|
{
|
|
fn parse(&self, arena: &'a Bump, state: State<'a>) -> ParseResult<'a, Output> {
|
|
self(arena, state)
|
|
}
|
|
}
|
|
|
|
pub fn allocated<'a, P, Val>(parser: P) -> impl Parser<'a, &'a Val>
|
|
where
|
|
P: Parser<'a, Val>,
|
|
Val: 'a,
|
|
{
|
|
move |arena, state: State<'a>| {
|
|
let (progress, answer, state) = parser.parse(arena, state)?;
|
|
|
|
Ok((progress, &*arena.alloc(answer), state))
|
|
}
|
|
}
|
|
|
|
pub fn not_followed_by<'a, P, ByParser, By, Val>(parser: P, by: ByParser) -> impl Parser<'a, Val>
|
|
where
|
|
ByParser: Parser<'a, By>,
|
|
P: Parser<'a, Val>,
|
|
{
|
|
move |arena, state: State<'a>| {
|
|
let original_state = state.clone();
|
|
|
|
parser
|
|
.parse(arena, state)
|
|
.and_then(|(progress, answer, state)| {
|
|
let after_parse = state.clone();
|
|
|
|
match by.parse(arena, state) {
|
|
Ok((_, _, state)) => Err((
|
|
NoProgress,
|
|
Bag::from_state(arena, &state, FailReason::ConditionFailed),
|
|
original_state,
|
|
)),
|
|
Err(_) => Ok((progress, answer, after_parse)),
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
pub fn not<'a, P, Val>(parser: P) -> impl Parser<'a, ()>
|
|
where
|
|
P: Parser<'a, Val>,
|
|
{
|
|
move |arena, state: State<'a>| {
|
|
let original_state = state.clone();
|
|
|
|
match parser.parse(arena, state) {
|
|
Ok((_, _, _)) => Err((
|
|
NoProgress,
|
|
Bag::from_state(arena, &original_state, FailReason::ConditionFailed),
|
|
original_state,
|
|
)),
|
|
Err((_, _, _)) => Ok((NoProgress, (), original_state)),
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn lookahead<'a, Peek, P, PeekVal, Val>(peek: Peek, parser: P) -> impl Parser<'a, Val>
|
|
where
|
|
Peek: Parser<'a, PeekVal>,
|
|
P: Parser<'a, Val>,
|
|
{
|
|
move |arena, state: State<'a>| {
|
|
let original_state = state.clone();
|
|
|
|
peek.parse(arena, state)
|
|
.and_then(|_| parser.parse(arena, original_state))
|
|
}
|
|
}
|
|
|
|
pub fn and_then<'a, P1, P2, F, Before, After>(parser: P1, transform: F) -> impl Parser<'a, After>
|
|
where
|
|
P1: Parser<'a, Before>,
|
|
P2: Parser<'a, After>,
|
|
F: Fn(Progress, Before) -> P2,
|
|
{
|
|
move |arena, state| {
|
|
parser
|
|
.parse(arena, state)
|
|
.and_then(|(progress, output, next_state)| {
|
|
transform(progress, output).parse(arena, next_state)
|
|
})
|
|
}
|
|
}
|
|
|
|
pub fn and_then_with_indent_level<'a, P1, P2, F, Before, After>(
|
|
parser: P1,
|
|
transform: F,
|
|
) -> impl Parser<'a, After>
|
|
where
|
|
P1: Parser<'a, Before>,
|
|
P2: Parser<'a, After>,
|
|
F: Fn(Progress, Before, u16) -> P2,
|
|
{
|
|
move |arena, state| {
|
|
parser
|
|
.parse(arena, state)
|
|
.and_then(|(progress, output, next_state)| {
|
|
transform(progress, output, next_state.indent_col).parse(arena, next_state)
|
|
})
|
|
}
|
|
}
|
|
|
|
pub fn then<'a, P1, F, Before, After>(parser: P1, transform: F) -> impl Parser<'a, After>
|
|
where
|
|
P1: Parser<'a, Before>,
|
|
After: 'a,
|
|
F: Fn(&'a Bump, State<'a>, Progress, Before) -> ParseResult<'a, After>,
|
|
{
|
|
move |arena, state| {
|
|
parser
|
|
.parse(arena, state)
|
|
.and_then(|(progress, output, next_state)| {
|
|
transform(arena, next_state, progress, output)
|
|
})
|
|
}
|
|
}
|
|
|
|
pub fn unexpected_eof<'a>(
|
|
arena: &'a Bump,
|
|
state: State<'a>,
|
|
chars_consumed: usize,
|
|
) -> (Progress, Bag<'a>, State<'a>) {
|
|
checked_unexpected(arena, state, chars_consumed, |region| {
|
|
FailReason::Eof(region)
|
|
})
|
|
}
|
|
|
|
pub fn unexpected<'a>(
|
|
arena: &'a Bump,
|
|
chars_consumed: usize,
|
|
_attempting: Attempting,
|
|
state: State<'a>,
|
|
) -> (Progress, Bag<'a>, State<'a>) {
|
|
// NOTE state is the last argument because chars_consumed often depends on the state's fields
|
|
// having state be the final argument prevents borrowing issues
|
|
checked_unexpected(arena, state, chars_consumed, |region| {
|
|
FailReason::Unexpected(region)
|
|
})
|
|
}
|
|
|
|
/// Check for line overflow, then compute a new Region based on chars_consumed
|
|
/// and provide it as a way to construct a Problem.
|
|
/// If maximum line length was exceeded, return a Problem indicating as much.
|
|
#[inline(always)]
|
|
fn checked_unexpected<'a, F>(
|
|
arena: &'a Bump,
|
|
state: State<'a>,
|
|
chars_consumed: usize,
|
|
problem_from_region: F,
|
|
) -> (Progress, Bag<'a>, State<'a>)
|
|
where
|
|
F: FnOnce(Region) -> FailReason,
|
|
{
|
|
match (state.column as usize).checked_add(chars_consumed) {
|
|
// Crucially, this is < u16::MAX and not <= u16::MAX. This means if
|
|
// column ever gets set to u16::MAX, we will automatically bail out
|
|
// with LineTooLong - which is exactly what we want! Once a line has
|
|
// been discovered to be too long, we don't want to parse anything else
|
|
// until that's fixed.
|
|
Some(end_col) if end_col < u16::MAX as usize => {
|
|
let region = Region {
|
|
start_col: state.column,
|
|
end_col: end_col as u16,
|
|
start_line: state.line,
|
|
end_line: state.line,
|
|
};
|
|
|
|
let problem = problem_from_region(region);
|
|
|
|
(
|
|
Progress::NoProgress,
|
|
Bag::from_state(arena, &state, problem),
|
|
state,
|
|
)
|
|
}
|
|
_ => {
|
|
let (_progress, fail, state) = line_too_long(arena, state);
|
|
(Progress::NoProgress, fail, state)
|
|
}
|
|
}
|
|
}
|
|
|
|
fn line_too_long<'a>(arena: &'a Bump, state: State<'a>) -> (Progress, Bag<'a>, State<'a>) {
|
|
let problem = FailReason::LineTooLong(state.line);
|
|
// Set column to MAX and advance the parser to end of input.
|
|
// This way, all future parsers will fail on EOF, and then
|
|
// unexpected_eof will take them back here - thus propagating
|
|
// the initial LineTooLong error all the way to the end, even if
|
|
// (for example) the LineTooLong initially occurs in the middle of
|
|
// a one_of chain, which would otherwise prevent it from propagating.
|
|
let column = u16::MAX;
|
|
let bytes = state.bytes.get(0..state.bytes.len()).unwrap();
|
|
let state = State {
|
|
bytes,
|
|
line: state.line,
|
|
column,
|
|
..state
|
|
};
|
|
|
|
// TODO do we make progress in this case?
|
|
// isn't this error fatal?
|
|
(
|
|
Progress::NoProgress,
|
|
Bag::from_state(arena, &state, problem),
|
|
state,
|
|
)
|
|
}
|
|
|
|
/// A single ASCII char that isn't a newline.
|
|
/// (For newlines, use newline_char(), which handles line numbers)
|
|
pub fn ascii_char<'a>(expected: u8) -> impl Parser<'a, ()> {
|
|
// Make sure this really is not a newline!
|
|
debug_assert_ne!(expected, b'\n');
|
|
|
|
move |arena, state: State<'a>| match state.bytes.first() {
|
|
Some(&actual) if expected == actual => Ok((
|
|
Progress::MadeProgress,
|
|
(),
|
|
state.advance_without_indenting(arena, 1)?,
|
|
)),
|
|
Some(_) => Err(unexpected(arena, 0, Attempting::Keyword, state)),
|
|
_ => Err(unexpected_eof(arena, state, 0)),
|
|
}
|
|
}
|
|
|
|
/// A single '\n' character.
|
|
/// Use this instead of ascii_char('\n') because it properly handles
|
|
/// incrementing the line number.
|
|
pub fn newline_char<'a>() -> impl Parser<'a, ()> {
|
|
move |arena, state: State<'a>| match state.bytes.first() {
|
|
Some(b'\n') => Ok((Progress::MadeProgress, (), state.newline(arena)?)),
|
|
Some(_) => Err(unexpected(arena, 0, Attempting::Keyword, state)),
|
|
_ => Err(unexpected_eof(arena, state, 0)),
|
|
}
|
|
}
|
|
|
|
/// One or more ASCII hex digits. (Useful when parsing unicode escape codes,
|
|
/// which must consist entirely of ASCII hex digits.)
|
|
pub fn ascii_hex_digits<'a>() -> impl Parser<'a, &'a str> {
|
|
move |arena, state: State<'a>| {
|
|
let mut buf = bumpalo::collections::String::new_in(arena);
|
|
|
|
for &byte in state.bytes.iter() {
|
|
if (byte as char).is_ascii_hexdigit() {
|
|
buf.push(byte as char);
|
|
} else if buf.is_empty() {
|
|
// We didn't find any hex digits!
|
|
return Err(unexpected(arena, 0, Attempting::Keyword, state));
|
|
} else {
|
|
let state = state.advance_without_indenting(arena, buf.len())?;
|
|
|
|
return Ok((Progress::MadeProgress, buf.into_bump_str(), state));
|
|
}
|
|
}
|
|
|
|
Err(unexpected_eof(arena, state, 0))
|
|
}
|
|
}
|
|
|
|
/// A single UTF-8-encoded char. This will both parse *and* validate that the
|
|
/// char is valid UTF-8, but it will *not* advance the state.
|
|
pub fn peek_utf8_char(state: &State) -> Result<(char, usize), FailReason> {
|
|
if !state.bytes.is_empty() {
|
|
match char::from_utf8_slice_start(state.bytes) {
|
|
Ok((ch, len_utf8)) => Ok((ch, len_utf8)),
|
|
Err(_) => Err(FailReason::BadUtf8),
|
|
}
|
|
} else {
|
|
Err(FailReason::Eof(
|
|
Region::zero(), /* TODO get a better region */
|
|
))
|
|
}
|
|
}
|
|
|
|
/// A single UTF-8-encoded char, with an offset. This will both parse *and*
|
|
/// validate that the char is valid UTF-8, but it will *not* advance the state.
|
|
pub fn peek_utf8_char_at(state: &State, offset: usize) -> Result<(char, usize), FailReason> {
|
|
if state.bytes.len() > offset {
|
|
let bytes = &state.bytes[offset..];
|
|
|
|
match char::from_utf8_slice_start(bytes) {
|
|
Ok((ch, len_utf8)) => Ok((ch, len_utf8)),
|
|
Err(_) => Err(FailReason::BadUtf8),
|
|
}
|
|
} else {
|
|
Err(FailReason::Eof(
|
|
Region::zero(), /* TODO get a better region */
|
|
))
|
|
}
|
|
}
|
|
|
|
pub fn keyword<'a>(keyword: &'static str, min_indent: u16) -> impl Parser<'a, ()> {
|
|
move |arena, state: State<'a>| {
|
|
let initial_state = state.clone();
|
|
// first parse the keyword characters
|
|
let (_, _, after_keyword_state) = ascii_string(keyword).parse(arena, state)?;
|
|
|
|
// then we must have at least one space character
|
|
// TODO this is potentially wasteful if there are a lot of spaces
|
|
match crate::blankspace::space1(min_indent).parse(arena, after_keyword_state.clone()) {
|
|
Err((_, fail, _)) => {
|
|
// this is not a keyword, maybe it's `whence` or `iffy`
|
|
// anyway, make no progress and return the initial state
|
|
// so we can try something else
|
|
Err((NoProgress, fail, initial_state))
|
|
}
|
|
Ok((_, _, _)) => {
|
|
// give back the state after parsing the keyword, but before the whitespace
|
|
// that way we can attach the whitespace to whatever follows
|
|
Ok((MadeProgress, (), after_keyword_state))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// A hardcoded string with no newlines, consisting only of ASCII characters
|
|
pub fn ascii_string<'a>(keyword: &'static str) -> impl Parser<'a, ()> {
|
|
// Verify that this really is exclusively ASCII characters.
|
|
// The `unsafe` block in this function relies upon this assumption!
|
|
//
|
|
// Also, this can't have newlines because we don't attempt to advance
|
|
// the row in the state, only the column.
|
|
debug_assert!(keyword.chars().all(|ch| ch.len_utf8() == 1 && ch != '\n'));
|
|
|
|
move |arena, state: State<'a>| {
|
|
let len = keyword.len();
|
|
|
|
// TODO do this comparison in one SIMD instruction (on supported systems)
|
|
match state.bytes.get(0..len) {
|
|
Some(next_str) => {
|
|
if next_str == keyword.as_bytes() {
|
|
Ok((
|
|
Progress::MadeProgress,
|
|
(),
|
|
state.advance_without_indenting(arena, len)?,
|
|
))
|
|
} else {
|
|
let (_, fail, state) = unexpected(arena, len, Attempting::Keyword, state);
|
|
Err((NoProgress, fail, state))
|
|
}
|
|
}
|
|
_ => Err(unexpected_eof(arena, state, 0)),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Parse zero or more values separated by a delimiter (e.g. a comma) whose
|
|
/// values are discarded
|
|
pub fn sep_by0<'a, P, D, Val>(delimiter: D, parser: P) -> impl Parser<'a, Vec<'a, Val>>
|
|
where
|
|
D: Parser<'a, ()>,
|
|
P: Parser<'a, Val>,
|
|
{
|
|
move |arena, state: State<'a>| {
|
|
let start_bytes_len = state.bytes.len();
|
|
|
|
match parser.parse(arena, state) {
|
|
Ok((elem_progress, first_output, next_state)) => {
|
|
// in practice, we want elements to make progress
|
|
debug_assert_eq!(elem_progress, MadeProgress);
|
|
|
|
let mut state = next_state;
|
|
let mut buf = Vec::with_capacity_in(1, arena);
|
|
|
|
buf.push(first_output);
|
|
|
|
loop {
|
|
match delimiter.parse(arena, state) {
|
|
Ok((_, (), next_state)) => {
|
|
// If the delimiter passed, check the element parser.
|
|
match parser.parse(arena, next_state) {
|
|
Ok((element_progress, next_output, next_state)) => {
|
|
// in practice, we want elements to make progress
|
|
debug_assert_eq!(element_progress, MadeProgress);
|
|
|
|
state = next_state;
|
|
buf.push(next_output);
|
|
}
|
|
Err((_, fail, state)) => {
|
|
// If the delimiter parsed, but the following
|
|
// element did not, that's a fatal error.
|
|
let progress =
|
|
Progress::from_lengths(start_bytes_len, state.bytes.len());
|
|
|
|
return Err((progress, fail, state));
|
|
}
|
|
}
|
|
}
|
|
Err((delim_progress, fail, old_state)) => match delim_progress {
|
|
MadeProgress => return Err((MadeProgress, fail, old_state)),
|
|
NoProgress => return Ok((NoProgress, buf, old_state)),
|
|
},
|
|
}
|
|
}
|
|
}
|
|
Err((element_progress, fail, new_state)) => match element_progress {
|
|
MadeProgress => Err((MadeProgress, fail, new_state)),
|
|
NoProgress => Ok((NoProgress, Vec::new_in(arena), new_state)),
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Parse zero or more values separated by a delimiter (e.g. a comma)
|
|
/// with an optional trailing delimiter whose values are discarded
|
|
pub fn trailing_sep_by0<'a, P, D, Val>(delimiter: D, parser: P) -> impl Parser<'a, Vec<'a, Val>>
|
|
where
|
|
D: Parser<'a, ()>,
|
|
P: Parser<'a, Val>,
|
|
{
|
|
move |arena, state: State<'a>| {
|
|
let start_bytes_len = state.bytes.len();
|
|
|
|
match parser.parse(arena, state) {
|
|
Ok((progress, first_output, next_state)) => {
|
|
// in practice, we want elements to make progress
|
|
debug_assert_eq!(progress, MadeProgress);
|
|
let mut state = next_state;
|
|
let mut buf = Vec::with_capacity_in(1, arena);
|
|
|
|
buf.push(first_output);
|
|
|
|
loop {
|
|
match delimiter.parse(arena, state) {
|
|
Ok((_, (), next_state)) => {
|
|
// If the delimiter passed, check the element parser.
|
|
match parser.parse(arena, next_state) {
|
|
Ok((element_progress, next_output, next_state)) => {
|
|
// in practice, we want elements to make progress
|
|
debug_assert_eq!(element_progress, MadeProgress);
|
|
|
|
state = next_state;
|
|
buf.push(next_output);
|
|
}
|
|
Err((_, _fail, old_state)) => {
|
|
// If the delimiter parsed, but the following
|
|
// element did not, that means we saw a trailing comma
|
|
let progress = Progress::from_lengths(
|
|
start_bytes_len,
|
|
old_state.bytes.len(),
|
|
);
|
|
return Ok((progress, buf, old_state));
|
|
}
|
|
}
|
|
}
|
|
Err((delim_progress, fail, old_state)) => match delim_progress {
|
|
MadeProgress => return Err((MadeProgress, fail, old_state)),
|
|
NoProgress => return Ok((NoProgress, buf, old_state)),
|
|
},
|
|
}
|
|
}
|
|
}
|
|
Err((element_progress, fail, new_state)) => match element_progress {
|
|
MadeProgress => Err((MadeProgress, fail, new_state)),
|
|
NoProgress => Ok((NoProgress, Vec::new_in(arena), new_state)),
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Parse one or more values separated by a delimiter (e.g. a comma) whose
|
|
/// values are discarded
|
|
pub fn sep_by1<'a, P, D, Val>(delimiter: D, parser: P) -> impl Parser<'a, Vec<'a, Val>>
|
|
where
|
|
D: Parser<'a, ()>,
|
|
P: Parser<'a, Val>,
|
|
{
|
|
move |arena, state: State<'a>| {
|
|
let start_bytes_len = state.bytes.len();
|
|
|
|
match parser.parse(arena, state) {
|
|
Ok((progress, first_output, next_state)) => {
|
|
debug_assert_eq!(progress, MadeProgress);
|
|
let mut state = next_state;
|
|
let mut buf = Vec::with_capacity_in(1, arena);
|
|
|
|
buf.push(first_output);
|
|
|
|
loop {
|
|
match delimiter.parse(arena, state) {
|
|
Ok((_, (), next_state)) => {
|
|
// If the delimiter passed, check the element parser.
|
|
match parser.parse(arena, next_state) {
|
|
Ok((_, next_output, next_state)) => {
|
|
state = next_state;
|
|
buf.push(next_output);
|
|
}
|
|
Err((element_progress, fail, state)) => {
|
|
// If the delimiter parsed, but the following
|
|
// element did not, that's a fatal error.
|
|
return Err((element_progress, fail, state));
|
|
}
|
|
}
|
|
}
|
|
Err((delim_progress, fail, old_state)) => {
|
|
match delim_progress {
|
|
MadeProgress => {
|
|
// fail if the delimiter made progress
|
|
return Err((MadeProgress, fail, old_state));
|
|
}
|
|
NoProgress => {
|
|
let progress = Progress::from_lengths(
|
|
start_bytes_len,
|
|
old_state.bytes.len(),
|
|
);
|
|
return Ok((progress, buf, old_state));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
Err((fail_progress, fail, new_state)) => Err((fail_progress, fail, new_state)),
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn fail_when_progress<'a, T>(
|
|
progress: Progress,
|
|
fail: Bag<'a>,
|
|
value: T,
|
|
state: State<'a>,
|
|
) -> ParseResult<'a, T> {
|
|
match progress {
|
|
MadeProgress => Err((MadeProgress, fail, state)),
|
|
NoProgress => Ok((NoProgress, value, state)),
|
|
}
|
|
}
|
|
|
|
pub fn satisfies<'a, P, A, F>(parser: P, predicate: F) -> impl Parser<'a, A>
|
|
where
|
|
P: Parser<'a, A>,
|
|
F: Fn(&A) -> bool,
|
|
{
|
|
move |arena: &'a Bump, state: State<'a>| match parser.parse(arena, state.clone()) {
|
|
Ok((progress, output, next_state)) if predicate(&output) => {
|
|
Ok((progress, output, next_state))
|
|
}
|
|
Ok((progress, _, _)) | Err((progress, _, _)) => Err((
|
|
progress,
|
|
Bag::from_state(arena, &state, FailReason::ConditionFailed),
|
|
state,
|
|
)),
|
|
}
|
|
}
|
|
|
|
pub fn optional<'a, P, T>(parser: P) -> impl Parser<'a, Option<T>>
|
|
where
|
|
P: Parser<'a, T>,
|
|
{
|
|
move |arena: &'a Bump, state: State<'a>| {
|
|
// We have to clone this because if the optional parser fails,
|
|
// we need to revert back to the original state.
|
|
let original_state = state.clone();
|
|
|
|
match parser.parse(arena, state) {
|
|
Ok((progress, out1, state)) => Ok((progress, Some(out1), state)),
|
|
Err((_, _, _)) => {
|
|
// NOTE this will backtrack
|
|
// TODO can we get rid of some of the potential backtracking?
|
|
Ok((NoProgress, None, original_state))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// MACRO COMBINATORS
|
|
//
|
|
// Using some combinators together results in combinatorial type explosion
|
|
// which makes things take forever to compile. Using macros instead avoids this!
|
|
|
|
/// Wraps the output of `$parser` in a `Located`, whose region runs from the
/// line/column before parsing to the line/column after it.
#[macro_export]
macro_rules! loc {
    ($parser:expr) => {
        move |arena, state: $crate::parser::State<'a>| {
            use roc_region::all::{Located, Region};

            let start_line = state.line;
            let start_col = state.column;

            $parser
                .parse(arena, state)
                .map(|(progress, value, state)| {
                    let region = Region {
                        start_col,
                        start_line,
                        end_col: state.column,
                        end_line: state.line,
                    };

                    (progress, Located { region, value }, state)
                })
        }
    };
}
|
|
|
|
/// If the first one parses, ignore its output and move on to parse with the second one.
///
/// On any failure the state from before the first parser is returned.
#[macro_export]
macro_rules! skip_first {
    ($p1:expr, $p2:expr) => {
        move |arena, state: $crate::parser::State<'a>| {
            let original_state = state.clone();

            let (first_progress, state) = match $p1.parse(arena, state) {
                Ok((progress, _, state)) => (progress, state),
                Err((progress, fail, _)) => return Err((progress, fail, original_state)),
            };

            match $p2.parse(arena, state) {
                Ok((second_progress, out2, state)) => {
                    Ok((first_progress.or(second_progress), out2, state))
                }
                Err((second_progress, fail, _)) => {
                    Err((first_progress.or(second_progress), fail, original_state))
                }
            }
        }
    };
}
|
|
|
|
/// If the first one parses, parse the second one; if it also parses, use the
/// output from the first one.
///
/// On any failure the state from before the first parser is returned.
#[macro_export]
macro_rules! skip_second {
    ($p1:expr, $p2:expr) => {
        move |arena, state: $crate::parser::State<'a>| {
            let original_state = state.clone();

            let (first_progress, out1, state) = match $p1.parse(arena, state) {
                Ok(parsed) => parsed,
                Err((progress, fail, _)) => return Err((progress, fail, original_state)),
            };

            match $p2.parse(arena, state) {
                Ok((second_progress, _, state)) => {
                    Ok((first_progress.or(second_progress), out1, state))
                }
                Err((second_progress, fail, _)) => {
                    Err((first_progress.or(second_progress), fail, original_state))
                }
            }
        }
    };
}
|
|
|
|
/// Parse zero or more elements between two braces (e.g. square braces).
/// Elements can be optionally surrounded by spaces, and are separated by a
/// delimiter (e.g comma-separated). Braces and delimiters get discarded.
#[macro_export]
macro_rules! collection {
    ($opening_brace:expr, $elem:expr, $delimiter:expr, $closing_brace:expr, $min_indent:expr) => {
        skip_first!(
            $opening_brace,
            skip_first!(
                // Plain space characters are deliberately accepted here, so
                // that `[ ]` parses as an empty list which the formatter can
                // then turn back into `[]`.
                //
                // Newlines and comments are not accepted inside empty
                // collections: those are normally stored in an Expr, and an
                // empty collection has no Expr to hold them.
                //
                // The AST could be given extra storage specifically for
                // empty literals containing newlines or comments, but that
                // does not seem worth even the tiniest regression in
                // compiler performance.
                zero_or_more!($crate::parser::ascii_char(b' ')),
                skip_second!(
                    $crate::parser::sep_by0(
                        $delimiter,
                        $crate::blankspace::space0_around($elem, $min_indent)
                    ),
                    $closing_brace
                )
            )
        )
    };
}
|
|
|
|
/// Parses zero or more elements enclosed between an opening and a closing
/// brace (for example square brackets), allowing a trailing delimiter.
///
/// Each element may be surrounded by optional spaces, and elements are
/// separated by `$delimiter` (for example a comma), optionally followed by one
/// final delimiter. The braces and the delimiters are parsed and then thrown
/// away. The output is the pair built by `and!`: the parsed elements together
/// with the output of the `space0` parser that runs before the closing brace.
#[macro_export]
macro_rules! collection_trailing_sep {
    ($opening_brace:expr, $elem:expr, $delimiter:expr, $closing_brace:expr, $min_indent:expr) => {
        skip_first!(
            $opening_brace,
            skip_first!(
                // Plain space characters are deliberately accepted right after
                // the opening brace, so that `[ ]` parses as an empty list which
                // the formatter can then turn back into `[]`.
                //
                // Newlines and comments are NOT accepted inside an empty
                // collection: they are normally stored on an Expr, and an empty
                // collection has no Expr to attach them to.
                //
                // The AST could be given extra storage just for empty literals
                // that contain newlines or comments, but that is not considered
                // worth even a tiny regression in compiler performance.
                zero_or_more!($crate::parser::ascii_char(b' ')),
                skip_second!(
                    and!(
                        $crate::parser::trailing_sep_by0(
                            $delimiter,
                            $crate::blankspace::space0_around($elem, $min_indent)
                        ),
                        $crate::blankspace::space0($min_indent)
                    ),
                    $closing_brace
                )
            )
        )
    };
}
|
|
|
|
/// Runs `$p1` and then `$p2`, producing both outputs as a tuple `(out1, out2)`.
///
/// The progress flags of the two parsers are merged with `or`. If `$p1` fails,
/// its error is returned exactly as it was reported. If `$p2` fails after `$p1`
/// succeeded, the error carries the state the combined parser started from.
#[macro_export]
macro_rules! and {
    ($p1:expr, $p2:expr) => {
        move |arena: &'a bumpalo::Bump, state: $crate::parser::State<'a>| {
            // A copy of the starting state is needed for the case where the
            // first parser succeeds but the second one fails: then we have to
            // report the position from before either of them ran.
            let checkpoint = state.clone();

            let (first_progress, first_output, after_first) = match $p1.parse(arena, state) {
                Ok(parsed) => parsed,
                // A failure of the first parser is passed through untouched,
                // including whatever state that parser reported.
                Err(failure) => return Err(failure),
            };

            match $p2.parse(arena, after_first) {
                Ok((second_progress, second_output, rest)) => Ok((
                    first_progress.or(second_progress),
                    (first_output, second_output),
                    rest,
                )),
                Err((second_progress, fail, _)) => {
                    Err((first_progress.or(second_progress), fail, checkpoint))
                }
            }
        }
    };
}
|
|
|
|
/// Tries the given parsers in order and returns the first successful result.
///
/// A later parser is only attempted when the previous one failed with
/// `NoProgress`. A failure with `MadeProgress` is returned immediately, so a
/// parser that has already consumed input commits to its branch.
///
/// With more than two parsers the macro nests itself:
/// `one_of!(a, b, c)` is `one_of!(a, one_of!(b, c))`.
#[macro_export]
macro_rules! one_of {
    ($p1:expr, $p2:expr) => {
        move |arena: &'a bumpalo::Bump, state: $crate::parser::State<'a>| {
            // The `Progress` variants are spelled with their full `$crate` path
            // on purpose. A bare `MadeProgress` in pattern position silently
            // turns into a catch-all variable binding when the call site has
            // not imported the variants, which would make the fallback branch
            // unreachable.
            match $p1.parse(arena, state) {
                valid @ Ok(_) => valid,
                Err(($crate::parser::Progress::MadeProgress, fail, state)) => {
                    Err(($crate::parser::Progress::MadeProgress, fail, state))
                }
                Err(($crate::parser::Progress::NoProgress, _, state)) => {
                    $p2.parse(arena, state)
                }
            }
        }
    };

    ($p1:expr, $($others:expr),+) => {
        one_of!($p1, one_of!($($others),+))
    };
}
|
|
|
|
/// Wraps `$parser` so that, on success, its output is passed through
/// `$transform`. Progress, the resulting state and any failure are returned
/// unchanged.
#[macro_export]
macro_rules! map {
    ($parser:expr, $transform:expr) => {
        move |arena, state| match $parser.parse(arena, state) {
            Ok((progress, value, rest)) => Ok((progress, $transform(value), rest)),
            Err(failure) => Err(failure),
        }
    };
}
|
|
|
|
/// Like `map!`, but `$transform` also receives the arena: on success the
/// output becomes `$transform(arena, output)`. Progress, the resulting state
/// and any failure are returned unchanged.
#[macro_export]
macro_rules! map_with_arena {
    ($parser:expr, $transform:expr) => {
        move |arena, state| match $parser.parse(arena, state) {
            Ok((progress, value, rest)) => Ok((progress, $transform(arena, value), rest)),
            Err(failure) => Err(failure),
        }
    };
}
|
|
|
|
/// Applies `$parser` repeatedly and collects every output into an arena
/// allocated `Vec`. Zero matches is a success that yields an empty `Vec`.
///
/// Repetition stops at the first failure:
/// * a failure with `NoProgress` ends the sequence successfully;
/// * a failure with `MadeProgress` means an element was started but could not
///   be completed, and is returned as an error.
///
/// After at least one element was parsed, the reported progress is computed
/// with `Progress::from_lengths` from the byte length at the start and the byte
/// length of the state returned by the final failed attempt.
#[macro_export]
macro_rules! zero_or_more {
    ($parser:expr) => {
        // `State` and `Progress` are referred to through `$crate` so the macro
        // does not depend on what the call site has imported. This matters most
        // for the `Progress` variants in pattern position: an unresolved bare
        // `MadeProgress` would silently become a catch-all variable binding.
        move |arena, state: $crate::parser::State<'a>| {
            use bumpalo::collections::Vec;

            let start_bytes_len = state.bytes.len();

            match $parser.parse(arena, state) {
                Ok((_, first_output, next_state)) => {
                    let mut state = next_state;
                    let mut buf = Vec::with_capacity_in(1, arena);

                    buf.push(first_output);

                    loop {
                        match $parser.parse(arena, state) {
                            Ok((_, next_output, next_state)) => {
                                state = next_state;
                                buf.push(next_output);
                            }
                            Err(($crate::parser::Progress::MadeProgress, fail, old_state)) => {
                                // made progress on an element and then failed; that's an error
                                return Err((
                                    $crate::parser::Progress::MadeProgress,
                                    fail,
                                    old_state,
                                ));
                            }
                            Err(($crate::parser::Progress::NoProgress, _, old_state)) => {
                                // the next element failed with no progress;
                                // report whether we made progress before
                                let progress = $crate::parser::Progress::from_lengths(
                                    start_bytes_len,
                                    old_state.bytes.len(),
                                );
                                return Ok((progress, buf, old_state));
                            }
                        }
                    }
                }
                Err(($crate::parser::Progress::MadeProgress, fail, new_state)) => {
                    // made progress on an element and then failed; that's an error
                    Err(($crate::parser::Progress::MadeProgress, fail, new_state))
                }
                Err(($crate::parser::Progress::NoProgress, _, new_state)) => {
                    // the first element failed (with no progress), but that's OK
                    // because we only need to parse 0 elements
                    Ok((
                        $crate::parser::Progress::NoProgress,
                        Vec::new_in(arena),
                        new_state,
                    ))
                }
            }
        }
    };
}
|
|
|
|
/// Applies `$parser` repeatedly and collects every output into an arena
/// allocated `Vec`; the first application has to succeed.
///
/// Once at least one element has been parsed, the first failing attempt ends
/// the repetition and the final result is decided by
/// `$crate::parser::fail_when_progress`, which receives the failure's progress,
/// the failure itself, the collected outputs and the state.
///
/// If the very first attempt fails, the result is the error built by
/// `$crate::parser::unexpected_eof`; the original failure value is discarded.
/// In debug builds that first failure is asserted to have made no progress.
#[macro_export]
macro_rules! one_or_more {
    ($parser:expr) => {
        // `State` and `NoProgress` are referred to through `$crate` so the
        // macro does not depend on what the call site has imported.
        move |arena, state: $crate::parser::State<'a>| {
            use bumpalo::collections::Vec;

            match $parser.parse(arena, state) {
                Ok((_, first_output, next_state)) => {
                    let mut state = next_state;
                    let mut buf = Vec::with_capacity_in(1, arena);

                    buf.push(first_output);

                    loop {
                        match $parser.parse(arena, state) {
                            Ok((_, next_output, next_state)) => {
                                state = next_state;
                                buf.push(next_output);
                            }
                            Err((progress, fail, old_state)) => {
                                return $crate::parser::fail_when_progress(
                                    progress, fail, buf, old_state,
                                )
                            }
                        }
                    }
                }
                Err((progress, _, new_state)) => {
                    debug_assert_eq!(
                        progress,
                        $crate::parser::Progress::NoProgress,
                        "{:?}",
                        &new_state
                    );
                    Err($crate::parser::unexpected_eof(arena, new_state, 0))
                }
            }
        }
    };
}
|
|
|
|
/// Debugging aid: wraps `$parser` so that every parse result is passed through
/// `dbg!` (which prints it) before being returned unchanged.
#[macro_export]
macro_rules! debug {
    ($parser:expr) => {
        move |arena, state: $crate::parser::State<'a>| dbg!($parser.parse(arena, state))
    };
}
|
|
|
|
/// Runs `$parser` while recording `$attempting` on the state's context stack.
///
/// Before parsing, a `ContextItem` holding `$attempting` and the current line
/// and column is pushed onto `state.context_stack` (the new stack node is
/// allocated in the arena). If the parser succeeds, that entry is popped again
/// so the caller continues with the context it had before. If the parser
/// fails, the error is returned as is, with the entry still in place.
#[macro_export]
macro_rules! attempt {
    ($attempting:expr, $parser:expr) => {
        // `Bump` is named through the `bumpalo` crate, as the other macros in
        // this file do, so the call site does not need its own import.
        move |arena: &'a bumpalo::Bump, mut state: $crate::parser::State<'a>| {
            let item = $crate::parser::ContextItem {
                context: $attempting,
                line: state.line,
                column: state.column,
            };

            state.context_stack = arena.alloc($crate::parser::ContextStack::Cons(
                item,
                state.context_stack,
            ));

            $parser
                .parse(arena, state)
                .map(|(progress, answer, mut state)| {
                    // If the parser succeeded, go back to what we were originally attempting.
                    // (If it failed, that's exactly where we care what we were attempting!)
                    match state.context_stack.uncons() {
                        Some((_item, rest)) => {
                            state.context_stack = rest;
                        }
                        None => unreachable!("context stack contains at least one element"),
                    }

                    (progress, answer, state)
                })
        }
    };
}
|
|
|
|
/// Tries `$p1`; if it fails without making progress, tries `$p2` instead.
///
/// A success of `$p1` is wrapped in `Either::First`, a success of `$p2` in
/// `Either::Second`. If `$p1` fails with `MadeProgress`, that failure is
/// returned immediately and `$p2` is not attempted. A failure of `$p2` is
/// returned unchanged.
#[macro_export]
macro_rules! either {
    ($p1:expr, $p2:expr) => {
        move |arena: &'a bumpalo::Bump, state: $crate::parser::State<'a>| {
            // The `Progress` variants are spelled with their full `$crate` path
            // on purpose. A bare `NoProgress` in pattern position silently
            // turns into a catch-all variable binding when the call site has
            // not imported the variants, which would make the fallback run even
            // after the first parser made progress.
            match $p1.parse(arena, state) {
                Ok((progress, output, state)) => {
                    Ok((progress, $crate::parser::Either::First(output), state))
                }
                Err(($crate::parser::Progress::NoProgress, _, state)) => {
                    match $p2.parse(arena, state) {
                        Ok((progress, output, state)) => {
                            Ok((progress, $crate::parser::Either::Second(output), state))
                        }
                        Err((progress, fail, state)) => Err((progress, fail, state)),
                    }
                }
                Err(($crate::parser::Progress::MadeProgress, fail, state)) => {
                    Err(($crate::parser::Progress::MadeProgress, fail, state))
                }
            }
        }
    };
}
|
|
|
|
/// Parses `$opening_brace`, then `$parser`, then `$closing_brace` (for example
/// a pair of parentheses around an expression) and yields only the output of
/// `$parser`; both braces are discarded.
#[macro_export]
macro_rules! between {
    ($opening_brace:expr, $parser:expr, $closing_brace:expr) => {
        skip_first!(
            $opening_brace,
            skip_second!($parser, $closing_brace)
        )
    };
}
|
|
|
|
/// Parses a single record field and yields an `AssignedField`.
///
/// A field is a lowercase identifier (the label), followed by optional
/// blankspace, optionally followed by one of:
/// * `:` and a value, giving `RequiredValue`;
/// * `?` and a value, giving `OptionalValue`.
///
/// A label with no value gives `LabelOnly`, wrapped in `SpaceAfter` when the
/// blankspace after the label is not empty.
#[macro_export]
macro_rules! record_field {
    ($val_parser:expr, $min_indent:expr) => {
        move |arena: &'a bumpalo::Bump,
              state: $crate::parser::State<'a>|
              -> $crate::parser::ParseResult<'a, $crate::ast::AssignedField<'a, _>> {
            use $crate::ast::AssignedField::*;
            use $crate::blankspace::{space0, space0_before};
            use $crate::ident::lowercase_ident;
            use $crate::parser::ascii_char;
            use $crate::parser::Either::*;

            // The field name is mandatory, e.g. "email".
            let (progress, loc_label, state) = loc!(lowercase_ident()).parse(arena, state)?;
            debug_assert_eq!(progress, MadeProgress);

            let (_, spaces, state) = space0($min_indent).parse(arena, state)?;

            // The value is optional: `{ email }` and `{ email: blah }` are both
            // accepted, in literals as well as in types.
            let (_, opt_loc_val, state) = $crate::parser::optional(either!(
                skip_first!(ascii_char(b':'), space0_before($val_parser, $min_indent)),
                skip_first!(ascii_char(b'?'), space0_before($val_parser, $min_indent))
            ))
            .parse(arena, state)?;

            let answer = match opt_loc_val {
                // `label: value`
                Some(First(loc_val)) => RequiredValue(loc_label, spaces, arena.alloc(loc_val)),
                // `label ? value`
                Some(Second(loc_val)) => OptionalValue(loc_label, spaces, arena.alloc(loc_val)),
                // No value was given, so only the label is recorded;
                // canonicalization decides later what to do with it.
                None if spaces.is_empty() => LabelOnly(loc_label),
                None => SpaceAfter(arena.alloc(LabelOnly(loc_label)), spaces),
            };

            Ok((MadeProgress, answer, state))
        }
    };
}
|
|
|
|
/// Parses a record written as `{ field, field, ... }` with no update prefix.
///
/// This is `collection_trailing_sep!` with `{` and `}` as braces, `,` as the
/// delimiter and a located `record_field!` as the element parser.
#[macro_export]
macro_rules! record_without_update {
    ($val_parser:expr, $min_indent:expr) => {
        // `ascii_char` is named through `$crate::parser`, as `record!` does, so
        // that this macro also works at call sites that have not imported it.
        collection_trailing_sep!(
            $crate::parser::ascii_char(b'{'),
            loc!(record_field!($val_parser, $min_indent)),
            $crate::parser::ascii_char(b','),
            $crate::parser::ascii_char(b'}'),
            $min_indent
        )
    };
}
|
|
|
|
/// Parses a record, with an optional record-update prefix.
///
/// After the opening `{` the parser accepts an optional `identifier &` prefix
/// and then a comma-separated list of located `record_field!` entries (a
/// trailing comma is allowed) up to the closing `}`. The output is the pair
/// built by the outer `and!`: the result of the optional update prefix, and
/// the located result of parsing the fields.
#[macro_export]
macro_rules! record {
    ($val_parser:expr, $min_indent:expr) => {
        skip_first!(
            $crate::parser::ascii_char(b'{'),
            and!(
                // An identifier followed by '&' may come first; that makes this
                // a record update, e.g. { Foo.user & username: "blah" }.
                $crate::parser::optional(skip_second!(
                    $crate::blankspace::space0_around(
                        // The ident is wrapped in an Expr so that there is a
                        // Spaceable value to work with. Canonicalization later
                        // checks that it is an Expr::Var (and not, for example,
                        // an `Expr::Access`) and extracts its string.
                        loc!(map_with_arena!(
                            $crate::expr::ident(),
                            $crate::expr::ident_to_expr
                        )),
                        $min_indent
                    ),
                    $crate::parser::ascii_char(b'&')
                )),
                loc!(skip_first!(
                    // Plain space characters are deliberately accepted here, so
                    // that `{ }` parses as an empty record which the formatter
                    // can then turn back into `{}`.
                    zero_or_more!($crate::parser::ascii_char(b' ')),
                    skip_second!(
                        and!(
                            $crate::parser::trailing_sep_by0(
                                $crate::parser::ascii_char(b','),
                                $crate::blankspace::space0_around(
                                    loc!(record_field!($val_parser, $min_indent)),
                                    $min_indent
                                ),
                            ),
                            $crate::blankspace::space0($min_indent)
                        ),
                        $crate::parser::ascii_char(b'}')
                    )
                ))
            )
        )
    };
}
|
|
|
|
/// For some reason, some usages won't compile unless they use this instead of the macro version
|
|
#[inline(always)]
|
|
pub fn and<'a, P1, P2, A, B>(p1: P1, p2: P2) -> impl Parser<'a, (A, B)>
|
|
where
|
|
P1: Parser<'a, A>,
|
|
P2: Parser<'a, B>,
|
|
P1: 'a,
|
|
P2: 'a,
|
|
A: 'a,
|
|
B: 'a,
|
|
{
|
|
and!(p1, p2)
|
|
}
|
|
|
|
/// For some reason, some usages won't compile unless they use this instead of the macro version
|
|
#[inline(always)]
|
|
pub fn loc<'a, P, Val>(parser: P) -> impl Parser<'a, Located<Val>>
|
|
where
|
|
P: Parser<'a, Val>,
|
|
{
|
|
loc!(parser)
|
|
}
|
|
|
|
/// For some reason, some usages won't compile unless they use this instead of the macro version
|
|
#[inline(always)]
|
|
pub fn map<'a, P, F, Before, After>(parser: P, transform: F) -> impl Parser<'a, After>
|
|
where
|
|
P: Parser<'a, Before>,
|
|
F: Fn(Before) -> After,
|
|
{
|
|
map!(parser, transform)
|
|
}
|
|
|
|
/// For some reason, some usages won't compile unless they use this instead of the macro version
|
|
#[inline(always)]
|
|
pub fn map_with_arena<'a, P, F, Before, After>(parser: P, transform: F) -> impl Parser<'a, After>
|
|
where
|
|
P: Parser<'a, Before>,
|
|
P: 'a,
|
|
F: Fn(&'a Bump, Before) -> After,
|
|
F: 'a,
|
|
Before: 'a,
|
|
After: 'a,
|
|
{
|
|
map_with_arena!(parser, transform)
|
|
}
|
|
|
|
/// For some reason, some usages won't compile unless they use this instead of the macro version
|
|
#[inline(always)]
|
|
pub fn attempt<'a, P, Val>(attempting: Attempting, parser: P) -> impl Parser<'a, Val>
|
|
where
|
|
P: Parser<'a, Val>,
|
|
{
|
|
attempt!(attempting, parser)
|
|
}
|
|
|
|
pub fn parse_utf8(bytes: &[u8]) -> Result<&str, FailReason> {
|
|
match from_utf8(bytes) {
|
|
Ok(string) => Ok(string),
|
|
Err(_) => Err(FailReason::BadUtf8),
|
|
}
|
|
}
|
|
|
|
pub fn end_of_file<'a>() -> impl Parser<'a, ()> {
|
|
|arena: &'a Bump, state: State<'a>| {
|
|
if state.has_reached_end() {
|
|
Ok((NoProgress, (), state))
|
|
} else {
|
|
Err((
|
|
NoProgress,
|
|
Bag::from_state(arena, &state, FailReason::ConditionFailed),
|
|
state,
|
|
))
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn backtrackable<'a, P, Val>(parser: P) -> impl Parser<'a, Val>
|
|
where
|
|
P: Parser<'a, Val>,
|
|
{
|
|
move |arena: &'a Bump, state: State<'a>| {
|
|
let old_state = state.clone();
|
|
|
|
match parser.parse(arena, state) {
|
|
Ok((_, a, s1)) => Ok((NoProgress, a, s1)),
|
|
Err((_, f, _)) => Err((NoProgress, f, old_state)),
|
|
}
|
|
}
|
|
}
|