Refactor Parser trait to pass min_indent

This removes the need to explicitly pass min_indent through when using the parser combinators.

My ultimate goal here is to evolve the current parser toward a purely combinator-based parser,
at which point we can more easily transition to a formal(ish) grammar, or expand the meanings of combinators
to include things like:
* Incremental (re)parsing
* Unified parsing and formatting code
* Better error recovery
* Using the main parser directly for syntax highlighting
This commit is contained in:
Joshua Warner 2022-10-25 21:03:28 -07:00
parent ae1a9e4dd6
commit 07be8ec000
No known key found for this signature in database
GPG key ID: 89AD497003F93FDD
18 changed files with 704 additions and 844 deletions

View file

@ -729,7 +729,12 @@ pub struct FileError<'a, T> {
}
pub trait Parser<'a, Output, Error> {
fn parse(&self, _: &'a Bump, _: State<'a>) -> ParseResult<'a, Output, Error>;
fn parse(
&self,
alloc: &'a Bump,
state: State<'a>,
min_indent: u32,
) -> ParseResult<'a, Output, Error>;
#[cfg(not(feature = "parse_debug_trace"))]
fn trace(self, _message: &'static str) -> Self
@ -759,10 +764,15 @@ pub trait Parser<'a, Output, Error> {
impl<'a, F, Output, Error> Parser<'a, Output, Error> for F
where
Error: 'a,
F: Fn(&'a Bump, State<'a>) -> ParseResult<'a, Output, Error>,
F: Fn(&'a Bump, State<'a>, u32) -> ParseResult<'a, Output, Error>,
{
fn parse(&self, arena: &'a Bump, state: State<'a>) -> ParseResult<'a, Output, Error> {
self(arena, state)
fn parse(
&self,
arena: &'a Bump,
state: State<'a>,
min_indent: u32,
) -> ParseResult<'a, Output, Error> {
self(arena, state, min_indent)
}
}
@ -800,7 +810,7 @@ where
);
INDENT.with(|i| *i.borrow_mut() += 1);
let res = self.parser.parse(arena, state);
let res = self.parser.parse(arena, state, min_indent);
INDENT.with(|i| *i.borrow_mut() = cur_indent);
let (progress, value, state) = match &res {
@ -827,8 +837,8 @@ where
P: Parser<'a, Val, Error>,
Val: 'a,
{
move |arena, state: State<'a>| {
let (progress, answer, state) = parser.parse(arena, state)?;
move |arena, state: State<'a>, min_indent: u32| {
let (progress, answer, state) = parser.parse(arena, state, min_indent)?;
Ok((progress, &*arena.alloc(answer), state))
}
@ -844,11 +854,11 @@ where
F: Fn(Progress, Before) -> P2,
Error: 'a,
{
move |arena, state| {
move |arena, state, min_indent| {
parser
.parse(arena, state)
.parse(arena, state, min_indent)
.and_then(|(progress, output, next_state)| {
transform(progress, output).parse(arena, next_state)
transform(progress, output).parse(arena, next_state, min_indent)
})
}
}
@ -858,13 +868,13 @@ where
P1: Parser<'a, Before, E>,
After: 'a,
E: 'a,
F: Fn(&'a Bump, State<'a>, Progress, Before) -> ParseResult<'a, After, E>,
F: Fn(&'a Bump, State<'a>, Progress, Before, u32) -> ParseResult<'a, After, E>,
{
move |arena, state| {
move |arena, state, min_indent| {
parser
.parse(arena, state)
.parse(arena, state, min_indent)
.and_then(|(progress, output, next_state)| {
transform(arena, next_state, progress, output)
transform(arena, next_state, progress, output, min_indent)
})
}
}
@ -874,7 +884,7 @@ where
ToError: Fn(Position) -> E,
E: 'a,
{
move |_, mut state: State<'a>| {
move |_, mut state: State<'a>, _min_indent| {
let width = keyword.len();
if !state.bytes().starts_with(keyword.as_bytes()) {
@ -908,10 +918,10 @@ where
P: Parser<'a, Val, Error>,
Error: 'a,
{
move |arena, state: State<'a>| {
move |arena, state: State<'a>, min_indent: u32| {
let start_bytes_len = state.bytes().len();
match parser.parse(arena, state) {
match parser.parse(arena, state, min_indent) {
Ok((elem_progress, first_output, next_state)) => {
// in practice, we want elements to make progress
debug_assert_eq!(elem_progress, MadeProgress);
@ -922,10 +932,10 @@ where
buf.push(first_output);
loop {
match delimiter.parse(arena, state) {
match delimiter.parse(arena, state, min_indent) {
Ok((_, (), next_state)) => {
// If the delimiter passed, check the element parser.
match parser.parse(arena, next_state) {
match parser.parse(arena, next_state, min_indent) {
Ok((element_progress, next_output, next_state)) => {
// in practice, we want elements to make progress
debug_assert_eq!(element_progress, MadeProgress);
@ -971,10 +981,10 @@ where
P: Parser<'a, Val, Error>,
Error: 'a,
{
move |arena, state: State<'a>| {
move |arena, state: State<'a>, min_indent: u32| {
let start_bytes_len = state.bytes().len();
match parser.parse(arena, state) {
match parser.parse(arena, state, min_indent) {
Ok((progress, first_output, next_state)) => {
// in practice, we want elements to make progress
debug_assert_eq!(progress, MadeProgress);
@ -984,10 +994,10 @@ where
buf.push(first_output);
loop {
match delimiter.parse(arena, state) {
match delimiter.parse(arena, state, min_indent) {
Ok((_, (), next_state)) => {
// If the delimiter passed, check the element parser.
match parser.parse(arena, next_state) {
match parser.parse(arena, next_state, min_indent) {
Ok((element_progress, next_output, next_state)) => {
// in practice, we want elements to make progress
debug_assert_eq!(element_progress, MadeProgress);
@ -1032,10 +1042,10 @@ where
P: Parser<'a, Val, Error>,
Error: 'a,
{
move |arena, state: State<'a>| {
move |arena, state: State<'a>, min_indent: u32| {
let start_bytes_len = state.bytes().len();
match parser.parse(arena, state) {
match parser.parse(arena, state, min_indent) {
Ok((progress, first_output, next_state)) => {
debug_assert_eq!(progress, MadeProgress);
let mut state = next_state;
@ -1044,10 +1054,10 @@ where
buf.push(first_output);
loop {
match delimiter.parse(arena, state) {
match delimiter.parse(arena, state, min_indent) {
Ok((_, (), next_state)) => {
// If the delimiter passed, check the element parser.
match parser.parse(arena, next_state) {
match parser.parse(arena, next_state, min_indent) {
Ok((_, next_output, next_state)) => {
state = next_state;
buf.push(next_output);
@ -1093,10 +1103,10 @@ where
V: Fn(Position) -> Error,
Error: 'a,
{
move |arena, state: State<'a>| {
move |arena, state: State<'a>, min_indent: u32| {
let start_bytes_len = state.bytes().len();
match parser.parse(arena, state) {
match parser.parse(arena, state, min_indent) {
Ok((progress, first_output, next_state)) => {
debug_assert_eq!(progress, MadeProgress);
let mut state = next_state;
@ -1105,10 +1115,10 @@ where
buf.push(first_output);
loop {
match delimiter.parse(arena, state) {
match delimiter.parse(arena, state, min_indent) {
Ok((_, (), next_state)) => {
// If the delimiter passed, check the element parser.
match parser.parse(arena, next_state) {
match parser.parse(arena, next_state, min_indent) {
Ok((_, next_output, next_state)) => {
state = next_state;
buf.push(next_output);
@ -1165,12 +1175,12 @@ where
P: Parser<'a, T, E>,
E: 'a,
{
move |arena: &'a Bump, state: State<'a>| {
move |arena: &'a Bump, state: State<'a>, min_indent: u32| {
// We have to clone this because if the optional parser fails,
// we need to revert back to the original state.
let original_state = state.clone();
match parser.parse(arena, state) {
match parser.parse(arena, state, min_indent) {
Ok((progress, out1, state)) => Ok((progress, Some(out1), state)),
Err((_, _, _)) => {
// NOTE this will backtrack
@ -1189,12 +1199,12 @@ where
#[macro_export]
macro_rules! loc {
($parser:expr) => {
move |arena, state: $crate::state::State<'a>| {
move |arena, state: $crate::state::State<'a>, min_indent: u32| {
use roc_region::all::{Loc, Region};
let start = state.pos();
match $parser.parse(arena, state) {
match $parser.parse(arena, state, min_indent) {
Ok((progress, value, state)) => {
let end = state.pos();
let region = Region::new(start, end);
@ -1211,11 +1221,11 @@ macro_rules! loc {
#[macro_export]
macro_rules! skip_first {
($p1:expr, $p2:expr) => {
move |arena, state: $crate::state::State<'a>| {
move |arena, state: $crate::state::State<'a>, min_indent: u32| {
let original_state = state.clone();
match $p1.parse(arena, state) {
Ok((p1, _, state)) => match $p2.parse(arena, state) {
match $p1.parse(arena, state, min_indent) {
Ok((p1, _, state)) => match $p2.parse(arena, state, min_indent) {
Ok((p2, out2, state)) => Ok((p1.or(p2), out2, state)),
Err((p2, fail, _)) => Err((p1.or(p2), fail, original_state)),
},
@ -1230,11 +1240,11 @@ macro_rules! skip_first {
#[macro_export]
macro_rules! skip_second {
($p1:expr, $p2:expr) => {
move |arena, state: $crate::state::State<'a>| {
move |arena, state: $crate::state::State<'a>, min_indent: u32| {
let original_state = state.clone();
match $p1.parse(arena, state) {
Ok((p1, out1, state)) => match $p2.parse(arena, state) {
match $p1.parse(arena, state, min_indent) {
Ok((p1, out1, state)) => match $p2.parse(arena, state, min_indent) {
Ok((p2, _, state)) => Ok((p1.or(p2), out1, state)),
Err((p2, fail, _)) => Err((p1.or(p2), fail, original_state)),
},
@ -1279,12 +1289,12 @@ macro_rules! collection {
#[macro_export]
macro_rules! collection_trailing_sep_e {
($opening_brace:expr, $elem:expr, $delimiter:expr, $closing_brace:expr, $min_indent:expr, $open_problem:expr, $indent_problem:expr, $space_before:expr) => {
($opening_brace:expr, $elem:expr, $delimiter:expr, $closing_brace:expr, $open_problem:expr, $indent_problem:expr, $space_before:expr) => {
skip_first!(
$opening_brace,
|arena, state| {
let (_, spaces, state) = space0_e($min_indent, $indent_problem)
.parse(arena, state)?;
|arena, state, min_indent| {
let (_, spaces, state) = space0_e($indent_problem)
.parse(arena, state, min_indent)?;
let (_, (mut parsed_elems, mut final_comments), state) =
and!(
@ -1292,23 +1302,18 @@ macro_rules! collection_trailing_sep_e {
$delimiter,
$crate::blankspace::space0_before_optional_after(
$elem,
$min_indent,
$indent_problem,
$indent_problem
)
),
$crate::blankspace::space0_e(
// we use min_indent=0 because we want to parse incorrectly indented closing braces
// and later fix these up in the formatter.
0 /* min_indent */,
$indent_problem)
).parse(arena, state)?;
$crate::parser::reset_min_indent($crate::blankspace::space0_e($indent_problem))
).parse(arena, state, min_indent)?;
let (_,_, state) =
if parsed_elems.is_empty() {
one_of_with_error![$open_problem; $closing_brace].parse(arena, state)?
one_of_with_error![$open_problem; $closing_brace].parse(arena, state, min_indent)?
} else {
$closing_brace.parse(arena, state)?
$closing_brace.parse(arena, state, min_indent)?
};
if !spaces.is_empty() {
@ -1334,22 +1339,33 @@ macro_rules! collection_trailing_sep_e {
#[macro_export]
macro_rules! succeed {
($value:expr) => {
move |_arena: &'a bumpalo::Bump, state: $crate::state::State<'a>| {
move |_arena: &'a bumpalo::Bump, state: $crate::state::State<'a>, _min_indent: u32| {
Ok((NoProgress, $value, state))
}
};
}
pub fn fail<'a, T, E, F>(f: F) -> impl Parser<'a, T, E>
where
T: 'a,
E: 'a,
F: Fn(Position) -> E,
{
move |_arena: &'a bumpalo::Bump, state: State<'a>, _min_indent: u32| {
Err((NoProgress, f(state.pos()), state))
}
}
#[macro_export]
macro_rules! and {
($p1:expr, $p2:expr) => {
move |arena: &'a bumpalo::Bump, state: $crate::state::State<'a>| {
move |arena: &'a bumpalo::Bump, state: $crate::state::State<'a>, min_indent: u32| {
// We have to clone this because if the first parser passes and then
// the second one fails, we need to revert back to the original state.
let original_state = state.clone();
match $p1.parse(arena, state) {
Ok((p1, out1, state)) => match $p2.parse(arena, state) {
match $p1.parse(arena, state, min_indent) {
Ok((p1, out1, state)) => match $p2.parse(arena, state, min_indent) {
Ok((p2, out2, state)) => Ok((p1.or(p2), (out1, out2), state)),
Err((p2, fail, _)) => Err((p1.or(p2), fail, original_state)),
},
@ -1362,12 +1378,12 @@ macro_rules! and {
#[macro_export]
macro_rules! one_of {
($p1:expr, $p2:expr) => {
move |arena: &'a bumpalo::Bump, state: $crate::state::State<'a>| {
move |arena: &'a bumpalo::Bump, state: $crate::state::State<'a>, min_indent: u32| {
match $p1.parse(arena, state) {
match $p1.parse(arena, state, min_indent) {
valid @ Ok(_) => valid,
Err((MadeProgress, fail, state)) => Err((MadeProgress, fail, state)),
Err((NoProgress, _, state)) => $p2.parse( arena, state),
Err((NoProgress, _, state)) => $p2.parse(arena, state, min_indent),
}
}
};
@ -1383,8 +1399,8 @@ macro_rules! one_of {
#[macro_export]
macro_rules! maybe {
($p1:expr) => {
move |arena: &'a bumpalo::Bump, state: $crate::state::State<'a>| match $p1
.parse(arena, state)
move |arena: &'a bumpalo::Bump, state: $crate::state::State<'a>, min_indent: u32| match $p1
.parse(arena, state, min_indent)
{
Ok((progress, value, state)) => Ok((progress, Some(value), state)),
Err((MadeProgress, fail, state)) => Err((MadeProgress, fail, state)),
@ -1396,9 +1412,9 @@ macro_rules! maybe {
#[macro_export]
macro_rules! one_of_with_error {
($toerror:expr; $p1:expr) => {
move |arena: &'a bumpalo::Bump, state: $crate::state::State<'a>| {
move |arena: &'a bumpalo::Bump, state: $crate::state::State<'a>, min_indent: u32| {
match $p1.parse(arena, state) {
match $p1.parse(arena, state, min_indent) {
valid @ Ok(_) => valid,
Err((MadeProgress, fail, state)) => Err((MadeProgress, fail, state )),
Err((NoProgress, _, state)) => Err((MadeProgress, $toerror(state.pos()), state)),
@ -1411,13 +1427,44 @@ macro_rules! one_of_with_error {
};
}
/// Wraps `parser` so that it always runs with a `min_indent` of 0.
///
/// The `min_indent` supplied by the caller of the returned parser is discarded.
pub fn reset_min_indent<'a, P, T, X: 'a>(parser: P) -> impl Parser<'a, T, X>
where
    P: Parser<'a, T, X>,
{
    move |arena, state, _outer_min_indent| {
        let min_indent = 0;
        parser.parse(arena, state, min_indent)
    }
}
/// Wraps `parser` so that it always runs with the fixed `min_indent` given here.
///
/// The `min_indent` supplied by the caller of the returned parser is discarded
/// in favour of the value captured at construction time.
pub fn set_min_indent<'a, P, T, X: 'a>(min_indent: u32, parser: P) -> impl Parser<'a, T, X>
where
    P: Parser<'a, T, X>,
{
    let fixed_min_indent = min_indent;
    move |arena, state, _outer_min_indent| {
        parser.parse(arena, state, fixed_min_indent)
    }
}
/// Wraps `parser` so that it runs with a `min_indent` one greater than the
/// `min_indent` passed to the returned parser.
pub fn increment_min_indent<'a, P, T, X: 'a>(parser: P) -> impl Parser<'a, T, X>
where
    P: Parser<'a, T, X>,
{
    move |arena, state, min_indent| {
        let deeper_min_indent = min_indent + 1;
        parser.parse(arena, state, deeper_min_indent)
    }
}
/// Wraps `parser` so that its `min_indent` is derived from the incoming state:
/// the state's current column plus one.
///
/// The `min_indent` supplied by the caller of the returned parser is discarded.
pub fn absolute_column_min_indent<'a, P, T, X: 'a>(parser: P) -> impl Parser<'a, T, X>
where
    P: Parser<'a, T, X>,
{
    move |arena, state: State<'a>, _outer_min_indent| {
        // Read the column before `state` is moved into the inner parser.
        let current_column = state.column();
        parser.parse(arena, state, current_column + 1)
    }
}
pub fn specialize<'a, F, P, T, X, Y>(map_error: F, parser: P) -> impl Parser<'a, T, Y>
where
F: Fn(X, Position) -> Y,
P: Parser<'a, T, X>,
Y: 'a,
{
move |a, s| match parser.parse(a, s) {
move |a, s, min_indent| match parser.parse(a, s, min_indent) {
Ok(t) => Ok(t),
Err((p, error, s)) => Err((p, map_error(error, s.pos()), s)),
}
@ -1430,9 +1477,9 @@ where
P: Parser<'a, T, X>,
Y: 'a,
{
move |a, s: State<'a>| {
move |a, s: State<'a>, min_indent: u32| {
let start = s.pos();
match parser.parse(a, s) {
match parser.parse(a, s, min_indent) {
Ok(t) => Ok(t),
Err((p, error, s)) => Err((p, map_error(error, Region::new(start, s.pos())), s)),
}
@ -1446,7 +1493,7 @@ where
Y: 'a,
X: 'a,
{
move |a, s| match parser.parse(a, s) {
move |a, s, min_indent| match parser.parse(a, s, min_indent) {
Ok(t) => Ok(t),
Err((p, error, s)) => Err((p, map_error(a.alloc(error), s.pos()), s)),
}
@ -1459,7 +1506,7 @@ where
{
debug_assert_ne!(word, b'\n');
move |_arena: &'a Bump, state: State<'a>| match state.bytes().first() {
move |_arena: &'a Bump, state: State<'a>, _min_indent: u32| match state.bytes().first() {
Some(x) if *x == word => {
let state = state.advance(1);
Ok((MadeProgress, (), state))
@ -1503,7 +1550,7 @@ where
let needle = [word_1, word_2];
move |_arena: &'a Bump, state: State<'a>| {
move |_arena: &'a Bump, state: State<'a>, _min_indent: u32| {
if state.bytes().starts_with(&needle) {
let state = state.advance(2);
Ok((MadeProgress, (), state))
@ -1529,7 +1576,7 @@ where
let needle = [word_1, word_2, word_3];
move |_arena: &'a Bump, state: State<'a>| {
move |_arena: &'a Bump, state: State<'a>, _min_indent: u32| {
if state.bytes().starts_with(&needle) {
let state = state.advance(3);
Ok((MadeProgress, (), state))
@ -1552,9 +1599,9 @@ macro_rules! word1_check_indent {
#[macro_export]
macro_rules! map {
($parser:expr, $transform:expr) => {
move |arena, state| {
move |arena, state, min_indent| {
$parser
.parse(arena, state)
.parse(arena, state, min_indent)
.map(|(progress, output, next_state)| (progress, $transform(output), next_state))
}
};
@ -1563,9 +1610,9 @@ macro_rules! map {
#[macro_export]
macro_rules! map_with_arena {
($parser:expr, $transform:expr) => {
move |arena, state| {
move |arena, state, min_indent| {
$parser
.parse(arena, state)
.parse(arena, state, min_indent)
.map(|(progress, output, next_state)| {
(progress, $transform(arena, output), next_state)
})
@ -1576,12 +1623,12 @@ macro_rules! map_with_arena {
#[macro_export]
macro_rules! zero_or_more {
($parser:expr) => {
move |arena, state: State<'a>| {
move |arena, state: State<'a>, min_indent: u32| {
use bumpalo::collections::Vec;
let start_bytes_len = state.bytes().len();
match $parser.parse(arena, state) {
match $parser.parse(arena, state, min_indent) {
Ok((_, first_output, next_state)) => {
let mut state = next_state;
let mut buf = Vec::with_capacity_in(1, arena);
@ -1589,7 +1636,7 @@ macro_rules! zero_or_more {
buf.push(first_output);
loop {
match $parser.parse(arena, state) {
match $parser.parse(arena, state, min_indent) {
Ok((_, next_output, next_state)) => {
state = next_state;
buf.push(next_output);
@ -1632,10 +1679,10 @@ macro_rules! zero_or_more {
#[macro_export]
macro_rules! one_or_more {
($parser:expr, $to_error:expr) => {
move |arena, state: State<'a>| {
move |arena, state: State<'a>, min_indent: u32| {
use bumpalo::collections::Vec;
match $parser.parse(arena, state) {
match $parser.parse(arena, state, min_indent) {
Ok((_, first_output, next_state)) => {
let mut state = next_state;
let mut buf = Vec::with_capacity_in(1, arena);
@ -1643,7 +1690,7 @@ macro_rules! one_or_more {
buf.push(first_output);
loop {
match $parser.parse(arena, state) {
match $parser.parse(arena, state, min_indent) {
Ok((_, next_output, next_state)) => {
state = next_state;
buf.push(next_output);
@ -1668,20 +1715,22 @@ macro_rules! one_or_more {
#[macro_export]
macro_rules! debug {
($parser:expr) => {
move |arena, state: $crate::state::State<'a>| dbg!($parser.parse(arena, state))
move |arena, state: $crate::state::State<'a>, min_indent: u32| {
dbg!($parser.parse(arena, state, min_indent))
}
};
}
#[macro_export]
macro_rules! either {
($p1:expr, $p2:expr) => {
move |arena: &'a bumpalo::Bump, state: $crate::state::State<'a>| match $p1
.parse(arena, state)
move |arena: &'a bumpalo::Bump, state: $crate::state::State<'a>, min_indent: u32| match $p1
.parse(arena, state, min_indent)
{
Ok((progress, output, state)) => {
Ok((progress, $crate::parser::Either::First(output), state))
}
Err((NoProgress, _, state)) => match $p2.parse(arena, state) {
Err((NoProgress, _, state)) => match $p2.parse(arena, state, min_indent) {
Ok((progress, output, state)) => {
Ok((progress, $crate::parser::Either::Second(output), state))
}
@ -1749,10 +1798,10 @@ where
P: Parser<'a, Val, Error>,
Error: 'a,
{
move |arena: &'a Bump, state: State<'a>| {
move |arena: &'a Bump, state: State<'a>, min_indent: u32| {
let old_state = state.clone();
match parser.parse(arena, state) {
match parser.parse(arena, state, min_indent) {
Ok((_, a, s1)) => Ok((NoProgress, a, s1)),
Err((_, f, _)) => Err((NoProgress, f, old_state)),
}