Don't allow closures to gobble unindented expr lines following them

This commit is contained in:
Joshua Warner 2022-10-06 19:38:33 -07:00
parent ab4e03b05d
commit 4d4c0d9483
13 changed files with 267 additions and 87 deletions

View file

@ -345,7 +345,12 @@ fn eat_spaces<'a>(
state = state.advance(1);
return eat_line_comment(state, comments_and_newlines);
}
_ => break,
_ => {
if !comments_and_newlines.is_empty() {
state = state.mark_current_indent();
}
break;
}
}
}
@ -398,7 +403,10 @@ fn eat_line_comment<'a>(
index += 1;
continue 'outer;
}
_ => break,
_ => {
state = state.mark_current_indent();
break;
}
}
index += 1;
@ -490,7 +498,10 @@ fn eat_line_comment<'a>(
index += 1;
continue 'outer;
}
_ => break,
_ => {
state = state.mark_current_indent();
break;
}
}
index += 1;
@ -554,7 +565,10 @@ fn eat_line_comment<'a>(
index += 1;
continue 'outer;
}
_ => break,
_ => {
state = state.mark_current_indent();
break;
}
}
index += 1;

View file

@ -8,8 +8,8 @@ use crate::blankspace::{
use crate::ident::{lowercase_ident, parse_ident, Ident};
use crate::keyword;
use crate::parser::{
self, backtrackable, optional, sep_by1, sep_by1_e, specialize, specialize_ref, then,
trailing_sep_by0, word1, word2, EClosure, EExpect, EExpr, EIf, EInParens, EList, ENumber,
self, backtrackable, optional, parse_word1, sep_by1, sep_by1_e, specialize, specialize_ref,
then, trailing_sep_by0, word1, word2, EClosure, EExpect, EExpr, EIf, EInParens, EList, ENumber,
EPattern, ERecord, EString, EType, EWhen, Either, ParseResult, Parser,
};
use crate::pattern::{loc_closure_param, loc_has_parser};
@ -353,7 +353,6 @@ fn unary_negate<'a>() -> impl Parser<'a, (), EExpr<'a>> {
fn parse_expr_start<'a>(
min_indent: u32,
options: ExprParseOptions,
start_column: u32,
arena: &'a Bump,
state: State<'a>,
) -> ParseResult<'a, Loc<Expr<'a>>, EExpr<'a>> {
@ -368,7 +367,7 @@ fn parse_expr_start<'a>(
EExpr::Closure,
closure_help(min_indent, options)
)),
loc!(move |a, s| parse_expr_operator_chain(min_indent, options, start_column, a, s)),
loc!(move |a, s| parse_expr_operator_chain(min_indent, options, a, s)),
fail_expr_start_e()
]
.parse(arena, state)
@ -377,10 +376,11 @@ fn parse_expr_start<'a>(
fn parse_expr_operator_chain<'a>(
min_indent: u32,
options: ExprParseOptions,
start_column: u32,
arena: &'a Bump,
state: State<'a>,
) -> ParseResult<'a, Expr<'a>, EExpr<'a>> {
let min_indent = state.check_indent(min_indent, EExpr::IndentStart)?;
let (_, expr, state) =
loc_possibly_negative_or_negated_term(min_indent, options).parse(arena, state)?;
@ -399,7 +399,7 @@ fn parse_expr_operator_chain<'a>(
end,
};
parse_expr_end(min_indent, options, start_column, expr_state, arena, state)
parse_expr_end(min_indent, options, expr_state, arena, state)
}
}
}
@ -616,13 +616,11 @@ fn numeric_negate_expression<'a, T>(
fn parse_defs_end<'a>(
_options: ExprParseOptions,
start_column: u32,
min_indent: u32,
mut defs: Defs<'a>,
arena: &'a Bump,
state: State<'a>,
) -> ParseResult<'a, Defs<'a>, EExpr<'a>> {
let min_indent = start_column;
let mut global_state = state;
loop {
@ -735,7 +733,7 @@ fn parse_defs_end<'a>(
loc_has_parser(min_indent).parse(arena, state.clone())
{
let (_, (type_def, def_region), state) = finish_parsing_ability_def_help(
start_column,
min_indent,
Loc::at(name_region, name),
args,
loc_has,
@ -979,14 +977,12 @@ fn parse_defs_end<'a>(
fn parse_defs_expr<'a>(
options: ExprParseOptions,
start_column: u32,
min_indent: u32,
defs: Defs<'a>,
arena: &'a Bump,
state: State<'a>,
) -> ParseResult<'a, Expr<'a>, EExpr<'a>> {
let min_indent = start_column;
match parse_defs_end(options, start_column, defs, arena, state) {
match parse_defs_end(options, min_indent, defs, arena, state) {
Err(bad) => Err(bad),
Ok((_, def_state, state)) => {
// this is no def, because there is no `=` or `:`; parse as an expr
@ -1062,7 +1058,6 @@ enum AliasOrOpaque {
fn finish_parsing_alias_or_opaque<'a>(
min_indent: u32,
options: ExprParseOptions,
start_column: u32,
expr_state: ExprState<'a>,
loc_op: Loc<BinOp>,
arena: &'a Bump,
@ -1071,7 +1066,7 @@ fn finish_parsing_alias_or_opaque<'a>(
kind: AliasOrOpaque,
) -> ParseResult<'a, Expr<'a>, EExpr<'a>> {
let expr_region = expr_state.expr.region;
let indented_more = start_column + 1;
let indented_more = min_indent + 1;
let (expr, arguments) = expr_state
.validate_is_type_def(arena, loc_op, kind)
@ -1187,7 +1182,7 @@ fn finish_parsing_alias_or_opaque<'a>(
}
};
parse_defs_expr(options, start_column, defs, arena, state)
parse_defs_expr(options, min_indent, defs, arena, state)
}
mod ability {
@ -1376,7 +1371,6 @@ fn finish_parsing_ability_def_help<'a>(
fn parse_expr_operator<'a>(
min_indent: u32,
options: ExprParseOptions,
start_column: u32,
mut expr_state: ExprState<'a>,
loc_op: Loc<BinOp>,
arena: &'a Bump,
@ -1417,11 +1411,11 @@ fn parse_expr_operator<'a>(
expr_state.spaces_after = spaces;
expr_state.end = new_end;
parse_expr_end(min_indent, options, start_column, expr_state, arena, state)
parse_expr_end(min_indent, options, expr_state, arena, state)
}
BinOp::Assignment => {
let expr_region = expr_state.expr.region;
let indented_more = start_column + 1;
let indented_more = min_indent + 1;
let call = expr_state
.validate_assignment_or_backpassing(arena, loc_op, EExpr::ElmStyleFunction)
@ -1460,11 +1454,11 @@ fn parse_expr_operator<'a>(
let mut defs = Defs::default();
defs.push_value_def(value_def, def_region, &[], &[]);
parse_defs_expr(options, start_column, defs, arena, state)
parse_defs_expr(options, min_indent, defs, arena, state)
}
BinOp::Backpassing => {
let expr_region = expr_state.expr.region;
let indented_more = start_column + 1;
let indented_more = min_indent + 1;
let call = expr_state
.validate_assignment_or_backpassing(arena, loc_op, |_, pos| {
@ -1514,7 +1508,6 @@ fn parse_expr_operator<'a>(
BinOp::IsAliasType | BinOp::IsOpaqueType => finish_parsing_alias_or_opaque(
min_indent,
options,
start_column,
expr_state,
loc_op,
arena,
@ -1564,7 +1557,7 @@ fn parse_expr_operator<'a>(
expr_state.spaces_after = spaces;
// TODO new start?
parse_expr_end(min_indent, options, start_column, expr_state, arena, state)
parse_expr_end(min_indent, options, expr_state, arena, state)
}
}
}
@ -1578,7 +1571,6 @@ fn parse_expr_operator<'a>(
fn parse_expr_end<'a>(
min_indent: u32,
options: ExprParseOptions,
start_column: u32,
mut expr_state: ExprState<'a>,
arena: &'a Bump,
state: State<'a>,
@ -1638,13 +1630,13 @@ fn parse_expr_end<'a>(
let args = arguments.into_bump_slice();
let (_, (type_def, def_region), state) =
finish_parsing_ability_def_help(start_column, name, args, has, arena, state)?;
finish_parsing_ability_def_help(min_indent, name, args, has, arena, state)?;
let mut defs = Defs::default();
defs.push_type_def(type_def, def_region, &[], &[]);
parse_defs_expr(options, start_column, defs, arena, state)
parse_defs_expr(options, min_indent, defs, arena, state)
}
Ok((_, mut arg, state)) => {
let new_end = state.pos();
@ -1672,7 +1664,7 @@ fn parse_expr_end<'a>(
expr_state.end = new_end;
expr_state.spaces_after = new_spaces;
parse_expr_end(min_indent, options, start_column, expr_state, arena, state)
parse_expr_end(min_indent, options, expr_state, arena, state)
}
}
}
@ -1684,15 +1676,7 @@ fn parse_expr_end<'a>(
Ok((_, loc_op, state)) => {
expr_state.consume_spaces(arena);
expr_state.initial = before_op;
parse_expr_operator(
min_indent,
options,
start_column,
expr_state,
loc_op,
arena,
state,
)
parse_expr_operator(min_indent, options, expr_state, loc_op, arena, state)
}
Err((NoProgress, _, mut state)) => {
// try multi-backpassing
@ -1726,8 +1710,6 @@ fn parse_expr_end<'a>(
match word2(b'<', b'-', EExpr::BackpassArrow).parse(arena, state) {
Err((_, fail, state)) => Err((MadeProgress, fail, state)),
Ok((_, _, state)) => {
let min_indent = start_column;
let parse_body = space0_before_e(
move |a, s| parse_loc_expr(min_indent + 1, a, s),
min_indent,
@ -1805,8 +1787,7 @@ fn parse_loc_expr_with_options<'a>(
arena: &'a Bump,
state: State<'a>,
) -> ParseResult<'a, Loc<Expr<'a>>, EExpr<'a>> {
let column = state.column();
parse_expr_start(min_indent, options, column, arena, state)
parse_expr_start(min_indent, options, arena, state)
}
/// If the given Expr would parse the same way as a valid Pattern, convert it.
@ -1986,46 +1967,66 @@ fn closure_help<'a>(
min_indent: u32,
options: ExprParseOptions,
) -> impl Parser<'a, Expr<'a>, EClosure<'a>> {
map_with_arena!(
skip_first!(
// All closures start with a '\' - e.g. (\x -> x + 1)
word1(b'\\', EClosure::Start),
// Once we see the '\', we're committed to parsing this as a closure.
// It may turn out to be malformed, but it is definitely a closure.
and!(
// Parse the params
// Params are comma-separated
sep_by1_e(
word1(b',', EClosure::Comma),
space0_around_ee(
specialize(EClosure::Pattern, loc_closure_param(min_indent)),
min_indent,
EClosure::IndentArg,
EClosure::IndentArrow
),
EClosure::Arg,
),
skip_first!(
// Parse the -> which separates params from body
word2(b'-', b'>', EClosure::Arrow),
// Parse the body
space0_before_e(
specialize_ref(EClosure::Body, move |arena, state| {
parse_loc_expr_with_options(min_indent, options, arena, state)
}),
min_indent,
EClosure::IndentBody
)
)
)
),
|arena: &'a Bump, (params, loc_body)| {
let params: Vec<'a, Loc<Pattern<'a>>> = params;
let params: &'a [Loc<Pattern<'a>>] = params.into_bump_slice();
move |arena, state| parse_closure_help(arena, state, min_indent, options)
}
Expr::Closure(params, arena.alloc(loc_body))
}
fn parse_closure_help<'a>(
arena: &'a Bump,
state: State<'a>,
_min_indent: u32,
options: ExprParseOptions,
) -> ParseResult<'a, Expr<'a>, EClosure<'a>> {
let start_indent = state.line_indent();
let min_indent = start_indent;
// All closures start with a '\' - e.g. (\x -> x + 1)
let (_, (), state) = parse_word1(state, min_indent, b'\\', EClosure::Start)?;
// After the first token, all other tokens must be indented past the start of the line
let min_indent = min_indent + 1;
// Once we see the '\', we're committed to parsing this as a closure.
// It may turn out to be malformed, but it is definitely a closure.
// Parse the params
// Params are comma-separated
let (_, params, state) = sep_by1_e(
word1(b',', EClosure::Comma),
space0_around_ee(
specialize(EClosure::Pattern, loc_closure_param(min_indent)),
min_indent,
EClosure::IndentArg,
EClosure::IndentArrow,
),
EClosure::Arg,
)
.parse(arena, state)
.map_err(|(_p, e, s)| (MadeProgress, e, s))?;
let (_, loc_body, state) = skip_first!(
// Parse the -> which separates params from body
word2(b'-', b'>', EClosure::Arrow),
// Parse the body
space0_before_e(
specialize_ref(EClosure::Body, move |arena, state| {
parse_loc_expr_with_options(min_indent, options, arena, state)
}),
min_indent,
EClosure::IndentBody
)
)
.parse(arena, state)
.map_err(|(_p, e, s)| (MadeProgress, e, s))?;
let params: Vec<'a, Loc<Pattern<'a>>> = params;
let params: &'a [Loc<Pattern<'a>>] = params.into_bump_slice();
Ok((
MadeProgress,
Expr::Closure(params, arena.alloc(loc_body)),
state,
))
}
mod when {

View file

@ -1452,6 +1452,31 @@ where
}
}
pub fn parse_word1<'a, ToError, E>(
state: State<'a>,
min_indent: u32,
word: u8,
to_error: ToError,
) -> ParseResult<'a, (), E>
where
ToError: Fn(Position) -> E,
E: 'a,
{
debug_assert_ne!(word, b'\n');
if min_indent > state.column() {
return Err((NoProgress, to_error(state.pos()), state));
}
match state.bytes().first() {
Some(x) if *x == word => {
let state = state.advance(1);
Ok((MadeProgress, (), state))
}
_ => Err((NoProgress, to_error(state.pos()), state)),
}
}
pub fn word2<'a, ToError, E>(word_1: u8, word_2: u8, to_error: ToError) -> impl Parser<'a, (), E>
where
ToError: Fn(Position) -> E,

View file

@ -1,13 +1,15 @@
use roc_region::all::{Position, Region};
use std::fmt;
use crate::parser::Progress;
/// A position in a source file.
// NB: [Copy] is explicitly NOT derived to reduce the chance of bugs due to accidentally re-using
// parser state.
#[derive(Clone)]
pub struct State<'a> {
/// The raw input bytes from the file.
/// Beware: original_bytes[0] always points the the start of the file.
/// Beware: original_bytes[0] always points at the start of the file.
/// Use bytes()[0] to access the current byte the parser is inspecting
original_bytes: &'a [u8],
@ -16,6 +18,9 @@ pub struct State<'a> {
/// Position of the start of the current line
pub(crate) line_start: Position,
/// Position of the first non-whitespace character on the current line
pub(crate) line_start_after_whitespace: Position,
}
impl<'a> State<'a> {
@ -24,6 +29,10 @@ impl<'a> State<'a> {
original_bytes: bytes,
offset: 0,
line_start: Position::zero(),
// Technically not correct.
// We don't know the position of the first non-whitespace character yet.
line_start_after_whitespace: Position::zero(),
}
}
@ -39,6 +48,24 @@ impl<'a> State<'a> {
self.pos().offset - self.line_start.offset
}
pub fn line_indent(&self) -> u32 {
self.line_start_after_whitespace.offset - self.line_start.offset
}
/// Check that the indent is at least `indent` spaces.
/// Return a new indent if the current indent is greater than `indent`.
pub fn check_indent<E>(
&self,
indent: u32,
e: impl Fn(Position) -> E,
) -> Result<u32, (Progress, E, State<'a>)> {
if self.column() < indent {
Err((Progress::NoProgress, e(self.pos()), self.clone()))
} else {
Ok(std::cmp::max(indent, self.line_indent()))
}
}
/// Mutably advance the state by a given offset
#[inline(always)]
pub(crate) fn advance_mut(&mut self, offset: usize) {
@ -70,6 +97,18 @@ impl<'a> State<'a> {
pub(crate) const fn advance_newline(mut self) -> State<'a> {
self.offset += 1;
self.line_start = self.pos();
// WARNING! COULD CAUSE BUGS IF WE FORGET TO CALL mark_current_ident LATER!
// We really need to be stricter about this.
self.line_start_after_whitespace = self.line_start;
self
}
#[must_use]
#[inline(always)]
pub(crate) const fn mark_current_indent(mut self) -> State<'a> {
self.line_start_after_whitespace = self.pos();
self
}

View file

@ -0,0 +1 @@
Expr(Closure(Arg(@1), @1), @0)

View file

@ -0,0 +1 @@
\,x -> 1

View file

@ -0,0 +1 @@
Expr(Closure(IndentBody(@5), @3), @0)

View file

@ -0,0 +1,2 @@
\x ->
1

View file

@ -0,0 +1,71 @@
BinOps(
[
(
@0-10 SpaceAfter(
Str(
PlainLine(
"a string",
),
),
[
Newline,
],
),
@11-13 Pizza,
),
(
@14-24 SpaceAfter(
Var {
module_name: "Str",
ident: "toUtf8",
},
[
Newline,
],
),
@25-27 Pizza,
),
(
@28-54 SpaceAfter(
Apply(
@28-36 Var {
module_name: "List",
ident: "map",
},
[
@37-54 Closure(
[
@38-42 Identifier(
"byte",
),
],
@46-54 BinOps(
[
(
@46-50 Var {
module_name: "",
ident: "byte",
},
@51-52 Plus,
),
],
@53-54 Num(
"1",
),
),
),
],
Space,
),
[
Newline,
],
),
@55-57 Pizza,
),
],
@58-70 Var {
module_name: "List",
ident: "reverse",
},
)

View file

@ -0,0 +1,4 @@
"a string"
|> Str.toUtf8
|> List.map \byte -> byte + 1
|> List.reverse

View file

@ -0,0 +1,15 @@
Closure(
[
@1-2 Identifier(
"x",
),
],
@8-9 SpaceBefore(
Num(
"1",
),
[
Newline,
],
),
)

View file

@ -0,0 +1,2 @@
\x ->
1

View file

@ -120,6 +120,8 @@ mod test_parse {
// see tests/snapshots to see test input(.roc) and expected output(.result-ast)
snapshot_tests! {
fail/lambda_extra_comma.expr,
fail/lambda_missing_indent.expr,
fail/type_argument_no_arrow.expr,
fail/type_double_comma.expr,
pass/ability_demand_signature_is_multiline.expr,
@ -157,6 +159,7 @@ mod test_parse {
pass/empty_string.expr,
pass/equals_with_spaces.expr,
pass/equals.expr,
pass/expect_fx.module,
pass/expect.expr,
pass/float_with_underscores.expr,
pass/full_app_header_trailing_commas.header,
@ -167,6 +170,8 @@ mod test_parse {
pass/if_def.expr,
pass/int_with_underscore.expr,
pass/interface_with_newline.header,
pass/lambda_in_chain.expr,
pass/lambda_indent.expr,
pass/list_closing_indent_not_enough.expr,
pass/list_closing_same_indent_no_trailing_comma.expr,
pass/list_closing_same_indent_with_trailing_comma.expr,
@ -181,6 +186,7 @@ mod test_parse {
pass/module_def_newline.module,
pass/multi_backpassing.expr,
pass/multi_char_string.expr,
pass/multiline_string.expr,
pass/multiline_type_signature_with_comment.expr,
pass/multiline_type_signature.expr,
pass/multiple_fields.expr,
@ -204,7 +210,6 @@ mod test_parse {
pass/not_docs.expr,
pass/number_literal_suffixes.expr,
pass/one_backpassing.expr,
pass/multiline_string.expr,
pass/one_char_string.expr,
pass/one_def.expr,
pass/one_minus_two.expr,
@ -251,7 +256,6 @@ mod test_parse {
pass/spaced_singleton_list.expr,
pass/spaces_inside_empty_list.expr,
pass/standalone_module_defs.module,
pass/expect_fx.module,
pass/string_without_escape.expr,
pass/sub_var_with_spaces.expr,
pass/sub_with_spaces.expr,
@ -277,9 +281,9 @@ mod test_parse {
pass/var_minus_two.expr,
pass/var_then.expr,
pass/var_when.expr,
pass/when_if_guard.expr,
pass/when_in_assignment.expr,
pass/when_in_function.expr,
pass/when_if_guard.expr,
pass/when_in_parens_indented.expr,
pass/when_in_parens.expr,
pass/when_with_alternative_patterns.expr,