Fix approximately a bajillion fmt and parsing bugs

(discovered by fuzzing)

There's more to come, but this seems like a good batch for now.
This commit is contained in:
Joshua Warner 2023-01-11 19:44:29 -08:00
parent 8f62eeaf7e
commit 0b8e68f70d
No known key found for this signature in database
GPG key ID: 89AD497003F93FDD
68 changed files with 1011 additions and 229 deletions

View file

@ -12,7 +12,7 @@ use roc_region::all::Loc;
/// Does an AST node need parens around it?
///
/// Usually not, but there are two cases where it may be required
/// Usually not, but there are a few cases where it may be required
///
/// 1. In a function type, function types are in parens
///
@ -25,11 +25,19 @@ use roc_region::all::Loc;
/// Just (Just a)
/// List (List a)
/// reverse (reverse l)
///
/// 3. In a chain of binary operators, things like nested defs require parens.
///
/// a + (
/// x = 3
/// x + 1
/// )
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub enum Parens {
NotNeeded,
InFunctionType,
InApply,
InOperator,
}
/// In an AST node, do we show newlines around it
@ -263,7 +271,6 @@ impl<'a> Formattable for TypeAnnotation<'a> {
}
Apply(pkg, name, arguments) => {
buf.indent(indent);
// NOTE apply is never multiline
let write_parens = parens == Parens::InApply && !arguments.is_empty();
if write_parens {
@ -277,11 +284,38 @@ impl<'a> Formattable for TypeAnnotation<'a> {
buf.push_str(name);
for argument in *arguments {
buf.spaces(1);
argument
.value
.format_with_options(buf, Parens::InApply, Newlines::No, indent);
let needs_indent = except_last(arguments).any(|a| a.is_multiline())
|| arguments
.last()
.map(|a| {
a.is_multiline()
&& (!a.extract_spaces().before.is_empty()
|| !is_outdentable(&a.value))
})
.unwrap_or_default();
let arg_indent = if needs_indent {
indent + INDENT
} else {
indent
};
for arg in arguments.iter() {
if needs_indent {
let arg = arg.extract_spaces();
fmt_spaces(buf, arg.before.iter(), arg_indent);
buf.ensure_ends_with_newline();
arg.item.format_with_options(
buf,
Parens::InApply,
Newlines::Yes,
arg_indent,
);
fmt_spaces(buf, arg.after.iter(), arg_indent);
} else {
buf.spaces(1);
arg.format_with_options(buf, Parens::InApply, Newlines::No, arg_indent);
}
}
if write_parens {
@ -372,6 +406,13 @@ impl<'a> Formattable for TypeAnnotation<'a> {
}
}
fn is_outdentable(ann: &TypeAnnotation) -> bool {
matches!(
ann.extract_spaces().item,
TypeAnnotation::Tuple { .. } | TypeAnnotation::Record { .. }
)
}
/// Fields are subtly different on the type and term level:
///
/// > type: { x : Int, y : Bool }
@ -691,3 +732,11 @@ impl<'a> Formattable for HasAbilities<'a> {
}
}
}
pub fn except_last<T>(items: &[T]) -> impl Iterator<Item = &T> {
if items.is_empty() {
items.iter()
} else {
items[..items.len() - 1].iter()
}
}

View file

@ -77,6 +77,7 @@ impl<'a> Formattable for TypeDef<'a> {
for var in *vars {
buf.spaces(1);
fmt_pattern(buf, &var.value, indent, Parens::NotNeeded);
buf.indent(indent);
}
buf.push_str(" :");
@ -95,6 +96,7 @@ impl<'a> Formattable for TypeDef<'a> {
for var in *vars {
buf.spaces(1);
fmt_pattern(buf, &var.value, indent, Parens::NotNeeded);
buf.indent(indent);
}
buf.push_str(" :=");
@ -136,6 +138,7 @@ impl<'a> Formattable for TypeDef<'a> {
for var in *vars {
buf.spaces(1);
fmt_pattern(buf, &var.value, indent, Parens::NotNeeded);
buf.indent(indent);
}
buf.push_str(" has");
@ -191,6 +194,7 @@ impl<'a> Formattable for ValueDef<'a> {
match self {
Annotation(loc_pattern, loc_annotation) => {
loc_pattern.format(buf, indent);
buf.indent(indent);
if loc_annotation.is_multiline() {
buf.push_str(" :");
@ -390,6 +394,7 @@ pub fn fmt_body<'a, 'buf>(
indent: u16,
) {
pattern.format_with_options(buf, Parens::InApply, Newlines::No, indent);
buf.indent(indent);
buf.push_str(" =");
if body.is_multiline() {
@ -415,7 +420,7 @@ pub fn fmt_body<'a, 'buf>(
);
}
}
Expr::BinOps(_, _) => {
Expr::Defs(..) | Expr::BinOps(_, _) => {
// Binop chains always get a newline. Otherwise you can have things like:
//
// something = foo

View file

@ -1,4 +1,4 @@
use crate::annotation::{Formattable, Newlines, Parens};
use crate::annotation::{except_last, Formattable, Newlines, Parens};
use crate::collection::{fmt_collection, Braces};
use crate::def::fmt_defs;
use crate::pattern::fmt_pattern;
@ -142,7 +142,7 @@ impl<'a> Formattable for Expr<'a> {
}
ParensAround(sub_expr) => {
if parens == Parens::NotNeeded && !sub_expr_requests_parens(sub_expr) {
sub_expr.format_with_options(buf, Parens::NotNeeded, Newlines::Yes, indent);
sub_expr.format_with_options(buf, Parens::NotNeeded, newlines, indent);
} else {
let should_add_newlines = match sub_expr {
Expr::Closure(..)
@ -202,86 +202,111 @@ impl<'a> Formattable for Expr<'a> {
buf.push_str("crash");
}
Apply(loc_expr, loc_args, _) => {
// Sadly this assertion fails in practice. The fact that the parser produces code like this is going to
// confuse the formatter, because it depends on being able to "see" spaces that logically come before the inner
// expr in several places - which is necessarily the case when the `loc_expr` of the apply itself has
// SpaceBefore.
//
// TODO: enforce in the type system that spaces must be pushed to the "outside".
// In other words, Expr::Apply should look something like the following, and there shouldn't be Expr::SpaceBefore and ::SpaceAfter.
//
// ```
// Apply(&'a SpaceAfter<Loc<Expr<'a>>>, &'a [&'a SpaceBefore<Loc<Expr<'a>>>], CalledVia),
// ```
//
// assert!(loc_expr.extract_spaces().before.is_empty(), "{:#?}", self);
buf.indent(indent);
if apply_needs_parens && !loc_args.is_empty() {
buf.push('(');
}
// should_reflow_outdentable, aka should we transform this:
//
// ```
// foo bar
// [
// 1,
// 2,
// ]
// ```
//
// Into this:
//
// ```
// foo bar [
// 1,
// 2,
// ]
// ```
let should_reflow_outdentable = loc_expr.extract_spaces().after.is_empty()
&& except_last(loc_args).all(|a| !a.is_multiline())
&& loc_args
.last()
.map(|a| {
a.extract_spaces().item.is_multiline()
&& matches!(
a.value.extract_spaces().item,
Expr::Tuple(_) | Expr::List(_) | Expr::Record(_)
)
&& a.extract_spaces().before == [CommentOrNewline::Newline]
})
.unwrap_or_default();
let needs_indent = !should_reflow_outdentable
&& (!loc_expr.extract_spaces().after.is_empty()
|| except_last(loc_args).any(|a| a.is_multiline())
|| loc_args
.last()
.map(|a| {
a.is_multiline()
&& (!a.extract_spaces().before.is_empty()
|| !is_outdentable(&a.value))
})
.unwrap_or_default());
let arg_indent = if needs_indent {
indent + INDENT
} else {
indent
};
loc_expr.format_with_options(buf, Parens::InApply, Newlines::Yes, indent);
let multiline_args = loc_args.iter().any(|loc_arg| loc_arg.is_multiline());
for loc_arg in loc_args.iter() {
if should_reflow_outdentable {
buf.spaces(1);
let mut found_multiline_expr = false;
let mut iter = loc_args.iter().peekable();
// Ignore any comments+newlines before/after.
// We checked above that there's only a single newline before the last arg,
// which we're intentionally ignoring.
while let Some(loc_arg) = iter.next() {
if iter.peek().is_none() {
found_multiline_expr = match loc_arg.value {
SpaceBefore(sub_expr, spaces) => match sub_expr {
Record { .. } | List { .. } => {
let is_only_newlines = spaces.iter().all(|s| s.is_newline());
is_only_newlines
&& !found_multiline_expr
&& sub_expr.is_multiline()
}
_ => false,
},
Record { .. } | List { .. } | Closure { .. } => {
!found_multiline_expr && loc_arg.is_multiline()
}
_ => false,
}
let arg = loc_arg.extract_spaces();
arg.item.format_with_options(
buf,
Parens::InApply,
Newlines::Yes,
arg_indent,
);
} else if needs_indent {
let arg = loc_arg.extract_spaces();
fmt_spaces(buf, arg.before.iter(), arg_indent);
buf.ensure_ends_with_newline();
arg.item.format_with_options(
buf,
Parens::InApply,
Newlines::Yes,
arg_indent,
);
fmt_spaces(buf, arg.after.iter(), arg_indent);
} else {
found_multiline_expr = loc_arg.is_multiline();
}
}
let should_outdent_last_arg = found_multiline_expr;
if multiline_args && !should_outdent_last_arg {
let arg_indent = indent + INDENT;
for loc_arg in loc_args.iter() {
buf.newline();
loc_arg.format_with_options(buf, Parens::InApply, Newlines::No, arg_indent);
}
} else if multiline_args && should_outdent_last_arg {
let mut iter = loc_args.iter().peekable();
while let Some(loc_arg) = iter.next() {
buf.spaces(1);
if iter.peek().is_none() {
match loc_arg.value {
SpaceBefore(sub_expr, _) => {
sub_expr.format_with_options(
buf,
Parens::InApply,
Newlines::Yes,
indent,
);
}
_ => {
loc_arg.format_with_options(
buf,
Parens::InApply,
Newlines::Yes,
indent,
);
}
}
} else {
loc_arg.format_with_options(
buf,
Parens::InApply,
Newlines::Yes,
indent,
);
}
}
} else {
for loc_arg in loc_args.iter() {
buf.spaces(1);
loc_arg.format_with_options(buf, Parens::InApply, Newlines::Yes, indent);
loc_arg.format_with_options(
buf,
Parens::InApply,
Newlines::Yes,
arg_indent,
);
}
}
@ -337,24 +362,28 @@ impl<'a> Formattable for Expr<'a> {
fmt_backpassing(buf, loc_patterns, loc_body, loc_ret, indent);
}
Defs(defs, ret) => {
// It should theoretically be impossible to *parse* an empty defs list.
// (Canonicalization can remove defs later, but that hasn't happened yet!)
debug_assert!(!defs.is_empty());
{
let indent = if parens == Parens::InOperator {
buf.indent(indent);
buf.push('(');
buf.newline();
indent + INDENT
} else {
indent
};
fmt_defs(buf, defs, indent);
// It should theoretically be impossible to *parse* an empty defs list.
// (Canonicalization can remove defs later, but that hasn't happened yet!)
debug_assert!(!defs.is_empty());
match &ret.value {
SpaceBefore(sub_expr, spaces) => {
let empty_line_before_return = empty_line_before_expr(&ret.value);
let has_inline_comment = has_line_comment_before(&ret.value);
fmt_defs(buf, defs, indent);
if has_inline_comment {
match &ret.value {
SpaceBefore(sub_expr, spaces) => {
buf.spaces(1);
format_spaces(buf, spaces, newlines, indent);
fmt_spaces(buf, spaces.iter(), indent);
if !empty_line_before_return {
buf.newline();
}
buf.indent(indent);
sub_expr.format_with_options(
buf,
@ -362,17 +391,21 @@ impl<'a> Formattable for Expr<'a> {
Newlines::Yes,
indent,
);
} else {
}
_ => {
buf.ensure_ends_with_newline();
buf.indent(indent);
// Even if there were no defs, which theoretically should never happen,
// still print the return value.
ret.format_with_options(buf, Parens::NotNeeded, Newlines::Yes, indent);
}
}
_ => {
buf.ensure_ends_with_newline();
buf.indent(indent);
// Even if there were no defs, which theoretically should never happen,
// still print the return value.
ret.format_with_options(buf, Parens::NotNeeded, Newlines::Yes, indent);
}
}
if parens == Parens::InOperator {
buf.ensure_ends_with_newline();
buf.indent(indent);
buf.push(')');
}
}
Expect(condition, continuation) => {
@ -387,7 +420,7 @@ impl<'a> Formattable for Expr<'a> {
When(loc_condition, branches) => fmt_when(buf, loc_condition, branches, indent),
Tuple(items) => fmt_collection(buf, indent, Braces::Round, *items, Newlines::No),
List(items) => fmt_collection(buf, indent, Braces::Square, *items, Newlines::No),
BinOps(lefts, right) => fmt_binops(buf, lefts, right, false, parens, indent),
BinOps(lefts, right) => fmt_binops(buf, lefts, right, false, indent),
UnaryOp(sub_expr, unary_op) => {
buf.indent(indent);
match &unary_op.value {
@ -461,6 +494,13 @@ pub(crate) fn format_sq_literal(buf: &mut Buf, s: &str) {
buf.push('\'');
}
fn is_outdentable(expr: &Expr) -> bool {
matches!(
expr.extract_spaces().item,
Expr::Tuple(_) | Expr::List(_) | Expr::Record(_) | Expr::Closure(..)
)
}
fn starts_with_newline(expr: &Expr) -> bool {
use roc_parse::ast::Expr::*;
@ -599,7 +639,6 @@ fn fmt_binops<'a, 'buf>(
lefts: &'a [(Loc<Expr<'a>>, Loc<BinOp>)],
loc_right_side: &'a Loc<Expr<'a>>,
part_of_multi_line_binops: bool,
apply_needs_parens: Parens,
indent: u16,
) {
let is_multiline = part_of_multi_line_binops
@ -609,7 +648,7 @@ fn fmt_binops<'a, 'buf>(
for (loc_left_side, loc_binop) in lefts {
let binop = loc_binop.value;
loc_left_side.format_with_options(buf, apply_needs_parens, Newlines::No, indent);
loc_left_side.format_with_options(buf, Parens::InOperator, Newlines::No, indent);
if is_multiline {
buf.ensure_ends_with_newline();
@ -623,7 +662,7 @@ fn fmt_binops<'a, 'buf>(
buf.spaces(1);
}
loc_right_side.format_with_options(buf, apply_needs_parens, Newlines::Yes, indent);
loc_right_side.format_with_options(buf, Parens::InOperator, Newlines::Yes, indent);
}
fn format_spaces<'a, 'buf>(
@ -642,44 +681,6 @@ fn format_spaces<'a, 'buf>(
}
}
fn has_line_comment_before<'a>(expr: &'a Expr<'a>) -> bool {
use roc_parse::ast::Expr::*;
match expr {
SpaceBefore(_, spaces) => {
matches!(spaces.iter().next(), Some(CommentOrNewline::LineComment(_)))
}
_ => false,
}
}
fn empty_line_before_expr<'a>(expr: &'a Expr<'a>) -> bool {
use roc_parse::ast::Expr::*;
match expr {
SpaceBefore(_, spaces) => {
let mut has_at_least_one_newline = false;
for comment_or_newline in spaces.iter() {
match comment_or_newline {
CommentOrNewline::Newline => {
if has_at_least_one_newline {
return true;
} else {
has_at_least_one_newline = true;
}
}
CommentOrNewline::LineComment(_) | CommentOrNewline::DocComment(_) => {}
}
}
false
}
_ => false,
}
}
fn is_when_patterns_multiline(when_branch: &WhenBranch) -> bool {
let patterns = when_branch.patterns;
let (first_pattern, rest) = patterns.split_first().unwrap();
@ -1204,19 +1205,16 @@ fn fmt_backpassing<'a, 'buf>(
indent
};
let pattern_needs_parens = loc_patterns
.iter()
.any(|p| pattern_needs_parens_when_backpassing(&p.value));
if pattern_needs_parens {
buf.indent(indent);
buf.push('(');
}
let mut it = loc_patterns.iter().peekable();
while let Some(loc_pattern) = it.next() {
loc_pattern.format(buf, indent);
let needs_parens = if pattern_needs_parens_when_backpassing(&loc_pattern.value) {
Parens::InApply
} else {
Parens::NotNeeded
};
loc_pattern.format_with_options(buf, needs_parens, Newlines::No, indent);
if it.peek().is_some() {
if arguments_are_multiline {
@ -1229,10 +1227,6 @@ fn fmt_backpassing<'a, 'buf>(
}
}
if pattern_needs_parens {
buf.push(')');
}
if arguments_are_multiline {
buf.newline();
buf.indent(indent);
@ -1479,6 +1473,8 @@ fn sub_expr_requests_parens(expr: &Expr<'_>) -> bool {
})
}
Expr::If(_, _) => true,
Expr::SpaceBefore(e, _) => sub_expr_requests_parens(e),
Expr::SpaceAfter(e, _) => sub_expr_requests_parens(e),
_ => false,
}
}

View file

@ -72,12 +72,11 @@ fn fmt_spaces_max_consecutive_newlines<'a, 'buf, I>(
// Only ever print two newlines back to back.
// (Two newlines renders as one blank line.)
let mut consecutive_newlines = 0;
let mut encountered_comment = false;
for space in spaces {
match space {
Newline => {
if !encountered_comment && (consecutive_newlines < max_consecutive_newlines) {
if consecutive_newlines < max_consecutive_newlines {
buf.newline();
// Don't bother incrementing it if we're already over the limit.
@ -90,14 +89,14 @@ fn fmt_spaces_max_consecutive_newlines<'a, 'buf, I>(
fmt_comment(buf, comment);
buf.newline();
encountered_comment = true;
consecutive_newlines = 1;
}
DocComment(docs) => {
buf.indent(indent);
fmt_docs(buf, docs);
buf.newline();
encountered_comment = true;
consecutive_newlines = 1;
}
}
}