Fix a bunch of bugs in parsing/formatting found by fuzzing

2025-12-04 00:55:00 +00:00 · 2023-02-05 11:48:05 -08:00 · 2023-02-05 11:48:05 -08:00 · 3fee0d3e8f
commit 3fee0d3e8f
parent acd446f6bd
43 changed files with 593 additions and 70 deletions
--- a/crates/compiler/fmt/src/def.rs
+++ b/crates/compiler/fmt/src/def.rs
@@ -76,8 +76,19 @@ impl<'a> Formattable for TypeDef<'a> {

                for var in *vars {
                    buf.spaces(1);
+
+                    let need_parens = matches!(var.value, Pattern::Apply(..));
+
+                    if need_parens {
+                        buf.push_str("(");
+                    }
+
                    fmt_pattern(buf, &var.value, indent, Parens::NotNeeded);
                    buf.indent(indent);
+
+                    if need_parens {
+                        buf.push_str(")");
+                    }
                }

                buf.push_str(" :");
--- a/crates/compiler/fmt/src/expr.rs
+++ b/crates/compiler/fmt/src/expr.rs
@@ -51,25 +51,7 @@ impl<'a> Formattable for Expr<'a> {

            List(items) => items.iter().any(|loc_expr| loc_expr.is_multiline()),

-            Str(literal) => {
-                use roc_parse::ast::StrLiteral::*;
-
-                match literal {
-                    PlainLine(string) => {
-                        // When a PlainLine contains '\n' or '"', format as a block string
-                        string.contains('"') || string.contains('\n')
-                    }
-                    Line(_) => {
-                        // If this had any newlines, it'd have parsed as Block.
-                        false
-                    }
-                    Block(_) => {
-                        // Block strings are always formatted on multiple lines,
-                        // even if the string is only a single line.
-                        true
-                    }
-                }
-            }
+            Str(literal) => is_str_multiline(literal),
            Apply(loc_expr, args, _) => {
                loc_expr.is_multiline() || args.iter().any(|loc_arg| loc_arg.is_multiline())
            }
@@ -271,8 +253,21 @@ impl<'a> Formattable for Expr<'a> {
                    indent
                };

+                let expr_needs_parens =
+                    matches!(loc_expr.value.extract_spaces().item, Expr::Closure(..))
+                        && !loc_args.is_empty();
+
+                if expr_needs_parens {
+                    buf.push('(');
+                }
+
                loc_expr.format_with_options(buf, Parens::InApply, Newlines::Yes, indent);

+                if expr_needs_parens {
+                    buf.indent(indent);
+                    buf.push(')');
+                }
+
                for loc_arg in loc_args.iter() {
                    if should_reflow_outdentable {
                        buf.spaces(1);
@@ -432,7 +427,31 @@ impl<'a> Formattable for Expr<'a> {
                    }
                }

-                sub_expr.format_with_options(buf, Parens::InApply, newlines, indent);
+                let needs_newline = match &sub_expr.value {
+                    SpaceBefore(..) => true,
+                    Str(text) => is_str_multiline(text),
+                    _ => false,
+                };
+                let needs_parens =
+                    needs_newline && matches!(unary_op.value, called_via::UnaryOp::Negate);
+
+                if needs_parens {
+                    // Unary negation can't be followed by whitespace (which is what a newline is) - so
+                    // we need to wrap the negated value in parens.
+                    buf.push('(');
+                }
+
+                let inner_indent = if needs_parens {
+                    indent + INDENT
+                } else {
+                    indent
+                };
+
+                sub_expr.format_with_options(buf, Parens::InApply, newlines, inner_indent);
+
+                if needs_parens {
+                    buf.push(')');
+                }
            }
            RecordAccessorFunction(key) => {
                buf.indent(indent);
@@ -464,6 +483,26 @@ impl<'a> Formattable for Expr<'a> {
    }
 }

+fn is_str_multiline(literal: &StrLiteral) -> bool {
+    use roc_parse::ast::StrLiteral::*;
+
+    match literal {
+        PlainLine(string) => {
+            // When a PlainLine contains '\n' or '"', format as a block string
+            string.contains('"') || string.contains('\n')
+        }
+        Line(_) => {
+            // If this had any newlines, it'd have parsed as Block.
+            false
+        }
+        Block(_) => {
+            // Block strings are always formatted on multiple lines,
+            // even if the string is only a single line.
+            true
+        }
+    }
+}
+
 fn needs_unicode_escape(ch: char) -> bool {
    matches!(ch, '\u{0000}'..='\u{001f}' | '\u{007f}'..='\u{009f}')
 }
@@ -585,11 +624,11 @@ pub fn fmt_str_literal<'buf>(buf: &mut Buf<'buf>, literal: StrLiteral, indent: u
                buf.ensure_ends_with_newline();
                buf.indent(indent);
                buf.push_str("\"\"\"");
-                buf.newline();
+                buf.push_newline_literal();
                for line in string.split('\n') {
                    buf.indent(indent);
                    buf.push_str_allow_spaces(line);
-                    buf.newline();
+                    buf.push_newline_literal();
                }
                buf.indent(indent);
                buf.push_str("\"\"\"");
@@ -613,7 +652,7 @@ pub fn fmt_str_literal<'buf>(buf: &mut Buf<'buf>, literal: StrLiteral, indent: u
            buf.ensure_ends_with_newline();
            buf.indent(indent);
            buf.push_str("\"\"\"");
-            buf.newline();
+            buf.push_newline_literal();

            for segments in lines.iter() {
                for seg in segments.iter() {
@@ -622,11 +661,11 @@ pub fn fmt_str_literal<'buf>(buf: &mut Buf<'buf>, literal: StrLiteral, indent: u
                        buf.indent(indent);
                        format_str_segment(seg, buf, indent);
                    } else {
-                        buf.newline();
+                        buf.push_newline_literal();
                    }
                }

-                buf.newline();
+                buf.push_newline_literal();
            }
            buf.indent(indent);
            buf.push_str("\"\"\"");
--- a/crates/compiler/fmt/src/lib.rs
+++ b/crates/compiler/fmt/src/lib.rs
@@ -106,12 +106,20 @@ impl<'a> Buf<'a> {
        self.spaces_to_flush += count;
    }

-    pub fn newline(&mut self) {
+    /// Only for use in emitting newlines in block strings, which don't follow the rule of
+    /// having at most two newlines in a row.
+    pub fn push_newline_literal(&mut self) {
        self.spaces_to_flush = 0;
        self.newlines_to_flush += 1;
        self.beginning_of_line = true;
    }

+    pub fn newline(&mut self) {
+        self.spaces_to_flush = 0;
+        self.newlines_to_flush = std::cmp::min(self.newlines_to_flush + 1, 2);
+        self.beginning_of_line = true;
+    }
+
    /// Ensures the current buffer ends in a newline, if it didn't already.
    /// Doesn't add a newline if the buffer already ends in one.
    pub fn ensure_ends_with_newline(&mut self) {
--- a/crates/compiler/fmt/src/spaces.rs
+++ b/crates/compiler/fmt/src/spaces.rs
@@ -745,6 +745,7 @@ fn remove_spaces_bad_ident(ident: BadIdent) -> BadIdent {
        BadIdent::WeirdDotQualified(_) => BadIdent::WeirdDotQualified(Position::zero()),
        BadIdent::StrayDot(_) => BadIdent::StrayDot(Position::zero()),
        BadIdent::BadOpaqueRef(_) => BadIdent::BadOpaqueRef(Position::zero()),
+        BadIdent::QualifiedTupleAccessor(_) => BadIdent::QualifiedTupleAccessor(Position::zero()),
    }
 }