Merge pull request #4758 from joshuawarner32/fuzzing-take-2

Add fuzzing for the formatter and fix bugs
This commit is contained in:
Ayaz 2022-12-22 06:35:34 -07:00 committed by GitHub
commit dbdf4acdd7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
58 changed files with 1286 additions and 471 deletions

View file

@ -207,6 +207,8 @@ impl<'a> Formattable for TypeAnnotation<'a> {
) {
use roc_parse::ast::TypeAnnotation::*;
let self_is_multiline = self.is_multiline();
match self {
Function(args, ret) => {
let needs_parens = parens != Parens::NotNeeded;
@ -218,32 +220,31 @@ impl<'a> Formattable for TypeAnnotation<'a> {
}
let mut it = args.iter().enumerate().peekable();
let should_add_newlines = newlines == Newlines::Yes;
while let Some((index, argument)) = it.next() {
let is_first = index == 0;
let is_multiline = &argument.value.is_multiline();
if !is_first && !is_multiline && should_add_newlines {
if !is_first && !is_multiline && self_is_multiline {
buf.newline();
}
argument.value.format_with_options(
buf,
Parens::InFunctionType,
Newlines::No,
Newlines::Yes,
indent,
);
if it.peek().is_some() {
buf.push_str(",");
if !should_add_newlines {
if !self_is_multiline {
buf.spaces(1);
}
}
}
if should_add_newlines {
if self_is_multiline {
buf.newline();
buf.indent(indent);
} else {
@ -332,7 +333,12 @@ impl<'a> Formattable for TypeAnnotation<'a> {
Where(annot, has_clauses) => {
annot.format_with_options(buf, parens, newlines, indent);
buf.spaces(1);
if has_clauses.iter().any(|has| has.is_multiline()) {
buf.newline();
buf.indent(indent);
} else {
buf.spaces(1);
}
for (i, has) in has_clauses.iter().enumerate() {
buf.push(if i == 0 { '|' } else { ',' });
buf.spaces(1);
@ -341,19 +347,9 @@ impl<'a> Formattable for TypeAnnotation<'a> {
}
SpaceBefore(ann, spaces) => {
let is_function = matches!(ann, TypeAnnotation::Function(..));
let next_newlines = if is_function && newlines == Newlines::Yes {
Newlines::Yes
} else {
Newlines::No
};
if !buf.ends_with_newline() {
buf.newline();
buf.indent(indent);
}
buf.ensure_ends_with_newline();
fmt_comments_only(buf, spaces.iter(), NewlineAt::Bottom, indent);
ann.format_with_options(buf, parens, next_newlines, indent)
ann.format_with_options(buf, parens, newlines, indent)
}
SpaceAfter(ann, spaces) => {
ann.format_with_options(buf, parens, newlines, indent);

View file

@ -1,10 +1,10 @@
use crate::annotation::{Formattable, Newlines, Parens};
use crate::pattern::fmt_pattern;
use crate::spaces::{fmt_spaces, INDENT};
use crate::spaces::{fmt_default_newline, fmt_spaces, INDENT};
use crate::Buf;
use roc_parse::ast::{
AbilityMember, Defs, Expr, ExtractSpaces, Pattern, Spaces, TypeAnnotation, TypeDef, TypeHeader,
ValueDef,
AbilityMember, Defs, Expr, ExtractSpaces, Pattern, Spaces, StrLiteral, TypeAnnotation, TypeDef,
TypeHeader, ValueDef,
};
use roc_region::all::Loc;
@ -22,11 +22,17 @@ impl<'a> Formattable for Defs<'a> {
_newlines: Newlines,
indent: u16,
) {
let mut prev_spaces = true;
for (index, def) in self.defs().enumerate() {
let spaces_before = &self.spaces[self.space_before[index].indices()];
let spaces_after = &self.spaces[self.space_after[index].indices()];
fmt_spaces(buf, spaces_before.iter(), indent);
if prev_spaces {
fmt_spaces(buf, spaces_before.iter(), indent);
} else {
fmt_default_newline(buf, spaces_before, indent);
}
match def {
Ok(type_def) => type_def.format(buf, indent),
@ -34,6 +40,8 @@ impl<'a> Formattable for Defs<'a> {
}
fmt_spaces(buf, spaces_after.iter(), indent);
prev_spaces = !spaces_after.is_empty();
}
}
}
@ -176,7 +184,7 @@ impl<'a> Formattable for ValueDef<'a> {
&self,
buf: &mut Buf<'buf>,
_parens: Parens,
_newlines: Newlines,
newlines: Newlines,
indent: u16,
) {
use roc_parse::ast::ValueDef::*;
@ -186,6 +194,7 @@ impl<'a> Formattable for ValueDef<'a> {
if loc_annotation.is_multiline() {
buf.push_str(" :");
buf.spaces(1);
let should_outdent = match loc_annotation.value {
TypeAnnotation::SpaceBefore(sub_def, spaces) => match sub_def {
@ -200,7 +209,6 @@ impl<'a> Formattable for ValueDef<'a> {
};
if should_outdent {
buf.spaces(1);
match loc_annotation.value {
TypeAnnotation::SpaceBefore(sub_def, _) => {
sub_def.format_with_options(
@ -223,7 +231,7 @@ impl<'a> Formattable for ValueDef<'a> {
loc_annotation.format_with_options(
buf,
Parens::NotNeeded,
Newlines::Yes,
newlines,
indent + INDENT,
);
}
@ -423,6 +431,10 @@ pub fn fmt_body<'a, 'buf>(
buf.newline();
body.format_with_options(buf, Parens::NotNeeded, Newlines::Yes, indent + INDENT);
}
Expr::When(..) | Expr::Str(StrLiteral::Block(_)) => {
buf.ensure_ends_with_newline();
body.format_with_options(buf, Parens::NotNeeded, Newlines::Yes, indent + INDENT);
}
_ => {
buf.spaces(1);
body.format_with_options(buf, Parens::NotNeeded, Newlines::Yes, indent);

View file

@ -55,13 +55,18 @@ impl<'a> Formattable for Expr<'a> {
use roc_parse::ast::StrLiteral::*;
match literal {
PlainLine(_) | Line(_) => {
PlainLine(string) => {
// When a PlainLine contains '\n' or '"', format as a block string
string.contains('"') || string.contains('\n')
}
Line(_) => {
// If this had any newlines, it'd have parsed as Block.
false
}
Block(lines) => {
// Block strings don't *have* to be multiline!
lines.len() > 1
Block(_) => {
// Block strings are always formatted on multiple lines,
// even if the string is only a single line.
true
}
}
}
@ -358,14 +363,12 @@ impl<'a> Formattable for Expr<'a> {
indent,
);
} else {
if !empty_line_before_return {
buf.newline();
}
ret.format_with_options(buf, Parens::NotNeeded, Newlines::Yes, indent);
}
}
_ => {
buf.ensure_ends_with_newline();
buf.indent(indent);
// Even if there were no defs, which theoretically should never happen,
// still print the return value.
ret.format_with_options(buf, Parens::NotNeeded, Newlines::Yes, indent);
@ -521,11 +524,11 @@ pub fn fmt_str_literal<'buf>(buf: &mut Buf<'buf>, literal: StrLiteral, indent: u
match literal {
PlainLine(string) => {
buf.indent(indent);
buf.push('"');
// When a PlainLine contains '\n' or '"', format as a block string
if string.contains('"') || string.contains('\n') {
buf.push_str("\"\"");
buf.ensure_ends_with_newline();
buf.indent(indent);
buf.push_str("\"\"\"");
buf.newline();
for line in string.split('\n') {
buf.indent(indent);
@ -533,11 +536,13 @@ pub fn fmt_str_literal<'buf>(buf: &mut Buf<'buf>, literal: StrLiteral, indent: u
buf.newline();
}
buf.indent(indent);
buf.push_str("\"\"");
buf.push_str("\"\"\"");
} else {
buf.indent(indent);
buf.push('"');
buf.push_str_allow_spaces(string);
buf.push('"');
};
buf.push('"');
}
Line(segments) => {
buf.indent(indent);
@ -684,11 +689,9 @@ fn fmt_when<'a, 'buf>(
indent: u16,
) {
let is_multiline_condition = loc_condition.is_multiline();
buf.ensure_ends_with_newline();
buf.indent(indent);
buf.push_str(
"\
when",
);
buf.push_str("when");
if is_multiline_condition {
let condition_indent = indent + INDENT;

View file

@ -9,15 +9,11 @@ pub mod expr;
pub mod module;
pub mod pattern;
pub mod spaces;
pub mod test_helpers;
use bumpalo::{collections::String, Bump};
use roc_parse::ast::Module;
#[cfg(windows)]
const NEWLINE: &str = "\r\n";
#[cfg(not(windows))]
const NEWLINE: &str = "\n";
#[derive(Debug)]
pub struct Ast<'a> {
pub module: Module<'a>,
@ -28,6 +24,7 @@ pub struct Ast<'a> {
pub struct Buf<'a> {
text: String<'a>,
spaces_to_flush: usize,
newlines_to_flush: usize,
beginning_of_line: bool,
}
@ -36,6 +33,7 @@ impl<'a> Buf<'a> {
Buf {
text: String::new_in(arena),
spaces_to_flush: 0,
newlines_to_flush: 0,
beginning_of_line: true,
}
}
@ -50,9 +48,7 @@ impl<'a> Buf<'a> {
pub fn indent(&mut self, indent: u16) {
if self.beginning_of_line {
for _ in 0..indent {
self.text.push(' ');
}
self.spaces_to_flush = indent as usize;
}
self.beginning_of_line = false;
}
@ -104,48 +100,42 @@ impl<'a> Buf<'a> {
pub fn newline(&mut self) {
self.spaces_to_flush = 0;
self.text.push_str(NEWLINE);
self.newlines_to_flush += 1;
self.beginning_of_line = true;
}
/// Ensures the current buffer ends in a newline, if it didn't already.
/// Doesn't add a newline if the buffer already ends in one.
pub fn ensure_ends_with_newline(&mut self) {
if self.spaces_to_flush > 0 {
self.flush_spaces();
self.newline();
} else if !self.text.ends_with('\n') && !self.text.is_empty() {
if !self.text.is_empty() && self.newlines_to_flush == 0 {
self.newline()
}
}
pub fn ensure_ends_with_blank_line(&mut self) {
if self.spaces_to_flush > 0 {
self.flush_spaces();
self.newline();
self.newline();
} else if !self.text.ends_with('\n') {
self.newline();
self.newline();
} else if !self.text.ends_with("\n\n") {
self.newline();
if !self.text.is_empty() && self.newlines_to_flush < 2 {
self.spaces_to_flush = 0;
self.newlines_to_flush = 2;
self.beginning_of_line = true;
}
}
fn flush_spaces(&mut self) {
if self.spaces_to_flush > 0 {
for _ in 0..self.spaces_to_flush {
self.text.push(' ');
}
self.spaces_to_flush = 0;
for _ in 0..self.newlines_to_flush {
self.text.push('\n');
}
self.newlines_to_flush = 0;
for _ in 0..self.spaces_to_flush {
self.text.push(' ');
}
self.spaces_to_flush = 0;
}
/// Ensures the text ends in a newline with no whitespace preceding it.
pub fn fmt_end_of_file(&mut self) {
fmt_text_eof(&mut self.text)
self.ensure_ends_with_newline();
self.flush_spaces();
}
pub fn ends_with_space(&self) -> bool {
@ -153,117 +143,10 @@ impl<'a> Buf<'a> {
}
pub fn ends_with_newline(&self) -> bool {
self.spaces_to_flush == 0 && self.text.ends_with('\n')
self.newlines_to_flush > 0 || self.text.ends_with('\n')
}
fn is_empty(&self) -> bool {
self.spaces_to_flush == 0 && self.text.is_empty()
}
}
/// Ensures the text ends in a newline with no whitespace preceding it.
fn fmt_text_eof(text: &mut bumpalo::collections::String<'_>) {
let mut chars_rev = text.chars().rev();
let mut last_whitespace = None;
let mut last_whitespace_index = text.len();
// Keep going until we either run out of characters or encounter one
// that isn't whitespace.
loop {
match chars_rev.next() {
Some(ch) if ch.is_whitespace() => {
last_whitespace = Some(ch);
last_whitespace_index -= 1;
}
_ => {
break;
}
}
}
match last_whitespace {
Some('\n') => {
// There may have been more whitespace after this newline; remove it!
text.truncate(last_whitespace_index + '\n'.len_utf8());
}
Some(_) => {
// There's some whitespace at the end of this file, but the first
// whitespace char after the last non-whitespace char isn't a newline.
// So replace that whitespace char (and everything after it) with a newline.
text.replace_range(last_whitespace_index.., NEWLINE);
}
None => {
debug_assert!(last_whitespace_index == text.len());
debug_assert!(!text.ends_with(char::is_whitespace));
// This doesn't end in whitespace at all, so add a newline.
text.push_str(NEWLINE);
}
}
}
#[test]
fn eof_text_ends_with_newline() {
use bumpalo::{collections::String, Bump};
let arena = Bump::new();
let input = "This should be a newline:\n";
let mut text = String::from_str_in(input, &arena);
fmt_text_eof(&mut text);
// This should be unchanged!
assert_eq!(text.as_str(), input);
}
#[test]
fn eof_text_ends_with_whitespace() {
use bumpalo::{collections::String, Bump};
let arena = Bump::new();
let input = "This should be a newline: \t";
let mut text = String::from_str_in(input, &arena);
fmt_text_eof(&mut text);
assert_eq!(text.as_str(), "This should be a newline:\n");
}
#[test]
fn eof_text_ends_with_whitespace_then_newline() {
use bumpalo::{collections::String, Bump};
let arena = Bump::new();
let input = "This should be a newline: \n";
let mut text = String::from_str_in(input, &arena);
fmt_text_eof(&mut text);
assert_eq!(text.as_str(), "This should be a newline:\n");
}
#[test]
fn eof_text_ends_with_no_whitespace() {
use bumpalo::{collections::String, Bump};
let arena = Bump::new();
let input = "This should be a newline:";
let mut text = String::from_str_in(input, &arena);
fmt_text_eof(&mut text);
assert_eq!(text.as_str(), "This should be a newline:\n");
}
#[test]
fn eof_text_is_empty() {
use bumpalo::{collections::String, Bump};
let arena = Bump::new();
let input = "";
let mut text = String::from_str_in(input, &arena);
fmt_text_eof(&mut text);
assert_eq!(text.as_str(), "\n");
}

View file

@ -12,9 +12,9 @@ use roc_parse::{
ModuleName, PackageEntry, PackageHeader, PackageName, PlatformHeader, PlatformRequires,
ProvidesTo, To, TypedIdent,
},
ident::UppercaseIdent,
ident::{BadIdent, UppercaseIdent},
};
use roc_region::all::{Loc, Region};
use roc_region::all::{Loc, Position, Region};
use crate::{Ast, Buf};
@ -32,6 +32,17 @@ pub fn fmt_default_spaces<'a, 'buf>(
fmt_spaces(buf, spaces.iter(), indent);
}
}
pub fn fmt_default_newline<'a, 'buf>(
buf: &mut Buf<'buf>,
spaces: &[CommentOrNewline<'a>],
indent: u16,
) {
if spaces.is_empty() {
buf.newline();
} else {
fmt_spaces(buf, spaces.iter(), indent);
}
}
/// Like fmt_spaces, but disallows two consecutive newlines.
pub fn fmt_spaces_no_blank_lines<'a, 'buf, I>(buf: &mut Buf<'buf>, spaces: I, indent: u16)
@ -714,7 +725,7 @@ impl<'a> RemoveSpaces<'a> for Expr<'a> {
// The formatter can remove redundant parentheses, so also remove these when normalizing for comparison.
a.remove_spaces(arena)
}
Expr::MalformedIdent(a, b) => Expr::MalformedIdent(a, b),
Expr::MalformedIdent(a, b) => Expr::MalformedIdent(a, remove_spaces_bad_ident(b)),
Expr::MalformedClosure => Expr::MalformedClosure,
Expr::PrecedenceConflict(a) => Expr::PrecedenceConflict(a),
Expr::SpaceBefore(a, _) => a.remove_spaces(arena),
@ -724,6 +735,20 @@ impl<'a> RemoveSpaces<'a> for Expr<'a> {
}
}
fn remove_spaces_bad_ident(ident: BadIdent) -> BadIdent {
match ident {
BadIdent::Start(_) => BadIdent::Start(Position::zero()),
BadIdent::Space(e, _) => BadIdent::Space(e, Position::zero()),
BadIdent::Underscore(_) => BadIdent::Underscore(Position::zero()),
BadIdent::QualifiedTag(_) => BadIdent::QualifiedTag(Position::zero()),
BadIdent::WeirdAccessor(_) => BadIdent::WeirdAccessor(Position::zero()),
BadIdent::WeirdDotAccess(_) => BadIdent::WeirdDotAccess(Position::zero()),
BadIdent::WeirdDotQualified(_) => BadIdent::WeirdDotQualified(Position::zero()),
BadIdent::StrayDot(_) => BadIdent::StrayDot(Position::zero()),
BadIdent::BadOpaqueRef(_) => BadIdent::BadOpaqueRef(Position::zero()),
}
}
impl<'a> RemoveSpaces<'a> for Pattern<'a> {
fn remove_spaces(&self, arena: &'a Bump) -> Self {
match *self {
@ -755,7 +780,7 @@ impl<'a> RemoveSpaces<'a> for Pattern<'a> {
Pattern::StrLiteral(a) => Pattern::StrLiteral(a),
Pattern::Underscore(a) => Pattern::Underscore(a),
Pattern::Malformed(a) => Pattern::Malformed(a),
Pattern::MalformedIdent(a, b) => Pattern::MalformedIdent(a, b),
Pattern::MalformedIdent(a, b) => Pattern::MalformedIdent(a, remove_spaces_bad_ident(b)),
Pattern::QualifiedIdentifier { module_name, ident } => {
Pattern::QualifiedIdentifier { module_name, ident }
}

View file

@ -0,0 +1,77 @@
use bumpalo::Bump;
use roc_test_utils::assert_multiline_str_eq;
use crate::{
annotation::{Formattable, Newlines, Parens},
Buf,
};
/// Parse and re-format the given input, and pass the output to `check_formatting`
/// for verification. The expectation is that `check_formatting` assert the result matches
/// expectations (or, overwrite the expectation based on a command-line flag)
/// Optionally, based on the value of `check_idempotency`, also verify that the formatting
/// is idempotent - that if we reformat the output, we get the same result.
pub fn expr_formats(input: &str, check_formatting: impl Fn(&str), check_idempotency: bool) {
let arena = Bump::new();
let input = input.trim();
match roc_parse::test_helpers::parse_expr_with(&arena, input) {
Ok(actual) => {
use crate::spaces::RemoveSpaces;
let mut buf = Buf::new_in(&arena);
actual.format_with_options(&mut buf, Parens::NotNeeded, Newlines::Yes, 0);
let output = buf.as_str();
check_formatting(output);
let reparsed_ast = roc_parse::test_helpers::parse_expr_with(&arena, output).unwrap_or_else(|err| {
panic!(
"After formatting, the source code no longer parsed!\n\n\
Parse error was: {:?}\n\n\
The code that failed to parse:\n\n{}\n\n\
The original ast was:\n\n{:#?}\n\n",
err, output, actual
);
});
let ast_normalized = actual.remove_spaces(&arena);
let reparsed_ast_normalized = reparsed_ast.remove_spaces(&arena);
// HACK!
// We compare the debug format strings of the ASTs, because I'm finding in practice that _somewhere_ deep inside the ast,
// the PartialEq implementation is returning `false` even when the Debug-formatted impl is exactly the same.
// I don't have the patience to debug this right now, so let's leave it for another day...
// TODO: fix PartialEq impl on ast types
if format!("{:?}", ast_normalized) != format!("{:?}", reparsed_ast_normalized) {
panic!(
"Formatting bug; formatting didn't reparse to the same AST (after removing spaces)\n\n\
* * * Source code before formatting:\n{}\n\n\
* * * Source code after formatting:\n{}\n\n\
* * * AST before formatting:\n{:#?}\n\n\
* * * AST after formatting:\n{:#?}\n\n",
input,
output,
ast_normalized,
reparsed_ast_normalized
);
}
// Now verify that the resultant formatting is _idempotent_ - i.e. that it doesn't change again if re-formatted
if check_idempotency {
let mut reformatted_buf = Buf::new_in(&arena);
reparsed_ast.format_with_options(&mut reformatted_buf, Parens::NotNeeded, Newlines::Yes, 0);
if output != reformatted_buf.as_str() {
eprintln!("Formatting bug; formatting is not stable.\nOriginal code:\n{}\n\nFormatted code:\n{}\n\n", input, output);
eprintln!("Reformatting the formatted code changed it again, as follows:\n\n");
assert_multiline_str_eq!(output, reformatted_buf.as_str());
}
}
}
Err(error) => panic!("Unexpected parse failure when parsing this for formatting:\n\n{}\n\nParse error was:\n\n{:?}\n\n", input, error)
};
}