Fix a bunch of bugs in parsing/formatting found by fuzzing

This commit is contained in:
Joshua Warner 2023-02-05 11:48:05 -08:00
parent acd446f6bd
commit 3fee0d3e8f
No known key found for this signature in database
GPG key ID: 89AD497003F93FDD
43 changed files with 593 additions and 70 deletions

View file

@ -76,8 +76,19 @@ impl<'a> Formattable for TypeDef<'a> {
for var in *vars {
buf.spaces(1);
let need_parens = matches!(var.value, Pattern::Apply(..));
if need_parens {
buf.push_str("(");
}
fmt_pattern(buf, &var.value, indent, Parens::NotNeeded);
buf.indent(indent);
if need_parens {
buf.push_str(")");
}
}
buf.push_str(" :");

View file

@ -51,25 +51,7 @@ impl<'a> Formattable for Expr<'a> {
List(items) => items.iter().any(|loc_expr| loc_expr.is_multiline()),
Str(literal) => {
use roc_parse::ast::StrLiteral::*;
match literal {
PlainLine(string) => {
// When a PlainLine contains '\n' or '"', format as a block string
string.contains('"') || string.contains('\n')
}
Line(_) => {
// If this had any newlines, it'd have parsed as Block.
false
}
Block(_) => {
// Block strings are always formatted on multiple lines,
// even if the string is only a single line.
true
}
}
}
Str(literal) => is_str_multiline(literal),
Apply(loc_expr, args, _) => {
loc_expr.is_multiline() || args.iter().any(|loc_arg| loc_arg.is_multiline())
}
@ -271,8 +253,21 @@ impl<'a> Formattable for Expr<'a> {
indent
};
let expr_needs_parens =
matches!(loc_expr.value.extract_spaces().item, Expr::Closure(..))
&& !loc_args.is_empty();
if expr_needs_parens {
buf.push('(');
}
loc_expr.format_with_options(buf, Parens::InApply, Newlines::Yes, indent);
if expr_needs_parens {
buf.indent(indent);
buf.push(')');
}
for loc_arg in loc_args.iter() {
if should_reflow_outdentable {
buf.spaces(1);
@ -432,7 +427,31 @@ impl<'a> Formattable for Expr<'a> {
}
}
sub_expr.format_with_options(buf, Parens::InApply, newlines, indent);
let needs_newline = match &sub_expr.value {
SpaceBefore(..) => true,
Str(text) => is_str_multiline(text),
_ => false,
};
let needs_parens =
needs_newline && matches!(unary_op.value, called_via::UnaryOp::Negate);
if needs_parens {
// Unary negation can't be followed by whitespace (which is what a newline is) - so
// we need to wrap the negated value in parens.
buf.push('(');
}
let inner_indent = if needs_parens {
indent + INDENT
} else {
indent
};
sub_expr.format_with_options(buf, Parens::InApply, newlines, inner_indent);
if needs_parens {
buf.push(')');
}
}
RecordAccessorFunction(key) => {
buf.indent(indent);
@ -464,6 +483,26 @@ impl<'a> Formattable for Expr<'a> {
}
}
fn is_str_multiline(literal: &StrLiteral) -> bool {
use roc_parse::ast::StrLiteral::*;
match literal {
PlainLine(string) => {
// When a PlainLine contains '\n' or '"', format as a block string
string.contains('"') || string.contains('\n')
}
Line(_) => {
// If this had any newlines, it'd have parsed as Block.
false
}
Block(_) => {
// Block strings are always formatted on multiple lines,
// even if the string is only a single line.
true
}
}
}
fn needs_unicode_escape(ch: char) -> bool {
matches!(ch, '\u{0000}'..='\u{001f}' | '\u{007f}'..='\u{009f}')
}
@ -585,11 +624,11 @@ pub fn fmt_str_literal<'buf>(buf: &mut Buf<'buf>, literal: StrLiteral, indent: u
buf.ensure_ends_with_newline();
buf.indent(indent);
buf.push_str("\"\"\"");
buf.newline();
buf.push_newline_literal();
for line in string.split('\n') {
buf.indent(indent);
buf.push_str_allow_spaces(line);
buf.newline();
buf.push_newline_literal();
}
buf.indent(indent);
buf.push_str("\"\"\"");
@ -613,7 +652,7 @@ pub fn fmt_str_literal<'buf>(buf: &mut Buf<'buf>, literal: StrLiteral, indent: u
buf.ensure_ends_with_newline();
buf.indent(indent);
buf.push_str("\"\"\"");
buf.newline();
buf.push_newline_literal();
for segments in lines.iter() {
for seg in segments.iter() {
@ -622,11 +661,11 @@ pub fn fmt_str_literal<'buf>(buf: &mut Buf<'buf>, literal: StrLiteral, indent: u
buf.indent(indent);
format_str_segment(seg, buf, indent);
} else {
buf.newline();
buf.push_newline_literal();
}
}
buf.newline();
buf.push_newline_literal();
}
buf.indent(indent);
buf.push_str("\"\"\"");

View file

@ -106,12 +106,20 @@ impl<'a> Buf<'a> {
self.spaces_to_flush += count;
}
pub fn newline(&mut self) {
/// Only for use in emitting newlines in block strings, which don't follow the rule of
/// having at most two newlines in a row.
pub fn push_newline_literal(&mut self) {
self.spaces_to_flush = 0;
self.newlines_to_flush += 1;
self.beginning_of_line = true;
}
pub fn newline(&mut self) {
self.spaces_to_flush = 0;
self.newlines_to_flush = std::cmp::min(self.newlines_to_flush + 1, 2);
self.beginning_of_line = true;
}
/// Ensures the current buffer ends in a newline, if it didn't already.
/// Doesn't add a newline if the buffer already ends in one.
pub fn ensure_ends_with_newline(&mut self) {

View file

@ -745,6 +745,7 @@ fn remove_spaces_bad_ident(ident: BadIdent) -> BadIdent {
BadIdent::WeirdDotQualified(_) => BadIdent::WeirdDotQualified(Position::zero()),
BadIdent::StrayDot(_) => BadIdent::StrayDot(Position::zero()),
BadIdent::BadOpaqueRef(_) => BadIdent::BadOpaqueRef(Position::zero()),
BadIdent::QualifiedTupleAccessor(_) => BadIdent::QualifiedTupleAccessor(Position::zero()),
}
}

View file

@ -1106,7 +1106,10 @@ fn finish_parsing_alias_or_opaque<'a>(
let mut defs = Defs::default();
let state = match &expr.value.extract_spaces().item {
Expr::Tag(name) => {
Expr::ParensAround(Expr::SpaceBefore(Expr::Tag(name), _))
| Expr::ParensAround(Expr::SpaceAfter(Expr::Tag(name), _))
| Expr::ParensAround(Expr::Tag(name))
| Expr::Tag(name) => {
let mut type_arguments = Vec::with_capacity_in(arguments.len(), arena);
for argument in arguments {
@ -1796,20 +1799,47 @@ pub fn loc_expr<'a>(accept_multi_backpassing: bool) -> impl Parser<'a, Loc<Expr<
})
}
pub fn merge_spaces<'a>(
arena: &'a Bump,
a: &'a [CommentOrNewline<'a>],
b: &'a [CommentOrNewline<'a>],
) -> &'a [CommentOrNewline<'a>] {
if a.is_empty() {
b
} else if b.is_empty() {
a
} else {
let mut merged = Vec::with_capacity_in(a.len() + b.len(), arena);
merged.extend_from_slice(a);
merged.extend_from_slice(b);
merged.into_bump_slice()
}
}
/// If the given Expr would parse the same way as a valid Pattern, convert it.
/// Example: (foo) could be either an Expr::Var("foo") or Pattern::Identifier("foo")
fn expr_to_pattern_help<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result<Pattern<'a>, ()> {
match expr {
let mut expr = expr.extract_spaces();
if let Expr::ParensAround(loc_expr) = &expr.item {
let expr_inner = loc_expr.extract_spaces();
expr.before = merge_spaces(arena, expr.before, expr_inner.before);
expr.after = merge_spaces(arena, expr_inner.after, expr.after);
expr.item = expr_inner.item;
}
let mut pat = match expr.item {
Expr::Var { module_name, ident } => {
if module_name.is_empty() {
Ok(Pattern::Identifier(ident))
Pattern::Identifier(ident)
} else {
Ok(Pattern::QualifiedIdentifier { module_name, ident })
Pattern::QualifiedIdentifier { module_name, ident }
}
}
Expr::Underscore(opt_name) => Ok(Pattern::Underscore(opt_name)),
Expr::Tag(value) => Ok(Pattern::Tag(value)),
Expr::OpaqueRef(value) => Ok(Pattern::OpaqueRef(value)),
Expr::Underscore(opt_name) => Pattern::Underscore(opt_name),
Expr::Tag(value) => Pattern::Tag(value),
Expr::OpaqueRef(value) => Pattern::OpaqueRef(value),
Expr::Apply(loc_val, loc_args, _) => {
let region = loc_val.region;
let value = expr_to_pattern_help(arena, &loc_val.value)?;
@ -1826,19 +1856,10 @@ fn expr_to_pattern_help<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result<Pattern<
let pattern = Pattern::Apply(val_pattern, arg_patterns.into_bump_slice());
Ok(pattern)
pattern
}
Expr::SpaceBefore(sub_expr, spaces) => Ok(Pattern::SpaceBefore(
arena.alloc(expr_to_pattern_help(arena, sub_expr)?),
spaces,
)),
Expr::SpaceAfter(sub_expr, spaces) => Ok(Pattern::SpaceAfter(
arena.alloc(expr_to_pattern_help(arena, sub_expr)?),
spaces,
)),
Expr::ParensAround(sub_expr) => expr_to_pattern_help(arena, sub_expr),
Expr::SpaceBefore(..) | Expr::SpaceAfter(..) | Expr::ParensAround(..) => unreachable!(),
Expr::Record(fields) => {
let patterns = fields.map_items_result(arena, |loc_assigned_field| {
@ -1847,30 +1868,27 @@ fn expr_to_pattern_help<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result<Pattern<
Ok(Loc { region, value })
})?;
Ok(Pattern::RecordDestructure(patterns))
Pattern::RecordDestructure(patterns)
}
Expr::Tuple(fields) => Ok(Pattern::Tuple(fields.map_items_result(
arena,
|loc_expr| {
Ok(Loc {
region: loc_expr.region,
value: expr_to_pattern_help(arena, &loc_expr.value)?,
})
},
)?)),
Expr::Tuple(fields) => Pattern::Tuple(fields.map_items_result(arena, |loc_expr| {
Ok(Loc {
region: loc_expr.region,
value: expr_to_pattern_help(arena, &loc_expr.value)?,
})
})?),
&Expr::Float(string) => Ok(Pattern::FloatLiteral(string)),
&Expr::Num(string) => Ok(Pattern::NumLiteral(string)),
Expr::Float(string) => Pattern::FloatLiteral(string),
Expr::Num(string) => Pattern::NumLiteral(string),
Expr::NonBase10Int {
string,
base,
is_negative,
} => Ok(Pattern::NonBase10Literal {
} => Pattern::NonBase10Literal {
string,
base: *base,
is_negative: *is_negative,
}),
base,
is_negative,
},
// These would not have parsed as patterns
Expr::RecordAccessorFunction(_)
| Expr::RecordAccess(_, _)
@ -1889,12 +1907,23 @@ fn expr_to_pattern_help<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result<Pattern<
| Expr::PrecedenceConflict { .. }
| Expr::RecordUpdate { .. }
| Expr::UnaryOp(_, _)
| Expr::Crash => Err(()),
| Expr::Crash => return Err(()),
Expr::Str(string) => Ok(Pattern::StrLiteral(*string)),
Expr::SingleQuote(string) => Ok(Pattern::SingleQuote(string)),
Expr::MalformedIdent(string, problem) => Ok(Pattern::MalformedIdent(string, *problem)),
Expr::Str(string) => Pattern::StrLiteral(string),
Expr::SingleQuote(string) => Pattern::SingleQuote(string),
Expr::MalformedIdent(string, problem) => Pattern::MalformedIdent(string, problem),
};
// Now we re-add the spaces
if !expr.before.is_empty() {
pat = Pattern::SpaceBefore(arena.alloc(pat), expr.before);
}
if !expr.after.is_empty() {
pat = Pattern::SpaceAfter(arena.alloc(pat), expr.after);
}
Ok(pat)
}
fn assigned_expr_field_to_pattern_help<'a>(

View file

@ -264,6 +264,7 @@ pub enum BadIdent {
WeirdDotQualified(Position),
StrayDot(Position),
BadOpaqueRef(Position),
QualifiedTupleAccessor(Position),
}
fn is_alnum(ch: char) -> bool {
@ -500,6 +501,13 @@ fn chomp_identifier_chain<'a>(
match chomp_access_chain(&buffer[chomped..], &mut parts) {
Ok(width) => {
if matches!(parts[0], Accessor::TupleIndex(_)) && first_is_uppercase {
return Err((
chomped as u32,
BadIdent::QualifiedTupleAccessor(pos.bump_column(chomped as u32)),
));
}
chomped += width as usize;
let ident = Ident::Access {

View file

@ -0,0 +1,6 @@
MalformedIdent(
"I.5",
QualifiedTupleAccessor(
@1,
),
)

View file

@ -0,0 +1,3 @@
a
&& (\x -> x)
8

View file

@ -0,0 +1,35 @@
BinOps(
[
(
@0-1 Var {
module_name: "",
ident: "a",
},
@2-4 And,
),
],
@5-12 Apply(
@5-10 Closure(
[
@6-7 Identifier(
"x",
),
],
@9-10 Var {
module_name: "",
ident: "x",
},
),
[
@11-12 SpaceBefore(
Num(
"8",
),
[
Newline,
],
),
],
Space,
),
)

View file

@ -0,0 +1,2 @@
a && \x->x
8

View file

@ -0,0 +1,39 @@
BinOps(
[
(
@0-1 Var {
module_name: "",
ident: "i",
},
@1-2 GreaterThan,
),
],
@2-10 Apply(
@2-7 SpaceAfter(
Closure(
[
@3-4 Identifier(
"s",
),
],
@6-7 Var {
module_name: "",
ident: "s",
},
),
[
Newline,
],
),
[
@8-10 UnaryOp(
@9-10 Var {
module_name: "",
ident: "a",
},
@8-9 Negate,
),
],
Space,
),
)

View file

@ -0,0 +1,47 @@
Defs(
Defs {
tags: [
Index(0),
],
regions: [
@0-4,
],
space_before: [
Slice(start = 0, length = 0),
],
space_after: [
Slice(start = 0, length = 0),
],
spaces: [],
type_defs: [
Alias {
header: TypeHeader {
name: @0-1 "B",
vars: [],
},
ann: @2-4 Record {
fields: [],
ext: None,
},
},
],
value_defs: [],
},
@6-10 SpaceBefore(
ParensAround(
SpaceBefore(
Var {
module_name: "",
ident: "a",
},
[
Newline,
],
),
),
[
Newline,
Newline,
],
),
)

View file

@ -4,5 +4,5 @@ UnaryOp(
[],
),
),
@0-1 Not,
@0-1 Negate,
)

View file

@ -0,0 +1,8 @@
UnaryOp(
@1-9 Str(
PlainLine(
"\"<",
),
),
@0-1 Negate,
)

View file

@ -0,0 +1,8 @@
UnaryOp(
@1-7 Str(
Block(
[],
),
),
@0-1 Not,
)

View file

@ -0,0 +1,49 @@
Defs(
Defs {
tags: [
Index(0),
],
regions: [
@0-8,
],
space_before: [
Slice(start = 0, length = 0),
],
space_after: [
Slice(start = 0, length = 0),
],
spaces: [],
type_defs: [
Alias {
header: TypeHeader {
name: @0-1 "U",
vars: [
@2-5 Apply(
@2-3 Identifier(
"b",
),
[
@4-5 Identifier(
"a",
),
],
),
],
},
ann: @7-8 BoundVariable(
"b",
),
},
],
value_defs: [],
},
@9-10 SpaceBefore(
Var {
module_name: "",
ident: "a",
},
[
Newline,
],
),
)

View file

@ -0,0 +1,52 @@
Defs(
Defs {
tags: [
Index(2147483648),
],
regions: [
@0-9,
],
space_before: [
Slice(start = 0, length = 0),
],
space_after: [
Slice(start = 0, length = 0),
],
spaces: [],
type_defs: [],
value_defs: [
Annotation(
@0-1 Apply(
@0-1 Identifier(
"i",
),
[
@5-6 SpaceBefore(
Tag(
"N",
),
[
Newline,
LineComment(
"",
),
],
),
],
),
@8-9 BoundVariable(
"b",
),
),
],
},
@10-11 SpaceBefore(
Var {
module_name: "",
ident: "a",
},
[
Newline,
],
),
)

View file

@ -0,0 +1,38 @@
Defs(
Defs {
tags: [
Index(0),
],
regions: [
@1-5,
],
space_before: [
Slice(start = 0, length = 0),
],
space_after: [
Slice(start = 0, length = 0),
],
spaces: [],
type_defs: [
Alias {
header: TypeHeader {
name: @1-2 "D",
vars: [],
},
ann: @4-5 BoundVariable(
"b",
),
},
],
value_defs: [],
},
@6-7 SpaceBefore(
Var {
module_name: "",
ident: "a",
},
[
Newline,
],
),
)

View file

@ -0,0 +1,38 @@
Defs(
Defs {
tags: [
Index(0),
],
regions: [
@2-6,
],
space_before: [
Slice(start = 0, length = 0),
],
space_after: [
Slice(start = 0, length = 0),
],
spaces: [],
type_defs: [
Alias {
header: TypeHeader {
name: @2-3 "A",
vars: [],
},
ann: @5-6 BoundVariable(
"b",
),
},
],
value_defs: [],
},
@7-8 SpaceBefore(
Var {
module_name: "",
ident: "a",
},
[
Newline,
],
),
)

View file

@ -0,0 +1,24 @@
SpaceAfter(
Str(
Block(
[
[
Plaintext(
"\n",
),
Plaintext(
"\n",
),
Plaintext(
"#",
),
],
],
),
),
[
LineComment(
"",
),
],
)

View file

@ -251,6 +251,7 @@ mod test_snapshots {
malformed/malformed_ident_due_to_underscore.expr,
malformed/malformed_pattern_field_access.expr, // See https://github.com/roc-lang/roc/issues/399
malformed/malformed_pattern_module_name.expr, // See https://github.com/roc-lang/roc/issues/399
malformed/module_dot_tuple.expr,
malformed/qualified_tag.expr,
malformed/underscore_expr_in_def.expr,
pass/ability_demand_signature_is_multiline.expr,
@ -278,6 +279,8 @@ mod test_snapshots {
pass/basic_var.expr,
pass/bound_variable.expr,
pass/call_with_newlines.expr,
pass/closure_in_binop.expr,
pass/closure_in_binop_with_spaces.expr,
pass/closure_with_underscores.expr,
pass/comment_after_annotation.expr,
pass/comment_after_def.moduledefs,
@ -306,6 +309,7 @@ mod test_snapshots {
pass/equals_with_spaces.expr,
pass/expect.expr,
pass/expect_fx.moduledefs,
pass/extra_newline_in_parens.expr,
pass/float_with_underscores.expr,
pass/full_app_header.header,
pass/full_app_header_trailing_commas.header,
@ -343,6 +347,7 @@ mod test_snapshots {
pass/multiple_operators.expr,
pass/neg_inf_float.expr,
pass/negate_multiline_string.expr,
pass/negate_multiline_string_with_quote.expr,
pass/negative_float.expr,
pass/negative_in_apply_def.expr,
pass/negative_int.expr,
@ -368,6 +373,7 @@ mod test_snapshots {
pass/nonempty_package_header.header,
pass/nonempty_platform_header.header,
pass/not_docs.expr,
pass/not_multiline_string.expr,
pass/number_literal_suffixes.expr,
pass/one_backpassing.expr,
pass/one_char_string.expr,
@ -389,6 +395,10 @@ mod test_snapshots {
pass/outdented_list.expr,
pass/outdented_record.expr,
pass/packed_singleton_list.expr,
pass/parens_in_type_def_apply.expr,
pass/parens_in_value_def_annotation.expr,
pass/parenthesized_type_def.expr,
pass/parenthesized_type_def_space_before.expr,
pass/parenthetical_apply.expr,
pass/parenthetical_basic_field.expr,
pass/parenthetical_field_qualified_var.expr,
@ -420,6 +430,7 @@ mod test_snapshots {
pass/spaced_singleton_list.expr,
pass/spaces_inside_empty_list.expr,
pass/standalone_module_defs.moduledefs,
pass/str_block_multiple_newlines.expr,
pass/string_without_escape.expr,
pass/sub_var_with_spaces.expr,
pass/sub_with_spaces.expr,

View file

@ -1218,6 +1218,21 @@ fn to_bad_ident_expr_report<'b>(
]),
])
}
QualifiedTupleAccessor(pos) => {
let region = LineColumnRegion::from_pos(lines.convert_pos(pos));
alloc.stack([
alloc.reflow("I am trying to parse a qualified name here:"),
alloc.region_with_subregion(lines.convert_region(surroundings), region),
alloc.concat([
alloc.reflow("This looks like a tuple accessor on a module or tag name,"),
alloc.reflow(r"but neither modules nor tags can have tuple elements! "),
alloc.reflow(r"Maybe you wanted a qualified name, something like "),
alloc.parser_suggestion("Json.Decode.string"),
alloc.text("."),
]),
])
}
QualifiedTag(pos) => {
let region = LineColumnRegion::from_pos(lines.convert_pos(pos));
@ -1358,7 +1373,7 @@ fn to_bad_ident_pattern_report<'b>(
]),
]),
WeirdDotQualified(pos) => {
QualifiedTupleAccessor(pos) | WeirdDotQualified(pos) => {
let region = LineColumnRegion::from_pos(lines.convert_pos(pos));
alloc.stack([