Merge pull request #7409 from joshuawarner32/fuzzing-bugs-5

Fix another batch of parsing/formatting bugs found in fuzzing
This commit is contained in:
Luke Boswell 2024-12-28 17:36:03 +11:00 committed by GitHub
commit f7dbf850b9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
31 changed files with 750 additions and 74 deletions

View file

@ -34,6 +34,10 @@ impl<'a, T: Copy> ExtractSpaces<'a> for Spaces<'a, T> {
fn extract_spaces(&self) -> Spaces<'a, T> {
*self
}
fn without_spaces(&self) -> T {
self.item
}
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
@ -111,6 +115,7 @@ impl<'a, T: Debug> Debug for Spaced<'a, T> {
pub trait ExtractSpaces<'a>: Sized + Copy {
type Item;
fn extract_spaces(&self) -> Spaces<'a, Self::Item>;
fn without_spaces(&self) -> Self::Item;
}
impl<'a, T: ExtractSpaces<'a>> ExtractSpaces<'a> for &'a T {
@ -118,6 +123,10 @@ impl<'a, T: ExtractSpaces<'a>> ExtractSpaces<'a> for &'a T {
fn extract_spaces(&self) -> Spaces<'a, Self::Item> {
(*self).extract_spaces()
}
fn without_spaces(&self) -> Self::Item {
(*self).without_spaces()
}
}
impl<'a, T: ExtractSpaces<'a>> ExtractSpaces<'a> for Loc<T> {
@ -130,6 +139,10 @@ impl<'a, T: ExtractSpaces<'a>> ExtractSpaces<'a> for Loc<T> {
after: spaces.after,
}
}
fn without_spaces(&self) -> Self::Item {
self.value.without_spaces()
}
}
impl<'a> Header<'a> {
@ -2350,6 +2363,17 @@ macro_rules! impl_extract_spaces {
}
}
}
fn without_spaces(&self) -> Self::Item {
match self {
$t::SpaceBefore(item, _) => {
item.without_spaces()
},
$t::SpaceAfter(item, _) => {
item.without_spaces()
},
_ => *self,
}
}
}
};
}
@ -2412,6 +2436,14 @@ impl<'a, T: Copy> ExtractSpaces<'a> for Spaced<'a, T> {
},
}
}
fn without_spaces(&self) -> T {
match self {
Spaced::SpaceBefore(item, _) => item.without_spaces(),
Spaced::SpaceAfter(item, _) => item.without_spaces(),
Spaced::Item(item) => *item,
}
}
}
impl<'a> ExtractSpaces<'a> for AbilityImpls<'a> {
@ -2454,6 +2486,14 @@ impl<'a> ExtractSpaces<'a> for AbilityImpls<'a> {
},
}
}
fn without_spaces(&self) -> Self::Item {
match self {
AbilityImpls::AbilityImpls(inner) => *inner,
AbilityImpls::SpaceBefore(item, _) => item.without_spaces(),
AbilityImpls::SpaceAfter(item, _) => item.without_spaces(),
}
}
}
pub trait Malformed {

View file

@ -1,4 +1,4 @@
use crate::ast::{Collection, Implements, Pattern, PatternAs, Spaceable};
use crate::ast::{Collection, ExtractSpaces, Implements, Pattern, PatternAs, Spaceable};
use crate::blankspace::{space0_e, spaces, spaces_before};
use crate::ident::{lowercase_ident, parse_ident, Accessor, Ident};
use crate::keyword;
@ -150,7 +150,7 @@ fn loc_tag_pattern_arg<'a>(
if stop_on_has_kw
&& matches!(
value,
value.without_spaces(),
Pattern::Identifier {
ident: crate::keyword::IMPLEMENTS,
..

View file

@ -1,10 +1,11 @@
use crate::ast::{
AbilityImpls, AssignedField, CommentOrNewline, Expr, FunctionArrow, ImplementsAbilities,
ImplementsAbility, ImplementsClause, Pattern, Spaceable, Spaced, Tag, TypeAnnotation,
TypeHeader,
AbilityImpls, AssignedField, Collection, CommentOrNewline, Expr, FunctionArrow,
ImplementsAbilities, ImplementsAbility, ImplementsClause, Pattern, Spaceable, Spaced, Tag,
TypeAnnotation, TypeHeader,
};
use crate::blankspace::{
space0_around_ee, space0_before_e, space0_before_optional_after, space0_e,
spaces_before_optional_after,
};
use crate::expr::record_field;
use crate::ident::{lowercase_ident, lowercase_ident_keyword_e};
@ -12,7 +13,7 @@ use crate::keyword;
use crate::parser::{
absolute_column_min_indent, and, collection_trailing_sep_e, either, error_on_byte,
increment_min_indent, indented_seq, loc, map, map_with_arena, skip_first, skip_second, succeed,
then, zero_or_more, ERecord, ETypeAbilityImpl,
then, zero_or_more, ERecord, ETypeAbilityImpl, ParseResult,
};
use crate::parser::{
allocated, backtrackable, byte, fail, optional, specialize_err, specialize_err_ref, two_bytes,
@ -256,40 +257,259 @@ fn loc_applied_arg<'a>(
fn loc_type_in_parens<'a>(
stop_at_surface_has: bool,
) -> impl Parser<'a, Loc<TypeAnnotation<'a>>, ETypeInParens<'a>> {
then(
loc(and(
collection_trailing_sep_e(
byte(b'(', ETypeInParens::Open),
specialize_err_ref(ETypeInParens::Type, expression(true, false)),
byte(b',', ETypeInParens::End),
byte(b')', ETypeInParens::End),
TypeAnnotation::SpaceBefore,
),
optional(allocated(specialize_err_ref(
ETypeInParens::Type,
term(stop_at_surface_has),
))),
)),
|_arena, state, progress, item| {
let Loc {
region,
value: (fields, ext),
} = item;
if fields.len() > 1 || ext.is_some() {
Ok((
MadeProgress,
Loc::at(region, TypeAnnotation::Tuple { elems: fields, ext }),
state,
))
} else if fields.len() == 1 {
Ok((MadeProgress, fields.items[0], state))
(move |arena, state: State<'a>, min_indent: u32| {
// This used to just be the following code (roughly):
// loc(and(
// collection_trailing_sep_e(
// byte(b'(', ETypeInParens::Open),
// specialize_err_ref(ETypeInParens::Type, expression(true, false)),
// byte(b',', ETypeInParens::End),
// byte(b')', ETypeInParens::End),
// TypeAnnotation::SpaceBefore,
// ),
// optional(allocated(specialize_err_ref(
// ETypeInParens::Type,
// term(stop_at_surface_has),
// ))),
// )),
//
// However, we need to implement this manually in order to performantly handle both function types and tuples.
// For example:
// We might have a long list of items in a tuple, e.g. `(a, b, c, d, e, f)`,
// and if we were simply parsing a collection of expressions, we would start by trying to
// parse `a, b, c, d, e, f` as part of `expression`, see that there is no `->` token next,
// then backtrack and return just `a` as the result.
// Repeat for b, and so on.
// This can lead to exponential behavior, repeating the parsing of the same items over and over.
//
// Instead, here we integrate the parsing of function types directly into the parsing of the tuple loop,
// so that we don't have to discard the work we've done so far.
let mut fields = Vec::new_in(arena);
let mut state = state;
let start = state.pos();
let mut func_base = 0;
// Parse the opening parenthesis
let (_, _, new_state) = byte(b'(', ETypeInParens::Open).parse(arena, state, min_indent)?;
state = new_state;
// Parse the initial comments if any
let (_, spaces, new_state) = space0_e(ETypeInParens::IndentEnd).parse(arena, state, 0)?;
state = new_state;
match spaces_before_optional_after(specialize_err_ref(
ETypeInParens::Type,
term_or_apply_with_as(false),
))
.parse(arena, state.clone(), 0)
{
Ok((progress, first_field, next_state)) => {
debug_assert_eq!(progress, MadeProgress);
state = next_state;
fields.push(first_field);
loop {
enum Sep {
Comma,
FunctionArrow(FunctionArrow),
Where,
}
let sep = one_of![
map(byte(b',', ETypeInParens::End), |_| Sep::Comma),
map(
specialize_err_ref(ETypeInParens::Type, arrow()),
Sep::FunctionArrow
),
map(word(keyword::WHERE, ETypeInParens::End), |_| Sep::Where),
];
match sep.parse(arena, state.clone(), 0) {
Ok((_, Sep::Comma, next_state)) => {
match spaces_before_optional_after(specialize_err_ref(
ETypeInParens::Type,
term_or_apply_with_as(false),
))
.parse(arena, next_state.clone(), 0)
{
Ok((element_progress, next_field, next_state)) => {
debug_assert_eq!(element_progress, MadeProgress);
state = next_state;
fields.push(next_field);
}
Err((_, _fail)) => {
state = next_state;
break;
}
}
}
Ok((_, Sep::FunctionArrow(arrow), next_state)) => {
state = next_state;
let (_, return_type, new_state) = specialize_err_ref(
ETypeInParens::Type,
space0_before_e(arrow_sequence(), EType::TIndentStart),
)
.parse(arena, state.clone(), min_indent)
.map_err(|(_, e)| (MadeProgress, e))?;
state = new_state;
let region = Region::between(
fields
.get(func_base)
.map(|f| f.region.start())
.unwrap_or(start),
return_type.region.end(),
);
// prepare arguments
let arguments = arena.alloc_slice_copy(&fields[func_base..]);
debug_assert!(!arguments.is_empty());
fields.truncate(func_base);
let output = arena.alloc(arguments);
let result = Loc {
region,
value: TypeAnnotation::Function(
output,
arrow,
arena.alloc(return_type),
),
};
fields.push(result);
func_base = fields.len();
}
Ok((_, Sep::Where, new_state)) => {
let (_, implements_clauses, new_state) =
parse_implements_clause_chain_after_where(arena, 0, new_state)
.map_err(|(_, e)| {
(MadeProgress, ETypeInParens::Type(arena.alloc(e), start))
})?;
state = new_state;
let end = state.pos();
let last = fields[fields.len() - 1];
fields.pop();
let region = Region::between(last.region.start(), end);
fields.push(Loc::at(
region,
TypeAnnotation::Where(arena.alloc(last), implements_clauses),
));
}
Err((delim_progress, fail)) => match delim_progress {
MadeProgress => return Err((MadeProgress, fail)),
NoProgress => {
break;
}
},
}
}
}
Err((element_progress, fail)) => match element_progress {
MadeProgress => return Err((MadeProgress, fail)),
NoProgress => {}
},
}
// Parse the final comments if any
let (_, mut final_comments, new_state) =
space0_e(ETypeInParens::IndentEnd).parse(arena, state, 0)?;
state = new_state;
// Parse the closing parenthesis
let (_, _, new_state) = byte(b')', ETypeInParens::End)
.trace("end")
.parse(arena, state, 0)
.map_err(|(_, e)| (MadeProgress, e))?;
state = new_state;
if !spaces.is_empty() {
if let Some(first) = fields.first_mut() {
first.value = TypeAnnotation::SpaceBefore(arena.alloc(first.value), spaces);
} else {
debug_assert!(fields.is_empty());
Err((progress, ETypeInParens::Empty(state.pos())))
debug_assert!(final_comments.is_empty());
final_comments = spaces;
}
}
let fields =
Collection::with_items_and_comments(arena, fields.into_bump_slice(), final_comments);
// Optionally parse the extension
let (progress, ext, state) = optional(allocated(specialize_err_ref(
ETypeInParens::Type,
term(stop_at_surface_has),
)))
.parse(arena, state, min_indent)?;
let end = state.pos();
// Determine the result based on the parsed fields and extension
let result = if fields.len() > 1 || ext.is_some() {
TypeAnnotation::Tuple { elems: fields, ext }
} else if fields.len() == 1 {
return Ok((MadeProgress, fields.items[0], state));
} else {
debug_assert!(fields.is_empty());
return Err((progress, ETypeInParens::Empty(state.pos())));
};
let region = Region::between(start, end);
Ok((MadeProgress, Loc::at(region, result), state))
})
.trace("type_annotation:type_in_parens")
}
fn arrow<'a>() -> impl Parser<'a, FunctionArrow, EType<'a>> {
one_of![
map(two_bytes(b'-', b'>', EType::TStart), |_| {
FunctionArrow::Pure
}),
map(two_bytes(b'=', b'>', EType::TStart), |_| {
FunctionArrow::Effectful
}),
]
}
fn arrow_sequence<'a>() -> impl Parser<'a, Loc<TypeAnnotation<'a>>, EType<'a>> {
// A sequence of a -> b -> c -> etc
map_with_arena(
and(
term_or_apply_with_as(false),
zero_or_more(and(arrow(), term_or_apply_with_as(false))),
),
|arena, (first, rest)| {
let mut it = rest.into_iter().rev();
if let Some((mut arrow, mut item)) = it.next() {
for (n_arrow, next) in it {
let region = Region::span_across(&next.region, &item.region);
let value = TypeAnnotation::Function(
arena.alloc_slice_copy(&[next]),
arrow,
arena.alloc(item),
);
item = Loc::at(region, value);
arrow = n_arrow;
}
let region = Region::span_across(&first.region, &item.region);
Loc::at(
region,
TypeAnnotation::Function(
arena.alloc_slice_copy(&[first]),
arrow,
arena.alloc(item),
),
)
} else {
first
}
},
)
.trace("type_annotation:type_in_parens")
}
#[inline(always)]
@ -518,26 +738,32 @@ fn implements_clause_chain<'a>(
)
.parse(arena, state, min_indent)?;
// Parse the first clause (there must be one), then the rest
let (_, first_clause, state) = implements_clause().parse(arena, state, min_indent)?;
let (_, mut clauses, state) = zero_or_more(skip_first(
byte(b',', EType::TImplementsClause),
implements_clause(),
))
.parse(arena, state, min_indent)?;
// Usually the number of clauses shouldn't be too large, so this is okay
clauses.insert(0, first_clause);
Ok((
MadeProgress,
(spaces_before, clauses.into_bump_slice()),
state,
))
let (_, clauses, state) =
parse_implements_clause_chain_after_where(arena, min_indent, state)?;
Ok((MadeProgress, (spaces_before, clauses), state))
}
}
fn parse_implements_clause_chain_after_where<'a>(
arena: &'a Bump,
min_indent: u32,
state: State<'a>,
) -> ParseResult<'a, &'a [Loc<ImplementsClause<'a>>], EType<'a>> {
// Parse the first clause (there must be one), then the rest
let (_, first_clause, state) = implements_clause().parse(arena, state, min_indent)?;
let (_, mut clauses, state) = zero_or_more(skip_first(
byte(b',', EType::TImplementsClause),
implements_clause(),
))
.parse(arena, state, min_indent)?;
// Usually the number of clauses shouldn't be too large, so this is okay
clauses.insert(0, first_clause);
Ok((MadeProgress, clauses.into_bump_slice(), state))
}
/// Parse a implements-abilities clause, e.g. `implements [Eq, Hash]`.
pub fn implements_abilities<'a>() -> impl Parser<'a, Loc<ImplementsAbilities<'a>>, EType<'a>> {
increment_min_indent(skip_first(
@ -643,7 +869,8 @@ fn expression<'a>(
term_or_apply_with_as(stop_at_surface_has),
EType::TIndentStart,
)
.parse(arena, state, min_indent)?;
.parse(arena, state, min_indent)
.map_err(|(_, e)| (MadeProgress, e))?;
let region = Region::span_across(&first.region, &return_type.region);