Fix approximately a bajillion fmt and parsing bugs

(discovered by fuzzing)

There's more to come, but this seems like a good batch for now.
This commit is contained in:
Joshua Warner 2023-01-11 19:44:29 -08:00
parent 8f62eeaf7e
commit 0b8e68f70d
No known key found for this signature in database
GPG key ID: 89AD497003F93FDD
68 changed files with 1011 additions and 229 deletions

View file

@ -1,7 +1,6 @@
use std::fmt::Debug;
use crate::header::{AppHeader, HostedHeader, InterfaceHeader, PackageHeader, PlatformHeader};
use crate::ident::Ident;
use crate::parser::ESingleQuote;
use bumpalo::collections::{String, Vec};
use bumpalo::Bump;
@ -807,51 +806,6 @@ pub enum Base {
}
impl<'a> Pattern<'a> {
/// Converts a parsed `Ident` into the corresponding `Pattern`.
///
/// * Tags and opaque references map directly onto their pattern variants.
/// * An access with exactly one part becomes `Pattern::Identifier` when the
///   module name is empty, and `Pattern::QualifiedIdentifier` otherwise.
/// * An access with any other number of parts is treated as malformed: the
///   module name (if non-empty) and every part are joined with `.` into a
///   string allocated in `arena`, and returned as `Pattern::Malformed`.
/// * Accessor functions and already-malformed identifiers become
///   `Pattern::Malformed` carrying the original string.
pub fn from_ident(arena: &'a Bump, ident: Ident<'a>) -> Pattern<'a> {
    match ident {
        Ident::Tag(string) => Pattern::Tag(string),
        Ident::OpaqueRef(string) => Pattern::OpaqueRef(string),
        // A single part is valid; whether it is qualified depends on the module name.
        Ident::Access { module_name, parts } if parts.len() == 1 => {
            let ident = parts.iter().next().unwrap();
            if module_name.is_empty() {
                Pattern::Identifier(ident)
            } else {
                Pattern::QualifiedIdentifier { module_name, ident }
            }
        }
        // Any other number of parts is definitely malformed; rebuild the dotted text.
        Ident::Access { module_name, parts } => {
            let has_module = !module_name.is_empty();
            let mut joined =
                String::with_capacity_in(module_name.len() + (2 * parts.len()), arena);
            // Pushing an empty module name is a no-op, so no branch is needed here.
            joined.push_str(module_name);
            for (index, part) in parts.iter().enumerate() {
                // Separate from whatever was written before: the module name or a prior part.
                if has_module || index > 0 {
                    joined.push('.');
                }
                joined.push_str(part);
            }
            Pattern::Malformed(joined.into_bump_str())
        }
        Ident::RecordAccessorFunction(string) => Pattern::Malformed(string),
        Ident::TupleAccessorFunction(string) => Pattern::Malformed(string),
        Ident::Malformed(string, _problem) => Pattern::Malformed(string),
    }
}
/// Check that patterns are equivalent, meaning they have the same shape, but may have
/// different locations/whitespace
pub fn equivalent(&self, other: &Self) -> bool {

View file

@ -378,6 +378,10 @@ where
}
}
/// Returns `true` when `bytes` starts with a Windows-style line ending,
/// i.e. a carriage return (`\r`) immediately followed by a line feed (`\n`).
///
/// Slices shorter than two bytes always yield `false`.
fn begins_with_crlf(bytes: &[u8]) -> bool {
    bytes.starts_with(b"\r\n")
}
pub fn spaces<'a, E>() -> impl Parser<'a, &'a [CommentOrNewline<'a>], E>
where
E: 'a + SpaceProblem,
@ -399,6 +403,7 @@ where
let is_doc_comment = state.bytes().first() == Some(&b'#')
&& (state.bytes().get(1) == Some(&b' ')
|| state.bytes().get(1) == Some(&b'\n')
|| begins_with_crlf(&state.bytes()[1..])
|| state.bytes().get(1) == None);
if is_doc_comment {
@ -422,7 +427,10 @@ where
newlines.push(comment);
state.advance_mut(len);
if state.bytes().first() == Some(&b'\n') {
if begins_with_crlf(state.bytes()) {
state.advance_mut(1);
state = state.advance_newline();
} else if state.bytes().first() == Some(&b'\n') {
state = state.advance_newline();
}

View file

@ -4,7 +4,7 @@ use crate::ast::{
};
use crate::blankspace::{
space0_after_e, space0_around_e_no_after_indent_check, space0_around_ee, space0_before_e,
space0_e, spaces, spaces_around, spaces_before,
space0_before_optional_after, space0_e, spaces, spaces_around, spaces_before,
};
use crate::ident::{integer_ident, lowercase_ident, parse_ident, Accessor, Ident};
use crate::keyword;
@ -42,7 +42,7 @@ pub fn test_parse_expr<'a>(
state: State<'a>,
) -> Result<Loc<Expr<'a>>, EExpr<'a>> {
let parser = skip_second!(
space0_before_e(loc_expr(true), EExpr::IndentStart,),
space0_before_optional_after(loc_expr(true), EExpr::IndentStart, EExpr::IndentEnd),
expr_end()
);
@ -255,7 +255,10 @@ fn loc_possibly_negative_or_negated_term<'a>(
// this will parse negative numbers, which the unary negate thing up top doesn't (for now)
loc!(specialize(EExpr::Number, number_literal_help())),
loc!(map_with_arena!(
and!(loc!(word1(b'!', EExpr::Start)), loc_term(options)),
and!(
loc!(word1(b'!', EExpr::Start)),
space0_before_e(loc_term(options), EExpr::IndentStart)
),
|arena: &'a Bump, (loc_op, loc_expr): (Loc<_>, _)| {
Expr::UnaryOp(arena.alloc(loc_expr), Loc::at(loc_op.region, UnaryOp::Not))
}
@ -668,7 +671,7 @@ pub fn parse_single_def<'a>(
let (_, ann_type, state) = parser.parse(arena, state, min_indent)?;
let region = Region::span_across(&loc_pattern.region, &ann_type.region);
match &loc_pattern.value {
match &loc_pattern.value.extract_spaces().item {
Pattern::Apply(
Loc {
value: Pattern::Tag(name),
@ -740,7 +743,7 @@ pub fn parse_single_def<'a>(
opaque_signature_with_space_before().parse(arena, state, min_indent + 1)?;
let region = Region::span_across(&loc_pattern.region, &signature.region);
match &loc_pattern.value {
match &loc_pattern.value.extract_spaces().item {
Pattern::Apply(
Loc {
value: Pattern::Tag(name),
@ -1890,7 +1893,7 @@ fn expr_to_pattern_help<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result<Pattern<
Expr::Str(string) => Ok(Pattern::StrLiteral(*string)),
Expr::SingleQuote(string) => Ok(Pattern::SingleQuote(string)),
Expr::MalformedIdent(string, _problem) => Ok(Pattern::Malformed(string)),
Expr::MalformedIdent(string, problem) => Ok(Pattern::MalformedIdent(string, *problem)),
}
}

View file

@ -290,6 +290,10 @@ fn chomp_integer_part(buffer: &[u8]) -> Result<&str, Progress> {
)
}
/// Reports whether `ch` could plausibly continue an identifier:
/// either an underscore or any character accepted by `is_alnum`.
fn is_plausible_ident_continue(ch: char) -> bool {
    match ch {
        '_' => true,
        other => is_alnum(other),
    }
}
#[inline(always)]
fn chomp_part<F, G>(leading_is_good: F, rest_is_good: G, buffer: &[u8]) -> Result<&str, Progress>
where
@ -317,6 +321,15 @@ where
}
}
if let Ok((next, _width)) = char::from_utf8_slice_start(&buffer[chomped..]) {
// This would mean we have e.g.:
// * identifier followed by a _
// * an integer followed by an alphabetic char
if is_plausible_ident_continue(next) {
return Err(NoProgress);
}
}
if chomped == 0 {
Err(NoProgress)
} else {

View file

@ -535,6 +535,9 @@ pub enum EPattern<'a> {
IndentStart(Position),
IndentEnd(Position),
AsIndentStart(Position),
RecordAccessorFunction(Position),
TupleAccessorFunction(Position),
}
#[derive(Debug, Clone, PartialEq, Eq)]

View file

@ -406,13 +406,13 @@ fn loc_ident_pattern_help<'a>(
))
}
}
Ident::RecordAccessorFunction(string) | Ident::TupleAccessorFunction(string) => Ok((
Ident::RecordAccessorFunction(_string) => Err((
MadeProgress,
Loc {
region: loc_ident.region,
value: Pattern::Malformed(string),
},
state,
EPattern::RecordAccessorFunction(loc_ident.region.start()),
)),
Ident::TupleAccessorFunction(_string) => Err((
MadeProgress,
EPattern::TupleAccessorFunction(loc_ident.region.start()),
)),
Ident::Malformed(malformed, problem) => {
debug_assert!(!malformed.is_empty());

View file

@ -326,8 +326,12 @@ pub fn parse_str_like_literal<'a>() -> impl Parser<'a, StrLikeLiteral<'a>, EStri
if state.bytes().starts_with(b"\"\"\"") {
// ending the string; don't use the last newline
segments
.push(StrSegment::Plaintext(utf8(state.clone(), without_newline)?));
if !without_newline.is_empty() {
segments.push(StrSegment::Plaintext(utf8(
state.clone(),
without_newline,
)?));
}
} else {
segments
.push(StrSegment::Plaintext(utf8(state.clone(), with_newline)?));