mirror of
https://github.com/roc-lang/roc.git
synced 2025-07-28 17:03:44 +00:00
Fix approximately a bajillion fmt and parsing bugs
(discovered by fuzzing) There's more to come, but this seems like a good batch for now.
This commit is contained in:
parent
8f62eeaf7e
commit
0b8e68f70d
68 changed files with 1011 additions and 229 deletions
|
@ -1,7 +1,6 @@
|
|||
use std::fmt::Debug;
|
||||
|
||||
use crate::header::{AppHeader, HostedHeader, InterfaceHeader, PackageHeader, PlatformHeader};
|
||||
use crate::ident::Ident;
|
||||
use crate::parser::ESingleQuote;
|
||||
use bumpalo::collections::{String, Vec};
|
||||
use bumpalo::Bump;
|
||||
|
@ -807,51 +806,6 @@ pub enum Base {
|
|||
}
|
||||
|
||||
impl<'a> Pattern<'a> {
|
||||
pub fn from_ident(arena: &'a Bump, ident: Ident<'a>) -> Pattern<'a> {
|
||||
match ident {
|
||||
Ident::Tag(string) => Pattern::Tag(string),
|
||||
Ident::OpaqueRef(string) => Pattern::OpaqueRef(string),
|
||||
Ident::Access { module_name, parts } => {
|
||||
if parts.len() == 1 {
|
||||
// This is valid iff there is no module.
|
||||
let ident = parts.iter().next().unwrap();
|
||||
|
||||
if module_name.is_empty() {
|
||||
Pattern::Identifier(ident)
|
||||
} else {
|
||||
Pattern::QualifiedIdentifier { module_name, ident }
|
||||
}
|
||||
} else {
|
||||
// This is definitely malformed.
|
||||
let mut buf =
|
||||
String::with_capacity_in(module_name.len() + (2 * parts.len()), arena);
|
||||
let mut any_parts_printed = if module_name.is_empty() {
|
||||
false
|
||||
} else {
|
||||
buf.push_str(module_name);
|
||||
|
||||
true
|
||||
};
|
||||
|
||||
for part in parts.iter() {
|
||||
if any_parts_printed {
|
||||
buf.push('.');
|
||||
} else {
|
||||
any_parts_printed = true;
|
||||
}
|
||||
|
||||
buf.push_str(part);
|
||||
}
|
||||
|
||||
Pattern::Malformed(buf.into_bump_str())
|
||||
}
|
||||
}
|
||||
Ident::RecordAccessorFunction(string) => Pattern::Malformed(string),
|
||||
Ident::TupleAccessorFunction(string) => Pattern::Malformed(string),
|
||||
Ident::Malformed(string, _problem) => Pattern::Malformed(string),
|
||||
}
|
||||
}
|
||||
|
||||
/// Check that patterns are equivalent, meaning they have the same shape, but may have
|
||||
/// different locations/whitespace
|
||||
pub fn equivalent(&self, other: &Self) -> bool {
|
||||
|
|
|
@ -378,6 +378,10 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns `true` when `bytes` starts with a Windows-style line ending,
/// i.e. a carriage return (`\r`) immediately followed by a line feed (`\n`).
///
/// Slices shorter than two bytes never match.
fn begins_with_crlf(bytes: &[u8]) -> bool {
    bytes.starts_with(b"\r\n")
}
|
||||
|
||||
pub fn spaces<'a, E>() -> impl Parser<'a, &'a [CommentOrNewline<'a>], E>
|
||||
where
|
||||
E: 'a + SpaceProblem,
|
||||
|
@ -399,6 +403,7 @@ where
|
|||
let is_doc_comment = state.bytes().first() == Some(&b'#')
|
||||
&& (state.bytes().get(1) == Some(&b' ')
|
||||
|| state.bytes().get(1) == Some(&b'\n')
|
||||
|| begins_with_crlf(&state.bytes()[1..])
|
||||
|| state.bytes().get(1) == None);
|
||||
|
||||
if is_doc_comment {
|
||||
|
@ -422,7 +427,10 @@ where
|
|||
newlines.push(comment);
|
||||
state.advance_mut(len);
|
||||
|
||||
if state.bytes().first() == Some(&b'\n') {
|
||||
if begins_with_crlf(state.bytes()) {
|
||||
state.advance_mut(1);
|
||||
state = state.advance_newline();
|
||||
} else if state.bytes().first() == Some(&b'\n') {
|
||||
state = state.advance_newline();
|
||||
}
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ use crate::ast::{
|
|||
};
|
||||
use crate::blankspace::{
|
||||
space0_after_e, space0_around_e_no_after_indent_check, space0_around_ee, space0_before_e,
|
||||
space0_e, spaces, spaces_around, spaces_before,
|
||||
space0_before_optional_after, space0_e, spaces, spaces_around, spaces_before,
|
||||
};
|
||||
use crate::ident::{integer_ident, lowercase_ident, parse_ident, Accessor, Ident};
|
||||
use crate::keyword;
|
||||
|
@ -42,7 +42,7 @@ pub fn test_parse_expr<'a>(
|
|||
state: State<'a>,
|
||||
) -> Result<Loc<Expr<'a>>, EExpr<'a>> {
|
||||
let parser = skip_second!(
|
||||
space0_before_e(loc_expr(true), EExpr::IndentStart,),
|
||||
space0_before_optional_after(loc_expr(true), EExpr::IndentStart, EExpr::IndentEnd),
|
||||
expr_end()
|
||||
);
|
||||
|
||||
|
@ -255,7 +255,10 @@ fn loc_possibly_negative_or_negated_term<'a>(
|
|||
// this will parse negative numbers, which the unary negate thing up top doesn't (for now)
|
||||
loc!(specialize(EExpr::Number, number_literal_help())),
|
||||
loc!(map_with_arena!(
|
||||
and!(loc!(word1(b'!', EExpr::Start)), loc_term(options)),
|
||||
and!(
|
||||
loc!(word1(b'!', EExpr::Start)),
|
||||
space0_before_e(loc_term(options), EExpr::IndentStart)
|
||||
),
|
||||
|arena: &'a Bump, (loc_op, loc_expr): (Loc<_>, _)| {
|
||||
Expr::UnaryOp(arena.alloc(loc_expr), Loc::at(loc_op.region, UnaryOp::Not))
|
||||
}
|
||||
|
@ -668,7 +671,7 @@ pub fn parse_single_def<'a>(
|
|||
let (_, ann_type, state) = parser.parse(arena, state, min_indent)?;
|
||||
let region = Region::span_across(&loc_pattern.region, &ann_type.region);
|
||||
|
||||
match &loc_pattern.value {
|
||||
match &loc_pattern.value.extract_spaces().item {
|
||||
Pattern::Apply(
|
||||
Loc {
|
||||
value: Pattern::Tag(name),
|
||||
|
@ -740,7 +743,7 @@ pub fn parse_single_def<'a>(
|
|||
opaque_signature_with_space_before().parse(arena, state, min_indent + 1)?;
|
||||
let region = Region::span_across(&loc_pattern.region, &signature.region);
|
||||
|
||||
match &loc_pattern.value {
|
||||
match &loc_pattern.value.extract_spaces().item {
|
||||
Pattern::Apply(
|
||||
Loc {
|
||||
value: Pattern::Tag(name),
|
||||
|
@ -1890,7 +1893,7 @@ fn expr_to_pattern_help<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result<Pattern<
|
|||
|
||||
Expr::Str(string) => Ok(Pattern::StrLiteral(*string)),
|
||||
Expr::SingleQuote(string) => Ok(Pattern::SingleQuote(string)),
|
||||
Expr::MalformedIdent(string, _problem) => Ok(Pattern::Malformed(string)),
|
||||
Expr::MalformedIdent(string, problem) => Ok(Pattern::MalformedIdent(string, *problem)),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -290,6 +290,10 @@ fn chomp_integer_part(buffer: &[u8]) -> Result<&str, Progress> {
|
|||
)
|
||||
}
|
||||
|
||||
fn is_plausible_ident_continue(ch: char) -> bool {
|
||||
ch == '_' || is_alnum(ch)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn chomp_part<F, G>(leading_is_good: F, rest_is_good: G, buffer: &[u8]) -> Result<&str, Progress>
|
||||
where
|
||||
|
@ -317,6 +321,15 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
if let Ok((next, _width)) = char::from_utf8_slice_start(&buffer[chomped..]) {
|
||||
// This would mean we have e.g.:
|
||||
// * identifier followed by a _
|
||||
// * an integer followed by an alphabetic char
|
||||
if is_plausible_ident_continue(next) {
|
||||
return Err(NoProgress);
|
||||
}
|
||||
}
|
||||
|
||||
if chomped == 0 {
|
||||
Err(NoProgress)
|
||||
} else {
|
||||
|
|
|
@ -535,6 +535,9 @@ pub enum EPattern<'a> {
|
|||
IndentStart(Position),
|
||||
IndentEnd(Position),
|
||||
AsIndentStart(Position),
|
||||
|
||||
RecordAccessorFunction(Position),
|
||||
TupleAccessorFunction(Position),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
|
|
|
@ -406,13 +406,13 @@ fn loc_ident_pattern_help<'a>(
|
|||
))
|
||||
}
|
||||
}
|
||||
Ident::RecordAccessorFunction(string) | Ident::TupleAccessorFunction(string) => Ok((
|
||||
Ident::RecordAccessorFunction(_string) => Err((
|
||||
MadeProgress,
|
||||
Loc {
|
||||
region: loc_ident.region,
|
||||
value: Pattern::Malformed(string),
|
||||
},
|
||||
state,
|
||||
EPattern::RecordAccessorFunction(loc_ident.region.start()),
|
||||
)),
|
||||
Ident::TupleAccessorFunction(_string) => Err((
|
||||
MadeProgress,
|
||||
EPattern::TupleAccessorFunction(loc_ident.region.start()),
|
||||
)),
|
||||
Ident::Malformed(malformed, problem) => {
|
||||
debug_assert!(!malformed.is_empty());
|
||||
|
|
|
@ -326,8 +326,12 @@ pub fn parse_str_like_literal<'a>() -> impl Parser<'a, StrLikeLiteral<'a>, EStri
|
|||
|
||||
if state.bytes().starts_with(b"\"\"\"") {
|
||||
// ending the string; don't use the last newline
|
||||
segments
|
||||
.push(StrSegment::Plaintext(utf8(state.clone(), without_newline)?));
|
||||
if !without_newline.is_empty() {
|
||||
segments.push(StrSegment::Plaintext(utf8(
|
||||
state.clone(),
|
||||
without_newline,
|
||||
)?));
|
||||
}
|
||||
} else {
|
||||
segments
|
||||
.push(StrSegment::Plaintext(utf8(state.clone(), with_newline)?));
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue