Attempt at fixing Problems bug

This commit is contained in:
Richard Feldman 2019-09-02 21:06:06 -04:00
parent 3043862989
commit fc363c62cf
3 changed files with 114 additions and 52 deletions

View file

@ -27,6 +27,6 @@ extern crate im_rc;
extern crate num; extern crate num;
#[macro_use] #[macro_use]
extern crate combine; extern crate combine; // OBSOLETE
#[macro_use] #[macro_use]
extern crate log; extern crate log;

View file

@ -16,7 +16,7 @@ use std::char;
type Loc<T> = region::Located<T>; type Loc<T> = region::Located<T>;
/// Struct which represents a position in a source file. /// Struct which represents a position in a source file.
#[derive(Debug, Clone)] #[derive(Debug, Clone, PartialEq)]
pub struct State<'a> { pub struct State<'a> {
/// The raw input string. /// The raw input string.
pub input: &'a str, pub input: &'a str,
@ -33,6 +33,28 @@ pub struct State<'a> {
// true at the beginning of each line, then false after encountering // true at the beginning of each line, then false after encountering
// the first nonspace char on that line. // the first nonspace char on that line.
pub is_indenting: bool, pub is_indenting: bool,
pub problems: Problems<'a>,
}
impl<'a> State<'a> {
pub fn from_input(input: &'a str, problems: Problems<'a>) -> State<'a> {
State {
input,
problems,
line: 0,
column: 0,
indent_col: 1,
is_indenting: true,
}
}
}
#[test]
fn state_size() {
// Guards the size of `State`: it must be at most 8 machine words
// (8 * size_of::<usize>() bytes; the check below is `<=`), with the
// intent that it fits in a typical cache line.
assert!(std::mem::size_of::<State>() <= std::mem::size_of::<usize>() * 8);
} }
type Problems<'a> = Vec<'a, Located<Problem>>; type Problems<'a> = Vec<'a, Located<Problem>>;
@ -175,35 +197,24 @@ fn pattern_size() {
); );
} }
type ParseResult<'a, Output> = Result<(State<'a>, Output), (State<'a>, Attempting)>; pub type ParseResult<'a, Output> = Result<(State<'a>, Output), (State<'a>, Attempting)>;
trait Parser<'a, Output> { pub trait Parser<'a, Output> {
fn parse( fn parse(&self, &'a Bump, &'a mut State<'a>, attempting: Attempting)
&self, -> ParseResult<'a, Output>;
&'a Bump,
&'a State<'a>,
problems: &'a mut Problems<'a>,
attempting: Attempting,
) -> ParseResult<'a, Output>;
} }
impl<'a, F, Output> Parser<'a, Output> for F impl<'a, F, Output> Parser<'a, Output> for F
where where
F: Fn( F: Fn(&'a Bump, &'a mut State<'a>, Attempting) -> ParseResult<'a, Output>,
&'a Bump,
&'a State<'a>,
&'a mut Vec<'a, Located<Problem>>,
Attempting,
) -> ParseResult<'a, Output>,
{ {
fn parse( fn parse(
&self, &self,
arena: &'a Bump, arena: &'a Bump,
state: &'a State<'a>, state: &'a mut State<'a>,
problems: &'a mut Problems<'a>,
attempting: Attempting, attempting: Attempting,
) -> ParseResult<'a, Output> { ) -> ParseResult<'a, Output> {
self(arena, state, problems, attempting) self(arena, state, attempting)
} }
} }
@ -212,9 +223,9 @@ where
P: Parser<'a, Before>, P: Parser<'a, Before>,
F: Fn(Before) -> After, F: Fn(Before) -> After,
{ {
move |arena, state, problems, attempting| { move |arena, state, attempting| {
parser parser
.parse(arena, state, problems, attempting) .parse(arena, state, attempting)
.map(|(next_state, output)| (next_state, transform(output))) .map(|(next_state, output)| (next_state, transform(output)))
} }
} }
@ -223,7 +234,7 @@ fn attempt<'a, P, Val>(attempting: Attempting, parser: P) -> impl Parser<'a, Val
where where
P: Parser<'a, Val>, P: Parser<'a, Val>,
{ {
move |arena, state, problems, _| parser.parse(arena, state, problems, attempting) move |arena, state, _| parser.parse(arena, state, attempting)
} }
/// A keyword with no newlines in it. /// A keyword with no newlines in it.
@ -232,7 +243,7 @@ fn keyword<'a>(kw: &'static str) -> impl Parser<'a, ()> {
// in the state, only the column. // in the state, only the column.
debug_assert!(!kw.contains("\n")); debug_assert!(!kw.contains("\n"));
move |_arena: &'a Bump, state: &'a State<'a>, _problems, attempting| { move |_arena: &'a Bump, state: &'a mut State<'a>, attempting| {
let input = state.input; let input = state.input;
match input.get(0..kw.len()) { match input.get(0..kw.len()) {
@ -259,8 +270,8 @@ where
P: Parser<'a, A>, P: Parser<'a, A>,
F: Fn(&A) -> bool, F: Fn(&A) -> bool,
{ {
move |arena: &'a Bump, state: &'a State<'a>, problems, attempting| { move |arena: &'a Bump, state: &'a mut State<'a>, attempting| {
if let Ok((next_state, output)) = parser.parse(arena, state, problems, attempting) { if let Ok((next_state, output)) = parser.parse(arena, state, attempting) {
if predicate(&output) { if predicate(&output) {
return Ok((next_state, output)); return Ok((next_state, output));
} }
@ -272,8 +283,7 @@ where
fn any<'a>( fn any<'a>(
arena: &'a Bump, arena: &'a Bump,
state: &'a State<'a>, state: &'a mut State<'a>,
_problems: &'a mut Problems<'a>,
attempting: Attempting, attempting: Attempting,
) -> ParseResult<'a, char> { ) -> ParseResult<'a, char> {
let input = state.input; let input = state.input;
@ -304,12 +314,13 @@ fn whitespace<'a>() -> impl Parser<'a, char> {
/// What we're currently attempting to parse, e.g. /// What we're currently attempting to parse, e.g.
/// "currently attempting to parse a list." This helps error messages! /// "currently attempting to parse a list." This helps error messages!
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Attempting { pub enum Attempting {
List, List,
Keyword, Keyword,
StringLiteral, StringLiteral,
EscapedUnicodeChar, EscapedUnicodeChar,
Expression,
} }
// fn string_literal<'a>(arena: &'a Bump, state: &'a State<'a>, attempting: Attempting) -> Expr { // fn string_literal<'a>(arena: &'a Bump, state: &'a State<'a>, attempting: Attempting) -> Expr {
@ -367,8 +378,12 @@ pub enum Attempting {
// })) // }))
// } // }
pub fn expr<'a>() -> impl Parser<'a, Expr<'a>> {
string_literal()
}
fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> { fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
move |arena: &'a Bump, state: &'a State<'a>, problems: &'a mut Problems<'a>, attempting| { move |arena: &'a Bump, state: &'a mut State<'a>, attempting| {
let mut chars = state.input.chars(); let mut chars = state.input.chars();
// String literals must start with a quote. // String literals must start with a quote.
@ -403,9 +418,7 @@ fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
Some('t') => buf.push('\t'), Some('t') => buf.push('\t'),
Some('n') => buf.push('\n'), Some('n') => buf.push('\n'),
Some('r') => buf.push('\r'), Some('r') => buf.push('\r'),
Some('u') => { Some('u') => handle_escaped_unicode(arena, state, &mut chars, &mut buf),
handle_escaped_unicode(arena, state, &mut chars, &mut buf, problems)
}
Some('(') => panic!("TODO handle string interpolation"), Some('(') => panic!("TODO handle string interpolation"),
Some(unsupported) => { Some(unsupported) => {
// TODO don't bail out here! Instead, parse successfully // TODO don't bail out here! Instead, parse successfully
@ -448,7 +461,7 @@ fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
} }
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum Problem { pub enum Problem {
/// TODO Invalid hex code - Unicode code points must be specified using hexadecimal characters (the numbers 0-9 and letters A-F) /// TODO Invalid hex code - Unicode code points must be specified using hexadecimal characters (the numbers 0-9 and letters A-F)
NonHexCharsInUnicodeCodePoint, NonHexCharsInUnicodeCodePoint,
@ -468,10 +481,10 @@ fn is_ascii_number(ch: char) -> bool {
fn escaped_unicode_problem<'a>( fn escaped_unicode_problem<'a>(
problem: Problem, problem: Problem,
state: &'a State<'a>, state: &'a mut State<'a>,
buf_len: usize, buf_len: usize,
hex_str_len: usize, hex_str_len: usize,
) -> Located<Problem> { ) {
let start_line = state.line; let start_line = state.line;
let start_col = state.column + buf_len as u32; let start_col = state.column + buf_len as u32;
let end_line = start_line; let end_line = start_line;
@ -485,18 +498,17 @@ fn escaped_unicode_problem<'a>(
end_col, end_col,
}; };
Located { state.problems.push(Located {
region, region,
value: problem, value: problem,
} });
} }
fn handle_escaped_unicode<'a, I>( fn handle_escaped_unicode<'a, I>(
arena: &'a Bump, arena: &'a Bump,
state: &'a State<'a>, state: &'a mut State<'a>,
chars: &mut I, chars: &mut I,
buf: &mut String<'a>, buf: &mut String<'a>,
problems: &mut Problems<'a>,
) where ) where
I: Iterator<Item = char>, I: Iterator<Item = char>,
{ {
@ -504,14 +516,12 @@ fn handle_escaped_unicode<'a, I>(
// so we should always see a '{' next. // so we should always see a '{' next.
if chars.next() != Some('{') { if chars.next() != Some('{') {
// This is not a blocker. Keep parsing. // This is not a blocker. Keep parsing.
let prob = escaped_unicode_problem( escaped_unicode_problem(
Problem::MalformedEscapedUnicode, Problem::MalformedEscapedUnicode,
state, state,
buf.len(), buf.len(),
2, // So far we've parsed `\u` 2, // So far we've parsed `\u`
); );
problems.push(prob);
} else { } else {
// Stores the accumulated unicode digits // Stores the accumulated unicode digits
let mut hex_str = String::new_in(arena); let mut hex_str = String::new_in(arena);
@ -526,41 +536,35 @@ fn handle_escaped_unicode<'a, I>(
match u32::from_str_radix(&hex_str, 16) { match u32::from_str_radix(&hex_str, 16) {
Ok(code_pt) => { Ok(code_pt) => {
if code_pt > 0x10FFFF { if code_pt > 0x10FFFF {
let prob = escaped_unicode_problem( escaped_unicode_problem(
Problem::UnicodeCodePointTooLarge, Problem::UnicodeCodePointTooLarge,
state, state,
buf.len(), buf.len(),
hex_str.len(), hex_str.len(),
); );
problems.push(prob);
} else { } else {
// If it all checked out, add it to // If it all checked out, add it to
// the main buffer. // the main buffer.
match char::from_u32(code_pt) { match char::from_u32(code_pt) {
Some(ch) => buf.push(ch), Some(ch) => buf.push(ch),
None => { None => {
let prob = escaped_unicode_problem( escaped_unicode_problem(
Problem::InvalidUnicodeCodePoint, Problem::InvalidUnicodeCodePoint,
state, state,
buf.len(), buf.len(),
hex_str.len(), hex_str.len(),
); );
problems.push(prob);
} }
} }
} }
} }
Err(_) => { Err(_) => {
let prob = escaped_unicode_problem( escaped_unicode_problem(
Problem::NonHexCharsInUnicodeCodePoint, Problem::NonHexCharsInUnicodeCodePoint,
state, state,
buf.len(), buf.len(),
hex_str.len(), hex_str.len(),
); );
problems.push(prob);
} }
} }
} else { } else {

58
tests/test_parser.rs Normal file
View file

@ -0,0 +1,58 @@
#[macro_use]
extern crate pretty_assertions;
#[macro_use]
extern crate indoc;
extern crate bumpalo;
extern crate combine; // OBSOLETE
extern crate roc;
mod helpers;
#[cfg(test)]
mod test_parser {
    use bumpalo::Bump;
    use roc::parser::Expr::{self, *};
    use roc::parser::{Attempting, Parser, Problem, State};

    /// Asserts that `input` parses to `expected_expr` without recording any
    /// problems.
    fn assert_parses_to<'a>(input: &'a str, expected_expr: Expr<'a>) {
        assert_parses_to_problems(input, expected_expr, Vec::new())
    }

    /// Runs the top-level expression parser over `input` and asserts both the
    /// parse outcome and the problems collected along the way.
    ///
    /// Only the `value` of each located problem is compared; the regions are
    /// discarded. On failure the parser's `Attempting` value is what gets
    /// compared against `Ok(expected_expr)`.
    fn assert_parses_to_problems<'a>(
        input: &'a str,
        expected_expr: Expr<'a>,
        expected_problems: Vec<Problem>,
    ) {
        // NOTE(review): the parser module in this same change declares
        // `State::from_input(input, problems)` and a three-argument
        // `Parser::parse(arena, state, attempting)`. The calls below still use
        // the one-argument / four-argument shapes - confirm which API is
        // current and migrate this helper together with it.
        let state = State::from_input(input);
        let arena = Bump::new();
        let mut problems = bumpalo::collections::vec::Vec::new_in(&arena);
        let attempting = Attempting::Expression;
        let parser = roc::parser::expr();
        let answer = parser.parse(&arena, &state, &mut problems, attempting);

        // Drop the returned parser state on both the success and failure
        // paths; only the expression (or what was being attempted) matters.
        let actual = answer
            .map(|(_, expr)| expr)
            .map_err(|(_, attempting)| attempting);

        // Strip the source regions so expectations can be written as plain
        // `Problem` values.
        let actual_problems: Vec<Problem> = problems
            .into_iter()
            .map(|loc_problem| loc_problem.value)
            .collect();

        assert_eq!(expected_problems, actual_problems);
        assert_eq!(Ok(expected_expr), actual);
    }

    /// An empty string literal parses to `EmptyStr`.
    #[test]
    fn empty_string() {
        assert_parses_to(
            indoc!(
                r#"
                ""
                "#
            ),
            EmptyStr,
        );
    }
}