diff --git a/src/lib.rs b/src/lib.rs index ce182f583f..997f7e9f22 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,6 +27,6 @@ extern crate im_rc; extern crate num; #[macro_use] -extern crate combine; +extern crate combine; // OBSOLETE #[macro_use] extern crate log; diff --git a/src/parser.rs b/src/parser.rs index ff6ad448a2..c7f31bd02d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -16,7 +16,7 @@ use std::char; type Loc = region::Located; /// Struct which represents a position in a source file. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub struct State<'a> { /// The raw input string. pub input: &'a str, @@ -33,6 +33,28 @@ pub struct State<'a> { // true at the beginning of each line, then false after encountering // the first nonspace char on that line. pub is_indenting: bool, + + pub problems: Problems<'a>, +} + +impl<'a> State<'a> { + pub fn from_input(input: &'a str, problems: Problems<'a>) -> State<'a> { + State { + input, + problems, + line: 0, + column: 0, + indent_col: 1, + is_indenting: true, + } + } +} + +#[test] +fn state_size() { + // State should always be under 8 machine words, so it fits in a typical + // cache line. + assert!(std::mem::size_of::() <= std::mem::size_of::() * 8); } type Problems<'a> = Vec<'a, Located>; @@ -175,35 +197,24 @@ fn pattern_size() { ); } -type ParseResult<'a, Output> = Result<(State<'a>, Output), (State<'a>, Attempting)>; +pub type ParseResult<'a, Output> = Result<(State<'a>, Output), (State<'a>, Attempting)>; -trait Parser<'a, Output> { - fn parse( - &self, - &'a Bump, - &'a State<'a>, - problems: &'a mut Problems<'a>, - attempting: Attempting, - ) -> ParseResult<'a, Output>; +pub trait Parser<'a, Output> { + fn parse(&self, &'a Bump, &'a mut State<'a>, attempting: Attempting) + -> ParseResult<'a, Output>; } impl<'a, F, Output> Parser<'a, Output> for F where - F: Fn( - &'a Bump, - &'a State<'a>, - &'a mut Vec<'a, Located>, - Attempting, - ) -> ParseResult<'a, Output>, + F: Fn(&'a Bump, &'a mut State<'a>, Attempting) -> ParseResult<'a, Output>, { fn parse( &self, arena: &'a Bump, - state: &'a State<'a>, - problems: &'a mut Problems<'a>, + state: &'a mut State<'a>, attempting: Attempting, ) -> ParseResult<'a, Output> { - self(arena, state, problems, attempting) + self(arena, state, attempting) } } @@ -212,9 +223,9 @@ where P: Parser<'a, Before>, F: Fn(Before) -> After, { - move |arena, state, problems, attempting| { + move |arena, state, attempting| { parser - .parse(arena, state, problems, attempting) + .parse(arena, state, attempting) .map(|(next_state, output)| (next_state, transform(output))) } } @@ -223,7 +234,7 @@ fn attempt<'a, P, Val>(attempting: Attempting, parser: P) -> impl Parser<'a, Val where P: Parser<'a, Val>, { - move |arena, state, problems, _| parser.parse(arena, state, problems, attempting) + move |arena, state, _| parser.parse(arena, state, attempting) } /// A keyword with no newlines in it. @@ -232,7 +243,7 @@ fn keyword<'a>(kw: &'static str) -> impl Parser<'a, ()> { // in the state, only the column. debug_assert!(!kw.contains("\n")); - move |_arena: &'a Bump, state: &'a State<'a>, _problems, attempting| { + move |_arena: &'a Bump, state: &'a mut State<'a>, attempting| { let input = state.input; match input.get(0..kw.len()) { @@ -259,8 +270,8 @@ where P: Parser<'a, A>, F: Fn(&A) -> bool, { - move |arena: &'a Bump, state: &'a State<'a>, problems, attempting| { - if let Ok((next_state, output)) = parser.parse(arena, state, problems, attempting) { + move |arena: &'a Bump, state: &'a mut State<'a>, attempting| { + if let Ok((next_state, output)) = parser.parse(arena, state, attempting) { if predicate(&output) { return Ok((next_state, output)); } @@ -272,8 +283,7 @@ where fn any<'a>( arena: &'a Bump, - state: &'a State<'a>, - _problems: &'a mut Problems<'a>, + state: &'a mut State<'a>, attempting: Attempting, ) -> ParseResult<'a, char> { let input = state.input; @@ -304,12 +314,13 @@ fn whitespace<'a>() -> impl Parser<'a, char> { /// What we're currently attempting to parse, e.g. /// "currently attempting to parse a list." This helps error messages! -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Attempting { List, Keyword, StringLiteral, EscapedUnicodeChar, + Expression, } // fn string_literal<'a>(arena: &'a Bump, state: &'a State<'a>, attempting: Attempting) -> Expr { @@ -367,8 +378,12 @@ pub enum Attempting { // })) // } +pub fn expr<'a>() -> impl Parser<'a, Expr<'a>> { + string_literal() +} + fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> { - move |arena: &'a Bump, state: &'a State<'a>, problems: &'a mut Problems<'a>, attempting| { + move |arena: &'a Bump, state: &'a mut State<'a>, attempting| { let mut chars = state.input.chars(); // String literals must start with a quote. @@ -403,9 +418,7 @@ fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> { Some('t') => buf.push('\t'), Some('n') => buf.push('\n'), Some('r') => buf.push('\r'), - Some('u') => { - handle_escaped_unicode(arena, state, &mut chars, &mut buf, problems) - } + Some('u') => handle_escaped_unicode(arena, state, &mut chars, &mut buf), Some('(') => panic!("TODO handle string interpolation"), Some(unsupported) => { // TODO don't bail out here! Instead, parse successfully @@ -448,7 +461,7 @@ fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> { } } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum Problem { /// TODO Invalid hex code - Unicode code points must be specified using hexadecimal characters (the numbers 0-9 and letters A-F) NonHexCharsInUnicodeCodePoint, @@ -468,10 +481,10 @@ fn is_ascii_number(ch: char) -> bool { fn escaped_unicode_problem<'a>( problem: Problem, - state: &'a State<'a>, + state: &'a mut State<'a>, buf_len: usize, hex_str_len: usize, -) -> Located { +) { let start_line = state.line; let start_col = state.column + buf_len as u32; let end_line = start_line; @@ -485,18 +498,17 @@ fn escaped_unicode_problem<'a>( end_col, }; - Located { + state.problems.push(Located { region, value: problem, - } + }); } fn handle_escaped_unicode<'a, I>( arena: &'a Bump, - state: &'a State<'a>, + state: &'a mut State<'a>, chars: &mut I, buf: &mut String<'a>, - problems: &mut Problems<'a>, ) where I: Iterator, { @@ -504,14 +516,12 @@ fn handle_escaped_unicode<'a, I>( // so we should always see a '{' next. if chars.next() != Some('{') { // This is not a blocker. Keep parsing. - let prob = escaped_unicode_problem( + escaped_unicode_problem( Problem::MalformedEscapedUnicode, state, buf.len(), 2, // So far we've parsed `\u` ); - - problems.push(prob); } else { // Stores the accumulated unicode digits let mut hex_str = String::new_in(arena); @@ -526,41 +536,35 @@ fn handle_escaped_unicode<'a, I>( match u32::from_str_radix(&hex_str, 16) { Ok(code_pt) => { if code_pt > 0x10FFFF { - let prob = escaped_unicode_problem( + escaped_unicode_problem( Problem::UnicodeCodePointTooLarge, state, buf.len(), hex_str.len(), ); - - problems.push(prob); } else { // If it all checked out, add it to // the main buffer. match char::from_u32(code_pt) { Some(ch) => buf.push(ch), None => { - let prob = escaped_unicode_problem( + escaped_unicode_problem( Problem::InvalidUnicodeCodePoint, state, buf.len(), hex_str.len(), ); - - problems.push(prob); } } } } Err(_) => { - let prob = escaped_unicode_problem( + escaped_unicode_problem( Problem::NonHexCharsInUnicodeCodePoint, state, buf.len(), hex_str.len(), ); - - problems.push(prob); } } } else { diff --git a/tests/test_parser.rs b/tests/test_parser.rs new file mode 100644 index 0000000000..c4a2276f2e --- /dev/null +++ b/tests/test_parser.rs @@ -0,0 +1,58 @@ +#[macro_use] +extern crate pretty_assertions; +#[macro_use] +extern crate indoc; +extern crate bumpalo; +extern crate combine; // OBSOLETE +extern crate roc; + +mod helpers; + +#[cfg(test)] +mod test_parser { + use bumpalo::Bump; + use roc::parser::Expr::{self, *}; + use roc::parser::{Attempting, Parser, Problem, State}; + + fn assert_parses_to<'a>(input: &'a str, expected_expr: Expr<'a>) { + assert_parses_to_problems(input, expected_expr, Vec::new()) + } + + fn assert_parses_to_problems<'a>( + input: &'a str, + expected_expr: Expr<'a>, + expected_problems: Vec, + ) { + let state = State::from_input(&input); + let arena = Bump::new(); + let mut problems = bumpalo::collections::vec::Vec::new_in(&arena); + let attempting = Attempting::Expression; + let parser = roc::parser::expr(); + let answer = parser.parse(&arena, &state, &mut problems, attempting); + let actual = answer + .map(|(_, expr)| expr) + .map_err(|(_, attempting)| attempting); + + let mut actual_problems: Vec = Vec::new(); + + for loc_problem in problems { + actual_problems.push(loc_problem.value); + } + + assert_eq!(expected_problems, actual_problems); + + assert_eq!(Ok(expected_expr), actual); + } + + #[test] + fn empty_list() { + assert_parses_to( + indoc!( + r#" + "" + "# + ), + EmptyStr, + ); + } +}