diff --git a/src/parse/ast.rs b/src/parse/ast.rs index 4f69269edb..966c3c4ccf 100644 --- a/src/parse/ast.rs +++ b/src/parse/ast.rs @@ -110,6 +110,7 @@ pub enum Attempting { List, Keyword, StringLiteral, + NumberLiteral, UnicodeEscape, Expression, } diff --git a/src/parse/mod.rs b/src/parse/mod.rs index c9816d6d70..24ee034e2b 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -1,12 +1,17 @@ pub mod ast; +pub mod number_literal; pub mod parser; pub mod problems; pub mod string_literal; use parse::ast::Expr; +// use parse::number_literal::number_literal; use parse::parser::Parser; use parse::string_literal::string_literal; pub fn expr<'a>() -> impl Parser<'a, Expr<'a>> { + // parser::one_of2(string_literal(), number_literal()) string_literal() } + +const KW_IF: &'static str = "if"; diff --git a/src/parse/number_literal.rs b/src/parse/number_literal.rs new file mode 100644 index 0000000000..17b2158deb --- /dev/null +++ b/src/parse/number_literal.rs @@ -0,0 +1,105 @@ +use bumpalo::collections::string::String; +use bumpalo::Bump; +use parse::ast::{Attempting, Expr}; +use parse::parser::{ParseResult, Parser, State}; +use parse::problems::{Problem, Problems}; +use region::{Loc, Region}; +use std::char; +use std::iter::Peekable; + +// pub fn number_literal<'a>() -> impl Parser<'a, Expr<'a>> { +// move |arena: &'a Bump, state: State<'a>, attempting: Attempting| { +// let mut chars = state.input.chars(); + +// match chars.next() { +// Some(first_ch) => { +// if first_ch == '-' { +// parse_number_literal(Sign::Negative, first_ch, &mut chars, arena, state) +// } else if first_ch.is_ascii_digit() { +// parse_number_literal(Sign::Positive, first_ch, &mut chars, arena, state) +// } else { +// Err((state, attempting)) +// } +// } +// None => Err((state, attempting)), +// } +// } +// } + +// // Confirm that it starts with a digit; otherwise, it's potentially an identifier! +// look_ahead(digit()) +// .with(digits_before_decimal()) +// .and(optional(char('.').with(digits_after_decimal()))) +// .then(|(int_digits, decimals): (Vec, Option>)| { +// // TODO check length of digits and make sure not to overflow +// let int_str: String = int_digits.into_iter().collect(); + +// match (int_str.parse::(), decimals) { +// (Ok(int_val), None) => value(Expr::Int(int_val as i64)).right(), +// (Ok(int_val), Some(nums)) => { +// let decimal_str: String = nums.into_iter().collect(); + +// match format!("{}.{}", int_str, decimal_str).parse::() { +// Ok(float) => value(Expr::Float(float)).right(), +// Err(_) => unexpected_any( +// "non-digit characters after decimal point in a number literal", +// ) +// .left(), +// } +// } +// (Err(_), _) => unexpected_any( +// "looked like a number literal but was actually malformed identifier", +// ) +// .left(), +// } +// }) +// } + +//#[inline(always)] +//fn parse_number_literal<'a, I>( +// sign: Sign, +// first_ch: char, +// chars: &'a mut I, +// arena: &'a Bump, +// state: State<'a>, +//) -> ParseResult<'a, Expr<'a>> +//where +// I: Iterator, +//{ +// let mut digits_before_decimal = String::with_capacity_in(1, arena); +// let mut digits_after_decimal = String::new_in(arena); + +// if sign == Sign::Positive { +// digits_before_decimal.push(first_ch); +// } + +// while let Some(next_ch) = chars.next() { +// if next_ch == '_' { +// if !digits_after_decimal.is_empty() { +// // +// return Err((state, Attempting::NumberLiteral)); +// } +// } else if first_ch.is_ascii_digit() { +// buf.push(next_output); +// } +// } +// Err((state, Attempting::NumberLiteral)) +//} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum Sign { + Positive, + Negative, +} + +// pub fn underscore_separated_digits<'a>() -> impl Parser<'a, Expr<'a>> { +// move |arena: &'a Bump, state: State<'a>, attempting: Attempting| { +// { +// // Digits before the decimal point in a numeric literal can be +// // underscore-separated, e.g. one million can be written as 1_000_000 +// many1::, _>(alpha_num().skip(optional(attempt(char('_').skip( +// // Don't mistake keywords like `then` and `else` for +// // space-separated digits! +// not_followed_by(choice((string("then"), string("else"), string("when")))), +// ))))) +// } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index b8198ddb47..6c2b3cce06 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,5 +1,7 @@ +use bumpalo::collections::vec::Vec; use bumpalo::Bump; use parse::ast::Attempting; +use region::Region; use std::char; // Strategy: @@ -29,16 +31,19 @@ pub struct State<'a> { // true at the beginning of each line, then false after encountering // the first nonspace char on that line. pub is_indenting: bool, + + pub attempting: Attempting, } impl<'a> State<'a> { - pub fn from_input(input: &'a str) -> State<'a> { + pub fn new(input: &'a str, attempting: Attempting) -> State<'a> { State { input, line: 0, column: 0, indent_col: 1, is_indenting: true, + attempting, } } @@ -56,6 +61,7 @@ impl<'a> State<'a> { column: 0, indent_col: 1, is_indenting: true, + attempting: self.attempting, } } @@ -79,6 +85,7 @@ impl<'a> State<'a> { indent_col: self.indent_col, // Once we hit a nonspace character, we are no longer indenting. is_indenting: false, + attempting: self.attempting, } } /// Advance the parser while also indenting as appropriate. @@ -119,6 +126,7 @@ impl<'a> State<'a> { column: column_usize as u16, indent_col, is_indenting, + attempting: self.attempting, } } } @@ -146,23 +154,27 @@ fn state_size() { assert!(std::mem::size_of::() <= std::mem::size_of::() * 8); } -pub type ParseResult<'a, Output> = Result<(State<'a>, Output), (State<'a>, Attempting)>; +pub type ParseResult<'a, Output> = Result<(State<'a>, Output), (State<'a>, Fail)>; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Fail { + Unexpected(char, Region, Attempting), + PredicateFailed(Attempting), + LineTooLong(u32 /* which line was too long */), + TooManyLines, + Eof(Region, Attempting), +} pub trait Parser<'a, Output> { - fn parse(&self, &'a Bump, State<'a>, attempting: Attempting) -> ParseResult<'a, Output>; + fn parse(&self, &'a Bump, State<'a>) -> ParseResult<'a, Output>; } impl<'a, F, Output> Parser<'a, Output> for F where - F: Fn(&'a Bump, State<'a>, Attempting) -> ParseResult<'a, Output>, + F: Fn(&'a Bump, State<'a>) -> ParseResult<'a, Output>, { - fn parse( - &self, - arena: &'a Bump, - state: State<'a>, - attempting: Attempting, - ) -> ParseResult<'a, Output> { - self(arena, state, attempting) + fn parse(&self, arena: &'a Bump, state: State<'a>) -> ParseResult<'a, Output> { + self(arena, state) } } @@ -171,9 +183,9 @@ where P: Parser<'a, Before>, F: Fn(Before) -> After, { - move |arena, state, attempting| { + move |arena, state| { parser - .parse(arena, state, attempting) + .parse(arena, state) .map(|(next_state, output)| (next_state, transform(output))) } } @@ -182,25 +194,111 @@ pub fn attempt<'a, P, Val>(attempting: Attempting, parser: P) -> impl Parser<'a, where P: Parser<'a, Val>, { - move |arena, state, _| parser.parse(arena, state, attempting) + move |arena, state| { + parser.parse( + arena, + State { + attempting, + ..state + }, + ) + } } -/// A keyword with no newlines in it. -pub fn keyword<'a>(kw: &'static str) -> impl Parser<'a, ()> { +pub fn one_or_more<'a, P, A>(parser: P) -> impl Parser<'a, Vec<'a, A>> +where + P: Parser<'a, A>, +{ + move |arena, state| match parser.parse(arena, state) { + Ok((next_state, first_output)) => { + let mut state = next_state; + let mut buf = Vec::with_capacity_in(1, arena); + + buf.push(first_output); + + loop { + match parser.parse(arena, state) { + Ok((next_state, next_output)) => { + state = next_state; + buf.push(next_output); + } + Err((new_state, _)) => return Ok((new_state, buf)), + } + } + } + Err((new_state, _)) => { + let attempting = new_state.attempting; + + Err(unexpected_eof(0, new_state, attempting)) + } + } +} + +pub fn unexpected_eof<'a>( + chars_consumed: usize, + state: State<'a>, + attempting: Attempting, +) -> (State<'a>, Fail) { + checked_unexpected(chars_consumed, state, |region| { + Fail::Eof(region, attempting) + }) +} + +pub fn unexpected<'a>( + ch: char, + chars_consumed: usize, + state: State<'a>, + attempting: Attempting, +) -> (State<'a>, Fail) { + checked_unexpected(chars_consumed, state, |region| { + Fail::Unexpected(ch, region, attempting) + }) +} + +/// Check for line overflow, then compute a new Region based on chars_consumed +/// and provide it as a way to construct a Problem. +/// If maximum line length was exceeded, return a Problem indicating as much. +#[inline(always)] +fn checked_unexpected<'a, F>( + chars_consumed: usize, + state: State<'a>, + problem_from_region: F, +) -> (State<'a>, Fail) +where + F: FnOnce(Region) -> Fail, +{ + match (state.column as usize).checked_add(chars_consumed) { + Some(end_col) if end_col <= std::u16::MAX as usize => { + let region = Region { + start_col: state.column, + end_col: end_col as u16, + start_line: state.line, + end_line: state.line, + }; + + (state, problem_from_region(region)) + } + _ => { + let line = state.line; + + (state, Fail::LineTooLong(line)) + } + } +} + +/// A string with no newlines in it. +pub fn string<'a>(string: &'static str) -> impl Parser<'a, ()> { // We can't have newlines because we don't attempt to advance the row // in the state, only the column. - debug_assert!(!kw.contains("\n")); + debug_assert!(!string.contains("\n")); - move |_arena: &'a Bump, state: State<'a>, attempting| { + move |_arena: &'a Bump, state: State<'a>| { let input = state.input; + let len = string.len(); - match input.get(0..kw.len()) { - Some(next) if next == kw => { - let len = kw.len(); - - Ok((state.advance_without_indenting(len), ())) - } - _ => Err((state.clone(), attempting)), + match input.get(0..len) { + Some(next_str) if next_str == string => Ok((state.advance_without_indenting(len), ())), + _ => Err(unexpected_eof(len, state, Attempting::Keyword)), } } } @@ -210,80 +308,81 @@ where P: Parser<'a, A>, F: Fn(&A) -> bool, { - move |arena: &'a Bump, state: State<'a>, attempting| { - if let Ok((next_state, output)) = parser.parse(arena, state, attempting) { + move |arena: &'a Bump, state: State<'a>| { + if let Ok((next_state, output)) = parser.parse(arena, state.clone()) { if predicate(&output) { return Ok((next_state, output)); } } - Err((state.clone(), attempting)) + let fail = Fail::PredicateFailed(state.attempting); + Err((state, fail)) } } -pub fn any<'a>( - _arena: &'a Bump, - state: State<'a>, - attempting: Attempting, -) -> ParseResult<'a, char> { - let input = state.input; +// pub fn any<'a>( +// _arena: &'a Bump, +// state: State<'a>, +// attempting: Attempting, +// ) -> ParseResult<'a, char> { +// let input = state.input; - match input.chars().next() { - Some(ch) => { - let len = ch.len_utf8(); - let mut new_state = State { - input: &input[len..], +// match input.chars().next() { +// Some(ch) => { +// let len = ch.len_utf8(); +// let mut new_state = State { +// input: &input[len..], - ..state.clone() - }; +// ..state.clone() +// }; - if ch == '\n' { - new_state.line = new_state.line + 1; - new_state.column = 0; - } +// if ch == '\n' { +// new_state.line = new_state.line + 1; +// new_state.column = 0; +// } - Ok((new_state, ch)) - } - _ => Err((state.clone(), attempting)), - } -} +// Ok((new_state, ch)) +// } +// _ => Err((state.clone(), attempting)), +// } +// } -fn whitespace<'a>() -> impl Parser<'a, char> { - // TODO advance the state appropriately, in terms of line, col, indenting, etc. - satisfies(any, |ch| ch.is_whitespace()) -} +// fn whitespace<'a>() -> impl Parser<'a, char> { +// // TODO advance the state appropriately, in terms of line, col, indenting, etc. +// satisfies(any, |ch| ch.is_whitespace()) +// } -pub fn one_of2<'a, P1, P2, A>(p1: P1, p2: P2) -> impl Parser<'a, A> -where - P1: Parser<'a, A>, - P2: Parser<'a, A>, -{ - move |arena: &'a Bump, state: State<'a>, attempting| { - if let Ok((next_state, output)) = p1.parse(arena, state, attempting) { - Ok((next_state, output)) - } else if let Ok((next_state, output)) = p2.parse(arena, state, attempting) { - Ok((next_state, output)) - } else { - Err((state.clone(), attempting)) - } - } -} +// pub fn one_of2<'a, P1, P2, A>(p1: P1, p2: P2) -> impl Parser<'a, A> +// where +// P1: Parser<'a, A>, +// P2: Parser<'a, A>, +// { +// move |arena: &'a Bump, state: State<'a>, attempting| { +// if let Ok((next_state, output)) = p1.parse(arena, state, attempting) { +// Ok((next_state, output)) +// } else if let Ok((next_state, output)) = p2.parse(arena, state, attempting) { +// Ok((next_state, output)) +// } else { +// Err((state, attempting)) +// } +// } +// } -pub fn one_of3<'a, P1, P2, P3, A>(p1: P1, p2: P2, p3: P3) -> impl Parser<'a, A> -where - P1: Parser<'a, A>, - P2: Parser<'a, A>, - P3: Parser<'a, A>, -{ - move |arena: &'a Bump, state: State<'a>, attempting| { - if let Ok((next_state, output)) = p1.parse(arena, state, attempting) { - Ok((next_state, output)) - } else if let Ok((next_state, output)) = p2.parse(arena, state, attempting) { - Ok((next_state, output)) - } else if let Ok((next_state, output)) = p3.parse(arena, state, attempting) { - Ok((next_state, output)) - } else { - Err((state.clone(), attempting)) - } - } -} +// pub fn one_of3<'a, P1, P2, P3, A>(p1: P1, p2: P2, p3: P3) -> impl Parser<'a, A> +// where +// P1: Parser<'a, A>, +// P2: Parser<'a, A>, +// P3: Parser<'a, A>, +// { +// move |arena: &'a Bump, state: State<'a>, attempting| { +// if let Ok((next_state, output)) = p1.parse(arena, state, attempting) { +// Ok((next_state, output)) +// } else if let Ok((next_state, output)) = p2.parse(arena, state, attempting) { +// Ok((next_state, output)) +// } else if let Ok((next_state, output)) = p3.parse(arena, state, attempting) { +// Ok((next_state, output)) +// } else { +// Err((state, attempting)) +// } +// } +// } diff --git a/src/parse/string_literal.rs b/src/parse/string_literal.rs index 86c45cff47..2ab1149af0 100644 --- a/src/parse/string_literal.rs +++ b/src/parse/string_literal.rs @@ -1,21 +1,27 @@ use bumpalo::collections::string::String; use bumpalo::Bump; use parse::ast::{Attempting, Expr}; -use parse::parser::{Parser, State}; +use parse::parser::{unexpected, unexpected_eof, Fail, Parser, State}; use parse::problems::{Problem, Problems}; use region::{Loc, Region}; use std::char; use std::iter::Peekable; pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> { - move |arena: &'a Bump, state: State<'a>, attempting: Attempting| { + move |arena: &'a Bump, state: State<'a>| { let mut problems = Vec::new(); let mut chars = state.input.chars().peekable(); // String literals must start with a quote. // If this doesn't, it must not be a string literal! - if chars.next() != Some('"') { - return Err((state, attempting)); + match chars.next() { + Some('"') => (), + Some(other_char) => { + return Err(unexpected(other_char, 0, state, Attempting::StringLiteral)); + } + None => { + return Err(unexpected_eof(0, state, Attempting::StringLiteral)); + } } // If we have precisely an empty string here, don't bother allocating @@ -37,7 +43,7 @@ pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> { '\\' => match chars.next() { Some(next_ch) => handle_escaped_char( arena, - state, + &state, next_ch, &mut chars, &mut buf, @@ -88,14 +94,18 @@ pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> { } // We ran out of characters before finding a closed quote - Err((state, Attempting::StringLiteral)) + Err(unexpected_eof( + buf.len(), + state.clone(), + Attempting::StringLiteral, + )) } } fn escaped_char_problem<'a, 'p>( problems: &'p mut Problems, problem: Problem, - state: State<'a>, + state: &State<'a>, buf_len: usize, ) { let start_line = state.line; @@ -120,7 +130,7 @@ fn escaped_char_problem<'a, 'p>( fn escaped_unicode_problem<'a, 'p>( problems: &'p mut Problems, problem: Problem, - state: State<'a>, + state: &State<'a>, buf_len: usize, hex_str_len: usize, ) { @@ -148,12 +158,12 @@ fn escaped_unicode_problem<'a, 'p>( #[inline(always)] fn handle_escaped_char<'a, 'p, I>( arena: &'a Bump, - state: State<'a>, + state: &State<'a>, ch: char, chars: &mut Peekable, buf: &mut String<'a>, problems: &'p mut Problems, -) -> Result<(), (State<'a>, Attempting)> +) -> Result<(), (State<'a>, Fail)> where I: Iterator, { @@ -168,25 +178,29 @@ where '\t' => { // Report and continue. // Tabs are syntax errors, but maybe the rest of the string is fine! - escaped_char_problem(problems, Problem::Tab, state, buf.len()); + escaped_char_problem(problems, Problem::Tab, &state, buf.len()); } '\r' => { // Report and continue. // Carriage returns aren't allowed in string literals, // but maybe the rest of the string is fine! - escaped_char_problem(problems, Problem::CarriageReturn, state, buf.len()); + escaped_char_problem(problems, Problem::CarriageReturn, &state, buf.len()); } '\n' => { // Report and bail out. // We can't safely assume where the string was supposed to end. - escaped_char_problem(problems, Problem::NewlineInLiteral, state, buf.len()); + escaped_char_problem(problems, Problem::NewlineInLiteral, &state, buf.len()); - return Err((state, Attempting::UnicodeEscape)); + return Err(unexpected_eof( + buf.len(), + state.clone(), + Attempting::UnicodeEscape, + )); } _ => { // Report and continue. // An unsupported escaped char (e.g. \q) shouldn't halt parsing. - escaped_char_problem(problems, Problem::UnsupportedEscapedChar, state, buf.len()); + escaped_char_problem(problems, Problem::UnsupportedEscapedChar, &state, buf.len()); } } @@ -196,11 +210,11 @@ where #[inline(always)] fn handle_escaped_unicode<'a, 'p, I>( arena: &'a Bump, - state: State<'a>, + state: &State<'a>, chars: &mut Peekable, buf: &mut String<'a>, problems: &'p mut Problems, -) -> Result<(), (State<'a>, Attempting)> +) -> Result<(), (State<'a>, Fail)> where I: Iterator, { @@ -279,7 +293,7 @@ where escaped_unicode_problem( problems, Problem::InvalidUnicodeCodePoint, - state, + &state, start_of_unicode, hex_str.len(), ); @@ -297,7 +311,7 @@ where escaped_unicode_problem( problems, problem, - state, + &state, start_of_unicode, hex_str.len(), ); @@ -314,7 +328,7 @@ where escaped_unicode_problem( problems, Problem::Tab, - state, + &state, start_of_unicode, hex_str.len(), ); @@ -326,7 +340,7 @@ where escaped_unicode_problem( problems, Problem::CarriageReturn, - state, + &state, start_of_unicode, hex_str.len(), ); @@ -337,12 +351,16 @@ where escaped_unicode_problem( problems, Problem::NewlineInLiteral, - state, + &state, start_of_unicode, hex_str.len(), ); - return Err((state, Attempting::UnicodeEscape)); + return Err(unexpected_eof( + buf.len(), + state.clone(), + Attempting::UnicodeEscape, + )); } normal_char => hex_str.push(normal_char), } diff --git a/src/region.rs b/src/region.rs index add2a11049..9d1bd606c5 100644 --- a/src/region.rs +++ b/src/region.rs @@ -5,16 +5,16 @@ pub type Loc = Located; #[derive(Clone, Eq, PartialEq, PartialOrd, Ord)] pub struct Region { - pub start_col: u16, - pub end_col: u16, pub start_line: u32, pub end_line: u32, + pub start_col: u16, + pub end_col: u16, } #[test] fn region_size() { // Region is used all over the place. Avoid increasing its size! - assert_eq!(std::mem::size_of::(), 8); + assert_eq!(std::mem::size_of::(), 12); } impl fmt::Debug for Region { diff --git a/tests/helpers/mod.rs b/tests/helpers/mod.rs index 9d783acc9f..9b37e89efb 100644 --- a/tests/helpers/mod.rs +++ b/tests/helpers/mod.rs @@ -18,9 +18,9 @@ pub fn loc(val: T) -> Located { pub fn located( start_line: u32, - start_col: u32, + start_col: u16, end_line: u32, - end_col: u32, + end_col: u16, val: T, ) -> Located { Located::new( diff --git a/tests/test_parse.rs b/tests/test_parse.rs index 7bbc870d35..95adc2df60 100644 --- a/tests/test_parse.rs +++ b/tests/test_parse.rs @@ -20,20 +20,20 @@ mod test_parser { use roc::region::Located; fn assert_parses_to<'a>(input: &'a str, expected_expr: Expr<'a>) { - let state = State::from_input(&input); + let state = State::new(&input, Attempting::Expression); let arena = Bump::new(); let parser = parse::expr(); - let answer = parser.parse(&arena, &state, Attempting::Expression); + let answer = parser.parse(&arena, state); let actual = answer.map(|(_, expr)| expr); assert_eq!(Ok(expected_expr), actual); } fn assert_malformed_str<'a>(input: &'a str, expected_probs: Vec>) { - let state = State::from_input(&input); + let state = State::new(&input, Attempting::Expression); let arena = Bump::new(); let parser = parse::expr(); - let answer = parser.parse(&arena, &state, Attempting::Expression); + let answer = parser.parse(&arena, state); let actual = answer.map(|(_, expr)| expr); assert_eq!(