diff --git a/src/parse/ast.rs b/src/parse/ast.rs index 966c3c4ccf..7c3d383c7c 100644 --- a/src/parse/ast.rs +++ b/src/parse/ast.rs @@ -58,6 +58,7 @@ pub enum Expr<'a> { // Runtime errors MalformedStr(Box<[Loc]>), + MalformedNumber(Problem), } #[derive(Clone, Debug, PartialEq)] diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 24ee034e2b..4f276daf8f 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -5,13 +5,12 @@ pub mod problems; pub mod string_literal; use parse::ast::Expr; -// use parse::number_literal::number_literal; +use parse::number_literal::number_literal; use parse::parser::Parser; use parse::string_literal::string_literal; pub fn expr<'a>() -> impl Parser<'a, Expr<'a>> { - // parser::one_of2(string_literal(), number_literal()) - string_literal() + parser::one_of2(string_literal(), number_literal()) } const KW_IF: &'static str = "if"; diff --git a/src/parse/number_literal.rs b/src/parse/number_literal.rs index 17b2158deb..a731fa0012 100644 --- a/src/parse/number_literal.rs +++ b/src/parse/number_literal.rs @@ -1,105 +1,130 @@ use bumpalo::collections::string::String; use bumpalo::Bump; use parse::ast::{Attempting, Expr}; -use parse::parser::{ParseResult, Parser, State}; -use parse::problems::{Problem, Problems}; -use region::{Loc, Region}; +use parse::parser::{unexpected, unexpected_eof, ParseResult, Parser, State}; +use parse::problems::Problem; use std::char; -use std::iter::Peekable; -// pub fn number_literal<'a>() -> impl Parser<'a, Expr<'a>> { -// move |arena: &'a Bump, state: State<'a>, attempting: Attempting| { -// let mut chars = state.input.chars(); +pub fn number_literal<'a>() -> impl Parser<'a, Expr<'a>> { + move |arena: &'a Bump, state: State<'a>| { + let mut chars = state.input.chars(); -// match chars.next() { -// Some(first_ch) => { -// if first_ch == '-' { -// parse_number_literal(Sign::Negative, first_ch, &mut chars, arena, state) -// } else if first_ch.is_ascii_digit() { -// parse_number_literal(Sign::Positive, first_ch, &mut chars, arena, state) -// } else { -// Err((state, attempting)) -// } -// } -// None => Err((state, attempting)), -// } -// } -// } + match chars.next() { + Some(first_ch) => { + if first_ch == '-' { + parse_number_literal(Sign::Negative, first_ch, &mut chars, arena, state) + } else if first_ch.is_ascii_digit() { + parse_number_literal(Sign::Positive, first_ch, &mut chars, arena, state) + } else { + Err(unexpected( + first_ch, + first_ch.len_utf8(), + state, + Attempting::NumberLiteral, + )) + } + } + None => Err(unexpected_eof(0, state.attempting, state)), + } + } +} -// // Confirm that it starts with a digit; otherwise, it's potentially an identifier! -// look_ahead(digit()) -// .with(digits_before_decimal()) -// .and(optional(char('.').with(digits_after_decimal()))) -// .then(|(int_digits, decimals): (Vec, Option>)| { -// // TODO check length of digits and make sure not to overflow -// let int_str: String = int_digits.into_iter().collect(); +#[inline(always)] +fn parse_number_literal<'a, I>( + sign: Sign, + first_ch: char, + chars: &mut I, + arena: &'a Bump, + state: State<'a>, +) -> ParseResult<'a, Expr<'a>> +where + I: Iterator, +{ + let mut digits_before_decimal = String::with_capacity_in(1, arena); + let mut digits_after_decimal = String::new_in(arena); + let mut chars_skipped = 0; + let mut has_decimal_point = false; -// match (int_str.parse::(), decimals) { -// (Ok(int_val), None) => value(Expr::Int(int_val as i64)).right(), -// (Ok(int_val), Some(nums)) => { -// let decimal_str: String = nums.into_iter().collect(); + if sign == Sign::Positive { + digits_before_decimal.push(first_ch); + } -// match format!("{}.{}", int_str, decimal_str).parse::() { -// Ok(float) => value(Expr::Float(float)).right(), -// Err(_) => unexpected_any( -// "non-digit characters after decimal point in a number literal", -// ) -// .left(), -// } -// } -// (Err(_), _) => unexpected_any( -// "looked like a number literal but was actually malformed identifier", -// ) -// .left(), -// } -// }) -// } + while let Some(next_ch) = chars.next() { + match next_ch { + digit if next_ch.is_ascii_digit() => { + if has_decimal_point { + digits_after_decimal.push(digit); + } else { + digits_before_decimal.push(digit); + } + } + '_' => { + // Underscores are allowed, and disregarded. + chars_skipped += 1; + } + '.' => { + if has_decimal_point { + // You only get one decimal point! + let len = + digits_before_decimal.len() + digits_after_decimal.len() + chars_skipped; -//#[inline(always)] -//fn parse_number_literal<'a, I>( -// sign: Sign, -// first_ch: char, -// chars: &'a mut I, -// arena: &'a Bump, -// state: State<'a>, -//) -> ParseResult<'a, Expr<'a>> -//where -// I: Iterator, -//{ -// let mut digits_before_decimal = String::with_capacity_in(1, arena); -// let mut digits_after_decimal = String::new_in(arena); + return Err(unexpected('.', len, state, Attempting::NumberLiteral)); + } else { + chars_skipped += 1; + has_decimal_point = true; + } + } + invalid_char => { + if digits_before_decimal.is_empty() { + // No digits! We likely parsed a minus sign that's actually an operator. + let len = + digits_before_decimal.len() + digits_after_decimal.len() + chars_skipped; + return Err(unexpected( + invalid_char, + len, + state, + Attempting::NumberLiteral, + )); + } -// if sign == Sign::Positive { -// digits_before_decimal.push(first_ch); -// } + // We hit an invalid number literal character; we're done! + break; + } + } + } -// while let Some(next_ch) = chars.next() { -// if next_ch == '_' { -// if !digits_after_decimal.is_empty() { -// // -// return Err((state, Attempting::NumberLiteral)); -// } -// } else if first_ch.is_ascii_digit() { -// buf.push(next_output); -// } -// } -// Err((state, Attempting::NumberLiteral)) -//} + // At this point we have a number, and will definitely succeed. + // If the number is malformed (too large to fit), we'll succeed with + // an appropriate Expr which records that. + let total_chars_parsed = digits_before_decimal.len() + chars_skipped; + let state = state.advance_without_indenting(total_chars_parsed); + + match digits_before_decimal.parse::() { + Ok(int_val) => { + if has_decimal_point { + let mut f64_buf = String::with_capacity_in( + digits_before_decimal.len() + 1 + digits_after_decimal.len(), + arena, + ); + + f64_buf.push_str(&digits_before_decimal); + f64_buf.push('.'); + f64_buf.push_str(&digits_after_decimal); + + match f64_buf.parse::() { + Ok(float) => Ok((Expr::Float(float), state)), + Err(_) => Ok((Expr::MalformedNumber(Problem::TooLarge), state)), + } + } else { + Ok((Expr::Int(int_val), state)) + } + } + Err(_) => Ok((Expr::MalformedNumber(Problem::TooLarge), state)), + } +} #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum Sign { Positive, Negative, } - -// pub fn underscore_separated_digits<'a>() -> impl Parser<'a, Expr<'a>> { -// move |arena: &'a Bump, state: State<'a>, attempting: Attempting| { -// { -// // Digits before the decimal point in a numeric literal can be -// // underscore-separated, e.g. one million can be written as 1_000_000 -// many1::, _>(alpha_num().skip(optional(attempt(char('_').skip( -// // Don't mistake keywords like `then` and `else` for -// // space-separated digits! -// not_followed_by(choice((string("then"), string("else"), string("when")))), -// ))))) -// } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 6c2b3cce06..d147c0ca22 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -154,12 +154,12 @@ fn state_size() { assert!(std::mem::size_of::() <= std::mem::size_of::() * 8); } -pub type ParseResult<'a, Output> = Result<(State<'a>, Output), (State<'a>, Fail)>; +pub type ParseResult<'a, Output> = Result<(Output, State<'a>), (Fail, State<'a>)>; #[derive(Debug, Clone, PartialEq, Eq)] pub enum Fail { Unexpected(char, Region, Attempting), - PredicateFailed(Attempting), + ConditionFailed(Attempting), LineTooLong(u32 /* which line was too long */), TooManyLines, Eof(Region, Attempting), @@ -186,7 +186,7 @@ where move |arena, state| { parser .parse(arena, state) - .map(|(next_state, output)| (next_state, transform(output))) + .map(|(output, next_state)| (transform(output), next_state)) } } @@ -210,7 +210,7 @@ where P: Parser<'a, A>, { move |arena, state| match parser.parse(arena, state) { - Ok((next_state, first_output)) => { + Ok((first_output, next_state)) => { let mut state = next_state; let mut buf = Vec::with_capacity_in(1, arena); @@ -218,27 +218,23 @@ where loop { match parser.parse(arena, state) { - Ok((next_state, next_output)) => { + Ok((next_output, next_state)) => { state = next_state; buf.push(next_output); } - Err((new_state, _)) => return Ok((new_state, buf)), + Err((_, old_state)) => return Ok((buf, old_state)), } } } - Err((new_state, _)) => { - let attempting = new_state.attempting; - - Err(unexpected_eof(0, new_state, attempting)) - } + Err((_, new_state)) => Err(unexpected_eof(0, new_state.attempting, new_state)), } } pub fn unexpected_eof<'a>( chars_consumed: usize, - state: State<'a>, attempting: Attempting, -) -> (State<'a>, Fail) { + state: State<'a>, +) -> (Fail, State<'a>) { checked_unexpected(chars_consumed, state, |region| { Fail::Eof(region, attempting) }) @@ -249,7 +245,7 @@ pub fn unexpected<'a>( chars_consumed: usize, state: State<'a>, attempting: Attempting, -) -> (State<'a>, Fail) { +) -> (Fail, State<'a>) { checked_unexpected(chars_consumed, state, |region| { Fail::Unexpected(ch, region, attempting) }) @@ -263,7 +259,7 @@ fn checked_unexpected<'a, F>( chars_consumed: usize, state: State<'a>, problem_from_region: F, -) -> (State<'a>, Fail) +) -> (Fail, State<'a>) where F: FnOnce(Region) -> Fail, { @@ -276,13 +272,9 @@ where end_line: state.line, }; - (state, problem_from_region(region)) - } - _ => { - let line = state.line; - - (state, Fail::LineTooLong(line)) + (problem_from_region(region), state) } + _ => (Fail::LineTooLong(state.line), state), } } @@ -297,8 +289,8 @@ pub fn string<'a>(string: &'static str) -> impl Parser<'a, ()> { let len = string.len(); match input.get(0..len) { - Some(next_str) if next_str == string => Ok((state.advance_without_indenting(len), ())), - _ => Err(unexpected_eof(len, state, Attempting::Keyword)), + Some(next_str) if next_str == string => Ok(((), state.advance_without_indenting(len))), + _ => Err(unexpected_eof(len, Attempting::Keyword, state)), } } } @@ -309,14 +301,13 @@ where F: Fn(&A) -> bool, { move |arena: &'a Bump, state: State<'a>| { - if let Ok((next_state, output)) = parser.parse(arena, state.clone()) { + if let Ok((output, next_state)) = parser.parse(arena, state.clone()) { if predicate(&output) { - return Ok((next_state, output)); + return Ok((output, next_state)); } } - let fail = Fail::PredicateFailed(state.attempting); - Err((state, fail)) + Err((Fail::ConditionFailed(state.attempting), state)) } } @@ -352,37 +343,220 @@ where // satisfies(any, |ch| ch.is_whitespace()) // } -// pub fn one_of2<'a, P1, P2, A>(p1: P1, p2: P2) -> impl Parser<'a, A> -// where -// P1: Parser<'a, A>, -// P2: Parser<'a, A>, -// { -// move |arena: &'a Bump, state: State<'a>, attempting| { -// if let Ok((next_state, output)) = p1.parse(arena, state, attempting) { -// Ok((next_state, output)) -// } else if let Ok((next_state, output)) = p2.parse(arena, state, attempting) { -// Ok((next_state, output)) -// } else { -// Err((state, attempting)) -// } -// } -// } +pub fn one_of2<'a, P1, P2, A>(p1: P1, p2: P2) -> impl Parser<'a, A> +where + P1: Parser<'a, A>, + P2: Parser<'a, A>, +{ + move |arena: &'a Bump, state: State<'a>| match p1.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p2.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => Err((Fail::ConditionFailed(state.attempting), state)), + }, + } +} -// pub fn one_of3<'a, P1, P2, P3, A>(p1: P1, p2: P2, p3: P3) -> impl Parser<'a, A> -// where -// P1: Parser<'a, A>, -// P2: Parser<'a, A>, -// P3: Parser<'a, A>, -// { -// move |arena: &'a Bump, state: State<'a>, attempting| { -// if let Ok((next_state, output)) = p1.parse(arena, state, attempting) { -// Ok((next_state, output)) -// } else if let Ok((next_state, output)) = p2.parse(arena, state, attempting) { -// Ok((next_state, output)) -// } else if let Ok((next_state, output)) = p3.parse(arena, state, attempting) { -// Ok((next_state, output)) -// } else { -// Err((state, attempting)) -// } -// } -// } +pub fn one_of3<'a, P1, P2, P3, A>(p1: P1, p2: P2, p3: P3) -> impl Parser<'a, A> +where + P1: Parser<'a, A>, + P2: Parser<'a, A>, + P3: Parser<'a, A>, +{ + move |arena: &'a Bump, state: State<'a>| match p1.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p2.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p3.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => Err((Fail::ConditionFailed(state.attempting), state)), + }, + }, + } +} + +pub fn one_of4<'a, P1, P2, P3, P4, A>(p1: P1, p2: P2, p3: P3, p4: P4) -> impl Parser<'a, A> +where + P1: Parser<'a, A>, + P2: Parser<'a, A>, + P3: Parser<'a, A>, + P4: Parser<'a, A>, +{ + move |arena: &'a Bump, state: State<'a>| match p1.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p2.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p3.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p4.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => Err((Fail::ConditionFailed(state.attempting), state)), + }, + }, + }, + } +} + +pub fn one_of5<'a, P1, P2, P3, P4, P5, A>( + p1: P1, + p2: P2, + p3: P3, + p4: P4, + p5: P5, +) -> impl Parser<'a, A> +where + P1: Parser<'a, A>, + P2: Parser<'a, A>, + P3: Parser<'a, A>, + P4: Parser<'a, A>, + P5: Parser<'a, A>, +{ + move |arena: &'a Bump, state: State<'a>| match p1.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p2.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p3.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p4.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p5.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => Err((Fail::ConditionFailed(state.attempting), state)), + }, + }, + }, + }, + } +} + +pub fn one_of6<'a, P1, P2, P3, P4, P5, P6, A>( + p1: P1, + p2: P2, + p3: P3, + p4: P4, + p5: P5, + p6: P6, +) -> impl Parser<'a, A> +where + P1: Parser<'a, A>, + P2: Parser<'a, A>, + P3: Parser<'a, A>, + P4: Parser<'a, A>, + P5: Parser<'a, A>, + P6: Parser<'a, A>, +{ + move |arena: &'a Bump, state: State<'a>| match p1.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p2.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p3.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p4.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p5.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p6.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => { + Err((Fail::ConditionFailed(state.attempting), state)) + } + }, + }, + }, + }, + }, + } +} + +pub fn one_of7<'a, P1, P2, P3, P4, P5, P6, P7, A>( + p1: P1, + p2: P2, + p3: P3, + p4: P4, + p5: P5, + p6: P6, + p7: P7, +) -> impl Parser<'a, A> +where + P1: Parser<'a, A>, + P2: Parser<'a, A>, + P3: Parser<'a, A>, + P4: Parser<'a, A>, + P5: Parser<'a, A>, + P6: Parser<'a, A>, + P7: Parser<'a, A>, +{ + move |arena: &'a Bump, state: State<'a>| match p1.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p2.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p3.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p4.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p5.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p6.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p7.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => { + Err((Fail::ConditionFailed(state.attempting), state)) + } + }, + }, + }, + }, + }, + }, + } +} + +pub fn one_of8<'a, P1, P2, P3, P4, P5, P6, P7, P8, A>( + p1: P1, + p2: P2, + p3: P3, + p4: P4, + p5: P5, + p6: P6, + p7: P7, + p8: P8, +) -> impl Parser<'a, A> +where + P1: Parser<'a, A>, + P2: Parser<'a, A>, + P3: Parser<'a, A>, + P4: Parser<'a, A>, + P5: Parser<'a, A>, + P6: Parser<'a, A>, + P7: Parser<'a, A>, + P8: Parser<'a, A>, +{ + move |arena: &'a Bump, state: State<'a>| match p1.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p2.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p3.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p4.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p5.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p6.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p7.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => match p8.parse(arena, state) { + valid @ Ok(_) => valid, + Err((_, state)) => { + Err((Fail::ConditionFailed(state.attempting), state)) + } + }, + }, + }, + }, + }, + }, + }, + } +} diff --git a/src/parse/problems.rs b/src/parse/problems.rs index d90ab9942d..9ba8fad1d9 100644 --- a/src/parse/problems.rs +++ b/src/parse/problems.rs @@ -18,4 +18,7 @@ pub enum Problem { Tab, CarriageReturn, UnsupportedEscapedChar, + + // NUMBER LITERAL + TooLarge, } diff --git a/src/parse/string_literal.rs b/src/parse/string_literal.rs index 2ab1149af0..b1d682b03c 100644 --- a/src/parse/string_literal.rs +++ b/src/parse/string_literal.rs @@ -20,7 +20,7 @@ pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> { return Err(unexpected(other_char, 0, state, Attempting::StringLiteral)); } None => { - return Err(unexpected_eof(0, state, Attempting::StringLiteral)); + return Err(unexpected_eof(0, Attempting::StringLiteral, state)); } } @@ -28,9 +28,9 @@ pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> { // a buffer; instead, return EmptyStr immediately. if chars.peek() == Some(&'"') { return Ok(( + Expr::EmptyStr, // 2 because `""` has length 2 state.advance_without_indenting(2), - Expr::EmptyStr, )); } @@ -69,7 +69,7 @@ pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> { Expr::MalformedStr(problems.into_boxed_slice()) }; - return Ok((state.advance_without_indenting(len_with_quotes), expr)); + return Ok((expr, state.advance_without_indenting(len_with_quotes))); } '\t' => { // TODO report the problem and continue. @@ -96,8 +96,8 @@ pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> { // We ran out of characters before finding a closed quote Err(unexpected_eof( buf.len(), - state.clone(), Attempting::StringLiteral, + state.clone(), )) } } @@ -163,7 +163,7 @@ fn handle_escaped_char<'a, 'p, I>( chars: &mut Peekable, buf: &mut String<'a>, problems: &'p mut Problems, -) -> Result<(), (State<'a>, Fail)> +) -> Result<(), (Fail, State<'a>)> where I: Iterator, { @@ -193,8 +193,8 @@ where return Err(unexpected_eof( buf.len(), - state.clone(), Attempting::UnicodeEscape, + state.clone(), )); } _ => { @@ -214,7 +214,7 @@ fn handle_escaped_unicode<'a, 'p, I>( chars: &mut Peekable, buf: &mut String<'a>, problems: &'p mut Problems, -) -> Result<(), (State<'a>, Fail)> +) -> Result<(), (Fail, State<'a>)> where I: Iterator, { @@ -358,8 +358,8 @@ where return Err(unexpected_eof( buf.len(), - state.clone(), Attempting::UnicodeEscape, + state.clone(), )); } normal_char => hex_str.push(normal_char), diff --git a/tests/test_parse.rs b/tests/test_parse.rs index 95adc2df60..0fcd103aab 100644 --- a/tests/test_parse.rs +++ b/tests/test_parse.rs @@ -24,7 +24,7 @@ mod test_parser { let arena = Bump::new(); let parser = parse::expr(); let answer = parser.parse(&arena, state); - let actual = answer.map(|(_, expr)| expr); + let actual = answer.map(|(expr, _)| expr); assert_eq!(Ok(expected_expr), actual); } @@ -34,7 +34,7 @@ mod test_parser { let arena = Bump::new(); let parser = parse::expr(); let answer = parser.parse(&arena, state); - let actual = answer.map(|(_, expr)| expr); + let actual = answer.map(|(expr, _)| expr); assert_eq!( Ok(Expr::MalformedStr(expected_probs.into_boxed_slice())),