mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-30 15:21:12 +00:00
Introduce parser::Fail
This commit is contained in:
parent
329effe4ea
commit
6e4e517787
8 changed files with 347 additions and 119 deletions
|
@ -110,6 +110,7 @@ pub enum Attempting {
|
||||||
List,
|
List,
|
||||||
Keyword,
|
Keyword,
|
||||||
StringLiteral,
|
StringLiteral,
|
||||||
|
NumberLiteral,
|
||||||
UnicodeEscape,
|
UnicodeEscape,
|
||||||
Expression,
|
Expression,
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,12 +1,17 @@
|
||||||
pub mod ast;
|
pub mod ast;
|
||||||
|
pub mod number_literal;
|
||||||
pub mod parser;
|
pub mod parser;
|
||||||
pub mod problems;
|
pub mod problems;
|
||||||
pub mod string_literal;
|
pub mod string_literal;
|
||||||
|
|
||||||
use parse::ast::Expr;
|
use parse::ast::Expr;
|
||||||
|
// use parse::number_literal::number_literal;
|
||||||
use parse::parser::Parser;
|
use parse::parser::Parser;
|
||||||
use parse::string_literal::string_literal;
|
use parse::string_literal::string_literal;
|
||||||
|
|
||||||
pub fn expr<'a>() -> impl Parser<'a, Expr<'a>> {
|
pub fn expr<'a>() -> impl Parser<'a, Expr<'a>> {
|
||||||
|
// parser::one_of2(string_literal(), number_literal())
|
||||||
string_literal()
|
string_literal()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const KW_IF: &'static str = "if";
|
||||||
|
|
105
src/parse/number_literal.rs
Normal file
105
src/parse/number_literal.rs
Normal file
|
@ -0,0 +1,105 @@
|
||||||
|
use bumpalo::collections::string::String;
|
||||||
|
use bumpalo::Bump;
|
||||||
|
use parse::ast::{Attempting, Expr};
|
||||||
|
use parse::parser::{ParseResult, Parser, State};
|
||||||
|
use parse::problems::{Problem, Problems};
|
||||||
|
use region::{Loc, Region};
|
||||||
|
use std::char;
|
||||||
|
use std::iter::Peekable;
|
||||||
|
|
||||||
|
// pub fn number_literal<'a>() -> impl Parser<'a, Expr<'a>> {
|
||||||
|
// move |arena: &'a Bump, state: State<'a>, attempting: Attempting| {
|
||||||
|
// let mut chars = state.input.chars();
|
||||||
|
|
||||||
|
// match chars.next() {
|
||||||
|
// Some(first_ch) => {
|
||||||
|
// if first_ch == '-' {
|
||||||
|
// parse_number_literal(Sign::Negative, first_ch, &mut chars, arena, state)
|
||||||
|
// } else if first_ch.is_ascii_digit() {
|
||||||
|
// parse_number_literal(Sign::Positive, first_ch, &mut chars, arena, state)
|
||||||
|
// } else {
|
||||||
|
// Err((state, attempting))
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// None => Err((state, attempting)),
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
// // Confirm that it starts with a digit; otherwise, it's potentially an identifier!
|
||||||
|
// look_ahead(digit())
|
||||||
|
// .with(digits_before_decimal())
|
||||||
|
// .and(optional(char('.').with(digits_after_decimal())))
|
||||||
|
// .then(|(int_digits, decimals): (Vec<char>, Option<Vec<char>>)| {
|
||||||
|
// // TODO check length of digits and make sure not to overflow
|
||||||
|
// let int_str: String = int_digits.into_iter().collect();
|
||||||
|
|
||||||
|
// match (int_str.parse::<i64>(), decimals) {
|
||||||
|
// (Ok(int_val), None) => value(Expr::Int(int_val as i64)).right(),
|
||||||
|
// (Ok(int_val), Some(nums)) => {
|
||||||
|
// let decimal_str: String = nums.into_iter().collect();
|
||||||
|
|
||||||
|
// match format!("{}.{}", int_str, decimal_str).parse::<f64>() {
|
||||||
|
// Ok(float) => value(Expr::Float(float)).right(),
|
||||||
|
// Err(_) => unexpected_any(
|
||||||
|
// "non-digit characters after decimal point in a number literal",
|
||||||
|
// )
|
||||||
|
// .left(),
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// (Err(_), _) => unexpected_any(
|
||||||
|
// "looked like a number literal but was actually malformed identifier",
|
||||||
|
// )
|
||||||
|
// .left(),
|
||||||
|
// }
|
||||||
|
// })
|
||||||
|
// }
|
||||||
|
|
||||||
|
//#[inline(always)]
|
||||||
|
//fn parse_number_literal<'a, I>(
|
||||||
|
// sign: Sign,
|
||||||
|
// first_ch: char,
|
||||||
|
// chars: &'a mut I,
|
||||||
|
// arena: &'a Bump,
|
||||||
|
// state: State<'a>,
|
||||||
|
//) -> ParseResult<'a, Expr<'a>>
|
||||||
|
//where
|
||||||
|
// I: Iterator<Item = char>,
|
||||||
|
//{
|
||||||
|
// let mut digits_before_decimal = String::with_capacity_in(1, arena);
|
||||||
|
// let mut digits_after_decimal = String::new_in(arena);
|
||||||
|
|
||||||
|
// if sign == Sign::Positive {
|
||||||
|
// digits_before_decimal.push(first_ch);
|
||||||
|
// }
|
||||||
|
|
||||||
|
// while let Some(next_ch) = chars.next() {
|
||||||
|
// if next_ch == '_' {
|
||||||
|
// if !digits_after_decimal.is_empty() {
|
||||||
|
// //
|
||||||
|
// return Err((state, Attempting::NumberLiteral));
|
||||||
|
// }
|
||||||
|
// } else if first_ch.is_ascii_digit() {
|
||||||
|
// buf.push(next_output);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// Err((state, Attempting::NumberLiteral))
|
||||||
|
//}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
enum Sign {
|
||||||
|
Positive,
|
||||||
|
Negative,
|
||||||
|
}
|
||||||
|
|
||||||
|
// pub fn underscore_separated_digits<'a>() -> impl Parser<'a, Expr<'a>> {
|
||||||
|
// move |arena: &'a Bump, state: State<'a>, attempting: Attempting| {
|
||||||
|
// {
|
||||||
|
// // Digits before the decimal point in a numeric literal can be
|
||||||
|
// // underscore-separated, e.g. one million can be written as 1_000_000
|
||||||
|
// many1::<Vec<_>, _>(alpha_num().skip(optional(attempt(char('_').skip(
|
||||||
|
// // Don't mistake keywords like `then` and `else` for
|
||||||
|
// // space-separated digits!
|
||||||
|
// not_followed_by(choice((string("then"), string("else"), string("when")))),
|
||||||
|
// )))))
|
||||||
|
// }
|
|
@ -1,5 +1,7 @@
|
||||||
|
use bumpalo::collections::vec::Vec;
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
use parse::ast::Attempting;
|
use parse::ast::Attempting;
|
||||||
|
use region::Region;
|
||||||
use std::char;
|
use std::char;
|
||||||
|
|
||||||
// Strategy:
|
// Strategy:
|
||||||
|
@ -29,16 +31,19 @@ pub struct State<'a> {
|
||||||
// true at the beginning of each line, then false after encountering
|
// true at the beginning of each line, then false after encountering
|
||||||
// the first nonspace char on that line.
|
// the first nonspace char on that line.
|
||||||
pub is_indenting: bool,
|
pub is_indenting: bool,
|
||||||
|
|
||||||
|
pub attempting: Attempting,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> State<'a> {
|
impl<'a> State<'a> {
|
||||||
pub fn from_input(input: &'a str) -> State<'a> {
|
pub fn new(input: &'a str, attempting: Attempting) -> State<'a> {
|
||||||
State {
|
State {
|
||||||
input,
|
input,
|
||||||
line: 0,
|
line: 0,
|
||||||
column: 0,
|
column: 0,
|
||||||
indent_col: 1,
|
indent_col: 1,
|
||||||
is_indenting: true,
|
is_indenting: true,
|
||||||
|
attempting,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -56,6 +61,7 @@ impl<'a> State<'a> {
|
||||||
column: 0,
|
column: 0,
|
||||||
indent_col: 1,
|
indent_col: 1,
|
||||||
is_indenting: true,
|
is_indenting: true,
|
||||||
|
attempting: self.attempting,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -79,6 +85,7 @@ impl<'a> State<'a> {
|
||||||
indent_col: self.indent_col,
|
indent_col: self.indent_col,
|
||||||
// Once we hit a nonspace character, we are no longer indenting.
|
// Once we hit a nonspace character, we are no longer indenting.
|
||||||
is_indenting: false,
|
is_indenting: false,
|
||||||
|
attempting: self.attempting,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/// Advance the parser while also indenting as appropriate.
|
/// Advance the parser while also indenting as appropriate.
|
||||||
|
@ -119,6 +126,7 @@ impl<'a> State<'a> {
|
||||||
column: column_usize as u16,
|
column: column_usize as u16,
|
||||||
indent_col,
|
indent_col,
|
||||||
is_indenting,
|
is_indenting,
|
||||||
|
attempting: self.attempting,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -146,23 +154,27 @@ fn state_size() {
|
||||||
assert!(std::mem::size_of::<State>() <= std::mem::size_of::<usize>() * 8);
|
assert!(std::mem::size_of::<State>() <= std::mem::size_of::<usize>() * 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type ParseResult<'a, Output> = Result<(State<'a>, Output), (State<'a>, Attempting)>;
|
pub type ParseResult<'a, Output> = Result<(State<'a>, Output), (State<'a>, Fail)>;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
pub enum Fail {
|
||||||
|
Unexpected(char, Region, Attempting),
|
||||||
|
PredicateFailed(Attempting),
|
||||||
|
LineTooLong(u32 /* which line was too long */),
|
||||||
|
TooManyLines,
|
||||||
|
Eof(Region, Attempting),
|
||||||
|
}
|
||||||
|
|
||||||
pub trait Parser<'a, Output> {
|
pub trait Parser<'a, Output> {
|
||||||
fn parse(&self, &'a Bump, State<'a>, attempting: Attempting) -> ParseResult<'a, Output>;
|
fn parse(&self, &'a Bump, State<'a>) -> ParseResult<'a, Output>;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, F, Output> Parser<'a, Output> for F
|
impl<'a, F, Output> Parser<'a, Output> for F
|
||||||
where
|
where
|
||||||
F: Fn(&'a Bump, State<'a>, Attempting) -> ParseResult<'a, Output>,
|
F: Fn(&'a Bump, State<'a>) -> ParseResult<'a, Output>,
|
||||||
{
|
{
|
||||||
fn parse(
|
fn parse(&self, arena: &'a Bump, state: State<'a>) -> ParseResult<'a, Output> {
|
||||||
&self,
|
self(arena, state)
|
||||||
arena: &'a Bump,
|
|
||||||
state: State<'a>,
|
|
||||||
attempting: Attempting,
|
|
||||||
) -> ParseResult<'a, Output> {
|
|
||||||
self(arena, state, attempting)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -171,9 +183,9 @@ where
|
||||||
P: Parser<'a, Before>,
|
P: Parser<'a, Before>,
|
||||||
F: Fn(Before) -> After,
|
F: Fn(Before) -> After,
|
||||||
{
|
{
|
||||||
move |arena, state, attempting| {
|
move |arena, state| {
|
||||||
parser
|
parser
|
||||||
.parse(arena, state, attempting)
|
.parse(arena, state)
|
||||||
.map(|(next_state, output)| (next_state, transform(output)))
|
.map(|(next_state, output)| (next_state, transform(output)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -182,25 +194,111 @@ pub fn attempt<'a, P, Val>(attempting: Attempting, parser: P) -> impl Parser<'a,
|
||||||
where
|
where
|
||||||
P: Parser<'a, Val>,
|
P: Parser<'a, Val>,
|
||||||
{
|
{
|
||||||
move |arena, state, _| parser.parse(arena, state, attempting)
|
move |arena, state| {
|
||||||
|
parser.parse(
|
||||||
|
arena,
|
||||||
|
State {
|
||||||
|
attempting,
|
||||||
|
..state
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A keyword with no newlines in it.
|
pub fn one_or_more<'a, P, A>(parser: P) -> impl Parser<'a, Vec<'a, A>>
|
||||||
pub fn keyword<'a>(kw: &'static str) -> impl Parser<'a, ()> {
|
where
|
||||||
|
P: Parser<'a, A>,
|
||||||
|
{
|
||||||
|
move |arena, state| match parser.parse(arena, state) {
|
||||||
|
Ok((next_state, first_output)) => {
|
||||||
|
let mut state = next_state;
|
||||||
|
let mut buf = Vec::with_capacity_in(1, arena);
|
||||||
|
|
||||||
|
buf.push(first_output);
|
||||||
|
|
||||||
|
loop {
|
||||||
|
match parser.parse(arena, state) {
|
||||||
|
Ok((next_state, next_output)) => {
|
||||||
|
state = next_state;
|
||||||
|
buf.push(next_output);
|
||||||
|
}
|
||||||
|
Err((new_state, _)) => return Ok((new_state, buf)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err((new_state, _)) => {
|
||||||
|
let attempting = new_state.attempting;
|
||||||
|
|
||||||
|
Err(unexpected_eof(0, new_state, attempting))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn unexpected_eof<'a>(
|
||||||
|
chars_consumed: usize,
|
||||||
|
state: State<'a>,
|
||||||
|
attempting: Attempting,
|
||||||
|
) -> (State<'a>, Fail) {
|
||||||
|
checked_unexpected(chars_consumed, state, |region| {
|
||||||
|
Fail::Eof(region, attempting)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn unexpected<'a>(
|
||||||
|
ch: char,
|
||||||
|
chars_consumed: usize,
|
||||||
|
state: State<'a>,
|
||||||
|
attempting: Attempting,
|
||||||
|
) -> (State<'a>, Fail) {
|
||||||
|
checked_unexpected(chars_consumed, state, |region| {
|
||||||
|
Fail::Unexpected(ch, region, attempting)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check for line overflow, then compute a new Region based on chars_consumed
|
||||||
|
/// and provide it as a way to construct a Problem.
|
||||||
|
/// If maximum line length was exceeded, return a Problem indicating as much.
|
||||||
|
#[inline(always)]
|
||||||
|
fn checked_unexpected<'a, F>(
|
||||||
|
chars_consumed: usize,
|
||||||
|
state: State<'a>,
|
||||||
|
problem_from_region: F,
|
||||||
|
) -> (State<'a>, Fail)
|
||||||
|
where
|
||||||
|
F: FnOnce(Region) -> Fail,
|
||||||
|
{
|
||||||
|
match (state.column as usize).checked_add(chars_consumed) {
|
||||||
|
Some(end_col) if end_col <= std::u16::MAX as usize => {
|
||||||
|
let region = Region {
|
||||||
|
start_col: state.column,
|
||||||
|
end_col: end_col as u16,
|
||||||
|
start_line: state.line,
|
||||||
|
end_line: state.line,
|
||||||
|
};
|
||||||
|
|
||||||
|
(state, problem_from_region(region))
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
let line = state.line;
|
||||||
|
|
||||||
|
(state, Fail::LineTooLong(line))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A string with no newlines in it.
|
||||||
|
pub fn string<'a>(string: &'static str) -> impl Parser<'a, ()> {
|
||||||
// We can't have newlines because we don't attempt to advance the row
|
// We can't have newlines because we don't attempt to advance the row
|
||||||
// in the state, only the column.
|
// in the state, only the column.
|
||||||
debug_assert!(!kw.contains("\n"));
|
debug_assert!(!string.contains("\n"));
|
||||||
|
|
||||||
move |_arena: &'a Bump, state: State<'a>, attempting| {
|
move |_arena: &'a Bump, state: State<'a>| {
|
||||||
let input = state.input;
|
let input = state.input;
|
||||||
|
let len = string.len();
|
||||||
|
|
||||||
match input.get(0..kw.len()) {
|
match input.get(0..len) {
|
||||||
Some(next) if next == kw => {
|
Some(next_str) if next_str == string => Ok((state.advance_without_indenting(len), ())),
|
||||||
let len = kw.len();
|
_ => Err(unexpected_eof(len, state, Attempting::Keyword)),
|
||||||
|
|
||||||
Ok((state.advance_without_indenting(len), ()))
|
|
||||||
}
|
|
||||||
_ => Err((state.clone(), attempting)),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -210,80 +308,81 @@ where
|
||||||
P: Parser<'a, A>,
|
P: Parser<'a, A>,
|
||||||
F: Fn(&A) -> bool,
|
F: Fn(&A) -> bool,
|
||||||
{
|
{
|
||||||
move |arena: &'a Bump, state: State<'a>, attempting| {
|
move |arena: &'a Bump, state: State<'a>| {
|
||||||
if let Ok((next_state, output)) = parser.parse(arena, state, attempting) {
|
if let Ok((next_state, output)) = parser.parse(arena, state.clone()) {
|
||||||
if predicate(&output) {
|
if predicate(&output) {
|
||||||
return Ok((next_state, output));
|
return Ok((next_state, output));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Err((state.clone(), attempting))
|
let fail = Fail::PredicateFailed(state.attempting);
|
||||||
|
Err((state, fail))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn any<'a>(
|
// pub fn any<'a>(
|
||||||
_arena: &'a Bump,
|
// _arena: &'a Bump,
|
||||||
state: State<'a>,
|
// state: State<'a>,
|
||||||
attempting: Attempting,
|
// attempting: Attempting,
|
||||||
) -> ParseResult<'a, char> {
|
// ) -> ParseResult<'a, char> {
|
||||||
let input = state.input;
|
// let input = state.input;
|
||||||
|
|
||||||
match input.chars().next() {
|
// match input.chars().next() {
|
||||||
Some(ch) => {
|
// Some(ch) => {
|
||||||
let len = ch.len_utf8();
|
// let len = ch.len_utf8();
|
||||||
let mut new_state = State {
|
// let mut new_state = State {
|
||||||
input: &input[len..],
|
// input: &input[len..],
|
||||||
|
|
||||||
..state.clone()
|
// ..state.clone()
|
||||||
};
|
// };
|
||||||
|
|
||||||
if ch == '\n' {
|
// if ch == '\n' {
|
||||||
new_state.line = new_state.line + 1;
|
// new_state.line = new_state.line + 1;
|
||||||
new_state.column = 0;
|
// new_state.column = 0;
|
||||||
}
|
// }
|
||||||
|
|
||||||
Ok((new_state, ch))
|
// Ok((new_state, ch))
|
||||||
}
|
// }
|
||||||
_ => Err((state.clone(), attempting)),
|
// _ => Err((state.clone(), attempting)),
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
fn whitespace<'a>() -> impl Parser<'a, char> {
|
// fn whitespace<'a>() -> impl Parser<'a, char> {
|
||||||
// TODO advance the state appropriately, in terms of line, col, indenting, etc.
|
// // TODO advance the state appropriately, in terms of line, col, indenting, etc.
|
||||||
satisfies(any, |ch| ch.is_whitespace())
|
// satisfies(any, |ch| ch.is_whitespace())
|
||||||
}
|
// }
|
||||||
|
|
||||||
pub fn one_of2<'a, P1, P2, A>(p1: P1, p2: P2) -> impl Parser<'a, A>
|
// pub fn one_of2<'a, P1, P2, A>(p1: P1, p2: P2) -> impl Parser<'a, A>
|
||||||
where
|
// where
|
||||||
P1: Parser<'a, A>,
|
// P1: Parser<'a, A>,
|
||||||
P2: Parser<'a, A>,
|
// P2: Parser<'a, A>,
|
||||||
{
|
// {
|
||||||
move |arena: &'a Bump, state: State<'a>, attempting| {
|
// move |arena: &'a Bump, state: State<'a>, attempting| {
|
||||||
if let Ok((next_state, output)) = p1.parse(arena, state, attempting) {
|
// if let Ok((next_state, output)) = p1.parse(arena, state, attempting) {
|
||||||
Ok((next_state, output))
|
// Ok((next_state, output))
|
||||||
} else if let Ok((next_state, output)) = p2.parse(arena, state, attempting) {
|
// } else if let Ok((next_state, output)) = p2.parse(arena, state, attempting) {
|
||||||
Ok((next_state, output))
|
// Ok((next_state, output))
|
||||||
} else {
|
// } else {
|
||||||
Err((state.clone(), attempting))
|
// Err((state, attempting))
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
pub fn one_of3<'a, P1, P2, P3, A>(p1: P1, p2: P2, p3: P3) -> impl Parser<'a, A>
|
// pub fn one_of3<'a, P1, P2, P3, A>(p1: P1, p2: P2, p3: P3) -> impl Parser<'a, A>
|
||||||
where
|
// where
|
||||||
P1: Parser<'a, A>,
|
// P1: Parser<'a, A>,
|
||||||
P2: Parser<'a, A>,
|
// P2: Parser<'a, A>,
|
||||||
P3: Parser<'a, A>,
|
// P3: Parser<'a, A>,
|
||||||
{
|
// {
|
||||||
move |arena: &'a Bump, state: State<'a>, attempting| {
|
// move |arena: &'a Bump, state: State<'a>, attempting| {
|
||||||
if let Ok((next_state, output)) = p1.parse(arena, state, attempting) {
|
// if let Ok((next_state, output)) = p1.parse(arena, state, attempting) {
|
||||||
Ok((next_state, output))
|
// Ok((next_state, output))
|
||||||
} else if let Ok((next_state, output)) = p2.parse(arena, state, attempting) {
|
// } else if let Ok((next_state, output)) = p2.parse(arena, state, attempting) {
|
||||||
Ok((next_state, output))
|
// Ok((next_state, output))
|
||||||
} else if let Ok((next_state, output)) = p3.parse(arena, state, attempting) {
|
// } else if let Ok((next_state, output)) = p3.parse(arena, state, attempting) {
|
||||||
Ok((next_state, output))
|
// Ok((next_state, output))
|
||||||
} else {
|
// } else {
|
||||||
Err((state.clone(), attempting))
|
// Err((state, attempting))
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
|
@ -1,21 +1,27 @@
|
||||||
use bumpalo::collections::string::String;
|
use bumpalo::collections::string::String;
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
use parse::ast::{Attempting, Expr};
|
use parse::ast::{Attempting, Expr};
|
||||||
use parse::parser::{Parser, State};
|
use parse::parser::{unexpected, unexpected_eof, Fail, Parser, State};
|
||||||
use parse::problems::{Problem, Problems};
|
use parse::problems::{Problem, Problems};
|
||||||
use region::{Loc, Region};
|
use region::{Loc, Region};
|
||||||
use std::char;
|
use std::char;
|
||||||
use std::iter::Peekable;
|
use std::iter::Peekable;
|
||||||
|
|
||||||
pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
|
pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
|
||||||
move |arena: &'a Bump, state: State<'a>, attempting: Attempting| {
|
move |arena: &'a Bump, state: State<'a>| {
|
||||||
let mut problems = Vec::new();
|
let mut problems = Vec::new();
|
||||||
let mut chars = state.input.chars().peekable();
|
let mut chars = state.input.chars().peekable();
|
||||||
|
|
||||||
// String literals must start with a quote.
|
// String literals must start with a quote.
|
||||||
// If this doesn't, it must not be a string literal!
|
// If this doesn't, it must not be a string literal!
|
||||||
if chars.next() != Some('"') {
|
match chars.next() {
|
||||||
return Err((state, attempting));
|
Some('"') => (),
|
||||||
|
Some(other_char) => {
|
||||||
|
return Err(unexpected(other_char, 0, state, Attempting::StringLiteral));
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
return Err(unexpected_eof(0, state, Attempting::StringLiteral));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we have precisely an empty string here, don't bother allocating
|
// If we have precisely an empty string here, don't bother allocating
|
||||||
|
@ -37,7 +43,7 @@ pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
|
||||||
'\\' => match chars.next() {
|
'\\' => match chars.next() {
|
||||||
Some(next_ch) => handle_escaped_char(
|
Some(next_ch) => handle_escaped_char(
|
||||||
arena,
|
arena,
|
||||||
state,
|
&state,
|
||||||
next_ch,
|
next_ch,
|
||||||
&mut chars,
|
&mut chars,
|
||||||
&mut buf,
|
&mut buf,
|
||||||
|
@ -88,14 +94,18 @@ pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
|
||||||
}
|
}
|
||||||
|
|
||||||
// We ran out of characters before finding a closed quote
|
// We ran out of characters before finding a closed quote
|
||||||
Err((state, Attempting::StringLiteral))
|
Err(unexpected_eof(
|
||||||
|
buf.len(),
|
||||||
|
state.clone(),
|
||||||
|
Attempting::StringLiteral,
|
||||||
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn escaped_char_problem<'a, 'p>(
|
fn escaped_char_problem<'a, 'p>(
|
||||||
problems: &'p mut Problems,
|
problems: &'p mut Problems,
|
||||||
problem: Problem,
|
problem: Problem,
|
||||||
state: State<'a>,
|
state: &State<'a>,
|
||||||
buf_len: usize,
|
buf_len: usize,
|
||||||
) {
|
) {
|
||||||
let start_line = state.line;
|
let start_line = state.line;
|
||||||
|
@ -120,7 +130,7 @@ fn escaped_char_problem<'a, 'p>(
|
||||||
fn escaped_unicode_problem<'a, 'p>(
|
fn escaped_unicode_problem<'a, 'p>(
|
||||||
problems: &'p mut Problems,
|
problems: &'p mut Problems,
|
||||||
problem: Problem,
|
problem: Problem,
|
||||||
state: State<'a>,
|
state: &State<'a>,
|
||||||
buf_len: usize,
|
buf_len: usize,
|
||||||
hex_str_len: usize,
|
hex_str_len: usize,
|
||||||
) {
|
) {
|
||||||
|
@ -148,12 +158,12 @@ fn escaped_unicode_problem<'a, 'p>(
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn handle_escaped_char<'a, 'p, I>(
|
fn handle_escaped_char<'a, 'p, I>(
|
||||||
arena: &'a Bump,
|
arena: &'a Bump,
|
||||||
state: State<'a>,
|
state: &State<'a>,
|
||||||
ch: char,
|
ch: char,
|
||||||
chars: &mut Peekable<I>,
|
chars: &mut Peekable<I>,
|
||||||
buf: &mut String<'a>,
|
buf: &mut String<'a>,
|
||||||
problems: &'p mut Problems,
|
problems: &'p mut Problems,
|
||||||
) -> Result<(), (State<'a>, Attempting)>
|
) -> Result<(), (State<'a>, Fail)>
|
||||||
where
|
where
|
||||||
I: Iterator<Item = char>,
|
I: Iterator<Item = char>,
|
||||||
{
|
{
|
||||||
|
@ -168,25 +178,29 @@ where
|
||||||
'\t' => {
|
'\t' => {
|
||||||
// Report and continue.
|
// Report and continue.
|
||||||
// Tabs are syntax errors, but maybe the rest of the string is fine!
|
// Tabs are syntax errors, but maybe the rest of the string is fine!
|
||||||
escaped_char_problem(problems, Problem::Tab, state, buf.len());
|
escaped_char_problem(problems, Problem::Tab, &state, buf.len());
|
||||||
}
|
}
|
||||||
'\r' => {
|
'\r' => {
|
||||||
// Report and continue.
|
// Report and continue.
|
||||||
// Carriage returns aren't allowed in string literals,
|
// Carriage returns aren't allowed in string literals,
|
||||||
// but maybe the rest of the string is fine!
|
// but maybe the rest of the string is fine!
|
||||||
escaped_char_problem(problems, Problem::CarriageReturn, state, buf.len());
|
escaped_char_problem(problems, Problem::CarriageReturn, &state, buf.len());
|
||||||
}
|
}
|
||||||
'\n' => {
|
'\n' => {
|
||||||
// Report and bail out.
|
// Report and bail out.
|
||||||
// We can't safely assume where the string was supposed to end.
|
// We can't safely assume where the string was supposed to end.
|
||||||
escaped_char_problem(problems, Problem::NewlineInLiteral, state, buf.len());
|
escaped_char_problem(problems, Problem::NewlineInLiteral, &state, buf.len());
|
||||||
|
|
||||||
return Err((state, Attempting::UnicodeEscape));
|
return Err(unexpected_eof(
|
||||||
|
buf.len(),
|
||||||
|
state.clone(),
|
||||||
|
Attempting::UnicodeEscape,
|
||||||
|
));
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
// Report and continue.
|
// Report and continue.
|
||||||
// An unsupported escaped char (e.g. \q) shouldn't halt parsing.
|
// An unsupported escaped char (e.g. \q) shouldn't halt parsing.
|
||||||
escaped_char_problem(problems, Problem::UnsupportedEscapedChar, state, buf.len());
|
escaped_char_problem(problems, Problem::UnsupportedEscapedChar, &state, buf.len());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -196,11 +210,11 @@ where
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn handle_escaped_unicode<'a, 'p, I>(
|
fn handle_escaped_unicode<'a, 'p, I>(
|
||||||
arena: &'a Bump,
|
arena: &'a Bump,
|
||||||
state: State<'a>,
|
state: &State<'a>,
|
||||||
chars: &mut Peekable<I>,
|
chars: &mut Peekable<I>,
|
||||||
buf: &mut String<'a>,
|
buf: &mut String<'a>,
|
||||||
problems: &'p mut Problems,
|
problems: &'p mut Problems,
|
||||||
) -> Result<(), (State<'a>, Attempting)>
|
) -> Result<(), (State<'a>, Fail)>
|
||||||
where
|
where
|
||||||
I: Iterator<Item = char>,
|
I: Iterator<Item = char>,
|
||||||
{
|
{
|
||||||
|
@ -279,7 +293,7 @@ where
|
||||||
escaped_unicode_problem(
|
escaped_unicode_problem(
|
||||||
problems,
|
problems,
|
||||||
Problem::InvalidUnicodeCodePoint,
|
Problem::InvalidUnicodeCodePoint,
|
||||||
state,
|
&state,
|
||||||
start_of_unicode,
|
start_of_unicode,
|
||||||
hex_str.len(),
|
hex_str.len(),
|
||||||
);
|
);
|
||||||
|
@ -297,7 +311,7 @@ where
|
||||||
escaped_unicode_problem(
|
escaped_unicode_problem(
|
||||||
problems,
|
problems,
|
||||||
problem,
|
problem,
|
||||||
state,
|
&state,
|
||||||
start_of_unicode,
|
start_of_unicode,
|
||||||
hex_str.len(),
|
hex_str.len(),
|
||||||
);
|
);
|
||||||
|
@ -314,7 +328,7 @@ where
|
||||||
escaped_unicode_problem(
|
escaped_unicode_problem(
|
||||||
problems,
|
problems,
|
||||||
Problem::Tab,
|
Problem::Tab,
|
||||||
state,
|
&state,
|
||||||
start_of_unicode,
|
start_of_unicode,
|
||||||
hex_str.len(),
|
hex_str.len(),
|
||||||
);
|
);
|
||||||
|
@ -326,7 +340,7 @@ where
|
||||||
escaped_unicode_problem(
|
escaped_unicode_problem(
|
||||||
problems,
|
problems,
|
||||||
Problem::CarriageReturn,
|
Problem::CarriageReturn,
|
||||||
state,
|
&state,
|
||||||
start_of_unicode,
|
start_of_unicode,
|
||||||
hex_str.len(),
|
hex_str.len(),
|
||||||
);
|
);
|
||||||
|
@ -337,12 +351,16 @@ where
|
||||||
escaped_unicode_problem(
|
escaped_unicode_problem(
|
||||||
problems,
|
problems,
|
||||||
Problem::NewlineInLiteral,
|
Problem::NewlineInLiteral,
|
||||||
state,
|
&state,
|
||||||
start_of_unicode,
|
start_of_unicode,
|
||||||
hex_str.len(),
|
hex_str.len(),
|
||||||
);
|
);
|
||||||
|
|
||||||
return Err((state, Attempting::UnicodeEscape));
|
return Err(unexpected_eof(
|
||||||
|
buf.len(),
|
||||||
|
state.clone(),
|
||||||
|
Attempting::UnicodeEscape,
|
||||||
|
));
|
||||||
}
|
}
|
||||||
normal_char => hex_str.push(normal_char),
|
normal_char => hex_str.push(normal_char),
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,16 +5,16 @@ pub type Loc<T> = Located<T>;
|
||||||
|
|
||||||
#[derive(Clone, Eq, PartialEq, PartialOrd, Ord)]
|
#[derive(Clone, Eq, PartialEq, PartialOrd, Ord)]
|
||||||
pub struct Region {
|
pub struct Region {
|
||||||
pub start_col: u16,
|
|
||||||
pub end_col: u16,
|
|
||||||
pub start_line: u32,
|
pub start_line: u32,
|
||||||
pub end_line: u32,
|
pub end_line: u32,
|
||||||
|
pub start_col: u16,
|
||||||
|
pub end_col: u16,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn region_size() {
|
fn region_size() {
|
||||||
// Region is used all over the place. Avoid increasing its size!
|
// Region is used all over the place. Avoid increasing its size!
|
||||||
assert_eq!(std::mem::size_of::<Region>(), 8);
|
assert_eq!(std::mem::size_of::<Region>(), 12);
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Debug for Region {
|
impl fmt::Debug for Region {
|
||||||
|
|
|
@ -18,9 +18,9 @@ pub fn loc<T>(val: T) -> Located<T> {
|
||||||
|
|
||||||
pub fn located<T>(
|
pub fn located<T>(
|
||||||
start_line: u32,
|
start_line: u32,
|
||||||
start_col: u32,
|
start_col: u16,
|
||||||
end_line: u32,
|
end_line: u32,
|
||||||
end_col: u32,
|
end_col: u16,
|
||||||
val: T,
|
val: T,
|
||||||
) -> Located<T> {
|
) -> Located<T> {
|
||||||
Located::new(
|
Located::new(
|
||||||
|
|
|
@ -20,20 +20,20 @@ mod test_parser {
|
||||||
use roc::region::Located;
|
use roc::region::Located;
|
||||||
|
|
||||||
fn assert_parses_to<'a>(input: &'a str, expected_expr: Expr<'a>) {
|
fn assert_parses_to<'a>(input: &'a str, expected_expr: Expr<'a>) {
|
||||||
let state = State::from_input(&input);
|
let state = State::new(&input, Attempting::Expression);
|
||||||
let arena = Bump::new();
|
let arena = Bump::new();
|
||||||
let parser = parse::expr();
|
let parser = parse::expr();
|
||||||
let answer = parser.parse(&arena, &state, Attempting::Expression);
|
let answer = parser.parse(&arena, state);
|
||||||
let actual = answer.map(|(_, expr)| expr);
|
let actual = answer.map(|(_, expr)| expr);
|
||||||
|
|
||||||
assert_eq!(Ok(expected_expr), actual);
|
assert_eq!(Ok(expected_expr), actual);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn assert_malformed_str<'a>(input: &'a str, expected_probs: Vec<Located<Problem>>) {
|
fn assert_malformed_str<'a>(input: &'a str, expected_probs: Vec<Located<Problem>>) {
|
||||||
let state = State::from_input(&input);
|
let state = State::new(&input, Attempting::Expression);
|
||||||
let arena = Bump::new();
|
let arena = Bump::new();
|
||||||
let parser = parse::expr();
|
let parser = parse::expr();
|
||||||
let answer = parser.parse(&arena, &state, Attempting::Expression);
|
let answer = parser.parse(&arena, state);
|
||||||
let actual = answer.map(|(_, expr)| expr);
|
let actual = answer.map(|(_, expr)| expr);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue