use crate::ast::Attempting; use bumpalo::collections::vec::Vec; use bumpalo::Bump; use encode_unicode::CharExt; use roc_region::all::{Located, Region}; use std::fmt; use std::str::from_utf8; use std::{char, u16}; use Progress::*; /// A position in a source file. #[derive(Clone, PartialEq, Eq)] pub struct State<'a> { /// The raw input bytes from the file. pub bytes: &'a [u8], /// Current line of the input pub line: u32, /// Current column of the input pub column: u16, /// Current indentation level, in columns /// (so no indent is col 1 - this saves an arithmetic operation.) pub indent_col: u16, // true at the beginning of each line, then false after encountering // the first nonspace char on that line. pub is_indenting: bool, pub context_stack: &'a ContextStack<'a>, /// The original length of the string, before any bytes were consumed. /// This is used internally by the State::bytes_consumed() function. /// /// TODO make this private, in a way that doesn't break macros! pub original_len: usize, } #[derive(Debug, PartialEq, Eq)] pub enum Either { First(First), Second(Second), } impl<'a> State<'a> { pub fn new_in(arena: &'a Bump, bytes: &'a [u8], _attempting: Attempting) -> State<'a> { State { bytes, line: 0, column: 0, indent_col: 0, is_indenting: true, context_stack: arena.alloc(ContextStack::Nil), original_len: bytes.len(), } } pub fn check_indent( self, arena: &'a Bump, min_indent: u16, ) -> Result, Self)> { if self.indent_col < min_indent { Err((SyntaxError::OutdentedTooFar, self)) } else { Ok(self) } } /// Returns the total number of bytes consumed since the parser began parsing. /// /// So if the parser has consumed 8 bytes, this function will return 8. pub fn bytes_consumed(&self) -> usize { self.original_len - self.bytes.len() } /// Returns whether the parser has reached the end of the input pub fn has_reached_end(&self) -> bool { self.bytes.is_empty() } /// Increments the line, then resets column, indent_col, and is_indenting. 
/// Advances the input by 1, to consume the newline character. pub fn newline(&self, arena: &'a Bump) -> Result, Self)> { match self.line.checked_add(1) { Some(line) => Ok(State { bytes: &self.bytes[1..], line, column: 0, indent_col: 0, is_indenting: true, original_len: self.original_len, context_stack: arena.alloc(self.context_stack.clone()), }), None => Err(( Progress::NoProgress, SyntaxError::TooManyLines, self.clone(), )), } } /// Use advance_spaces to advance with indenting. /// This assumes we are *not* advancing with spaces, or at least that /// any spaces on the line were preceded by non-spaces - which would mean /// they weren't eligible to indent anyway. pub fn advance_without_indenting( self, arena: &'a Bump, quantity: usize, ) -> Result, Self)> { match (self.column as usize).checked_add(quantity) { Some(column_usize) if column_usize <= u16::MAX as usize => { Ok(State { bytes: &self.bytes[quantity..], column: column_usize as u16, // Once we hit a nonspace character, we are no longer indenting. is_indenting: false, ..self }) } _ => Err(line_too_long(arena, self.clone())), } } /// Advance the parser while also indenting as appropriate. /// This assumes we are only advancing with spaces, since they can indent. pub fn advance_spaces( &self, arena: &'a Bump, spaces: usize, ) -> Result, Self)> { match (self.column as usize).checked_add(spaces) { Some(column_usize) if column_usize <= u16::MAX as usize => { // Spaces don't affect is_indenting; if we were previously indneting, // we still are, and if we already finished indenting, we're still done. let is_indenting = self.is_indenting; // If we're indenting, spaces indent us further. let indent_col = if is_indenting { // This doesn't need to be checked_add because it's always true that // indent_col <= col, so if this could possibly overflow, we would // already have errored out from the column calculation. // // Leaving debug assertions in case this invariant someday disappers. 
debug_assert!(u16::MAX - self.indent_col >= spaces as u16); debug_assert!(spaces <= u16::MAX as usize); self.indent_col + spaces as u16 } else { self.indent_col }; Ok(State { bytes: &self.bytes[spaces..], line: self.line, column: column_usize as u16, indent_col, is_indenting, context_stack: arena.alloc(self.context_stack.clone()), original_len: self.original_len, }) } _ => Err(line_too_long(arena, self.clone())), } } /// Returns a Region corresponding to the current state, but /// with the end_col advanced by the given amount. This is /// useful when parsing something "manually" (using input.chars()) /// and thus wanting a Region while not having access to loc(). pub fn len_region(&self, length: u16) -> Region { Region { start_col: self.column, start_line: self.line, end_col: self .column .checked_add(length) .unwrap_or_else(|| panic!("len_region overflowed")), end_line: self.line, } } /// Return a failing ParseResult for the given FailReason pub fn fail( self, arena: &'a Bump, progress: Progress, reason: X, ) -> Result<(Progress, T, Self), (Progress, X, Self)> { Err((progress, reason, self)) } } impl<'a> fmt::Debug for State<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "State {{")?; match from_utf8(self.bytes) { Ok(string) => write!(f, "\n\tbytes: [utf8] {:?}", string)?, Err(_) => write!(f, "\n\tbytes: [invalid utf8] {:?}", self.bytes)?, } write!(f, "\n\t(line, col): ({}, {}),", self.line, self.column)?; write!(f, "\n\tindent_col: {}", self.indent_col)?; write!(f, "\n\tis_indenting: {:?}", self.is_indenting)?; write!(f, "\n\toriginal_len: {}", self.original_len)?; write!(f, "\n\tcontext stack: {:?}", self.context_stack)?; write!(f, "\n}}") } } #[test] fn state_size() { // State should always be under 8 machine words, so it fits in a typical // cache line. 
let state_size = std::mem::size_of::(); let maximum = std::mem::size_of::() * 8; assert!(state_size <= maximum, "{:?} <= {:?}", state_size, maximum); } pub type ParseResult<'a, Output, Error> = Result<(Progress, Output, State<'a>), (Progress, Error, State<'a>)>; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Progress { MadeProgress, NoProgress, } impl Progress { pub fn from_lengths(before: usize, after: usize) -> Self { Self::from_consumed(before - after) } pub fn from_consumed(chars_consumed: usize) -> Self { Self::progress_when(chars_consumed != 0) } pub fn progress_when(made_progress: bool) -> Self { if made_progress { Progress::MadeProgress } else { Progress::NoProgress } } pub fn or(&self, other: Self) -> Self { if (*self == MadeProgress) || (other == MadeProgress) { MadeProgress } else { NoProgress } } } #[derive(Debug, Clone, PartialEq, Eq)] pub enum SyntaxError<'a> { Unexpected(Region), OutdentedTooFar, ConditionFailed, LineTooLong(u32 /* which line was too long */), TooManyLines, Eof(Region), InvalidPattern, BadUtf8, ReservedKeyword(Region), ArgumentsBeforeEquals(Region), NotYetImplemented(String), TODO, Type(Type<'a>), } impl<'a> SyntaxError<'a> { pub fn into_parse_problem( self, filename: std::path::PathBuf, bytes: &'a [u8], ) -> ParseProblem<'a, SyntaxError<'a>> { ParseProblem { line: 0, column: 0, problem: self, filename, bytes, } } } type Row = u32; type Col = u16; #[derive(Debug, Clone, PartialEq, Eq)] pub enum Type<'a> { TRecord(TRecord<'a>, Row, Col), /// TStart(Row, Col), TSpace(Row, Col), /// TIndentStart(Row, Col), } #[derive(Debug, Clone, PartialEq, Eq)] pub enum TRecord<'a> { Open(Row, Col), End(Row, Col), /// Field(Row, Col), Colon(Row, Col), Type(&'a Type<'a>, Row, Col), /// Space(Row, Col), /// IndentOpen(Row, Col), IndentField(Row, Col), IndentColon(Row, Col), IndentType(Row, Col), IndentEnd(Row, Col), } #[derive(Debug, Clone, PartialEq, Eq)] pub enum ContextStack<'a> { Cons(ContextItem, &'a ContextStack<'a>), Nil, } impl<'a> 
ContextStack<'a> { fn into_vec(self) -> std::vec::Vec { let mut result = std::vec::Vec::new(); let mut next = &self; while let ContextStack::Cons(item, rest) = next { next = rest; result.push(*item); } result.reverse(); result } pub fn uncons(&'a self) -> Option<(ContextItem, &'a Self)> { match self { ContextStack::Cons(item, rest) => Some((*item, rest)), ContextStack::Nil => None, } } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct ContextItem { pub line: u32, pub column: u16, pub context: Attempting, } #[derive(Debug, Clone, PartialEq, Eq)] pub struct DeadEnd<'a, T> { pub line: u32, pub column: u16, pub problem: T, pub context_stack: ContextStack<'a>, } /// use std vec to escape the arena's lifetime bound /// since this is only used when there is in fact an error /// I think this is fine #[derive(Debug)] pub struct ParseProblem<'a, T> { pub line: u32, pub column: u16, pub problem: T, pub filename: std::path::PathBuf, pub bytes: &'a [u8], } pub fn fail<'a, T>() -> impl Parser<'a, T, SyntaxError<'a>> { move |arena, state: State<'a>| Err((NoProgress, SyntaxError::ConditionFailed, state)) } pub trait Parser<'a, Output, Error> { fn parse(&self, _: &'a Bump, _: State<'a>) -> ParseResult<'a, Output, Error>; } impl<'a, F, Output, Error> Parser<'a, Output, Error> for F where Error: 'a, F: Fn(&'a Bump, State<'a>) -> ParseResult<'a, Output, Error>, { fn parse(&self, arena: &'a Bump, state: State<'a>) -> ParseResult<'a, Output, Error> { self(arena, state) } } pub fn allocated<'a, P, Val, Error>(parser: P) -> impl Parser<'a, &'a Val, Error> where Error: 'a, P: Parser<'a, Val, Error>, Val: 'a, { move |arena, state: State<'a>| { let (progress, answer, state) = parser.parse(arena, state)?; Ok((progress, &*arena.alloc(answer), state)) } } pub fn not_followed_by<'a, P, ByParser, By, Val>( parser: P, by: ByParser, ) -> impl Parser<'a, Val, SyntaxError<'a>> where ByParser: Parser<'a, By, SyntaxError<'a>>, P: Parser<'a, Val, SyntaxError<'a>>, { move |arena, state: 
State<'a>| { let original_state = state.clone(); parser .parse(arena, state) .and_then(|(progress, answer, state)| { let after_parse = state.clone(); match by.parse(arena, state) { Ok((_, _, state)) => { Err((NoProgress, SyntaxError::ConditionFailed, original_state)) } Err(_) => Ok((progress, answer, after_parse)), } }) } } pub fn not<'a, P, Val>(parser: P) -> impl Parser<'a, (), SyntaxError<'a>> where P: Parser<'a, Val, SyntaxError<'a>>, { move |arena, state: State<'a>| { let original_state = state.clone(); match parser.parse(arena, state) { Ok((_, _, _)) => Err((NoProgress, SyntaxError::ConditionFailed, original_state)), Err((_, _, _)) => Ok((NoProgress, (), original_state)), } } } pub fn lookahead<'a, Peek, P, PeekVal, Val, Error>( peek: Peek, parser: P, ) -> impl Parser<'a, Val, Error> where Error: 'a, Peek: Parser<'a, PeekVal, Error>, P: Parser<'a, Val, Error>, { move |arena, state: State<'a>| { let original_state = state.clone(); peek.parse(arena, state) .and_then(|_| parser.parse(arena, original_state)) } } pub fn and_then<'a, P1, P2, F, Before, After, Error>( parser: P1, transform: F, ) -> impl Parser<'a, After, Error> where P1: Parser<'a, Before, Error>, P2: Parser<'a, After, Error>, F: Fn(Progress, Before) -> P2, Error: 'a, { move |arena, state| { parser .parse(arena, state) .and_then(|(progress, output, next_state)| { transform(progress, output).parse(arena, next_state) }) } } pub fn and_then_with_indent_level<'a, P1, P2, F, Before, After, E>( parser: P1, transform: F, ) -> impl Parser<'a, After, E> where P1: Parser<'a, Before, E>, P2: Parser<'a, After, E>, F: Fn(Progress, Before, u16) -> P2, E: 'a, { move |arena, state| { parser .parse(arena, state) .and_then(|(progress, output, next_state)| { transform(progress, output, next_state.indent_col).parse(arena, next_state) }) } } pub fn then<'a, P1, F, Before, After, E>(parser: P1, transform: F) -> impl Parser<'a, After, E> where P1: Parser<'a, Before, E>, After: 'a, E: 'a, F: Fn(&'a Bump, State<'a>, 
Progress, Before) -> ParseResult<'a, After, E>, { move |arena, state| { parser .parse(arena, state) .and_then(|(progress, output, next_state)| { transform(arena, next_state, progress, output) }) } } pub fn unexpected_eof<'a>( arena: &'a Bump, state: State<'a>, chars_consumed: usize, ) -> (Progress, SyntaxError<'a>, State<'a>) { checked_unexpected(arena, state, chars_consumed, |region| { SyntaxError::Eof(region) }) } pub fn unexpected<'a>( arena: &'a Bump, chars_consumed: usize, _attempting: Attempting, state: State<'a>, ) -> (Progress, SyntaxError<'a>, State<'a>) { // NOTE state is the last argument because chars_consumed often depends on the state's fields // having state be the final argument prevents borrowing issues checked_unexpected(arena, state, chars_consumed, |region| { SyntaxError::Unexpected(region) }) } /// Check for line overflow, then compute a new Region based on chars_consumed /// and provide it as a way to construct a Problem. /// If maximum line length was exceeded, return a Problem indicating as much. #[inline(always)] fn checked_unexpected<'a, F>( arena: &'a Bump, state: State<'a>, chars_consumed: usize, problem_from_region: F, ) -> (Progress, SyntaxError<'a>, State<'a>) where F: FnOnce(Region) -> SyntaxError<'a>, { match (state.column as usize).checked_add(chars_consumed) { // Crucially, this is < u16::MAX and not <= u16::MAX. This means if // column ever gets set to u16::MAX, we will automatically bail out // with LineTooLong - which is exactly what we want! Once a line has // been discovered to be too long, we don't want to parse anything else // until that's fixed. 
Some(end_col) if end_col < u16::MAX as usize => { let region = Region { start_col: state.column, end_col: end_col as u16, start_line: state.line, end_line: state.line, }; (Progress::NoProgress, problem_from_region(region), state) } _ => { let (_progress, fail, state) = line_too_long(arena, state); (Progress::NoProgress, fail, state) } } } fn line_too_long<'a>(arena: &'a Bump, state: State<'a>) -> (Progress, SyntaxError<'a>, State<'a>) { let problem = SyntaxError::LineTooLong(state.line); // Set column to MAX and advance the parser to end of input. // This way, all future parsers will fail on EOF, and then // unexpected_eof will take them back here - thus propagating // the initial LineTooLong error all the way to the end, even if // (for example) the LineTooLong initially occurs in the middle of // a one_of chain, which would otherwise prevent it from propagating. let column = u16::MAX; let bytes = state.bytes.get(0..state.bytes.len()).unwrap(); let state = State { bytes, line: state.line, column, ..state }; // TODO do we make progress in this case? // isn't this error fatal? (Progress::NoProgress, problem, state) } /// A single ASCII char that isn't a newline. /// (For newlines, use newline_char(), which handles line numbers) pub fn ascii_char<'a>(expected: u8) -> impl Parser<'a, (), SyntaxError<'a>> { // Make sure this really is not a newline! debug_assert_ne!(expected, b'\n'); move |arena, state: State<'a>| match state.bytes.first() { Some(&actual) if expected == actual => Ok(( Progress::MadeProgress, (), state.advance_without_indenting(arena, 1)?, )), Some(_) => Err(unexpected(arena, 0, Attempting::Keyword, state)), _ => Err(unexpected_eof(arena, state, 0)), } } /// A single '\n' character. /// Use this instead of ascii_char('\n') because it properly handles /// incrementing the line number. 
pub fn newline_char<'a>() -> impl Parser<'a, (), SyntaxError<'a>> {
    move |arena, state: State<'a>| {
        let next_byte = state.bytes.first().copied();

        if next_byte == Some(b'\n') {
            // Consume the newline and move to the start of the next line.
            let next_line = state.newline(arena)?;

            Ok((Progress::MadeProgress, (), next_line))
        } else if next_byte.is_some() {
            // Some other byte is waiting.
            Err(unexpected(arena, 0, Attempting::Keyword, state))
        } else {
            // Nothing left to read.
            Err(unexpected_eof(arena, state, 0))
        }
    }
}

/// One or more ASCII hex digits. (Useful when parsing unicode escape codes,
/// which must consist entirely of ASCII hex digits.)
///
/// The run of digits must be followed by at least one non-digit byte: if the
/// input ends while still reading digits (or is empty), this fails with an
/// end-of-file error.
pub fn ascii_hex_digits<'a>() -> impl Parser<'a, &'a str, SyntaxError<'a>> {
    move |arena, state: State<'a>| {
        // Index of the first byte that is not a hex digit, which is also the
        // number of leading hex digits.
        let first_non_hex = state
            .bytes
            .iter()
            .position(|&byte| !(byte as char).is_ascii_hexdigit());

        match first_non_hex {
            // Every remaining byte was a hex digit (or there were none at all).
            None => Err(unexpected_eof(arena, state, 0)),
            // We didn't find any hex digits!
            Some(0) => Err(unexpected(arena, 0, Attempting::Keyword, state)),
            Some(digit_count) => {
                let mut digits = bumpalo::collections::String::new_in(arena);
                for &byte in &state.bytes[..digit_count] {
                    digits.push(byte as char);
                }

                let next_state = state.advance_without_indenting(arena, digit_count)?;

                Ok((Progress::MadeProgress, digits.into_bump_str(), next_state))
            }
        }
    }
}

/// A single UTF-8-encoded char. This will both parse *and* validate that the
/// char is valid UTF-8, but it will *not* advance the state.
///
/// Returns the char together with its encoded length in bytes.
pub fn peek_utf8_char<'a>(state: &State) -> Result<(char, usize), SyntaxError<'a>> {
    if state.bytes.is_empty() {
        return Err(SyntaxError::Eof(
            Region::zero(), /* TODO get a better region */
        ));
    }

    char::from_utf8_slice_start(state.bytes).map_err(|_| SyntaxError::BadUtf8)
}

/// A single UTF-8-encoded char, with an offset. This will both parse *and*
/// validate that the char is valid UTF-8, but it will *not* advance the state.
pub fn peek_utf8_char_at<'a>( state: &State, offset: usize, ) -> Result<(char, usize), SyntaxError<'a>> { if state.bytes.len() > offset { let bytes = &state.bytes[offset..]; match char::from_utf8_slice_start(bytes) { Ok((ch, len_utf8)) => Ok((ch, len_utf8)), Err(_) => Err(SyntaxError::BadUtf8), } } else { Err(SyntaxError::Eof( Region::zero(), /* TODO get a better region */ )) } } pub fn keyword<'a>(keyword: &'static str, min_indent: u16) -> impl Parser<'a, (), SyntaxError<'a>> { move |arena, state: State<'a>| { let initial_state = state.clone(); // first parse the keyword characters let (_, _, after_keyword_state) = ascii_string(keyword).parse(arena, state)?; // then we must have at least one space character // TODO this is potentially wasteful if there are a lot of spaces match crate::blankspace::space1(min_indent).parse(arena, after_keyword_state.clone()) { Err((_, fail, _)) => { // this is not a keyword, maybe it's `whence` or `iffy` // anyway, make no progress and return the initial state // so we can try something else Err((NoProgress, fail, initial_state)) } Ok((_, _, _)) => { // give back the state after parsing the keyword, but before the whitespace // that way we can attach the whitespace to whatever follows Ok((MadeProgress, (), after_keyword_state)) } } } } /// A hardcoded string with no newlines, consisting only of ASCII characters pub fn ascii_string<'a>(keyword: &'static str) -> impl Parser<'a, (), SyntaxError<'a>> { // Verify that this really is exclusively ASCII characters. // The `unsafe` block in this function relies upon this assumption! // // Also, this can't have newlines because we don't attempt to advance // the row in the state, only the column. 
debug_assert!(keyword.chars().all(|ch| ch.len_utf8() == 1 && ch != '\n')); move |arena, state: State<'a>| { let len = keyword.len(); // TODO do this comparison in one SIMD instruction (on supported systems) match state.bytes.get(0..len) { Some(next_str) => { if next_str == keyword.as_bytes() { Ok(( Progress::MadeProgress, (), state.advance_without_indenting(arena, len)?, )) } else { let (_, fail, state) = unexpected(arena, len, Attempting::Keyword, state); Err((NoProgress, fail, state)) } } _ => Err(unexpected_eof(arena, state, 0)), } } } /// Parse zero or more values separated by a delimiter (e.g. a comma) whose /// values are discarded pub fn sep_by0<'a, P, D, Val, Error>( delimiter: D, parser: P, ) -> impl Parser<'a, Vec<'a, Val>, Error> where D: Parser<'a, (), Error>, P: Parser<'a, Val, Error>, Error: 'a, { move |arena, state: State<'a>| { let start_bytes_len = state.bytes.len(); match parser.parse(arena, state) { Ok((elem_progress, first_output, next_state)) => { // in practice, we want elements to make progress debug_assert_eq!(elem_progress, MadeProgress); let mut state = next_state; let mut buf = Vec::with_capacity_in(1, arena); buf.push(first_output); loop { match delimiter.parse(arena, state) { Ok((_, (), next_state)) => { // If the delimiter passed, check the element parser. match parser.parse(arena, next_state) { Ok((element_progress, next_output, next_state)) => { // in practice, we want elements to make progress debug_assert_eq!(element_progress, MadeProgress); state = next_state; buf.push(next_output); } Err((_, fail, state)) => { // If the delimiter parsed, but the following // element did not, that's a fatal error. 
let progress = Progress::from_lengths(start_bytes_len, state.bytes.len()); return Err((progress, fail, state)); } } } Err((delim_progress, fail, old_state)) => match delim_progress { MadeProgress => return Err((MadeProgress, fail, old_state)), NoProgress => return Ok((NoProgress, buf, old_state)), }, } } } Err((element_progress, fail, new_state)) => match element_progress { MadeProgress => Err((MadeProgress, fail, new_state)), NoProgress => Ok((NoProgress, Vec::new_in(arena), new_state)), }, } } } /// Parse zero or more values separated by a delimiter (e.g. a comma) /// with an optional trailing delimiter whose values are discarded pub fn trailing_sep_by0<'a, P, D, Val, Error>( delimiter: D, parser: P, ) -> impl Parser<'a, Vec<'a, Val>, Error> where D: Parser<'a, (), Error>, P: Parser<'a, Val, Error>, Error: 'a, { move |arena, state: State<'a>| { let start_bytes_len = state.bytes.len(); match parser.parse(arena, state) { Ok((progress, first_output, next_state)) => { // in practice, we want elements to make progress debug_assert_eq!(progress, MadeProgress); let mut state = next_state; let mut buf = Vec::with_capacity_in(1, arena); buf.push(first_output); loop { match delimiter.parse(arena, state) { Ok((_, (), next_state)) => { // If the delimiter passed, check the element parser. 
match parser.parse(arena, next_state) { Ok((element_progress, next_output, next_state)) => { // in practice, we want elements to make progress debug_assert_eq!(element_progress, MadeProgress); state = next_state; buf.push(next_output); } Err((_, _fail, old_state)) => { // If the delimiter parsed, but the following // element did not, that means we saw a trailing comma let progress = Progress::from_lengths( start_bytes_len, old_state.bytes.len(), ); return Ok((progress, buf, old_state)); } } } Err((delim_progress, fail, old_state)) => match delim_progress { MadeProgress => return Err((MadeProgress, fail, old_state)), NoProgress => return Ok((NoProgress, buf, old_state)), }, } } } Err((element_progress, fail, new_state)) => match element_progress { MadeProgress => Err((MadeProgress, fail, new_state)), NoProgress => Ok((NoProgress, Vec::new_in(arena), new_state)), }, } } } /// Parse one or more values separated by a delimiter (e.g. a comma) whose /// values are discarded pub fn sep_by1<'a, P, D, Val, Error>( delimiter: D, parser: P, ) -> impl Parser<'a, Vec<'a, Val>, Error> where D: Parser<'a, (), Error>, P: Parser<'a, Val, Error>, Error: 'a, { move |arena, state: State<'a>| { let start_bytes_len = state.bytes.len(); match parser.parse(arena, state) { Ok((progress, first_output, next_state)) => { debug_assert_eq!(progress, MadeProgress); let mut state = next_state; let mut buf = Vec::with_capacity_in(1, arena); buf.push(first_output); loop { match delimiter.parse(arena, state) { Ok((_, (), next_state)) => { // If the delimiter passed, check the element parser. match parser.parse(arena, next_state) { Ok((_, next_output, next_state)) => { state = next_state; buf.push(next_output); } Err((element_progress, fail, state)) => { // If the delimiter parsed, but the following // element did not, that's a fatal error. 
return Err((element_progress, fail, state)); } } } Err((delim_progress, fail, old_state)) => { match delim_progress { MadeProgress => { // fail if the delimiter made progress return Err((MadeProgress, fail, old_state)); } NoProgress => { let progress = Progress::from_lengths( start_bytes_len, old_state.bytes.len(), ); return Ok((progress, buf, old_state)); } } } } } } Err((fail_progress, fail, new_state)) => Err((fail_progress, fail, new_state)), } } } pub fn fail_when_progress<'a, T, E>( progress: Progress, fail: E, value: T, state: State<'a>, ) -> ParseResult<'a, T, E> { match progress { MadeProgress => Err((MadeProgress, fail, state)), NoProgress => Ok((NoProgress, value, state)), } } pub fn satisfies<'a, P, A, F>(parser: P, predicate: F) -> impl Parser<'a, A, SyntaxError<'a>> where P: Parser<'a, A, SyntaxError<'a>>, F: Fn(&A) -> bool, { move |arena: &'a Bump, state: State<'a>| match parser.parse(arena, state.clone()) { Ok((progress, output, next_state)) if predicate(&output) => { Ok((progress, output, next_state)) } Ok((progress, _, _)) | Err((progress, _, _)) => { Err((progress, SyntaxError::ConditionFailed, state)) } } } pub fn optional<'a, P, T, E>(parser: P) -> impl Parser<'a, Option, E> where P: Parser<'a, T, E>, E: 'a, { move |arena: &'a Bump, state: State<'a>| { // We have to clone this because if the optional parser fails, // we need to revert back to the original state. let original_state = state.clone(); match parser.parse(arena, state) { Ok((progress, out1, state)) => Ok((progress, Some(out1), state)), Err((_, _, _)) => { // NOTE this will backtrack // TODO can we get rid of some of the potential backtracking? Ok((NoProgress, None, original_state)) } } } } // MACRO COMBINATORS // // Using some combinators together results in combinatorial type explosion // which makes things take forever to compile. Using macros instead avoids this! #[macro_export] macro_rules! 
loc {
    // Runs `$parser` and wraps its output in a `Located`, whose region runs
    // from the line/column before parsing to the line/column after it.
    // Failures are passed through untouched.
    ($parser:expr) => {
        move |arena, state: $crate::parser::State<'a>| {
            use roc_region::all::{Located, Region};

            // Remember where we started; `state` is moved into the parser.
            let start_col = state.column;
            let start_line = state.line;

            match $parser.parse(arena, state) {
                Ok((progress, value, state)) => {
                    let end_col = state.column;
                    let end_line = state.line;
                    let region = Region {
                        start_col,
                        start_line,
                        end_col,
                        end_line,
                    };

                    Ok((progress, Located { region, value }, state))
                }
                Err(err) => Err(err),
            }
        }
    };
}

/// If the first one parses, ignore its output and move on to parse with the second one.
///
/// On any failure the returned state is the one from before the first parser
/// ran; the reported progress is the combination of both parsers' progress.
#[macro_export]
macro_rules! skip_first {
    ($p1:expr, $p2:expr) => {
        move |arena, state: $crate::parser::State<'a>| {
            // Kept so that every failure can hand back the starting state.
            let original_state = state.clone();

            match $p1.parse(arena, state) {
                Ok((p1, _, state)) => match $p2.parse(arena, state) {
                    Ok((p2, out2, state)) => Ok((p1.or(p2), out2, state)),
                    Err((p2, fail, _)) => Err((p1.or(p2), fail, original_state)),
                },
                Err((progress, fail, _)) => Err((progress, fail, original_state)),
            }
        }
    };
}

/// If the first one parses, parse the second one; if it also parses, use the
/// output from the first one.
///
/// On any failure the returned state is the one from before the first parser
/// ran; the reported progress is the combination of both parsers' progress.
#[macro_export]
macro_rules! skip_second {
    ($p1:expr, $p2:expr) => {
        move |arena, state: $crate::parser::State<'a>| {
            // Kept so that every failure can hand back the starting state.
            let original_state = state.clone();

            match $p1.parse(arena, state) {
                Ok((p1, out1, state)) => match $p2.parse(arena, state) {
                    Ok((p2, _, state)) => Ok((p1.or(p2), out1, state)),
                    Err((p2, fail, _)) => Err((p1.or(p2), fail, original_state)),
                },
                Err((progress, fail, _)) => Err((progress, fail, original_state)),
            }
        }
    };
}

/// Parse zero or more elements between two braces (e.g. square braces).
/// Elements can be optionally surrounded by spaces, and are separated by a
/// delimiter (e.g comma-separated). Braces and delimiters get discarded.
#[macro_export]
macro_rules!
collection {
    // Arguments: opening-brace parser, element parser, delimiter parser,
    // closing-brace parser, and the minimum indentation passed on to
    // `space0_around` for the blank space around each element.
    ($opening_brace:expr, $elem:expr, $delimiter:expr, $closing_brace:expr, $min_indent:expr) => {
        skip_first!(
            $opening_brace,
            skip_first!(
                // We specifically allow space characters inside here, so that
                // `[ ]` can be successfully parsed as an empty list, and then
                // changed by the formatter back into `[]`.
                //
                // We don't allow newlines or comments in the middle of empty
                // roc_collections because those are normally stored in an Expr,
                // and there's no Expr in which to store them in an empty collection!
                //
                // We could change the AST to add extra storage specifically to
                // support empty literals containing newlines or comments, but this
                // does not seem worth even the tiniest regression in compiler performance.
                zero_or_more!($crate::parser::ascii_char(b' ')),
                skip_second!(
                    $crate::parser::sep_by0(
                        $delimiter,
                        $crate::blankspace::space0_around($elem, $min_indent)
                    ),
                    $closing_brace
                )
            )
        )
    };
}

/// Parse zero or more elements between two braces (e.g. square braces).
/// Elements can be optionally surrounded by spaces, and are separated by a
/// delimiter (e.g comma-separated) with optionally a trailing delimiter.
/// Braces and delimiters get discarded.
///
/// The output is a pair: the parsed elements, and whatever `space0` parsed
/// between the last element (or trailing delimiter) and the closing brace.
#[macro_export]
macro_rules! collection_trailing_sep {
    ($opening_brace:expr, $elem:expr, $delimiter:expr, $closing_brace:expr, $min_indent:expr) => {
        skip_first!(
            $opening_brace,
            skip_first!(
                // We specifically allow space characters inside here, so that
                // `[ ]` can be successfully parsed as an empty list, and then
                // changed by the formatter back into `[]`.
                //
                // We don't allow newlines or comments in the middle of empty
                // roc_collections because those are normally stored in an Expr,
                // and there's no Expr in which to store them in an empty collection!
                //
                // We could change the AST to add extra storage specifically to
                // support empty literals containing newlines or comments, but this
                // does not seem worth even the tiniest regression in compiler performance.
                zero_or_more!($crate::parser::ascii_char(b' ')),
                skip_second!(
                    and!(
                        $crate::parser::trailing_sep_by0(
                            $delimiter,
                            $crate::blankspace::space0_around($elem, $min_indent)
                        ),
                        $crate::blankspace::space0($min_indent)
                    ),
                    $closing_brace
                )
            )
        )
    };
}

// Runs `$p1` and then `$p2`, producing both outputs as a tuple.
// If `$p2` fails, the state from before `$p1` is returned; if `$p1` itself
// fails, its own error state is returned unchanged.
#[macro_export]
macro_rules! and {
    ($p1:expr, $p2:expr) => {
        move |arena: &'a bumpalo::Bump, state: $crate::parser::State<'a>| {
            // We have to clone this because if the first parser passes and then
            // the second one fails, we need to revert back to the original state.
            let original_state = state.clone();

            match $p1.parse(arena, state) {
                Ok((p1, out1, state)) => match $p2.parse(arena, state) {
                    Ok((p2, out2, state)) => Ok((p1.or(p2), (out1, out2), state)),
                    Err((p2, fail, _)) => Err((p1.or(p2), fail, original_state)),
                },
                Err((progress, fail, state)) => Err((progress, fail, state)),
            }
        }
    };
}

// Tries each parser in order. A parser that fails *without* progress hands
// its returned state to the next alternative; a failure *with* progress is
// final and is returned immediately.
#[macro_export]
macro_rules! one_of {
    ($p1:expr, $p2:expr) => {
        move |arena: &'a bumpalo::Bump, state: $crate::parser::State<'a>| {
            match $p1.parse(arena, state) {
                valid @ Ok(_) => valid,
                Err((MadeProgress, fail, state)) => Err((MadeProgress, fail, state)),
                Err((NoProgress, _, state)) => $p2.parse(arena, state),
            }
        }
    };

    // Three or more alternatives: peel off the first and recurse on the rest.
    ($p1:expr, $($others:expr),+) => {
        one_of!($p1, one_of!($($others),+))
    };
}

#[macro_export]
macro_rules!
one_of_with_error {
    // Like `one_of!`, except that when `$p1` fails after making progress its
    // error is replaced by `$toerror(line, column)`, evaluated on the state
    // that `$p1` returned. `$toerror` is separated from the parsers by `;`.
    ($toerror:expr; $p1:expr, $p2:expr) => {
        move |arena: &'a bumpalo::Bump, state: $crate::parser::State<'a>| {
            match $p1.parse(arena, state) {
                valid @ Ok(_) => valid,
                Err((MadeProgress, _, state)) => {
                    Err((MadeProgress, $toerror(state.line, state.column), state))
                }
                Err((NoProgress, _, state)) => $p2.parse(arena, state),
            }
        }
    };

    // Three or more alternatives: the remaining ones are combined with plain
    // `one_of!`. The recursive call must use `;` after `$toerror`; with a `,`
    // there it matches neither arm of this macro and fails to expand.
    ($toerror:expr; $p1:expr, $($others:expr),+) => {
        one_of_with_error!($toerror; $p1, one_of!($($others),+))
    };
}

/// Parses the single byte `word` (which must not be a newline), advancing the
/// column by one. On a mismatch or at end of input, fails without progress,
/// using `to_error(line, column)` to build the error.
///
/// NOTE(review): `state.column + 1` is not overflow-checked, unlike
/// `State::advance_without_indenting` - confirm that callers cannot reach
/// this with `column == u16::MAX`.
fn word1<'a, ToError, E>(word: u8, to_error: ToError) -> impl Parser<'a, (), E>
where
    ToError: Fn(Row, Col) -> E,
    E: 'a,
{
    debug_assert_ne!(word, b'\n');

    move |_arena: &'a Bump, state: State<'a>| match state.bytes.first() {
        Some(x) if *x == word => Ok((
            MadeProgress,
            (),
            State {
                bytes: &state.bytes[1..],
                column: state.column + 1,
                ..state
            },
        )),
        _ => Err((NoProgress, to_error(state.line, state.column), state)),
    }
}

// Runs `$parser` and applies `$transform` to its output; progress, state and
// failures pass through unchanged.
#[macro_export]
macro_rules! map {
    ($parser:expr, $transform:expr) => {
        move |arena, state| {
            $parser
                .parse(arena, state)
                .map(|(progress, output, next_state)| (progress, $transform(output), next_state))
        }
    };
}

// Same as `map!`, but `$transform` also receives the arena as its first
// argument.
#[macro_export]
macro_rules! map_with_arena {
    ($parser:expr, $transform:expr) => {
        move |arena, state| {
            $parser
                .parse(arena, state)
                .map(|(progress, output, next_state)| {
                    (progress, $transform(arena, output), next_state)
                })
        }
    };
}

#[macro_export]
macro_rules!
zero_or_more {
    // Applies `$parser` repeatedly and collects the outputs into an
    // arena-allocated Vec. Stops successfully at the first failure that made
    // no progress; a failure that did make progress is returned as an error.
    //
    // Note that `State`, `Progress`, `MadeProgress` and `NoProgress` are not
    // `$crate`-qualified here, so they must be in scope at the call site.
    ($parser:expr) => {
        move |arena, state: State<'a>| {
            use bumpalo::collections::Vec;

            // Remaining input length at the start, used to report overall progress.
            let start_bytes_len = state.bytes.len();

            match $parser.parse(arena, state) {
                Ok((_, first_output, next_state)) => {
                    let mut state = next_state;
                    let mut buf = Vec::with_capacity_in(1, arena);

                    buf.push(first_output);

                    loop {
                        match $parser.parse(arena, state) {
                            Ok((_, next_output, next_state)) => {
                                state = next_state;
                                buf.push(next_output);
                            }
                            Err((fail_progress, fail, old_state)) => {
                                match fail_progress {
                                    MadeProgress => {
                                        // made progress on an element and then failed; that's an error
                                        return Err((MadeProgress, fail, old_state));
                                    }
                                    NoProgress => {
                                        // the next element failed with no progress
                                        // report whether we made progress before
                                        let progress = Progress::from_lengths(start_bytes_len, old_state.bytes.len());
                                        return Ok((progress, buf, old_state));
                                    }
                                }
                            }
                        }
                    }
                }
                Err((fail_progress, fail, new_state)) => {
                    match fail_progress {
                        MadeProgress => {
                            // made progress on an element and then failed; that's an error
                            Err((MadeProgress, fail, new_state))
                        }
                        NoProgress => {
                            // the first element failed (with no progress), but that's OK
                            // because we only need to parse 0 elements
                            Ok((NoProgress, Vec::new_in(arena), new_state))
                        }
                    }
                }
            }
        }
    };
}

// Like `zero_or_more!`, but the first application of `$parser` must succeed.
// If it fails, the error is replaced by an end-of-file error built with
// `unexpected_eof`, whatever the original failure was.
//
// Once at least one element has been parsed, the result is decided by
// `fail_when_progress`: a later failure with progress is an error, and one
// without progress ends the list successfully (reported as `NoProgress`).
#[macro_export]
macro_rules! one_or_more {
    ($parser:expr) => {
        move |arena, state: State<'a>| {
            use bumpalo::collections::Vec;

            match $parser.parse(arena, state) {
                Ok((_, first_output, next_state)) => {
                    let mut state = next_state;
                    let mut buf = Vec::with_capacity_in(1, arena);

                    buf.push(first_output);

                    loop {
                        match $parser.parse(arena, state) {
                            Ok((_, next_output, next_state)) => {
                                state = next_state;
                                buf.push(next_output);
                            }
                            Err((progress, fail, old_state)) => {
                                return $crate::parser::fail_when_progress(
                                    progress, fail, buf, old_state,
                                )
                            }
                        }
                    }
                }
                Err((progress, _, new_state)) => {
                    // The first element is expected to fail without progress;
                    // this is only checked in debug builds.
                    debug_assert_eq!(progress, NoProgress, "{:?}", &new_state);
                    Err($crate::parser::unexpected_eof(arena, new_state, 0))
                }
            }
        }
    };
}

#[macro_export]
macro_rules!
debug { ($parser:expr) => { move |arena, state: $crate::parser::State<'a>| dbg!($parser.parse(arena, state)) }; } #[macro_export] macro_rules! attempt { ($attempting:expr, $parser:expr) => { move |arena: &'a Bump, mut state: $crate::parser::State<'a>| { let item = $crate::parser::ContextItem { context: $attempting, line: state.line, column: state.column, }; state.context_stack = arena.alloc($crate::parser::ContextStack::Cons( item, state.context_stack, )); $parser .parse(arena, state) .map(|(progress, answer, mut state)| { // If the parser suceeded, go back to what we were originally attempting. // (If it failed, that's exactly where we care what we were attempting!) // debug_assert_eq!(!state.context_stack.is_empty()); match state.context_stack.uncons() { Some((_item, rest)) => { state.context_stack = rest; } None => unreachable!("context stack contains at least one element"), } (progress, answer, state) }) } }; } #[macro_export] macro_rules! either { ($p1:expr, $p2:expr) => { move |arena: &'a bumpalo::Bump, state: $crate::parser::State<'a>| match $p1 .parse(arena, state) { Ok((progress, output, state)) => { Ok((progress, $crate::parser::Either::First(output), state)) } Err((NoProgress, _, state)) => match $p2.parse(arena, state) { Ok((progress, output, state)) => { Ok((progress, $crate::parser::Either::Second(output), state)) } Err((progress, fail, state)) => Err((progress, fail, state)), }, Err((MadeProgress, fail, state)) => Err((MadeProgress, fail, state)), } }; } /// Parse everything between two braces (e.g. parentheses), skipping both braces /// and keeping only whatever was parsed in between them. #[macro_export] macro_rules! between { ($opening_brace:expr, $parser:expr, $closing_brace:expr) => { skip_first!($opening_brace, skip_second!($parser, $closing_brace)) }; } #[macro_export] macro_rules! 
record_field {
    // Parses one record field: a lowercase label, optionally followed by
    // `: value` (required value) or `? value` (optional value).
    ($val_parser:expr, $min_indent:expr) => {
        move |arena: &'a bumpalo::Bump,
              state: $crate::parser::State<'a>|
              -> $crate::parser::ParseResult<'a, $crate::ast::AssignedField<'a, _>, _> {
            use $crate::ast::AssignedField::*;
            use $crate::blankspace::{space0, space0_before};
            use $crate::ident::lowercase_ident;
            use $crate::parser::ascii_char;
            use $crate::parser::Either::*;

            // A field name is mandatory, e.g. "email".
            let (progress, loc_label, state) = loc!(lowercase_ident()).parse(arena, state)?;
            debug_assert_eq!(progress, MadeProgress);

            let (_, spaces, state) = space0($min_indent).parse(arena, state)?;

            // The value is optional: both `{ email }` and `{ email: blah }` work.
            // (This holds for literals as well as for types.)
            let (_, opt_loc_val, state) = $crate::parser::optional(either!(
                skip_first!(ascii_char(b':'), space0_before($val_parser, $min_indent)),
                skip_first!(ascii_char(b'?'), space0_before($val_parser, $min_indent))
            ))
            .parse(arena, state)?;

            let answer = match opt_loc_val {
                // `label: value`
                Some(First(loc_val)) => RequiredValue(loc_label, spaces, arena.alloc(loc_val)),
                // `label ? value`
                Some(Second(loc_val)) => OptionalValue(loc_label, spaces, arena.alloc(loc_val)),
                // No value was given, so record the bare label.
                // Canonicalization will know what to do with it later.
                None if spaces.is_empty() => LabelOnly(loc_label),
                None => SpaceAfter(arena.alloc(LabelOnly(loc_label)), spaces),
            };

            Ok((MadeProgress, answer, state))
        }
    };
}

/// Parses a brace-delimited, comma-separated collection of record fields via
/// `collection_trailing_sep!`, with no record-update prefix.
#[macro_export]
macro_rules! record_without_update {
    ($val_parser:expr, $min_indent:expr) => {
        collection_trailing_sep!(
            ascii_char(b'{'),
            loc!(record_field!($val_parser, $min_indent)),
            ascii_char(b','),
            ascii_char(b'}'),
            $min_indent
        )
    };
}

#[macro_export]
macro_rules! record {
    ($val_parser:expr, $min_indent:expr) => {
        skip_first!(
            $crate::parser::ascii_char(b'{'),
            and!(
                // An identifier followed by an '&' may optionally come first, which
                // turns this into a record update, e.g. { Foo.user & username: "blah" }.
$crate::parser::optional(skip_second!( $crate::blankspace::space0_around( // We wrap the ident in an Expr here, // so that we have a Spaceable value to work with, // and then in canonicalization verify that it's an Expr::Var // (and not e.g. an `Expr::Access`) and extract its string. loc!(map_with_arena!( $crate::expr::ident(), $crate::expr::ident_to_expr )), $min_indent ), $crate::parser::ascii_char(b'&') )), loc!(skip_first!( // We specifically allow space characters inside here, so that // `{ }` can be successfully parsed as an empty record, and then // changed by the formatter back into `{}`. zero_or_more!($crate::parser::ascii_char(b' ')), skip_second!( and!( $crate::parser::trailing_sep_by0( $crate::parser::ascii_char(b','), $crate::blankspace::space0_around( loc!(record_field!($val_parser, $min_indent)), $min_indent ), ), $crate::blankspace::space0($min_indent) ), $crate::parser::ascii_char(b'}') ) )) ) ) }; } /// For some reason, some usages won't compile unless they use this instead of the macro version #[inline(always)] pub fn and<'a, P1, P2, A, B, E>(p1: P1, p2: P2) -> impl Parser<'a, (A, B), E> where P1: Parser<'a, A, E>, P2: Parser<'a, B, E>, P1: 'a, P2: 'a, A: 'a, B: 'a, E: 'a, { and!(p1, p2) } /// For some reason, some usages won't compile unless they use this instead of the macro version #[inline(always)] pub fn loc<'a, P, Val, Error>(parser: P) -> impl Parser<'a, Located, Error> where P: Parser<'a, Val, Error>, Error: 'a, { loc!(parser) } /// For some reason, some usages won't compile unless they use this instead of the macro version #[inline(always)] pub fn map<'a, P, F, Before, After, E>(parser: P, transform: F) -> impl Parser<'a, After, E> where P: Parser<'a, Before, E>, F: Fn(Before) -> After, E: 'a, { map!(parser, transform) } /// For some reason, some usages won't compile unless they use this instead of the macro version #[inline(always)] pub fn map_with_arena<'a, P, F, Before, After, E>( parser: P, transform: F, ) -> impl Parser<'a, After, 
E>
where
    P: Parser<'a, Before, E>,
    P: 'a,
    F: Fn(&'a Bump, Before) -> After,
    F: 'a,
    Before: 'a,
    After: 'a,
    E: 'a,
{
    map_with_arena!(parser, transform)
}

/// Function form of the `attempt!` macro.
///
/// For some reason, some usages won't compile unless they use this instead of the macro version
#[inline(always)]
pub fn attempt<'a, P, Val, Error>(attempting: Attempting, parser: P) -> impl Parser<'a, Val, Error>
where
    P: Parser<'a, Val, Error>,
    Error: 'a,
{
    attempt!(attempting, parser)
}

/// Interprets `bytes` as UTF-8, mapping any decoding failure to
/// `SyntaxError::BadUtf8`.
pub fn parse_utf8<'a>(bytes: &[u8]) -> Result<&str, SyntaxError<'a>> {
    from_utf8(bytes).map_err(|_| SyntaxError::BadUtf8)
}

/// Builds a parser that succeeds (with `NoProgress` and a unit output) only
/// when the input has been fully consumed; otherwise it fails with
/// `SyntaxError::ConditionFailed`, also without progress.
pub fn end_of_file<'a>() -> impl Parser<'a, (), SyntaxError<'a>> {
    |_arena: &'a Bump, state: State<'a>| {
        if !state.has_reached_end() {
            return Err((NoProgress, SyntaxError::ConditionFailed, state));
        }

        Ok((NoProgress, (), state))
    }
}

/// Wraps `parser` so that its result always reports `NoProgress`.
///
/// On success the output and the advanced state are kept; on failure the error
/// is kept but the state is reset to what it was before `parser` ran.
pub fn backtrackable<'a, P, Val, Error>(parser: P) -> impl Parser<'a, Val, Error>
where
    P: Parser<'a, Val, Error>,
    Error: 'a,
{
    move |arena: &'a Bump, state: State<'a>| {
        // Snapshot taken up front so a failure can hand back the starting state.
        let original = state.clone();

        parser
            .parse(arena, state)
            .map(|(_, value, next_state)| (NoProgress, value, next_state))
            .map_err(|(_, fail, _)| (NoProgress, fail, original))
    }
}