use crate::ast::CommentOrNewline::{self, *}; use crate::ast::{Attempting, Spaceable}; use crate::parser::{ self, and, ascii_char, ascii_string, backtrackable, bad_input_to_syntax_error, optional, parse_utf8, peek_utf8_char, then, unexpected, unexpected_eof, BadInputError, Col, Parser, Progress::{self, *}, Row, State, SyntaxError, }; use bumpalo::collections::string::String; use bumpalo::collections::vec::Vec; use bumpalo::Bump; use roc_region::all::{Located, Region}; /// Parses the given expression with 0 or more (spaces/comments/newlines) before and/or after it. /// Returns a Located where the location is around the Expr, ignoring the spaces. /// If any newlines or comments were found, the Expr will be wrapped in a SpaceBefore and/or /// SpaceAfter as appropriate. pub fn space0_around<'a, P, S>( parser: P, min_indent: u16, ) -> impl Parser<'a, Located, SyntaxError<'a>> where S: Spaceable<'a>, S: 'a, S: Sized, P: Parser<'a, Located, SyntaxError<'a>>, P: 'a, { parser::map_with_arena( and(space0(min_indent), and(parser, space0(min_indent))), move |arena: &'a Bump, tuples: ( &'a [CommentOrNewline<'a>], (Located, &'a [CommentOrNewline<'a>]), )| { let (spaces_before, (loc_val, spaces_after)) = tuples; if spaces_before.is_empty() { if spaces_after.is_empty() { loc_val } else { arena .alloc(loc_val.value) .with_spaces_after(spaces_after, loc_val.region) } } else if spaces_after.is_empty() { arena .alloc(loc_val.value) .with_spaces_before(spaces_before, loc_val.region) } else { let wrapped_expr = arena .alloc(loc_val.value) .with_spaces_after(spaces_after, loc_val.region); arena .alloc(wrapped_expr.value) .with_spaces_before(spaces_before, wrapped_expr.region) } }, ) } pub fn space0_around_ee<'a, P, S, E>( parser: P, min_indent: u16, space_problem: fn(BadInputError, Row, Col) -> E, indent_before_problem: fn(Row, Col) -> E, indent_after_problem: fn(Row, Col) -> E, ) -> impl Parser<'a, Located, E> where S: Spaceable<'a>, S: 'a, P: Parser<'a, Located, E>, P: 'a, E: 'a, { 
parser::map_with_arena( and( space0_e(min_indent, space_problem, indent_before_problem), and( parser, space0_e(min_indent, space_problem, indent_after_problem), ), ), move |arena: &'a Bump, tuples: ( &'a [CommentOrNewline<'a>], (Located, &'a [CommentOrNewline<'a>]), )| { let (spaces_before, (loc_val, spaces_after)) = tuples; if spaces_before.is_empty() { if spaces_after.is_empty() { loc_val } else { arena .alloc(loc_val.value) .with_spaces_after(spaces_after, loc_val.region) } } else if spaces_after.is_empty() { arena .alloc(loc_val.value) .with_spaces_before(spaces_before, loc_val.region) } else { let wrapped_expr = arena .alloc(loc_val.value) .with_spaces_after(spaces_after, loc_val.region); arena .alloc(wrapped_expr.value) .with_spaces_before(spaces_before, wrapped_expr.region) } }, ) } /// Parses the given expression with 1 or more (spaces/comments/newlines) before it, /// and also 1 or more spaces after it. /// Returns a Located where the location is around the Expr, ignoring the spaces. /// If any newlines or comments were found, the Expr will be wrapped in a SpaceBefore and/or /// SpaceAfter as appropriate. 
pub fn space1_around<'a, P, S>(
    parser: P,
    min_indent: u16,
) -> impl Parser<'a, Located<S>, SyntaxError<'a>>
where
    S: Spaceable<'a>,
    S: 'a,
    P: Parser<'a, Located<S>, SyntaxError<'a>>,
    P: 'a,
{
    parser::map_with_arena(
        and(space1(min_indent), and(parser, space1(min_indent))),
        |arena, (spaces_before, (loc_expr, spaces_after))| {
            // `space1` can still yield an empty list (plain spaces produce no
            // `CommentOrNewline` entries), so each side is only wrapped when
            // there is something to attach.
            if spaces_before.is_empty() {
                if spaces_after.is_empty() {
                    loc_expr
                } else {
                    arena
                        .alloc(loc_expr.value)
                        .with_spaces_after(spaces_after, loc_expr.region)
                }
            } else if spaces_after.is_empty() {
                arena
                    .alloc(loc_expr.value)
                    .with_spaces_before(spaces_before, loc_expr.region)
            } else {
                // Both sides present: wrap "after" first, then "before" around it.
                let loc_wrapped_expr = arena
                    .alloc(loc_expr.value)
                    .with_spaces_after(spaces_after, loc_expr.region);

                arena
                    .alloc(loc_wrapped_expr.value)
                    .with_spaces_before(spaces_before, loc_wrapped_expr.region)
            }
        },
    )
}

/// Parses the given expression with 0 or more (spaces/comments/newlines) before it.
/// Returns a Located where the location is around the Expr, ignoring the spaces.
/// The Expr will be wrapped in a SpaceBefore if there were any newlines or comments found.
pub fn space0_before<'a, P, S>(
    parser: P,
    min_indent: u16,
) -> impl Parser<'a, Located<S>, SyntaxError<'a>>
where
    S: Spaceable<'a>,
    S: 'a,
    P: Parser<'a, Located<S>, SyntaxError<'a>>,
    P: 'a,
{
    parser::map_with_arena(
        and!(space0(min_indent), parser),
        |arena: &'a Bump, (space_list, loc_expr): (&'a [CommentOrNewline<'a>], Located<S>)| {
            if space_list.is_empty() {
                loc_expr
            } else {
                arena
                    .alloc(loc_expr.value)
                    .with_spaces_before(space_list, loc_expr.region)
            }
        },
    )
}

/// Same shape as `space0_before`, but the error type `E` is chosen by the caller.
///
/// `space_problem` and `indent_problem` are forwarded to `space0_e`, which
/// parses the spaces in front of `parser`.
pub fn space0_before_e<'a, P, S, E>(
    parser: P,
    min_indent: u16,
    space_problem: fn(BadInputError, Row, Col) -> E,
    indent_problem: fn(Row, Col) -> E,
) -> impl Parser<'a, Located<S>, E>
where
    S: Spaceable<'a>,
    S: 'a,
    P: Parser<'a, Located<S>, E>,
    P: 'a,
    E: 'a,
{
    parser::map_with_arena(
        and!(space0_e(min_indent, space_problem, indent_problem), parser),
        |arena: &'a Bump, (space_list, loc_expr): (&'a [CommentOrNewline<'a>], Located<S>)| {
            if space_list.is_empty() {
                loc_expr
            } else {
                arena
                    .alloc(loc_expr.value)
                    .with_spaces_before(space_list, loc_expr.region)
            }
        },
    )
}

/// Parses the given expression with 0 or more (spaces/comments/newlines) after it,
/// with the error type `E` chosen by the caller.
///
/// `space_problem` and `indent_problem` are forwarded to `space0_e`, which
/// parses the spaces following `parser`. The value is wrapped via
/// `with_spaces_after` only when the parsed space list is non-empty.
pub fn space0_after_e<'a, P, S, E>(
    parser: P,
    min_indent: u16,
    space_problem: fn(BadInputError, Row, Col) -> E,
    indent_problem: fn(Row, Col) -> E,
) -> impl Parser<'a, Located<S>, E>
where
    S: Spaceable<'a>,
    S: 'a,
    P: Parser<'a, Located<S>, E>,
    P: 'a,
    E: 'a,
{
    parser::map_with_arena(
        and!(parser, space0_e(min_indent, space_problem, indent_problem)),
        |arena: &'a Bump, (loc_expr, space_list): (Located<S>, &'a [CommentOrNewline<'a>])| {
            if space_list.is_empty() {
                loc_expr
            } else {
                arena
                    .alloc(loc_expr.value)
                    .with_spaces_after(space_list, loc_expr.region)
            }
        },
    )
}

/// Parses the given expression with 1 or more (spaces/comments/newlines) before it.
/// Returns a Located where the location is around the Expr, ignoring the spaces.
/// The Expr will be wrapped in a SpaceBefore if there were any newlines or comments found.
pub fn space1_before<'a, P, S>(
    parser: P,
    min_indent: u16,
) -> impl Parser<'a, Located<S>, SyntaxError<'a>>
where
    S: Spaceable<'a>,
    S: 'a,
    P: Parser<'a, Located<S>, SyntaxError<'a>>,
    P: 'a,
{
    parser::map_with_arena(
        // The leading spaces are wrapped in `backtrackable` here, unlike in
        // the other `space*` wrappers in this file.
        and!(backtrackable(space1(min_indent)), parser),
        |arena, (space_list, loc_expr)| {
            if space_list.is_empty() {
                loc_expr
            } else {
                arena
                    .alloc(loc_expr.value)
                    .with_spaces_before(space_list, loc_expr.region)
            }
        },
    )
}

/// Parses the given expression with 0 or more (spaces/comments/newlines) after it.
/// Returns a Located where the location is around the Expr, ignoring the spaces.
/// The Expr will be wrapped in a SpaceAfter if there were any newlines or comments found.
pub fn space0_after<'a, P, S>(
    parser: P,
    min_indent: u16,
) -> impl Parser<'a, Located<S>, SyntaxError<'a>>
where
    S: Spaceable<'a>,
    S: 'a,
    P: Parser<'a, Located<S>, SyntaxError<'a>>,
    P: 'a,
{
    parser::map_with_arena(
        and!(parser, space0(min_indent)),
        |arena, (loc_expr, space_list)| {
            if space_list.is_empty() {
                loc_expr
            } else {
                arena
                    .alloc(loc_expr.value)
                    .with_spaces_after(space_list, loc_expr.region)
            }
        },
    )
}

/// Parses the given expression with 1 or more (spaces/comments/newlines) after it.
/// Returns a Located where the location is around the Expr, ignoring the spaces.
/// The Expr will be wrapped in a SpaceAfter if there were any newlines or comments found.
pub fn space1_after<'a, P, S>(
    parser: P,
    min_indent: u16,
) -> impl Parser<'a, Located<S>, SyntaxError<'a>>
where
    S: Spaceable<'a>,
    S: 'a,
    P: Parser<'a, Located<S>, SyntaxError<'a>>,
    P: 'a,
{
    parser::map_with_arena(
        and!(parser, space1(min_indent)),
        |arena, (loc_expr, space_list)| {
            if space_list.is_empty() {
                loc_expr
            } else {
                arena
                    .alloc(loc_expr.value)
                    .with_spaces_after(space_list, loc_expr.region)
            }
        },
    )
}

/// Zero or more (spaces/comments/newlines).
pub fn space0<'a>(min_indent: u16) -> impl Parser<'a, &'a [CommentOrNewline<'a>], SyntaxError<'a>> { spaces(false, min_indent) } pub fn space0_e<'a, E>( min_indent: u16, space_problem: fn(BadInputError, Row, Col) -> E, indent_problem: fn(Row, Col) -> E, ) -> impl Parser<'a, &'a [CommentOrNewline<'a>], E> where E: 'a, { spaces_help(false, min_indent, space_problem, indent_problem, |_, _| { unreachable!("no spaces are required, so this is unreachable") }) } /// One or more (spaces/comments/newlines). pub fn space1<'a>(min_indent: u16) -> impl Parser<'a, &'a [CommentOrNewline<'a>], SyntaxError<'a>> { // TODO try benchmarking a short-circuit for the typical case: see if there is // exactly one space followed by char that isn't [' ', '\n', or '#'], and // if so, return empty slice. The case where there's exactly 1 space should // be by far the most common. spaces(true, min_indent) } #[derive(Debug, Clone, Copy)] enum LineState { Normal, Comment, DocComment, } pub fn line_comment<'a>() -> impl Parser<'a, &'a str, SyntaxError<'a>> { then( and!(ascii_char(b'#'), optional(ascii_string("# "))), |arena: &'a Bump, state: State<'a>, _, (_, opt_doc)| { if opt_doc != None { return Err(unexpected(arena, 3, Attempting::LineComment, state)); } let mut length = 0; for &byte in state.bytes.iter() { if byte != b'\n' { length += 1; } else { break; } } let comment = &state.bytes[..length]; let state = state.advance_without_indenting(arena, length + 1)?; match parse_utf8(comment) { Ok(comment_str) => Ok((MadeProgress, comment_str, state)), Err(reason) => state.fail(arena, MadeProgress, reason), } }, ) } #[inline(always)] pub fn spaces_exactly<'a>(spaces_expected: u16) -> impl Parser<'a, (), SyntaxError<'a>> { move |arena: &'a Bump, state: State<'a>| { if spaces_expected == 0 { return Ok((NoProgress, (), state)); } let mut state = state; let mut spaces_seen: u16 = 0; while !state.bytes.is_empty() { match peek_utf8_char(&state) { Ok((' ', _)) => { spaces_seen += 1; state = 
state.advance_spaces(arena, 1)?; if spaces_seen == spaces_expected { return Ok((MadeProgress, (), state)); } } Ok(_) => { return Err(unexpected( arena, spaces_seen.into(), Attempting::TODO, state.clone(), )); } Err(SyntaxError::BadUtf8) => { // If we hit an invalid UTF-8 character, bail out immediately. let progress = Progress::progress_when(spaces_seen != 0); return state.fail(arena, progress, SyntaxError::BadUtf8); } Err(_) => { if spaces_seen == 0 { return Err(unexpected_eof(arena, state, 0)); } else { return Err(unexpected( arena, spaces_seen.into(), Attempting::TODO, state.clone(), )); } } } } if spaces_seen == 0 { Err(unexpected_eof(arena, state, 0)) } else { Err(unexpected( arena, spaces_seen.into(), Attempting::TODO, state, )) } } } #[inline(always)] fn spaces<'a>( require_at_least_one: bool, min_indent: u16, ) -> impl Parser<'a, &'a [CommentOrNewline<'a>], SyntaxError<'a>> { spaces_help( require_at_least_one, min_indent, bad_input_to_syntax_error, |_, _| SyntaxError::OutdentedTooFar, |_, _| SyntaxError::Eof(Region::zero()), ) } #[inline(always)] fn spaces_help<'a, E>( require_at_least_one: bool, min_indent: u16, space_problem: fn(BadInputError, Row, Col) -> E, indent_problem: fn(Row, Col) -> E, missing_space_problem: fn(Row, Col) -> E, ) -> impl Parser<'a, &'a [CommentOrNewline<'a>], E> where E: 'a, { move |arena: &'a Bump, state: State<'a>| { let original_state = state.clone(); let mut space_list = Vec::new_in(arena); let mut bytes_parsed = 0; let mut comment_line_buf = String::new_in(arena); let mut line_state = LineState::Normal; let mut state = state; let mut any_newlines = false; let start_row = original_state.line; let start_col = original_state.column; let start_bytes_len = state.bytes.len(); while !state.bytes.is_empty() { match peek_utf8_char(&state) { Ok((ch, utf8_len)) => { bytes_parsed += utf8_len; match line_state { LineState::Normal => { match ch { ' ' => { // Don't check indentation here; it might not be enough // indentation yet, but maybe 
it will be after more spaces happen! state = state.advance_spaces_e(arena, 1, space_problem)?; } '\r' => { // Ignore carriage returns. state = state.advance_spaces_e(arena, 1, space_problem)?; } '\n' => { // don't need to check the indent here since we'll reset it // anyway state = state.newline_e(arena, space_problem)?; // Newlines only get added to the list when they're outside comments. space_list.push(Newline); any_newlines = true; } '\t' => { return Err(( MadeProgress, space_problem( BadInputError::HasTab, state.line, state.column, ), state, )); } '#' => { // Check indentation to make sure we were indented enough // before this comment began. let progress = Progress::from_lengths(start_bytes_len, state.bytes.len()); state = state .check_indent_e( arena, min_indent, indent_problem, start_row, start_col, ) .map_err(|(fail, _)| { (progress, fail, original_state.clone()) })? .advance_without_indenting_e(arena, 1, space_problem)?; // We're now parsing a line comment! line_state = LineState::Comment; } _ => { return if require_at_least_one && bytes_parsed <= 1 { // We've parsed 1 char and it was not a space, // but we require parsing at least one space! Err(( NoProgress, missing_space_problem(state.line, state.column), state, )) } else { // First make sure we were indented enough! // // (We only do this if we've encountered any newlines. // Otherwise, we assume indentation is already correct. // It's actively important for correctness that we skip // this check if there are no newlines, because otherwise // we would have false positives for single-line defs.) 
let progress = Progress::from_lengths( start_bytes_len, state.bytes.len(), ); if any_newlines { state = state .check_indent_e( arena, min_indent, indent_problem, start_row, start_col, ) .map_err(|(fail, _)| { (progress, fail, original_state.clone()) })?; } Ok((progress, space_list.into_bump_slice(), state)) }; } } } LineState::Comment => { match ch { ' ' => { // If we're in a line comment, this won't affect indentation anyway. state = state.advance_without_indenting_e( arena, 1, space_problem, )?; if comment_line_buf.len() == 1 { match comment_line_buf.chars().next() { Some('#') => { // This is a comment begining with `## ` - that is, // a doc comment. // // (The space is important; otherwise, this is not // a doc comment, but rather something like a // big separator block, e.g. ############) line_state = LineState::DocComment; // This is now the beginning of the doc comment. comment_line_buf.clear(); } _ => { comment_line_buf.push(ch); } } } else { comment_line_buf.push(ch); } } '\n' => { state = state.newline_e(arena, space_problem)?; match (comment_line_buf.len(), comment_line_buf.chars().next()) { (1, Some('#')) => { // This is a line with `##` - that is, // a doc comment new line. space_list.push(DocComment("")); comment_line_buf = String::new_in(arena); line_state = LineState::Normal; } _ => { // This was a newline, so end this line comment. space_list.push(LineComment( comment_line_buf.into_bump_str(), )); comment_line_buf = String::new_in(arena); line_state = LineState::Normal; } } } '\t' => { return Err(( MadeProgress, space_problem( BadInputError::HasTab, state.line, state.column, ), state, )); } nonblank => { // Chars can have btye lengths of more than 1! state = state.advance_without_indenting_e( arena, nonblank.len_utf8(), space_problem, )?; comment_line_buf.push(nonblank); } } } LineState::DocComment => { match ch { ' ' => { // If we're in a doc comment, this won't affect indentation anyway. 
state = state.advance_without_indenting_e( arena, 1, space_problem, )?; comment_line_buf.push(ch); } '\n' => { state = state.newline_e(arena, space_problem)?; // This was a newline, so end this doc comment. space_list.push(DocComment(comment_line_buf.into_bump_str())); comment_line_buf = String::new_in(arena); line_state = LineState::Normal; } '\t' => { return Err(( MadeProgress, space_problem( BadInputError::HasTab, state.line, state.column, ), state, )); } nonblank => { state = state.advance_without_indenting_e( arena, utf8_len, space_problem, )?; comment_line_buf.push(nonblank); } } } } } Err(SyntaxError::BadUtf8) => { // If we hit an invalid UTF-8 character, bail out immediately. let progress = Progress::from_lengths(start_bytes_len, state.bytes.len()); let row = state.line; let col = state.column; return state.fail( arena, progress, space_problem(BadInputError::BadUtf8, row, col), ); } Err(_) => { if require_at_least_one && bytes_parsed == 0 { return Err(( NoProgress, missing_space_problem(state.line, state.column), state, )); } else { let space_slice = space_list.into_bump_slice(); // First make sure we were indented enough! // // (We only do this if we've encountered any newlines. // Otherwise, we assume indentation is already correct. // It's actively important for correctness that we skip // this check if there are no newlines, because otherwise // we would have false positives for single-line defs.) let progress = Progress::from_lengths(start_bytes_len, state.bytes.len()); if any_newlines { return Ok(( progress, space_slice, state .check_indent_e( arena, min_indent, indent_problem, start_row, start_col, ) .map_err(|(fail, _)| (progress, fail, original_state))?, )); } return Ok((progress, space_slice, state)); } } }; } if require_at_least_one && original_state.bytes.len() == state.bytes.len() { Err(( NoProgress, missing_space_problem(state.line, state.column), state, )) } else { // First make sure we were indented enough! 
// // (We only do this if we've encountered any newlines. // Otherwise, we assume indentation is already correct. // It's actively important for correctness that we skip // this check if there are no newlines, because otherwise // we would have false positives for single-line defs.) let progress = Progress::from_lengths(start_bytes_len, state.bytes.len()); if any_newlines { state = state .check_indent_e(arena, min_indent, indent_problem, start_row, start_col) .map_err(|(fail, _)| (progress, fail, original_state))?; } Ok((progress, space_list.into_bump_slice(), state)) } } }