new space parser

This commit is contained in:
Folkert 2021-03-12 00:33:08 +01:00
parent 3e554cb21f
commit f5284d1db7
4 changed files with 266 additions and 121 deletions

View file

@ -9,8 +9,7 @@ use roc_can::scope::Scope;
use roc_collections::all::MutMap; use roc_collections::all::MutMap;
use roc_module::symbol::{IdentIds, Interns, ModuleId, ModuleIds}; use roc_module::symbol::{IdentIds, Interns, ModuleId, ModuleIds};
use roc_parse::ast; use roc_parse::ast;
use roc_parse::blankspace::space0_before; use roc_parse::parser::{State, SyntaxError};
use roc_parse::parser::{loc, Parser, State, SyntaxError};
use roc_problem::can::Problem; use roc_problem::can::Problem;
use roc_region::all::{Located, Region}; use roc_region::all::{Located, Region};
use roc_types::subs::{VarStore, Variable}; use roc_types::subs::{VarStore, Variable};
@ -31,12 +30,11 @@ pub fn parse_loc_with<'a>(
input: &'a str, input: &'a str,
) -> Result<Located<ast::Expr<'a>>, SyntaxError<'a>> { ) -> Result<Located<ast::Expr<'a>>, SyntaxError<'a>> {
let state = State::new_in(arena, input.trim().as_bytes()); let state = State::new_in(arena, input.trim().as_bytes());
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state);
answer match roc_parse::expr::test_parse_expr(0, arena, state) {
.map(|(_, loc_expr, _)| loc_expr) Ok((loc_expr, _state)) => Ok(loc_expr),
.map_err(|(_, fail, _)| fail) Err(fail) => Err(SyntaxError::Expr(fail)),
}
} }
#[allow(dead_code)] #[allow(dead_code)]

View file

@ -1,7 +1,7 @@
use crate::ast::CommentOrNewline::{self, *}; use crate::ast::CommentOrNewline::{self, *};
use crate::ast::{Attempting, Spaceable}; use crate::ast::Spaceable;
use crate::parser::{ use crate::parser::{
self, and, peek_utf8_char, unexpected, unexpected_eof, BadInputError, Col, Parser, self, and, peek_utf8_char, BadInputError, Col, Parser,
Progress::{self, *}, Progress::{self, *},
Row, State, SyntaxError, Row, State, SyntaxError,
}; };
@ -160,31 +160,6 @@ enum LineState {
DocComment, DocComment,
} }
// then(
// and!(ascii_char(b'#'), optional(ascii_string("# "))),
// |arena: &'a Bump, state: State<'a>, _, (_, opt_doc)| {
// if opt_doc != None {
// return Err(unexpected(3, Attempting::LineComment, state));
// }
// let mut length = 0;
//
// for &byte in state.bytes.iter() {
// if byte != b'\n' {
// length += 1;
// } else {
// break;
// }
// }
//
// let comment = &state.bytes[..length];
// let state = state.advance_without_indenting(length + 1)?;
// match parse_utf8(comment) {
// Ok(comment_str) => Ok((MadeProgress, comment_str, state)),
// Err(reason) => state.fail(arena, MadeProgress, reason),
// }
// },
// )
pub fn line_comment<'a>() -> impl Parser<'a, &'a str, SyntaxError<'a>> { pub fn line_comment<'a>() -> impl Parser<'a, &'a str, SyntaxError<'a>> {
|_, state: State<'a>| match chomp_line_comment(state.bytes) { |_, state: State<'a>| match chomp_line_comment(state.bytes) {
Ok(comment) => { Ok(comment) => {
@ -225,60 +200,6 @@ fn chomp_line_comment<'a>(buffer: &'a [u8]) -> Result<&'a str, Progress> {
} }
} }
#[inline(always)]
pub fn spaces_exactly<'a>(spaces_expected: u16) -> impl Parser<'a, (), SyntaxError<'a>> {
move |arena: &'a Bump, state: State<'a>| {
if spaces_expected == 0 {
return Ok((NoProgress, (), state));
}
let mut state = state;
let mut spaces_seen: u16 = 0;
while !state.bytes.is_empty() {
match peek_utf8_char(&state) {
Ok((' ', _)) => {
spaces_seen += 1;
state = state.advance_spaces(arena, 1)?;
if spaces_seen == spaces_expected {
return Ok((MadeProgress, (), state));
}
}
Ok(_) => {
return Err(unexpected(
spaces_seen.into(),
Attempting::TODO,
state.clone(),
));
}
Err(SyntaxError::BadUtf8) => {
// If we hit an invalid UTF-8 character, bail out immediately.
let progress = Progress::progress_when(spaces_seen != 0);
return state.fail(arena, progress, SyntaxError::BadUtf8);
}
Err(_) => {
if spaces_seen == 0 {
return Err(unexpected_eof(arena, state, 0));
} else {
return Err(unexpected(
spaces_seen.into(),
Attempting::TODO,
state.clone(),
));
}
}
}
}
if spaces_seen == 0 {
Err(unexpected_eof(arena, state, 0))
} else {
Err(unexpected(spaces_seen.into(), Attempting::TODO, state))
}
}
}
#[inline(always)] #[inline(always)]
pub fn spaces_exactly_e<'a>(spaces_expected: u16) -> impl Parser<'a, (), parser::EExpr<'a>> { pub fn spaces_exactly_e<'a>(spaces_expected: u16) -> impl Parser<'a, (), parser::EExpr<'a>> {
use parser::EExpr; use parser::EExpr;
@ -288,39 +209,25 @@ pub fn spaces_exactly_e<'a>(spaces_expected: u16) -> impl Parser<'a, (), parser:
return Ok((NoProgress, (), state)); return Ok((NoProgress, (), state));
} }
let mut state = state;
let mut spaces_seen: u16 = 0; let mut spaces_seen: u16 = 0;
while !state.bytes.is_empty() { for c in state.bytes {
match peek_utf8_char(&state) { match c {
Ok((' ', _)) => { b' ' => {
spaces_seen += 1; spaces_seen += 1;
state = state.advance_spaces_e(arena, 1, EExpr::IndentStart)?;
if spaces_seen == spaces_expected { if spaces_seen == spaces_expected {
let state = state.advance_spaces_e(
arena,
spaces_expected as usize,
EExpr::IndentStart,
)?;
return Ok((MadeProgress, (), state)); return Ok((MadeProgress, (), state));
} }
} }
Ok(_) => { _ => {
return Err(( return Err((
NoProgress, NoProgress,
EExpr::IndentStart(state.line, state.column), EExpr::IndentStart(state.line, state.column + spaces_seen),
state,
))
}
Err(SyntaxError::BadUtf8) => {
// If we hit an invalid UTF-8 character, bail out immediately.
let progress = Progress::progress_when(spaces_seen != 0);
return Err((
progress,
EExpr::Space(BadInputError::BadUtf8, state.line, state.column),
state,
));
}
Err(_) => {
return Err((
NoProgress,
EExpr::IndentStart(state.line, state.column),
state, state,
)) ))
} }
@ -329,12 +236,205 @@ pub fn spaces_exactly_e<'a>(spaces_expected: u16) -> impl Parser<'a, (), parser:
Err(( Err((
NoProgress, NoProgress,
EExpr::IndentStart(state.line, state.column), EExpr::IndentStart(state.line, state.column + spaces_seen),
state, state,
)) ))
} }
} }
#[inline(always)]
fn spaces_help_help<'a, E>(
min_indent: u16,
space_problem: fn(BadInputError, Row, Col) -> E,
indent_problem: fn(Row, Col) -> E,
) -> impl Parser<'a, &'a [CommentOrNewline<'a>], E>
where
E: 'a,
{
use SpaceState::*;
move |arena, mut state: State<'a>| {
let comments_and_newlines = Vec::new_in(arena);
match eat_spaces(state.bytes, state.line, state.column, comments_and_newlines) {
HasTab { row, col } => {
// there was a tab character
Err((
MadeProgress,
space_problem(BadInputError::HasTab, row, col),
State {
line: row,
column: col,
..state
},
))
}
Good {
row,
col,
bytes,
comments_and_newlines,
} => {
if bytes == state.bytes {
Ok((NoProgress, &[] as &[_], state))
} else if state.line != row {
// we parsed at least one newline
state.is_indenting = true;
state.indent_col = col;
if col >= min_indent {
state.line = row;
state.column = col;
state.bytes = bytes;
Ok((MadeProgress, comments_and_newlines.into_bump_slice(), state))
} else {
Err((
MadeProgress,
indent_problem(state.line, state.column),
state,
))
}
} else {
state.column = col;
state.bytes = bytes;
Ok((MadeProgress, comments_and_newlines.into_bump_slice(), state))
}
}
}
}
}
/// Result of scanning blank space with `eat_spaces` / `eat_line_comment`.
enum SpaceState<'a> {
/// Scanning stopped without encountering a tab.
Good {
// Row reached by the scan.
row: Row,
// Column reached by the scan.
col: Col,
// Input that remains after the consumed blank space.
bytes: &'a [u8],
// Newlines and comments collected while scanning, in the order they were pushed.
comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
},
/// A tab byte was encountered; `row` and `col` are the scan position at that point.
HasTab {
row: Row,
col: Col,
},
}
fn eat_spaces<'a>(
mut bytes: &'a [u8],
mut row: Row,
mut col: Col,
mut comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
) -> SpaceState<'a> {
use SpaceState::*;
for c in bytes {
match c {
b' ' => {
bytes = &bytes[1..];
col += 1;
}
b'\n' => {
bytes = &bytes[1..];
row += 1;
col = 0;
comments_and_newlines.push(CommentOrNewline::Newline);
}
b'\r' => {
bytes = &bytes[1..];
}
b'\t' => {
return HasTab { row, col };
}
b'#' => {
return eat_line_comment(&bytes[1..], row, col + 1, comments_and_newlines);
}
_ => break,
}
}
return Good {
row,
col,
bytes,
comments_and_newlines,
};
}
/// Consumes the remainder of a `#` comment; `bytes` starts just after the
/// first `#` and `col` is the column of that next byte.
///
/// A second `#` followed by a space marks a doc comment, whose text starts
/// after the `## ` prefix. A bare `##` followed by a newline or by the end of
/// the input yields an empty doc comment. Anything else is a line comment
/// whose text is everything after the first `#`.
///
/// The comment runs up to (not including) the next `\n`, or to the end of the
/// input when there is no trailing newline; in both cases it is pushed onto
/// `comments_and_newlines`. After a terminating newline, scanning continues
/// with `eat_spaces` at column 0 of the next row.
///
/// Returns `SpaceState::HasTab` at the current position as soon as a tab is
/// found inside the comment text.
fn eat_line_comment<'a>(
    mut bytes: &'a [u8],
    row: Row,
    mut col: Col,
    mut comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
) -> SpaceState<'a> {
    use SpaceState::*;

    let is_doc_comment = if let Some(b'#') = bytes.first() {
        match bytes.get(1) {
            Some(b' ') => {
                // skip the second # and the space that follows it
                bytes = &bytes[2..];
                col += 2;

                true
            }
            Some(b'\n') => {
                // consume the second # and the \n
                bytes = &bytes[2..];

                comments_and_newlines.push(CommentOrNewline::DocComment(""));
                return eat_spaces(bytes, row + 1, 0, comments_and_newlines);
            }
            None => {
                // consume the second #; the input ends here, so record the
                // empty doc comment just like the `##\n` case does
                col += 1;
                bytes = &bytes[1..];

                comments_and_newlines.push(CommentOrNewline::DocComment(""));
                return Good {
                    row,
                    col,
                    bytes,
                    comments_and_newlines,
                };
            }
            // e.g. `##x` or `###`: an ordinary line comment whose text starts with `#`
            _ => false,
        }
    } else {
        false
    };

    let initial = bytes;

    // `for` iterates over a copy of the slice reference, so `bytes` can be
    // advanced inside the loop to track how much has been consumed.
    for c in bytes {
        match c {
            b'\t' => return HasTab { row, col },
            b'\n' => break,
            _ => {
                bytes = &bytes[1..];
                col += 1;
            }
        }
    }

    // Length in bytes of the comment text. Derived from the slices rather
    // than from column arithmetic so it stays exact regardless of `Col`'s width.
    let consumed = initial.len() - bytes.len();

    // SAFETY: the slice starts right after an ASCII `#` / `## ` prefix and
    // ends either at a `\n` byte or at the end of the input, so it lies on
    // character boundaries and is valid UTF-8 provided the whole input is.
    // NOTE(review): this assumes the parser is only fed valid UTF-8 — confirm
    // against the callers that construct the input.
    let comment = unsafe { std::str::from_utf8_unchecked(&initial[..consumed]) };

    if is_doc_comment {
        comments_and_newlines.push(CommentOrNewline::DocComment(comment));
    } else {
        comments_and_newlines.push(CommentOrNewline::LineComment(comment));
    }

    match bytes.split_first() {
        // `bytes` is positioned on the terminating newline: skip it and keep
        // eating blank space on the next row.
        Some((_, rest)) => eat_spaces(rest, row + 1, 0, comments_and_newlines),
        // End of input: the comment had no trailing newline.
        None => Good {
            row,
            col,
            bytes,
            comments_and_newlines,
        },
    }
}
#[inline(always)] #[inline(always)]
fn spaces_help<'a, E>( fn spaces_help<'a, E>(
require_at_least_one: bool, require_at_least_one: bool,
@ -343,6 +443,46 @@ fn spaces_help<'a, E>(
indent_problem: fn(Row, Col) -> E, indent_problem: fn(Row, Col) -> E,
missing_space_problem: fn(Row, Col) -> E, missing_space_problem: fn(Row, Col) -> E,
) -> impl Parser<'a, &'a [CommentOrNewline<'a>], E> ) -> impl Parser<'a, &'a [CommentOrNewline<'a>], E>
where
E: 'a,
{
move |arena, state: State<'a>| {
if !require_at_least_one {
match spaces_help_help(min_indent, space_problem, indent_problem).parse(arena, state) {
Ok((a, b, c)) => Ok((a, b, c)),
Err((a, b, c)) => Err((a, b, c)),
}
} else {
match spaces_help_help_help(
require_at_least_one,
min_indent,
space_problem,
indent_problem,
missing_space_problem,
)
.parse(arena, state)
{
Ok((a, b, c)) => {
//dbg!(&c);
Ok((a, b, c))
}
Err((a, b, c)) => {
//dbg!(&c);
Err((a, b, c))
}
}
}
}
}
#[inline(always)]
fn spaces_help_help_help<'a, E>(
require_at_least_one: bool,
min_indent: u16,
space_problem: fn(BadInputError, Row, Col) -> E,
indent_problem: fn(Row, Col) -> E,
missing_space_problem: fn(Row, Col) -> E,
) -> impl Parser<'a, &'a [CommentOrNewline<'a>], E>
where where
E: 'a, E: 'a,
{ {

View file

@ -2840,7 +2840,7 @@ mod test_parse {
#[test] #[test]
fn outdenting_newline_after_else() { fn outdenting_newline_after_else() {
let arena = Bump::new(); let arena = &Bump::new();
// highlights a problem with the else branch demanding a newline after its expression // highlights a problem with the else branch demanding a newline after its expression
let src = indoc!( let src = indoc!(
@ -2852,13 +2852,19 @@ mod test_parse {
"# "#
); );
let actual = module_defs() let state = State::new_in(arena, src.as_bytes());
.parse(&arena, State::new_in(&arena, src.as_bytes())) let parser = module_defs();
.map(|tuple| tuple.0); let parsed = parser.parse(arena, state);
match parsed {
dbg!(&actual); Ok((_, _, state)) => {
dbg!(state);
assert!(actual.is_ok()); return;
}
Err((_, _fail, _state)) => {
dbg!(_fail, _state);
assert!(false);
}
}
} }
#[test] #[test]

View file

@ -1713,6 +1713,7 @@ fn to_type_report<'a>(
} }
Type::TAsIndentStart(row, col) => { Type::TAsIndentStart(row, col) => {
dbg!(row, col);
let surroundings = Region::from_rows_cols(start_row, start_col, *row, *col); let surroundings = Region::from_rows_cols(start_row, start_col, *row, *col);
let region = Region::from_row_col(*row, *col); let region = Region::from_row_col(*row, *col);