mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-30 15:21:12 +00:00
do a fast pass first to exit early if the spaces will not be parsed
This commit is contained in:
parent
c6b13984ed
commit
65e842f64a
2 changed files with 101 additions and 52 deletions
|
@ -180,42 +180,40 @@ fn spaces_help_help<'a, E>(
|
||||||
where
|
where
|
||||||
E: 'a + SpaceProblem,
|
E: 'a + SpaceProblem,
|
||||||
{
|
{
|
||||||
use SpaceState::*;
|
|
||||||
|
|
||||||
move |arena, state: State<'a>| {
|
move |arena, state: State<'a>| {
|
||||||
let comments_and_newlines = Vec::new_in(arena);
|
match fast_eat_spaces(&state) {
|
||||||
match eat_spaces(state.clone(), false, comments_and_newlines) {
|
FastSpaceState::HasTab(position) => Err((
|
||||||
HasTab(state) => Err((
|
|
||||||
MadeProgress,
|
MadeProgress,
|
||||||
E::space_problem(BadInputError::HasTab, state.pos()),
|
E::space_problem(BadInputError::HasTab, position),
|
||||||
state,
|
state,
|
||||||
)),
|
)),
|
||||||
Good {
|
FastSpaceState::Good {
|
||||||
state: mut new_state,
|
newlines,
|
||||||
multiline,
|
consumed,
|
||||||
comments_and_newlines,
|
column,
|
||||||
} => {
|
} => {
|
||||||
if new_state.bytes() == state.bytes() {
|
if consumed == 0 {
|
||||||
Ok((NoProgress, &[] as &[_], state))
|
Ok((NoProgress, &[] as &[_], state))
|
||||||
} else if multiline {
|
} else if column < min_indent {
|
||||||
// we parsed at least one newline
|
Err((MadeProgress, indent_problem(state.pos()), state))
|
||||||
|
|
||||||
new_state.indent_column = new_state.column();
|
|
||||||
|
|
||||||
if new_state.column() >= min_indent {
|
|
||||||
Ok((
|
|
||||||
MadeProgress,
|
|
||||||
comments_and_newlines.into_bump_slice(),
|
|
||||||
new_state,
|
|
||||||
))
|
|
||||||
} else {
|
|
||||||
Err((MadeProgress, indent_problem(state.pos()), state))
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
|
let comments_and_newlines =
|
||||||
|
Vec::with_capacity_in(newlines.saturating_sub(1), arena);
|
||||||
|
let spaces = eat_spaces(state, false, comments_and_newlines);
|
||||||
|
let mut state = spaces.state;
|
||||||
|
|
||||||
|
if spaces.multiline {
|
||||||
|
// we parsed at least one newline
|
||||||
|
|
||||||
|
state.indent_column = state.column();
|
||||||
|
|
||||||
|
debug_assert!(state.column() >= min_indent);
|
||||||
|
}
|
||||||
|
|
||||||
Ok((
|
Ok((
|
||||||
MadeProgress,
|
MadeProgress,
|
||||||
comments_and_newlines.into_bump_slice(),
|
spaces.comments_and_newlines.into_bump_slice(),
|
||||||
new_state,
|
state,
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -223,13 +221,73 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
enum SpaceState<'a> {
|
enum FastSpaceState {
|
||||||
Good {
|
Good {
|
||||||
state: State<'a>,
|
newlines: usize,
|
||||||
multiline: bool,
|
consumed: usize,
|
||||||
comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
|
column: u32,
|
||||||
},
|
},
|
||||||
HasTab(State<'a>),
|
HasTab(Position),
|
||||||
|
}
|
||||||
|
|
||||||
|
fn fast_eat_spaces(state: &State) -> FastSpaceState {
|
||||||
|
use FastSpaceState::*;
|
||||||
|
|
||||||
|
let mut newlines = 0;
|
||||||
|
let mut index = 0;
|
||||||
|
let mut line_start = state.line_start.offset as usize;
|
||||||
|
let base_offset = state.pos().offset as usize;
|
||||||
|
|
||||||
|
let bytes = state.bytes();
|
||||||
|
let length = bytes.len();
|
||||||
|
|
||||||
|
'outer: while index < length {
|
||||||
|
match bytes[index] {
|
||||||
|
b' ' => {
|
||||||
|
index += 1;
|
||||||
|
}
|
||||||
|
b'\n' => {
|
||||||
|
newlines += 1;
|
||||||
|
index += 1;
|
||||||
|
line_start = base_offset + index;
|
||||||
|
}
|
||||||
|
b'\r' => {
|
||||||
|
index += 1;
|
||||||
|
line_start = base_offset + index;
|
||||||
|
}
|
||||||
|
b'\t' => {
|
||||||
|
return HasTab(Position::new(index as u32));
|
||||||
|
}
|
||||||
|
b'#' => {
|
||||||
|
index += 1;
|
||||||
|
|
||||||
|
while index < length {
|
||||||
|
match bytes[index] {
|
||||||
|
b'\n' | b'\t' | b'\r' => {
|
||||||
|
continue 'outer;
|
||||||
|
}
|
||||||
|
|
||||||
|
_ => {
|
||||||
|
index += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Good {
|
||||||
|
newlines,
|
||||||
|
consumed: index,
|
||||||
|
column: ((base_offset + index) - line_start) as u32,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct SpaceState<'a> {
|
||||||
|
state: State<'a>,
|
||||||
|
multiline: bool,
|
||||||
|
comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eat_spaces<'a>(
|
fn eat_spaces<'a>(
|
||||||
|
@ -237,8 +295,6 @@ fn eat_spaces<'a>(
|
||||||
mut multiline: bool,
|
mut multiline: bool,
|
||||||
mut comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
|
mut comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
|
||||||
) -> SpaceState<'a> {
|
) -> SpaceState<'a> {
|
||||||
use SpaceState::*;
|
|
||||||
|
|
||||||
for c in state.bytes() {
|
for c in state.bytes() {
|
||||||
match c {
|
match c {
|
||||||
b' ' => {
|
b' ' => {
|
||||||
|
@ -252,9 +308,8 @@ fn eat_spaces<'a>(
|
||||||
b'\r' => {
|
b'\r' => {
|
||||||
state = state.advance_newline();
|
state = state.advance_newline();
|
||||||
}
|
}
|
||||||
b'\t' => {
|
b'\t' => unreachable!(),
|
||||||
return HasTab(state);
|
|
||||||
}
|
|
||||||
b'#' => {
|
b'#' => {
|
||||||
state = state.advance(1);
|
state = state.advance(1);
|
||||||
return eat_line_comment(state, multiline, comments_and_newlines);
|
return eat_line_comment(state, multiline, comments_and_newlines);
|
||||||
|
@ -263,7 +318,7 @@ fn eat_spaces<'a>(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Good {
|
SpaceState {
|
||||||
state,
|
state,
|
||||||
multiline,
|
multiline,
|
||||||
comments_and_newlines,
|
comments_and_newlines,
|
||||||
|
@ -275,8 +330,6 @@ fn eat_line_comment<'a>(
|
||||||
mut multiline: bool,
|
mut multiline: bool,
|
||||||
mut comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
|
mut comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
|
||||||
) -> SpaceState<'a> {
|
) -> SpaceState<'a> {
|
||||||
use SpaceState::*;
|
|
||||||
|
|
||||||
let mut index = 0;
|
let mut index = 0;
|
||||||
let bytes = state.bytes();
|
let bytes = state.bytes();
|
||||||
let length = bytes.len();
|
let length = bytes.len();
|
||||||
|
@ -313,9 +366,7 @@ fn eat_line_comment<'a>(
|
||||||
b'\r' => {
|
b'\r' => {
|
||||||
state = state.advance_newline();
|
state = state.advance_newline();
|
||||||
}
|
}
|
||||||
b'\t' => {
|
b'\t' => unreachable!(),
|
||||||
return HasTab(state);
|
|
||||||
}
|
|
||||||
b'#' => {
|
b'#' => {
|
||||||
state = state.advance(1);
|
state = state.advance(1);
|
||||||
index += 1;
|
index += 1;
|
||||||
|
@ -327,7 +378,7 @@ fn eat_line_comment<'a>(
|
||||||
index += 1;
|
index += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return Good {
|
return SpaceState {
|
||||||
state,
|
state,
|
||||||
multiline,
|
multiline,
|
||||||
comments_and_newlines,
|
comments_and_newlines,
|
||||||
|
@ -337,7 +388,7 @@ fn eat_line_comment<'a>(
|
||||||
// consume the second #
|
// consume the second #
|
||||||
state = state.advance(1);
|
state = state.advance(1);
|
||||||
|
|
||||||
return Good {
|
return SpaceState {
|
||||||
state,
|
state,
|
||||||
multiline,
|
multiline,
|
||||||
comments_and_newlines,
|
comments_and_newlines,
|
||||||
|
@ -354,7 +405,7 @@ fn eat_line_comment<'a>(
|
||||||
|
|
||||||
while index < length {
|
while index < length {
|
||||||
match bytes[index] {
|
match bytes[index] {
|
||||||
b'\t' => return HasTab(state),
|
b'\t' => unreachable!(),
|
||||||
b'\n' => {
|
b'\n' => {
|
||||||
let comment =
|
let comment =
|
||||||
unsafe { std::str::from_utf8_unchecked(&bytes[loop_start..index]) };
|
unsafe { std::str::from_utf8_unchecked(&bytes[loop_start..index]) };
|
||||||
|
@ -381,9 +432,7 @@ fn eat_line_comment<'a>(
|
||||||
b'\r' => {
|
b'\r' => {
|
||||||
state = state.advance_newline();
|
state = state.advance_newline();
|
||||||
}
|
}
|
||||||
b'\t' => {
|
b'\t' => unreachable!(),
|
||||||
return HasTab(state);
|
|
||||||
}
|
|
||||||
b'#' => {
|
b'#' => {
|
||||||
state = state.advance(1);
|
state = state.advance(1);
|
||||||
index += 1;
|
index += 1;
|
||||||
|
@ -395,7 +444,7 @@ fn eat_line_comment<'a>(
|
||||||
index += 1;
|
index += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return Good {
|
return SpaceState {
|
||||||
state,
|
state,
|
||||||
multiline,
|
multiline,
|
||||||
comments_and_newlines,
|
comments_and_newlines,
|
||||||
|
@ -421,7 +470,7 @@ fn eat_line_comment<'a>(
|
||||||
comments_and_newlines.push(CommentOrNewline::LineComment(comment));
|
comments_and_newlines.push(CommentOrNewline::LineComment(comment));
|
||||||
}
|
}
|
||||||
|
|
||||||
return Good {
|
return SpaceState {
|
||||||
state,
|
state,
|
||||||
multiline,
|
multiline,
|
||||||
comments_and_newlines,
|
comments_and_newlines,
|
||||||
|
|
|
@ -13,7 +13,7 @@ pub struct State<'a> {
|
||||||
offset: usize,
|
offset: usize,
|
||||||
|
|
||||||
/// Position of the start of the current line
|
/// Position of the start of the current line
|
||||||
line_start: Position,
|
pub line_start: Position,
|
||||||
|
|
||||||
/// Current indentation level, in columns
|
/// Current indentation level, in columns
|
||||||
/// (so no indent is col 1 - this saves an arithmetic operation.)
|
/// (so no indent is col 1 - this saves an arithmetic operation.)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue