mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-26 13:29:12 +00:00
moved all crates into separate folder + related path fixes
This commit is contained in:
parent
12ef03bb86
commit
eee85fa45d
1063 changed files with 92 additions and 93 deletions
613
crates/compiler/parse/src/blankspace.rs
Normal file
613
crates/compiler/parse/src/blankspace.rs
Normal file
|
@ -0,0 +1,613 @@
|
|||
use crate::ast::CommentOrNewline;
|
||||
use crate::ast::Spaceable;
|
||||
use crate::parser::SpaceProblem;
|
||||
use crate::parser::{self, and, backtrackable, BadInputError, Parser, Progress::*};
|
||||
use crate::state::State;
|
||||
use bumpalo::collections::vec::Vec;
|
||||
use bumpalo::Bump;
|
||||
use roc_region::all::Loc;
|
||||
use roc_region::all::Position;
|
||||
|
||||
pub fn space0_around_ee<'a, P, S, E>(
|
||||
parser: P,
|
||||
min_indent: u32,
|
||||
indent_before_problem: fn(Position) -> E,
|
||||
indent_after_problem: fn(Position) -> E,
|
||||
) -> impl Parser<'a, Loc<S>, E>
|
||||
where
|
||||
S: Spaceable<'a>,
|
||||
S: 'a,
|
||||
P: Parser<'a, Loc<S>, E>,
|
||||
P: 'a,
|
||||
E: 'a + SpaceProblem,
|
||||
{
|
||||
parser::map_with_arena(
|
||||
and(
|
||||
space0_e(min_indent, indent_before_problem),
|
||||
and(parser, space0_e(min_indent, indent_after_problem)),
|
||||
),
|
||||
spaces_around_help,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn space0_before_optional_after<'a, P, S, E>(
|
||||
parser: P,
|
||||
min_indent: u32,
|
||||
indent_before_problem: fn(Position) -> E,
|
||||
indent_after_problem: fn(Position) -> E,
|
||||
) -> impl Parser<'a, Loc<S>, E>
|
||||
where
|
||||
S: Spaceable<'a>,
|
||||
S: 'a,
|
||||
P: Parser<'a, Loc<S>, E>,
|
||||
P: 'a,
|
||||
E: 'a + SpaceProblem,
|
||||
{
|
||||
parser::map_with_arena(
|
||||
and(
|
||||
space0_e(min_indent, indent_before_problem),
|
||||
and(
|
||||
parser,
|
||||
one_of![
|
||||
backtrackable(space0_e(min_indent, indent_after_problem)),
|
||||
succeed!(&[] as &[_]),
|
||||
],
|
||||
),
|
||||
),
|
||||
spaces_around_help,
|
||||
)
|
||||
}
|
||||
|
||||
fn spaces_around_help<'a, S>(
|
||||
arena: &'a Bump,
|
||||
tuples: (
|
||||
&'a [CommentOrNewline<'a>],
|
||||
(Loc<S>, &'a [CommentOrNewline<'a>]),
|
||||
),
|
||||
) -> Loc<S>
|
||||
where
|
||||
S: Spaceable<'a>,
|
||||
S: 'a,
|
||||
{
|
||||
let (spaces_before, (loc_val, spaces_after)) = tuples;
|
||||
|
||||
if spaces_before.is_empty() {
|
||||
if spaces_after.is_empty() {
|
||||
loc_val
|
||||
} else {
|
||||
arena
|
||||
.alloc(loc_val.value)
|
||||
.with_spaces_after(spaces_after, loc_val.region)
|
||||
}
|
||||
} else if spaces_after.is_empty() {
|
||||
arena
|
||||
.alloc(loc_val.value)
|
||||
.with_spaces_before(spaces_before, loc_val.region)
|
||||
} else {
|
||||
let wrapped_expr = arena
|
||||
.alloc(loc_val.value)
|
||||
.with_spaces_after(spaces_after, loc_val.region);
|
||||
|
||||
arena
|
||||
.alloc(wrapped_expr.value)
|
||||
.with_spaces_before(spaces_before, wrapped_expr.region)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn space0_before_e<'a, P, S, E>(
|
||||
parser: P,
|
||||
min_indent: u32,
|
||||
indent_problem: fn(Position) -> E,
|
||||
) -> impl Parser<'a, Loc<S>, E>
|
||||
where
|
||||
S: Spaceable<'a>,
|
||||
S: 'a,
|
||||
P: Parser<'a, Loc<S>, E>,
|
||||
P: 'a,
|
||||
E: 'a + SpaceProblem,
|
||||
{
|
||||
parser::map_with_arena(
|
||||
and!(space0_e(min_indent, indent_problem), parser),
|
||||
|arena: &'a Bump, (space_list, loc_expr): (&'a [CommentOrNewline<'a>], Loc<S>)| {
|
||||
if space_list.is_empty() {
|
||||
loc_expr
|
||||
} else {
|
||||
arena
|
||||
.alloc(loc_expr.value)
|
||||
.with_spaces_before(space_list, loc_expr.region)
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
pub fn space0_after_e<'a, P, S, E>(
|
||||
parser: P,
|
||||
min_indent: u32,
|
||||
indent_problem: fn(Position) -> E,
|
||||
) -> impl Parser<'a, Loc<S>, E>
|
||||
where
|
||||
S: Spaceable<'a>,
|
||||
S: 'a,
|
||||
P: Parser<'a, Loc<S>, E>,
|
||||
P: 'a,
|
||||
E: 'a + SpaceProblem,
|
||||
{
|
||||
parser::map_with_arena(
|
||||
and!(parser, space0_e(min_indent, indent_problem)),
|
||||
|arena: &'a Bump, (loc_expr, space_list): (Loc<S>, &'a [CommentOrNewline<'a>])| {
|
||||
if space_list.is_empty() {
|
||||
loc_expr
|
||||
} else {
|
||||
arena
|
||||
.alloc(loc_expr.value)
|
||||
.with_spaces_after(space_list, loc_expr.region)
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
pub fn check_indent<'a, E>(
|
||||
min_indent: u32,
|
||||
indent_problem: fn(Position) -> E,
|
||||
) -> impl Parser<'a, (), E>
|
||||
where
|
||||
E: 'a,
|
||||
{
|
||||
move |_, state: State<'a>| {
|
||||
if state.column() >= min_indent {
|
||||
Ok((NoProgress, (), state))
|
||||
} else {
|
||||
Err((NoProgress, indent_problem(state.pos()), state))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn space0_e<'a, E>(
|
||||
min_indent: u32,
|
||||
indent_problem: fn(Position) -> E,
|
||||
) -> impl Parser<'a, &'a [CommentOrNewline<'a>], E>
|
||||
where
|
||||
E: 'a + SpaceProblem,
|
||||
{
|
||||
spaces_help_help(min_indent, indent_problem)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn spaces_help_help<'a, E>(
|
||||
min_indent: u32,
|
||||
indent_problem: fn(Position) -> E,
|
||||
) -> impl Parser<'a, &'a [CommentOrNewline<'a>], E>
|
||||
where
|
||||
E: 'a + SpaceProblem,
|
||||
{
|
||||
move |arena, state: State<'a>| match fast_eat_spaces(&state) {
|
||||
FastSpaceState::HasTab(position) => Err((
|
||||
MadeProgress,
|
||||
E::space_problem(BadInputError::HasTab, position),
|
||||
state,
|
||||
)),
|
||||
FastSpaceState::Good {
|
||||
newlines,
|
||||
consumed,
|
||||
column,
|
||||
} => {
|
||||
if consumed == 0 {
|
||||
Ok((NoProgress, &[] as &[_], state))
|
||||
} else if column < min_indent {
|
||||
Err((MadeProgress, indent_problem(state.pos()), state))
|
||||
} else {
|
||||
let comments_and_newlines = Vec::with_capacity_in(newlines, arena);
|
||||
let mut spaces = eat_spaces(state, false, comments_and_newlines);
|
||||
|
||||
if spaces.multiline {
|
||||
spaces.state.indent_column = spaces.state.column();
|
||||
}
|
||||
|
||||
Ok((
|
||||
MadeProgress,
|
||||
spaces.comments_and_newlines.into_bump_slice(),
|
||||
spaces.state,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enum FastSpaceState {
|
||||
Good {
|
||||
newlines: usize,
|
||||
consumed: usize,
|
||||
column: u32,
|
||||
},
|
||||
HasTab(Position),
|
||||
}
|
||||
|
||||
fn fast_eat_spaces(state: &State) -> FastSpaceState {
|
||||
use FastSpaceState::*;
|
||||
|
||||
let mut newlines = 0;
|
||||
let mut line_start = state.line_start.offset as usize;
|
||||
let base_offset = state.pos().offset as usize;
|
||||
|
||||
let mut index = base_offset;
|
||||
let bytes = state.original_bytes();
|
||||
let length = bytes.len();
|
||||
|
||||
'outer: while index < length {
|
||||
match bytes[index] {
|
||||
b' ' => {
|
||||
index += 1;
|
||||
}
|
||||
b'\n' => {
|
||||
newlines += 1;
|
||||
index += 1;
|
||||
line_start = index;
|
||||
}
|
||||
b'\r' => {
|
||||
index += 1;
|
||||
line_start = index;
|
||||
}
|
||||
b'\t' => {
|
||||
return HasTab(Position::new(index as u32));
|
||||
}
|
||||
b'#' => {
|
||||
index += 1;
|
||||
|
||||
// try to use SIMD instructions explicitly
|
||||
// run with RUSTFLAGS="-C target-cpu=native" to enable
|
||||
#[cfg(all(
|
||||
target_arch = "x86_64",
|
||||
target_feature = "sse2",
|
||||
target_feature = "sse4.2"
|
||||
))]
|
||||
{
|
||||
use std::arch::x86_64::*;
|
||||
|
||||
// a bytestring with the three characters we're looking for (the rest is ignored)
|
||||
let needle = b"\r\n\t=============";
|
||||
let needle = unsafe { _mm_loadu_si128(needle.as_ptr() as *const _) };
|
||||
|
||||
while index < length {
|
||||
let remaining = length - index;
|
||||
let length = if remaining < 16 { remaining as i32 } else { 16 };
|
||||
|
||||
// the source bytes we'll be looking at
|
||||
let haystack =
|
||||
unsafe { _mm_loadu_si128(bytes.as_ptr().add(index) as *const _) };
|
||||
|
||||
// use first 3 characters of needle, first `length` characters of haystack
|
||||
// finds the first index where one of the `needle` characters occurs
|
||||
// or 16 when none of the needle characters occur
|
||||
let first_special_char = unsafe {
|
||||
_mm_cmpestri(needle, 3, haystack, length, _SIDD_CMP_EQUAL_ANY)
|
||||
};
|
||||
|
||||
// we've made `first_special_char` characters of progress
|
||||
index += usize::min(first_special_char as usize, remaining);
|
||||
|
||||
// if we found a special char, let the outer loop handle it
|
||||
if first_special_char != 16 {
|
||||
continue 'outer;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(all(
|
||||
target_arch = "x86_64",
|
||||
target_feature = "sse2",
|
||||
target_feature = "sse4.2"
|
||||
)))]
|
||||
{
|
||||
while index < length {
|
||||
match bytes[index] {
|
||||
b'\n' | b'\t' | b'\r' => {
|
||||
continue 'outer;
|
||||
}
|
||||
|
||||
_ => {
|
||||
index += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
|
||||
Good {
|
||||
newlines,
|
||||
consumed: index - base_offset,
|
||||
column: (index - line_start) as u32,
|
||||
}
|
||||
}
|
||||
|
||||
struct SpaceState<'a> {
|
||||
state: State<'a>,
|
||||
multiline: bool,
|
||||
comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
|
||||
}
|
||||
|
||||
fn eat_spaces<'a>(
|
||||
mut state: State<'a>,
|
||||
mut multiline: bool,
|
||||
mut comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
|
||||
) -> SpaceState<'a> {
|
||||
for c in state.bytes() {
|
||||
match c {
|
||||
b' ' => {
|
||||
state = state.advance(1);
|
||||
}
|
||||
b'\n' => {
|
||||
state = state.advance_newline();
|
||||
multiline = true;
|
||||
comments_and_newlines.push(CommentOrNewline::Newline);
|
||||
}
|
||||
b'\r' => {
|
||||
state = state.advance_newline();
|
||||
}
|
||||
b'\t' => unreachable!(),
|
||||
|
||||
b'#' => {
|
||||
state = state.advance(1);
|
||||
return eat_line_comment(state, multiline, comments_and_newlines);
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
|
||||
SpaceState {
|
||||
state,
|
||||
multiline,
|
||||
comments_and_newlines,
|
||||
}
|
||||
}
|
||||
|
||||
fn eat_line_comment<'a>(
|
||||
mut state: State<'a>,
|
||||
mut multiline: bool,
|
||||
mut comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
|
||||
) -> SpaceState<'a> {
|
||||
let mut index = state.pos().offset as usize;
|
||||
let bytes = state.original_bytes();
|
||||
let length = bytes.len();
|
||||
|
||||
'outer: loop {
|
||||
let is_doc_comment = if let Some(b'#') = bytes.get(index) {
|
||||
match bytes.get(index + 1) {
|
||||
Some(b' ') => {
|
||||
state = state.advance(2);
|
||||
index += 2;
|
||||
|
||||
true
|
||||
}
|
||||
Some(b'\n') => {
|
||||
// consume the second # and the \n
|
||||
state = state.advance(1);
|
||||
state = state.advance_newline();
|
||||
index += 2;
|
||||
|
||||
comments_and_newlines.push(CommentOrNewline::DocComment(""));
|
||||
multiline = true;
|
||||
|
||||
for c in state.bytes() {
|
||||
match c {
|
||||
b' ' => {
|
||||
state = state.advance(1);
|
||||
}
|
||||
b'\n' => {
|
||||
state = state.advance_newline();
|
||||
multiline = true;
|
||||
comments_and_newlines.push(CommentOrNewline::Newline);
|
||||
}
|
||||
b'\r' => {
|
||||
state = state.advance_newline();
|
||||
}
|
||||
b'\t' => unreachable!(),
|
||||
b'#' => {
|
||||
state = state.advance(1);
|
||||
index += 1;
|
||||
continue 'outer;
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
|
||||
index += 1;
|
||||
}
|
||||
|
||||
return SpaceState {
|
||||
state,
|
||||
multiline,
|
||||
comments_and_newlines,
|
||||
};
|
||||
}
|
||||
None => {
|
||||
// consume the second #
|
||||
state = state.advance(1);
|
||||
|
||||
return SpaceState {
|
||||
state,
|
||||
multiline,
|
||||
comments_and_newlines,
|
||||
};
|
||||
}
|
||||
|
||||
Some(_) => false,
|
||||
}
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
let loop_start = index;
|
||||
|
||||
#[cfg(all(
|
||||
target_arch = "x86_64",
|
||||
target_feature = "sse2",
|
||||
target_feature = "sse4.2"
|
||||
))]
|
||||
{
|
||||
use std::arch::x86_64::*;
|
||||
|
||||
// a bytestring with the three characters we're looking for (the rest is ignored)
|
||||
let needle = b"\r\n\t=============";
|
||||
let needle = unsafe { _mm_loadu_si128(needle.as_ptr() as *const _) };
|
||||
|
||||
while index < length {
|
||||
let remaining = length - index;
|
||||
let chunk = if remaining < 16 { remaining as i32 } else { 16 };
|
||||
|
||||
// the source bytes we'll be looking at
|
||||
let haystack = unsafe { _mm_loadu_si128(bytes.as_ptr().add(index) as *const _) };
|
||||
|
||||
// use first 3 characters of needle, first chunk` characters of haystack
|
||||
// finds the first index where one of the `needle` characters occurs
|
||||
// or 16 when none of the needle characters occur
|
||||
let first_special_char =
|
||||
unsafe { _mm_cmpestri(needle, 3, haystack, chunk, _SIDD_CMP_EQUAL_ANY) };
|
||||
|
||||
// we've made `first_special_char` characters of progress
|
||||
let progress = usize::min(first_special_char as usize, remaining);
|
||||
index += progress;
|
||||
state = state.advance(progress);
|
||||
|
||||
if first_special_char != 16 {
|
||||
match bytes[index] {
|
||||
b'\t' => unreachable!(),
|
||||
b'\n' => {
|
||||
let comment =
|
||||
unsafe { std::str::from_utf8_unchecked(&bytes[loop_start..index]) };
|
||||
|
||||
if is_doc_comment {
|
||||
comments_and_newlines.push(CommentOrNewline::DocComment(comment));
|
||||
} else {
|
||||
comments_and_newlines.push(CommentOrNewline::LineComment(comment));
|
||||
}
|
||||
state = state.advance_newline();
|
||||
multiline = true;
|
||||
|
||||
index += 1;
|
||||
while index < length {
|
||||
match bytes[index] {
|
||||
b' ' => {
|
||||
state = state.advance(1);
|
||||
}
|
||||
b'\n' => {
|
||||
state = state.advance_newline();
|
||||
multiline = true;
|
||||
comments_and_newlines.push(CommentOrNewline::Newline);
|
||||
}
|
||||
b'\r' => {
|
||||
state = state.advance_newline();
|
||||
}
|
||||
b'\t' => unreachable!(),
|
||||
b'#' => {
|
||||
state = state.advance(1);
|
||||
index += 1;
|
||||
continue 'outer;
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
|
||||
index += 1;
|
||||
}
|
||||
|
||||
return SpaceState {
|
||||
state,
|
||||
multiline,
|
||||
comments_and_newlines,
|
||||
};
|
||||
}
|
||||
b'\r' => {
|
||||
state = state.advance_newline();
|
||||
index += 1;
|
||||
}
|
||||
odd_character => {
|
||||
unreachable!(
|
||||
"unexpected_character {} {}",
|
||||
odd_character, odd_character as char
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(all(
|
||||
target_arch = "x86_64",
|
||||
target_feature = "sse2",
|
||||
target_feature = "sse4.2"
|
||||
)))]
|
||||
while index < length {
|
||||
match bytes[index] {
|
||||
b'\t' => unreachable!(),
|
||||
b'\n' => {
|
||||
let comment =
|
||||
unsafe { std::str::from_utf8_unchecked(&bytes[loop_start..index]) };
|
||||
|
||||
if is_doc_comment {
|
||||
comments_and_newlines.push(CommentOrNewline::DocComment(comment));
|
||||
} else {
|
||||
comments_and_newlines.push(CommentOrNewline::LineComment(comment));
|
||||
}
|
||||
state = state.advance_newline();
|
||||
multiline = true;
|
||||
|
||||
index += 1;
|
||||
while index < length {
|
||||
match bytes[index] {
|
||||
b' ' => {
|
||||
state = state.advance(1);
|
||||
}
|
||||
b'\n' => {
|
||||
state = state.advance_newline();
|
||||
multiline = true;
|
||||
comments_and_newlines.push(CommentOrNewline::Newline);
|
||||
}
|
||||
b'\r' => {
|
||||
state = state.advance_newline();
|
||||
}
|
||||
b'\t' => unreachable!(),
|
||||
b'#' => {
|
||||
state = state.advance(1);
|
||||
index += 1;
|
||||
continue 'outer;
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
|
||||
index += 1;
|
||||
}
|
||||
|
||||
return SpaceState {
|
||||
state,
|
||||
multiline,
|
||||
comments_and_newlines,
|
||||
};
|
||||
}
|
||||
b'\r' => {
|
||||
state = state.advance_newline();
|
||||
}
|
||||
_ => {
|
||||
state = state.advance(1);
|
||||
}
|
||||
}
|
||||
|
||||
index += 1;
|
||||
}
|
||||
|
||||
// We made it to the end of the bytes. This means there's a comment without a trailing newline.
|
||||
let comment = unsafe { std::str::from_utf8_unchecked(&bytes[loop_start..index]) };
|
||||
|
||||
if is_doc_comment {
|
||||
comments_and_newlines.push(CommentOrNewline::DocComment(comment));
|
||||
} else {
|
||||
comments_and_newlines.push(CommentOrNewline::LineComment(comment));
|
||||
}
|
||||
|
||||
return SpaceState {
|
||||
state,
|
||||
multiline,
|
||||
comments_and_newlines,
|
||||
};
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue