Add a lexing-based 'highlight' mode to the parser

basic syntax highlighting

added more syntax highlighting coverage

add example of a markdown table with styling

move FIXED_TOKEN logic into highlight

refactor highlight, add support for backpassing

escape html from source code

fix bug with <pre> tag ordering

refactor out html from roc_parse

remove test, put highlight functionality into separate file

fix typo
This commit is contained in:
Luke Boswell 2023-02-28 17:03:49 +11:00
parent 7ccc23ca06
commit 1590b30b19
No known key found for this signature in database
GPG key ID: F6DB3C9DB47377B0
12 changed files with 1413 additions and 114 deletions

View file

@ -1,5 +1,6 @@
use crate::ast::CommentOrNewline;
use crate::ast::Spaceable;
use crate::parser::Progress;
use crate::parser::SpaceProblem;
use crate::parser::{self, and, backtrackable, BadInputError, Parser, Progress::*};
use crate::state::State;
@ -7,6 +8,7 @@ use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use roc_region::all::Loc;
use roc_region::all::Position;
use roc_region::all::Region;
pub fn space0_around_ee<'a, P, S, E>(
parser: P,
// NOTE(review): this span reads like a rendered diff hunk whose +/- markers were stripped, so
// removed and added lines of `spaces` appear interleaved (two consecutive closure headers, and
// both a `match consume_spaces(..)` result and a trailing `Ok(..)`). It is not compilable as
// shown; confirm against the real diff before editing any code here.
@ -386,98 +388,132 @@ pub fn spaces<'a, E>() -> impl Parser<'a, &'a [CommentOrNewline<'a>], E>
where
E: 'a + SpaceProblem,
{
// Presumably the pre-change closure header (it needs `mut state` for the inline loop) — confirm.
move |arena, mut state: State<'a>, _min_indent: u32| {
// Presumably the post-change closure header; `state` is passed by value to `consume_spaces`.
move |arena, state: State<'a>, _min_indent: u32| {
// Collected comments/newlines, allocated in the arena.
let mut newlines = Vec::new_in(arena);
// NOTE(review): from here to the closing brace of the `loop` looks like the removed inline
// implementation; it performs the same steps as the body of `consume_spaces`, pushing onto
// `newlines` directly instead of calling a callback.
let mut progress = NoProgress;
loop {
let whitespace = fast_eat_whitespace(state.bytes());
if whitespace > 0 {
state.advance_mut(whitespace);
progress = MadeProgress;
}
match state.bytes().first() {
Some(b'#') => {
state.advance_mut(1);
// A doc comment is a second `#` followed by a space, `\n`, CRLF, or end of input.
let is_doc_comment = state.bytes().first() == Some(&b'#')
&& (state.bytes().get(1) == Some(&b' ')
|| state.bytes().get(1) == Some(&b'\n')
|| begins_with_crlf(&state.bytes()[1..])
|| Option::is_none(&state.bytes().get(1)));
if is_doc_comment {
// Skip the second `#` and at most one following space so they are not part of the text.
state.advance_mut(1);
if state.bytes().first() == Some(&b' ') {
state.advance_mut(1);
}
}
let len = fast_eat_until_control_character(state.bytes());
// We already checked that the string is valid UTF-8
debug_assert!(std::str::from_utf8(&state.bytes()[..len]).is_ok());
let text = unsafe { std::str::from_utf8_unchecked(&state.bytes()[..len]) };
let comment = if is_doc_comment {
CommentOrNewline::DocComment(text)
} else {
CommentOrNewline::LineComment(text)
};
newlines.push(comment);
state.advance_mut(len);
// Consume the line terminator after the comment, if any (CRLF or LF).
if begins_with_crlf(state.bytes()) {
state.advance_mut(1);
state = state.advance_newline();
} else if state.bytes().first() == Some(&b'\n') {
state = state.advance_newline();
}
progress = MadeProgress;
}
Some(b'\r') => {
// `\r` is only accepted as the first half of a CRLF pair.
if state.bytes().get(1) == Some(&b'\n') {
newlines.push(CommentOrNewline::Newline);
state.advance_mut(1);
state = state.advance_newline();
progress = MadeProgress;
} else {
return Err((
progress,
E::space_problem(
BadInputError::HasMisplacedCarriageReturn,
state.pos(),
),
));
}
}
Some(b'\n') => {
newlines.push(CommentOrNewline::Newline);
state = state.advance_newline();
progress = MadeProgress;
}
Some(b'\t') => {
return Err((
progress,
E::space_problem(BadInputError::HasTab, state.pos()),
));
}
// Any other byte below b' ' is rejected as an ASCII control character.
Some(x) if *x < b' ' => {
return Err((
progress,
E::space_problem(BadInputError::HasAsciiControl, state.pos()),
));
}
_ => {
// Only re-mark the indent when at least one comment or newline was collected.
if !newlines.is_empty() {
state = state.mark_current_indent();
}
break;
}
}
// Presumably the added implementation: delegate to `consume_spaces`, pushing each reported
// item onto `newlines` and ignoring the start/end positions passed to the callback.
match consume_spaces(state, |_, space, _| newlines.push(space)) {
Ok((progress, state)) => Ok((progress, newlines.into_bump_slice(), state)),
Err((progress, err)) => Err((progress, err)),
}
// Presumably the removed implementation's final result — confirm against the diff.
Ok((progress, newlines.into_bump_slice(), state))
}
}
/// Builds a parser that consumes spaces, comments, and newlines, returning every comment or
/// newline it saw wrapped in a [`Loc`] that records where it was found.
///
/// Each item's region is `Region::between(start, end)`, using the two positions that
/// `consume_spaces` reports for it. The resulting slice is allocated in the parser's arena.
/// The `_min_indent` argument is ignored.
///
/// # Errors
///
/// Any `(Progress, E)` error produced by `consume_spaces` is passed through unchanged.
pub fn loc_spaces<'a, E>() -> impl Parser<'a, &'a [Loc<CommentOrNewline<'a>>], E>
where
    E: 'a + SpaceProblem,
{
    move |arena, state: State<'a>, _min_indent: u32| {
        let mut located = Vec::new_in(arena);

        // The callback's borrow of `located` ends once `consume_spaces` returns, so the
        // vector can then be turned into an arena slice on the success path.
        let outcome = consume_spaces(state, |from, item, to| {
            located.push(Loc::at(Region::between(from, to), item))
        });

        outcome.map(|(progress, rest)| (progress, located.into_bump_slice(), rest))
    }
}
/// Consumes a run of whitespace, `#` comments, and newlines from the front of `state`,
/// reporting every comment and newline to `on_space`.
///
/// `on_space(start, item, end)` is called once per item:
/// * for a comment, `start` is the position of the leading `#` and `end` is the position just
///   after the comment text (before its line terminator);
/// * for a newline, `start` is the position of the `\n` (or of the `\r` in a CRLF pair) and
///   `end` is the position after `advance_newline`.
///
/// A comment is a doc comment when it starts with `##` followed by a space, `\n`, CRLF, or the
/// end of input; the second `#` and at most one following space are excluded from its text.
///
/// Returns the progress made and the advanced state. If at least one comment or newline was
/// reported, `mark_current_indent` is applied to the state before returning.
///
/// # Errors
///
/// Returns `E::space_problem(..)` at the current position for a `\r` not followed by `\n`
/// (`HasMisplacedCarriageReturn`), a tab (`HasTab`), or any other byte below `b' '`
/// (`HasAsciiControl`).
fn consume_spaces<'a, E, F>(
    mut state: State<'a>,
    mut on_space: F,
) -> Result<(Progress, State<'a>), (Progress, E)>
where
    E: 'a + SpaceProblem,
    F: FnMut(Position, CommentOrNewline<'a>, Position),
{
    let mut progress = NoProgress;
    let mut reported_any = false;

    loop {
        let skipped = fast_eat_whitespace(state.bytes());
        if skipped > 0 {
            state.advance_mut(skipped);
            progress = MadeProgress;
        }

        let start = state.pos();

        // Arms that consume input `continue`; the arms that reject input yield the problem
        // kind, which is turned into an error after the match.
        let problem = match state.bytes().first().copied() {
            Some(b'#') => {
                state.advance_mut(1);

                let after_hash = state.bytes();
                let is_doc_comment = after_hash.first() == Some(&b'#')
                    && (matches!(after_hash.get(1), None | Some(b' ') | Some(b'\n'))
                        || begins_with_crlf(&after_hash[1..]));

                if is_doc_comment {
                    // Drop the second `#` and a single optional space from the text.
                    state.advance_mut(1);
                    if state.bytes().first() == Some(&b' ') {
                        state.advance_mut(1);
                    }
                }

                let body = state.bytes();
                let len = fast_eat_until_control_character(body);

                // SAFETY: relies on the input having already been checked to be valid UTF-8
                // (asserted in debug builds); presumably the cut at `len` falls on a character
                // boundary because the scan stops at a control character — confirm.
                debug_assert!(std::str::from_utf8(&body[..len]).is_ok());
                let text = unsafe { std::str::from_utf8_unchecked(&body[..len]) };

                state.advance_mut(len);

                let comment = if is_doc_comment {
                    CommentOrNewline::DocComment(text)
                } else {
                    CommentOrNewline::LineComment(text)
                };
                on_space(start, comment, state.pos());
                reported_any = true;

                // Swallow the line terminator that ends the comment, if there is one.
                if begins_with_crlf(state.bytes()) {
                    state.advance_mut(1);
                    state = state.advance_newline();
                } else if state.bytes().first() == Some(&b'\n') {
                    state = state.advance_newline();
                }

                progress = MadeProgress;
                continue;
            }
            Some(b'\r') if state.bytes().get(1) == Some(&b'\n') => {
                state.advance_mut(1);
                state = state.advance_newline();
                on_space(start, CommentOrNewline::Newline, state.pos());
                reported_any = true;
                progress = MadeProgress;
                continue;
            }
            Some(b'\r') => BadInputError::HasMisplacedCarriageReturn,
            Some(b'\n') => {
                state = state.advance_newline();
                on_space(start, CommentOrNewline::Newline, state.pos());
                reported_any = true;
                progress = MadeProgress;
                continue;
            }
            Some(b'\t') => BadInputError::HasTab,
            Some(byte) if byte < b' ' => BadInputError::HasAsciiControl,
            _ => {
                if reported_any {
                    state = state.mark_current_indent();
                }
                return Ok((progress, state));
            }
        };

        return Err((progress, E::space_problem(problem, state.pos())));
    }
}