Add a lexing-based 'highlight' mode to the parser

basic syntax highlighting

added more syntax highlighting coverage

add example of a markdown table with styling

move FIXED_TOKEN logic into highlight

refactor highlight, add support for backpassing

escape html from source code

fix bug with <pre> tag ordering

refactor out html from roc_parse

remove test, put highlight functionality into separate file

fix typo
This commit is contained in:
Luke Boswell 2023-02-28 17:03:49 +11:00
parent 7ccc23ca06
commit 1590b30b19
No known key found for this signature in database
GPG key ID: F6DB3C9DB47377B0
12 changed files with 1413 additions and 114 deletions

View file

@ -1,5 +1,6 @@
use crate::ast::CommentOrNewline; use crate::ast::CommentOrNewline;
use crate::ast::Spaceable; use crate::ast::Spaceable;
use crate::parser::Progress;
use crate::parser::SpaceProblem; use crate::parser::SpaceProblem;
use crate::parser::{self, and, backtrackable, BadInputError, Parser, Progress::*}; use crate::parser::{self, and, backtrackable, BadInputError, Parser, Progress::*};
use crate::state::State; use crate::state::State;
@ -7,6 +8,7 @@ use bumpalo::collections::vec::Vec;
use bumpalo::Bump; use bumpalo::Bump;
use roc_region::all::Loc; use roc_region::all::Loc;
use roc_region::all::Position; use roc_region::all::Position;
use roc_region::all::Region;
pub fn space0_around_ee<'a, P, S, E>( pub fn space0_around_ee<'a, P, S, E>(
parser: P, parser: P,
@ -386,98 +388,132 @@ pub fn spaces<'a, E>() -> impl Parser<'a, &'a [CommentOrNewline<'a>], E>
where where
E: 'a + SpaceProblem, E: 'a + SpaceProblem,
{ {
move |arena, mut state: State<'a>, _min_indent: u32| { move |arena, state: State<'a>, _min_indent: u32| {
let mut newlines = Vec::new_in(arena); let mut newlines = Vec::new_in(arena);
let mut progress = NoProgress;
loop {
let whitespace = fast_eat_whitespace(state.bytes());
if whitespace > 0 {
state.advance_mut(whitespace);
progress = MadeProgress;
}
match state.bytes().first() { match consume_spaces(state, |_, space, _| newlines.push(space)) {
Some(b'#') => { Ok((progress, state)) => Ok((progress, newlines.into_bump_slice(), state)),
state.advance_mut(1); Err((progress, err)) => Err((progress, err)),
let is_doc_comment = state.bytes().first() == Some(&b'#')
&& (state.bytes().get(1) == Some(&b' ')
|| state.bytes().get(1) == Some(&b'\n')
|| begins_with_crlf(&state.bytes()[1..])
|| Option::is_none(&state.bytes().get(1)));
if is_doc_comment {
state.advance_mut(1);
if state.bytes().first() == Some(&b' ') {
state.advance_mut(1);
}
}
let len = fast_eat_until_control_character(state.bytes());
// We already checked that the string is valid UTF-8
debug_assert!(std::str::from_utf8(&state.bytes()[..len]).is_ok());
let text = unsafe { std::str::from_utf8_unchecked(&state.bytes()[..len]) };
let comment = if is_doc_comment {
CommentOrNewline::DocComment(text)
} else {
CommentOrNewline::LineComment(text)
};
newlines.push(comment);
state.advance_mut(len);
if begins_with_crlf(state.bytes()) {
state.advance_mut(1);
state = state.advance_newline();
} else if state.bytes().first() == Some(&b'\n') {
state = state.advance_newline();
}
progress = MadeProgress;
}
Some(b'\r') => {
if state.bytes().get(1) == Some(&b'\n') {
newlines.push(CommentOrNewline::Newline);
state.advance_mut(1);
state = state.advance_newline();
progress = MadeProgress;
} else {
return Err((
progress,
E::space_problem(
BadInputError::HasMisplacedCarriageReturn,
state.pos(),
),
));
}
}
Some(b'\n') => {
newlines.push(CommentOrNewline::Newline);
state = state.advance_newline();
progress = MadeProgress;
}
Some(b'\t') => {
return Err((
progress,
E::space_problem(BadInputError::HasTab, state.pos()),
));
}
Some(x) if *x < b' ' => {
return Err((
progress,
E::space_problem(BadInputError::HasAsciiControl, state.pos()),
));
}
_ => {
if !newlines.is_empty() {
state = state.mark_current_indent();
}
break;
}
}
} }
Ok((progress, newlines.into_bump_slice(), state))
} }
} }
/// Like [`spaces`], but every comment or newline that is consumed is wrapped in a
/// [`Loc`] recording the region of source it was read from.
///
/// The `_min_indent` argument is accepted to satisfy the parser signature and is ignored.
pub fn loc_spaces<'a, E>() -> impl Parser<'a, &'a [Loc<CommentOrNewline<'a>>], E>
where
    E: 'a + SpaceProblem,
{
    move |arena, state: State<'a>, _min_indent: u32| {
        let mut located = Vec::new_in(arena);

        // The callback receives the position before and after each item.
        let outcome = consume_spaces(state, |from, item, to| {
            located.push(Loc::at(Region::between(from, to), item))
        });

        // Errors pass through untouched; on success attach the collected items.
        outcome.map(|(progress, next_state)| (progress, located.into_bump_slice(), next_state))
    }
}
/// Shared scanner behind `spaces` and `loc_spaces`.
///
/// Advances `state` over blank space, `#` comments and line breaks. For every comment
/// or newline found, `on_space(start, item, end)` is invoked with the positions
/// immediately before and after the item (for comments, `end` is the end of the comment
/// text, before its line terminator).
///
/// Returns the progress made and the advanced state, or a `SpaceProblem` error for a
/// carriage return not followed by `\n`, a tab, or any other byte below `b' '`.
/// If at least one comment or newline was seen, the current indent is re-marked on the
/// returned state.
fn consume_spaces<'a, E, F>(
    mut state: State<'a>,
    mut on_space: F,
) -> Result<(Progress, State<'a>), (Progress, E)>
where
    E: 'a + SpaceProblem,
    F: FnMut(Position, CommentOrNewline<'a>, Position),
{
    let mut progress = NoProgress;
    let mut saw_item = false;

    loop {
        let blank_len = fast_eat_whitespace(state.bytes());
        if blank_len > 0 {
            state.advance_mut(blank_len);
            progress = MadeProgress;
        }

        let item_start = state.pos();

        // Arms that consume something `continue`; arms that detect bad input evaluate
        // to the problem, which is reported once below the match.
        let problem = match state.bytes().first() {
            Some(b'#') => {
                state.advance_mut(1);

                // A doc comment is `##` followed by a space, a line break, or end of input.
                let after_hash = state.bytes();
                let is_doc_comment = after_hash.first() == Some(&b'#')
                    && match after_hash.get(1) {
                        None | Some(b' ') | Some(b'\n') => true,
                        Some(_) => begins_with_crlf(&after_hash[1..]),
                    };

                if is_doc_comment {
                    // Skip the second `#` and at most one space after it.
                    state.advance_mut(1);
                    if state.bytes().first() == Some(&b' ') {
                        state.advance_mut(1);
                    }
                }

                let text_len = fast_eat_until_control_character(state.bytes());
                let text_bytes = &state.bytes()[..text_len];

                debug_assert!(std::str::from_utf8(text_bytes).is_ok());
                // SAFETY: the source was already checked to be valid UTF-8; the debug
                // assertion above re-checks this particular slice in debug builds.
                let text = unsafe { std::str::from_utf8_unchecked(text_bytes) };

                let comment = match is_doc_comment {
                    true => CommentOrNewline::DocComment(text),
                    false => CommentOrNewline::LineComment(text),
                };

                state.advance_mut(text_len);
                on_space(item_start, comment, state.pos());
                saw_item = true;

                // Swallow the line terminator that ends the comment, if there is one.
                if begins_with_crlf(state.bytes()) {
                    state.advance_mut(1);
                    state = state.advance_newline();
                } else if state.bytes().first() == Some(&b'\n') {
                    state = state.advance_newline();
                }

                progress = MadeProgress;
                continue;
            }
            Some(b'\r') if state.bytes().get(1) == Some(&b'\n') => {
                state.advance_mut(1);
                state = state.advance_newline();
                on_space(item_start, CommentOrNewline::Newline, state.pos());
                saw_item = true;
                progress = MadeProgress;
                continue;
            }
            Some(b'\r') => BadInputError::HasMisplacedCarriageReturn,
            Some(b'\n') => {
                state = state.advance_newline();
                on_space(item_start, CommentOrNewline::Newline, state.pos());
                saw_item = true;
                progress = MadeProgress;
                continue;
            }
            Some(b'\t') => BadInputError::HasTab,
            Some(x) if *x < b' ' => BadInputError::HasAsciiControl,
            _ => {
                if saw_item {
                    state = state.mark_current_indent();
                }
                break;
            }
        };

        return Err((progress, E::space_problem(problem, state.pos())));
    }

    Ok((progress, state))
}

View file

@ -0,0 +1,565 @@
use encode_unicode::CharExt;
use std::collections::HashSet;
use bumpalo::Bump;
use roc_region::all::{Loc, Region};
use crate::{
ast::CommentOrNewline,
blankspace::loc_spaces,
keyword::KEYWORDS,
number_literal::positive_number_literal,
parser::{EExpr, ParseResult, Parser},
state::State,
string_literal::{parse_str_like_literal, StrLikeLiteral},
};
/// The category assigned to a span of source text by `highlight`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Token {
/// A `#` line comment.
LineComment,
/// A `##` doc comment.
DocComment,
/// A span that was skipped because a sub-parser (spaces, string or number) failed on it.
Error,
/// A single-quoted literal.
SingleQuote,
/// A string literal.
String,
// NOTE(review): the next three variants are not emitted by the lexer code visible in
// this file; presumably reserved for highlighting inside string literals — confirm.
UnicodeEscape,
EscapedChar,
Interpolated,
/// An identifier that is a member of the active keyword set.
Keyword,
/// An identifier whose first character is uppercase.
UpperIdent,
/// An identifier whose first character is alphabetic but not uppercase.
LowerIdent,
/// A number literal.
Number,
/// Any character that has no dedicated token.
Other,
/// `-`
Minus,
/// `+`
Plus,
/// `:`
Colon,
/// `|`
Bar,
/// `=`
Equals,
/// `>`
GreaterThan,
/// `<`
LessThan,
/// `,`
Comma,
/// `\`
Backslash,
/// `{` or `}`
Brace,
/// `[` or `]`
Bracket,
/// `(` or `)`
Paren,
/// `->`, produced by combining `Minus` and `GreaterThan`.
Arrow,
/// `|>`, produced by combining `Bar` and `GreaterThan`.
Pipe,
/// `<-`, produced by combining `LessThan` and `Minus`.
Backpass,
}
/// Lexes `text` into located highlight tokens.
///
/// If `text` begins with a module header that parses, the header portion is lexed with
/// `HEADER_KEYWORDS` and the remainder with `KEYWORDS`; otherwise everything is lexed
/// with `KEYWORDS`. Adjacent operator pieces are merged by `combine_tokens` before
/// the result is returned.
pub fn highlight(text: &str) -> Vec<Loc<Token>> {
    let mut raw_tokens = Vec::new();
    let state = State::new(text.as_bytes());
    let arena = Bump::new();

    let header_keywords: HashSet<_> = HEADER_KEYWORDS.iter().copied().collect();
    let body_keywords: HashSet<_> = KEYWORDS.iter().copied().collect();

    match crate::module::header().parse(&arena, state.clone(), 0) {
        Ok((_progress, _header, body_state)) => {
            // Everything the header parser consumed is the header text.
            let header_len = state.bytes().len() - body_state.bytes().len();
            let header_state = State::new(text[..header_len].as_bytes());

            highlight_inner(&arena, header_state, &mut raw_tokens, &header_keywords);
            highlight_inner(&arena, body_state, &mut raw_tokens, &body_keywords);
        }
        Err(_) => {
            highlight_inner(&arena, state, &mut raw_tokens, &body_keywords);
        }
    }

    combine_tokens(raw_tokens)
}
/// Merges two-token operator sequences into a single token:
///
/// * `Minus` + `GreaterThan` becomes `Arrow` (`->`)
/// * `Bar` + `GreaterThan` becomes `Pipe` (`|>`)
/// * `LessThan` + `Minus` becomes `Backpass` (`<-`)
///
/// Every other token is passed through unchanged and in its original order. A token that
/// could start a pair (`-`, `|`, `<`) is held back for one step; if the following token
/// does not complete the pair (or there is no following token) it is emitted as-is, so no
/// input token is ever dropped.
///
/// NOTE: the two halves are not required to be adjacent in the source; only their order
/// in `locations` matters.
fn combine_tokens(locations: Vec<Loc<Token>>) -> Vec<Loc<Token>> {
    let mut tokens: Vec<Loc<Token>> = Vec::with_capacity(locations.len());
    // A `-`, `|` or `<` waiting to see whether the next token completes an operator.
    let mut pending: Option<Loc<Token>> = None;

    for location in locations {
        if let Some(prev) = pending.take() {
            let combined = match (prev.value, location.value) {
                (Token::Minus, Token::GreaterThan) => Some(Token::Arrow),
                (Token::Bar, Token::GreaterThan) => Some(Token::Pipe),
                (Token::LessThan, Token::Minus) => Some(Token::Backpass),
                _ => None,
            };

            if let Some(token) = combined {
                tokens.push(Loc::at(
                    Region::between(prev.region.start(), location.region.end()),
                    token,
                ));
                continue;
            }

            // Not a pair: the held token stands on its own, ahead of the current one.
            tokens.push(prev);
        }

        if matches!(
            location.value,
            Token::LessThan | Token::Bar | Token::Minus
        ) {
            pending = Some(location);
        } else {
            tokens.push(location);
        }
    }

    // Input ended while a possible operator prefix was still held back.
    if let Some(last) = pending {
        tokens.push(last);
    }

    tokens
}
/// Lexes everything remaining in `state` and appends the resulting tokens to `tokens`.
///
/// * Blank space and comments are parsed with `loc_spaces`; comments become
///   `LineComment` / `DocComment` tokens and newlines produce no token.
/// * `"` and `'` start a string-like literal.
/// * An alphabetic character starts an identifier made of alphabetic characters and
///   ASCII digits; it is a `Keyword` if `keywords` contains it, otherwise an
///   `UpperIdent` or `LowerIdent` depending on its first character.
/// * An ASCII digit starts a number literal.
/// * Known punctuation maps to its own token; any other character becomes `Other`.
///
/// When a sub-parser fails, the offending span is skipped with `fast_forward_to` and
/// reported as an `Error` token. Lexing stops at end of input or at bytes that do not
/// decode as a UTF-8 character.
fn highlight_inner<'a>(
    arena: &'a Bump,
    mut state: State<'a>,
    tokens: &mut Vec<Loc<Token>>,
    keywords: &HashSet<&str>,
) {
    while let Ok((first, width)) = char::from_utf8_slice_start(state.bytes()) {
        let start = state.pos();

        match first {
            ' ' | '\n' | '\t' | '\r' | '#' => {
                let res: ParseResult<'a, _, EExpr<'a>> =
                    loc_spaces().parse(arena, state.clone(), 0);

                if let Ok((_, spaces, new_state)) = res {
                    state = new_state;

                    for space in spaces {
                        let token = match space.value {
                            // Newlines are not highlighted.
                            CommentOrNewline::Newline => continue,
                            CommentOrNewline::LineComment(_) => Token::LineComment,
                            CommentOrNewline::DocComment(_) => Token::DocComment,
                        };
                        tokens.push(Loc::at(space.region, token));
                    }
                } else {
                    fast_forward_to(&mut state, tokens, start, |c| c == b'\n');
                }
            }
            '"' | '\'' => {
                if let Ok((_, item, new_state)) =
                    parse_str_like_literal().parse(arena, state.clone(), 0)
                {
                    state = new_state;

                    let token = match item {
                        StrLikeLiteral::SingleQuote(_) => Token::SingleQuote,
                        StrLikeLiteral::Str(_) => Token::String,
                    };
                    tokens.push(Loc::at(Region::between(start, state.pos()), token));
                } else {
                    fast_forward_to(&mut state, tokens, start, |c| c == b'\n');
                }
            }
            c if c.is_alphabetic() => {
                let buffer = state.bytes();
                let is_upper = c.is_uppercase();

                // Chomp whole characters so the slice below always ends on a boundary.
                let mut chomped = 0;
                while let Ok((ch, ch_width)) = char::from_utf8_slice_start(&buffer[chomped..]) {
                    if ch.is_alphabetic() || ch.is_ascii_digit() {
                        chomped += ch_width;
                    } else {
                        break;
                    }
                }

                let ident = std::str::from_utf8(&buffer[..chomped])
                    .expect("identifier was chomped one whole UTF-8 character at a time");
                state.advance_mut(chomped);

                let token = if keywords.contains(ident) {
                    Token::Keyword
                } else if is_upper {
                    Token::UpperIdent
                } else {
                    Token::LowerIdent
                };
                tokens.push(Loc::at(Region::between(start, state.pos()), token));
            }
            '0'..='9' => {
                if let Ok((_, _item, new_state)) =
                    positive_number_literal().parse(arena, state.clone(), 0)
                {
                    state = new_state;
                    tokens.push(Loc::at(Region::between(start, state.pos()), Token::Number));
                } else {
                    fast_forward_to(&mut state, tokens, start, |b| !b.is_ascii_digit());
                }
            }
            single => {
                let token = match single {
                    ':' => Token::Colon,
                    '|' => Token::Bar,
                    '-' => Token::Minus,
                    '+' => Token::Plus,
                    '=' => Token::Equals,
                    '>' => Token::GreaterThan,
                    '<' => Token::LessThan,
                    ',' => Token::Comma,
                    '\\' => Token::Backslash,
                    '{' | '}' => Token::Brace,
                    '[' | ']' => Token::Bracket,
                    '(' | ')' => Token::Paren,
                    _ => Token::Other,
                };

                // Advance by the character's full encoded width. Stepping a single byte
                // into a multi-byte character would leave the state mid-character, the
                // next decode would fail, and the rest of the input would go unlexed.
                state.advance_mut(width);
                tokens.push(Loc::at(Region::between(start, state.pos()), token));
            }
        }
    }
}
/// Error recovery: skips ahead in `state` and records the skipped span, beginning at
/// `start`, as a single `Token::Error`.
///
/// The byte `state` currently points at is always consumed (when there is one); after
/// that, bytes are skipped until `end` returns `true` for the next byte or the input runs
/// out. The byte that satisfies `end` is left unconsumed.
///
/// Consuming the first byte unconditionally guarantees forward progress. Without it, a
/// caller that fast-forwards "to the next `\n`" while already positioned on a `\n`
/// (e.g. when the blank-space parser fails on a control character right after a newline)
/// would not move at all and would retry the same position forever.
fn fast_forward_to(
    state: &mut State,
    tokens: &mut Vec<Loc<Token>>,
    start: roc_region::all::Position,
    end: impl Fn(u8) -> bool,
) {
    if !state.bytes().is_empty() {
        state.advance_mut(1);
    }

    while let Some(b) = state.bytes().first() {
        if end(*b) {
            break;
        }
        state.advance_mut(1);
    }

    tokens.push(Loc::at(Region::between(start, state.pos()), Token::Error));
}
/// Words highlighted as keywords inside a module header (see `highlight`, which uses
/// this list for the header portion of the source instead of `KEYWORDS`).
pub const HEADER_KEYWORDS: [&str; 14] = [
"interface",
"app",
"package",
"platform",
"hosted",
"exposes",
"imports",
"with",
"generates",
// NOTE(review): "package" is already listed above. The duplicate is harmless because
// `highlight` collects this list into a `HashSet`, but removing it would change the
// array length in this constant's public type.
"package",
"packages",
"requires",
"provides",
"to",
];
#[cfg(test)]
mod tests {
    use roc_region::all::Position;

    use super::*;

    /// Expected token covering the byte offsets `$start..$end`.
    macro_rules! tok {
        ($start:expr, $end:expr, $token:expr) => {
            Loc::at(
                Region::between(Position::new($start), Position::new($end)),
                $token,
            )
        };
    }

    #[test]
    fn test_highlight_comments() {
        let actual = highlight("# a\n#b\n#c");

        let expected = vec![
            tok!(0, 3, Token::LineComment),
            tok!(4, 6, Token::LineComment),
            tok!(7, 9, Token::LineComment),
        ];

        assert_eq!(actual, expected);
    }

    #[test]
    fn test_highlight_doc_comments() {
        let actual = highlight("## a\n##b\n##c");

        let expected = vec![
            tok!(0, 4, Token::DocComment),
            // `##b` and `##c` have no space after the hashes, so they are line comments
            tok!(5, 8, Token::LineComment),
            tok!(9, 12, Token::LineComment),
        ];

        assert_eq!(actual, expected);
    }

    #[test]
    fn test_highlight_strings() {
        let actual = highlight(r#""a""#);

        assert_eq!(actual, vec![tok!(0, 3, Token::String)]);
    }

    #[test]
    fn test_highlight_single_quotes() {
        let actual = highlight(r#"'a'"#);

        assert_eq!(actual, vec![tok!(0, 3, Token::SingleQuote)]);
    }

    #[test]
    fn test_highlight_header() {
        let actual = highlight(r#"app "test-app" provides [] to "./blah""#);

        let expected = vec![
            tok!(0, 3, Token::Keyword),
            tok!(4, 14, Token::String),
            tok!(15, 23, Token::Keyword),
            tok!(24, 25, Token::Bracket),
            tok!(25, 26, Token::Bracket),
            tok!(27, 29, Token::Keyword),
            tok!(30, 38, Token::String),
        ];

        assert_eq!(actual, expected);
    }

    #[test]
    fn test_highlight_numbers() {
        let actual = highlight("123.0 123 123. 123.0e10 123e10 123e-10 0x123");

        let expected = vec![
            tok!(0, 5, Token::Number),
            tok!(6, 9, Token::Number),
            tok!(10, 14, Token::Number),
            tok!(15, 23, Token::Number),
            tok!(24, 30, Token::Number),
            tok!(31, 38, Token::Number),
            tok!(39, 44, Token::Number),
        ];

        assert_eq!(actual, expected);
    }

    #[test]
    fn test_combine_tokens() {
        let input: Vec<Loc<Token>> = vec![
            // arrow operator "->"
            tok!(0, 5, Token::Minus),
            tok!(6, 7, Token::GreaterThan),
            // pipe operator "|>"
            tok!(8, 9, Token::Bar),
            tok!(10, 11, Token::GreaterThan),
            // backpass operator "<-"
            tok!(12, 13, Token::LessThan),
            tok!(14, 15, Token::Minus),
        ];

        let expected = vec![
            tok!(0, 7, Token::Arrow),
            tok!(8, 11, Token::Pipe),
            tok!(12, 15, Token::Backpass),
        ];

        assert_eq!(combine_tokens(input), expected);
    }
}

View file

@ -10,6 +10,7 @@ pub mod ast;
pub mod blankspace; pub mod blankspace;
pub mod expr; pub mod expr;
pub mod header; pub mod header;
pub mod highlight;
pub mod ident; pub mod ident;
pub mod keyword; pub mod keyword;
pub mod module; pub mod module;

View file

@ -129,6 +129,10 @@ impl Position {
offset: self.offset - count as u32, offset: self.offset - count as u32,
} }
} }
/// Returns this position's `offset` field converted to `usize`.
pub fn byte_offset(&self) -> usize {
self.offset as usize
}
} }
impl Debug for Position { impl Debug for Position {
@ -322,6 +326,10 @@ impl<T> Loc<T> {
value: transform(self.value), value: transform(self.value),
} }
} }
/// Returns the half-open range running from the byte offset of this value's region
/// start to the byte offset of its region end.
pub fn byte_range(&self) -> std::ops::Range<usize> {
    let start = self.region.start.byte_offset();
    let end = self.region.end.byte_offset();

    start..end
}
} }
impl<T> fmt::Debug for Loc<T> impl<T> fmt::Debug for Loc<T>

View file

@ -568,6 +568,15 @@ mod test_snapshots {
Err(err) => Err(format!("{:?}", err)), Err(err) => Err(format!("{:?}", err)),
}; };
if expect == TestExpectation::Pass {
let tokens = roc_parse::highlight::highlight(&source);
for token in tokens {
if token.value == roc_parse::highlight::Token::Error {
panic!("Found an error highlight token in the input: {:?}", token);
}
}
}
let actual_result = let actual_result =
if expect == TestExpectation::Pass || expect == TestExpectation::Malformed { if expect == TestExpectation::Pass || expect == TestExpectation::Malformed {
result.expect("The source code for this test did not successfully parse!") result.expect("The source code for this test did not successfully parse!")

View file

@ -9,7 +9,7 @@ To run, `cd` into this directory and run this in your terminal:
If `roc` is on your PATH: If `roc` is on your PATH:
```bash ```bash
roc run static-site.roc input/ output/ roc run static-site.roc -- input/ output/
``` ```
If not, and you're building Roc from source: If not, and you're building Roc from source:

View file

@ -44,24 +44,92 @@ ingeniis in pugna quadripedis glandes superos. Tanta quam, illo es prole est
telis **unus verba** quisquis iuvenci annis. Nec velox sed sacra gaudia vacuos, telis **unus verba** quisquis iuvenci annis. Nec velox sed sacra gaudia vacuos,
Herculei undae calcata inmeriti quercus ignes parabant iam. Herculei undae calcata inmeriti quercus ignes parabant iam.
digitize(undoDhcp(card_record, cad_flash_dot)); ### Example Table
supercomputer(2 - load_type, yobibyteTraceroute - installHibernate, 1);
burnPci.pop_wrap.usbEmulation(hostESmm, processor_impression(4, lanNntp),
-5);
map_camera -= 73;
if (53) {
dacRootkitDrive(publicActivex.bmpNumWhite.wins_pci_firmware(scroll_cell,
4, tShortcut));
horse_algorithm_eide -= 51;
flatbed_blob(flat);
} else {
surge.pci -= open_flash_dv(4, 4, usbStation);
led.memory_fsb.matrixBinaryUrl(umlEngineOsd.agp_thick_thin.t(58));
kindle_cookie(formulaLedVpn, digital_meme);
}
Foret inpendere, haec ipse ossa, dolentes das Caystro miscuit iunctoque | Tables | Are | Cool |
spoliantis illae, ex! Bello istis nunc Aegides? Animo caelestia melior, | :------------ | :-----------: | ----: |
furoribus optat maior invecta quid harenis [est](http://example.org) sollemnia modo | col 3 is | right-aligned | $1600 |
Phineu. Suarum pectora. Relinquam in labore Medusae sororem Herculis [simillima | col 2 is | centered | $12 |
corpora](http://example.org) plus regi ignibus, totum domus! | zebra stripes | are neat | $1 |
### Example Code Blocks
```sh
# This isn't fenced roc code so it's not formatted
# Use a fence like ```roc to format code blocks
```
```roc
## This is a documentation comment
# This is a comment
app "static-site"
packages { pf: "platform/main.roc" }
imports [
pf.Html.{ html, head, body, div, text, a, ul, li, link, meta },
pf.Html.Attributes.{ httpEquiv, content, href, rel, lang, class, title },
]
provides [transformFileContent] to pf
NavLink : {
# this is another comment
url : Str,
title : Str,
text : Str,
}
navLinks : List NavLink
navLinks = [
{ url: "apple.html", title: "Exempli Gratia Pagina Pomi", text: "Apple" },
{ url: "banana.html", title: "Exempli Gratia Pagina Musa", text: "Banana" },
{ url: "cherry.html", title: "Exempli Pagina Cerasus", text: "Cherry" },
]
transformFileContent : Str, Str -> Str
transformFileContent = \currentUrl, htmlContent ->
List.findFirst navLinks (\{ url } -> url == currentUrl)
|> Result.map (\currentNavLink -> view currentNavLink htmlContent)
|> Result.map Html.render
|> Result.withDefault ""
view : NavLink, Str -> Html.Node
view = \currentNavLink, htmlContent ->
html [lang "en"] [
head [] [
meta [httpEquiv "content-type", content "text/html; charset=utf-8"] [],
Html.title [] [text currentNavLink.title],
link [rel "stylesheet", href "style.css"] [],
],
body [] [
div [class "main"] [
div [class "navbar"] [
viewNavbar currentNavLink,
],
div [class "article"] [
# For now `text` is not escaped so we can use it to insert HTML
# We'll probably want something more explicit in the long term though!
text htmlContent,
],
],
],
]
viewNavbar : NavLink -> Html.Node
viewNavbar = \currentNavLink ->
ul
[]
(List.map navLinks \nl -> viewNavLink (nl == currentNavLink) nl)
viewNavLink : Bool, NavLink -> Html.Node
viewNavLink = \isCurrent, navlink ->
if isCurrent then
li [class "nav-link nav-link--current"] [
text navlink.text,
]
else
li [class "nav-link"] [
a
[href navlink.url, title navlink.title]
[text navlink.text],
]
```

View file

@ -39,7 +39,102 @@
color: #444; color: #444;
} }
.article pre { .article pre {
background-color: #222; background-color: rgb(241, 241, 241);
color: yellow; color: rgb(27, 27, 27);
padding: 16px; padding: 16px;
} }
pre {
white-space: pre-wrap;
}
samp .ann {
/* type annotation - purple in the repl */
color: #f384fd;
}
samp .autovar .comment {
/* automatic variable names in the repl, e.g. # val1 */
color: #338545;
}
samp .kw {
/* language keywords, e.g. `if`*/
color: #004cc2;
}
samp .arrow {
/* presumably the arrow operator `->` (TODO confirm against the token-to-class mapping) */
color: #0600c2;
}
samp .pipe {
/* presumably the pipe operator `|>` (TODO confirm against the token-to-class mapping) */
color: #0600c2;
}
samp .op {
/* operators, e.g. `+` */
color: #0600c2;
}
samp .assign {
color: #48fd00;
}
samp .paren {
color: #ff0000;
}
samp .comma {
color: #ff00fb;
}
samp .colon {
color: #9b0098;
}
samp .number {
/* number literals */
color: #9669ff;
}
samp .str {
/* string literals */
color: #1dbf00;
}
samp .str-esc, samp .str-interp {
/* escapes inside string literals, e.g. \t */
color: #3474db;
}
samp .dim {
opacity: 0.55;
}
samp .comment {
color: #005a13;
}
table {
table-layout: fixed;
width: 100%;
border-collapse: collapse;
border: 3px solid rgb(161, 64, 0);
}
tbody tr:nth-child(even) {
background-color: #c6f4ff;
}
th {
background-color: #ffabab;
}
th,
td {
padding: 2px;
}

View file

@ -2,27 +2,194 @@
# It is not intended for manual editing. # It is not intended for manual editing.
version = 3 version = 3
[[package]]
name = "addr2line"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a76fd60b23679b7d19bd066031410fb7e458ccc5e958eb5c325888ce4baedc97"
dependencies = [
"gimli",
]
[[package]]
name = "adler"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "ahash"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
dependencies = [
"getrandom",
"once_cell",
"version_check",
]
[[package]] [[package]]
name = "arrayvec" name = "arrayvec"
version = "0.7.2" version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
[[package]]
name = "backtrace"
version = "0.3.67"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "233d376d6d185f2a3093e58f283f60f880315b6c60075b01f36b3b85154564ca"
dependencies = [
"addr2line",
"cc",
"cfg-if",
"libc",
"miniz_oxide",
"object",
"rustc-demangle",
]
[[package]] [[package]]
name = "bitflags" name = "bitflags"
version = "1.3.2" version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitmaps"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "031043d04099746d8db04daf1fa424b2bc8bd69d92b25962dcde24da39ab64a2"
dependencies = [
"typenum",
]
[[package]]
name = "bitvec"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c"
dependencies = [
"funty",
"radium",
"tap",
"wyz",
]
[[package]]
name = "bumpalo"
version = "3.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535"
[[package]]
name = "cc"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "doc-comment"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
[[package]]
name = "encode_unicode"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "funty"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
[[package]]
name = "getrandom"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "gimli"
version = "0.27.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4"
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
dependencies = [
"ahash",
"bumpalo",
]
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]] [[package]]
name = "host" name = "host"
version = "0.0.1" version = "0.0.1"
dependencies = [ dependencies = [
"libc", "libc",
"pulldown-cmark", "pulldown-cmark",
"roc_parse",
"roc_region",
"roc_std", "roc_std",
] ]
[[package]]
name = "im"
version = "15.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0acd33ff0285af998aaf9b57342af478078f53492322fafc47450e09397e0e9"
dependencies = [
"bitmaps",
"rand_core",
"rand_xoshiro",
"sized-chunks",
"typenum",
"version_check",
]
[[package]]
name = "im-rc"
version = "15.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af1955a75fa080c677d3972822ec4bad316169ab1cfc6c257a942c2265dbe5fe"
dependencies = [
"bitmaps",
"rand_core",
"rand_xoshiro",
"sized-chunks",
"typenum",
"version_check",
]
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.132" version = "0.2.132"
@ -35,6 +202,39 @@ version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "miniz_oxide"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa"
dependencies = [
"adler",
]
[[package]]
name = "object"
version = "0.30.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea86265d3d3dcb6a27fc51bd29a4bf387fae9d2986b823079d4986af253eb439"
dependencies = [
"memchr",
]
[[package]]
name = "once_cell"
version = "1.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
[[package]]
name = "proc-macro2"
version = "1.0.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6"
dependencies = [
"unicode-ident",
]
[[package]] [[package]]
name = "pulldown-cmark" name = "pulldown-cmark"
version = "0.9.2" version = "0.9.2"
@ -46,6 +246,88 @@ dependencies = [
"unicase", "unicase",
] ]
[[package]]
name = "quote"
version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b"
dependencies = [
"proc-macro2",
]
[[package]]
name = "radium"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
[[package]]
name = "rand_xoshiro"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa"
dependencies = [
"rand_core",
]
[[package]]
name = "roc_collections"
version = "0.0.1"
dependencies = [
"bitvec",
"bumpalo",
"fnv",
"hashbrown",
"im",
"im-rc",
"wyhash",
]
[[package]]
name = "roc_error_macros"
version = "0.0.1"
[[package]]
name = "roc_ident"
version = "0.0.1"
[[package]]
name = "roc_module"
version = "0.0.1"
dependencies = [
"bumpalo",
"roc_collections",
"roc_error_macros",
"roc_ident",
"roc_region",
"snafu",
"static_assertions",
]
[[package]]
name = "roc_parse"
version = "0.0.1"
dependencies = [
"bumpalo",
"encode_unicode",
"roc_collections",
"roc_module",
"roc_region",
]
[[package]]
name = "roc_region"
version = "0.0.1"
dependencies = [
"static_assertions",
]
[[package]] [[package]]
name = "roc_std" name = "roc_std"
version = "0.0.1" version = "0.0.1"
@ -54,12 +336,74 @@ dependencies = [
"static_assertions", "static_assertions",
] ]
[[package]]
name = "rustc-demangle"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342"
[[package]]
name = "sized-chunks"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16d69225bde7a69b235da73377861095455d298f2b970996eec25ddbb42b3d1e"
dependencies = [
"bitmaps",
"typenum",
]
[[package]]
name = "snafu"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb0656e7e3ffb70f6c39b3c2a86332bb74aa3c679da781642590f3c1118c5045"
dependencies = [
"backtrace",
"doc-comment",
"snafu-derive",
]
[[package]]
name = "snafu-derive"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "475b3bbe5245c26f2d8a6f62d67c1f30eb9fffeccee721c45d162c3ebbdf81b2"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "static_assertions" name = "static_assertions"
version = "1.1.0" version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "syn"
version = "1.0.107"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "tap"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
[[package]]
name = "typenum"
version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba"
[[package]] [[package]]
name = "unicase" name = "unicase"
version = "2.6.0" version = "2.6.0"
@ -69,8 +413,38 @@ dependencies = [
"version_check", "version_check",
] ]
[[package]]
name = "unicode-ident"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc"
[[package]] [[package]]
name = "version_check" name = "version_check"
version = "0.9.4" version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wyhash"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf6e163c25e3fac820b4b453185ea2dea3b6a3e0a721d4d23d75bd33734c295"
dependencies = [
"rand_core",
]
[[package]]
name = "wyz"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
dependencies = [
"tap",
]

View file

@ -18,7 +18,11 @@ path = "src/main.rs"
[dependencies] [dependencies]
roc_std = { path = "../../../crates/roc_std" } roc_std = { path = "../../../crates/roc_std" }
roc_region = { path = "../../../crates/compiler/region" }
roc_parse = { path = "../../../crates/compiler/parse" }
libc = "0.2" libc = "0.2"
html-escape = "0.2"
# Default features include building a binary that we don't need # Default features include building a binary that we don't need
pulldown-cmark = { version = "0.9.2", default-features = false } pulldown-cmark = { version = "0.9.2", default-features = false }

View file

@ -0,0 +1,77 @@
use roc_parse::highlight::Token;
use roc_region::all::Loc;
/// Renders Roc source `code` as an HTML snippet wrapped in `<pre><samp>…</samp></pre>`.
///
/// The source is tokenized with `roc_parse::highlight::highlight`. Each token's
/// text is HTML-escaped and, when its kind maps to a CSS class, wrapped in a
/// `<span class="…">`; every other token kind is emitted as escaped plain text.
///
/// Each emitted slice runs from the end of the previous token up to the end of
/// the current one, so any text sitting between two tokens is carried along
/// with the later token. Text following the final token is not emitted.
pub fn highlight_roc_code(code: &str) -> String {
    let tokens: Vec<Loc<Token>> = roc_parse::highlight::highlight(code);
    let mut pieces: Vec<String> = Vec::new();
    // Byte offset in `code` up to which text has already been emitted.
    let mut cursor = 0;

    for token in tokens {
        let end = token.byte_range().end;
        let text = &code[cursor..end];

        // `None` means "no dedicated styling": emit the text without a span.
        let css_class = match token.value {
            Token::LineComment | Token::DocComment => Some("comment"),
            Token::SingleQuote
            | Token::String
            | Token::UnicodeEscape
            | Token::EscapedChar
            | Token::Interpolated => Some("str"),
            Token::Keyword => Some("kw"),
            Token::Number => Some("number"),
            Token::Pipe => Some("pipe"),
            Token::Arrow | Token::Backpass => Some("arrow"),
            Token::Comma => Some("comma"),
            Token::Colon | Token::Backslash => Some("colon"),
            Token::GreaterThan
            | Token::Minus
            | Token::LessThan
            | Token::Plus
            | Token::Equals => Some("op"),
            Token::Brace | Token::Bracket | Token::Paren => Some("paren"),
            _ => None,
        };

        pieces = match css_class {
            Some(class) => push_html_span(pieces, text, class),
            None => push_html(pieces, text),
        };

        cursor = end;
    }

    format!("<pre><samp>{}</samp></pre>", pieces.join(""))
}
/// Appends `curr` to `buf` as `<span class="{class}">…</span>` and returns the buffer.
///
/// `curr` comes from user source code, so it is HTML-escaped before being
/// embedded. `class` is inserted as-is.
fn push_html_span(mut buf: Vec<String>, curr: &str, class: &str) -> Vec<String> {
    let escaped = html_escape::encode_text(curr);
    let body: &str = &escaped;

    let span = ["<span class=\"", class, "\">", body, "</span>"].concat();
    buf.push(span);

    buf
}
/// Appends `curr` to `buf` as HTML-escaped plain text (no wrapping element)
/// and returns the buffer.
///
/// `curr` comes from user source code, so it must be escaped before it is
/// placed into the generated page.
fn push_html(mut buf: Vec<String>, curr: &str) -> Vec<String> {
    // `encode_text` yields a `Cow<str>`; take ownership directly instead of
    // round-tripping through the formatting machinery with `format!("{}", …)`.
    buf.push(html_escape::encode_text(curr).into_owned());
    buf
}

View file

@ -8,6 +8,8 @@ use std::fs;
use std::os::raw::c_char; use std::os::raw::c_char;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
mod highlight;
extern "C" { extern "C" {
#[link_name = "roc__transformFileContentForHost_1_exposed"] #[link_name = "roc__transformFileContentForHost_1_exposed"]
fn roc_transformFileContentForHost(relPath: &RocStr, content: &RocStr) -> RocStr; fn roc_transformFileContentForHost(relPath: &RocStr, content: &RocStr) -> RocStr;
@ -202,7 +204,54 @@ fn process_file(input_dir: &Path, output_dir: &Path, input_file: &Path) -> Resul
options.remove(Options::ENABLE_SMART_PUNCTUATION); options.remove(Options::ENABLE_SMART_PUNCTUATION);
let parser = Parser::new_ext(&content_md, options); let parser = Parser::new_ext(&content_md, options);
html::push_html(&mut content_html, parser);
// Re-emit the markdown events into a new vector (the parser can only be
// consumed once), replacing every code block with a single pre-rendered HTML
// event so that Roc code can be syntax highlighted.
let mut parser_with_highlighting = Vec::new();
// Code is highlighted per block, not per line, so the block's text is
// accumulated here until the block ends.
let mut to_highlight = String::new();
// Whether we are currently between a code block's Start and End events,
// and whether that block was recognised as Roc code.
let mut in_code_block = false;
let mut is_roc_code = false;

for event in parser {
    match event {
        pulldown_cmark::Event::Start(pulldown_cmark::Tag::CodeBlock(cbk)) => {
            in_code_block = true;
            is_roc_code = is_roc_code_block(&cbk);
        }
        pulldown_cmark::Event::End(pulldown_cmark::Tag::CodeBlock(_)) => {
            if in_code_block {
                // Take the accumulated text, leaving an empty buffer for the next block.
                let source = std::mem::take(&mut to_highlight);

                // Format the whole multi-line code block as HTML all at once.
                let highlighted_html = if is_roc_code {
                    crate::highlight::highlight_roc_code(&source)
                } else {
                    // This string is emitted as a raw HTML event, so the code
                    // must be escaped here; closing tags mirror the opening order.
                    format!(
                        "<pre><samp>{}</samp></pre>",
                        html_escape::encode_text(&source)
                    )
                };

                parser_with_highlighting.push(pulldown_cmark::Event::Html(
                    pulldown_cmark::CowStr::from(highlighted_html),
                ));
                in_code_block = false;
            }
        }
        pulldown_cmark::Event::Text(t) => {
            if in_code_block {
                // Inside a code block: build up the string of text to render later.
                to_highlight.push_str(&t);
            } else {
                parser_with_highlighting.push(pulldown_cmark::Event::Text(t))
            }
        }
        e => {
            parser_with_highlighting.push(e);
        }
    }
}

html::push_html(&mut content_html, parser_with_highlighting.into_iter());
let roc_relpath = RocStr::from(output_relpath.to_str().unwrap()); let roc_relpath = RocStr::from(output_relpath.to_str().unwrap());
let roc_content_html = RocStr::from(content_html.as_str()); let roc_content_html = RocStr::from(content_html.as_str());
@ -240,3 +289,16 @@ pub fn strip_windows_prefix(path_buf: PathBuf) -> std::path::PathBuf {
std::path::Path::new(path_str.trim_start_matches(r"\\?\")).to_path_buf() std::path::Path::new(path_str.trim_start_matches(r"\\?\")).to_path_buf()
} }
/// Reports whether a markdown code block should be highlighted as Roc code.
///
/// Indented code blocks carry no info string and are never treated as Roc.
/// A fenced block counts as Roc when its info string contains the substring
/// `"roc"` anywhere (this is a substring test, not an exact language match).
fn is_roc_code_block(cbk: &pulldown_cmark::CodeBlockKind) -> bool {
    match cbk {
        pulldown_cmark::CodeBlockKind::Indented => false,
        pulldown_cmark::CodeBlockKind::Fenced(info) => info.contains("roc"),
    }
}