diff --git a/crates/compiler/parse/src/blankspace.rs b/crates/compiler/parse/src/blankspace.rs index 3f9cc06d3d..b0dbb7bb35 100644 --- a/crates/compiler/parse/src/blankspace.rs +++ b/crates/compiler/parse/src/blankspace.rs @@ -1,5 +1,6 @@ use crate::ast::CommentOrNewline; use crate::ast::Spaceable; +use crate::parser::Progress; use crate::parser::SpaceProblem; use crate::parser::{self, and, backtrackable, BadInputError, Parser, Progress::*}; use crate::state::State; @@ -7,6 +8,7 @@ use bumpalo::collections::vec::Vec; use bumpalo::Bump; use roc_region::all::Loc; use roc_region::all::Position; +use roc_region::all::Region; pub fn space0_around_ee<'a, P, S, E>( parser: P, @@ -386,98 +388,132 @@ pub fn spaces<'a, E>() -> impl Parser<'a, &'a [CommentOrNewline<'a>], E> where E: 'a + SpaceProblem, { - move |arena, mut state: State<'a>, _min_indent: u32| { + move |arena, state: State<'a>, _min_indent: u32| { let mut newlines = Vec::new_in(arena); - let mut progress = NoProgress; - loop { - let whitespace = fast_eat_whitespace(state.bytes()); - if whitespace > 0 { - state.advance_mut(whitespace); - progress = MadeProgress; - } - match state.bytes().first() { - Some(b'#') => { - state.advance_mut(1); - - let is_doc_comment = state.bytes().first() == Some(&b'#') - && (state.bytes().get(1) == Some(&b' ') - || state.bytes().get(1) == Some(&b'\n') - || begins_with_crlf(&state.bytes()[1..]) - || Option::is_none(&state.bytes().get(1))); - - if is_doc_comment { - state.advance_mut(1); - if state.bytes().first() == Some(&b' ') { - state.advance_mut(1); - } - } - - let len = fast_eat_until_control_character(state.bytes()); - - // We already checked that the string is valid UTF-8 - debug_assert!(std::str::from_utf8(&state.bytes()[..len]).is_ok()); - let text = unsafe { std::str::from_utf8_unchecked(&state.bytes()[..len]) }; - - let comment = if is_doc_comment { - CommentOrNewline::DocComment(text) - } else { - CommentOrNewline::LineComment(text) - }; - newlines.push(comment); - 
state.advance_mut(len); - - if begins_with_crlf(state.bytes()) { - state.advance_mut(1); - state = state.advance_newline(); - } else if state.bytes().first() == Some(&b'\n') { - state = state.advance_newline(); - } - - progress = MadeProgress; - } - Some(b'\r') => { - if state.bytes().get(1) == Some(&b'\n') { - newlines.push(CommentOrNewline::Newline); - state.advance_mut(1); - state = state.advance_newline(); - progress = MadeProgress; - } else { - return Err(( - progress, - E::space_problem( - BadInputError::HasMisplacedCarriageReturn, - state.pos(), - ), - )); - } - } - Some(b'\n') => { - newlines.push(CommentOrNewline::Newline); - state = state.advance_newline(); - progress = MadeProgress; - } - Some(b'\t') => { - return Err(( - progress, - E::space_problem(BadInputError::HasTab, state.pos()), - )); - } - Some(x) if *x < b' ' => { - return Err(( - progress, - E::space_problem(BadInputError::HasAsciiControl, state.pos()), - )); - } - _ => { - if !newlines.is_empty() { - state = state.mark_current_indent(); - } - break; - } - } + match consume_spaces(state, |_, space, _| newlines.push(space)) { + Ok((progress, state)) => Ok((progress, newlines.into_bump_slice(), state)), + Err((progress, err)) => Err((progress, err)), } - - Ok((progress, newlines.into_bump_slice(), state)) } } + +pub fn loc_spaces<'a, E>() -> impl Parser<'a, &'a [Loc>], E> +where + E: 'a + SpaceProblem, +{ + move |arena, state: State<'a>, _min_indent: u32| { + let mut newlines = Vec::new_in(arena); + + match consume_spaces(state, |start, space, end| { + newlines.push(Loc::at(Region::between(start, end), space)) + }) { + Ok((progress, state)) => Ok((progress, newlines.into_bump_slice(), state)), + Err((progress, err)) => Err((progress, err)), + } + } +} + +fn consume_spaces<'a, E, F>( + mut state: State<'a>, + mut on_space: F, +) -> Result<(Progress, State<'a>), (Progress, E)> +where + E: 'a + SpaceProblem, + F: FnMut(Position, CommentOrNewline<'a>, Position), +{ + let mut progress = NoProgress; + 
let mut found_newline = false; + loop { + let whitespace = fast_eat_whitespace(state.bytes()); + if whitespace > 0 { + state.advance_mut(whitespace); + progress = MadeProgress; + } + + let start = state.pos(); + + match state.bytes().first() { + Some(b'#') => { + state.advance_mut(1); + + let is_doc_comment = state.bytes().first() == Some(&b'#') + && (state.bytes().get(1) == Some(&b' ') + || state.bytes().get(1) == Some(&b'\n') + || begins_with_crlf(&state.bytes()[1..]) + || Option::is_none(&state.bytes().get(1))); + + if is_doc_comment { + state.advance_mut(1); + if state.bytes().first() == Some(&b' ') { + state.advance_mut(1); + } + } + + let len = fast_eat_until_control_character(state.bytes()); + + // We already checked that the string is valid UTF-8 + debug_assert!(std::str::from_utf8(&state.bytes()[..len]).is_ok()); + let text = unsafe { std::str::from_utf8_unchecked(&state.bytes()[..len]) }; + + let comment = if is_doc_comment { + CommentOrNewline::DocComment(text) + } else { + CommentOrNewline::LineComment(text) + }; + state.advance_mut(len); + on_space(start, comment, state.pos()); + found_newline = true; + + if begins_with_crlf(state.bytes()) { + state.advance_mut(1); + state = state.advance_newline(); + } else if state.bytes().first() == Some(&b'\n') { + state = state.advance_newline(); + } + + progress = MadeProgress; + } + Some(b'\r') => { + if state.bytes().get(1) == Some(&b'\n') { + state.advance_mut(1); + state = state.advance_newline(); + on_space(start, CommentOrNewline::Newline, state.pos()); + found_newline = true; + progress = MadeProgress; + } else { + return Err(( + progress, + E::space_problem(BadInputError::HasMisplacedCarriageReturn, state.pos()), + )); + } + } + Some(b'\n') => { + state = state.advance_newline(); + on_space(start, CommentOrNewline::Newline, state.pos()); + found_newline = true; + progress = MadeProgress; + } + Some(b'\t') => { + return Err(( + progress, + E::space_problem(BadInputError::HasTab, state.pos()), + )); + } + 
Some(x) if *x < b' ' => { + return Err(( + progress, + E::space_problem(BadInputError::HasAsciiControl, state.pos()), + )); + } + _ => { + if found_newline { + state = state.mark_current_indent(); + } + break; + } + } + } + + Ok((progress, state)) +} diff --git a/crates/compiler/parse/src/highlight.rs b/crates/compiler/parse/src/highlight.rs new file mode 100644 index 0000000000..53bd5c04c7 --- /dev/null +++ b/crates/compiler/parse/src/highlight.rs @@ -0,0 +1,565 @@ +use encode_unicode::CharExt; +use std::collections::HashSet; + +use bumpalo::Bump; +use roc_region::all::{Loc, Region}; + +use crate::{ + ast::CommentOrNewline, + blankspace::loc_spaces, + keyword::KEYWORDS, + number_literal::positive_number_literal, + parser::{EExpr, ParseResult, Parser}, + state::State, + string_literal::{parse_str_like_literal, StrLikeLiteral}, +}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum Token { + LineComment, + DocComment, + Error, + SingleQuote, + String, + UnicodeEscape, + EscapedChar, + Interpolated, + Keyword, + UpperIdent, + LowerIdent, + Number, + Other, + Minus, + Plus, + Colon, + Bar, + Equals, + GreaterThan, + LessThan, + Comma, + Backslash, + Brace, + Bracket, + Paren, + Arrow, + Pipe, + Backpass, +} + +pub fn highlight(text: &str) -> Vec> { + let mut tokens = Vec::new(); + let state = State::new(text.as_bytes()); + + let arena = Bump::new(); + + let header_keywords = HEADER_KEYWORDS.iter().copied().collect::>(); + let body_keywords = KEYWORDS.iter().copied().collect::>(); + + if let Ok((_prog, _, new_state)) = crate::module::header().parse(&arena, state.clone(), 0) { + let inner_state = + State::new(text[..state.bytes().len() - new_state.bytes().len()].as_bytes()); + highlight_inner(&arena, inner_state, &mut tokens, &header_keywords); + highlight_inner(&arena, new_state, &mut tokens, &body_keywords); + } else { + highlight_inner(&arena, state, &mut tokens, &body_keywords); + } + + tokens = combine_tokens(tokens); + + tokens +} + +fn 
combine_tokens(locations: Vec>) -> Vec> { + let mut tokens: Vec> = Vec::new(); + let mut previous_location: Option> = None; + + for location in locations { + match location.value { + // Catch tokens which may combine for a different meaning + Token::LessThan => match previous_location { + Some(prev) => { + tokens.push(prev); + tokens.push(location); + previous_location = None; + } + None => { + previous_location = Some(location); + } + }, + Token::Bar => match previous_location { + Some(prev) => { + tokens.push(prev); + tokens.push(location); + previous_location = None; + } + None => { + previous_location = Some(location); + } + }, + // Combination tokens + Token::GreaterThan => { + match previous_location { + Some(prev) => { + match prev.value { + Token::Minus => { + // arrow operator "->" + tokens.push(Loc::at( + Region::between(prev.region.start(), location.region.end()), + Token::Arrow, + )); + previous_location = None; + } + Token::Bar => { + // pipe operator "|>" + tokens.push(Loc::at( + Region::between(prev.region.start(), location.region.end()), + Token::Pipe, + )); + previous_location = None; + } + _ => { + tokens.push(prev); + tokens.push(location); + previous_location = None; + } + } + } + _ => { + tokens.push(location); + } + } + } + Token::Minus => { + match previous_location { + Some(prev) => { + match prev.value { + Token::LessThan => { + // backpass operator "<-" + tokens.push(Loc::at( + Region::between(prev.region.start(), location.region.end()), + Token::Backpass, + )); + previous_location = None; + } + _ => { + tokens.push(prev); + tokens.push(location); + previous_location = None; + } + } + } + None => { + previous_location = Some(location); + } + } + } + _ => { + tokens.push(location); + } + } + } + + tokens +} + +fn highlight_inner<'a>( + arena: &'a Bump, + mut state: State<'a>, + tokens: &mut Vec>, + keywords: &HashSet<&str>, +) { + loop { + let start = state.pos(); + if let Ok((b, _width)) = char::from_utf8_slice_start(state.bytes()) { + 
match b { + ' ' | '\n' | '\t' | '\r' | '#' => { + let res: ParseResult<'a, _, EExpr<'a>> = + loc_spaces().parse(arena, state.clone(), 0); + if let Ok((_, spaces, new_state)) = res { + state = new_state; + for space in spaces { + let token = match space.value { + CommentOrNewline::Newline => { + continue; + } + CommentOrNewline::LineComment(_) => Token::LineComment, + CommentOrNewline::DocComment(_) => Token::DocComment, + }; + tokens.push(Loc::at(space.region, token)); + } + } else { + fast_forward_to(&mut state, tokens, start, |c| c == b'\n'); + } + } + '"' | '\'' => { + if let Ok((_, item, new_state)) = + parse_str_like_literal().parse(arena, state.clone(), 0) + { + state = new_state; + match item { + StrLikeLiteral::SingleQuote(_) => { + tokens.push(Loc::at( + Region::between(start, state.pos()), + Token::SingleQuote, + )); + } + StrLikeLiteral::Str(_) => { + tokens.push(Loc::at( + Region::between(start, state.pos()), + Token::String, + )); + } + } + } else { + fast_forward_to(&mut state, tokens, start, |c| c == b'\n'); + } + } + c if c.is_alphabetic() => { + let buffer = state.bytes(); + let mut chomped = 0; + + let is_upper = c.is_uppercase(); + + while let Ok((ch, width)) = char::from_utf8_slice_start(&buffer[chomped..]) { + if ch.is_alphabetic() || ch.is_ascii_digit() { + chomped += width; + } else { + // we're done + break; + } + } + + let ident = std::str::from_utf8(&buffer[..chomped]).unwrap(); + state.advance_mut(chomped); + + if keywords.contains(ident) { + tokens.push(Loc::at(Region::between(start, state.pos()), Token::Keyword)); + } else { + tokens.push(Loc::at( + Region::between(start, state.pos()), + if is_upper { + Token::UpperIdent + } else { + Token::LowerIdent + }, + )); + } + } + '0'..='9' => { + if let Ok((_, _item, new_state)) = + positive_number_literal().parse(arena, state.clone(), 0) + { + state = new_state; + tokens.push(Loc::at(Region::between(start, state.pos()), Token::Number)); + } else { + fast_forward_to(&mut state, tokens, start, 
|b| !b.is_ascii_digit()); + } + } + ':' => { + state.advance_mut(1); + tokens.push(Loc::at(Region::between(start, state.pos()), Token::Colon)); + } + '|' => { + state.advance_mut(1); + tokens.push(Loc::at(Region::between(start, state.pos()), Token::Bar)); + } + '-' => { + state.advance_mut(1); + tokens.push(Loc::at(Region::between(start, state.pos()), Token::Minus)); + } + '+' => { + state.advance_mut(1); + tokens.push(Loc::at(Region::between(start, state.pos()), Token::Plus)); + } + '=' => { + state.advance_mut(1); + tokens.push(Loc::at(Region::between(start, state.pos()), Token::Equals)); + } + '>' => { + state.advance_mut(1); + tokens.push(Loc::at( + Region::between(start, state.pos()), + Token::GreaterThan, + )); + } + '<' => { + state.advance_mut(1); + tokens.push(Loc::at( + Region::between(start, state.pos()), + Token::LessThan, + )); + } + ',' => { + state.advance_mut(1); + tokens.push(Loc::at(Region::between(start, state.pos()), Token::Comma)); + } + '\\' => { + state.advance_mut(1); + tokens.push(Loc::at( + Region::between(start, state.pos()), + Token::Backslash, + )); + } + '{' | '}' => { + state.advance_mut(1); + tokens.push(Loc::at(Region::between(start, state.pos()), Token::Brace)); + } + '[' | ']' => { + state.advance_mut(1); + tokens.push(Loc::at(Region::between(start, state.pos()), Token::Bracket)); + } + '(' | ')' => { + state.advance_mut(1); + tokens.push(Loc::at(Region::between(start, state.pos()), Token::Paren)); + } + _ => { + state.advance_mut(1); + tokens.push(Loc::at(Region::between(start, state.pos()), Token::Other)); + } + } + } else { + break; + } + } +} + +fn fast_forward_to( + state: &mut State, + tokens: &mut Vec>, + start: roc_region::all::Position, + end: impl Fn(u8) -> bool, +) { + while let Some(b) = state.bytes().first() { + if end(*b) { + break; + } + state.advance_mut(1); + } + tokens.push(Loc::at(Region::between(start, state.pos()), Token::Error)); +} + +pub const HEADER_KEYWORDS: [&str; 14] = [ + "interface", + "app", + 
"package", + "platform", + "hosted", + "exposes", + "imports", + "with", + "generates", + "package", + "packages", + "requires", + "provides", + "to", +]; + +#[cfg(test)] +mod tests { + use roc_region::all::Position; + + use super::*; + + #[test] + fn test_highlight_comments() { + let text = "# a\n#b\n#c"; + let tokens = highlight(text); + assert_eq!( + tokens, + vec![ + Loc::at( + Region::between(Position::new(0), Position::new(3)), + Token::LineComment + ), + Loc::at( + Region::between(Position::new(4), Position::new(6)), + Token::LineComment + ), + Loc::at( + Region::between(Position::new(7), Position::new(9)), + Token::LineComment + ), + ] + ); + } + + #[test] + fn test_highlight_doc_comments() { + let text = "## a\n##b\n##c"; + let tokens = highlight(text); + assert_eq!( + tokens, + vec![ + Loc::at( + Region::between(Position::new(0), Position::new(4)), + Token::DocComment + ), + // the next two are line comments because there's not a space at the beginning + Loc::at( + Region::between(Position::new(5), Position::new(8)), + Token::LineComment + ), + Loc::at( + Region::between(Position::new(9), Position::new(12)), + Token::LineComment + ), + ] + ); + } + + #[test] + fn test_highlight_strings() { + let text = r#""a""#; + let tokens = highlight(text); + assert_eq!( + tokens, + vec![Loc::at( + Region::between(Position::new(0), Position::new(3)), + Token::String + )] + ); + } + + #[test] + fn test_highlight_single_quotes() { + let text = r#"'a'"#; + let tokens = highlight(text); + assert_eq!( + tokens, + vec![Loc::at( + Region::between(Position::new(0), Position::new(3)), + Token::SingleQuote + )] + ); + } + + #[test] + fn test_highlight_header() { + let text = r#"app "test-app" provides [] to "./blah""#; + let tokens = highlight(text); + assert_eq!( + tokens, + vec![ + Loc::at( + Region::between(Position::new(0), Position::new(3)), + Token::Keyword + ), + Loc::at( + Region::between(Position::new(4), Position::new(14)), + Token::String + ), + Loc::at( + 
Region::between(Position::new(15), Position::new(23)), + Token::Keyword + ), + Loc::at( + Region::between(Position::new(24), Position::new(25)), + Token::Bracket + ), + Loc::at( + Region::between(Position::new(25), Position::new(26)), + Token::Bracket + ), + Loc::at( + Region::between(Position::new(27), Position::new(29)), + Token::Keyword + ), + Loc::at( + Region::between(Position::new(30), Position::new(38)), + Token::String + ), + ] + ); + } + + #[test] + fn test_highlight_numbers() { + let text = "123.0 123 123. 123.0e10 123e10 123e-10 0x123"; + let tokens = highlight(text); + assert_eq!( + tokens, + vec![ + Loc::at( + Region::between(Position::new(0), Position::new(5)), + Token::Number + ), + Loc::at( + Region::between(Position::new(6), Position::new(9)), + Token::Number + ), + Loc::at( + Region::between(Position::new(10), Position::new(14)), + Token::Number + ), + Loc::at( + Region::between(Position::new(15), Position::new(23)), + Token::Number + ), + Loc::at( + Region::between(Position::new(24), Position::new(30)), + Token::Number + ), + Loc::at( + Region::between(Position::new(31), Position::new(38)), + Token::Number + ), + Loc::at( + Region::between(Position::new(39), Position::new(44)), + Token::Number + ), + ] + ); + } + + #[test] + fn test_combine_tokens() { + let input: Vec> = vec![ + // arrow operator "->" + Loc::at( + Region::between(Position::new(0), Position::new(5)), + Token::Minus, + ), + Loc::at( + Region::between(Position::new(6), Position::new(7)), + Token::GreaterThan, + ), + // pipe operator "|>" + Loc::at( + Region::between(Position::new(8), Position::new(9)), + Token::Bar, + ), + Loc::at( + Region::between(Position::new(10), Position::new(11)), + Token::GreaterThan, + ), + // backpass operator "<-" + Loc::at( + Region::between(Position::new(12), Position::new(13)), + Token::LessThan, + ), + Loc::at( + Region::between(Position::new(14), Position::new(15)), + Token::Minus, + ), + ]; + + let actual = combine_tokens(input); + let expected = 
vec![ + Loc::at( + Region::between(Position::new(0), Position::new(7)), + Token::Arrow, + ), + Loc::at( + Region::between(Position::new(8), Position::new(11)), + Token::Pipe, + ), + Loc::at( + Region::between(Position::new(12), Position::new(15)), + Token::Backpass, + ), + ]; + + assert_eq!(actual, expected); + } +} diff --git a/crates/compiler/parse/src/lib.rs b/crates/compiler/parse/src/lib.rs index dbdfe30ff4..41deda54f6 100644 --- a/crates/compiler/parse/src/lib.rs +++ b/crates/compiler/parse/src/lib.rs @@ -10,6 +10,7 @@ pub mod ast; pub mod blankspace; pub mod expr; pub mod header; +pub mod highlight; pub mod ident; pub mod keyword; pub mod module; diff --git a/crates/compiler/region/src/all.rs b/crates/compiler/region/src/all.rs index 457810cdf9..fb83fe2b37 100644 --- a/crates/compiler/region/src/all.rs +++ b/crates/compiler/region/src/all.rs @@ -129,6 +129,10 @@ impl Position { offset: self.offset - count as u32, } } + + pub fn byte_offset(&self) -> usize { + self.offset as usize + } } impl Debug for Position { @@ -322,6 +326,10 @@ impl Loc { value: transform(self.value), } } + + pub fn byte_range(&self) -> std::ops::Range { + self.region.start.byte_offset()..self.region.end.byte_offset() + } } impl fmt::Debug for Loc diff --git a/crates/compiler/test_syntax/tests/test_snapshots.rs b/crates/compiler/test_syntax/tests/test_snapshots.rs index afb979f2f3..34d5062c1b 100644 --- a/crates/compiler/test_syntax/tests/test_snapshots.rs +++ b/crates/compiler/test_syntax/tests/test_snapshots.rs @@ -568,6 +568,15 @@ mod test_snapshots { Err(err) => Err(format!("{:?}", err)), }; + if expect == TestExpectation::Pass { + let tokens = roc_parse::highlight::highlight(&source); + for token in tokens { + if token.value == roc_parse::highlight::Token::Error { + panic!("Found an error highlight token in the input: {:?}", token); + } + } + } + let actual_result = if expect == TestExpectation::Pass || expect == TestExpectation::Malformed { result.expect("The source code for this 
test did not successfully parse!") diff --git a/examples/static-site-gen/README.md b/examples/static-site-gen/README.md index af96a62bfa..f8dd72ab56 100644 --- a/examples/static-site-gen/README.md +++ b/examples/static-site-gen/README.md @@ -9,7 +9,7 @@ To run, `cd` into this directory and run this in your terminal: If `roc` is on your PATH: ```bash -roc run static-site.roc input/ output/ +roc run static-site.roc -- input/ output/ ``` If not, and you're building Roc from source: diff --git a/examples/static-site-gen/input/apple.md b/examples/static-site-gen/input/apple.md index f4e28c985d..f5102a8eb7 100644 --- a/examples/static-site-gen/input/apple.md +++ b/examples/static-site-gen/input/apple.md @@ -44,24 +44,92 @@ ingeniis in pugna quadripedis glandes superos. Tanta quam, illo es prole est telis **unus verba** quisquis iuvenci annis. Nec velox sed sacra gaudia vacuos, Herculei undae calcata inmeriti quercus ignes parabant iam. - digitize(undoDhcp(card_record, cad_flash_dot)); - supercomputer(2 - load_type, yobibyteTraceroute - installHibernate, 1); - burnPci.pop_wrap.usbEmulation(hostESmm, processor_impression(4, lanNntp), - -5); - map_camera -= 73; - if (53) { - dacRootkitDrive(publicActivex.bmpNumWhite.wins_pci_firmware(scroll_cell, - 4, tShortcut)); - horse_algorithm_eide -= 51; - flatbed_blob(flat); - } else { - surge.pci -= open_flash_dv(4, 4, usbStation); - led.memory_fsb.matrixBinaryUrl(umlEngineOsd.agp_thick_thin.t(58)); - kindle_cookie(formulaLedVpn, digital_meme); - } +### Example Table -Foret inpendere, haec ipse ossa, dolentes das Caystro miscuit iunctoque -spoliantis illae, ex! Bello istis nunc Aegides? Animo caelestia melior, -furoribus optat maior invecta quid harenis [est](http://example.org) sollemnia modo -Phineu. Suarum pectora. Relinquam in labore Medusae sororem Herculis [simillima -corpora](http://example.org) plus regi ignibus, totum domus! 
+| Tables | Are | Cool | +| :------------ | :-----------: | ----: | +| col 3 is | right-aligned | $1600 | +| col 2 is | centered | $12 | +| zebra stripes | are neat | $1 | + +### Example Code Blocks + +```sh +# This isn't fenced roc code so it's not formatted +# Use a fence like ```roc to format code blocks +``` + +```roc +## This is a documentation comment + +# This is a comment +app "static-site" + packages { pf: "platform/main.roc" } + imports [ + pf.Html.{ html, head, body, div, text, a, ul, li, link, meta }, + pf.Html.Attributes.{ httpEquiv, content, href, rel, lang, class, title }, + ] + provides [transformFileContent] to pf + +NavLink : { + # this is another comment + url : Str, + title : Str, + text : Str, +} + +navLinks : List NavLink +navLinks = [ + { url: "apple.html", title: "Exempli Gratia Pagina Pomi", text: "Apple" }, + { url: "banana.html", title: "Exempli Gratia Pagina Musa", text: "Banana" }, + { url: "cherry.html", title: "Exempli Pagina Cerasus", text: "Cherry" }, +] + +transformFileContent : Str, Str -> Str +transformFileContent = \currentUrl, htmlContent -> + List.findFirst navLinks (\{ url } -> url == currentUrl) + |> Result.map (\currentNavLink -> view currentNavLink htmlContent) + |> Result.map Html.render + |> Result.withDefault "" + +view : NavLink, Str -> Html.Node +view = \currentNavLink, htmlContent -> + html [lang "en"] [ + head [] [ + meta [httpEquiv "content-type", content "text/html; charset=utf-8"] [], + Html.title [] [text currentNavLink.title], + link [rel "stylesheet", href "style.css"] [], + ], + body [] [ + div [class "main"] [ + div [class "navbar"] [ + viewNavbar currentNavLink, + ], + div [class "article"] [ + # For now `text` is not escaped so we can use it to insert HTML + # We'll probably want something more explicit in the long term though! 
+ text htmlContent, + ], + ], + ], + ] + +viewNavbar : NavLink -> Html.Node +viewNavbar = \currentNavLink -> + ul + [] + (List.map navLinks \nl -> viewNavLink (nl == currentNavLink) nl) + +viewNavLink : Bool, NavLink -> Html.Node +viewNavLink = \isCurrent, navlink -> + if isCurrent then + li [class "nav-link nav-link--current"] [ + text navlink.text, + ] + else + li [class "nav-link"] [ + a + [href navlink.url, title navlink.title] + [text navlink.text], + ] +``` \ No newline at end of file diff --git a/examples/static-site-gen/output/style.css b/examples/static-site-gen/output/style.css index cdc5bf0e97..fe77ec0472 100644 --- a/examples/static-site-gen/output/style.css +++ b/examples/static-site-gen/output/style.css @@ -39,7 +39,102 @@ color: #444; } .article pre { - background-color: #222; - color: yellow; + background-color: rgb(241, 241, 241); + color: rgb(27, 27, 27); padding: 16px; } + +pre { + white-space: pre-wrap; +} + +samp .ann { + /* type annotation - purple in the repl */ + color: #f384fd; +} + +samp .autovar .comment { + /* automatic variable names in the repl, e.g. # val1 */ + color: #338545; +} + +samp .kw { + /* language keywords, e.g. `if`*/ + color: #004cc2; +} + +samp .arrow { + /* operators, e.g. `+` */ + color: #0600c2; +} + +samp .pipe { + /* operators, e.g. `+` */ + color: #0600c2; +} + +samp .op { + /* operators, e.g. `+` */ + color: #0600c2; +} + +samp .assign { + color: #48fd00; +} + +samp .paren { + color: #ff0000; +} + +samp .comma { + color: #ff00fb; +} + +samp .colon { + color: #9b0098; +} + +samp .number { +/* number literals */ +color: #9669ff; +} + +samp .str { + /* string literals */ + color: #1dbf00; +} + +samp .str-esc, samp .str-interp { + /* escapes inside string literals, e.g. 
\t */ + color: #3474db; +} + +samp .dim { + opacity: 0.55; +} + +samp .comment { + color: #005a13; +} + + +table { + table-layout: fixed; + width: 100%; + border-collapse: collapse; + border: 3px solid rgb(161, 64, 0); +} + +tbody tr:nth-child(even) { + background-color: #c6f4ff; +} + +th { + background-color: #ffabab; +} + +th, +td { + + padding: 2px; +} \ No newline at end of file diff --git a/examples/static-site-gen/platform/Cargo.lock b/examples/static-site-gen/platform/Cargo.lock index 0e81055120..2aa6bd2bc5 100644 --- a/examples/static-site-gen/platform/Cargo.lock +++ b/examples/static-site-gen/platform/Cargo.lock @@ -2,27 +2,194 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "addr2line" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a76fd60b23679b7d19bd066031410fb7e458ccc5e958eb5c325888ce4baedc97" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "arrayvec" version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" +[[package]] +name = "backtrace" +version = "0.3.67" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "233d376d6d185f2a3093e58f283f60f880315b6c60075b01f36b3b85154564ca" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + [[package]] name = "bitflags" version = "1.3.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitmaps" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "031043d04099746d8db04daf1fa424b2bc8bd69d92b25962dcde24da39ab64a2" +dependencies = [ + "typenum", +] + +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] +name = "bumpalo" +version = "3.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" + +[[package]] +name = "cc" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + +[[package]] +name = "getrandom" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "gimli" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4" + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash", + "bumpalo", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "host" version = "0.0.1" dependencies = [ "libc", "pulldown-cmark", + "roc_parse", + "roc_region", "roc_std", ] +[[package]] +name = "im" +version = "15.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0acd33ff0285af998aaf9b57342af478078f53492322fafc47450e09397e0e9" +dependencies = [ + "bitmaps", + "rand_core", + "rand_xoshiro", + "sized-chunks", + "typenum", + "version_check", +] + +[[package]] +name = "im-rc" +version = "15.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af1955a75fa080c677d3972822ec4bad316169ab1cfc6c257a942c2265dbe5fe" +dependencies = [ + "bitmaps", + "rand_core", + "rand_xoshiro", + "sized-chunks", + "typenum", + "version_check", +] + [[package]] name = "libc" version = "0.2.132" @@ -35,6 +202,39 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "miniz_oxide" +version = "0.6.2" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa" +dependencies = [ + "adler", +] + +[[package]] +name = "object" +version = "0.30.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea86265d3d3dcb6a27fc51bd29a4bf387fae9d2986b823079d4986af253eb439" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" + +[[package]] +name = "proc-macro2" +version = "1.0.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6" +dependencies = [ + "unicode-ident", +] + [[package]] name = "pulldown-cmark" version = "0.9.2" @@ -46,6 +246,88 @@ dependencies = [ "unicase", ] +[[package]] +name = "quote" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" + +[[package]] +name = "rand_xoshiro" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa" +dependencies = [ + "rand_core", +] + +[[package]] +name = "roc_collections" +version = "0.0.1" +dependencies = [ + "bitvec", + "bumpalo", + "fnv", + "hashbrown", + "im", + "im-rc", + "wyhash", +] + +[[package]] +name = 
"roc_error_macros" +version = "0.0.1" + +[[package]] +name = "roc_ident" +version = "0.0.1" + +[[package]] +name = "roc_module" +version = "0.0.1" +dependencies = [ + "bumpalo", + "roc_collections", + "roc_error_macros", + "roc_ident", + "roc_region", + "snafu", + "static_assertions", +] + +[[package]] +name = "roc_parse" +version = "0.0.1" +dependencies = [ + "bumpalo", + "encode_unicode", + "roc_collections", + "roc_module", + "roc_region", +] + +[[package]] +name = "roc_region" +version = "0.0.1" +dependencies = [ + "static_assertions", +] + [[package]] name = "roc_std" version = "0.0.1" @@ -54,12 +336,74 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "rustc-demangle" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" + +[[package]] +name = "sized-chunks" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16d69225bde7a69b235da73377861095455d298f2b970996eec25ddbb42b3d1e" +dependencies = [ + "bitmaps", + "typenum", +] + +[[package]] +name = "snafu" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb0656e7e3ffb70f6c39b3c2a86332bb74aa3c679da781642590f3c1118c5045" +dependencies = [ + "backtrace", + "doc-comment", + "snafu-derive", +] + +[[package]] +name = "snafu-derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "475b3bbe5245c26f2d8a6f62d67c1f30eb9fffeccee721c45d162c3ebbdf81b2" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "static_assertions" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "syn" +version = "1.0.107" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + +[[package]] +name = "typenum" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" + [[package]] name = "unicase" version = "2.6.0" @@ -69,8 +413,38 @@ dependencies = [ "version_check", ] +[[package]] +name = "unicode-ident" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" + [[package]] name = "version_check" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wyhash" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf6e163c25e3fac820b4b453185ea2dea3b6a3e0a721d4d23d75bd33734c295" +dependencies = [ + "rand_core", +] + +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] diff --git a/examples/static-site-gen/platform/Cargo.toml b/examples/static-site-gen/platform/Cargo.toml index 9b5becea49..64a9ab497d 100644 --- a/examples/static-site-gen/platform/Cargo.toml +++ b/examples/static-site-gen/platform/Cargo.toml @@ -18,7 +18,11 @@ path = "src/main.rs" 
[dependencies] roc_std = { path = "../../../crates/roc_std" } +roc_region = { path = "../../../crates/compiler/region" } +roc_parse = { path = "../../../crates/compiler/parse" } libc = "0.2" +html-escape = "0.2" + # Default features include building a binary that we don't need pulldown-cmark = { version = "0.9.2", default-features = false } diff --git a/examples/static-site-gen/platform/src/highlight.rs b/examples/static-site-gen/platform/src/highlight.rs new file mode 100644 index 0000000000..776918e362 --- /dev/null +++ b/examples/static-site-gen/platform/src/highlight.rs @@ -0,0 +1,77 @@ +use roc_parse::highlight::Token; +use roc_region::all::Loc; + +pub fn highlight_roc_code(code: &str) -> String { + let locations: Vec> = roc_parse::highlight::highlight(code); + let mut buf: Vec = Vec::new(); + let mut offset = 0; + + for location in locations { + let current_text = &code[offset..location.byte_range().end]; + + match location.value { + Token::LineComment | Token::DocComment => { + buf = push_html_span(buf, current_text, "comment"); + } + Token::SingleQuote + | Token::String + | Token::UnicodeEscape + | Token::EscapedChar + | Token::Interpolated => { + buf = push_html_span(buf, current_text, "str"); + } + Token::Keyword => { + buf = push_html_span(buf, current_text, "kw"); + } + Token::Number => { + buf = push_html_span(buf, current_text, "number"); + } + Token::Pipe => { + buf = push_html_span(buf, current_text, "pipe"); + } + Token::Arrow => { + buf = push_html_span(buf, current_text, "arrow"); + } + Token::Backpass => { + buf = push_html_span(buf, current_text, "arrow"); + } + Token::Comma => { + buf = push_html_span(buf, current_text, "comma"); + } + Token::Colon | Token::Backslash => { + buf = push_html_span(buf, current_text, "colon"); + } + Token::GreaterThan | Token::Minus | Token::LessThan | Token::Plus | Token::Equals => { + buf = push_html_span(buf, current_text, "op"); + } + Token::Brace | Token::Bracket | Token::Paren => { + buf = 
push_html_span(buf, current_text, "paren"); + } + _ => { + buf = push_html(buf, current_text); + } + } + + offset = location.byte_range().end; + } + + format!("
{}
", buf.join("")) +} + +fn push_html_span(mut buf: Vec, curr: &str, class: &str) -> Vec { + // html escape strings from source code + let escaped = html_escape::encode_text(curr); + + buf.push(format!("{}", class, escaped)); + + buf +} + +fn push_html(mut buf: Vec, curr: &str) -> Vec { + // html escape strings from source code + let escaped = html_escape::encode_text(curr); + + buf.push(format!("{}", escaped)); + + buf +} diff --git a/examples/static-site-gen/platform/src/lib.rs b/examples/static-site-gen/platform/src/lib.rs index 7db426008e..28f0070eba 100644 --- a/examples/static-site-gen/platform/src/lib.rs +++ b/examples/static-site-gen/platform/src/lib.rs @@ -8,6 +8,8 @@ use std::fs; use std::os::raw::c_char; use std::path::{Path, PathBuf}; +mod highlight; + extern "C" { #[link_name = "roc__transformFileContentForHost_1_exposed"] fn roc_transformFileContentForHost(relPath: &RocStr, content: &RocStr) -> RocStr; @@ -202,7 +204,54 @@ fn process_file(input_dir: &Path, output_dir: &Path, input_file: &Path) -> Resul options.remove(Options::ENABLE_SMART_PUNCTUATION); let parser = Parser::new_ext(&content_md, options); - html::push_html(&mut content_html, parser); + + // We'll build a new vector of events since we can only consume the parser once + let mut parser_with_highlighting = Vec::new(); + // As we go along, we'll want to highlight code in bundles, not lines + let mut to_highlight = String::new(); + // And track a little bit of state + let mut in_code_block = false; + let mut is_roc_code = false; + + for event in parser { + match event { + pulldown_cmark::Event::Start(pulldown_cmark::Tag::CodeBlock(cbk)) => { + in_code_block = true; + is_roc_code = is_roc_code_block(&cbk); + } + pulldown_cmark::Event::End(pulldown_cmark::Tag::CodeBlock(_)) => { + if in_code_block { + // Format the whole multi-line code block as HTML all at once + let highlighted_html: String; + if is_roc_code { + highlighted_html = crate::highlight::highlight_roc_code(&to_highlight) + } else { + 
highlighted_html = format!("
{}
", &to_highlight) + } + + // And put it into the vector + parser_with_highlighting.push(pulldown_cmark::Event::Html( + pulldown_cmark::CowStr::from(highlighted_html), + )); + to_highlight = String::new(); + in_code_block = false; + } + } + pulldown_cmark::Event::Text(t) => { + if in_code_block { + // If we're in a code block, build up the string of text + to_highlight.push_str(&t); + } else { + parser_with_highlighting.push(pulldown_cmark::Event::Text(t)) + } + } + e => { + parser_with_highlighting.push(e); + } + } + } + + html::push_html(&mut content_html, parser_with_highlighting.into_iter()); let roc_relpath = RocStr::from(output_relpath.to_str().unwrap()); let roc_content_html = RocStr::from(content_html.as_str()); @@ -240,3 +289,16 @@ pub fn strip_windows_prefix(path_buf: PathBuf) -> std::path::PathBuf { std::path::Path::new(path_str.trim_start_matches(r"\\?\")).to_path_buf() } + +fn is_roc_code_block(cbk: &pulldown_cmark::CodeBlockKind) -> bool { + match cbk { + pulldown_cmark::CodeBlockKind::Indented => false, + pulldown_cmark::CodeBlockKind::Fenced(cow_str) => { + if cow_str.contains("roc") { + true + } else { + false + } + } + } +}