finished if highlighting

This commit is contained in:
Anton-4 2022-03-07 19:44:24 +01:00
parent 37704323b1
commit ec1e2cd1d0
No known key found for this signature in database
GPG key ID: C954D6E0F9C0ABFD
12 changed files with 1988 additions and 2488 deletions

View file

@@ -1,25 +1,17 @@
-use crate::{syntax_highlight::HighlightStyle, slow_pool::MarkNodeId};
-use super::{attribute::Attributes, nodes, nodes::MarkupNode};
+use crate::{syntax_highlight::HighlightStyle, slow_pool::{MarkNodeId, SlowPool}};
+use super::{attribute::Attributes, nodes::{self, make_nested_mn}, nodes::MarkupNode};

 pub fn new_equals_mn() -> MarkupNode {
-    MarkupNode::Text {
-        content: nodes::EQUALS.to_owned(),
-        syn_high_style: HighlightStyle::Operator,
-        attributes: Attributes::default(),
-        parent_id_opt: None,
-        newlines_at_end: 0,
-    }
+    common_text_node(nodes::EQUALS.to_owned(), HighlightStyle::Operator, 0)
 }

 pub fn new_comma_mn() -> MarkupNode {
-    MarkupNode::Text {
-        content: nodes::COMMA.to_owned(),
-        syn_high_style: HighlightStyle::Comma,
-        attributes: Attributes::default(),
-        parent_id_opt: None,
-        newlines_at_end: 0,
-    }
+    common_text_node(nodes::COMMA.to_owned(), HighlightStyle::Operator, 0)
+}
+
+pub fn new_dot_mn() -> MarkupNode {
+    common_text_node(nodes::DOT.to_owned(), HighlightStyle::Operator, 0)
 }

 pub fn new_blank_mn() -> MarkupNode {
@@ -47,102 +39,98 @@ pub fn new_colon_mn() -> MarkupNode {
 pub fn new_operator_mn(
     content: String,
 ) -> MarkupNode {
-    MarkupNode::Text {
-        content,
-        syn_high_style: HighlightStyle::Operator,
-        attributes: Attributes::default(),
-        parent_id_opt: None,
-        newlines_at_end: 0,
-    }
+    common_text_node(content, HighlightStyle::Operator, 0)
 }

 pub fn new_left_accolade_mn() -> MarkupNode {
-    MarkupNode::Text {
-        content: nodes::LEFT_ACCOLADE.to_owned(),
-        syn_high_style: HighlightStyle::Bracket,
-        attributes: Attributes::default(),
-        parent_id_opt: None,
-        newlines_at_end: 0,
-    }
+    common_text_node(nodes::LEFT_ACCOLADE.to_owned(), HighlightStyle::Bracket, 0)
 }

 pub fn new_right_accolade_mn() -> MarkupNode {
-    MarkupNode::Text {
-        content: nodes::RIGHT_ACCOLADE.to_owned(),
-        syn_high_style: HighlightStyle::Bracket,
-        attributes: Attributes::default(),
-        parent_id_opt: None,
-        newlines_at_end: 0,
-    }
+    common_text_node(nodes::RIGHT_ACCOLADE.to_owned(), HighlightStyle::Bracket, 0)
 }

 pub fn new_left_square_mn() -> MarkupNode {
-    MarkupNode::Text {
-        content: nodes::LEFT_SQUARE_BR.to_owned(),
-        syn_high_style: HighlightStyle::Bracket,
-        attributes: Attributes::default(),
-        parent_id_opt: None,
-        newlines_at_end: 0,
-    }
+    common_text_node(nodes::LEFT_SQUARE_BR.to_owned(), HighlightStyle::Bracket, 0)
 }

 pub fn new_right_square_mn() -> MarkupNode {
-    MarkupNode::Text {
-        content: nodes::RIGHT_SQUARE_BR.to_owned(),
-        syn_high_style: HighlightStyle::Bracket,
-        attributes: Attributes::default(),
-        parent_id_opt: None,
-        newlines_at_end: 0,
-    }
+    common_text_node(nodes::RIGHT_SQUARE_BR.to_owned(), HighlightStyle::Bracket, 0)
 }

 pub fn new_func_name_mn(content: String) -> MarkupNode {
-    MarkupNode::Text {
-        content,
-        syn_high_style: HighlightStyle::FunctionName,
-        attributes: Attributes::default(),
-        parent_id_opt: None,
-        newlines_at_end: 0,
-    }
+    common_text_node(content, HighlightStyle::FunctionName, 0)
 }

 pub fn new_arg_name_mn(content: String) -> MarkupNode {
-    MarkupNode::Text {
-        content,
-        syn_high_style: HighlightStyle::FunctionArgName,
-        attributes: Attributes::default(),
-        parent_id_opt: None,
-        newlines_at_end: 0,
-    }
+    common_text_node(content, HighlightStyle::FunctionArgName, 0)
 }

 pub fn new_arrow_mn(newlines_at_end: usize) -> MarkupNode {
-    MarkupNode::Text {
-        content: nodes::ARROW.to_owned(),
-        syn_high_style: HighlightStyle::Operator,
-        attributes: Attributes::default(),
-        parent_id_opt: None,
-        newlines_at_end,
-    }
+    common_text_node(nodes::ARROW.to_owned(), HighlightStyle::Operator, newlines_at_end)
 }

 pub fn new_comments_mn(
-    comments: String,
+    comment: String,
     newlines_at_end: usize,
+) -> MarkupNode {
+    common_text_node(comment, HighlightStyle::Comment, newlines_at_end)
+}
+
+fn common_text_node(
+    content: String,
+    highlight_style: HighlightStyle,
+    newlines_at_end: usize
 ) -> MarkupNode {
     MarkupNode::Text {
-        content: comments,
-        syn_high_style: HighlightStyle::Comment,
+        content,
+        syn_high_style: highlight_style,
         attributes: Attributes::default(),
         parent_id_opt: None,
         newlines_at_end,
     }
 }

-pub fn assign_mn(val_name_mn_id: MarkNodeId, equals_mn_id: MarkNodeId, expr_mark_node_id: MarkNodeId) -> MarkupNode {
-    MarkupNode::Nested {
-        children_ids: vec![val_name_mn_id, equals_mn_id, expr_mark_node_id],
-        parent_id_opt: None,
-        newlines_at_end: 3,
-    }
+pub fn new_assign_mn(val_name_mn_id: MarkNodeId, equals_mn_id: MarkNodeId, expr_mark_node_id: MarkNodeId) -> MarkupNode {
+    make_nested_mn(vec![val_name_mn_id, equals_mn_id, expr_mark_node_id], 2)
+}
+
+pub fn new_module_name_mn_id(mn_ids: Vec<MarkNodeId>, mark_node_pool: &mut SlowPool) -> MarkNodeId {
+    if mn_ids.len() == 1 {
+        *mn_ids.get(0).unwrap() // safe because we checked the length before
+    } else {
+        let nested_node = make_nested_mn(mn_ids, 0);
+        mark_node_pool.add(nested_node)
+    }
 }
+
+pub fn new_module_var_mn(module_name_id: MarkNodeId, dot_id: MarkNodeId, ident_id: MarkNodeId) -> MarkupNode {
+    make_nested_mn(vec![module_name_id, dot_id, ident_id], 0)
+}
+
+pub fn if_mn() -> MarkupNode {
+    keyword_mn("if ")
+}
+
+pub fn then_mn() -> MarkupNode {
+    keyword_mn(" then ")
+}
+
+pub fn else_mn() -> MarkupNode {
+    keyword_mn(" else ")
+}
+
+fn keyword_mn(keyword: &str) -> MarkupNode {
+    common_text_node(keyword.to_owned(), HighlightStyle::Keyword, 0)
+}
+
+pub fn new_if_expr_mn(
+    if_mn_id: MarkNodeId,
+    cond_expr_mn_id: MarkNodeId,
+    then_mn_id: MarkNodeId,
+    then_expr_mn_id: MarkNodeId,
+    else_mn_id: MarkNodeId,
+    else_expr_mn_id: MarkNodeId,
+) -> MarkupNode {
+    make_nested_mn(vec![if_mn_id, cond_expr_mn_id, then_mn_id, then_expr_mn_id, else_mn_id, else_expr_mn_id], 1)
+}
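
For orientation, these constructors are consumed by the grammar changes further down in this commit: each `*_mn()` call builds a plain `MarkupNode`, and the caller registers it in a `SlowPool` to obtain a `MarkNodeId`. A minimal sketch of that pattern (illustrative only, not part of the diff):

```
// Hypothetical caller; mirrors the `mn_pool.add(if_mn())` calls in the
// grammar rules added to the highlight parser below.
let mut pool = SlowPool::default();
let if_id: MarkNodeId = pool.add(if_mn());         // "if " keyword text node
let eq_id: MarkNodeId = pool.add(new_equals_mn()); // " = " operator text node
```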

View file

@@ -3,5 +3,4 @@ pub mod common_nodes;
 pub mod convert;
 pub mod nodes;
 pub mod top_level_def;
 pub mod mark_id_ast_id_map;
-pub mod ast_context;

View file

@@ -245,6 +245,14 @@ impl MarkupNode {
     }
 }

+pub fn make_nested_mn(children_ids: Vec<MarkNodeId>, newlines_at_end: usize) -> MarkupNode {
+    MarkupNode::Nested {
+        children_ids,
+        parent_id_opt: None,
+        newlines_at_end,
+    }
+}
+
 pub fn get_string<'a>(env: &Env<'a>, pool_str: &PoolStr) -> String {
     pool_str.as_str(env.pool).to_owned()
 }

@@ -256,6 +264,7 @@ pub const LEFT_SQUARE_BR: &str = "[ ";
 pub const RIGHT_SQUARE_BR: &str = " ]";
 pub const COLON: &str = ": ";
 pub const COMMA: &str = ", ";
+pub const DOT: &str = ".";
 pub const STRING_QUOTES: &str = "\"\"";
 pub const EQUALS: &str = " = ";
 pub const ARROW: &str = " -> ";

View file

@@ -14,7 +14,7 @@ use crate::{
     syntax_highlight::HighlightStyle,
 };

-use super::{mark_id_ast_id_map::MarkIdAstIdMap, convert::from_def2::add_node, common_nodes::assign_mn};
+use super::{mark_id_ast_id_map::MarkIdAstIdMap, convert::from_def2::add_node, common_nodes::new_assign_mn};

 // represents for example: `main = "Hello, World!"`
 pub fn assignment_mark_node<'a>(

@@ -39,7 +39,7 @@ pub fn assignment_mark_node<'a>(
     let equals_mn_id = add_node(new_equals_mn(), ast_node_id, mark_node_pool, mark_id_ast_id_map);

-    Ok(assign_mn(val_name_mn_id, equals_mn_id, expr_mark_node_id))
+    Ok(new_assign_mn(val_name_mn_id, equals_mn_id, expr_mark_node_id))
 }

 pub fn tld_w_comments_mark_node(

View file

@@ -6,7 +6,6 @@ use crate::colors::{from_hsb, RgbaTup};
 #[derive(Hash, Eq, PartialEq, Copy, Clone, Debug, Deserialize, Serialize)]
 pub enum HighlightStyle {
     Operator, // =+-<>...
-    Comma,
     String,
     FunctionName,
     FunctionArgName,

@@ -23,6 +22,7 @@ pub enum HighlightStyle {
     DocsComment,
     UppercaseIdent, // TODO remove other HighlightStyle subtypes of UppercaseIdent?
     LowercaseIdent, // TODO remove other HighlightStyle subtypes of LowercaseIdent?
+    Keyword, // if, else, when
 }

 pub fn default_highlight_map() -> HashMap<HighlightStyle, RgbaTup> {

@@ -33,7 +33,6 @@ pub fn default_highlight_map() -> HashMap<HighlightStyle, RgbaTup> {
     let mut highlight_map = HashMap::new();
     [
         (Operator, from_hsb(185, 50, 75)),
-        (Comma, from_hsb(258, 50, 90)),
         (String, from_hsb(346, 65, 97)),
         (FunctionName, almost_white),
         (FunctionArgName, from_hsb(225, 50, 100)),

@@ -50,6 +49,7 @@ pub fn default_highlight_map() -> HashMap<HighlightStyle, RgbaTup> {
         (DocsComment, from_hsb(258, 50, 90)), // TODO check color
         (UppercaseIdent, almost_white),
         (LowercaseIdent, from_hsb(225, 50, 100)),
+        (Keyword, almost_white),
     ]
     .iter()
     .for_each(|tup| {

File diff suppressed because it is too large

View file

@@ -30,7 +30,6 @@ pub fn mark_node_to_html(
     let css_class = match syn_high_style {
         Operator => "operator",
-        Comma => "comma",
         String => "string",
         FunctionName => "function-name",
         FunctionArgName => "function-arg-name",

@@ -47,6 +46,7 @@ pub fn mark_node_to_html(
         DocsComment => "docs-comment",
         UppercaseIdent => "uppercase-ident",
         LowercaseIdent => "lowercase-ident",
+        Keyword => "keyword-ident",
     };

     write_html_to_buf(content, css_class, buf);

View file

@@ -98,7 +98,7 @@ mod insert_doc_syntax_highlighting {
     fn top_level_def_val_num() {
         expect_html_def(
             r#"myVal = 0"#,
-            "<span class=\"syntax-lowercase-ident\">myVal</span><span class=\"syntax-operator\"> = </span><span class=\"syntax-number\">0</span>\n\n\n",
+            "<span class=\"syntax-lowercase-ident\">myVal</span><span class=\"syntax-operator\"> = </span><span class=\"syntax-number\">0</span>\n\n",
         );
     }

View file

@@ -1,6 +1,6 @@
 use peg::error::ParseError;
 use roc_code_markup::markup::attribute::Attributes;
-use roc_code_markup::markup::common_nodes::{new_equals_mn, assign_mn};
+use roc_code_markup::markup::common_nodes::{new_equals_mn, new_dot_mn, new_assign_mn, new_module_name_mn_id, new_module_var_mn, if_mn, then_mn, else_mn, new_if_expr_mn};
 use roc_code_markup::slow_pool::{SlowPool, MarkNodeId};
 use roc_code_markup::{syntax_highlight::HighlightStyle};
 use roc_code_markup::markup::nodes::MarkupNode;
@@ -15,6 +15,27 @@ use crate::tokenizer::{Token, TokenTable, full_tokenize};
 // license information can be found in the LEGAL_DETAILS file in
 // the root directory of this distribution.
 // Thank you zig contributors!

+/*
+HOW TO ADD NEW RULES:
+- go to highlight/tests/peg_grammar.rs
+- find a variant of common_expr that is not implemented yet, for example `if_expr`
+- we add `if_expr()` to the `common_expr` rule, in the same order as in peg_grammar::common_expr()
+- we copy the if_expr rule from `peg_grammar.rs`
+- we add ` -> MarkNodeId` to the if_expr rule
+- we change the first full_expr in if_expr() to cond_e_id:full_expr(), the second to then_e_id:full_expr()...
+- we add if_mn(), else_mn(), then_mn() and new_if_expr_mn() to common_nodes.rs
+- we replace [T::KeywordIf], [T::KeywordThen]... with new if(), then()... rules that add an if/then/... node to the mark_node_pool
+- we bundle everything together in a nested node and save it in the mn_pool:
+  ```
+  {
+    mn_pool.add(
+      new_if_expr_mn(if_id, cond_e_id, then_id, then_e_id, else_id, else_e_id)
+    )
+  }
+  ```
+- we finish up by adding a test: `test_highlight_if_expr`
+*/
 peg::parser!{
   grammar highlightparser(t_table: &TokenTable, code_str: &str, mn_pool: &mut SlowPool) for [T] {

@@ -25,13 +46,33 @@ peg::parser!{
       opt_same_indent_expr()*

     rule opt_same_indent_expr() -> MarkNodeId =
-      [T::SameIndent]? e:full_expr() {e}
+      [T::SameIndent]? e_id:full_expr() {e_id}

     rule opt_same_indent_def() -> MarkNodeId =
-      [T::SameIndent]? d:def() {d}
+      [T::SameIndent]? d_id:def() {d_id}

     rule common_expr() -> MarkNodeId =
-      p:position!() [T::Number] { add_new_mn(t_table.extract_str(p, code_str), HighlightStyle::Number, mn_pool) }
+      if_expr()
+      / p:position!() [T::Number] { add_new_mn(t_table.extract_str(p, code_str), HighlightStyle::Number, mn_pool) }
+      / module_var()
+      / lowercase_ident()
+
+    rule if_expr() -> MarkNodeId =
+      if_id:if() cond_e_id:full_expr() then_id:then() then_e_id:full_expr() else_id:else_rule() else_e_id:full_expr()
+      {
+        mn_pool.add(
+          new_if_expr_mn(if_id, cond_e_id, then_id, then_e_id, else_id, else_e_id)
+        )
+      }
+
+    rule if() -> MarkNodeId =
+      [T::KeywordIf] { mn_pool.add(if_mn()) }
+
+    rule then() -> MarkNodeId =
+      [T::KeywordThen] { mn_pool.add(then_mn()) }
+
+    rule else_rule() -> MarkNodeId =
+      [T::KeywordElse] { mn_pool.add(else_mn()) }

     pub rule def() -> MarkNodeId =
       // annotated_body()

@@ -45,15 +86,46 @@ peg::parser!{
     rule body() -> MarkNodeId =
       ident_id:ident() as_id:assign() [T::OpenIndent] e_id:full_expr() /*TODO not sure when this is needed> es:full_exprs()*/ ([T::CloseIndent] / end_of_file())
-      {mn_pool.add(assign_mn(ident_id, as_id, e_id)) }
+      {
+        mn_pool.add(
+          new_assign_mn(ident_id, as_id, e_id)
+        )
+      }
       /
       ident_id:ident() as_id:assign() e_id:full_expr() end_of_file()?
-      {mn_pool.add(assign_mn(ident_id, as_id, e_id)) }
+      {
+        mn_pool.add(
+          new_assign_mn(ident_id, as_id, e_id)
+        )
+      }
+
+    rule module_var() -> MarkNodeId =
+      mod_name_id:module_name() dot_id:dot() ident_id:lowercase_ident() {
+        mn_pool.add(
+          new_module_var_mn(mod_name_id, dot_id, ident_id)
+        )
+      }
+
+    rule module_name() -> MarkNodeId =
+      first_ident_id:uppercase_ident() rest_ids:dot_idents() {
+        new_module_name_mn_id(
+          merge_ids(first_ident_id, rest_ids),
+          mn_pool
+        )
+      }

     rule assign() -> MarkNodeId =
       [T::OpAssignment] { mn_pool.add(new_equals_mn()) }

+    rule dot() -> MarkNodeId =
+      [T::Dot] { mn_pool.add(new_dot_mn()) }
+
+    rule dot_ident() -> (MarkNodeId, MarkNodeId) =
+      dot_id:dot() ident_id:uppercase_ident() { (dot_id, ident_id) }
+
+    rule dot_idents() -> Vec<MarkNodeId> =
+      di:dot_ident()* { flatten_tups(di) }
+
     rule ident() -> MarkNodeId =
       uppercase_ident()
       / lowercase_ident()

@@ -69,6 +141,21 @@ peg::parser!{
   }
 }

+fn merge_ids(
+    mn_id: MarkNodeId,
+    other_mn_id: Vec<MarkNodeId>
+) -> Vec<MarkNodeId> {
+    let mut ids = vec![mn_id];
+    let mut rest_ids: Vec<usize> = other_mn_id;
+
+    ids.append(&mut rest_ids);
+
+    ids
+}
+
+fn flatten_tups(tup_vec: Vec<(MarkNodeId, MarkNodeId)>) -> Vec<MarkNodeId> {
+    tup_vec.iter().flat_map(|(a, b)| vec![*a, *b]).collect()
+}

 fn add_new_mn(
     text: &str,

@@ -97,34 +184,46 @@ pub fn highlight_defs(code_str: &str, mark_node_pool: &mut SlowPool) -> Result<V
     highlightparser::module_defs(&token_table.tokens, &token_table, code_str, mark_node_pool)
 }

-/*pub fn highlight_temp(code_str: &str, mark_node_pool: &mut SlowPool) -> Result<MarkNodeId, ParseError<usize>> {
-    let token_table = full_tokenize(code_str);
-
-    highlightparser::def(&token_table.tokens, &token_table, code_str, mark_node_pool)
-}*/
 #[cfg(test)]
 pub mod highlight_tests {
     use roc_code_markup::{slow_pool::{SlowPool}, markup::nodes::{node_to_string_w_children}};

     use crate::highlight_parser::{highlight_expr, highlight_defs};

-    #[test]
-    fn test_highlight() {
+    fn test_highlight_expr(input: &str, expected_output: &str) {
         let mut mark_node_pool = SlowPool::default();

-        let mark_id = highlight_expr("0", &mut mark_node_pool).unwrap();
+        let mark_id = highlight_expr(input, &mut mark_node_pool).unwrap();
+
+        let mut str_buffer = String::new();
+
+        node_to_string_w_children(
+            mark_id,
+            &mut str_buffer,
+            &mark_node_pool
+        );

         assert_eq!(
-            &mark_node_pool
-                .get(
-                    mark_id
-                ).get_content(),
-            "0"
+            &str_buffer,
+            expected_output
         );
     }

+    #[test]
+    fn test_highlight() {
+        test_highlight_expr("0","0");
+    }
+
+    #[test]
+    fn test_highlight_module_var() {
+        test_highlight_expr("Foo.Bar.var","Foo.Bar.var");
+    }
+
+    #[test]
+    fn test_highlight_if_expr() {
+        test_highlight_expr("if booly then 42 else 31415", "if booly then 42 else 31415\n")
+    }
+
     #[test]
     fn test_highlight_defs() {
         let mut mark_node_pool = SlowPool::default();

@@ -140,7 +239,7 @@ pub mod highlight_tests {
         assert_eq!(
             &str_buffer,
-            "a = 0\n\n\n"
+            "a = 0\n\n"
         );
     }
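
Putting the new rules together, and going by the `test_highlight_if_expr` test above, an end-to-end driver looks roughly like this (a sketch assuming the same imports as the test module):

```
// Parse a single expression into the markup pool, then render it back.
let mut pool = SlowPool::default();
let root_id = highlight_expr("if booly then 42 else 31415", &mut pool).unwrap();

let mut out = String::new();
node_to_string_w_children(root_id, &mut out, &pool);

// new_if_expr_mn builds its nested node with newlines_at_end = 1,
// which is where the trailing "\n" comes from.
assert_eq!(out, "if booly then 42 else 31415\n");
```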

View file

@@ -1,2 +1,2 @@
-mod tokenizer;
+pub mod tokenizer;
 pub mod highlight_parser;

View file

@@ -1,506 +1,485 @@
 #[repr(u8)]
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 /// Tokens are full of very dense information to make checking properties about them
 /// very fast.
 /// Some bits have specific meanings:
 /// * 0b_001*_****: "Identifier-like" things
 /// * 0b_01**_****: "Punctuation"
 /// * 0b_0100_1***: []{}() INDENT/DEDENT
 /// * 0b_0100_1**0 [{(INDENT
 /// * 0b_0100_1**1 ]})DEDENT
 /// * 0b_011*_**** Operators
 pub enum Token {
     LowercaseIdent = 0b_0010_0000,
     UppercaseIdent = 0b_0011_0011,
     MalformedIdent = 0b_0010_0001,

     KeywordIf = 0b_0010_0010,
     KeywordThen = 0b_0010_0011,
     KeywordElse = 0b_0010_0100,
     KeywordWhen = 0b_0010_0101,
     KeywordAs = 0b_0010_0110,
     KeywordIs = 0b_0010_0111,
     KeywordExpect = 0b_0010_1000,
     KeywordApp = 0b_0010_1001,
     KeywordInterface = 0b_0010_1010,
     KeywordPackages = 0b_0010_1011,
     KeywordImports = 0b_0010_1100,
     KeywordProvides = 0b_0010_1101,
     KeywordTo = 0b_0010_1110,
     KeywordExposes = 0b_0010_1111,
     KeywordEffects = 0b_0011_0000,
     KeywordPlatform = 0b_0011_0001,
     KeywordRequires = 0b_0011_0010,

     Comma = 0b_0100_0000,
     Colon = 0b_0100_0001,

     OpenParen = 0b_0100_1000,
     CloseParen = 0b_0100_1001,
     OpenCurly = 0b_0100_1010,
     CloseCurly = 0b_0100_1011,
     OpenSquare = 0b_0100_1100,
     CloseSquare = 0b_0100_1101,
     OpenIndent = 0b_0100_1110,
     CloseIndent = 0b_0100_1111,
     SameIndent = 0b_0101_0000,

     OpPlus = 0b_0110_0000,
     OpMinus = 0b_0110_0001,
     OpSlash = 0b_0110_0010,
     OpPercent = 0b_0110_0011,
     OpCaret = 0b_0110_0100,
     OpGreaterThan = 0b_0110_0101,
     OpLessThan = 0b_0110_0110,
     OpAssignment = 0b_0110_0111,
     OpPizza = 0b_0110_1000,
     OpEquals = 0b_0110_1001,
     OpNotEquals = 0b_0110_1010,
     OpGreaterThanOrEq = 0b_0110_1011,
     OpLessThanOrEq = 0b_0110_1100,
     OpAnd = 0b_0110_1101,
     OpOr = 0b_0110_1110,
     OpDoubleSlash = 0b_0110_1111,
     OpDoublePercent = 0b_0111_0001,
     OpBackpassing = 0b_0111_1010,

     TodoNextThing = 0b_1000_0000,

     Malformed,
     MalformedOperator,

     PrivateTag,
     String,
     NumberBase,
     Number,
     QuestionMark,
     Underscore,
     Ampersand,
     Pipe,
     Dot,
     SpaceDot, // ` .` necessary to know difference between `Result.map .position` and `Result.map.position`
     Bang,
     LambdaStart,
     Arrow,
     FatArrow,
     Asterisk,
 }

 #[derive(Default)]
 pub struct TokenTable {
     pub tokens: Vec<Token>,
     pub offsets: Vec<usize>,
     pub lengths: Vec<usize>,
 }

 pub struct LexState {
     indents: Vec<usize>,
 }

 trait ConsumeToken {
     fn token(&mut self, token: Token, _offset: usize, _length: usize);
 }

 #[derive(Default)]
 struct TokenConsumer{
     token_table: TokenTable,
 }

 impl ConsumeToken for TokenConsumer {
     fn token(&mut self, token: Token, offset: usize, length: usize){
         self.token_table.tokens.push(token);
         self.token_table.offsets.push(offset);
         self.token_table.lengths.push(length);
     }
 }
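
The bit layout documented on `Token` is what makes property checks fast: a single mask-and-compare classifies a token without any lookup table. A sketch of checks the layout supports (the helper names are illustrative, not part of this commit):

```
// 0b_001*_****: identifier-like, i.e. idents and all keywords.
fn is_identifier_like(tok: Token) -> bool {
    (tok as u8) & 0b1110_0000 == 0b0010_0000
}

// 0b_011*_****: operators.
fn is_operator(tok: Token) -> bool {
    (tok as u8) & 0b1110_0000 == 0b0110_0000
}

// 0b_0100_1**0: opening delimiters ( [ { and OpenIndent.
fn is_open_grouping(tok: Token) -> bool {
    (tok as u8) & 0b1111_1001 == 0b0100_1000
}
```

Note this only holds for the variants with explicit discriminants; `Malformed` and the variants after it take auto-assigned values above `TodoNextThing = 0b_1000_0000`, which none of these masks match.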
 pub fn tokenize(code_str: &str) -> Vec<Token> {
     full_tokenize(code_str).tokens
 }

 pub fn full_tokenize(code_str: &str) -> TokenTable {
     let mut lex_state = LexState{ indents: Vec::new() };
     let mut consumer = TokenConsumer::default();

     consume_all_tokens(
         &mut lex_state,
         code_str.as_bytes(),
         &mut consumer
     );

     consumer.token_table
 }

 fn consume_all_tokens(
     state: &mut LexState,
     bytes: &[u8],
     consumer: &mut impl ConsumeToken,
 ) {
     let mut i = 0;

     while i < bytes.len() {
         let bytes = &bytes[i..];

         let (token, len) = match bytes[0] {
             b'(' => (Token::OpenParen, 1),
             b')' => (Token::CloseParen, 1),
             b'{' => (Token::OpenCurly, 1),
             b'}' => (Token::CloseCurly, 1),
             b'[' => (Token::OpenSquare, 1),
             b']' => (Token::CloseSquare, 1),
             b',' => (Token::Comma, 1),
             b'_' => lex_underscore(bytes),
             b'@' => lex_private_tag(bytes),
             b'a'..=b'z' => lex_ident(false, bytes),
             b'A'..=b'Z' => lex_ident(true, bytes),
             b'0'..=b'9' => lex_number(bytes),
             b'-' | b':' | b'!' | b'.' | b'*' | b'/' | b'&' |
             b'%' | b'^' | b'+' | b'<' | b'=' | b'>' | b'|' | b'\\' => lex_operator(bytes),
             b' ' => {
                 match skip_whitespace(bytes) {
                     SpaceDotOrSpaces::SpacesWSpaceDot(skip) => {
                         i += skip;
                         (Token::SpaceDot, 1)
                     },
                     SpaceDotOrSpaces::Spaces(skip) => {
                         i += skip;
                         continue;
                     }
                 }
             }
             b'\n' => {
                 // TODO: add newline to side_table
                 let skip_newline_return = skip_newlines_and_comments(bytes);

                 match skip_newline_return {
                     SkipNewlineReturn::SkipWIndent(skipped_lines, curr_line_indent) => {
                         add_indents(skipped_lines, curr_line_indent, state, consumer, &mut i);
                         continue;
                     }
                     SkipNewlineReturn::WSpaceDot(skipped_lines, curr_line_indent) => {
                         add_indents(skipped_lines, curr_line_indent, state, consumer, &mut i);
                         (Token::SpaceDot, 1)
                     }
                 }
             }
             b'#' => {
                 // TODO: add comment to side_table
                 i += skip_comment(bytes);
                 continue;
             }
             b'"' => lex_string(bytes),
             b => todo!("handle {:?}", b as char),
         };

         consumer.token(token, i, len);
         i += len;
     }
 }
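
A quick sanity sketch of the public entry point, derived from the match in `consume_all_tokens` above (a lowercase run lexes to `LowercaseIdent`, `=` to `OpAssignment`, a digit run to `Number`, and plain spaces are skipped); this is an illustration, not a test from the commit:

```
let tokens = tokenize("myVal = 0");
assert_eq!(
    tokens,
    vec![Token::LowercaseIdent, Token::OpAssignment, Token::Number]
);
```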
 fn add_indents(skipped_lines: usize, curr_line_indent: usize, state: &mut LexState, consumer: &mut impl ConsumeToken, curr_byte_ctr: &mut usize) {
     *curr_byte_ctr += skipped_lines;

     if let Some(&prev_indent) = state.indents.last() {
         if curr_line_indent > prev_indent {
             state.indents.push(curr_line_indent);
             consumer.token(Token::OpenIndent, *curr_byte_ctr, 0);
         } else {
             *curr_byte_ctr += curr_line_indent;

             if prev_indent == curr_line_indent {
                 consumer.token(Token::SameIndent, *curr_byte_ctr, 0);
             } else if curr_line_indent < prev_indent {
                 // safe unwrap because we check first
                 while state.indents.last().is_some() && curr_line_indent < *state.indents.last().unwrap() {
                     state.indents.pop();
                     consumer.token(Token::CloseIndent, *curr_byte_ctr, 0);
                 }
             }
         }
     } else if curr_line_indent > 0 {
         state.indents.push(curr_line_indent);
         consumer.token(Token::OpenIndent, *curr_byte_ctr, 0);
     } else {
         consumer.token(Token::SameIndent, *curr_byte_ctr, 0);
     }
 }
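
To see the indent bookkeeping in action: a `'\n'` byte routes through `skip_newlines_and_comments` (defined further down), whose `(skipped_lines, curr_line_indent)` result drives `add_indents` to emit the synthetic `OpenIndent`/`SameIndent`/`CloseIndent` tokens that the grammar's `body()` rule matches on. A hypothetical walkthrough:

```
// "a =\n    4"
//   "a"    -> LowercaseIdent
//   "="    -> OpAssignment
//   "\n"   -> skip_newlines_and_comments reports an indent of 4; the indent
//             stack is empty and 4 > 0, so add_indents emits OpenIndent
//   "4"    -> Number
let tokens = tokenize("a =\n    4");
assert_eq!(
    tokens,
    vec![Token::LowercaseIdent, Token::OpAssignment, Token::OpenIndent, Token::Number]
);
```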
 impl TokenTable {
-    pub fn new(text: &str) -> TokenTable {
-        let mut tt = TokenTable {
-            tokens: Vec::new(),
-            offsets: Vec::new(),
-            lengths: Vec::new(),
-        };
-
-        let mut offset = 0;
-        let mut state = LexState::new();
-
-        // while let Some((token, skip, length)) = Token::lex_single(&mut state, &text.as_bytes()[offset..]) {
-        //     tt.tokens.push(token);
-        //     offset += skip;
-        //     tt.offsets.push(offset);
-        //     offset += length;
-        //     tt.lengths.push(length);
-        // }
-
-        tt
-    }
-
     pub fn extract_str<'a>(&self, index: usize, content: &'a str) -> &'a str {
         // TODO remove unwrap
         let len = *self.lengths.get(index).unwrap();
         let offset = *self.offsets.get(index).unwrap();

         &content[offset..(offset + len)]
     }
 }

 impl LexState {
     pub fn new() -> LexState {
         LexState {
             indents: Vec::new(),
         }
     }
 }

 fn skip_comment(bytes: &[u8]) -> usize {
     let mut skip = 0;

     while skip < bytes.len() && bytes[skip] != b'\n' {
         skip += 1;
     }

     if (skip + 1) < bytes.len() && bytes[skip] == b'\n' && bytes[skip+1] == b'#' {
         skip += 1;
     }

     skip
 }
 #[derive(Copy, Clone, Eq, PartialEq, PartialOrd, Ord)]
 struct Indent(usize);

 enum SpaceDotOrSpaces {
     SpacesWSpaceDot(usize),
     Spaces(usize)
 }

 fn skip_whitespace(bytes: &[u8]) -> SpaceDotOrSpaces {
     debug_assert!(bytes[0] == b' ');

     let mut skip = 0;

     while skip < bytes.len() && bytes[skip] == b' ' {
         skip += 1;
     }

     if skip < bytes.len() && bytes[skip] == b'.' {
         SpaceDotOrSpaces::SpacesWSpaceDot(skip)
     } else {
         SpaceDotOrSpaces::Spaces(skip)
     }
 }

 enum SkipNewlineReturn {
     SkipWIndent(usize, usize),
     WSpaceDot(usize, usize)
 }

 // also skips lines that contain only whitespace
 fn skip_newlines_and_comments(bytes: &[u8]) -> SkipNewlineReturn {
     let mut skip = 0;
     let mut indent = 0;

     while skip < bytes.len() && bytes[skip] == b'\n' {
         skip += indent + 1;

         if bytes.len() > skip {
             if bytes[skip] == b' ' {
                 let space_dot_or_spaces = skip_whitespace(&bytes[skip..]);

                 match space_dot_or_spaces {
                     SpaceDotOrSpaces::SpacesWSpaceDot(spaces) => {
                         return SkipNewlineReturn::WSpaceDot(skip, spaces)
                     }
                     SpaceDotOrSpaces::Spaces(spaces) => {
                         if bytes.len() > (skip + spaces) {
                             if bytes[skip + spaces] == b'\n' {
                                 indent = 0;
                                 skip += spaces;
                             } else if bytes[skip+spaces] == b'#' {
                                 let comment_skip = skip_comment(&bytes[(skip + spaces)..]);

                                 indent = 0;
                                 skip += spaces + comment_skip;
                             } else {
                                 indent = spaces;
                             }
                         } else {
                             indent = spaces;
                         }
                     }
                 }
             } else {
                 while bytes[skip] == b'#' {
                     let comment_skip = skip_comment(&bytes[skip..]);

                     indent = 0;
                     skip += comment_skip;
                 }
             }
         }
     }

     SkipNewlineReturn::SkipWIndent(skip, indent)
 }
 fn is_op_continue(ch: u8) -> bool {
     matches!(ch, b'-' | b':' | b'!' | b'.' | b'*' | b'/' | b'&' |
              b'%' | b'^' | b'+' | b'<' | b'=' | b'>' | b'|' | b'\\')
 }

 fn lex_operator(bytes: &[u8]) -> (Token, usize) {
     let mut i = 0;

     while i < bytes.len() && is_op_continue(bytes[i]) {
         i += 1;
     }

     let tok = match &bytes[0..i] {
         b"+" => Token::OpPlus,
         b"-" => Token::OpMinus,
         b"*" => Token::Asterisk,
         b"/" => Token::OpSlash,
         b"%" => Token::OpPercent,
         b"^" => Token::OpCaret,
         b">" => Token::OpGreaterThan,
         b"<" => Token::OpLessThan,
         b"." => Token::Dot,
         b"=" => Token::OpAssignment,
         b":" => Token::Colon,
         b"|" => Token::Pipe,
         b"\\" => Token::LambdaStart,
         b"|>" => Token::OpPizza,
         b"==" => Token::OpEquals,
         b"!" => Token::Bang,
         b"!=" => Token::OpNotEquals,
         b">=" => Token::OpGreaterThanOrEq,
         b"<=" => Token::OpLessThanOrEq,
         b"&&" => Token::OpAnd,
         b"&" => Token::Ampersand,
         b"||" => Token::OpOr,
         b"//" => Token::OpDoubleSlash,
         b"%%" => Token::OpDoublePercent,
         b"->" => Token::Arrow,
         b"<-" => Token::OpBackpassing,
         op => {
             dbg!(std::str::from_utf8(op).unwrap());
             Token::MalformedOperator
         }
     };

     (tok, i)
 }

 fn is_ident_continue(ch: u8) -> bool {
     matches!(ch, b'a'..=b'z'|b'A'..=b'Z'|b'0'..=b'9'|b'_')
 }

 fn lex_private_tag(bytes: &[u8]) -> (Token, usize) {
     debug_assert!(bytes[0] == b'@');

     let mut i = 1;

     while i < bytes.len() && is_ident_continue(bytes[i]) {
         i += 1;
     }

     (Token::PrivateTag, i)
 }

 fn lex_ident(uppercase: bool, bytes: &[u8]) -> (Token, usize) {
     let mut i = 0;

     while i < bytes.len() && is_ident_continue(bytes[i]) {
         i += 1;
     }

     let tok = match &bytes[0..i] {
         b"if" => Token::KeywordIf,
         b"then" => Token::KeywordThen,
         b"else" => Token::KeywordElse,
         b"when" => Token::KeywordWhen,
         b"as" => Token::KeywordAs,
         b"is" => Token::KeywordIs,
         b"expect" => Token::KeywordExpect,
         b"app" => Token::KeywordApp,
         b"interface" => Token::KeywordInterface,
         b"packages" => Token::KeywordPackages,
         b"imports" => Token::KeywordImports,
         b"provides" => Token::KeywordProvides,
         b"to" => Token::KeywordTo,
         b"exposes" => Token::KeywordExposes,
         b"effects" => Token::KeywordEffects,
         b"platform" => Token::KeywordPlatform,
         b"requires" => Token::KeywordRequires,
         ident => {
             if ident.contains(&b'_') {
                 Token::MalformedIdent
             } else if uppercase {
                 Token::UppercaseIdent
             } else {
                 Token::LowercaseIdent
             }
         },
     };

     (tok, i)
 }

 fn lex_underscore(bytes: &[u8]) -> (Token, usize) {
     let mut i = 0;

     while i < bytes.len() && is_ident_continue(bytes[i]) {
         i += 1;
     }

     (Token::Underscore, i)
 }

 fn is_int_continue(ch: u8) -> bool {
     matches!(ch, b'0'..=b'9' | b'_')
 }

 fn lex_number(bytes: &[u8]) -> (Token, usize) {
     let mut i = 0;

     while i < bytes.len() && is_int_continue(bytes[i]) {
         i += 1;
     }

     if i < bytes.len() && bytes[i] == b'.' {
         i += 1;

         while i < bytes.len() && is_int_continue(bytes[i]) {
             i += 1;
         }
     }

     (Token::Number, i)
 }

 fn lex_string(bytes: &[u8]) -> (Token, usize) {
     let mut i = 0;

     assert_eq!(bytes[i], b'"');
     i += 1;

     while i < bytes.len() {
         match bytes[i] {
             b'"' => break,
             // TODO: escapes
             _ => i += 1,
         }
     }

     assert_eq!(bytes[i], b'"');
     i += 1;

     (Token::String, i)
 }

 #[cfg(test)]
 mod tokenizer {
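
One detail worth calling out in `lex_operator`: it first consumes the entire run of operator characters and only then matches the slice, so multi-character operators always win over their prefixes, and an unrecognized run becomes `MalformedOperator` rather than two valid tokens. An illustrative sketch:

```
// "|" alone lexes as Token::Pipe, but the two-byte run "|>" is matched as
// one slice and yields Token::OpPizza.
assert_eq!(
    tokenize("1 |> inc"),
    vec![Token::Number, Token::OpPizza, Token::LowercaseIdent]
);
```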

File diff suppressed because it is too large