finished if highlighting

This commit is contained in:
Anton-4 2022-03-07 19:44:24 +01:00
parent 37704323b1
commit ec1e2cd1d0
No known key found for this signature in database
GPG key ID: C954D6E0F9C0ABFD
12 changed files with 1988 additions and 2488 deletions

View file

@ -1,25 +1,17 @@
use crate::{syntax_highlight::HighlightStyle, slow_pool::MarkNodeId}; use crate::{syntax_highlight::HighlightStyle, slow_pool::{MarkNodeId, SlowPool}};
use super::{attribute::Attributes, nodes, nodes::MarkupNode}; use super::{attribute::Attributes, nodes::{self, make_nested_mn}, nodes::MarkupNode};
pub fn new_equals_mn() -> MarkupNode { pub fn new_equals_mn() -> MarkupNode {
MarkupNode::Text { common_text_node(nodes::EQUALS.to_owned(), HighlightStyle::Operator, 0)
content: nodes::EQUALS.to_owned(),
syn_high_style: HighlightStyle::Operator,
attributes: Attributes::default(),
parent_id_opt: None,
newlines_at_end: 0,
}
} }
pub fn new_comma_mn() -> MarkupNode { pub fn new_comma_mn() -> MarkupNode {
MarkupNode::Text { common_text_node(nodes::COMMA.to_owned(), HighlightStyle::Operator, 0)
content: nodes::COMMA.to_owned(), }
syn_high_style: HighlightStyle::Comma,
attributes: Attributes::default(), pub fn new_dot_mn() -> MarkupNode {
parent_id_opt: None, common_text_node(nodes::DOT.to_owned(), HighlightStyle::Operator, 0)
newlines_at_end: 0,
}
} }
pub fn new_blank_mn() -> MarkupNode { pub fn new_blank_mn() -> MarkupNode {
@ -47,102 +39,98 @@ pub fn new_colon_mn() -> MarkupNode {
pub fn new_operator_mn( pub fn new_operator_mn(
content: String, content: String,
) -> MarkupNode { ) -> MarkupNode {
MarkupNode::Text { common_text_node(content, HighlightStyle::Operator, 0)
content,
syn_high_style: HighlightStyle::Operator,
attributes: Attributes::default(),
parent_id_opt: None,
newlines_at_end: 0,
}
} }
pub fn new_left_accolade_mn() -> MarkupNode { pub fn new_left_accolade_mn() -> MarkupNode {
MarkupNode::Text { common_text_node(nodes::LEFT_ACCOLADE.to_owned(), HighlightStyle::Bracket, 0)
content: nodes::LEFT_ACCOLADE.to_owned(),
syn_high_style: HighlightStyle::Bracket,
attributes: Attributes::default(),
parent_id_opt: None,
newlines_at_end: 0,
}
} }
pub fn new_right_accolade_mn() -> MarkupNode { pub fn new_right_accolade_mn() -> MarkupNode {
MarkupNode::Text { common_text_node(nodes::RIGHT_ACCOLADE.to_owned(), HighlightStyle::Bracket, 0)
content: nodes::RIGHT_ACCOLADE.to_owned(),
syn_high_style: HighlightStyle::Bracket,
attributes: Attributes::default(),
parent_id_opt: None,
newlines_at_end: 0,
}
} }
pub fn new_left_square_mn() -> MarkupNode { pub fn new_left_square_mn() -> MarkupNode {
MarkupNode::Text { common_text_node(nodes::LEFT_SQUARE_BR.to_owned(), HighlightStyle::Bracket, 0)
content: nodes::LEFT_SQUARE_BR.to_owned(),
syn_high_style: HighlightStyle::Bracket,
attributes: Attributes::default(),
parent_id_opt: None,
newlines_at_end: 0,
}
} }
pub fn new_right_square_mn() -> MarkupNode { pub fn new_right_square_mn() -> MarkupNode {
MarkupNode::Text { common_text_node(nodes::RIGHT_SQUARE_BR.to_owned(), HighlightStyle::Bracket, 0)
content: nodes::RIGHT_SQUARE_BR.to_owned(),
syn_high_style: HighlightStyle::Bracket,
attributes: Attributes::default(),
parent_id_opt: None,
newlines_at_end: 0,
}
} }
pub fn new_func_name_mn(content: String) -> MarkupNode { pub fn new_func_name_mn(content: String) -> MarkupNode {
MarkupNode::Text { common_text_node(content, HighlightStyle::FunctionName, 0)
content,
syn_high_style: HighlightStyle::FunctionName,
attributes: Attributes::default(),
parent_id_opt: None,
newlines_at_end: 0,
}
} }
pub fn new_arg_name_mn(content: String) -> MarkupNode { pub fn new_arg_name_mn(content: String) -> MarkupNode {
MarkupNode::Text { common_text_node(content, HighlightStyle::FunctionArgName, 0)
content,
syn_high_style: HighlightStyle::FunctionArgName,
attributes: Attributes::default(),
parent_id_opt: None,
newlines_at_end: 0,
}
} }
pub fn new_arrow_mn(newlines_at_end: usize) -> MarkupNode { pub fn new_arrow_mn(newlines_at_end: usize) -> MarkupNode {
MarkupNode::Text { common_text_node(nodes::ARROW.to_owned(), HighlightStyle::Operator, newlines_at_end)
content: nodes::ARROW.to_owned(),
syn_high_style: HighlightStyle::Operator,
attributes: Attributes::default(),
parent_id_opt: None,
newlines_at_end,
}
} }
pub fn new_comments_mn( pub fn new_comments_mn(
comments: String, comment: String,
newlines_at_end: usize, newlines_at_end: usize,
) -> MarkupNode {
common_text_node(comment, HighlightStyle::Comment, newlines_at_end)
}
fn common_text_node(
content: String,
highlight_style: HighlightStyle,
newlines_at_end: usize
) -> MarkupNode { ) -> MarkupNode {
MarkupNode::Text { MarkupNode::Text {
content: comments, content,
syn_high_style: HighlightStyle::Comment, syn_high_style: highlight_style,
attributes: Attributes::default(), attributes: Attributes::default(),
parent_id_opt: None, parent_id_opt: None,
newlines_at_end, newlines_at_end,
} }
} }
pub fn assign_mn(val_name_mn_id: MarkNodeId, equals_mn_id: MarkNodeId, expr_mark_node_id: MarkNodeId) -> MarkupNode { pub fn new_assign_mn(val_name_mn_id: MarkNodeId, equals_mn_id: MarkNodeId, expr_mark_node_id: MarkNodeId) -> MarkupNode {
MarkupNode::Nested { make_nested_mn(vec![val_name_mn_id, equals_mn_id, expr_mark_node_id], 2)
children_ids: vec![val_name_mn_id, equals_mn_id, expr_mark_node_id], }
parent_id_opt: None,
newlines_at_end: 3, pub fn new_module_name_mn_id(mn_ids: Vec<MarkNodeId>, mark_node_pool: &mut SlowPool) -> MarkNodeId {
if mn_ids.len() == 1 {
*mn_ids.get(0).unwrap() // safe because we checked the length before
} else {
let nested_node = make_nested_mn(mn_ids, 0);
mark_node_pool.add(nested_node)
} }
} }
/// Builds the nested markup node for a module-qualified variable.
///
/// The children are stored in the order module name, dot, identifier, and the
/// node is created with zero newlines at its end.
pub fn new_module_var_mn(module_name_id: MarkNodeId, dot_id: MarkNodeId, ident_id: MarkNodeId) -> MarkupNode {
    let children_ids = vec![module_name_id, dot_id, ident_id];

    make_nested_mn(children_ids, 0)
}
/// Builds the keyword markup node for `if`; the text carries a trailing space.
pub fn if_mn() -> MarkupNode {
    const IF_TEXT: &str = "if ";

    keyword_mn(IF_TEXT)
}
/// Builds the keyword markup node for `then`; the text is padded with one
/// space on each side.
pub fn then_mn() -> MarkupNode {
    const THEN_TEXT: &str = " then ";

    keyword_mn(THEN_TEXT)
}
/// Builds the keyword markup node for `else`; the text is padded with one
/// space on each side.
pub fn else_mn() -> MarkupNode {
    const ELSE_TEXT: &str = " else ";

    keyword_mn(ELSE_TEXT)
}
/// Wraps `keyword` in a text node highlighted as `HighlightStyle::Keyword`,
/// with zero newlines at its end.
fn keyword_mn(keyword: &str) -> MarkupNode {
    let content = String::from(keyword);

    common_text_node(content, HighlightStyle::Keyword, 0)
}
/// Bundles the six parts of an if-expression into a single nested markup node.
///
/// Children are stored in source order: `if`, condition, `then`, then-branch,
/// `else`, else-branch. The node is created with one newline at its end.
pub fn new_if_expr_mn(
    if_mn_id: MarkNodeId,
    cond_expr_mn_id: MarkNodeId,
    then_mn_id: MarkNodeId,
    then_expr_mn_id: MarkNodeId,
    else_mn_id: MarkNodeId,
    else_expr_mn_id: MarkNodeId,
) -> MarkupNode {
    let children_ids = vec![
        if_mn_id,
        cond_expr_mn_id,
        then_mn_id,
        then_expr_mn_id,
        else_mn_id,
        else_expr_mn_id,
    ];

    make_nested_mn(children_ids, 1)
}

View file

@ -4,4 +4,3 @@ pub mod convert;
pub mod nodes; pub mod nodes;
pub mod top_level_def; pub mod top_level_def;
pub mod mark_id_ast_id_map; pub mod mark_id_ast_id_map;
pub mod ast_context;

View file

@ -245,6 +245,14 @@ impl MarkupNode {
} }
} }
/// Creates a `MarkupNode::Nested` holding `children_ids`.
///
/// The node starts without a parent (`parent_id_opt` is `None`) and records
/// `newlines_at_end` unchanged.
pub fn make_nested_mn(children_ids: Vec<MarkNodeId>, newlines_at_end: usize) -> MarkupNode {
    // A freshly built node is not attached to any parent yet.
    let parent_id_opt = None;

    MarkupNode::Nested {
        children_ids,
        parent_id_opt,
        newlines_at_end,
    }
}
pub fn get_string<'a>(env: &Env<'a>, pool_str: &PoolStr) -> String { pub fn get_string<'a>(env: &Env<'a>, pool_str: &PoolStr) -> String {
pool_str.as_str(env.pool).to_owned() pool_str.as_str(env.pool).to_owned()
} }
@ -256,6 +264,7 @@ pub const LEFT_SQUARE_BR: &str = "[ ";
pub const RIGHT_SQUARE_BR: &str = " ]"; pub const RIGHT_SQUARE_BR: &str = " ]";
pub const COLON: &str = ": "; pub const COLON: &str = ": ";
pub const COMMA: &str = ", "; pub const COMMA: &str = ", ";
pub const DOT: &str = ".";
pub const STRING_QUOTES: &str = "\"\""; pub const STRING_QUOTES: &str = "\"\"";
pub const EQUALS: &str = " = "; pub const EQUALS: &str = " = ";
pub const ARROW: &str = " -> "; pub const ARROW: &str = " -> ";

View file

@ -14,7 +14,7 @@ use crate::{
syntax_highlight::HighlightStyle, syntax_highlight::HighlightStyle,
}; };
use super::{mark_id_ast_id_map::MarkIdAstIdMap, convert::from_def2::add_node, common_nodes::assign_mn}; use super::{mark_id_ast_id_map::MarkIdAstIdMap, convert::from_def2::add_node, common_nodes::new_assign_mn};
// represents for example: `main = "Hello, World!"` // represents for example: `main = "Hello, World!"`
pub fn assignment_mark_node<'a>( pub fn assignment_mark_node<'a>(
@ -39,7 +39,7 @@ pub fn assignment_mark_node<'a>(
let equals_mn_id = add_node(new_equals_mn(), ast_node_id, mark_node_pool, mark_id_ast_id_map); let equals_mn_id = add_node(new_equals_mn(), ast_node_id, mark_node_pool, mark_id_ast_id_map);
Ok(assign_mn(val_name_mn_id, equals_mn_id, expr_mark_node_id)) Ok(new_assign_mn(val_name_mn_id, equals_mn_id, expr_mark_node_id))
} }
pub fn tld_w_comments_mark_node( pub fn tld_w_comments_mark_node(

View file

@ -6,7 +6,6 @@ use crate::colors::{from_hsb, RgbaTup};
#[derive(Hash, Eq, PartialEq, Copy, Clone, Debug, Deserialize, Serialize)] #[derive(Hash, Eq, PartialEq, Copy, Clone, Debug, Deserialize, Serialize)]
pub enum HighlightStyle { pub enum HighlightStyle {
Operator, // =+-<>... Operator, // =+-<>...
Comma,
String, String,
FunctionName, FunctionName,
FunctionArgName, FunctionArgName,
@ -23,6 +22,7 @@ pub enum HighlightStyle {
DocsComment, DocsComment,
UppercaseIdent, // TODO remove other HighlightStyle subtypes of UppercaseIdent? UppercaseIdent, // TODO remove other HighlightStyle subtypes of UppercaseIdent?
LowercaseIdent, // TODO remove other HighlightStyle subtypes of LowercaseIdent? LowercaseIdent, // TODO remove other HighlightStyle subtypes of LowercaseIdent?
Keyword, // if, else, when
} }
pub fn default_highlight_map() -> HashMap<HighlightStyle, RgbaTup> { pub fn default_highlight_map() -> HashMap<HighlightStyle, RgbaTup> {
@ -33,7 +33,6 @@ pub fn default_highlight_map() -> HashMap<HighlightStyle, RgbaTup> {
let mut highlight_map = HashMap::new(); let mut highlight_map = HashMap::new();
[ [
(Operator, from_hsb(185, 50, 75)), (Operator, from_hsb(185, 50, 75)),
(Comma, from_hsb(258, 50, 90)),
(String, from_hsb(346, 65, 97)), (String, from_hsb(346, 65, 97)),
(FunctionName, almost_white), (FunctionName, almost_white),
(FunctionArgName, from_hsb(225, 50, 100)), (FunctionArgName, from_hsb(225, 50, 100)),
@ -50,6 +49,7 @@ pub fn default_highlight_map() -> HashMap<HighlightStyle, RgbaTup> {
(DocsComment, from_hsb(258, 50, 90)), // TODO check color (DocsComment, from_hsb(258, 50, 90)), // TODO check color
(UppercaseIdent, almost_white), (UppercaseIdent, almost_white),
(LowercaseIdent, from_hsb(225, 50, 100)), (LowercaseIdent, from_hsb(225, 50, 100)),
(Keyword, almost_white),
] ]
.iter() .iter()
.for_each(|tup| { .for_each(|tup| {

File diff suppressed because it is too large Load diff

View file

@ -30,7 +30,6 @@ pub fn mark_node_to_html(
let css_class = match syn_high_style { let css_class = match syn_high_style {
Operator => "operator", Operator => "operator",
Comma => "comma",
String => "string", String => "string",
FunctionName => "function-name", FunctionName => "function-name",
FunctionArgName => "function-arg-name", FunctionArgName => "function-arg-name",
@ -47,6 +46,7 @@ pub fn mark_node_to_html(
DocsComment => "docs-comment", DocsComment => "docs-comment",
UppercaseIdent => "uppercase-ident", UppercaseIdent => "uppercase-ident",
LowercaseIdent => "lowercase-ident", LowercaseIdent => "lowercase-ident",
Keyword => "keyword-ident",
}; };
write_html_to_buf(content, css_class, buf); write_html_to_buf(content, css_class, buf);

View file

@ -98,7 +98,7 @@ mod insert_doc_syntax_highlighting {
fn top_level_def_val_num() { fn top_level_def_val_num() {
expect_html_def( expect_html_def(
r#"myVal = 0"#, r#"myVal = 0"#,
"<span class=\"syntax-lowercase-ident\">myVal</span><span class=\"syntax-operator\"> = </span><span class=\"syntax-number\">0</span>\n\n\n", "<span class=\"syntax-lowercase-ident\">myVal</span><span class=\"syntax-operator\"> = </span><span class=\"syntax-number\">0</span>\n\n",
); );
} }

View file

@ -1,6 +1,6 @@
use peg::error::ParseError; use peg::error::ParseError;
use roc_code_markup::markup::attribute::Attributes; use roc_code_markup::markup::attribute::Attributes;
use roc_code_markup::markup::common_nodes::{new_equals_mn, assign_mn}; use roc_code_markup::markup::common_nodes::{new_equals_mn, new_dot_mn, new_assign_mn, new_module_name_mn_id, new_module_var_mn, if_mn, then_mn, else_mn, new_if_expr_mn};
use roc_code_markup::slow_pool::{SlowPool, MarkNodeId}; use roc_code_markup::slow_pool::{SlowPool, MarkNodeId};
use roc_code_markup::{syntax_highlight::HighlightStyle}; use roc_code_markup::{syntax_highlight::HighlightStyle};
use roc_code_markup::markup::nodes::MarkupNode; use roc_code_markup::markup::nodes::MarkupNode;
@ -15,6 +15,27 @@ use crate::tokenizer::{Token, TokenTable, full_tokenize};
// license information can be found in the LEGAL_DETAILS file in // license information can be found in the LEGAL_DETAILS file in
// the root directory of this distribution. // the root directory of this distribution.
// Thank you zig contributors! // Thank you zig contributors!
/*
HOW TO ADD NEW RULES:
- go to highlight/tests/peg_grammar.rs
- find for example a variant of common_expr that is not implemented yet, like `if_expr`
- we add `if_expr()` to the `common_expr` rule, in the same order as in peg_grammar::common_expr()
- we copy the if_expr rule from `peg_grammar.rs`
- we add ` -> MarkNodeId` to the if_expr rule
- we change the first full_expr in if_expr() to cond_e_id:full_expr(), the second to then_e_id:full_expr()...
- we add if_mn(), else_mn(), then_mn() and new_if_expr_mn() to common_nodes.rs
- we replace [T::KeywordIf],[T::KeywordThen]... with a new if(),... rule that adds an if,... node to the mark_node_pool.
- we bundle everything together in a nested node and save it in the mn_pool:
```
{
mn_pool.add(
new_if_expr_mn(if_id, cond_e_id, then_id, then_e_id, else_id, else_e_id)
)
}
- we finish up by adding a test: `test_highlight_if_expr`
```
*/
peg::parser!{ peg::parser!{
grammar highlightparser(t_table: &TokenTable, code_str: &str, mn_pool: &mut SlowPool) for [T] { grammar highlightparser(t_table: &TokenTable, code_str: &str, mn_pool: &mut SlowPool) for [T] {
@ -25,13 +46,33 @@ peg::parser!{
opt_same_indent_expr()* opt_same_indent_expr()*
rule opt_same_indent_expr() -> MarkNodeId = rule opt_same_indent_expr() -> MarkNodeId =
[T::SameIndent]? e:full_expr() {e} [T::SameIndent]? e_id:full_expr() {e_id}
rule opt_same_indent_def() -> MarkNodeId = rule opt_same_indent_def() -> MarkNodeId =
[T::SameIndent]? d:def() {d} [T::SameIndent]? d_id:def() {d_id}
rule common_expr() -> MarkNodeId = rule common_expr() -> MarkNodeId =
p:position!() [T::Number] { add_new_mn(t_table.extract_str(p, code_str), HighlightStyle::Number, mn_pool) } if_expr()
/ p:position!() [T::Number] { add_new_mn(t_table.extract_str(p, code_str), HighlightStyle::Number, mn_pool) }
/ module_var()
/ lowercase_ident()
rule if_expr() -> MarkNodeId =
if_id:if() cond_e_id:full_expr() then_id:then() then_e_id:full_expr() else_id:else_rule() else_e_id:full_expr()
{
mn_pool.add(
new_if_expr_mn(if_id, cond_e_id, then_id, then_e_id, else_id, else_e_id)
)
}
rule if() -> MarkNodeId =
[T::KeywordIf] {mn_pool.add(if_mn())}
rule then() -> MarkNodeId =
[T::KeywordThen] {mn_pool.add(then_mn())}
rule else_rule() -> MarkNodeId =
[T::KeywordElse] {mn_pool.add(else_mn())}
pub rule def() -> MarkNodeId = pub rule def() -> MarkNodeId =
// annotated_body() // annotated_body()
@ -45,15 +86,46 @@ peg::parser!{
rule body() -> MarkNodeId = rule body() -> MarkNodeId =
ident_id:ident() as_id:assign() [T::OpenIndent] e_id:full_expr() /*TODO not sure when this is needed> es:full_exprs()*/ ([T::CloseIndent] / end_of_file()) ident_id:ident() as_id:assign() [T::OpenIndent] e_id:full_expr() /*TODO not sure when this is needed> es:full_exprs()*/ ([T::CloseIndent] / end_of_file())
{mn_pool.add(assign_mn(ident_id, as_id, e_id)) } {
mn_pool.add(
new_assign_mn(ident_id, as_id, e_id)
)
}
/ /
ident_id:ident() as_id:assign() e_id:full_expr() end_of_file()? ident_id:ident() as_id:assign() e_id:full_expr() end_of_file()?
{mn_pool.add(assign_mn(ident_id, as_id, e_id)) } {
mn_pool.add(
new_assign_mn(ident_id, as_id, e_id)
)
}
rule module_var() -> MarkNodeId =
mod_name_id:module_name() dot_id:dot() ident_id:lowercase_ident() {
mn_pool.add(
new_module_var_mn(mod_name_id, dot_id, ident_id)
)
}
rule module_name() -> MarkNodeId =
first_ident_id:uppercase_ident() rest_ids:dot_idents() {
new_module_name_mn_id(
merge_ids(first_ident_id, rest_ids),
mn_pool
)
}
rule assign() -> MarkNodeId = rule assign() -> MarkNodeId =
[T::OpAssignment] { mn_pool.add(new_equals_mn()) } [T::OpAssignment] { mn_pool.add(new_equals_mn()) }
rule dot() -> MarkNodeId =
[T::Dot] { mn_pool.add(new_dot_mn()) }
rule dot_ident() -> (MarkNodeId, MarkNodeId) =
dot_id:dot() ident_id:uppercase_ident() { (dot_id, ident_id) }
rule dot_idents() -> Vec<MarkNodeId> =
di:dot_ident()* {flatten_tups(di)}
rule ident() -> MarkNodeId = rule ident() -> MarkNodeId =
uppercase_ident() uppercase_ident()
/ lowercase_ident() / lowercase_ident()
@ -69,6 +141,21 @@ peg::parser!{
} }
} }
/// Prepends `mn_id` to `other_mn_id` and returns all ids in one vector.
///
/// `mn_id` ends up at index 0; the ids from `other_mn_id` follow in their
/// original order.
fn merge_ids(
    mn_id: MarkNodeId,
    other_mn_id: Vec<MarkNodeId>
) -> Vec<MarkNodeId> {
    // Size the result up front so it never has to regrow, and stay in terms of
    // `MarkNodeId` rather than spelling out the alias's underlying type.
    let mut ids = Vec::with_capacity(other_mn_id.len() + 1);
    ids.push(mn_id);
    ids.extend(other_mn_id);
    ids
}
/// Flattens a list of id pairs into a single list of ids.
///
/// Each `(a, b)` contributes `a` then `b`, and pairs keep their original order,
/// so `[(1, 2), (3, 4)]` becomes `[1, 2, 3, 4]`.
fn flatten_tups(tup_vec: Vec<(MarkNodeId, MarkNodeId)>) -> Vec<MarkNodeId> {
    // The input is owned, so consume it and yield each pair as a fixed-size
    // array instead of heap-allocating a temporary `Vec` per pair.
    tup_vec.into_iter().flat_map(|(a, b)| [a, b]).collect()
}
fn add_new_mn( fn add_new_mn(
text: &str, text: &str,
@ -97,34 +184,46 @@ pub fn highlight_defs(code_str: &str, mark_node_pool: &mut SlowPool) -> Result<V
highlightparser::module_defs(&token_table.tokens, &token_table, code_str, mark_node_pool) highlightparser::module_defs(&token_table.tokens, &token_table, code_str, mark_node_pool)
} }
/*pub fn highlight_temp(code_str: &str, mark_node_pool: &mut SlowPool) -> Result<MarkNodeId, ParseError<usize>> {
let token_table = full_tokenize(code_str);
highlightparser::def(&token_table.tokens, &token_table, code_str, mark_node_pool)
}*/
#[cfg(test)] #[cfg(test)]
pub mod highlight_tests { pub mod highlight_tests {
use roc_code_markup::{slow_pool::{SlowPool}, markup::nodes::{node_to_string_w_children}}; use roc_code_markup::{slow_pool::{SlowPool}, markup::nodes::{node_to_string_w_children}};
use crate::highlight_parser::{highlight_expr, highlight_defs}; use crate::highlight_parser::{highlight_expr, highlight_defs};
#[test] fn test_highlight_expr(input: &str, expected_output: &str) {
fn test_highlight() {
let mut mark_node_pool = SlowPool::default(); let mut mark_node_pool = SlowPool::default();
let mark_id = highlight_expr("0", &mut mark_node_pool).unwrap(); let mark_id = highlight_expr(input, &mut mark_node_pool).unwrap();
let mut str_buffer = String::new();
node_to_string_w_children(
mark_id,
&mut str_buffer,
&mark_node_pool
);
assert_eq!( assert_eq!(
&mark_node_pool &str_buffer,
.get( expected_output
mark_id
).get_content(),
"0"
); );
} }
// A lone number literal must be rendered back unchanged.
#[test]
fn test_highlight() {
    let input = "0";
    let expected_output = "0";

    test_highlight_expr(input, expected_output);
}
// A module-qualified variable must be rendered back unchanged.
#[test]
fn test_highlight_module_var() {
    let input = "Foo.Bar.var";
    let expected_output = "Foo.Bar.var";

    test_highlight_expr(input, expected_output);
}
// An if-expression is expected to render with one trailing newline appended
// to the original text.
#[test]
fn test_highlight_if_expr() {
    let input = "if booly then 42 else 31415";
    let expected_output = "if booly then 42 else 31415\n";

    test_highlight_expr(input, expected_output);
}
#[test] #[test]
fn test_highlight_defs() { fn test_highlight_defs() {
let mut mark_node_pool = SlowPool::default(); let mut mark_node_pool = SlowPool::default();
@ -140,7 +239,7 @@ pub mod highlight_tests {
assert_eq!( assert_eq!(
&str_buffer, &str_buffer,
"a = 0\n\n\n" "a = 0\n\n"
); );
} }

View file

@ -1,2 +1,2 @@
mod tokenizer; pub mod tokenizer;
pub mod highlight_parser; pub mod highlight_parser;

View file

@ -1,15 +1,15 @@
#[repr(u8)] #[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// Tokens are full of very dense information to make checking properties about them /// Tokens are full of very dense information to make checking properties about them
/// very fast. /// very fast.
/// Some bits have specific meanings: /// Some bits have specific meanings:
/// * 0b_001*_****: "Identifier-like" things /// * 0b_001*_****: "Identifier-like" things
/// * 0b_01**_****: "Punctuation" /// * 0b_01**_****: "Punctuation"
/// * 0b_0100_1***: []{}() INDENT/DEDENT /// * 0b_0100_1***: []{}() INDENT/DEDENT
/// * 0b_0100_1**0 [{(INDENT /// * 0b_0100_1**0 [{(INDENT
/// * 0b_0100_1**1 ]})DEDENT /// * 0b_0100_1**1 ]})DEDENT
/// * 0b_011*_**** Operators /// * 0b_011*_**** Operators
pub enum Token { pub enum Token {
LowercaseIdent = 0b_0010_0000, LowercaseIdent = 0b_0010_0000,
UppercaseIdent = 0b_0011_0011, UppercaseIdent = 0b_0011_0011,
MalformedIdent = 0b_0010_0001, MalformedIdent = 0b_0010_0001,
@ -89,41 +89,41 @@
Arrow, Arrow,
FatArrow, FatArrow,
Asterisk, Asterisk,
} }
#[derive(Default)] #[derive(Default)]
pub struct TokenTable { pub struct TokenTable {
pub tokens: Vec<Token>, pub tokens: Vec<Token>,
pub offsets: Vec<usize>, pub offsets: Vec<usize>,
pub lengths: Vec<usize>, pub lengths: Vec<usize>,
} }
pub struct LexState { pub struct LexState {
indents: Vec<usize>, indents: Vec<usize>,
} }
trait ConsumeToken { trait ConsumeToken {
fn token(&mut self, token: Token, _offset: usize, _length: usize); fn token(&mut self, token: Token, _offset: usize, _length: usize);
} }
#[derive(Default)] #[derive(Default)]
struct TokenConsumer{ struct TokenConsumer{
token_table: TokenTable, token_table: TokenTable,
} }
impl ConsumeToken for TokenConsumer { impl ConsumeToken for TokenConsumer {
fn token(&mut self, token: Token, offset: usize, length: usize){ fn token(&mut self, token: Token, offset: usize, length: usize){
self.token_table.tokens.push(token); self.token_table.tokens.push(token);
self.token_table.offsets.push(offset); self.token_table.offsets.push(offset);
self.token_table.lengths.push(length); self.token_table.lengths.push(length);
} }
} }
pub fn tokenize(code_str: &str) -> Vec<Token> { pub fn tokenize(code_str: &str) -> Vec<Token> {
full_tokenize(code_str).tokens full_tokenize(code_str).tokens
} }
pub fn full_tokenize(code_str: &str) -> TokenTable { pub fn full_tokenize(code_str: &str) -> TokenTable {
let mut lex_state = LexState{ indents: Vec::new() }; let mut lex_state = LexState{ indents: Vec::new() };
let mut consumer = TokenConsumer::default(); let mut consumer = TokenConsumer::default();
@ -134,13 +134,13 @@
); );
consumer.token_table consumer.token_table
} }
fn consume_all_tokens( fn consume_all_tokens(
state: &mut LexState, state: &mut LexState,
bytes: &[u8], bytes: &[u8],
consumer: &mut impl ConsumeToken, consumer: &mut impl ConsumeToken,
) { ) {
let mut i = 0; let mut i = 0;
while i < bytes.len() { while i < bytes.len() {
@ -202,9 +202,9 @@
consumer.token(token, i, len); consumer.token(token, i, len);
i += len; i += len;
} }
} }
fn add_indents(skipped_lines: usize, curr_line_indent: usize, state: &mut LexState, consumer: &mut impl ConsumeToken, curr_byte_ctr: &mut usize) { fn add_indents(skipped_lines: usize, curr_line_indent: usize, state: &mut LexState, consumer: &mut impl ConsumeToken, curr_byte_ctr: &mut usize) {
*curr_byte_ctr += skipped_lines; *curr_byte_ctr += skipped_lines;
if let Some(&prev_indent) = state.indents.last() { if let Some(&prev_indent) = state.indents.last() {
@ -231,30 +231,9 @@
} else { } else {
consumer.token(Token::SameIndent, *curr_byte_ctr, 0); consumer.token(Token::SameIndent, *curr_byte_ctr, 0);
} }
} }
impl TokenTable {
pub fn new(text: &str) -> TokenTable {
let mut tt = TokenTable {
tokens: Vec::new(),
offsets: Vec::new(),
lengths: Vec::new(),
};
let mut offset = 0;
let mut state = LexState::new();
// while let Some((token, skip, length)) = Token::lex_single(&mut state, &text.as_bytes()[offset..]) {
// tt.tokens.push(token);
// offset += skip;
// tt.offsets.push(offset);
// offset += length;
// tt.lengths.push(length);
// }
tt
}
impl TokenTable {
pub fn extract_str<'a>(&self, index: usize, content: &'a str) -> &'a str { pub fn extract_str<'a>(&self, index: usize, content: &'a str) -> &'a str {
// TODO remove unwrap // TODO remove unwrap
let len = *self.lengths.get(index).unwrap(); let len = *self.lengths.get(index).unwrap();
@ -262,17 +241,17 @@
&content[offset..(offset + len)] &content[offset..(offset + len)]
} }
} }
impl LexState { impl LexState {
pub fn new() -> LexState { pub fn new() -> LexState {
LexState { LexState {
indents: Vec::new(), indents: Vec::new(),
} }
} }
} }
fn skip_comment(bytes: &[u8]) -> usize { fn skip_comment(bytes: &[u8]) -> usize {
let mut skip = 0; let mut skip = 0;
while skip < bytes.len() && bytes[skip] != b'\n' { while skip < bytes.len() && bytes[skip] != b'\n' {
skip += 1; skip += 1;
@ -282,17 +261,17 @@
} }
skip skip
} }
#[derive(Copy, Clone, Eq, PartialEq, PartialOrd, Ord)] #[derive(Copy, Clone, Eq, PartialEq, PartialOrd, Ord)]
struct Indent(usize); struct Indent(usize);
enum SpaceDotOrSpaces { enum SpaceDotOrSpaces {
SpacesWSpaceDot(usize), SpacesWSpaceDot(usize),
Spaces(usize) Spaces(usize)
} }
fn skip_whitespace(bytes: &[u8]) -> SpaceDotOrSpaces { fn skip_whitespace(bytes: &[u8]) -> SpaceDotOrSpaces {
debug_assert!(bytes[0] == b' '); debug_assert!(bytes[0] == b' ');
let mut skip = 0; let mut skip = 0;
@ -305,15 +284,15 @@
} else { } else {
SpaceDotOrSpaces::Spaces(skip) SpaceDotOrSpaces::Spaces(skip)
} }
} }
enum SkipNewlineReturn { enum SkipNewlineReturn {
SkipWIndent(usize, usize), SkipWIndent(usize, usize),
WSpaceDot(usize, usize) WSpaceDot(usize, usize)
} }
// also skips lines that contain only whitespace // also skips lines that contain only whitespace
fn skip_newlines_and_comments(bytes: &[u8]) -> SkipNewlineReturn { fn skip_newlines_and_comments(bytes: &[u8]) -> SkipNewlineReturn {
let mut skip = 0; let mut skip = 0;
let mut indent = 0; let mut indent = 0;
@ -359,14 +338,14 @@
} }
SkipNewlineReturn::SkipWIndent(skip, indent) SkipNewlineReturn::SkipWIndent(skip, indent)
} }
fn is_op_continue(ch: u8) -> bool { fn is_op_continue(ch: u8) -> bool {
matches!(ch, b'-' | b':' | b'!' | b'.' | b'*' | b'/' | b'&' | matches!(ch, b'-' | b':' | b'!' | b'.' | b'*' | b'/' | b'&' |
b'%' | b'^' | b'+' | b'<' | b'=' | b'>' | b'|' | b'\\') b'%' | b'^' | b'+' | b'<' | b'=' | b'>' | b'|' | b'\\')
} }
fn lex_operator(bytes: &[u8]) -> (Token, usize) { fn lex_operator(bytes: &[u8]) -> (Token, usize) {
let mut i = 0; let mut i = 0;
while i < bytes.len() && is_op_continue(bytes[i]) { while i < bytes.len() && is_op_continue(bytes[i]) {
i += 1; i += 1;
@ -404,22 +383,22 @@
} }
}; };
(tok, i) (tok, i)
} }
fn is_ident_continue(ch: u8) -> bool { fn is_ident_continue(ch: u8) -> bool {
matches!(ch, b'a'..=b'z'|b'A'..=b'Z'|b'0'..=b'9'|b'_') matches!(ch, b'a'..=b'z'|b'A'..=b'Z'|b'0'..=b'9'|b'_')
} }
fn lex_private_tag(bytes: &[u8]) -> (Token, usize) { fn lex_private_tag(bytes: &[u8]) -> (Token, usize) {
debug_assert!(bytes[0] == b'@'); debug_assert!(bytes[0] == b'@');
let mut i = 1; let mut i = 1;
while i < bytes.len() && is_ident_continue(bytes[i]) { while i < bytes.len() && is_ident_continue(bytes[i]) {
i += 1; i += 1;
} }
(Token::PrivateTag, i) (Token::PrivateTag, i)
} }
fn lex_ident(uppercase: bool, bytes: &[u8]) -> (Token, usize) { fn lex_ident(uppercase: bool, bytes: &[u8]) -> (Token, usize) {
let mut i = 0; let mut i = 0;
while i < bytes.len() && is_ident_continue(bytes[i]) { while i < bytes.len() && is_ident_continue(bytes[i]) {
i += 1; i += 1;
@ -453,21 +432,21 @@
}, },
}; };
(tok, i) (tok, i)
} }
fn lex_underscore(bytes: &[u8]) -> (Token, usize) { fn lex_underscore(bytes: &[u8]) -> (Token, usize) {
let mut i = 0; let mut i = 0;
while i < bytes.len() && is_ident_continue(bytes[i]) { while i < bytes.len() && is_ident_continue(bytes[i]) {
i += 1; i += 1;
} }
(Token::Underscore, i) (Token::Underscore, i)
} }
fn is_int_continue(ch: u8) -> bool { fn is_int_continue(ch: u8) -> bool {
matches!(ch, b'0'..=b'9' | b'_') matches!(ch, b'0'..=b'9' | b'_')
} }
fn lex_number(bytes: &[u8]) -> (Token, usize) { fn lex_number(bytes: &[u8]) -> (Token, usize) {
let mut i = 0; let mut i = 0;
while i < bytes.len() && is_int_continue(bytes[i]) { while i < bytes.len() && is_int_continue(bytes[i]) {
i += 1; i += 1;
@ -481,9 +460,9 @@
} }
(Token::Number, i) (Token::Number, i)
} }
fn lex_string(bytes: &[u8]) -> (Token, usize) { fn lex_string(bytes: &[u8]) -> (Token, usize) {
let mut i = 0; let mut i = 0;
assert_eq!(bytes[i], b'"'); assert_eq!(bytes[i], b'"');
i += 1; i += 1;
@ -500,7 +479,7 @@
i += 1; i += 1;
(Token::String, i) (Token::String, i)
} }
#[cfg(test)] #[cfg(test)]
mod tokenizer { mod tokenizer {

File diff suppressed because it is too large Load diff