Mirror of https://github.com/roc-lang/roc.git (synced 2025-09-29 06:44:46 +00:00)

Commit: ec1e2cd1d0
Parent: 37704323b1

    finished if highlighting

12 changed files with 1988 additions and 2488 deletions
@@ -1,25 +1,17 @@
-use crate::{syntax_highlight::HighlightStyle, slow_pool::MarkNodeId};
+use crate::{syntax_highlight::HighlightStyle, slow_pool::{MarkNodeId, SlowPool}};
 
-use super::{attribute::Attributes, nodes, nodes::MarkupNode};
+use super::{attribute::Attributes, nodes::{self, make_nested_mn}, nodes::MarkupNode};
 
 pub fn new_equals_mn() -> MarkupNode {
-    MarkupNode::Text {
-        content: nodes::EQUALS.to_owned(),
-        syn_high_style: HighlightStyle::Operator,
-        attributes: Attributes::default(),
-        parent_id_opt: None,
-        newlines_at_end: 0,
-    }
+    common_text_node(nodes::EQUALS.to_owned(), HighlightStyle::Operator, 0)
 }
 
 pub fn new_comma_mn() -> MarkupNode {
-    MarkupNode::Text {
-        content: nodes::COMMA.to_owned(),
-        syn_high_style: HighlightStyle::Comma,
-        attributes: Attributes::default(),
-        parent_id_opt: None,
-        newlines_at_end: 0,
-    }
+    common_text_node(nodes::COMMA.to_owned(), HighlightStyle::Operator, 0)
+}
+
+pub fn new_dot_mn() -> MarkupNode {
+    common_text_node(nodes::DOT.to_owned(), HighlightStyle::Operator, 0)
 }
 
 pub fn new_blank_mn() -> MarkupNode {
@@ -47,102 +39,98 @@ pub fn new_colon_mn() -> MarkupNode {
 pub fn new_operator_mn(
     content: String,
 ) -> MarkupNode {
-    MarkupNode::Text {
-        content,
-        syn_high_style: HighlightStyle::Operator,
-        attributes: Attributes::default(),
-        parent_id_opt: None,
-        newlines_at_end: 0,
-    }
+    common_text_node(content, HighlightStyle::Operator, 0)
 }
 
 pub fn new_left_accolade_mn() -> MarkupNode {
-    MarkupNode::Text {
-        content: nodes::LEFT_ACCOLADE.to_owned(),
-        syn_high_style: HighlightStyle::Bracket,
-        attributes: Attributes::default(),
-        parent_id_opt: None,
-        newlines_at_end: 0,
-    }
+    common_text_node(nodes::LEFT_ACCOLADE.to_owned(), HighlightStyle::Bracket, 0)
 }
 
 pub fn new_right_accolade_mn() -> MarkupNode {
-    MarkupNode::Text {
-        content: nodes::RIGHT_ACCOLADE.to_owned(),
-        syn_high_style: HighlightStyle::Bracket,
-        attributes: Attributes::default(),
-        parent_id_opt: None,
-        newlines_at_end: 0,
-    }
+    common_text_node(nodes::RIGHT_ACCOLADE.to_owned(), HighlightStyle::Bracket, 0)
 }
 
 pub fn new_left_square_mn() -> MarkupNode {
-    MarkupNode::Text {
-        content: nodes::LEFT_SQUARE_BR.to_owned(),
-        syn_high_style: HighlightStyle::Bracket,
-        attributes: Attributes::default(),
-        parent_id_opt: None,
-        newlines_at_end: 0,
-    }
+    common_text_node(nodes::LEFT_SQUARE_BR.to_owned(), HighlightStyle::Bracket, 0)
 }
 
 pub fn new_right_square_mn() -> MarkupNode {
-    MarkupNode::Text {
-        content: nodes::RIGHT_SQUARE_BR.to_owned(),
-        syn_high_style: HighlightStyle::Bracket,
-        attributes: Attributes::default(),
-        parent_id_opt: None,
-        newlines_at_end: 0,
-    }
+    common_text_node(nodes::RIGHT_SQUARE_BR.to_owned(), HighlightStyle::Bracket, 0)
 }
 
 pub fn new_func_name_mn(content: String) -> MarkupNode {
-    MarkupNode::Text {
-        content,
-        syn_high_style: HighlightStyle::FunctionName,
-        attributes: Attributes::default(),
-        parent_id_opt: None,
-        newlines_at_end: 0,
-    }
+    common_text_node(content, HighlightStyle::FunctionName, 0)
 }
 
 pub fn new_arg_name_mn(content: String) -> MarkupNode {
-    MarkupNode::Text {
-        content,
-        syn_high_style: HighlightStyle::FunctionArgName,
-        attributes: Attributes::default(),
-        parent_id_opt: None,
-        newlines_at_end: 0,
-    }
+    common_text_node(content, HighlightStyle::FunctionArgName, 0)
 }
 
 pub fn new_arrow_mn(newlines_at_end: usize) -> MarkupNode {
-    MarkupNode::Text {
-        content: nodes::ARROW.to_owned(),
-        syn_high_style: HighlightStyle::Operator,
-        attributes: Attributes::default(),
-        parent_id_opt: None,
-        newlines_at_end,
-    }
+    common_text_node(nodes::ARROW.to_owned(), HighlightStyle::Operator, newlines_at_end)
 }
 
 pub fn new_comments_mn(
-    comments: String,
+    comment: String,
     newlines_at_end: usize,
 ) -> MarkupNode {
+    common_text_node(comment, HighlightStyle::Comment, newlines_at_end)
+}
+
+fn common_text_node(
+    content: String,
+    highlight_style: HighlightStyle,
+    newlines_at_end: usize
+) -> MarkupNode {
     MarkupNode::Text {
-        content: comments,
-        syn_high_style: HighlightStyle::Comment,
+        content,
+        syn_high_style: highlight_style,
         attributes: Attributes::default(),
         parent_id_opt: None,
         newlines_at_end,
     }
 }
 
-pub fn assign_mn(val_name_mn_id: MarkNodeId, equals_mn_id: MarkNodeId, expr_mark_node_id: MarkNodeId) -> MarkupNode {
-    MarkupNode::Nested {
-        children_ids: vec![val_name_mn_id, equals_mn_id, expr_mark_node_id],
-        parent_id_opt: None,
-        newlines_at_end: 3,
-    }
+pub fn new_assign_mn(val_name_mn_id: MarkNodeId, equals_mn_id: MarkNodeId, expr_mark_node_id: MarkNodeId) -> MarkupNode {
+    make_nested_mn(vec![val_name_mn_id, equals_mn_id, expr_mark_node_id], 2)
+}
+
+pub fn new_module_name_mn_id(mn_ids: Vec<MarkNodeId>, mark_node_pool: &mut SlowPool) -> MarkNodeId {
+    if mn_ids.len() == 1 {
+        *mn_ids.get(0).unwrap() // safe because we checked the length before
+    } else {
+        let nested_node = make_nested_mn(mn_ids, 0);
+        mark_node_pool.add(nested_node)
+    }
+}
+
+pub fn new_module_var_mn(module_name_id: MarkNodeId, dot_id: MarkNodeId, ident_id: MarkNodeId) -> MarkupNode {
+    make_nested_mn(vec![module_name_id, dot_id, ident_id], 0)
+}
+
+pub fn if_mn() -> MarkupNode {
+    keyword_mn("if ")
+}
+
+pub fn then_mn() -> MarkupNode {
+    keyword_mn(" then ")
+}
+
+pub fn else_mn() -> MarkupNode {
+    keyword_mn(" else ")
+}
+
+fn keyword_mn(keyword: &str) -> MarkupNode {
+    common_text_node(keyword.to_owned(), HighlightStyle::Keyword, 0)
+}
+
+pub fn new_if_expr_mn(
+    if_mn_id: MarkNodeId,
+    cond_expr_mn_id: MarkNodeId,
+    then_mn_id: MarkNodeId,
+    then_expr_mn_id: MarkNodeId,
+    else_mn_id: MarkNodeId,
+    else_expr_mn_id: MarkNodeId,
+) -> MarkupNode {
+    make_nested_mn(vec![if_mn_id, cond_expr_mn_id, then_mn_id, then_expr_mn_id, else_mn_id, else_expr_mn_id], 1)
 }
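The refactor above collapses every repeated `MarkupNode::Text { .. }` literal into the new `common_text_node` helper, and `new_assign_mn` now builds its nested node through `make_nested_mn` with a `newlines_at_end` of 2 where the old literal used 3, which is why the expected strings in the tests further down lose one trailing `\n`. As a rough sketch of how the new keyword constructors are meant to compose into an if-expression node (assuming only what the grammar actions further down also assume, namely that `SlowPool::add` stores a node and returns its `MarkNodeId`):

```rust
use roc_code_markup::markup::common_nodes::{else_mn, if_mn, new_if_expr_mn, then_mn};
use roc_code_markup::markup::nodes::MarkupNode;
use roc_code_markup::slow_pool::{MarkNodeId, SlowPool};

/// Sketch: assemble the markup tree for `if cond then a else b`,
/// given node ids for the three sub-expressions.
fn build_if_markup(
    cond_id: MarkNodeId,
    then_expr_id: MarkNodeId,
    else_expr_id: MarkNodeId,
    pool: &mut SlowPool,
) -> MarkupNode {
    let if_id = pool.add(if_mn());     // "if " keyword node
    let then_id = pool.add(then_mn()); // " then " keyword node
    let else_id = pool.add(else_mn()); // " else " keyword node

    // One Nested node holding the six children in source order;
    // the final argument of new_if_expr_mn's make_nested_mn call is
    // newlines_at_end, which is 1 for if-expressions.
    new_if_expr_mn(if_id, cond_id, then_id, then_expr_id, else_id, else_expr_id)
}
```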
@@ -3,5 +3,4 @@ pub mod common_nodes;
 pub mod convert;
 pub mod nodes;
 pub mod top_level_def;
 pub mod mark_id_ast_id_map;
-pub mod ast_context;
@@ -245,6 +245,14 @@ impl MarkupNode {
     }
 }
 
+pub fn make_nested_mn(children_ids: Vec<MarkNodeId>, newlines_at_end: usize) -> MarkupNode {
+    MarkupNode::Nested {
+        children_ids,
+        parent_id_opt: None,
+        newlines_at_end,
+    }
+}
+
 pub fn get_string<'a>(env: &Env<'a>, pool_str: &PoolStr) -> String {
     pool_str.as_str(env.pool).to_owned()
 }
@@ -256,6 +264,7 @@ pub const LEFT_SQUARE_BR: &str = "[ ";
 pub const RIGHT_SQUARE_BR: &str = " ]";
 pub const COLON: &str = ": ";
 pub const COMMA: &str = ", ";
+pub const DOT: &str = ".";
 pub const STRING_QUOTES: &str = "\"\"";
 pub const EQUALS: &str = " = ";
 pub const ARROW: &str = " -> ";
@@ -14,7 +14,7 @@ use crate::{
     syntax_highlight::HighlightStyle,
 };
 
-use super::{mark_id_ast_id_map::MarkIdAstIdMap, convert::from_def2::add_node, common_nodes::assign_mn};
+use super::{mark_id_ast_id_map::MarkIdAstIdMap, convert::from_def2::add_node, common_nodes::new_assign_mn};
 
 // represents for example: `main = "Hello, World!"`
 pub fn assignment_mark_node<'a>(
@@ -39,7 +39,7 @@ pub fn assignment_mark_node<'a>(
 
     let equals_mn_id = add_node(new_equals_mn(), ast_node_id, mark_node_pool, mark_id_ast_id_map);
 
-    Ok(assign_mn(val_name_mn_id, equals_mn_id, expr_mark_node_id))
+    Ok(new_assign_mn(val_name_mn_id, equals_mn_id, expr_mark_node_id))
 }
 
 pub fn tld_w_comments_mark_node(
@@ -6,7 +6,6 @@ use crate::colors::{from_hsb, RgbaTup};
 #[derive(Hash, Eq, PartialEq, Copy, Clone, Debug, Deserialize, Serialize)]
 pub enum HighlightStyle {
     Operator, // =+-<>...
-    Comma,
     String,
     FunctionName,
     FunctionArgName,
@@ -23,6 +22,7 @@ pub enum HighlightStyle {
     DocsComment,
     UppercaseIdent, // TODO remove other HighlightStyle subtypes of UppercaseIdent?
     LowercaseIdent, // TODO remove other HighlightStyle subtypes of LowercaseIdent?
+    Keyword, // if, else, when
 }
 
 pub fn default_highlight_map() -> HashMap<HighlightStyle, RgbaTup> {
@@ -33,7 +33,6 @@ pub fn default_highlight_map() -> HashMap<HighlightStyle, RgbaTup> {
     let mut highlight_map = HashMap::new();
     [
         (Operator, from_hsb(185, 50, 75)),
-        (Comma, from_hsb(258, 50, 90)),
         (String, from_hsb(346, 65, 97)),
         (FunctionName, almost_white),
         (FunctionArgName, from_hsb(225, 50, 100)),
@@ -50,6 +49,7 @@ pub fn default_highlight_map() -> HashMap<HighlightStyle, RgbaTup> {
         (DocsComment, from_hsb(258, 50, 90)), // TODO check color
         (UppercaseIdent, almost_white),
         (LowercaseIdent, from_hsb(225, 50, 100)),
+        (Keyword, almost_white),
     ]
     .iter()
     .for_each(|tup| {
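Dropping `Comma` (comma markup nodes now reuse the `Operator` style, as `new_comma_mn` above shows) and adding `Keyword` keeps the enum, the color table, and the CSS-class match further down in sync. A minimal sketch of reading the new entry back out of the map (assuming `RgbaTup` is printable with `{:?}`):

```rust
use roc_code_markup::syntax_highlight::{default_highlight_map, HighlightStyle};

fn keyword_color_demo() {
    let map = default_highlight_map();

    // Per the table above, Keyword maps to almost_white.
    let rgba = map
        .get(&HighlightStyle::Keyword)
        .expect("every HighlightStyle variant should have a color");

    println!("keyword color: {:?}", rgba);
}
```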
File diff suppressed because it is too large
@@ -30,7 +30,6 @@ pub fn mark_node_to_html(
 
     let css_class = match syn_high_style {
         Operator => "operator",
-        Comma => "comma",
         String => "string",
         FunctionName => "function-name",
         FunctionArgName => "function-arg-name",
@@ -47,6 +46,7 @@ pub fn mark_node_to_html(
         DocsComment => "docs-comment",
         UppercaseIdent => "uppercase-ident",
         LowercaseIdent => "lowercase-ident",
+        Keyword => "keyword-ident",
     };
 
     write_html_to_buf(content, css_class, buf);
@@ -98,7 +98,7 @@ mod insert_doc_syntax_highlighting {
     fn top_level_def_val_num() {
         expect_html_def(
             r#"myVal = 0"#,
-            "<span class=\"syntax-lowercase-ident\">myVal</span><span class=\"syntax-operator\"> = </span><span class=\"syntax-number\">0</span>\n\n\n",
+            "<span class=\"syntax-lowercase-ident\">myVal</span><span class=\"syntax-operator\"> = </span><span class=\"syntax-number\">0</span>\n\n",
         );
     }
@@ -1,6 +1,6 @@
 use peg::error::ParseError;
 use roc_code_markup::markup::attribute::Attributes;
-use roc_code_markup::markup::common_nodes::{new_equals_mn, assign_mn};
+use roc_code_markup::markup::common_nodes::{new_equals_mn, new_dot_mn, new_assign_mn, new_module_name_mn_id, new_module_var_mn, if_mn, then_mn, else_mn, new_if_expr_mn};
 use roc_code_markup::slow_pool::{SlowPool, MarkNodeId};
 use roc_code_markup::{syntax_highlight::HighlightStyle};
 use roc_code_markup::markup::nodes::MarkupNode;
@@ -15,6 +15,27 @@ use crate::tokenizer::{Token, TokenTable, full_tokenize};
 // license information can be found in the LEGAL_DETAILS file in
 // the root directory of this distribution.
 // Thank you zig contributors!
 
+/*
+HOW TO ADD NEW RULES:
+- go to highlight/tests/peg_grammar.rs
+- find for example a variant of common_expr that is not implemented yet, like `if_expr`
+- we add `if_expr()` to the `common_expr` rule, in the same order as in peg_grammar::common_expr()
+- we copy the if_expr rule from `peg_grammar.rs`
+- we add ` -> MarkNodeId` to the if_expr rule
+- we change the first full_expr in if_expr() to cond_e_id:full_expr(), the second to then_e_id:full_expr()...
+- we add if_mn(), else_mn(), then_mn() and new_if_expr_mn() to common_nodes.rs
+- we replace [T::KeywordIf],[T::KeywordThen]... with a new if(),... rule that adds an if,... node to the mark_node_pool.
+- we bundle everything together in a nested node and save it in the mn_pool:
+```
+{
+    mn_pool.add(
+        new_if_expr_mn(if_id, cond_e_id, then_id, then_e_id, else_id, else_e_id)
+    )
+}
+```
+- we finish up by adding a test: `test_highlight_if_expr`
+*/
 peg::parser!{
   grammar highlightparser(t_table: &TokenTable, code_str: &str, mn_pool: &mut SlowPool) for [T] {
@@ -25,13 +46,33 @@ peg::parser!{
       opt_same_indent_expr()*
 
     rule opt_same_indent_expr() -> MarkNodeId =
-      [T::SameIndent]? e:full_expr() {e}
+      [T::SameIndent]? e_id:full_expr() {e_id}
 
     rule opt_same_indent_def() -> MarkNodeId =
-      [T::SameIndent]? d:def() {d}
+      [T::SameIndent]? d_id:def() {d_id}
 
     rule common_expr() -> MarkNodeId =
-      p:position!() [T::Number] { add_new_mn(t_table.extract_str(p, code_str), HighlightStyle::Number, mn_pool) }
+      if_expr()
+      / p:position!() [T::Number] { add_new_mn(t_table.extract_str(p, code_str), HighlightStyle::Number, mn_pool) }
+      / module_var()
+      / lowercase_ident()
+
+    rule if_expr() -> MarkNodeId =
+      if_id:if() cond_e_id:full_expr() then_id:then() then_e_id:full_expr() else_id:else_rule() else_e_id:full_expr()
+      {
+        mn_pool.add(
+          new_if_expr_mn(if_id, cond_e_id, then_id, then_e_id, else_id, else_e_id)
+        )
+      }
+
+    rule if() -> MarkNodeId =
+      [T::KeywordIf] {mn_pool.add(if_mn())}
+
+    rule then() -> MarkNodeId =
+      [T::KeywordThen] {mn_pool.add(then_mn())}
+
+    rule else_rule() -> MarkNodeId =
+      [T::KeywordElse] {mn_pool.add(else_mn())}
 
     pub rule def() -> MarkNodeId =
       // annotated_body()
@@ -45,15 +86,46 @@ peg::parser!{
 
     rule body() -> MarkNodeId =
       ident_id:ident() as_id:assign() [T::OpenIndent] e_id:full_expr() /*TODO not sure when this is needed> es:full_exprs()*/ ([T::CloseIndent] / end_of_file())
-      {mn_pool.add(assign_mn(ident_id, as_id, e_id)) }
+      {
+        mn_pool.add(
+          new_assign_mn(ident_id, as_id, e_id)
+        )
+      }
       /
       ident_id:ident() as_id:assign() e_id:full_expr() end_of_file()?
-      {mn_pool.add(assign_mn(ident_id, as_id, e_id)) }
+      {
+        mn_pool.add(
+          new_assign_mn(ident_id, as_id, e_id)
+        )
+      }
+
+    rule module_var() -> MarkNodeId =
+      mod_name_id:module_name() dot_id:dot() ident_id:lowercase_ident() {
+        mn_pool.add(
+          new_module_var_mn(mod_name_id, dot_id, ident_id)
+        )
+      }
+
+    rule module_name() -> MarkNodeId =
+      first_ident_id:uppercase_ident() rest_ids:dot_idents() {
+        new_module_name_mn_id(
+          merge_ids(first_ident_id, rest_ids),
+          mn_pool
+        )
+      }
 
     rule assign() -> MarkNodeId =
       [T::OpAssignment] { mn_pool.add(new_equals_mn()) }
+
+    rule dot() -> MarkNodeId =
+      [T::Dot] { mn_pool.add(new_dot_mn()) }
+
+    rule dot_ident() -> (MarkNodeId, MarkNodeId) =
+      dot_id:dot() ident_id:uppercase_ident() { (dot_id, ident_id) }
+
+    rule dot_idents() -> Vec<MarkNodeId> =
+      di:dot_ident()* {flatten_tups(di)}
 
     rule ident() -> MarkNodeId =
       uppercase_ident()
       / lowercase_ident()
@@ -69,6 +141,21 @@ peg::parser!{
 
   }
 }
+
+fn merge_ids(
+  mn_id: MarkNodeId,
+  other_mn_id: Vec<MarkNodeId>
+) -> Vec<MarkNodeId> {
+  let mut ids = vec![mn_id];
+  let mut rest_ids: Vec<usize> = other_mn_id;
+
+  ids.append(&mut rest_ids);
+
+  ids
+}
+
+fn flatten_tups(tup_vec: Vec<(MarkNodeId, MarkNodeId)>) -> Vec<MarkNodeId> {
+  tup_vec.iter().flat_map(|(a,b)| vec![*a, *b]).collect()
+}
 
 fn add_new_mn(
   text: &str,
@@ -97,34 +184,46 @@ pub fn highlight_defs(code_str: &str, mark_node_pool: &mut SlowPool) -> Result<V
   highlightparser::module_defs(&token_table.tokens, &token_table, code_str, mark_node_pool)
 }
 
-/*pub fn highlight_temp(code_str: &str, mark_node_pool: &mut SlowPool) -> Result<MarkNodeId, ParseError<usize>> {
-  let token_table = full_tokenize(code_str);
-
-  highlightparser::def(&token_table.tokens, &token_table, code_str, mark_node_pool)
-}*/
-
 #[cfg(test)]
 pub mod highlight_tests {
   use roc_code_markup::{slow_pool::{SlowPool}, markup::nodes::{node_to_string_w_children}};
 
   use crate::highlight_parser::{highlight_expr, highlight_defs};
 
-  #[test]
-  fn test_highlight() {
+  fn test_highlight_expr(input: &str, expected_output: &str) {
     let mut mark_node_pool = SlowPool::default();
 
-    let mark_id = highlight_expr("0", &mut mark_node_pool).unwrap();
+    let mark_id = highlight_expr(input, &mut mark_node_pool).unwrap();
+
+    let mut str_buffer = String::new();
+
+    node_to_string_w_children(
+      mark_id,
+      &mut str_buffer,
+      &mark_node_pool
+    );
 
     assert_eq!(
-      &mark_node_pool
-        .get(
-          mark_id
-        ).get_content(),
-      "0"
+      &str_buffer,
+      expected_output
     );
   }
+
+  #[test]
+  fn test_highlight() {
+    test_highlight_expr("0","0");
+  }
+
+  #[test]
+  fn test_highlight_module_var() {
+    test_highlight_expr("Foo.Bar.var","Foo.Bar.var");
+  }
+
+  #[test]
+  fn test_highlight_if_expr() {
+    test_highlight_expr("if booly then 42 else 31415", "if booly then 42 else 31415\n")
+  }
 
   #[test]
   fn test_highlight_defs() {
     let mut mark_node_pool = SlowPool::default();
@@ -140,7 +239,7 @@ pub mod highlight_tests {
 
     assert_eq!(
       &str_buffer,
-      "a = 0\n\n\n"
+      "a = 0\n\n"
     );
   }

@@ -1,2 +1,2 @@
-mod tokenizer;
+pub mod tokenizer;
 pub mod highlight_parser;
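The test rework above funnels every expression test through `test_highlight_expr`, which parses with `highlight_expr` and re-serializes the markup tree with `node_to_string_w_children`. The same round trip works as a quick smoke check outside the test module; a sketch mirroring `test_highlight_if_expr`:

```rust
use roc_code_markup::markup::nodes::node_to_string_w_children;
use roc_code_markup::slow_pool::SlowPool;

use crate::highlight_parser::highlight_expr;

fn render_if_expr_demo() {
    let mut pool = SlowPool::default();

    // Parse the expression into mark nodes; the root MarkNodeId comes back.
    let root_id = highlight_expr("if booly then 42 else 31415", &mut pool).unwrap();

    // Walk the tree and rebuild the source text. The trailing newline is
    // the newlines_at_end of 1 that new_if_expr_mn gives the nested node.
    let mut out = String::new();
    node_to_string_w_children(root_id, &mut out, &pool);

    assert_eq!(out, "if booly then 42 else 31415\n");
}
```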
@@ -1,506 +1,485 @@
 #[repr(u8)]
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 /// Tokens are full of very dense information to make checking properties about them
 /// very fast.
 /// Some bits have specific meanings:
 /// * 0b_001*_****: "Identifier-like" things
 /// * 0b_01**_****: "Punctuation"
 /// * 0b_0100_1***: []{}() INDENT/DEDENT
 /// * 0b_0100_1**0 [{(INDENT
 /// * 0b_0100_1**1 ]})DEDENT
 /// * 0b_011*_**** Operators
 pub enum Token {
     LowercaseIdent = 0b_0010_0000,
     UppercaseIdent = 0b_0011_0011,
     MalformedIdent = 0b_0010_0001,
 
     KeywordIf = 0b_0010_0010,
     KeywordThen = 0b_0010_0011,
     KeywordElse = 0b_0010_0100,
     KeywordWhen = 0b_0010_0101,
     KeywordAs = 0b_0010_0110,
     KeywordIs = 0b_0010_0111,
     KeywordExpect = 0b_0010_1000,
     KeywordApp = 0b_0010_1001,
     KeywordInterface = 0b_0010_1010,
     KeywordPackages = 0b_0010_1011,
     KeywordImports = 0b_0010_1100,
     KeywordProvides = 0b_0010_1101,
     KeywordTo = 0b_0010_1110,
     KeywordExposes = 0b_0010_1111,
     KeywordEffects = 0b_0011_0000,
     KeywordPlatform = 0b_0011_0001,
     KeywordRequires = 0b_0011_0010,
 
     Comma = 0b_0100_0000,
     Colon = 0b_0100_0001,
 
     OpenParen = 0b_0100_1000,
     CloseParen = 0b_0100_1001,
     OpenCurly = 0b_0100_1010,
     CloseCurly = 0b_0100_1011,
     OpenSquare = 0b_0100_1100,
     CloseSquare = 0b_0100_1101,
     OpenIndent = 0b_0100_1110,
     CloseIndent = 0b_0100_1111,
     SameIndent = 0b_0101_0000,
 
     OpPlus = 0b_0110_0000,
     OpMinus = 0b_0110_0001,
     OpSlash = 0b_0110_0010,
     OpPercent = 0b_0110_0011,
     OpCaret = 0b_0110_0100,
     OpGreaterThan = 0b_0110_0101,
     OpLessThan = 0b_0110_0110,
     OpAssignment = 0b_0110_0111,
     OpPizza = 0b_0110_1000,
     OpEquals = 0b_0110_1001,
     OpNotEquals = 0b_0110_1010,
     OpGreaterThanOrEq = 0b_0110_1011,
     OpLessThanOrEq = 0b_0110_1100,
     OpAnd = 0b_0110_1101,
     OpOr = 0b_0110_1110,
     OpDoubleSlash = 0b_0110_1111,
     OpDoublePercent = 0b_0111_0001,
     OpBackpassing = 0b_0111_1010,
 
     TodoNextThing = 0b_1000_0000,
 
     Malformed,
     MalformedOperator,
 
     PrivateTag,
 
     String,
 
     NumberBase,
     Number,
 
     QuestionMark,
 
     Underscore,
 
     Ampersand,
     Pipe,
     Dot,
     SpaceDot, // ` .` necessary to know difference between `Result.map .position` and `Result.map.position`
     Bang,
     LambdaStart,
     Arrow,
     FatArrow,
     Asterisk,
 }
 
 #[derive(Default)]
 pub struct TokenTable {
     pub tokens: Vec<Token>,
     pub offsets: Vec<usize>,
     pub lengths: Vec<usize>,
 }
 
 pub struct LexState {
     indents: Vec<usize>,
 }
 
 trait ConsumeToken {
     fn token(&mut self, token: Token, _offset: usize, _length: usize);
 }
 
 #[derive(Default)]
 struct TokenConsumer{
     token_table: TokenTable,
 }
 
 impl ConsumeToken for TokenConsumer {
     fn token(&mut self, token: Token, offset: usize, length: usize){
         self.token_table.tokens.push(token);
         self.token_table.offsets.push(offset);
         self.token_table.lengths.push(length);
     }
 }
 
 pub fn tokenize(code_str: &str) -> Vec<Token> {
     full_tokenize(code_str).tokens
 }
 
 pub fn full_tokenize(code_str: &str) -> TokenTable {
     let mut lex_state = LexState{ indents: Vec::new() };
     let mut consumer = TokenConsumer::default();
 
     consume_all_tokens(
         &mut lex_state,
         code_str.as_bytes(),
         &mut consumer
     );
 
     consumer.token_table
 }
 
 fn consume_all_tokens(
     state: &mut LexState,
     bytes: &[u8],
     consumer: &mut impl ConsumeToken,
 ) {
     let mut i = 0;
 
     while i < bytes.len() {
         let bytes = &bytes[i..];
 
         let (token, len) = match bytes[0] {
             b'(' => (Token::OpenParen, 1),
             b')' => (Token::CloseParen, 1),
             b'{' => (Token::OpenCurly, 1),
             b'}' => (Token::CloseCurly, 1),
             b'[' => (Token::OpenSquare, 1),
             b']' => (Token::CloseSquare, 1),
             b',' => (Token::Comma, 1),
             b'_' => lex_underscore(bytes),
             b'@' => lex_private_tag(bytes),
             b'a'..=b'z' => lex_ident(false, bytes),
             b'A'..=b'Z' => lex_ident(true, bytes),
             b'0'..=b'9' => lex_number(bytes),
             b'-' | b':' | b'!' | b'.' | b'*' | b'/' | b'&' |
             b'%' | b'^' | b'+' | b'<' | b'=' | b'>' | b'|' | b'\\' => lex_operator(bytes),
             b' ' => {
                 match skip_whitespace(bytes) {
                     SpaceDotOrSpaces::SpacesWSpaceDot(skip) => {
                         i += skip;
                         (Token::SpaceDot, 1)
                     },
                     SpaceDotOrSpaces::Spaces(skip) => {
                         i += skip;
                         continue;
                     }
                 }
             }
             b'\n' => {
                 // TODO: add newline to side_table
                 let skip_newline_return = skip_newlines_and_comments(bytes);
 
                 match skip_newline_return {
                     SkipNewlineReturn::SkipWIndent(skipped_lines, curr_line_indent) => {
                         add_indents(skipped_lines, curr_line_indent, state, consumer, &mut i);
                         continue;
                     }
                     SkipNewlineReturn::WSpaceDot(skipped_lines, curr_line_indent) => {
                         add_indents(skipped_lines, curr_line_indent, state, consumer, &mut i);
                         (Token::SpaceDot, 1)
                     }
                 }
             }
             b'#' => {
                 // TODO: add comment to side_table
                 i += skip_comment(bytes);
                 continue;
             }
             b'"' => lex_string(bytes),
             b => todo!("handle {:?}", b as char),
         };
 
         consumer.token(token, i, len);
         i += len;
     }
 }
 
 fn add_indents(skipped_lines: usize, curr_line_indent: usize, state: &mut LexState, consumer: &mut impl ConsumeToken, curr_byte_ctr: &mut usize) {
     *curr_byte_ctr += skipped_lines;
 
     if let Some(&prev_indent) = state.indents.last() {
         if curr_line_indent > prev_indent {
             state.indents.push(curr_line_indent);
             consumer.token(Token::OpenIndent, *curr_byte_ctr, 0);
         } else {
             *curr_byte_ctr += curr_line_indent;
 
             if prev_indent == curr_line_indent {
                 consumer.token(Token::SameIndent, *curr_byte_ctr, 0);
             } else if curr_line_indent < prev_indent {
                 // safe unwrap because we check first
                 while state.indents.last().is_some() && curr_line_indent < *state.indents.last().unwrap() {
                     state.indents.pop();
                     consumer.token(Token::CloseIndent, *curr_byte_ctr, 0);
                 }
             }
         }
     } else if curr_line_indent > 0 {
         state.indents.push(curr_line_indent);
         consumer.token(Token::OpenIndent, *curr_byte_ctr, 0);
     } else {
         consumer.token(Token::SameIndent, *curr_byte_ctr, 0);
     }
 }
 
 impl TokenTable {
-    pub fn new(text: &str) -> TokenTable {
-        let mut tt = TokenTable {
-            tokens: Vec::new(),
-            offsets: Vec::new(),
-            lengths: Vec::new(),
-        };
-
-        let mut offset = 0;
-        let mut state = LexState::new();
-
-        // while let Some((token, skip, length)) = Token::lex_single(&mut state, &text.as_bytes()[offset..]) {
-        //     tt.tokens.push(token);
-        //     offset += skip;
-        //     tt.offsets.push(offset);
-        //     offset += length;
-        //     tt.lengths.push(length);
-        // }
-
-        tt
-    }
-
     pub fn extract_str<'a>(&self, index: usize, content: &'a str) -> &'a str {
         // TODO remove unwrap
         let len = *self.lengths.get(index).unwrap();
         let offset = *self.offsets.get(index).unwrap();
 
         &content[offset..(offset + len)]
     }
 }
 
 impl LexState {
     pub fn new() -> LexState {
         LexState {
             indents: Vec::new(),
         }
     }
 }
 
 fn skip_comment(bytes: &[u8]) -> usize {
     let mut skip = 0;
     while skip < bytes.len() && bytes[skip] != b'\n' {
         skip += 1;
     }
     if (skip + 1) < bytes.len() && bytes[skip] == b'\n' && bytes[skip+1] == b'#'{
         skip += 1;
     }
     skip
 }
 
 #[derive(Copy, Clone, Eq, PartialEq, PartialOrd, Ord)]
 struct Indent(usize);
 
 enum SpaceDotOrSpaces {
     SpacesWSpaceDot(usize),
     Spaces(usize)
 }
 
 fn skip_whitespace(bytes: &[u8]) -> SpaceDotOrSpaces {
     debug_assert!(bytes[0] == b' ');
 
     let mut skip = 0;
     while skip < bytes.len() && bytes[skip] == b' ' {
         skip += 1;
     }
 
     if skip < bytes.len() && bytes[skip] == b'.' {
         SpaceDotOrSpaces::SpacesWSpaceDot(skip)
     } else {
         SpaceDotOrSpaces::Spaces(skip)
     }
 }
 
 enum SkipNewlineReturn {
     SkipWIndent(usize, usize),
     WSpaceDot(usize, usize)
 }
 
 // also skips lines that contain only whitespace
 fn skip_newlines_and_comments(bytes: &[u8]) -> SkipNewlineReturn {
     let mut skip = 0;
     let mut indent = 0;
 
     while skip < bytes.len() && bytes[skip] == b'\n' {
         skip += indent + 1;
 
         if bytes.len() > skip {
             if bytes[skip] == b' ' {
                 let space_dot_or_spaces = skip_whitespace(&bytes[skip..]);
 
                 match space_dot_or_spaces {
                     SpaceDotOrSpaces::SpacesWSpaceDot(spaces) => {
                         return SkipNewlineReturn::WSpaceDot(skip, spaces)
                     }
                     SpaceDotOrSpaces::Spaces(spaces) => {
                         if bytes.len() > (skip + spaces) {
                             if bytes[skip + spaces] == b'\n' {
                                 indent = 0;
                                 skip += spaces;
                             } else if bytes[skip+spaces] == b'#' {
                                 let comment_skip = skip_comment(&bytes[(skip + spaces)..]);
 
                                 indent = 0;
                                 skip += spaces + comment_skip;
                             } else {
                                 indent = spaces;
                             }
                         } else {
                             indent = spaces;
                         }
                     }
                 }
             } else {
                 while bytes[skip] == b'#' {
                     let comment_skip = skip_comment(&bytes[skip..]);
 
                     indent = 0;
                     skip += comment_skip;
                 }
             }
         }
     }
 
     SkipNewlineReturn::SkipWIndent(skip, indent)
 }
 
 fn is_op_continue(ch: u8) -> bool {
     matches!(ch, b'-' | b':' | b'!' | b'.' | b'*' | b'/' | b'&' |
     b'%' | b'^' | b'+' | b'<' | b'=' | b'>' | b'|' | b'\\')
 }
 
 fn lex_operator(bytes: &[u8]) -> (Token, usize) {
     let mut i = 0;
     while i < bytes.len() && is_op_continue(bytes[i]) {
         i += 1;
     }
     let tok = match &bytes[0..i] {
         b"+" => Token::OpPlus,
         b"-" => Token::OpMinus,
         b"*" => Token::Asterisk,
         b"/" => Token::OpSlash,
         b"%" => Token::OpPercent,
         b"^" => Token::OpCaret,
         b">" => Token::OpGreaterThan,
         b"<" => Token::OpLessThan,
         b"." => Token::Dot,
         b"=" => Token::OpAssignment,
         b":" => Token::Colon,
         b"|" => Token::Pipe,
         b"\\" => Token::LambdaStart,
         b"|>" => Token::OpPizza,
         b"==" => Token::OpEquals,
         b"!" => Token::Bang,
         b"!=" => Token::OpNotEquals,
         b">=" => Token::OpGreaterThanOrEq,
         b"<=" => Token::OpLessThanOrEq,
         b"&&" => Token::OpAnd,
         b"&" => Token::Ampersand,
         b"||" => Token::OpOr,
         b"//" => Token::OpDoubleSlash,
         b"%%" => Token::OpDoublePercent,
         b"->" => Token::Arrow,
         b"<-" => Token::OpBackpassing,
         op => {
             dbg!(std::str::from_utf8(op).unwrap());
             Token::MalformedOperator
         }
     };
     (tok, i)
 }
 
 fn is_ident_continue(ch: u8) -> bool {
     matches!(ch, b'a'..=b'z'|b'A'..=b'Z'|b'0'..=b'9'|b'_')
 }
 
 fn lex_private_tag(bytes: &[u8]) -> (Token, usize) {
     debug_assert!(bytes[0] == b'@');
     let mut i = 1;
     while i < bytes.len() && is_ident_continue(bytes[i]) {
         i += 1;
     }
     (Token::PrivateTag, i)
 }
 
 fn lex_ident(uppercase: bool, bytes: &[u8]) -> (Token, usize) {
     let mut i = 0;
     while i < bytes.len() && is_ident_continue(bytes[i]) {
         i += 1;
     }
     let tok = match &bytes[0..i] {
         b"if" => Token::KeywordIf,
         b"then" => Token::KeywordThen,
         b"else" => Token::KeywordElse,
         b"when" => Token::KeywordWhen,
         b"as" => Token::KeywordAs,
         b"is" => Token::KeywordIs,
         b"expect" => Token::KeywordExpect,
         b"app" => Token::KeywordApp,
         b"interface" => Token::KeywordInterface,
         b"packages" => Token::KeywordPackages,
         b"imports" => Token::KeywordImports,
         b"provides" => Token::KeywordProvides,
         b"to" => Token::KeywordTo,
         b"exposes" => Token::KeywordExposes,
         b"effects" => Token::KeywordEffects,
         b"platform" => Token::KeywordPlatform,
         b"requires" => Token::KeywordRequires,
         ident => {
             if ident.contains(&b'_') {
                 Token::MalformedIdent
             } else if uppercase {
                 Token::UppercaseIdent
             } else {
                 Token::LowercaseIdent
             }
         },
     };
     (tok, i)
 }
 
 fn lex_underscore(bytes: &[u8]) -> (Token, usize) {
     let mut i = 0;
     while i < bytes.len() && is_ident_continue(bytes[i]) {
         i += 1;
     }
     (Token::Underscore, i)
 }
 
 fn is_int_continue(ch: u8) -> bool {
     matches!(ch, b'0'..=b'9' | b'_')
 }
 
 fn lex_number(bytes: &[u8]) -> (Token, usize) {
     let mut i = 0;
     while i < bytes.len() && is_int_continue(bytes[i]) {
         i += 1;
     }
 
     if i < bytes.len() && bytes[i] == b'.' {
         i += 1;
         while i < bytes.len() && is_int_continue(bytes[i]) {
             i += 1;
         }
     }
 
     (Token::Number, i)
 }
 
 fn lex_string(bytes: &[u8]) -> (Token, usize) {
     let mut i = 0;
     assert_eq!(bytes[i], b'"');
     i += 1;
 
     while i < bytes.len() {
         match bytes[i] {
             b'"' => break,
             // TODO: escapes
             _ => i += 1,
         }
     }
 
     assert_eq!(bytes[i], b'"');
     i += 1;
 
     (Token::String, i)
 }
 
 #[cfg(test)]
 mod tokenizer {
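Apart from making the module `pub` in lib.rs and deleting the dead `TokenTable::new`, the tokenizer is unchanged, so the doc comment on `Token` still carries the interesting invariant: whole classes of tokens are recognizable from bit patterns alone. The file never spells those checks out, so here is an illustrative sketch (the helper names and masks below are mine, derived from the documented patterns, not part of the file):

```rust
use crate::tokenizer::{tokenize, Token};

// Illustrative helpers (not in the file): classify tokens with the
// documented bit patterns instead of matching every variant.

fn is_identifier_like(token: Token) -> bool {
    // 0b_001*_****: "Identifier-like" things (idents and keywords)
    (token as u8) & 0b_1110_0000 == 0b_0010_0000
}

fn is_punctuation(token: Token) -> bool {
    // 0b_01**_****: "Punctuation" (operators included, 0b_011*_****)
    (token as u8) & 0b_1100_0000 == 0b_0100_0000
}

fn is_open_grouping(token: Token) -> bool {
    // 0b_0100_1**0: [ { ( and OpenIndent
    (token as u8) & 0b_1111_1001 == 0b_0100_1000
}

fn classify_demo() {
    // tokenize is callable from outside the crate now that lib.rs
    // declares `pub mod tokenizer;`.
    for token in tokenize("x = 1") {
        println!(
            "{:?}: ident-like={} punctuation={}",
            token,
            is_identifier_like(token),
            is_punctuation(token)
        );
    }
}
```

Note that the variants without explicit discriminants (`Malformed` and later) are assigned values from `0b_1000_0001` upward, so the masks above correctly classify them as neither identifier-like nor punctuation.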
highlight/tests/peg_grammar.rs: new file, 1324 lines (diff suppressed because it is too large)