This commit is contained in:
Leonard Hecker 2025-07-04 21:01:01 +02:00
parent 837fcb3fd9
commit 579898fd80
8 changed files with 421 additions and 265 deletions

2
.vscode/launch.json vendored
View file

@ -12,9 +12,9 @@
"args": [
"assets/highlighting-tests/bash.sh",
"assets/highlighting-tests/batch.bat",
"assets/highlighting-tests/json.json",
"assets/highlighting-tests/powershell.ps1",
"assets/highlighting-tests/yaml.yml",
"assets/highlighting-tests/json.json",
],
},
{

View file

@ -291,3 +291,13 @@ impl AsciiStringHelpers for str {
p.len() <= s.len() && s[..p.len()].eq_ignore_ascii_case(p)
}
}
impl AsciiStringHelpers for [u8] {
    /// Returns `true` if this byte slice begins with `prefix`, treating ASCII
    /// letters as equal regardless of case.
    ///
    /// An empty `prefix` always matches; a `prefix` longer than the slice never does.
    fn starts_with_ignore_ascii_case(&self, prefix: &str) -> bool {
        // The comparison is byte-wise, so `prefix` is only ever looked at as raw
        // bytes and no UTF-8 boundary handling is involved.
        let needle = prefix.as_bytes();
        // `get` yields `None` when the slice is shorter than the prefix.
        match self.get(..needle.len()) {
            Some(head) => head.eq_ignore_ascii_case(needle),
            None => false,
        }
    }
}

View file

@ -5,44 +5,66 @@ use super::*;
type T = Transition;
// NOTE: These are indices into the `LANG.charsets` array.
const C_DIGITS: usize = 0;
const C_VARIABLE: usize = 1;
// NOTE: These are indices into the `LANG.states` array.
const _GROUND: u8 = 0;
const COMMENT: u8 = 1;
const STRING_SINGLE: u8 = 2;
const STRING_DOUBLE: u8 = 3;
const STRING_ESCAPE: u8 = 4;
const VARIABLE: u8 = 5;
const _S_GROUND: u8 = 0;
const S_COMMENT: u8 = 1;
const S_STRING_SINGLE: u8 = 2;
const S_STRING_DOUBLE: u8 = 3;
const S_STRING_ESCAPE: u8 = 4;
const S_VARIABLE: u8 = 5;
pub const LANG: Language = Language {
name: "Bash",
extensions: &["sh", "bash", "zsh", "ksh", "csh", "tcsh"],
word_chars: &[
// /.-,+*)('&%$#"!
0b_0000110000000000,
// ?>=<;:9876543210
0b_0000001111111111,
// ONMLKJIHGFEDCBA@
0b_1111111111111110,
// _^]\[ZYXWVUTSRQP
0b_1000000000000000,
// onmlkjihgfedcba`
0b_1111111111111111,
// ~}|{zyxwvutsrqp
0b_0000000000000000,
charsets: &[
// C_DIGITS
&[
// /.-,+*)('&%$#"!
0b_0010100000000000,
// ?>=<;:9876543210
0b_0000001111111111,
// ONMLKJIHGFEDCBA@
0b_0000000000000000,
// _^]\[ZYXWVUTSRQP
0b_0000000000000000,
// onmlkjihgfedcba`
0b_0000000000000000,
// ~}|{zyxwvutsrqp
0b_0000000000000000,
],
// C_VARIABLE
&[
// /.-,+*)('&%$#"!
0b_0000110000000000,
// ?>=<;:9876543210
0b_0000001111111111,
// ONMLKJIHGFEDCBA@
0b_1111111111111110,
// _^]\[ZYXWVUTSRQP
0b_1000000000000000,
// onmlkjihgfedcba`
0b_1111111111111111,
// ~}|{zyxwvutsrqp
0b_0000000000000000,
],
],
states: &[
// GROUND
// S_GROUND
&[
// Comments
T { test: Prefix("#"), kind: Comment, state: Push(COMMENT) },
T { test: Prefix("<#"), kind: Comment, state: Push(COMMENT) },
T { test: Prefix("#"), kind: Comment, state: Push(S_COMMENT) },
T { test: Prefix("<#"), kind: Comment, state: Push(S_COMMENT) },
// Strings
T { test: Prefix("'"), kind: String, state: Push(STRING_SINGLE) },
T { test: Prefix("\""), kind: String, state: Push(STRING_DOUBLE) },
T { test: Prefix("'"), kind: String, state: Push(S_STRING_SINGLE) },
T { test: Prefix("\""), kind: String, state: Push(S_STRING_DOUBLE) },
// Variables
T { test: Prefix("$"), kind: Variable, state: Push(VARIABLE) },
T { test: Prefix("$"), kind: Variable, state: Push(S_VARIABLE) },
// Numbers
T { test: Digits, kind: Number, state: Pop(1) },
T { test: Charset(C_DIGITS), kind: Number, state: Pop(1) },
// Operators
T { test: Prefix("|"), kind: Operator, state: Pop(1) },
T { test: Prefix("&"), kind: Operator, state: Pop(1) },
@ -63,19 +85,19 @@ pub const LANG: Language = Language {
T { test: Prefix("esac"), kind: Keyword, state: Pop(1) },
T { test: Prefix("function"), kind: Keyword, state: Pop(1) },
],
// COMMENT
// S_COMMENT
&[T { test: Line, kind: Comment, state: Pop(1) }],
// STRING_SINGLE
// S_STRING_SINGLE
&[T { test: Prefix("'"), kind: String, state: Pop(1) }],
// STRING_DOUBLE
// S_STRING_DOUBLE
&[
T { test: Prefix("\\"), kind: String, state: Push(STRING_ESCAPE) },
T { test: Prefix("$"), kind: Variable, state: Push(VARIABLE) },
T { test: Prefix("\\"), kind: String, state: Push(S_STRING_ESCAPE) },
T { test: Prefix("$"), kind: Variable, state: Push(S_VARIABLE) },
T { test: Prefix("\""), kind: String, state: Pop(1) },
],
// STRING_ESCAPE
// S_STRING_ESCAPE
&[T { test: Chars(1), kind: String, state: Pop(1) }],
// VARIABLE
&[T { test: Word, kind: Variable, state: Pop(1) }],
// S_VARIABLE
&[T { test: Charset(C_VARIABLE), kind: Variable, state: Pop(1) }],
],
};

View file

@ -5,67 +5,89 @@ use super::*;
type T = Transition;
// NOTE: These are indices into the `LANG.charsets` array.
const C_DIGITS: usize = 0;
const C_VARIABLE: usize = 1;
// NOTE: These are indices into the `LANG.states` array.
const _GROUND: u8 = 0;
const COMMENT: u8 = 1;
const STRING: u8 = 2;
const VARIABLE: u8 = 3;
const _S_GROUND: u8 = 0;
const S_COMMENT: u8 = 1;
const S_STRING: u8 = 2;
const S_VARIABLE: u8 = 3;
pub const LANG: Language = Language {
name: "Batch",
extensions: &["bat", "cmd"],
word_chars: &[
// /.-,+*)('&%$#"!
0b_0000000000000000,
// ?>=<;:9876543210
0b_0000001111111111,
// ONMLKJIHGFEDCBA@
0b_1111111111111110,
// _^]\[ZYXWVUTSRQP
0b_1000000000000000,
// onmlkjihgfedcba`
0b_1111111111111111,
// ~}|{zyxwvutsrqp
0b_0000000000000000,
charsets: &[
// C_DIGITS
&[
// /.-,+*)('&%$#"!
0b_0010100000000000,
// ?>=<;:9876543210
0b_0000001111111111,
// ONMLKJIHGFEDCBA@
0b_0000000000000000,
// _^]\[ZYXWVUTSRQP
0b_0000000000000000,
// onmlkjihgfedcba`
0b_0000000000000000,
// ~}|{zyxwvutsrqp
0b_0000000000000000,
],
// C_VARIABLE
&[
// /.-,+*)('&%$#"!
0b_0000000000000000,
// ?>=<;:9876543210
0b_0000001111111111,
// ONMLKJIHGFEDCBA@
0b_1111111111111110,
// _^]\[ZYXWVUTSRQP
0b_1000011111111111,
// onmlkjihgfedcba`
0b_1111111111111111,
// ~}|{zyxwvutsrqp
0b_0100011111111111,
],
],
states: &[
// GROUND
// S_GROUND
&[
// Comments (REM or ::)
T { test: Prefix("REM "), kind: Comment, state: Push(COMMENT) },
T { test: Prefix("::"), kind: Comment, state: Push(COMMENT) },
T { test: PrefixInsensitive("rem "), kind: Comment, state: Push(S_COMMENT) },
T { test: Prefix("::"), kind: Comment, state: Push(S_COMMENT) },
// Strings (quoted)
T { test: Prefix("\""), kind: String, state: Push(STRING) },
T { test: Prefix("\""), kind: String, state: Push(S_STRING) },
// Variables
T { test: Prefix("%"), kind: Variable, state: Push(VARIABLE) },
T { test: Prefix("%"), kind: Variable, state: Push(S_VARIABLE) },
// Numbers
T { test: Digits, kind: Number, state: Pop(1) },
T { test: Charset(C_DIGITS), kind: Number, state: Pop(1) },
// Operators
T { test: Prefix("|"), kind: Operator, state: Pop(1) },
T { test: Prefix("&"), kind: Operator, state: Pop(1) },
T { test: Prefix("<"), kind: Operator, state: Pop(1) },
T { test: Prefix(">"), kind: Operator, state: Pop(1) },
// Keywords (common)
T { test: Prefix("if"), kind: Keyword, state: Pop(1) },
T { test: Prefix("else"), kind: Keyword, state: Pop(1) },
T { test: Prefix("for"), kind: Keyword, state: Pop(1) },
T { test: Prefix("in"), kind: Keyword, state: Pop(1) },
T { test: Prefix("do"), kind: Keyword, state: Pop(1) },
T { test: Prefix("not"), kind: Keyword, state: Pop(1) },
T { test: Prefix("exist"), kind: Keyword, state: Pop(1) },
T { test: Prefix("set"), kind: Keyword, state: Pop(1) },
T { test: Prefix("echo"), kind: Keyword, state: Pop(1) },
T { test: Prefix("goto"), kind: Keyword, state: Pop(1) },
T { test: Prefix("call"), kind: Keyword, state: Pop(1) },
T { test: PrefixInsensitive("if"), kind: Keyword, state: Pop(1) },
T { test: PrefixInsensitive("else"), kind: Keyword, state: Pop(1) },
T { test: PrefixInsensitive("for"), kind: Keyword, state: Pop(1) },
T { test: PrefixInsensitive("in"), kind: Keyword, state: Pop(1) },
T { test: PrefixInsensitive("do"), kind: Keyword, state: Pop(1) },
T { test: PrefixInsensitive("not"), kind: Keyword, state: Pop(1) },
T { test: PrefixInsensitive("exist"), kind: Keyword, state: Pop(1) },
T { test: PrefixInsensitive("set"), kind: Keyword, state: Pop(1) },
T { test: PrefixInsensitive("echo"), kind: Keyword, state: Pop(1) },
T { test: PrefixInsensitive("goto"), kind: Keyword, state: Pop(1) },
T { test: PrefixInsensitive("call"), kind: Keyword, state: Pop(1) },
],
// COMMENT
// S_COMMENT
&[T { test: Line, kind: Comment, state: Pop(1) }],
// STRING
// S_STRING
&[T { test: Prefix("\""), kind: String, state: Pop(1) }],
// VARIABLE
// S_VARIABLE
&[
T { test: Prefix("%"), kind: Variable, state: Pop(1) },
T { test: Word, kind: Variable, state: Pop(1) },
T { test: Charset(C_VARIABLE), kind: Variable, state: Pop(1) },
],
],
};

View file

@ -5,55 +5,61 @@ use super::*;
type T = Transition;
// NOTE: These are indices into the `LANG.charsets` array.
const C_DIGITS: usize = 0;
// NOTE: These are indices into the `LANG.states` array.
const _GROUND: u8 = 0;
const LINE_COMMENT: u8 = 1;
const BLOCK_COMMENT: u8 = 2;
const STRING: u8 = 3;
const STRING_ESCAPE: u8 = 4;
const _S_GROUND: u8 = 0;
const S_LINE_COMMENT: u8 = 1;
const S_BLOCK_COMMENT: u8 = 2;
const S_STRING: u8 = 3;
const S_STRING_ESCAPE: u8 = 4;
pub const LANG: Language = Language {
name: "JSON",
extensions: &["json", "jsonc"],
word_chars: &[
// /.-,+*)('&%$#"!
0b_0000000000000000,
// ?>=<;:9876543210
0b_0000000000000000,
// ONMLKJIHGFEDCBA@
0b_0000000000000000,
// _^]\[ZYXWVUTSRQP
0b_0000000000000000,
// onmlkjihgfedcba`
0b_0000000000000000,
// ~}|{zyxwvutsrqp
0b_0000000000000000,
charsets: &[
// C_DIGITS
&[
// /.-,+*)('&%$#"!
0b_0110100000000000,
// ?>=<;:9876543210
0b_0000001111111111,
// ONMLKJIHGFEDCBA@
0b_0000000000100000,
// _^]\[ZYXWVUTSRQP
0b_0000000000000000,
// onmlkjihgfedcba`
0b_0000000000100000,
// ~}|{zyxwvutsrqp
0b_0000000000000000,
],
],
states: &[
// GROUND
// S_GROUND
&[
// Comments (jsonc)
T { test: Prefix("//"), kind: Comment, state: Push(LINE_COMMENT) },
T { test: Prefix("/*"), kind: Comment, state: Push(BLOCK_COMMENT) },
T { test: Prefix("//"), kind: Comment, state: Push(S_LINE_COMMENT) },
T { test: Prefix("/*"), kind: Comment, state: Push(S_BLOCK_COMMENT) },
// Strings
T { test: Prefix("\""), kind: String, state: Push(STRING) },
// Numbers
T { test: Digits, kind: Number, state: Pop(1) },
T { test: Prefix("\""), kind: String, state: Push(S_STRING) },
// Numbers (start: minus or digit)
T { test: Charset(C_DIGITS), kind: Number, state: Pop(1) },
// Booleans/null
T { test: Prefix("true"), kind: Keyword, state: Pop(1) },
T { test: Prefix("false"), kind: Keyword, state: Pop(1) },
T { test: Prefix("null"), kind: Keyword, state: Pop(1) },
],
// LINE_COMMENT (// single-line)
// S_LINE_COMMENT (// single-line)
&[T { test: Line, kind: Comment, state: Pop(1) }],
// BLOCK_COMMENT (/* ... */)
// S_BLOCK_COMMENT (/* ... */)
&[T { test: Prefix("*/"), kind: Comment, state: Pop(1) }],
// STRING ("...")
// S_STRING ("...")
&[
T { test: Prefix("\\"), kind: String, state: Push(STRING_ESCAPE) },
T { test: Prefix("\\"), kind: String, state: Push(S_STRING_ESCAPE) },
T { test: Prefix("\""), kind: String, state: Pop(1) },
],
// STRING_ESCAPE (escape in string)
// S_STRING_ESCAPE (escape in string)
&[T { test: Chars(1), kind: String, state: Pop(1) }],
],
};

View file

@ -5,53 +5,75 @@ use super::*;
type T = Transition;
// NOTE: These are indices into the `LANG.charsets` array.
const C_DIGITS: usize = 0;
const C_METHOD: usize = 1;
// NOTE: These are indices into the `LANG.states` array.
const _GROUND: u8 = 0;
const LINE_COMMENT: u8 = 1;
const BLOCK_COMMENT: u8 = 2;
const STRING_SINGLE: u8 = 3;
const STRING_DOUBLE: u8 = 4;
const STRING_ESCAPE: u8 = 5;
const VARIABLE: u8 = 6;
const VARIABLE_BRACE: u8 = 7;
const VARIABLE_PAREN: u8 = 8;
const PARAMETER: u8 = 9;
const KEYWORD: u8 = 10;
const METHOD: u8 = 11;
const _S_GROUND: u8 = 0;
const S_LINE_COMMENT: u8 = 1;
const S_BLOCK_COMMENT: u8 = 2;
const S_STRING_SINGLE: u8 = 3;
const S_STRING_DOUBLE: u8 = 4;
const S_STRING_ESCAPE: u8 = 5;
const S_VARIABLE: u8 = 6;
const S_VARIABLE_BRACE: u8 = 7;
const S_VARIABLE_PAREN: u8 = 8;
const S_PARAMETER: u8 = 9;
const S_KEYWORD: u8 = 10;
const S_METHOD: u8 = 11;
pub const LANG: Language = Language {
name: "PowerShell",
extensions: &["ps1", "psm1", "psd1"],
word_chars: &[
// /.-,+*)('&%$#"!
0b_1110110000101010,
// ?>=<;:9876543210
0b_1111011111111111,
// ONMLKJIHGFEDCBA@
0b_1111111111111110,
// _^]\[ZYXWVUTSRQP
0b_1111111111111111,
// onmlkjihgfedcba`
0b_1111111111111111,
// ~}|{zyxwvutsrqp
0b_0100011111111111,
charsets: &[
// C_DIGITS
&[
// /.-,+*)('&%$#"!
0b_0110100000000000,
// ?>=<;:9876543210
0b_0000001111111111,
// ONMLKJIHGFEDCBA@
0b_0000000000100000,
// _^]\[ZYXWVUTSRQP
0b_0000000000000000,
// onmlkjihgfedcba`
0b_0000000000100000,
// ~}|{zyxwvutsrqp
0b_0000000000000000,
],
// C_METHOD
&[
// /.-,+*)('&%$#"!
0b_1110110000101010,
// ?>=<;:9876543210
0b_1111011111111111,
// ONMLKJIHGFEDCBA@
0b_1111111111111110,
// _^]\[ZYXWVUTSRQP
0b_1111111111111111,
// onmlkjihgfedcba`
0b_1111111111111111,
// ~}|{zyxwvutsrqp
0b_0100011111111111,
],
],
states: &[
// GROUND
// S_GROUND
&[
// Comments
T { test: Prefix("#"), kind: Comment, state: Push(LINE_COMMENT) },
T { test: Prefix("<#"), kind: Comment, state: Push(BLOCK_COMMENT) },
T { test: Prefix("#"), kind: Comment, state: Push(S_LINE_COMMENT) },
T { test: Prefix("<#"), kind: Comment, state: Push(S_BLOCK_COMMENT) },
// Numbers
T { test: Digits, kind: Number, state: Pop(1) },
T { test: Charset(C_DIGITS), kind: Number, state: Pop(1) },
// Strings
T { test: Prefix("'"), kind: String, state: Push(STRING_SINGLE) },
T { test: Prefix("\""), kind: String, state: Push(STRING_DOUBLE) },
T { test: Prefix("'"), kind: String, state: Push(S_STRING_SINGLE) },
T { test: Prefix("\""), kind: String, state: Push(S_STRING_DOUBLE) },
// Variables
T { test: Prefix("$("), kind: Other, state: Push(VARIABLE_PAREN) },
T { test: Prefix("$"), kind: Variable, state: Push(VARIABLE) },
T { test: Prefix("$("), kind: Other, state: Push(S_VARIABLE_PAREN) },
T { test: Prefix("$"), kind: Variable, state: Push(S_VARIABLE) },
// Operators
T { test: Prefix("-"), kind: Operator, state: Push(PARAMETER) },
T { test: Prefix("-"), kind: Operator, state: Push(S_PARAMETER) },
T { test: Prefix("!"), kind: Operator, state: Pop(1) },
T { test: Prefix("*"), kind: Operator, state: Pop(1) },
T { test: Prefix("/"), kind: Operator, state: Pop(1) },
@ -62,61 +84,61 @@ pub const LANG: Language = Language {
T { test: Prefix(">"), kind: Operator, state: Pop(1) },
T { test: Prefix("|"), kind: Operator, state: Pop(1) },
// Keywords
T { test: Prefix("break"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("catch"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("continue"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("do"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("else"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("finally"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("foreach"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("function"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("if"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("return"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("switch"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("throw"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("try"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("using"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("while"), kind: Keyword, state: Push(KEYWORD) },
T { test: PrefixInsensitive("break"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("catch"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("continue"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("do"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("else"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("finally"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("foreach"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("function"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("if"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("return"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("switch"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("throw"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("try"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("using"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("while"), kind: Keyword, state: Push(S_KEYWORD) },
// Methods
T { test: Word, kind: Method, state: Push(METHOD) },
T { test: Charset(C_METHOD), kind: Method, state: Push(S_METHOD) },
],
// LINE_COMMENT: # comment
// S_LINE_COMMENT: # comment
&[T { test: Line, kind: Comment, state: Pop(1) }],
// BLOCK_COMMENT: <# comment #>
// S_BLOCK_COMMENT: <# comment #>
&[T { test: Prefix("#>"), kind: Comment, state: Pop(1) }],
// STRING_SINGLE: 'string'
// S_STRING_SINGLE: 'string'
&[T { test: Prefix("'"), kind: String, state: Pop(1) }],
// STRING_DOUBLE: "string"
// S_STRING_DOUBLE: "string"
&[
T { test: Prefix("`"), kind: String, state: Push(STRING_ESCAPE) },
T { test: Prefix("$("), kind: Other, state: Push(VARIABLE_PAREN) },
T { test: Prefix("$"), kind: Variable, state: Push(VARIABLE) },
T { test: Prefix("`"), kind: String, state: Push(S_STRING_ESCAPE) },
T { test: Prefix("$("), kind: Other, state: Push(S_VARIABLE_PAREN) },
T { test: Prefix("$"), kind: Variable, state: Push(S_VARIABLE) },
T { test: Prefix("\""), kind: String, state: Pop(1) },
],
// STRING_ESCAPE: "`a"
// S_STRING_ESCAPE: "`a"
&[T { test: Chars(1), kind: String, state: Pop(1) }],
// VARIABLE: $variable
// S_VARIABLE: $variable
&[
T { test: Prefix("{"), kind: Variable, state: Change(VARIABLE_BRACE) },
T { test: Word, kind: Variable, state: Pop(1) },
T { test: Prefix("{"), kind: Variable, state: Change(S_VARIABLE_BRACE) },
T { test: Charset(C_METHOD), kind: Variable, state: Pop(1) },
],
// VARIABLE_BRACE: ${variable}
// S_VARIABLE_BRACE: ${variable}
&[T { test: Prefix("}"), kind: Variable, state: Pop(1) }],
// VARIABLE_PAREN: $(command)
// S_VARIABLE_PAREN: $(command)
// This is largely a copy of the ground state.
&[
// Ground state Overrides
T { test: Prefix("("), kind: Other, state: Push(VARIABLE_PAREN) },
T { test: Prefix("("), kind: Other, state: Push(S_VARIABLE_PAREN) },
T { test: Prefix(")"), kind: Other, state: Pop(1) },
// Numbers
T { test: Digits, kind: Number, state: Pop(1) },
T { test: Charset(C_DIGITS), kind: Number, state: Pop(1) },
// Strings
T { test: Prefix("'"), kind: String, state: Push(STRING_SINGLE) },
T { test: Prefix("\""), kind: String, state: Push(STRING_DOUBLE) },
T { test: Prefix("'"), kind: String, state: Push(S_STRING_SINGLE) },
T { test: Prefix("\""), kind: String, state: Push(S_STRING_DOUBLE) },
// Variables
T { test: Prefix("$"), kind: Variable, state: Push(VARIABLE) },
T { test: Prefix("$"), kind: Variable, state: Push(S_VARIABLE) },
// Operators
T { test: Prefix("-"), kind: Operator, state: Push(PARAMETER) },
T { test: Prefix("-"), kind: Operator, state: Push(S_PARAMETER) },
T { test: Prefix("!"), kind: Operator, state: Pop(1) },
T { test: Prefix("*"), kind: Operator, state: Pop(1) },
T { test: Prefix("/"), kind: Operator, state: Pop(1) },
@ -127,38 +149,38 @@ pub const LANG: Language = Language {
T { test: Prefix(">"), kind: Operator, state: Pop(1) },
T { test: Prefix("|"), kind: Operator, state: Pop(1) },
// Keywords
T { test: Prefix("break"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("catch"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("continue"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("do"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("else"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("finally"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("foreach"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("function"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("if"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("return"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("switch"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("throw"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("try"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("using"), kind: Keyword, state: Push(KEYWORD) },
T { test: Prefix("while"), kind: Keyword, state: Push(KEYWORD) },
T { test: PrefixInsensitive("break"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("catch"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("continue"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("do"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("else"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("finally"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("foreach"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("function"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("if"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("return"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("switch"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("throw"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("try"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("using"), kind: Keyword, state: Push(S_KEYWORD) },
T { test: PrefixInsensitive("while"), kind: Keyword, state: Push(S_KEYWORD) },
// Methods
T { test: Word, kind: Method, state: Push(METHOD) },
T { test: Charset(C_METHOD), kind: Method, state: Push(S_METHOD) },
],
// PARAMETER: -parameter
// S_PARAMETER: -parameter
&[
T { test: Word, kind: Operator, state: Pop(1) },
T { test: Charset(C_METHOD), kind: Operator, state: Pop(1) },
T { test: Chars(0), kind: Operator, state: Pop(1) },
],
// KEYWORD: foreach, if, etc.
// S_KEYWORD: foreach, if, etc.
&[
T { test: Word, kind: Method, state: Change(METHOD) },
T { test: Charset(C_METHOD), kind: Method, state: Change(S_METHOD) },
T { test: Chars(0), kind: Keyword, state: Pop(1) },
],
// METHOD: Foo-Bar
// S_METHOD: Foo-Bar
&[
T { test: Word, kind: Method, state: Change(METHOD) },
T { test: Prefix("-"), kind: Method, state: Change(METHOD) },
T { test: Charset(C_METHOD), kind: Method, state: Change(S_METHOD) },
T { test: Prefix("-"), kind: Method, state: Change(S_METHOD) },
T { test: Chars(0), kind: Method, state: Pop(1) },
],
],

View file

@ -5,58 +5,116 @@ use super::*;
type T = Transition;
// NOTE: These are indices into the `LANG.charsets` array.
const C_DIGITS: usize = 0;
const C_KEY_STRING: usize = 1;
const C_VALUE_STRING: usize = 2;
// NOTE: These are indices into the `LANG.states` array.
const _GROUND: u8 = 0;
const COMMENT: u8 = 1;
const STRING_SINGLE: u8 = 2;
const STRING_DOUBLE: u8 = 3;
const _S_GROUND: u8 = 0;
const S_COMMENT: u8 = 1;
const S_STRING_SINGLE: u8 = 2;
const S_STRING_DOUBLE: u8 = 3;
const S_KEYWORD_MAYBE: u8 = 4;
const S_KEYVALUE: u8 = 5;
pub const LANG: Language = Language {
name: "YAML",
extensions: &["yaml", "yml"],
word_chars: &[
// /.-,+*)('&%$#"!
0b_0000000000000000,
// ?>=<;:9876543210
0b_0000000000000000,
// ONMLKJIHGFEDCBA@
0b_0000000000000000,
// _^]\[ZYXWVUTSRQP
0b_0000000000000000,
// onmlkjihgfedcba`
0b_0000000000000000,
// ~}|{zyxwvutsrqp
0b_0000000000000000,
charsets: &[
// C_DIGITS
&[
// /.-,+*)('&%$#"!
0b_0000000000000000,
// ?>=<;:9876543210
0b_0000001111111111,
// ONMLKJIHGFEDCBA@
0b_0000000000000000,
// _^]\[ZYXWVUTSRQP
0b_0000000000000000,
// onmlkjihgfedcba`
0b_0000000000000000,
// ~}|{zyxwvutsrqp
0b_0000000000000000,
],
// C_KEY_STRING
&[
// /.-,+*)('&%$#"!
0b_1111111111111110,
// ?>=<;:9876543210
0b_1111101111111111,
// ONMLKJIHGFEDCBA@
0b_1111111111111111,
// _^]\[ZYXWVUTSRQP
0b_1111111111111111,
// onmlkjihgfedcba`
0b_1111111111111111,
// ~}|{zyxwvutsrqp
0b_1111111111111111,
],
// C_VALUE_STRING
&[
// /.-,+*)('&%$#"!
0b_1111111101110011,
// ?>=<;:9876543210
0b_1111111111111111,
// ONMLKJIHGFEDCBA@
0b_1111111111111111,
// _^]\[ZYXWVUTSRQP
0b_1111111111111111,
// onmlkjihgfedcba`
0b_1111111111111111,
// ~}|{zyxwvutsrqp
0b_1111111111111111,
],
],
states: &[
// GROUND
// S_GROUND
&[
// Comments
T { test: Prefix("#"), kind: Comment, state: Push(COMMENT) },
T { test: Prefix("<#"), kind: Comment, state: Push(COMMENT) },
T { test: Prefix("#"), kind: Comment, state: Push(S_COMMENT) },
// Strings
T { test: Prefix("'"), kind: String, state: Push(STRING_SINGLE) },
T { test: Prefix("\""), kind: String, state: Push(STRING_DOUBLE) },
T { test: Prefix("'"), kind: String, state: Push(S_STRING_SINGLE) },
T { test: Prefix("\""), kind: String, state: Push(S_STRING_DOUBLE) },
// Numbers
T { test: Digits, kind: Number, state: Pop(1) },
// Booleans/null
T { test: Prefix("true"), kind: Keyword, state: Pop(1) },
T { test: Prefix("false"), kind: Keyword, state: Pop(1) },
T { test: Prefix("null"), kind: Keyword, state: Pop(1) },
// Punctuation
T { test: Prefix("-"), kind: Operator, state: Pop(1) },
T { test: Prefix(":"), kind: Operator, state: Pop(1) },
T { test: Prefix(","), kind: Operator, state: Pop(1) },
T { test: Prefix("["), kind: Operator, state: Pop(1) },
T { test: Prefix("]"), kind: Operator, state: Pop(1) },
T { test: Prefix("{"), kind: Operator, state: Pop(1) },
T { test: Prefix("}"), kind: Operator, state: Pop(1) },
T { test: Charset(C_DIGITS), kind: Number, state: Pop(1) },
// Booleans/Null
T { test: Prefix("true"), kind: Keyword, state: Push(S_KEYWORD_MAYBE) },
T { test: Prefix("false"), kind: Keyword, state: Push(S_KEYWORD_MAYBE) },
T { test: Prefix("null"), kind: Keyword, state: Push(S_KEYWORD_MAYBE) },
T { test: Charset(C_KEY_STRING), kind: Other, state: Push(S_KEYVALUE) },
],
// COMMENT
// S_COMMENT
&[T { test: Line, kind: Comment, state: Pop(1) }],
// STRING_SINGLE
// S_STRING_SINGLE
&[T { test: Prefix("'"), kind: String, state: Pop(1) }],
// STRING_DOUBLE
// S_STRING_DOUBLE
&[T { test: Prefix("\""), kind: String, state: Pop(1) }],
// S_KEYWORD_MAYBE
&[
T { test: Charset(C_KEY_STRING), kind: Other, state: Push(S_KEYVALUE) },
T { test: Chars(0), kind: Keyword, state: Pop(1) },
],
// S_KEYVALUE
&[
T { test: Prefix(":"), kind: Other, state: Push(S_KEYWORD_MAYBE) },
T { test: Chars(0), kind: Other, state: Pop(1) },
],
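// S_VALUE
// NOTE(review): no `S_VALUE` constant is declared with the other state indices, and no
// visible transition pushes or changes to this state (index 6) - confirm it is reachable.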
&[
// Comments
T { test: Prefix("#"), kind: Comment, state: Push(S_COMMENT) },
// Strings
T { test: Prefix("'"), kind: String, state: Push(S_STRING_SINGLE) },
T { test: Prefix("\""), kind: String, state: Push(S_STRING_DOUBLE) },
// Numbers
T { test: Charset(C_DIGITS), kind: Number, state: Pop(1) },
// Booleans/Null
T { test: Prefix("true"), kind: Keyword, state: Push(S_KEYWORD_MAYBE) },
T { test: Prefix("false"), kind: Keyword, state: Push(S_KEYWORD_MAYBE) },
T { test: Prefix("null"), kind: Keyword, state: Push(S_KEYWORD_MAYBE) },
T { test: Charset(C_KEY_STRING), kind: Other, state: Push(S_KEYVALUE) },
T { test: Chars(0), kind: Other, state: Pop(1) },
],
],
};

View file

@ -25,13 +25,13 @@ pub struct Language {
#[allow(dead_code)]
name: &'static str,
extensions: &'static [&'static str],
word_chars: &'static [u16; 6],
charsets: &'static [&'static [u16; 6]],
states: &'static [&'static [Transition]],
}
impl Language {
    /// Looks up the language whose `extensions` list contains the extension of `path`.
    ///
    /// Returns `None` when `path` has no extension or when no entry in `LANGUAGES`
    /// lists it. Extensions are compared exactly; no case folding is applied.
    pub fn from_path(path: &Path) -> Option<&'static Language> {
        // A path without an extension is an ordinary input, so propagate `None`
        // instead of unwrapping. The extension is bound exactly once.
        let ext = path.extension()?;
        LANGUAGES.iter().copied().find(|lang| lang.extensions.iter().any(|&e| OsStr::new(e) == ext))
    }
}
@ -45,8 +45,8 @@ struct Transition {
// The test a transition applies at the current offset of the line being highlighted.
// Each variant also determines which bytes are flagged in a state's `starter` table.
enum Consume {
// Flags every byte as a possible starter. Used in the language tables as `Chars(1)`
// for escape sequences and `Chars(0)` as a fallback entry.
// NOTE(review): presumably consumes the given number of characters - confirm in the matcher.
Chars(usize),
// A literal prefix; only its first byte is flagged as a starter.
Prefix(&'static str),
// A run of one or more ASCII digits.
Digits,
// A run of one or more bytes contained in the highlighter's `word_chars` table.
Word,
// Like `Prefix`, but matched with `starts_with_ignore_ascii_case`; both the lower-
// and upper-case form of the first byte are flagged as starters.
PrefixInsensitive(&'static str),
// A run of one or more bytes contained in the charset at the given index of the
// language's `charsets` array.
Charset(usize),
// Contributes no starter bytes.
// NOTE(review): presumably consumes the remainder of the line - confirm in the matcher.
Line,
}
@ -90,7 +90,7 @@ pub struct Highlighter<'a> {
logical_pos_y: CoordType,
language: &'static Language,
word_chars: [bool; 256],
charsets: Vec<[bool; 256]>,
starter: Vec<[bool; 256]>,
state: usize,
@ -100,31 +100,46 @@ pub struct Highlighter<'a> {
impl<'doc> Highlighter<'doc> {
pub fn new(doc: &'doc dyn ReadableDocument, language: &'static Language) -> Self {
let mut word_chars = [false; 256];
Self::fill_word_chars(&mut word_chars, language.word_chars);
let starter = Vec::from_iter(language.states.iter().map(|&transitions| {
let mut starter = [false; 256];
for t in transitions {
match t.test {
Consume::Chars(_) => starter.fill(true),
Consume::Prefix(prefix) => starter[prefix.as_bytes()[0] as usize] = true,
Consume::Digits => starter[b'0' as usize..=b'9' as usize].fill(true),
Consume::Word => Self::fill_word_chars(&mut starter, language.word_chars),
Consume::Line => {}
}
}
starter
}));
Self {
doc,
offset: 0,
logical_pos_y: 0,
language,
word_chars,
starter,
charsets: language
.charsets
.iter()
.map(|&charset| {
let mut word_chars = [false; 256];
Self::fill_word_chars(&mut word_chars, charset);
word_chars
})
.collect(),
starter: language
.states
.iter()
.map(|&transitions| {
let mut starter = [false; 256];
for t in transitions {
match t.test {
Consume::Chars(_) => starter.fill(true),
Consume::Prefix(prefix) => {
starter[prefix.as_bytes()[0] as usize] = true;
}
Consume::PrefixInsensitive(prefix) => {
let ch = prefix.as_bytes()[0];
starter[ch.to_ascii_lowercase() as usize] = true;
starter[ch.to_ascii_uppercase() as usize] = true;
}
Consume::Charset(i) => {
Self::fill_word_chars(&mut starter, language.charsets[i]);
}
Consume::Line => {}
}
}
starter
})
.collect(),
state: 0,
kind: Default::default(),
@ -227,21 +242,19 @@ impl<'doc> Highlighter<'doc> {
break;
}
}
Consume::Digits => {
if off < line_buf.len() && line_buf[off].is_ascii_digit() {
while {
off += 1;
off < line_buf.len() && line_buf[off].is_ascii_digit()
} {}
Consume::PrefixInsensitive(str) => {
if line_buf[off..].starts_with_ignore_ascii_case(str) {
off += str.len();
hit = Some(t);
break;
}
}
Consume::Word => {
if off < line_buf.len() && self.word_chars[line_buf[off] as usize] {
Consume::Charset(i) => {
let charset = &self.charsets[i];
if off < line_buf.len() && charset[line_buf[off] as usize] {
while {
off += 1;
off < line_buf.len() && self.word_chars[line_buf[off] as usize]
off < line_buf.len() && charset[line_buf[off] as usize]
} {}
hit = Some(t);
break;
@ -270,6 +283,9 @@ impl<'doc> Highlighter<'doc> {
match t.state {
StateStack::Change(to) => {
if let Some(last) = res.last_mut() {
last.kind = t.kind;
}
self.state = to as usize;
self.kind = t.kind;
}