A first draft for simple syntax highlighting

This commit is contained in:
Leonard Hecker 2025-08-25 16:28:09 +02:00
parent 59c08a6353
commit 3d8e5d973d
32 changed files with 3191 additions and 195 deletions

6
.vscode/launch.json vendored
View file

@ -10,7 +10,7 @@
"program": "${workspaceFolder}/target/debug/edit",
"cwd": "${workspaceFolder}",
"args": [
"${workspaceFolder}/src/bin/edit/main.rs"
"${workspaceFolder}/assets/highlighting-tests/COMMIT_EDITMSG"
],
},
{
@ -21,7 +21,7 @@
"program": "${workspaceFolder}/target/debug/edit",
"cwd": "${workspaceFolder}",
"args": [
"${workspaceFolder}/src/bin/edit/main.rs"
"${workspaceFolder}/assets/highlighting-tests/COMMIT_EDITMSG"
],
},
{
@ -32,7 +32,7 @@
"program": "${workspaceFolder}/target/debug/edit",
"cwd": "${workspaceFolder}",
"args": [
"${workspaceFolder}/src/bin/edit/main.rs"
"${workspaceFolder}/assets/highlighting-tests/COMMIT_EDITMSG"
],
}
]

118
Cargo.lock generated
View file

@ -31,9 +31,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "bitflags"
version = "2.9.1"
version = "2.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
checksum = "34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d"
[[package]]
name = "bumpalo"
@ -49,9 +49,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "cc"
version = "1.2.30"
version = "1.2.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7"
checksum = "42bc4aea80032b7bf409b0bc7ccad88853858911b7713a8062fdc0623867bedc"
dependencies = [
"jobserver",
"libc",
@ -60,9 +60,9 @@ dependencies = [
[[package]]
name = "cfg-if"
version = "1.0.1"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"
checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9"
[[package]]
name = "ciborium"
@ -93,18 +93,18 @@ dependencies = [
[[package]]
name = "clap"
version = "4.5.42"
version = "4.5.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed87a9d530bb41a67537289bafcac159cb3ee28460e0a4571123d2a778a6a882"
checksum = "1fc0e74a703892159f5ae7d3aac52c8e6c392f5ae5f359c70b5881d60aaac318"
dependencies = [
"clap_builder",
]
[[package]]
name = "clap_builder"
version = "4.5.42"
version = "4.5.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64f4f3f3c77c94aff3c7e9aac9a2ca1974a5adf392a8bb751e827d6d127ab966"
checksum = "b3e7f4214277f3c7aa526a59dd3fbe306a370daee1f8b7b8c987069cd8e888a8"
dependencies = [
"anstyle",
"clap_lex",
@ -186,6 +186,7 @@ version = "1.2.1"
dependencies = [
"criterion",
"libc",
"regex-syntax",
"serde",
"serde_json",
"toml-span",
@ -239,9 +240,9 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
[[package]]
name = "jobserver"
version = "0.1.33"
version = "0.1.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a"
checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"
dependencies = [
"getrandom",
"libc",
@ -259,9 +260,9 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.174"
version = "0.2.175"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"
checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543"
[[package]]
name = "log"
@ -332,9 +333,9 @@ dependencies = [
[[package]]
name = "proc-macro2"
version = "1.0.95"
version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
dependencies = [
"unicode-ident",
]
@ -356,9 +357,9 @@ checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
[[package]]
name = "rayon"
version = "1.10.0"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
dependencies = [
"either",
"rayon-core",
@ -366,9 +367,9 @@ dependencies = [
[[package]]
name = "rayon-core"
version = "1.12.1"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
@ -376,9 +377,9 @@ dependencies = [
[[package]]
name = "regex"
version = "1.11.1"
version = "1.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912"
dependencies = [
"aho-corasick",
"memchr",
@ -388,9 +389,9 @@ dependencies = [
[[package]]
name = "regex-automata"
version = "0.4.9"
version = "0.4.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6"
dependencies = [
"aho-corasick",
"memchr",
@ -399,15 +400,15 @@ dependencies = [
[[package]]
name = "regex-syntax"
version = "0.8.5"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001"
[[package]]
name = "rustversion"
version = "1.0.21"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d"
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
[[package]]
name = "ryu"
@ -446,9 +447,9 @@ dependencies = [
[[package]]
name = "serde_json"
version = "1.0.141"
version = "1.0.143"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3"
checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a"
dependencies = [
"itoa",
"memchr",
@ -470,9 +471,9 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "syn"
version = "2.0.104"
version = "2.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40"
checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6"
dependencies = [
"proc-macro2",
"quote",
@ -599,28 +600,35 @@ dependencies = [
[[package]]
name = "winapi-util"
version = "0.1.9"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
checksum = "0978bf7171b3d90bac376700cb56d606feb40f251a475a5d6634613564460b22"
dependencies = [
"windows-sys",
]
[[package]]
name = "windows-sys"
version = "0.59.0"
name = "windows-link"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"
[[package]]
name = "windows-sys"
version = "0.60.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
version = "0.53.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91"
dependencies = [
"windows-link",
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
@ -633,51 +641,51 @@ dependencies = [
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
version = "0.53.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
version = "0.53.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
version = "0.53.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
version = "0.53.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
version = "0.53.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
version = "0.53.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
version = "0.53.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
version = "0.53.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
[[package]]
name = "winresource"

View file

@ -40,6 +40,7 @@ lto = "thin" # Similarly, speed up linking by a ton
libc = "0.2"
[build-dependencies]
regex-syntax = { version = "0.8", default-features = false }
# The default toml crate bundles its dependencies with bad compile times. Thanks.
# Thankfully toml-span exists. FWIW the alternative is yaml-rust (without the 2 suffix).
toml-span = { version = "0.5", default-features = false }
@ -48,7 +49,7 @@ toml-span = { version = "0.5", default-features = false }
winresource = { version = "0.1.22", default-features = false }
[target.'cfg(windows)'.dependencies.windows-sys]
version = "0.59"
version = "0.60"
features = [
"Win32_Globalization",
"Win32_Security",

View file

@ -0,0 +1,62 @@
lsh: flip legacy flag, add follow-up hook & diff test
Refactors placeholder logic and introduces a lightweight follow-up hook.
Adds diff highlighting test and removes outdated asset to streamline visuals.
# Please enter the commit message for your changes. Lines starting
# with '#' will be ignored, and an empty message aborts the commit.
#
# On branch foobar
# Your branch is up to date with 'origin/foobar'.
#
# Changes to be committed:
# renamed: foo.rs -> bar.rs
# modified: src/lsh/definitions.rs
# deleted: assets/old_logo.svg
# new file: baz.rs
#
# ------------------------ >8 ------------------------
# Do not modify or remove the line above.
# Everything below it will be ignored.
diff --git a/src/lsh/definitions.rs b/src/lsh/definitions.rs
index a1b2c3d..d4c3b2a 100644
--- a/src/lsh/definitions.rs
+++ b/src/lsh/definitions.rs
@@ -12,9 +12,11 @@ // context line 12
// context line 13
-// old placeholder logic A
-const LEGACY_FLAG: bool = true;
-// end legacy block
+// updated placeholder logic A
+const LEGACY_FLAG: bool = false; // flipped for test
+// added note: migration in progress
+// end legacy block
// context line 19
// context line 20
@@ -42,7 +44,12 @@ // context line 42
// context line 43
- do_placeholder_action(alpha, beta);
+ // Split actions for clarity
+ do_placeholder_prepare(alpha);
+ do_placeholder_action(alpha, beta);
+ if (enable_extra()) {
+ do_placeholder_followup(beta);
+ }
// context line 50
// context line 51
// context line 52
@@ -90,6 +97,15 @@ // context line 90
// context line 91
// context line 92
+/// Added lightweight fake helper
+fn enable_extra() -> bool {
+ // Pretend this consults a config value
+ true
+}
+
+// Temporary debug instrumentation (to be removed)
+const _DEBUG_HOOK: &str = "lsh:extra";
+
// context line 93
// context line 94
// context line 95

View file

@ -0,0 +1,71 @@
#!/usr/bin/env bash
# This is a comment
readonly VAR1="Hello" # String literal
VAR2=42 # Integer literal
VAR3=$((VAR2 + 8)) # Arithmetic expansion
VAR4=$(echo "World") # Command substitution
function greet() { # Function definition
local name="$1" # Local variable, parameter expansion
echo "${VAR1}, $name! $VAR4" # String, parameter expansion, variable
}
greet "User" # Function call, string literal
if [[ $VAR2 -gt 40 && $VAR3 -eq 50 ]]; then # Conditional, test, operators
echo "Numbers are correct" # String literal
elif (( VAR2 < 40 )); then # Arithmetic test
echo 'VAR2 is less than 40' # Single-quoted string
else
echo "Other case"
fi
for i in {1..3}; do # Brace expansion, for loop
echo "Loop $i" # String, variable
done
case "$VAR4" in # Case statement
World) echo "It's World";; # Pattern, string
*) echo "Unknown";; # Wildcard
esac
arr=(one two three) # Array
echo "${arr[1]}" # Array access
declare -A assoc # Associative array
assoc[key]="value"
echo "${assoc[key]}"
# Here document
cat <<EOF
Multi-line
string with $VAR1
EOF
# Here string
grep H <<< "$VAR1"
# Subshell
(subshell_var=99; echo $subshell_var)
# Redirection
echo "Redirected" > /dev/null
# Background job
sleep 1 &
# Arithmetic assignment
let VAR2+=1
# Process substitution
diff <(echo foo) <(echo bar)
# Command grouping
{ echo "Group 1"; echo "Group 2"; }
# Escaped characters
echo "A quote: \" and a backslash: \\"
# End of file

View file

@ -0,0 +1,41 @@
@echo off
REM --- String, Variable, Label, Command, Operator, Number, Delimiter, Comment ---
:: Label
:Start
:: Variable assignment and usage
set "VAR1=Hello"
set VAR2=World
:: String with spaces and special characters
set "STR=Batch ^& CMD!"
:: Arithmetic operation (number, operator)
set /a SUM=5+10
:: IF statement (keyword, operator, string, variable)
if "%VAR1%"=="Hello" (
echo %VAR1%, %VAR2%! %STR%
) else (
echo Not matched!
)
:: FOR loop (keyword, variable, delimiter, string)
for %%F in (*.bat) do (
echo Found file: %%F
)
:: CALL command (keyword, label)
call :SubRoutine
:: GOTO command (keyword, label)
goto :End
:: Subroutine with parameter
:SubRoutine
echo In subroutine with SUM=%SUM%
goto :eof
:End
REM End of script

View file

@ -0,0 +1,142 @@
diff --git a/src/lsh/definitions.rs b/src/lsh/definitions.rs
index a1b2c3d..d4c3b2a 100644
--- a/src/lsh/definitions.rs
+++ b/src/lsh/definitions.rs
@@ -12,9 +12,11 @@ // context line 12
// context line 13
-// old placeholder logic A
-const LEGACY_FLAG: bool = true;
-// end legacy block
+// updated placeholder logic A
+const LEGACY_FLAG: bool = false; // flipped for test
+// added note: migration in progress
+// end legacy block
// context line 19
// context line 20
@@ -42,7 +44,12 @@ // context line 42
// context line 43
- do_placeholder_action(alpha, beta);
+ // Split actions for clarity
+ do_placeholder_prepare(alpha);
+ do_placeholder_action(alpha, beta);
+ if (enable_extra()) {
+ do_placeholder_followup(beta);
+ }
// context line 50
// context line 51
// context line 52
@@ -90,6 +97,15 @@ // context line 90
// context line 91
// context line 92
+/// Added lightweight fake helper
+fn enable_extra() -> bool {
+ // Pretend this consults a config value
+ true
+}
+
+// Temporary debug instrumentation (to be removed)
+const _DEBUG_HOOK: &str = "lsh:extra";
+
// context line 93
// context line 94
// context line 95
diff --git a/src/tui.rs b/src/tui.rs
index 1122334..5566778 100644
--- a/src/tui.rs
+++ b/src/tui.rs
@@ -5,8 +5,13 @@ // context line 5
// context line 6
// context line 7
-// previous rendering stub
-render_placeholder(frame);
+// refined rendering sequence
+begin_frame(frame);
+render_header(frame);
+render_body(frame);
+render_footer(frame);
+end_frame(frame);
+
// context line 14
// context line 15
// context line 16
diff --git a/README.md b/README.md
index 9aa9aa9..1bb1bb1 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,11 @@
# Project Title
-Some brief description here.
+Some brief description here (updated).
+
+Experimental notes:
+- Added fake diff example
+- Demonstrates multi-hunk & multi-file patch
+
## Usage
Run the binary as needed.
diff --git a/Cargo.toml b/Cargo.toml
index cafe123..fade321 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,7 +8,10 @@ edition = "2021"
# context
[features]
-default = []
+default = ["experimental-preview"]
+experimental-preview = []
+
+# NOTE: Above feature is placeholder & fake
[dependencies]
# context
@@ -20,6 +23,7 @@ anyhow = "1"
# context
# instrumentation (fake addition)
tracing = "0.1"
+tracing-subscriber = "0.3"
diff --git a/src/feature/experimental.rs b/src/feature/experimental.rs
new file mode 100644
index 0000000..abc1234
--- /dev/null
+++ b/src/feature/experimental.rs
@@ -0,0 +1,24 @@
+//! Placeholder experimental module (fake content).
+//! This file exists only to demonstrate a multi-file diff.
+#![allow(dead_code)]
+
+pub struct ExperimentalToggle {
+ enabled: bool,
+}
+
+impl ExperimentalToggle {
+ pub fn new() -> Self {
+ Self { enabled: true }
+ }
+ pub fn enabled(&self) -> bool {
+ self.enabled
+ }
+}
+
+pub fn run_experimental_path() {
+ if ExperimentalToggle::new().enabled() {
+ // Fake behavior
+ eprintln!("(fake) running experimental path");
+ }
+}
+
diff --git a/assets/old_logo.svg b/assets/old_logo.svg
deleted file mode 100644
index 55aa55a..0000000
--- a/assets/old_logo.svg
+++ /dev/null
@@ -1,5 +0,0 @@
-<!-- Fake removed asset -->
-<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 10 10">
- <rect width="10" height="10" fill="#FF00FF"/>
- <text x="1" y="9" font-size="3">X</text>
-</svg>

View file

@ -0,0 +1,54 @@
pick 3f9d2ab # chore(ci): add initial pipeline
pick e12c0ff # feat(io): introduce streaming reader
reword 7ac14d0 feat(io): add zero-copy buffered writer
edit 4d92b67 fix(io): handle partial UTF-8 sequences at buffer boundary
pick 8bb3a21 docs: expand README performance section
squash 1c02f55 docs: tidy wording in performance section
pick 54ae91b feat(cli): add --color=auto detection
fixup 0b77c3e feat(cli): typo in help text
fixup -c 2f4e8ab feat(cli): adjust arg parsing error message
pick 6d8e5f1 refactor(core): split large module into submodules
break
pick a41e9d3 test(core): add regression tests for issue #410
exec cargo test --package edit --lib --quiet
pick c3bb921 perf(render): SIMD accelerate line diffing
drop 91d0f2e debug(render): temporary logging (to be dropped)
label pre-merge-split
pick d2f7ac4 feat(ui): add mouse wheel smooth scrolling
pick f1a4bd9 feat(ui): add high-DPI awareness
label post-ui
update-ref refs/heads/feature/ui-stack-post
reset pre-merge-split
merge -C 5ab77e3 post-ui # Merge UI feature stack
pick d8c4b91 build: bump version to 0.9.0
# Rebase 3f9d2ab..d8c4b91 onto d8c4b91 (18 commands)
#
# Commands:
# p, pick <commit> = use commit
# r, reword <commit> = use commit, but edit the commit message
# e, edit <commit> = use commit, but stop for amending
# s, squash <commit> = use commit, but meld into previous commit
# f, fixup [-C | -c] <commit> = like "squash" but keep only the previous
# commit's log message, unless -C is used, in which case
# keep only this commit's message; -c is same as -C but
# opens the editor
# x, exec <command> = run command (the rest of the line) using shell
# b, break = stop here (continue rebase later with 'git rebase --continue')
# d, drop <commit> = remove commit
# l, label <label> = label current HEAD with a name
# t, reset <label> = reset HEAD to a label
# m, merge [-C <commit> | -c <commit>] <label> [# <oneline>]
# create a merge commit using the original merge commit's
# message (or the oneline, if no original merge commit was
# specified); use -c <commit> to reword the commit message
# u, update-ref <ref> = track a placeholder for the <ref> to be updated
# to this position in the new commits. The <ref> is
# updated at the end of the rebase
#
# These lines can be re-ordered; they are executed from top to bottom.
#
# If you remove a line here THAT COMMIT WILL BE LOST.
#
# However, if you remove everything, the rebase will be aborted.
#

View file

@ -0,0 +1,26 @@
{
// Object with various value types
"string": "Hello, world!", // string literal
"numberInt": 42, // integer number
"numberFloat": -3.14e+2, // floating point with exponent
"booleanTrue": true, // boolean true
"booleanFalse": false, // boolean false
"nullValue": null, // null literal
"array": [
"item1", // string in array
2, // number in array
false, // boolean in array
null, // null in array
{
"nested": "object"
} // object in array
],
"emptyObject": {}, // empty object
"emptyArray": [], // empty array
/* Multi-line comment:
This is a block comment
inside JSONC.
*/
"unicodeString": "Emoji: \uD83D\uDE03", // Unicode escape
"escapedChars": "Line1\nLine2\tTabbed\\Backslash\"Quote" // Escaped characters
}

View file

@ -0,0 +1,78 @@
# Single-line comment
<#
Multi-line
comment
#>
function Get-SampleData {
param(
[string]$Name = "World", # String literal, parameter
[int]$Count = 3
)
$array = @(1, 2, 3) # Array literal
$hashtable = @{ Key1 = 'Value1'; Key2 = 42 } # Hashtable literal
$nullVar = $null
$boolTrue = $true
$boolFalse = $false
$regexMatch = "abc123" -match '\d+' # Regex literal
for ($i = 0; $i -lt $Count; $i++) {
Write-Host "Hello, $Name! Iteration: $i" # Variable interpolation, string
}
if ($hashtable.Key2 -eq 42) {
Write-Output "Hashtable value is 42"
}
elseif ($hashtable.Key2 -gt 40) {
Write-Output "Hashtable value is greater than 40"
}
else {
Write-Output "Hashtable value is less than or equal to 40"
}
switch ($Name) {
"World" { Write-Host "Default name used." }
default { Write-Host "Custom name: $Name" }
}
try {
throw "An error occurred"
}
catch {
Write-Warning $_
}
finally {
Write-Verbose "Finally block executed"
}
$script:globalVar = 99 # Scope modifier
# Here-String
$hereString = @"
This is a here-string.
Name: $Name
"@
return $hereString
}
# Command invocation, pipeline, splatting
$paramSplat = @{
Name = 'PowerShell'
Count = 2
}
Get-SampleData @paramSplat | Out-File -FilePath "./output.txt"
# Type literal, member access, method call
[System.DateTime]::Now.ToString("yyyy-MM-dd")
# Subexpression
Write-Host "2 + 2 = $($array[0] + $array[1])"
# Command substitution
$pwdPath = $(Get-Location).Path
Write-Host "Current directory: $pwdPath"

View file

@ -0,0 +1,41 @@
# This is a comment
---
string: "Hello, world!"
plain: plainValue
multiline: |
This is a
multiline string.
folded: >
This is a
folded string.
number_int: 42
number_float: 3.1415
number_scientific: 1.23e45
number_negative: -7
boolean_true: true
boolean_false: false
null_value: null
explicit_null: ~
date: 2024-06-01
timestamp: 2024-06-01T12:34:56Z
confusable_string_number: 1.23e45 1.23e45 # This is a comment
sequence:
- item1
- item2
- 3
- true
mapping:
key1: value1
key2: value2
nested:
- name: Alice
age: 30
married: false
- name: Bob
age: 25
married: true
empty_sequence: [foo, 123, bar]
empty_mapping: { foo: bar }
literal_colon: "value:with:colons"
literal_dash: "-not-a-sequence"
special_chars: "Tab:\t Newline:\n Unicode:\u2713"

View file

@ -3,12 +3,14 @@
use std::hint::black_box;
use std::io::Cursor;
use std::path::Path;
use std::{mem, vec};
use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
use edit::arena::scratch_arena;
use edit::helpers::*;
use edit::simd::MemsetSafe;
use edit::{arena, buffer, hash, oklab, simd, unicode};
use edit::{arena, buffer, hash, lsh, oklab, simd, unicode};
use serde::Deserialize;
#[derive(Deserialize)]
@ -125,6 +127,29 @@ fn bench_hash(c: &mut Criterion) {
});
}
// Benchmarks the lsh (lightweight syntax highlighting) line parser over the
// PowerShell test fixture, reporting throughput in bytes/sec.
fn bench_lsh(c: &mut Criterion) {
let bytes = include_bytes!("../assets/highlighting-tests/powershell.ps1");
// Reborrow the fixed-size array as a plain byte slice.
let bytes = &bytes[..];
// Language detection is keyed off the file name; unwrap is fine here because
// the fixture extension is known to be registered.
let lang = lsh::language_from_path(Path::new("powershell.ps1")).unwrap();
// NOTE(review): `&bytes` takes a reference to an existing `&[u8]` — presumably
// `Highlighter::new` accepts this via auto-deref; confirm a plain `bytes` isn't intended.
let highlighter = lsh::Highlighter::new(black_box(&bytes), lang);
c.benchmark_group("lsh").throughput(Throughput::Bytes(bytes.len() as u64)).bench_function(
"powershell",
|b| {
b.iter(|| {
// Clone per iteration so each run highlights from a fresh parser state.
let mut h = highlighter.clone();
loop {
// A fresh scratch arena per line keeps temporary allocations bounded.
let scratch = scratch_arena(None);
let res = h.parse_next_line(&scratch);
// An empty result marks end of input — stop this iteration.
if res.is_empty() {
break;
}
}
})
},
);
}
fn bench_oklab(c: &mut Criterion) {
c.benchmark_group("oklab")
.bench_function("StraightRgba::as_oklab", |b| {
@ -231,6 +256,7 @@ fn bench(c: &mut Criterion) {
bench_buffer(c);
bench_hash(c);
bench_lsh(c);
bench_oklab(c);
bench_simd_lines_fwd(c);
bench_simd_memchr2(c);

432
build/lsh/definitions.rs Normal file
View file

@ -0,0 +1,432 @@
#![allow(dead_code)]
use HighlightKind::*;
// Languages currently wired into the highlighter. Note that LANG_DIFF,
// LANG_JSON, LANG_YAML, LANG_BASH and LANG_POWERSHELL are defined in this
// file but not registered here — presumably intentional for this first
// draft (hence the file-wide allow(dead_code)); confirm before assuming
// those languages are active.
pub const LANGUAGES: &[&Language] = &[&LANG_GIT_COMMIT, &LANG_GIT_REBASE];
// Highlighting definition for unified diff / patch files.
// Per line: the first matching rule colors the line prefix, then control
// appears to move to "ignore", which swallows the rest of the line so only
// the leading marker drives the color.
const LANG_DIFF: Language = Language {
name: "Diff",
filenames: &["*.diff", "*.patch"],
states: &[
State {
name: "ground",
rules: &[
// Rule order matters: "---"/"+++" file headers must be tried
// before the single-char "-"/"+" removal/addition markers.
re(r#"diff"#).is(BrightBlue).then_jump("ignore"),
re(r#"---"#).is(BrightBlue).then_jump("ignore"),
re(r#"\+\+\+"#).is(BrightBlue).then_jump("ignore"),
re(r#"-"#).is(BrightRed).then_jump("ignore"),
re(r#"\+"#).is(BrightGreen).then_jump("ignore"),
// Empty pattern: fallback for context/other lines, uncolored.
re(r#""#).then_jump("ignore"),
],
},
// Consumes the remainder of the line without assigning a highlight.
State { name: "ignore", rules: &[re(r#".*"#)] },
],
};
// Highlighting definition for git commit messages (COMMIT_EDITMSG / MERGE_MSG).
// Handles '#' comment lines (with special coloring for the status summary,
// e.g. "modified:" / "deleted:"), and switches into an embedded diff mode when
// a verbose-commit "diff --git" section begins.
const LANG_GIT_COMMIT: Language = Language {
name: "Git Commit Message",
filenames: &["COMMIT_EDITMSG", "MERGE_MSG"],
states: &[
State {
name: "ground",
rules: &[
re(r#"#"#).is(Comment).then_call("comment"),
re(r#"diff \-\-git.*"#).is(BrightBlue).then_call("diff_transition"),
// Anything else is an ordinary message line; skip the rest of it.
re(r#""#).then_jump("ignore"),
],
},
State {
name: "comment",
rules: &[
// Status lines from "Changes to be committed:" start with a tab
// after the '#'; color them like their diff counterparts.
re(r#"\tdeleted:.*"#).is(BrightRed).then_return(),
re(r#"\tmodified:.*"#).is(BrightBlue).then_return(),
re(r#"\tnew file:.*"#).is(BrightGreen).then_return(),
re(r#"\trenamed:.*"#).is(BrightBlue).then_return(),
// Any other comment text keeps the Comment color from "ground".
re(r#".*"#).then_return(),
],
},
// Empty-pattern hop so the diff section runs under its own state.
State { name: "diff_transition", rules: &[re(r#""#).is(Other).then_call("diff")] },
// TODO: The ability to invoke another language (here: LANG_DIFF). :)
State {
name: "diff",
rules: &[
// Inline copy of LANG_DIFF's ground state (see TODO above).
re(r#"diff"#).is(BrightBlue).then_jump("ignore"),
re(r#"---"#).is(BrightBlue).then_jump("ignore"),
re(r#"\+\+\+"#).is(BrightBlue).then_jump("ignore"),
re(r#"-"#).is(BrightRed).then_jump("ignore"),
re(r#"\+"#).is(BrightGreen).then_jump("ignore"),
re(r#""#).then_jump("ignore"),
],
},
// Consumes the remainder of the line without assigning a highlight.
State { name: "ignore", rules: &[re(r#".*"#)] },
],
};
// Highlighting definition for interactive-rebase todo lists (git-rebase-todo).
// Command keywords are highlighted, the following commit hash is colored as a
// variable, and the trailing subject line is rendered as a comment.
const LANG_GIT_REBASE: Language = Language {
name: "Git Rebase Message",
filenames: &["git-rebase-todo"], // TODO: https://github.com/microsoft/vscode/issues/156954
states: &[
State {
name: "ground",
rules: &[
// break/exec take no commit hash, so the rest of the line goes
// straight to "comment". NOTE(review): for `exec` the remainder
// is a shell command, not a comment — colored as Comment anyway
// in this draft; confirm that's intended.
re(r#"(?:break|exec|b|x)\b{end-half}"#).is(Keyword).then_call("comment"),
// Commands that are followed by a <commit> argument.
re(r#"(?:drop|edit|fixup|pick|reword|squash|d|e|f|p|r|s)\b{end-half}"#)
.is(Keyword)
.then_call("hash"),
re(r#"#.*"#).is(Comment),
],
},
State {
name: "hash",
rules: &[
// The commit hash (any non-space run), then the subject text.
re(r#"\S+"#).is(Variable).then_call("comment"),
// Skip whitespace between keyword and hash.
re(r#"\s+"#),
// No hash found — give up and return to "ground".
re(r#".*"#).then_return(),
],
},
// Rest of the line (the commit subject) rendered as a comment.
State { name: "comment", rules: &[re(r#".*"#).is(Comment).then_return()] },
],
};
// Highlighting definition for JSON / JSONC. Comments are accepted (JSONC),
// literals are matched case-insensitively, and "resolve_type" mops up any
// trailing word characters after a literal.
const LANG_JSON: Language = Language {
name: "JSON",
filenames: &["*.json", "*.jsonc"],
states: &[
State {
name: "ground",
rules: &[
re(r#"//.*"#).is(Comment),
re(r#"/\*"#).is(Comment).then_call("comment"),
re(r#"""#).is(String).then_jump("string_double"),
// Integer or float, optional sign and exponent.
re(r#"(?:-\d+|\d+)(?:\.\d+)?(?:[eE][+-]?\d+)?"#)
.is(Number)
.then_jump("resolve_type"),
// (?i:) accepted so near-misses like "False" still colorize.
re(r#"(?i:false)"#).is(Keyword).then_jump("resolve_type"),
re(r#"(?i:null)"#).is(Keyword).then_jump("resolve_type"),
re(r#"(?i:true)"#).is(Keyword).then_jump("resolve_type"),
],
},
// If the literal is glued to more word chars (e.g. "truex"), demote the
// run to Other; the empty pattern is the no-op fallback.
State { name: "resolve_type", rules: &[re(r#"\w+"#).is(Other), re(r#""#)] },
// Block comment: only the closing "*/" is matched explicitly —
// presumably non-matching text is consumed implicitly; confirm against
// the state-machine runner.
State { name: "comment", rules: &[re(r#"\*/"#).then_return()] },
State {
name: "string_double",
// Closing quote ends the string; "\\." keeps escapes (incl. \")
// from terminating it early.
rules: &[re(r#"""#), re(r#"\\."#).then_jump("string_double")],
},
],
};
// Highlighting definition for YAML. Scalars default to String; mapping keys
// (word followed by ':') are recognized in the "resolve_type" chain and
// colored as Keyword.
const LANG_YAML: Language = Language {
name: "YAML",
filenames: &["*.yaml", "*.yml"],
states: &[
State {
name: "ground",
rules: &[
re(r#"#.*"#).is(Comment),
re(r#"""#).is(String).then_jump("string_double"),
re(r#"'"#).is(String).then_jump("string_single"),
// Number with optional sign, fraction and exponent.
re(r#"(?:-\d+|\d+)(?:\.\d+)?(?:[eE][+-]?\d+)?"#)
.is(Number)
.then_jump("resolve_type"),
re(r#"(?i:false)"#).is(Keyword).then_jump("resolve_type"),
re(r#"(?i:null)"#).is(Keyword).then_jump("resolve_type"),
re(r#"(?i:true)"#).is(Keyword).then_jump("resolve_type"),
// Any other bare word starts out as a plain scalar.
re(r#"\w+"#).is(String).then_jump("resolve_type"),
],
},
State {
name: "resolve_type",
rules: &[
// "word:" → this token was actually a mapping key.
re(r#"\s*[^\s#:]+:"#).is(Keyword).then_jump("resolve_type_maybe_keyword"),
// More scalar text without a colon stays a String.
re(r#"\s*[^\s#:]+"#).is(String),
// A lone ':' also makes the preceding token a key.
re(r#"\s*:"#).is(Keyword).then_jump("resolve_type_maybe_keyword"),
re(r#""#),
],
},
State {
name: "resolve_type_maybe_keyword",
// The value after a key: everything up to a '#' comment is a String.
rules: &[re(r#"[^\s#:]+[^#]*"#).is(String), re(r#""#)],
},
State {
name: "string_double",
rules: &[re(r#"""#), re(r#"\\."#).then_jump("string_double")],
},
// NOTE(review): YAML single-quoted strings escape via doubled ''
// rather than backslash; this draft reuses the backslash rule — confirm.
State {
name: "string_single",
rules: &[re(r#"'"#), re(r#"\\."#).then_jump("string_single")],
},
],
};
// Highlighting definition for Bash/Zsh scripts: comments, single/double
// quoted strings with $-expansion inside double quotes, $variables,
// operators, shell keywords, numbers, and command words (Method).
const LANG_BASH: Language = Language {
name: "Bash",
filenames: &["*.sh", "*.zsh"],
states: &[
State {
name: "ground",
rules: &[
re(r#"#.*"#).is(Comment),
re(r#"'"#).is(String).then_call("string_single"),
re(r#"""#).is(String).then_call("string_double"),
re(r#"\$"#).is(Variable).then_call("variable"),
re(r#"[!*/%+<=>|]"#).is(Operator),
// Keyword list. Order matters for shared prefixes: "done"
// precedes "do", "elif" precedes "else".
// NOTE(review): bash keywords are case-sensitive; (?i:) also
// matches e.g. "IF" — confirm that looseness is intended.
re(r"(?i:break)").is(Keyword).then_jump("resolve_type"),
re(r"(?i:case)").is(Keyword).then_jump("resolve_type"),
re(r"(?i:continue)").is(Keyword).then_jump("resolve_type"),
re(r"(?i:done)").is(Keyword).then_jump("resolve_type"),
re(r"(?i:do)").is(Keyword).then_jump("resolve_type"),
re(r"(?i:elif)").is(Keyword).then_jump("resolve_type"),
re(r"(?i:else)").is(Keyword).then_jump("resolve_type"),
re(r"(?i:esac)").is(Keyword).then_jump("resolve_type"),
re(r"(?i:fi)").is(Keyword).then_jump("resolve_type"),
re(r"(?i:for)").is(Keyword).then_jump("resolve_type"),
re(r"(?i:function)").is(Keyword).then_jump("resolve_type"),
re(r"(?i:if)").is(Keyword).then_jump("resolve_type"),
re(r"(?i:in)").is(Keyword).then_jump("resolve_type"),
re(r"(?i:return)").is(Keyword).then_jump("resolve_type"),
re(r"(?i:select)").is(Keyword).then_jump("resolve_type"),
re(r"(?i:then)").is(Keyword).then_jump("resolve_type"),
re(r"(?i:until)").is(Keyword).then_jump("resolve_type"),
re(r"(?i:while)").is(Keyword).then_jump("resolve_type"),
re(r#"\d+"#).is(Number),
// Any remaining word is treated as a command/function name.
re(r"\w+").is(Method),
],
},
State {
name: "string_single",
rules: &[re(r#"'"#).then_return(), re(r#"\\."#).then_jump("string_single")],
},
State {
name: "string_double",
rules: &[
re(r#"""#).then_return(),
re(r#"\\."#).then_jump("string_double"),
// $-expansion is live inside double quotes.
re(r#"\$"#).is(Other).then_call("variable"),
],
},
State {
name: "variable",
rules: &[
// Special parameters like $# and $?.
re(r#"[#?]"#).is(Variable).then_return(),
// Braced form: ${...}.
re(r#"\{[^}]*\}"#).is(Variable).then_return(),
re(r#"\w+"#).is(Variable).then_return(),
// A bare '$' with nothing recognizable after it.
re(r#""#).is(Other).then_return(),
],
},
// Demote keyword-glued word runs (e.g. "form" after "for") to Other.
State { name: "resolve_type", rules: &[re(r#"\w+"#).is(Other), re(r#""#)] },
],
};
/// Highlighting rules for PowerShell scripts and modules.
///
/// Within a state the first matching rule wins, so longer keywords precede
/// their prefixes (`elseif` before `else`, `foreach` before `for`).
const LANG_POWERSHELL: Language = Language {
    name: "PowerShell",
    filenames: &["*.ps1", "*.psm1", "*.psd1"],
    states: &[
        State {
            name: "ground",
            rules: &[
                re(r#"#.*"#).is(Comment),
                // `<# ... #>` block comment; terminated in the "comment" state.
                re(r#"<#"#).is(Comment).then_call("comment"),
                re(r#"'"#).is(String).then_call("string_single"),
                re(r#"\""#).is(String).then_call("string_double"),
                // `$( ... )` subexpression: recurse into "ground" so the
                // matching `)` below pops back out.
                re(r#"\$\("#).is(Other).then_call("ground"),
                re(r#"\$"#).is(Variable).then_call("variable"),
                re(r#"\("#).is(Other).then_call("ground"),
                re(r#"\)"#).is(Other).then_return(),
                // Integers/decimals with optional sign and exponent.
                re(r#"(?:-\d+|\d+)(?:\.\d+)?(?:[eE][+-]?\d+)?"#).is(Number),
                // Parameter-style switches such as `-Recurse`.
                re(r#"-\w+"#).is(Operator),
                re(r#"[!*/%+<=>|]"#).is(Operator),
                re(r#"(?i:break)"#).is(Keyword).then_jump("resolve_type"),
                re(r#"(?i:catch)"#).is(Keyword).then_jump("resolve_type"),
                re(r#"(?i:continue)"#).is(Keyword).then_jump("resolve_type"),
                re(r#"(?i:do)"#).is(Keyword).then_jump("resolve_type"),
                re(r#"(?i:elseif)"#).is(Keyword).then_jump("resolve_type"),
                re(r#"(?i:else)"#).is(Keyword).then_jump("resolve_type"),
                re(r#"(?i:finally)"#).is(Keyword).then_jump("resolve_type"),
                re(r#"(?i:foreach)"#).is(Keyword).then_jump("resolve_type"),
                re(r#"(?i:for)"#).is(Keyword).then_jump("resolve_type"),
                re(r#"(?i:function)"#).is(Keyword).then_jump("resolve_type"),
                re(r#"(?i:if)"#).is(Keyword).then_jump("resolve_type"),
                re(r#"(?i:return)"#).is(Keyword).then_jump("resolve_type"),
                re(r#"(?i:switch)"#).is(Keyword).then_jump("resolve_type"),
                re(r#"(?i:throw)"#).is(Keyword).then_jump("resolve_type"),
                re(r#"(?i:try)"#).is(Keyword).then_jump("resolve_type"),
                re(r#"(?i:using)"#).is(Keyword).then_jump("resolve_type"),
                re(r#"(?i:while)"#).is(Keyword).then_jump("resolve_type"),
                // Cmdlet-style words, including hyphenated `Verb-Noun` names.
                re(r#"[\w-]+"#).is(Method),
            ],
        },
        // Inside `<# ... #>`: only the terminator is significant.
        State { name: "comment", rules: &[re(r#"#>"#).is(Comment).then_return()] },
        State {
            // Inside '...'; backtick is PowerShell's escape character.
            name: "string_single",
            rules: &[re(r#"'"#).then_return(), re(r#"`."#).then_jump("string_single")],
        },
        State {
            // Inside "...": backtick escapes, plus `$(...)`/`$var` interpolation.
            name: "string_double",
            rules: &[
                re(r#"""#).then_return(),
                re(r#"`."#).then_jump("string_double"),
                re(r#"\$\("#).is(Other).then_call("ground"),
                re(r#"\$"#).is(Variable).then_call("variable"),
            ],
        },
        State {
            // Directly after `$`: automatic variables, `${...}`, or `$name`.
            name: "variable",
            rules: &[
                re(r#"[$^?]"#).then_return(),
                re(r#"\{[^}]*\}"#).then_return(),
                re(r#"\w+"#).then_return(),
                // Empty pattern: fallback when nothing variable-like follows.
                re(r#""#).is(Other).then_return(),
            ],
        },
        // Demotes keyword matches that are only a prefix of a longer word;
        // the empty pattern is the fall-through.
        State { name: "resolve_type", rules: &[re(r#"[\w-]+"#).is(Other), re(r#""#)] },
    ],
};
/// Highlighting rules for Windows Batch files.
const LANG_BATCH: Language = Language {
    name: "Batch",
    filenames: &["*.bat", "*.cmd"],
    states: &[
        State {
            name: "ground",
            rules: &[
                // `rem` must stand alone: `remfoo` matches this first rule
                // and is deliberately NOT highlighted as a comment.
                re(r#"(?i:rem)\S+"#).is(Other),
                re(r#"(?i:rem).*"#).is(Comment),
                // `::` label-style comments.
                re(r#"::.*"#).is(Comment),
                re(r#"""#).is(String).then_call("string_double"),
                // `%%` is a literal percent sign, not a variable.
                re(r#"%%"#).is(Other),
                re(r#"%"#).is(Variable).then_call("variable"),
                re(r#"[!*/+<=>|]"#).is(Operator),
                re(r"(?i:break)").is(Keyword).then_jump("resolve_type"),
                re(r"(?i:call)").is(Keyword).then_jump("resolve_type"),
                re(r"(?i:cd)").is(Keyword).then_jump("resolve_type"),
                re(r"(?i:chdir)").is(Keyword).then_jump("resolve_type"),
                re(r"(?i:cls)").is(Keyword).then_jump("resolve_type"),
                re(r"(?i:copy)").is(Keyword).then_jump("resolve_type"),
                re(r"(?i:del)").is(Keyword).then_jump("resolve_type"),
                re(r"(?i:dir)").is(Keyword).then_jump("resolve_type"),
                re(r"(?i:echo)").is(Keyword).then_jump("resolve_type"),
                re(r"(?i:exit)").is(Keyword).then_jump("resolve_type"),
                re(r"(?i:for)").is(Keyword).then_jump("resolve_type"),
                re(r"(?i:goto)").is(Keyword).then_jump("resolve_type"),
                re(r"(?i:if)").is(Keyword).then_jump("resolve_type"),
                re(r"(?i:md)").is(Keyword).then_jump("resolve_type"),
                re(r"(?i:mkdir)").is(Keyword).then_jump("resolve_type"),
                re(r"(?i:move)").is(Keyword).then_jump("resolve_type"),
                re(r"(?i:pause)").is(Keyword).then_jump("resolve_type"),
                re(r"(?i:ren)").is(Keyword).then_jump("resolve_type"),
                re(r"(?i:set)").is(Keyword).then_jump("resolve_type"),
                // NOTE(review): unlike Bash/PowerShell there is no trailing
                // `\w+ -> Method` fallback here — confirm that is intentional.
                re(r#"\d+"#).is(Number),
            ],
        },
        State {
            // Inside "..."; `\\.` consumes escaped characters.
            name: "string_double",
            rules: &[re(r#"""#).then_return(), re(r#"\\."#).then_jump("string_double")],
        },
        // Inside `%...%`: everything up to the closing percent sign.
        State { name: "variable", rules: &[re(r#"%"#).then_return()] },
        // Demotes keyword matches that are only a prefix of a longer word.
        State { name: "resolve_type", rules: &[re(r#"\w+"#).is(Other), re(r#""#)] },
    ],
};
/// Highlight classification attached to a matched span.
///
/// The first 16 variants mirror the 16 standard terminal palette entries (so a
/// discriminant `< 16` can be used directly as an indexed color); the
/// remaining variants are semantic token classes.
///
/// `#[repr(u8)]` pins the layout: the companion `as_usize`/`from_usize`
/// conversions cast/transmute through `u8`, which is only guaranteed sound
/// with an explicit primitive representation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum HighlightKind {
    Black,
    Red,
    Green,
    Yellow,
    Blue,
    Magenta,
    Cyan,
    White,
    BrightBlack,
    BrightRed,
    BrightGreen,
    BrightYellow,
    BrightBlue,
    BrightMagenta,
    BrightCyan,
    BrightWhite,
    Other,
    Comment,
    Number,
    String,
    Variable,
    Operator,
    Keyword,
    Method,
}
impl HighlightKind {
pub const fn as_usize(self) -> usize {
unsafe { std::mem::transmute::<HighlightKind, u8>(self) as usize }
}
pub const unsafe fn from_usize(value: usize) -> Self {
debug_assert!(value <= Method.as_usize());
unsafe { std::mem::transmute::<u8, HighlightKind>(value as u8) }
}
}
/// A stand-in for `Option<HighlightKind>` used by the rule-builder DSL.
///
/// NOTE(review): plain `Option<HighlightKind>` would likely work in these
/// const contexts too — presumably this exists for the DSL's readability;
/// confirm before replacing.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HighlightKindOp {
    // Leave the current highlight kind unchanged.
    None,
    // Apply the given highlight kind to the matched span.
    Some(HighlightKind),
}
/// A syntax-highlighting language definition: a named set of DFA states
/// consumed by the build-time transformer.
pub struct Language {
    /// Display name, e.g. "Bash".
    pub name: &'static str,
    /// Filename patterns this language applies to. A leading `*` matches by
    /// suffix (e.g. `*.sh`); otherwise the filename must match exactly.
    pub filenames: &'static [&'static str],
    /// The state machine; by convention the first state is named "ground".
    pub states: &'static [State],
}
/// A single named state in a language's highlighting state machine.
pub struct State {
    /// State name, referenced by `then_jump`/`then_call` targets.
    pub name: &'static str,
    /// Rules for this state. Order is significant: longer keywords are listed
    /// before their prefixes (e.g. `done` before `do`).
    pub rules: &'static [Rule],
}
/// State-machine control transfer.
///
/// NOTE(review): this mirrors [`ActionDefinition`] below and is not used by
/// any rule visible in this file — confirm whether it can be removed.
pub enum Instruction {
    // Stay in the current state.
    Continue,
    // Switch to the named state without touching the state stack.
    Jump(&'static str),
    // Enter the named state, remembering the current one.
    Push(&'static str),
    // Return to the most recently pushed state.
    Pop,
}
/// One highlighting rule: a regex pattern plus the highlight kind and the
/// state-machine action applied when it matches.
pub struct Rule {
    /// Regular expression; an empty pattern acts as an unconditional fallback.
    pub pattern: &'static str,
    /// Highlight kind for the matched span (or `None` to leave it unchanged).
    pub kind: HighlightKindOp,
    /// State transition performed after the match.
    pub action: ActionDefinition,
}
/// Shorthand constructor: a rule that matches `s`, changes no highlight kind,
/// and keeps the current state. Refine it via [`Rule::is`] and the `then_*`
/// builder methods.
const fn re(s: &'static str) -> Rule {
    Rule { pattern: s, action: ActionDefinition::Continue, kind: HighlightKindOp::None }
}
impl Rule {
const fn is(mut self, kind: HighlightKind) -> Self {
self.kind = HighlightKindOp::Some(kind);
self
}
const fn then_jump(mut self, target: &'static str) -> Self {
self.action = ActionDefinition::Jump(target);
self
}
const fn then_call(mut self, target: &'static str) -> Self {
self.action = ActionDefinition::Push(target);
self
}
const fn then_return(mut self) -> Self {
self.action = ActionDefinition::Pop;
self
}
}
/// State transition attached to a [`Rule`], resolved to numeric state indices
/// by the build-time transformer.
#[derive(Debug, Clone, Copy)]
pub enum ActionDefinition {
    // Stay in the current state.
    Continue,
    // Switch to the named state without touching the state stack.
    Jump(&'static str),
    // Enter the named state, remembering the current one (a "call").
    Push(&'static str),
    // Return to the most recently pushed state.
    Pop,
}

111
build/lsh/handles.rs Normal file
View file

@ -0,0 +1,111 @@
use std::marker::PhantomData;
use std::ops::{Deref, DerefMut, Index, IndexMut};
use std::slice;
/// A `Vec<T>` indexed by a strongly-typed handle `H` instead of a raw `usize`.
///
/// The read-only `Vec` API is available through `Deref`; typed indexing goes
/// through the `Index`/`IndexMut` impls below.
pub struct HandleVec<H, T> {
    // Backing storage.
    list: Vec<T>,
    // Marks the handle type without storing one.
    _handle: PhantomData<H>,
}
impl<H, T> HandleVec<H, T>
where
    H: Into<usize> + From<usize>,
{
    /// Appends `value` and returns the typed handle addressing the new slot.
    pub fn push(&mut self, value: T) -> H {
        let handle = H::from(self.list.len());
        self.list.push(value);
        handle
    }

    /// Iterates over all valid handles, in storage order.
    pub fn indices(&self) -> impl DoubleEndedIterator<Item = H> + use<H, T> {
        (0..self.list.len()).map(H::from)
    }

    /// Iterates over `(handle, &value)` pairs, in storage order.
    pub fn enumerate(&self) -> impl DoubleEndedIterator<Item = (H, &T)> {
        self.list.iter().enumerate().map(|(index, value)| (H::from(index), value))
    }
}
impl<H, T> Default for HandleVec<H, T> {
    /// An empty vector; no allocation happens until the first push.
    fn default() -> Self {
        Self { list: Vec::default(), _handle: PhantomData }
    }
}
impl<H, T> Deref for HandleVec<H, T> {
    type Target = Vec<T>;

    // Exposes the full read-only `Vec` API (len, iter, slicing, ...).
    fn deref(&self) -> &Self::Target {
        &self.list
    }
}
impl<H, T> DerefMut for HandleVec<H, T> {
    // Exposes the mutating `Vec` API. Note this allows untyped `usize`
    // indexing as well; the typed path is `Index<H>`/`IndexMut<H>` below.
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.list
    }
}
impl<'a, H, T> IntoIterator for &'a HandleVec<H, T> {
    type Item = &'a T;
    type IntoIter = slice::Iter<'a, T>;

    /// Iterates over stored values by shared reference.
    fn into_iter(self) -> Self::IntoIter {
        self.list.iter()
    }
}
impl<'a, H, T> IntoIterator for &'a mut HandleVec<H, T> {
    type Item = &'a mut T;
    type IntoIter = slice::IterMut<'a, T>;

    /// Iterates over stored values by mutable reference.
    fn into_iter(self) -> Self::IntoIter {
        self.list.iter_mut()
    }
}
impl<H, T> Index<H> for HandleVec<H, T>
where
    H: Into<usize>,
{
    type Output = T;

    /// Typed indexing; panics when the handle is out of bounds, exactly like
    /// `Vec` indexing.
    fn index(&self, index: H) -> &Self::Output {
        let i: usize = index.into();
        &self.list[i]
    }
}
impl<H, T> IndexMut<H> for HandleVec<H, T>
where
    H: Into<usize>,
{
    /// Typed mutable indexing; panics when the handle is out of bounds.
    fn index_mut(&mut self, index: H) -> &mut Self::Output {
        let i: usize = index.into();
        &mut self.list[i]
    }
}
/// Declares a newtype handle (e.g. `declare_handle!(pub Foo(u32))`) for use
/// with [`HandleVec`]: derives the common traits, adds `MIN`/`MAX`
/// constants, and provides the `usize` conversions `HandleVec` requires.
macro_rules! declare_handle {
    ($vis:vis $name:ident($type:ident)) => {
        #[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
        $vis struct $name(pub $type);

        impl $name {
            pub const MIN: Self = Self($type::MIN);
            pub const MAX: Self = Self($type::MAX);
        }

        impl From<$name> for usize {
            fn from(value: $name) -> Self {
                value.0 as usize
            }
        }

        impl From<usize> for $name {
            // Panics if `value` does not fit into the handle's backing type.
            fn from(value: usize) -> Self {
                Self(value.try_into().unwrap())
            }
        }
    };
}
pub(crate) use declare_handle;

234
build/lsh/mod.rs Normal file
View file

@ -0,0 +1,234 @@
//! This file takes a [`LanguageDefinition`] which describes syntax highlighting rules
//! for a language via a list of regular expressions that result in
//! * a highlight kind (comment, string, number, etc.)
//! * a push/pop action of another state (allows for nesting languages, such as in Markdown)
//!
//! It then transforms the definition into a list of [`WipState`], which are directions
//! to our custom DFA engine. The engine is very simple to reduce binary size.
//! Each defined state represents a root. Each additional state represents one step in
//! the regular expression. The difference between the two is that the root states will
//! seek to the next possible occurrence of any of the defined regular expressions,
//! whereas the additional states will try to match the next character without seeking.
//! If it doesn't match, it will fall back to the next possible defined regular expression.
mod definitions;
mod handles;
mod transformer;
use std::fmt::Write as _;
use definitions::*;
use transformer::*;
/// Generates the contents of `lsh_definitions.rs`: the runtime types, one set
/// of `LANG_*` tables per language, and the final `LANGUAGES` list.
pub fn generate() -> String {
    /// Uppercases `name` into a valid Rust identifier fragment: ASCII
    /// alphanumerics are uppercased, any other byte becomes `_` (runs
    /// collapsed). Shared by the `LANG_*` const definitions and the
    /// `LANGUAGES` references so the two sites can never disagree — the
    /// reference site previously used a different, whitespace-only sanitizer
    /// that would emit mismatched (and invalid) identifiers for names
    /// containing punctuation.
    fn ident_uppercase(name: &str) -> String {
        name.bytes().fold(String::new(), |mut acc, ch| {
            if ch.is_ascii_alphanumeric() {
                acc.push(ch.to_ascii_uppercase() as char);
            } else if !acc.ends_with('_') {
                acc.push('_');
            }
            acc
        })
    }

    let mut output = String::new();

    // Static prelude of the generated file: the runtime types the emitted
    // tables refer to.
    output.push_str(
        "\
// This file is generated by build.rs. Do not edit it manually.

use Action::*;
use HighlightKind::*;
use Test::*;

pub struct Language {
    pub name: &'static str,
    pub filenames: &'static [&'static str],
    pub transitions: &'static [Transition<'static>],
}

impl PartialEq for Language {
    fn eq(&self, other: &Self) -> bool {
        std::ptr::eq(self, other)
    }
}

pub struct Transition<'a> {
    pub test: Test<'a>,
    pub kind: Option<HighlightKind>,
    pub action: Action,
}

pub enum Test<'a> {
    Chars(usize),
    Charset(&'a [u16; 16]),
    Prefix(*const u8),
    PrefixInsensitive(*const u8),
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HighlightKind {
    Black,
    Red,
    Green,
    Yellow,
    Blue,
    Magenta,
    Cyan,
    White,
    BrightBlack,
    BrightRed,
    BrightGreen,
    BrightYellow,
    BrightBlue,
    BrightMagenta,
    BrightCyan,
    BrightWhite,
    Other,
    Comment,
    Number,
    String,
    Variable,
    Operator,
    Keyword,
    Method,
}

pub enum Action {
    Jump(u8),
    Push(u8),
    Pop(u8),
    Loop(u8),
}

const fn t<'a>(test: Test<'a>, kind: Option<HighlightKind>, action: Action) -> Transition<'a> {
    Transition { test, kind, action }
}
",
    );

    for lang in LANGUAGES {
        let name_uppercase = ident_uppercase(lang.name);

        // Build the state graph for this language from its declared states
        // and rules.
        let mut builder = GraphBuilder::new();
        for s in lang.states {
            builder.declare_root(s.name);
        }
        for state in lang.states {
            for rule in state.rules {
                builder.parse(state.name, rule);
            }
        }
        builder.finalize();

        // Embed a mermaid diagram of the state graph as a doc comment;
        // handy for debugging the generated tables.
        _ = write!(
            output,
            "\
/**
---
title: {}
config:
  layout: elk
  elk:
    considerModelOrder: NONE
---
{}
**/
",
            lang.name,
            builder.format_as_mermaid()
        );

        // Charsets are 256-bit membership bitmaps, serialized as 16 u16
        // words (bit `hi` of word `lo` covers byte value `hi * 16 + lo`).
        for (h, cs) in builder.extract_charsets() {
            _ = write!(
                output,
                "#[rustfmt::skip] const LANG_{}_CHARSET_{}: &[u16; 16] = &[",
                name_uppercase, h.0,
            );
            for lo in 0..16 {
                if lo > 0 {
                    _ = write!(output, ", ");
                }
                let mut u = 0u16;
                for hi in 0..16 {
                    u |= (cs[hi * 16 + lo] as u16) << hi;
                }
                _ = write!(output, "0x{u:04x}");
            }
            _ = writeln!(output, "];");
        }

        // Strings are emitted as length-prefixed byte arrays behind a raw
        // pointer so the generated `Test` variants stay pointer-sized.
        for (h, s) in builder.extract_strings() {
            _ = write!(
                output,
                "#[rustfmt::skip] const LANG_{}_STRING_{}: *const u8 = [",
                name_uppercase, h.0,
            );
            _ = write!(output, "{}", s.len());
            for &c in s.as_bytes() {
                _ = write!(output, ", 0x{:02x}", c);
            }
            _ = writeln!(output, "].as_ptr();");
        }

        _ = write!(
            output,
            "\
#[rustfmt::skip] pub const LANG_{name_uppercase}: &Language = &Language {{
    name: {name:?},
    filenames: &{filenames:?},
    transitions: &[
",
            name = lang.name,
            name_uppercase = name_uppercase,
            filenames = lang.filenames,
        );

        for t in builder.extract_transitions() {
            let test = match &t.test {
                // usize::MAX is a sentinel and must round-trip verbatim.
                GraphTest::Chars(usize::MAX) => "Chars(usize::MAX)".to_string(),
                GraphTest::Chars(n) => {
                    format!("Chars({n})")
                }
                GraphTest::Charset(cs) => {
                    format!("Charset(LANG_{}_CHARSET_{})", name_uppercase, cs.0)
                }
                GraphTest::Prefix(s) => {
                    format!("Prefix(LANG_{}_STRING_{})", name_uppercase, s.0)
                }
                GraphTest::PrefixInsensitive(s) => {
                    format!("PrefixInsensitive(LANG_{}_STRING_{})", name_uppercase, s.0)
                }
            };
            let action = match &t.dst {
                GraphAction::Jump(dst) => format!("Jump({})", dst.0),
                GraphAction::Push(dst) => format!("Push({})", dst.0),
                GraphAction::Pop(count) => format!("Pop({})", count),
                GraphAction::Loop(dst) => format!("Loop({})", dst.0),
                // Fallback edges are resolved away during finalize().
                GraphAction::Fallback => unreachable!(),
            };
            _ = writeln!(output, "        t({test}, {kind:?}, {action}),", kind = t.kind,);
        }

        _ = write!(output, "    ],\n}};\n\n");
    }

    // Finally, the list of all languages, referencing the consts emitted
    // above (same sanitizer, so the names are guaranteed to match).
    _ = write!(output, "#[rustfmt::skip] pub const LANGUAGES: &[&Language] = &[");
    for lang in LANGUAGES {
        let name_uppercase = ident_uppercase(lang.name);
        _ = writeln!(output, "    LANG_{name_uppercase},");
    }
    _ = writeln!(output, "];");
    output
}

1151
build/lsh/transformer.rs Normal file

File diff suppressed because it is too large Load diff

View file

@ -7,6 +7,7 @@ use crate::helpers::env_opt;
mod helpers;
mod i18n;
mod lsh;
#[derive(Clone, Copy, PartialEq, Eq)]
enum TargetOs {
@ -22,12 +23,20 @@ fn main() {
_ => TargetOs::Unix,
};
compile_lsh();
compile_i18n();
configure_icu(target_os);
#[cfg(windows)]
configure_windows_binary(target_os);
}
fn compile_lsh() {
let contents = lsh::generate();
let out_dir = env_opt("OUT_DIR");
let path = format!("{out_dir}/lsh_definitions.rs");
std::fs::write(path, contents).unwrap();
}
fn compile_i18n() {
const PATH: &str = "i18n/edit.toml";

View file

@ -1422,6 +1422,32 @@ uk = "Закрити"
zh_hans = "关闭"
zh_hant = "關閉"
[LanguageSelectMode]
en = "Select Language Mode"
de = "Sprachmodus auswählen"
es = "Seleccionar modo de lenguaje"
fr = "Sélectionner le mode du langage"
it = "Seleziona modalità del linguaggio"
ja = "言語モードの選択"
ko = "언어 모드 선택"
pt_br = "Selecionar modo de linguagem"
ru = "Выбрать режим языка"
zh_hans = "选择语言模式"
zh_hant = "選擇語言模式"
[LanguageAutoDetect]
en = "Auto Detect"
de = "Automatisch erkennen"
es = "Detección automática"
fr = "Détection automatique"
it = "Rilevamento automatico"
ja = "自動検出"
ko = "자동 감지"
pt_br = "Detectar automaticamente"
ru = "Определить автоматически"
zh_hans = "自动检测"
zh_hant = "自動偵測"
[EncodingReopen]
en = "Reopen with encoding…"
bn = "এনকোডিং সহ পুনরায় খুলুন"

View file

@ -8,6 +8,7 @@ use std::path::{Path, PathBuf};
use edit::buffer::{RcTextBuffer, TextBuffer};
use edit::helpers::{CoordType, Point};
use edit::lsh::{Language, language_from_path};
use edit::{apperr, path, sys};
use crate::state::DisplayablePathBuf;
@ -19,6 +20,7 @@ pub struct Document {
pub filename: String,
pub file_id: Option<sys::FileId>,
pub new_file_counter: usize,
pub language_override: Option<Option<&'static Language>>,
}
impl Document {
@ -61,15 +63,33 @@ impl Document {
fn set_path(&mut self, path: PathBuf) {
let filename = path.file_name().unwrap_or_default().to_string_lossy().into_owned();
let dir = path.parent().map(ToOwned::to_owned).unwrap_or_default();
self.filename = filename;
self.dir = Some(DisplayablePathBuf::from_path(dir));
self.path = Some(path);
self.update_file_mode();
self.buffer.borrow_mut().set_ruler(if self.filename == "COMMIT_EDITMSG" { 72 } else { 0 });
self.update_language();
}
fn update_file_mode(&mut self) {
let mut tb = self.buffer.borrow_mut();
tb.set_ruler(if self.filename == "COMMIT_EDITMSG" { 72 } else { 0 });
pub fn auto_detect_language(&mut self) {
self.language_override = None;
self.update_language();
}
pub fn override_language(&mut self, lang: Option<&'static Language>) {
self.language_override = Some(lang);
self.update_language();
}
fn update_language(&mut self) {
self.buffer.borrow_mut().set_language(if let Some(lang) = self.language_override {
lang
} else if let Some(path) = &self.path {
language_from_path(path)
} else {
None
})
}
}
@ -121,6 +141,7 @@ impl DocumentManager {
filename: Default::default(),
file_id: None,
new_file_counter: 0,
language_override: None,
};
self.gen_untitled_name(&mut doc);
@ -181,6 +202,7 @@ impl DocumentManager {
filename: Default::default(),
file_id,
new_file_counter: 0,
language_override: None,
};
doc.set_path(path);

View file

@ -6,6 +6,7 @@ use edit::framebuffer::{Attributes, IndexedColor};
use edit::fuzzy::score_fuzzy;
use edit::helpers::*;
use edit::input::vk;
use edit::lsh::LANGUAGES;
use edit::tui::*;
use edit::{arena_format, icu};
@ -26,15 +27,21 @@ pub fn draw_statusbar(ctx: &mut Context, state: &mut State) {
ctx.table_next_row();
if ctx.button("newline", if tb.is_crlf() { "CRLF" } else { "LF" }, ButtonStyle::default()) {
let is_crlf = tb.is_crlf();
tb.normalize_newlines(!is_crlf);
}
state.wants_language_picker |= ctx.button(
"language",
tb.language().map_or("Plain Text", |l| l.name),
ButtonStyle::default(),
);
if state.wants_statusbar_focus {
state.wants_statusbar_focus = false;
ctx.steal_focus();
}
if ctx.button("newline", if tb.is_crlf() { "CRLF" } else { "LF" }, ButtonStyle::default()) {
let is_crlf = tb.is_crlf();
tb.normalize_newlines(!is_crlf);
}
state.wants_encoding_picker |=
ctx.button("encoding", tb.encoding(), ButtonStyle::default());
if state.wants_encoding_picker {
@ -199,6 +206,55 @@ pub fn draw_statusbar(ctx: &mut Context, state: &mut State) {
ctx.table_end();
}
pub fn draw_dialog_language_change(ctx: &mut Context, state: &mut State) {
let doc = state.documents.active_mut();
let mut done = doc.is_none();
ctx.modal_begin("language", loc(LocId::LanguageSelectMode));
if let Some(doc) = doc {
let width = (ctx.size().width - 20).max(10);
let height = (ctx.size().height - 10).max(10);
ctx.scrollarea_begin("scrollarea", Size { width, height });
ctx.attr_background_rgba(ctx.indexed_alpha(IndexedColor::Black, 1, 4));
ctx.inherit_focus();
{
ctx.list_begin("languages");
ctx.inherit_focus();
let auto_detect = doc.language_override.is_none();
let selected = if auto_detect { None } else { doc.buffer.borrow().language() };
if ctx.list_item(auto_detect, loc(LocId::LanguageAutoDetect))
== ListSelection::Activated
{
doc.auto_detect_language();
done = true;
}
if ctx.list_item(selected.is_none(), "Plain Text") == ListSelection::Activated {
doc.override_language(None);
done = true;
}
for &lang in LANGUAGES {
if ctx.list_item(Some(lang) == selected, lang.name) == ListSelection::Activated {
doc.override_language(Some(lang));
done = true;
}
}
ctx.list_end();
}
ctx.scrollarea_end();
}
done |= ctx.modal_end();
if done {
state.wants_language_picker = false;
ctx.needs_rerender();
}
}
pub fn draw_dialog_encoding_change(ctx: &mut Context, state: &mut State) {
let encoding = state.documents.active_mut().map_or("", |doc| doc.buffer.borrow().encoding());
let reopen = state.wants_encoding_change == StateEncodingChange::Reopen;

View file

@ -313,6 +313,9 @@ fn draw(ctx: &mut Context, state: &mut State) {
if state.wants_save {
draw_handle_save(ctx, state);
}
if state.wants_language_picker {
draw_dialog_language_change(ctx, state);
}
if state.wants_encoding_change != StateEncodingChange::None {
draw_dialog_encoding_change(ctx, state);
}

View file

@ -152,6 +152,8 @@ pub struct State {
pub search_options: buffer::SearchOptions,
pub search_success: bool,
pub wants_language_picker: bool,
pub wants_encoding_picker: bool,
pub wants_encoding_change: StateEncodingChange,
pub encoding_picker_needle: String,
@ -200,6 +202,8 @@ impl State {
search_options: Default::default(),
search_success: true,
wants_language_picker: false,
wants_encoding_picker: false,
encoding_picker_needle: Default::default(),
encoding_picker_results: Default::default(),

View file

@ -1,116 +0,0 @@
use std::ops::Range;
use crate::{document::ReadableDocument, simd::memchr2};
/// Cache a line/offset pair every CACHE_EVERY lines to speed up line/offset calculations
const CACHE_EVERY: usize = 1024 * 64;

/// A single cached (byte offset, line number) pair.
#[derive(Clone)]
pub struct CachePoint {
    /// Byte offset associated with `line`.
    /// NOTE(review): `from_document` records the offset of the newline that
    /// ends the line, not the line start — confirm callers expect that.
    pub index: usize,
    /// 0-based logical line number.
    pub line: usize,
    // pub snapshot: ParserSnapshot
}
/// Sparse line-number → byte-offset index over a document, with one
/// [`CachePoint`] per [`CACHE_EVERY`] lines.
pub struct LineCache {
    // Checkpoints in ascending line/index order.
    cache: Vec<CachePoint>,
}
impl LineCache {
    /// Creates an empty cache with no checkpoints.
    pub fn new() -> Self {
        Self { cache: vec![] }
    }

    /// Rebuilds the cache by scanning the whole document for newlines,
    /// storing a [`CachePoint`] for every `CACHE_EVERY`-th line.
    pub fn from_document<T: ReadableDocument>(&mut self, document: &T) {
        self.cache.clear();

        let mut offset = 0;
        let mut line = 0;

        loop {
            let text = document.read_forward(offset);
            if text.is_empty() { return; }

            let mut off = 0;
            loop {
                // NOTE(review): both needles are `\n` — memchr2 is presumably
                // used for lack of a single-needle helper; confirm.
                off = memchr2(b'\n', b'\n', text, off);
                if off == text.len() { break; }
                if line % CACHE_EVERY == 0 {
                    // `offset + off` points at the newline ending this line.
                    self.cache.push(CachePoint { index: offset+off, line });
                }
                line += 1;
                off += 1;
            }

            offset += text.len();
        }
    }

    /// Updates the cache after a deletion.
    /// `range` is the deleted byte range, and `text` is the content that was deleted.
    pub fn delete(&mut self, range: Range<usize>, text: &Vec<u8>) {
        // Count how many whole lines the deletion removed.
        let mut newlines = 0;
        for c in text {
            if *c == b'\n' {
                newlines += 1;
            }
        }

        // Shift checkpoints that lie past the deletion; remember the span of
        // checkpoints that fell inside it so they can be dropped afterwards
        // (draining during iteration would invalidate the indices).
        let mut beg_del = None;
        let mut end_del = None;

        for (i, point) in self.cache.iter_mut().enumerate() {
            if point.index >= range.start {
                if point.index < range.end {
                    // cache point is within the deleted range
                    if beg_del.is_none() { beg_del = Some(i); }
                    end_del = Some(i + 1);
                }
                else {
                    point.index -= text.len();
                    point.line -= newlines;
                }
            }
        }

        if let (Some(beg), Some(end)) = (beg_del, end_del) {
            self.cache.drain(beg..end);
        }
    }

    /// Updates the cache after an insertion.
    /// `offset` is where the insertion occurs, and `text` is the inserted content.
    pub fn insert(&mut self, offset: usize, text: &[u8]) {
        // Count how many newlines were inserted
        let mut newlines = 0;
        for c in text {
            if *c == b'\n' {
                newlines += 1;
            }
        }

        // Shift every checkpoint past the insertion point.
        let len = text.len();
        for point in &mut self.cache {
            if point.index > offset {
                point.index += len;
                point.line += newlines;
            }
        }

        // TODO: This also needs to insert new cache points
    }

    /// Finds the nearest cached line-offset pair relative to a target line.
    /// If `reverse` is false, it returns the closest *before* the target.
    /// If `reverse` is true, it returns the closest *after or at* the target.
    ///
    /// NOTE(review): for `reverse == true` with a target below the first
    /// checkpoint, `i == 0` returns `None` even though `cache[0]` would be a
    /// valid "after" point — confirm whether that is intended.
    pub fn nearest_offset(&self, target_count: usize, reverse: bool) -> Option<CachePoint> {
        match self.cache.binary_search_by_key(&target_count, |p| p.line) {
            Ok(i) => Some(self.cache[i].clone()),
            Err(i) => {
                if i == 0 || i == self.cache.len() { None } // target < lowest cache point || target > highest cache point
                else {
                    Some(self.cache[ if reverse {i} else {i-1} ].clone())
                }
            }
        }
    }
}

View file

@ -42,6 +42,8 @@ use crate::clipboard::Clipboard;
use crate::document::{ReadableDocument, WriteableDocument};
use crate::framebuffer::{Framebuffer, IndexedColor};
use crate::helpers::*;
use crate::lsh::cache::HighlighterCache;
use crate::lsh::{HighlightKind, Highlighter, Language};
use crate::oklab::StraightRgba;
use crate::simd::memchr2;
use crate::unicode::{self, Cursor, MeasurementConfig, Utf8Chars};
@ -229,6 +231,7 @@ pub struct TextBuffer {
selection: Option<TextBufferSelection>,
selection_generation: u32,
search: Option<UnsafeCell<ActiveSearch>>,
highlighter_cache: HighlighterCache,
width: CoordType,
margin_width: CoordType,
@ -238,6 +241,7 @@ pub struct TextBuffer {
tab_size: CoordType,
indent_with_tabs: bool,
line_highlight_enabled: bool,
language: Option<&'static Language>,
ruler: CoordType,
encoding: &'static str,
newlines_are_crlf: bool,
@ -277,6 +281,7 @@ impl TextBuffer {
selection: None,
selection_generation: 0,
search: None,
highlighter_cache: HighlighterCache::new(),
width: 0,
margin_width: 0,
@ -286,6 +291,7 @@ impl TextBuffer {
tab_size: 4,
indent_with_tabs: false,
line_highlight_enabled: false,
language: None,
ruler: 0,
encoding: "UTF-8",
newlines_are_crlf: cfg!(windows), // Windows users want CRLF
@ -578,6 +584,15 @@ impl TextBuffer {
self.line_highlight_enabled = enabled;
}
pub fn language(&self) -> Option<&'static Language> {
self.language
}
pub fn set_language(&mut self, language: Option<&'static Language>) {
self.language = language;
self.highlighter_cache.invalidate_from(0);
}
/// Sets a ruler column, e.g. 80.
pub fn set_ruler(&mut self, column: CoordType) {
self.ruler = column;
@ -656,6 +671,7 @@ impl TextBuffer {
self.set_selection(None);
self.mark_as_clean();
self.reflow();
self.highlighter_cache.invalidate_from(0);
}
/// Copies the contents of the buffer into a string.
@ -1714,14 +1730,13 @@ impl TextBuffer {
return None;
}
let scratch = scratch_arena(None);
let width = destination.width();
let height = destination.height();
let line_number_width = self.margin_width.max(3) as usize - 3;
let text_width = width - self.margin_width;
let mut visualizer_buf = [0xE2, 0x90, 0x80]; // U+2400 in UTF8
let mut line = ArenaString::new_in(&scratch);
let mut visual_pos_x_max = 0;
let mut highlighter = self.language.map(|l| Highlighter::new(&self.buffer, l));
// Pick the cursor closer to the `origin.y`.
let mut cursor = {
@ -1737,10 +1752,10 @@ impl TextBuffer {
Some(TextBufferSelection { beg, end }) => minmax(beg, end),
};
line.reserve(width as usize * 2);
for y in 0..height {
line.clear();
let scratch = scratch_arena(None);
let mut line = ArenaString::new_in(&scratch);
line.reserve(width as usize * 2);
let visual_line = origin.y + y;
let mut cursor_beg =
@ -1960,6 +1975,53 @@ impl TextBuffer {
global_off += chunk.len();
}
if let Some(highlighter) = &mut highlighter {
let highlights = self.highlighter_cache.parse_line(
&scratch,
highlighter,
cursor_beg.logical_pos.y,
);
let mut highlights = highlights.iter();
if let Some(first) = highlights.next() {
let mut highlight_kind = first.kind;
let mut highlight_beg =
self.cursor_move_to_offset_internal(cursor_beg, first.start);
for next in highlights {
let kind = highlight_kind;
highlight_kind = next.kind;
let beg = highlight_beg.visual_pos;
highlight_beg =
self.cursor_move_to_offset_internal(highlight_beg, next.start);
let end = highlight_beg.visual_pos;
let color = match kind {
_ if (kind as u8) < 16 => IndexedColor::from(kind as u8),
HighlightKind::Comment => IndexedColor::Green,
HighlightKind::Number => IndexedColor::BrightGreen,
HighlightKind::String => IndexedColor::BrightRed,
HighlightKind::Variable => IndexedColor::BrightBlue,
HighlightKind::Operator => IndexedColor::White,
HighlightKind::Keyword => IndexedColor::BrightMagenta,
HighlightKind::Method => IndexedColor::BrightYellow,
_ => continue,
};
fb.blend_fg(
Rect {
left: destination.left + self.margin_width + beg.x - origin.x,
top: destination.top + y,
right: destination.left + self.margin_width + end.x - origin.x,
bottom: destination.top + y + 1,
},
fb.indexed(color),
);
}
}
}
visual_pos_x_max = visual_pos_x_max.max(cursor_end.visual_pos.x);
}
@ -2587,6 +2649,7 @@ impl TextBuffer {
}
self.active_edit_off = cursor.offset;
self.highlighter_cache.invalidate_from(cursor.logical_pos.y);
// If word-wrap is enabled, the visual layout of all logical lines affected by the write
// may have changed. This includes even text before the insertion point up to the line
@ -2717,6 +2780,7 @@ impl TextBuffer {
fn undo_redo(&mut self, undo: bool) {
let buffer_generation = self.buffer.generation();
let mut entry_buffer_generation = None;
let mut damage_start = CoordType::MAX;
loop {
// Transfer the last entry from the undo stack to the redo stack or vice versa.
@ -2761,6 +2825,8 @@ impl TextBuffer {
cursor
};
damage_start = damage_start.min(cursor.logical_pos.y);
{
let mut change = change.borrow_mut();
let change = &mut *change;
@ -2830,6 +2896,13 @@ impl TextBuffer {
}
}
if damage_start == CoordType::MAX {
// There weren't any undo/redo entries.
return;
}
self.highlighter_cache.invalidate_from(damage_start);
if entry_buffer_generation.is_some() {
self.recalc_after_content_changed();
}

View file

@ -53,6 +53,12 @@ pub enum IndexedColor {
Foreground,
}
impl<T: Into<u8>> From<T> for IndexedColor {
fn from(value: T) -> Self {
unsafe { std::mem::transmute(value.into() & 0xF) }
}
}
/// Number of indices used by [`IndexedColor`].
pub const INDEXED_COLORS_COUNT: usize = 18;

View file

@ -160,7 +160,7 @@ where
#[inline(always)]
#[allow(clippy::ptr_eq)]
fn opt_ptr<T>(a: Option<&T>) -> *const T {
pub fn opt_ptr<T>(a: Option<&T>) -> *const T {
unsafe { mem::transmute(a) }
}

View file

@ -32,6 +32,7 @@ pub mod hash;
pub mod helpers;
pub mod icu;
pub mod input;
pub mod lsh;
pub mod oklab;
pub mod path;
pub mod simd;

82
src/lsh/cache.rs Normal file
View file

@ -0,0 +1,82 @@
use crate::arena::{Arena, scratch_arena};
use crate::helpers::CoordType;
use crate::lsh::Higlight;
use crate::lsh::highlighter::{Highlighter, HighlighterState};
// Distance between stored checkpoints, in logical lines. Debug builds use a
// small interval so the checkpointing logic gets exercised by ordinary use.
#[cfg(debug_assertions)]
const INTERVAL: CoordType = 16;
#[cfg(not(debug_assertions))]
const INTERVAL: CoordType = 1024;

/// Caches [`HighlighterState`] snapshots every [`INTERVAL`] lines so that
/// random line access does not require re-parsing from the top of the document.
#[derive(Default)]
pub struct HighlighterCache {
    // `checkpoints[i]` holds the highlighter state at line `i * INTERVAL`.
    checkpoints: Vec<HighlighterState>,
}
impl HighlighterCache {
    /// Creates an empty cache with no checkpoints.
    pub fn new() -> Self {
        Self::default()
    }

    /// Drop any cached states starting at (including) the given logical line.
    pub fn invalidate_from(&mut self, line: CoordType) {
        // Rounding up keeps exactly the checkpoints strictly before `line`.
        self.checkpoints.truncate(Self::ceil_line_to_offset(line));
    }

    /// Parse the given logical line. Returns the highlight spans.
    pub fn parse_line<'a>(
        &mut self,
        arena: &'a Arena,
        highlighter: &mut Highlighter,
        line: CoordType,
    ) -> Vec<Higlight, &'a Arena> {
        // Do we need to random seek?
        if line != highlighter.logical_pos_y() {
            // If so, restore the nearest, preceding checkpoint...
            if !self.checkpoints.is_empty() {
                let n = Self::floor_line_to_offset(line);
                let n = n.min(self.checkpoints.len() - 1);
                highlighter.restore(&self.checkpoints[n]);
            } else {
                // The assumption is that you pass in a default constructed highlighter,
                // and this class handles random seeking for you. As such, there should
                // never be a case where we don't have a checkpoint for line 0,
                // but you have a highlighter for line >0.
                debug_assert!(highlighter.logical_pos_y() == 0);
            }

            // ...and then seek in front of the requested line.
            while highlighter.logical_pos_y() < line {
                // There's a bit of waste here, because we just throw away the results,
                // but that's better than duplicating the logic. The arena is very fast.
                let scratch = scratch_arena(Some(arena));
                _ = self.parse_line_impl(&scratch, highlighter);
            }
        }

        self.parse_line_impl(arena, highlighter)
    }

    /// Parses the highlighter's current line, first recording a checkpoint
    /// when this line begins a new [`INTERVAL`]-sized chunk.
    fn parse_line_impl<'a>(
        &mut self,
        arena: &'a Arena,
        highlighter: &mut Highlighter,
    ) -> Vec<Higlight, &'a Arena> {
        // If we need to store a checkpoint for the start of the next line, do so now.
        if Self::floor_line_to_offset(highlighter.logical_pos_y()) == self.checkpoints.len() {
            self.checkpoints.push(highlighter.snapshot());
        }
        highlighter.parse_next_line(arena)
    }

    /// Since this line cache is super simplistic (no insertions, only append),
    /// we can directly map from line numbers to offsets in the cache.
    fn floor_line_to_offset(line: CoordType) -> usize {
        // NOTE(review): a negative `line` maps to 0 via `unwrap_or` — confirm
        // callers never pass negative coordinates.
        (line / INTERVAL).try_into().unwrap_or(0)
    }

    /// Like `floor_line_to_offset`, but rounds up to the next chunk boundary.
    fn ceil_line_to_offset(line: CoordType) -> usize {
        ((line + INTERVAL - 1) / INTERVAL).try_into().unwrap_or(0)
    }
}

1
src/lsh/definitions.rs Normal file
View file

@ -0,0 +1 @@
include!(concat!(env!("OUT_DIR"), "/lsh_definitions.rs"));

343
src/lsh/highlighter.rs Normal file
View file

@ -0,0 +1,343 @@
use std::fmt::Debug;
use std::path::Path;
use std::slice;
use crate::arena::{Arena, scratch_arena};
use crate::document::ReadableDocument;
use crate::helpers::*;
use crate::lsh::definitions::*;
use crate::{simd, unicode};
/// Finds the language whose filename patterns match `path`'s file name.
///
/// A pattern with a leading `*` matches by suffix (e.g. `*.sh`); any other
/// pattern must equal the file name exactly. Returns `None` when `path` has
/// no file name or nothing matches.
pub fn language_from_path(path: &Path) -> Option<&'static Language> {
    let filename = path.file_name()?.as_encoded_bytes();
    LANGUAGES.iter().copied().find(|lang| {
        lang.filenames.iter().any(|pattern| {
            let pattern = pattern.as_bytes();
            match pattern.strip_prefix(b"*") {
                Some(suffix) => filename.ends_with(suffix),
                None => filename == pattern,
            }
        })
    })
}
/// A highlight span: `kind` applies starting at `start` until the `start` of
/// the next span returned for the same line.
///
/// NOTE(review): the type name is missing an `h` ("Higlight"); renaming would
/// touch every user, so it is only flagged here.
#[derive(Clone, PartialEq, Eq)]
pub struct Higlight {
    /// Byte offset where this span begins — presumably an absolute document
    /// offset (it is fed to cursor/offset seeking in the renderer); confirm.
    pub start: usize,
    /// Highlight classification for the span.
    pub kind: HighlightKind,
}
impl Debug for Higlight {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "({}, {:?})", self.start, self.kind)
}
}
/// Placeholder for per-parser state.
///
/// NOTE(review): currently empty and apparently unreferenced in this file —
/// confirm whether it can be removed.
#[derive(Clone, Copy, PartialEq, Eq, Default)]
pub struct State {}
/// Incremental, line-oriented syntax highlighter.
///
/// Parses the document strictly forward, one line per `parse_next_line` call;
/// random access is provided externally via `snapshot`/`restore`
/// (see `HighlighterCache`).
#[derive(Clone)]
pub struct Highlighter<'a> {
    // The document being highlighted.
    doc: &'a dyn ReadableDocument,
    // The language definition driving the state machine.
    language: &'static Language,
    // Byte offset of the next unparsed character.
    offset: usize,
    // Logical line number of the next line to be parsed.
    logical_pos_y: CoordType,
    // Stack of (state, active highlight kind) for nested states — the `u8`
    // is presumably an index into the generated transition table; confirm.
    state_stack: Vec<(u8, HighlightKind)>,
}
/// A resumable snapshot of a [`Highlighter`], captured via
/// [`Highlighter::snapshot`] and applied via [`Highlighter::restore`].
#[derive(Clone)]
pub struct HighlighterState {
    // Byte offset of the next unparsed character.
    pub offset: usize,
    // Logical line number of the next line to be parsed.
    pub logical_pos_y: CoordType,
    // Saved copy of the highlighter's state stack.
    pub state_stack: Vec<(u8, HighlightKind)>,
}
impl<'doc> Highlighter<'doc> {
/// Creates a highlighter positioned at the very start of `doc`, with an
/// empty state stack (i.e. in `language`'s initial state).
pub fn new(doc: &'doc dyn ReadableDocument, language: &'static Language) -> Self {
    Self { doc, language, offset: 0, logical_pos_y: 0, state_stack: Vec::new() }
}
/// The 0-based logical line number that the next `parse_next_line` call
/// will parse.
pub fn logical_pos_y(&self) -> CoordType {
    self.logical_pos_y
}
/// Create a restorable snapshot of the current highlighter state
/// so we can resume highlighting from this point later.
pub fn snapshot(&self) -> HighlighterState {
HighlighterState {
offset: self.offset,
logical_pos_y: self.logical_pos_y,
state_stack: self.state_stack.clone(),
}
}
/// Restore the highlighter state from a previously captured snapshot.
pub fn restore(&mut self, snapshot: &HighlighterState) {
self.offset = snapshot.offset;
self.logical_pos_y = snapshot.logical_pos_y;
self.state_stack = snapshot.state_stack.clone();
}
pub fn parse_next_line<'a>(&mut self, arena: &'a Arena) -> Vec<Higlight, &'a Arena> {
const MAX_LEN: usize = 32 * KIBI;
let scratch = scratch_arena(Some(arena));
let line_beg = self.offset;
let mut res = Vec::new_in(arena);
self.logical_pos_y += 1;
// Accumulate a line of text into `line_buf`.
let line = 'read: {
let mut chunk;
let mut line_buf;
// Try to read a chunk and see if it contains a newline.
// In that case we can skip concatenating chunks.
{
chunk = self.doc.read_forward(self.offset);
if chunk.is_empty() {
break 'read chunk;
}
let (off, line) = simd::lines_fwd(chunk, 0, 0, 1);
self.offset += off;
if line == 1 {
break 'read &chunk[..off];
}
let next_chunk = self.doc.read_forward(self.offset);
if next_chunk.is_empty() {
break 'read &chunk[..off];
}
line_buf = Vec::new_in(&*scratch);
// Ensure we don't overflow the heap size with a 1GB long line.
let end = off.min(MAX_LEN - line_buf.len());
let end = end.min(chunk.len());
line_buf.extend_from_slice(&chunk[..end]);
chunk = next_chunk;
}
// Concatenate chunks until we get a full line.
while line_buf.len() < MAX_LEN {
let (off, line) = simd::lines_fwd(chunk, 0, 0, 1);
self.offset += off;
// Ensure we don't overflow the heap size with a 1GB long line.
let end = off.min(MAX_LEN - line_buf.len());
let end = end.min(chunk.len());
line_buf.extend_from_slice(&chunk[..end]);
// Start of the next line found.
if line == 1 {
break;
}
chunk = self.doc.read_forward(self.offset);
if chunk.is_empty() {
break;
}
}
line_buf.leak()
};
// If the line is empty, we reached the end of the document.
//
// If the line is too long, we don't highlight it.
// This is to prevent performance issues with very long lines.
if line.is_empty() || line.len() >= MAX_LEN {
return res;
}
let line = unicode::strip_newline(line);
let mut off = 0usize;
let mut start = 0usize;
let (state, mut kind) =
self.state_stack.last().cloned().unwrap_or((0, HighlightKind::Other));
let mut state = state as usize;
let mut push = |start: usize, kind: HighlightKind| {
if let Some(last) = res.last_mut() {
if last.start == start {
last.kind = kind;
}
if last.kind == kind {
return;
}
}
res.push(Higlight { start, kind });
};
state = state.wrapping_sub(1);
loop {
state = state.wrapping_add(1);
let t = unsafe { self.language.transitions.get_unchecked(state) };
match t.test {
Test::Chars(n) => {
off = off + n.min(line.len() - off);
}
Test::Prefix(str) => {
let str = unsafe { slice::from_raw_parts(str.add(1), str.read() as usize) };
if !Self::inlined_memcmp(line, off, str) {
continue;
}
off += str.len();
}
Test::PrefixInsensitive(str) => {
let str = unsafe { slice::from_raw_parts(str.add(1), str.read() as usize) };
if !Self::inlined_memicmp(line, off, str) {
continue;
}
off += str.len();
}
Test::Charset(cs) => {
// TODO: http://0x80.pl/notesen/2018-10-18-simd-byte-lookup.html#alternative-implementation
if off >= line.len() || !Self::in_set(cs, line[off]) {
continue;
}
while {
off += 1;
off < line.len() && Self::in_set(cs, line[off])
} {}
}
}
if let Some(k) = t.kind {
kind = k;
}
match t.action {
Action::Jump(dst) => {
state = dst as usize;
}
Action::Push(dst) => {
state = dst as usize;
push(start, kind);
self.state_stack.push((dst, kind));
start = off;
}
Action::Pop(count) => {
push(start, kind);
if count != 0 {
self.state_stack
.truncate(self.state_stack.len().saturating_sub(count as usize));
}
let v = self.state_stack.last().cloned().unwrap_or((0, HighlightKind::Other));
state = v.0 as usize;
kind = v.1;
start = off;
if count == 0 && off >= line.len() {
break;
}
}
Action::Loop(dst) => {
push(start, kind);
state = dst as usize;
start = off;
if off >= line.len() {
break;
}
}
}
state = state.wrapping_sub(1);
}
push(start, kind);
res.push(Higlight { start: line.len(), kind: HighlightKind::Other });
// Adjust the range to account for the line offset.
for h in &mut res {
h.start = line_beg + h.start.min(line.len());
}
res
}
/// A mini-memcmp implementation for short needles.
/// Compares the `haystack` at `off` with the `needle`.
#[inline]
fn inlined_memcmp(haystack: &[u8], off: usize, needle: &[u8]) -> bool {
unsafe {
let needle_len = needle.len();
if haystack.len() - off < needle_len {
return false;
}
let a = haystack.as_ptr().add(off);
let b = needle.as_ptr();
let mut i = 0;
while i < needle_len {
let a = *a.add(i);
let b = *b.add(i);
i += 1;
if a != b {
return false;
}
}
true
}
}
/// Like `inlined_memcmp`, but case-insensitive.
#[inline]
fn inlined_memicmp(haystack: &[u8], off: usize, needle: &[u8]) -> bool {
unsafe {
let needle_len = needle.len();
if haystack.len() - off < needle_len {
return false;
}
let a = haystack.as_ptr().add(off);
let b = needle.as_ptr();
let mut i = 0;
while i < needle_len {
// str in PrefixInsensitive(str) is expected to be lowercase, printable ASCII.
let a = a.add(i).read().to_ascii_lowercase();
let b = b.add(i).read();
i += 1;
if a != b {
return false;
}
}
true
}
}
#[inline]
fn in_set(bitmap: &[u16; 16], byte: u8) -> bool {
let lo_nibble = byte & 0xf;
let hi_nibble = byte >> 4;
let bitset = bitmap[lo_nibble as usize];
let bitmask = 1u16 << hi_nibble;
(bitset & bitmask) != 0
}
}

8
src/lsh/mod.rs Normal file
View file

@ -0,0 +1,8 @@
//! Welcome to Leonard's Shitty Highlighter.
//!
//! Table-driven syntax highlighting: `definitions` holds the generated
//! language tables, `highlighter` the line-by-line state machine, and
//! `cache` the per-document result cache.

pub mod cache;
mod definitions;
mod highlighter;

pub use definitions::*;
pub use highlighter::*;

View file

@ -14,7 +14,7 @@ use windows_sys::Win32::Storage::FileSystem;
use windows_sys::Win32::System::Diagnostics::Debug;
use windows_sys::Win32::System::{Console, IO, LibraryLoader, Memory, Threading};
use windows_sys::Win32::{Foundation, Globalization};
use windows_sys::w;
use windows_sys::core::*;
use crate::apperr;
use crate::arena::{Arena, ArenaString, scratch_arena};
@ -55,7 +55,7 @@ type ReadConsoleInputExW = unsafe extern "system" fn(
n_length: u32,
lp_number_of_events_read: *mut u32,
w_flags: u16,
) -> Foundation::BOOL;
) -> BOOL;
unsafe extern "system" fn read_console_input_ex_placeholder(
_: Foundation::HANDLE,
@ -63,7 +63,7 @@ unsafe extern "system" fn read_console_input_ex_placeholder(
_: u32,
_: *mut u32,
_: u16,
) -> Foundation::BOOL {
) -> BOOL {
panic!();
}
@ -97,7 +97,7 @@ static mut STATE: State = State {
wants_exit: false,
};
extern "system" fn console_ctrl_handler(_ctrl_type: u32) -> Foundation::BOOL {
extern "system" fn console_ctrl_handler(_ctrl_type: u32) -> BOOL {
unsafe {
STATE.wants_exit = true;
IO::CancelIoEx(STATE.stdin, null());
@ -757,7 +757,7 @@ pub fn apperr_is_not_found(err: apperr::Error) -> bool {
err == gle_to_apperr(Foundation::ERROR_FILE_NOT_FOUND)
}
fn check_bool_return(ret: Foundation::BOOL) -> apperr::Result<()> {
fn check_bool_return(ret: BOOL) -> apperr::Result<()> {
if ret == 0 { Err(get_last_error()) } else { Ok(()) }
}