mirror of
https://github.com/roc-lang/roc.git
synced 2025-08-02 19:32:17 +00:00
Re-improve perf of skipping spaces and comments
On my M1 mac this shows as ~25% faster at parsing Num.roc than the old implementation, probably because nobody wrote any NEON code. Even on my x86_64 linux box (Ryzen 2700x), this shows as 10% faster than the current SSE implementation (running with RUSTFLAGS="-C target-cpu=native").
This commit is contained in:
parent
5f29402297
commit
e83cd8f191
4 changed files with 195 additions and 6 deletions
63
Cargo.lock
generated
63
Cargo.lock
generated
|
@ -3172,6 +3172,26 @@ version = "1.0.6"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2f61dcf0b917cd75d4521d7343d1ffff3d1583054133c9b5cbea3375c703c40d"
|
||||
|
||||
[[package]]
|
||||
name = "proptest"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e0d9cc07f18492d879586c92b485def06bc850da3118075cd45d50e9c95b0e5"
|
||||
dependencies = [
|
||||
"bit-set",
|
||||
"bitflags",
|
||||
"byteorder",
|
||||
"lazy_static",
|
||||
"num-traits",
|
||||
"quick-error 2.0.1",
|
||||
"rand",
|
||||
"rand_chacha",
|
||||
"rand_xorshift",
|
||||
"regex-syntax",
|
||||
"rusty-fork",
|
||||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ptr_meta"
|
||||
version = "0.1.4"
|
||||
|
@ -3203,6 +3223,18 @@ dependencies = [
|
|||
"unicase",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-error"
|
||||
version = "1.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
|
||||
|
||||
[[package]]
|
||||
name = "quick-error"
|
||||
version = "2.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.22.0"
|
||||
|
@ -3295,6 +3327,15 @@ dependencies = [
|
|||
"getrandom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_xorshift"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f"
|
||||
dependencies = [
|
||||
"rand_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_xoshiro"
|
||||
version = "0.6.0"
|
||||
|
@ -4178,6 +4219,7 @@ dependencies = [
|
|||
"encode_unicode 1.0.0",
|
||||
"indoc",
|
||||
"pretty_assertions",
|
||||
"proptest",
|
||||
"quickcheck",
|
||||
"quickcheck_macros",
|
||||
"roc_collections",
|
||||
|
@ -4575,6 +4617,18 @@ version = "1.0.7"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a0a5f7c728f5d284929a1cccb5bc19884422bfe6ef4d6c409da2c41838983fcf"
|
||||
|
||||
[[package]]
|
||||
name = "rusty-fork"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f"
|
||||
dependencies = [
|
||||
"fnv",
|
||||
"quick-error 1.2.3",
|
||||
"tempfile",
|
||||
"wait-timeout",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustyline"
|
||||
version = "9.1.1"
|
||||
|
@ -5751,6 +5805,15 @@ dependencies = [
|
|||
"quote",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wait-timeout"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "2.3.2"
|
||||
|
|
|
@ -19,6 +19,7 @@ encode_unicode.workspace = true
|
|||
|
||||
[dev-dependencies]
|
||||
roc_test_utils = { path = "../../test_utils" }
|
||||
proptest = "1.0.0"
|
||||
|
||||
criterion.workspace = true
|
||||
pretty_assertions.workspace = true
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
use std::path::PathBuf;
|
||||
|
||||
use bumpalo::Bump;
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
use roc_parse::{module, module::module_defs, parser::Parser, state::State};
|
||||
use std::path::PathBuf;
|
||||
|
||||
pub fn parse_benchmark(c: &mut Criterion) {
|
||||
c.bench_function("parse false-interpreter", |b| {
|
||||
|
|
|
@ -161,7 +161,7 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
fn eat_whitespace(bytes: &[u8]) -> usize {
|
||||
pub fn simple_eat_whitespace(bytes: &[u8]) -> usize {
|
||||
let mut i = 0;
|
||||
while i < bytes.len() {
|
||||
match bytes[i] {
|
||||
|
@ -172,7 +172,54 @@ fn eat_whitespace(bytes: &[u8]) -> usize {
|
|||
i
|
||||
}
|
||||
|
||||
fn eat_until_newline(bytes: &[u8]) -> usize {
|
||||
pub fn fast_eat_whitespace(bytes: &[u8]) -> usize {
|
||||
// Load 8 bytes at a time, keeping in mind that the initial offset may not be aligned
|
||||
let mut i = 0;
|
||||
while i + 8 <= bytes.len() {
|
||||
let chunk = unsafe {
|
||||
// Safe because we know the pointer is in bounds
|
||||
(bytes.as_ptr().add(i) as *const u64)
|
||||
.read_unaligned()
|
||||
.to_le()
|
||||
};
|
||||
|
||||
// Space character is 0x20, which has a single bit set
|
||||
// We can check for any space character by checking if any other bit is set
|
||||
let spaces = 0x2020_2020_2020_2020;
|
||||
|
||||
// First, generate a mask where each byte is 0xff if the byte is a space,
|
||||
// and some other bit sequence otherwise
|
||||
let mask = !(chunk ^ spaces);
|
||||
|
||||
// Now mask off the high bit, so there's some place to carry into without
|
||||
// overflowing into the next byte.
|
||||
let mask = mask & !0x8080_8080_8080_8080;
|
||||
|
||||
// Now add 0x0101_0101_0101_0101 to each byte, which will carry into the high bit
|
||||
// if and only if the byte is a space.
|
||||
let mask = mask + 0x0101_0101_0101_0101;
|
||||
|
||||
// Now mask off areas where the original bytes had the high bit set, so that
|
||||
// 0x80|0x20 = 0xa0 will not be considered a space.
|
||||
let mask = mask & !(chunk & 0x8080_8080_8080_8080);
|
||||
|
||||
// Make sure all the _other_ bits aside from the high bit are set,
|
||||
// and count the number of trailing one bits, dividing by 8 to get the number of
|
||||
// bytes that are spaces.
|
||||
let count = ((mask | !0x8080_8080_8080_8080).trailing_ones() as usize) / 8;
|
||||
|
||||
if count == 8 {
|
||||
i += 8;
|
||||
} else {
|
||||
return i + count;
|
||||
}
|
||||
}
|
||||
|
||||
// Check the remaining bytes
|
||||
simple_eat_whitespace(&bytes[i..]) + i
|
||||
}
|
||||
|
||||
pub fn simple_eat_until_control_character(bytes: &[u8]) -> usize {
|
||||
let mut i = 0;
|
||||
while i < bytes.len() {
|
||||
if bytes[i] < b' ' {
|
||||
|
@ -184,6 +231,85 @@ fn eat_until_newline(bytes: &[u8]) -> usize {
|
|||
i
|
||||
}
|
||||
|
||||
pub fn fast_eat_until_control_character(bytes: &[u8]) -> usize {
|
||||
// Load 8 bytes at a time, keeping in mind that the initial offset may not be aligned
|
||||
let mut i = 0;
|
||||
while i + 8 <= bytes.len() {
|
||||
let chunk = unsafe {
|
||||
// Safe because we know the pointer is in bounds
|
||||
(bytes.as_ptr().add(i) as *const u64)
|
||||
.read_unaligned()
|
||||
.to_le()
|
||||
};
|
||||
|
||||
// Control characters are 0x00-0x1F, and don't have any high bits set.
|
||||
// They only have bits set that fall under the 0x1F mask.
|
||||
let control = 0x1F1F_1F1F_1F1F_1F1F;
|
||||
|
||||
// First we set up a value where, if a given byte is a control character,
|
||||
// it'll have a all the non-control bits set to 1. All control bits are set to zero.
|
||||
let mask = !(chunk & !control) & !control;
|
||||
|
||||
// Now, down shift by one bit. This will leave room for the following add to
|
||||
// carry, without impacting the next byte.
|
||||
let mask = mask >> 1;
|
||||
|
||||
// Add one (shifted by the right amount), causing all the one bits in the control
|
||||
// characters to cascade, and put a one in the high bit.
|
||||
let mask = mask.wrapping_add(0x1010_1010_1010_1010);
|
||||
|
||||
// Now, we can count the number of trailing zero bits, dividing by 8 to get the
|
||||
// number of bytes before the first control character.
|
||||
let count = (mask & 0x8080_8080_8080_8080).trailing_zeros() as usize / 8;
|
||||
|
||||
if count == 8 {
|
||||
i += 8;
|
||||
} else {
|
||||
return i + count;
|
||||
}
|
||||
}
|
||||
|
||||
// Check the remaining bytes
|
||||
simple_eat_until_control_character(&bytes[i..]) + i
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use proptest::prelude::*;
|
||||
|
||||
#[test]
|
||||
fn test_eat_whitespace_simple() {
|
||||
let bytes = &[0, 0, 0, 0, 0, 0, 0, 0];
|
||||
assert_eq!(simple_eat_whitespace(bytes), fast_eat_whitespace(bytes));
|
||||
}
|
||||
|
||||
proptest! {
|
||||
#[test]
|
||||
fn test_eat_whitespace(bytes in proptest::collection::vec(any::<u8>(), 0..100)) {
|
||||
prop_assert_eq!(simple_eat_whitespace(&bytes), fast_eat_whitespace(&bytes));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_eat_until_control_character_simple() {
|
||||
let bytes = &[32, 0, 0, 0, 0, 0, 0, 0];
|
||||
assert_eq!(
|
||||
simple_eat_until_control_character(bytes),
|
||||
fast_eat_until_control_character(bytes)
|
||||
);
|
||||
}
|
||||
|
||||
proptest! {
|
||||
#[test]
|
||||
fn test_eat_until_control_character(bytes in proptest::collection::vec(any::<u8>(), 0..100)) {
|
||||
prop_assert_eq!(
|
||||
simple_eat_until_control_character(&bytes),
|
||||
fast_eat_until_control_character(&bytes));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn space0_e<'a, E>(
|
||||
indent_problem: fn(Position) -> E,
|
||||
) -> impl Parser<'a, &'a [CommentOrNewline<'a>], E>
|
||||
|
@ -213,7 +339,7 @@ where
|
|||
let mut newlines = Vec::new_in(arena);
|
||||
let mut progress = NoProgress;
|
||||
loop {
|
||||
let whitespace = eat_whitespace(state.bytes());
|
||||
let whitespace = fast_eat_whitespace(state.bytes());
|
||||
if whitespace > 0 {
|
||||
state.advance_mut(whitespace);
|
||||
progress = MadeProgress;
|
||||
|
@ -235,7 +361,7 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
let len = eat_until_newline(state.bytes());
|
||||
let len = fast_eat_until_control_character(state.bytes());
|
||||
|
||||
// We already checked that the string is valid UTF-8
|
||||
debug_assert!(std::str::from_utf8(&state.bytes()[..len]).is_ok());
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue