use simd on supported platforms

This commit is contained in:
Folkert 2022-05-18 00:50:22 +02:00
parent b241bcf777
commit 500382c900
No known key found for this signature in database
GPG key ID: 1F17F6FFD112B97C

View file

@ -226,11 +226,11 @@ fn fast_eat_spaces(state: &State) -> FastSpaceState {
use FastSpaceState::*;
let mut newlines = 0;
let mut index = 0;
let mut line_start = state.line_start.offset as usize;
let base_offset = state.pos().offset as usize;
let bytes = state.bytes();
let mut index = base_offset;
let bytes = state.original_bytes();
let length = bytes.len();
'outer: while index < length {
@ -241,18 +241,50 @@ fn fast_eat_spaces(state: &State) -> FastSpaceState {
b'\n' => {
newlines += 1;
index += 1;
line_start = base_offset + index;
line_start = index;
}
b'\r' => {
index += 1;
line_start = base_offset + index;
line_start = index;
}
b'\t' => {
return HasTab(Position::new((base_offset + index) as u32));
return HasTab(Position::new(index as u32));
}
b'#' => {
index += 1;
// try to use SIMD instructions explicitly
if cfg!(target_arch = "x86_64") {
use std::arch::x86_64::*;
// a bytestring with the three characters we're looking for (the rest is ignored)
let needle = b"\r\n\t=============";
let needle = unsafe { _mm_loadu_si128(needle.as_ptr() as *const _) };
while index < length {
let remaining = length - index;
let length = if remaining < 16 { remaining as i32 } else { 16 };
// the source bytes we'll be looking at
let haystack =
unsafe { _mm_loadu_si128(bytes.as_ptr().add(index) as *const _) };
// use first 3 characters of needle, first `length` characters of haystack
// finds the first index where one of the `needle` characters occurs
// or 16 when none of the needle characters occur
let first_special_char = unsafe {
_mm_cmpestri(needle, 3, haystack, length, _SIDD_CMP_EQUAL_ANY)
};
// we've made `first_special_char` characters of progress
index += first_special_char as usize;
// if we found a special char, let the outer loop handle it
if first_special_char != 16 {
continue 'outer;
}
}
} else {
while index < length {
match bytes[index] {
b'\n' | b'\t' | b'\r' => {
@ -265,14 +297,15 @@ fn fast_eat_spaces(state: &State) -> FastSpaceState {
}
}
}
}
_ => break,
}
}
Good {
newlines,
consumed: index,
column: ((base_offset + index) - line_start) as u32,
consumed: index - base_offset,
column: (index - line_start) as u32,
}
}