// Loads Roc source files (from strings or from files) into a structure which is
// guaranteed to have the following properties, all of which the SIMD parser requires:
// - 16B alignment
// - byte length is a multiple of 64
// - if the source bytes were not a multiple of 64, the extra space is filled with trailing newlines
//
// (Trailing newlines are the filler of choice because they are irrelevant to the parser.)
//
// It does this as efficiently as possible by using branchless SIMD to fill padding bytes,
// and reading the contents of the file directly into an arena in as few syscalls as possible.
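//
// For example (an illustrative sketch, not part of the original source; it uses only the
// Bump arena and the Src64 API defined below):
//
//     let arena = bumpalo::Bump::new();
//     let src = Src64::from_str(&arena, "x = 1").unwrap();
//
//     assert_eq!(src.len(), 64); // 5 source bytes, rounded up to one 64B chunk
//     assert!(src.bytes().ends_with(b"\n")); // the padding is trailing newlines
//     assert_eq!(src.bytes().as_ptr().align_offset(16), 0); // aligned for 128-bit SIMD
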
use bumpalo::{self, Bump};
use core::{
    alloc::Layout,
    mem::{align_of, MaybeUninit},
    ptr::{self, NonNull},
};

#[cfg(not(test))]
/// We store both line and column numbers as u16s, so the largest possible file you could open
/// would be one where every line has the longest possible column length: u16::MAX lines of
/// u16::MAX bytes each, or u16::MAX * u16::MAX bytes in total.
const MAX_ROC_SOURCE_FILE_SIZE: usize = u16::MAX as usize * u16::MAX as usize; // 4GB
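// (That's 65_535 * 65_535 = 4_294_836_225 bytes, i.e. just under 4 GiB.)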

#[cfg(test)]
const MAX_ROC_SOURCE_FILE_SIZE: usize = 1024; // small enough that we can create a tempfile to exercise this scenario

pub struct Src64<'a> {
    /// These bytes are guaranteed to have a 16B-aligned address (so the parser can do 128-bit SIMD on it).
    /// This slice is guaranteed to have a length that's a multiple of 64B, because the parser iterates in
    /// chunks of 64B. (If extra bytes are needed to make it a multiple of 64B, we add trailing newlines
    /// because the parser ignores those.)
    bytes: &'a [u8],
}

#[derive(Debug, Copy, Clone, PartialEq)]
pub enum FileErr {
    FileWasEmpty,
    ReadErr,
    FileWasTooBig(usize),
    ErrReadingFileSize,
    FileOpenFailed,
}

impl<'a> Src64<'a> {
    const BYTES_ALIGNMENT: usize = 64;

    /// The underlying source bytes that originally came from a file or from a string.
    ///
    /// These bytes are guaranteed to have a 16B-aligned address (so the parser can do 128-bit SIMD on it).
    /// This slice is guaranteed to have a length that's a multiple of 64B, because the parser iterates in
    /// chunks of 64B. (If extra bytes are needed to make it a multiple of 64B, we add trailing newlines
    /// because the parser ignores those.)
    pub fn bytes(&self) -> &[u8] {
        self.bytes
    }

    pub fn len(&self) -> usize {
        self.bytes.len()
    }

    pub fn is_empty(&self) -> bool {
        self.bytes.is_empty()
    }

    /// Returns None if the given string exceeds the maximum size of a Roc source file.
    pub fn from_str(arena: &'a Bump, src: &'a str) -> Option<Src64<'a>> {
        let src_len = src.len();

        if src_len == 0 {
            return None;
        }

        let capacity = round_up_to_nearest_64(src_len);

        debug_assert_eq!(capacity % 64, 0);

        if capacity == src_len && src.as_ptr().align_offset(Self::BYTES_ALIGNMENT) == 0 {
            // If the string already happens to meet our capacity and alignment requirements, just return it.
            return Some(Self {
                bytes: src.as_bytes(),
            });
        }
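
        // (Illustrative note, not from the original source: a 64-byte string that already sits at
        // a 64B-aligned address is returned as-is by the fast path above, with no copying. Anything
        // else, e.g. the 5-byte string "x = 1", falls through to the arena copy below and comes back
        // padded with trailing newlines up to the next multiple of 64.)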

        // Safety: we got capacity by rounding up to the nearest 64B
        let dest = unsafe { allocate_chunks(arena, capacity)? }.as_ptr();

        // Safety: `dest` has a length of `capacity`, which has been rounded up to a multiple of 64.
        unsafe {
            let trailing_newlines_needed = capacity - src_len;

            // Start writing newlines right after the last of the source bytes.
            write_newlines(dest.add(src_len), trailing_newlines_needed);
        };

        // Safety: we just allocated `dest` to have len >= src.len(), and they're both u8 arrays.
        unsafe {
            ptr::copy_nonoverlapping(src.as_bytes().as_ptr(), dest, src_len);
        }

        Some(Self {
            // Safety: all the bytes should now be initialized
            bytes: unsafe { core::slice::from_raw_parts_mut(dest, capacity) },
        })
    }

    #[cfg(any(unix, windows))] // This is not available on wasm32. We could make it work with WASI if desired.
    pub fn from_file(arena: &'a Bump, path: &std::path::Path) -> Result<Self, FileErr> {
        use core::ffi::c_void;

        let file = match std::fs::File::open(path) {
            Ok(file) => file,
            Err(_) => {
                return Err(FileErr::FileOpenFailed);
            }
        };

        let file_size = match file.metadata() {
            Ok(metadata) => {
                #[cfg(unix)]
                {
                    use std::os::unix::prelude::MetadataExt;

                    metadata.size() as usize
                }

                #[cfg(windows)]
                {
                    use std::os::windows::prelude::MetadataExt;

                    metadata.file_size() as usize
                }
            }
            Err(_io_err) => {
                return Err(FileErr::ErrReadingFileSize);
            }
        };

        if file_size == 0 {
            return Err(FileErr::FileWasEmpty);
        }

        let capacity = round_up_to_nearest_64(file_size);

        // Safety: round_up_to_nearest_64 will give us a capacity that is
        // at least 64, and also a multiple of 64.
        match unsafe { allocate_chunks(arena, capacity) } {
            Some(buf) => {
                // Read bytes equal to file_size into the arena allocation.
                //
                // We use the native OS read() operation here to avoid UB; file.read_exact()
                // only reads into a slice, and constructing a slice with uninitialized
                // data is UB (per the slice::from_raw_parts docs). The allocation is uninitialized here,
                // and initializing it would be a waste of CPU cycles because we're about to overwrite
                // those bytes with bytes from the file anyway.
                let bytes_read = {
                    #[cfg(unix)]
                    unsafe {
                        use std::os::fd::AsRawFd;

                        // This extern lets us avoid an entire libc crate dependency.
                        extern "C" {
                            // https://linux.die.net/man/2/read
                            pub fn read(
                                fd: core::ffi::c_int,
                                buf: *mut c_void,
                                count: usize,
                            ) -> isize;
                        }

                        read(file.as_raw_fd(), buf.as_ptr() as *mut c_void, file_size) as usize
                    }

                    #[cfg(windows)]
                    unsafe {
                        use std::os::windows::io::AsRawHandle;

                        // This extern lets us avoid an entire winapi crate dependency.
                        extern "system" {
                            // https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-readfile
                            pub fn ReadFile(
                                hFile: *mut c_void,
                                lpBuffer: *mut c_void,
                                nNumberOfBytesToRead: u32,
                                lpNumberOfBytesRead: *mut u32,
                                lpOverlapped: *mut c_void, // this should be a pointer to a struct, but we always pass null.
                            ) -> i32;
                        }

                        let mut bytes_read = core::mem::MaybeUninit::uninit();

                        // We should have already errored out if file_size exceeded u32::MAX,
                        // due to our maximum source file size. This debug_assert! is here to
                        // make sure casting file_size to u32 is safe in the ReadFile call.
                        debug_assert!(MAX_ROC_SOURCE_FILE_SIZE <= u32::MAX as usize);

                        ReadFile(
                            file.as_raw_handle() as *mut c_void,
                            buf.as_ptr() as *mut c_void,
                            file_size as u32,
                            bytes_read.as_mut_ptr(),
                            core::ptr::null_mut(),
                        );

                        bytes_read.assume_init() as usize
                    }
                };

                // We can close the file now; we're done with it.
                drop(file);

                // It's crucial that we successfully read the entire file; otherwise, it would be unsafe
                // to make a slice out of it because we might not have overwritten the uninitialized
                // memory leading up to the newlines at the end!
                //
                // Note that on UNIX, bytes_read might be -1 if this was a file read error. This
                // condition will catch that too, since we know file_size won't be (-1isize as usize)
                // because if it was, then this match would have taken the None branch due to
                // (-1isize as usize) exceeding our maximum file size.
                if bytes_read != file_size {
                    return Err(FileErr::ReadErr);
                }

                // Before we write newlines to the last chunk, branchlessly prefetch the first four 64-byte chunks.
                // We're about to have a cache miss due to loading the last chunk from main memory (DMA will have
                // written it there without having gone through the CPU), and if we don't prefetch here, then we'll
                // immediately get a second cache miss when we start traversing the loaded file. The prefetch means
                // that by the time we finish resolving the first cache miss on the last chunk, continuing with the
                // first chunk(s) won't be a cache miss anymore, because they'll already be in cache.
                //
                // We can do further prefetches in the actual tokenization loop.
                {
                    // We know capacity >= 64, so this will never wrap.
                    let last_chunk_offset = capacity - 64;

                    // Prefetch the first 64-byte chunk.
                    prefetch_read(buf, 0);

                    // Prefetch the second 64-byte chunk, using min() to branchlessly avoid prefetching an address we might not own.
                    prefetch_read(buf, 64.min(last_chunk_offset));

                    // Prefetch the third 64-byte chunk, using min() to branchlessly avoid prefetching an address we might not own.
                    prefetch_read(buf, 128.min(last_chunk_offset));

                    // Prefetch the fourth 64-byte chunk, using min() to branchlessly avoid prefetching an address we might not own.
                    prefetch_read(buf, 192.min(last_chunk_offset));

                    // Further prefetching can happen in the tokenization loop. Now that we've prefetched the first chunks,
                    // we should be able to prefetch the others in the tokenization loop before it needs to read them.
                }

                // We may have coincidentally had a file size that was a multiple of 64, but if not,
                // we'll need to fill the allocation with trailing newlines so we aren't tokenizing
                // uninitialized memory.
                if capacity > file_size {
                    debug_assert!(capacity - file_size < 64);
                    let trailing_newlines_needed = capacity - file_size;

                    // Safety: `buf` has a length of `capacity`, which has been rounded up to a multiple of 64.
                    unsafe {
                        // Start writing newlines right after the last of the bytes we got from the file.
                        write_newlines(buf.as_ptr().add(file_size), trailing_newlines_needed);
                    };
                }

                // Safety: `buf` came from an allocation of `capacity` bytes; it's had `file_size` bytes
                // written from the file and newlines filled in over the rest.
                let bytes = unsafe { core::slice::from_raw_parts_mut(buf.as_ptr(), capacity) };

                Ok(Self { bytes })
            }
            None => Err(FileErr::FileWasTooBig(file_size)),
        }
    }
}
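
// Illustrative usage of Src64::from_file (a sketch, not part of the original source; the path
// "app.roc" is hypothetical, and the error handling just names the FileErr variants defined above):
//
//     let arena = Bump::new();
//
//     match Src64::from_file(&arena, std::path::Path::new("app.roc")) {
//         Ok(src) => debug_assert_eq!(src.len() % 64, 0), // always padded to a 64B multiple
//         Err(FileErr::FileWasEmpty) => { /* empty files are rejected, not padded */ }
//         Err(other) => panic!("couldn't load source: {other:?}"),
//     }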

fn round_up_to_nearest_64(num: usize) -> usize {
    // Round up to the nearest 64. (Writing this out as 64 - 1 so it's clearer where the numbers came from.)
    // We can do saturating addition here rather than overflow checking, because if we overflow usize::MAX,
    // we will most definitely be over the max source file size and return None anyway.
    (num.saturating_add(64 - 1)) & !(64 - 1)
}
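
// A few worked examples of the rounding above:
//
//     (1  + 63) & !63 == 64
//     (64 + 63) & !63 == 64
//     (65 + 63) & !63 == 128
//
// (from_str and from_file reject empty input before calling this, so for the sizes they
// actually pass in, the result is always at least 64.)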

/// Safety: capacity must be a multiple of 64, and must be at least 64.
unsafe fn allocate_chunks(arena: &Bump, capacity: usize) -> Option<NonNull<u8>> {
    // Compare capacity here instead of size because this file limit is based on what we can record line and
    // column numbers for, and those can theoretically overflow on the trailing newlines we may have added.
    // This distinction will most likely come up in practice zero times ever, but it could come up in fuzzing.
    if capacity > MAX_ROC_SOURCE_FILE_SIZE {
        return None;
    }

    debug_assert!(capacity >= 64);
    debug_assert!(capacity % 64 == 0);

    // Safety: the rules we follow are https://doc.rust-lang.org/core/alloc/struct.Layout.html#method.from_size_align_unchecked
    // `align` is valid because it's a hardcoded power of two, and `capacity` has already been rounded up to a multiple of it.
    // We align it to 64B so that it's on cache line boundaries on many CPUs, which makes prefetching simpler.
    let layout = unsafe { Layout::from_size_align_unchecked(capacity, Src64::BYTES_ALIGNMENT) };

    // We have to use alloc_layout here because we have stricter alignment requirements than normal slices.
    Some(arena.alloc_layout(layout))
}

/// This is branchless so there can't be mispredictions.
///
/// Safety: `dest` must be valid for writing `len` bytes, and `len` must be
/// at most 64. (We copy the newlines out of a single 64-byte buffer.)
unsafe fn write_newlines(dest: *mut u8, len: usize) {
    debug_assert!(len <= 64);

    #[cfg(target_feature = "sse2")]
    {
        use core::arch::x86_64::{__m128i, _mm_set1_epi8, _mm_storeu_si128};

        let mut buf: MaybeUninit<[__m128i; 4]> = MaybeUninit::uninit();
        let newline = _mm_set1_epi8(b'\n' as i8);
        let ptr = buf.as_mut_ptr() as *mut __m128i;

        debug_assert_eq!(ptr.align_offset(align_of::<__m128i>()), 0);

        _mm_storeu_si128(ptr.add(0), newline);
        _mm_storeu_si128(ptr.add(1), newline);
        _mm_storeu_si128(ptr.add(2), newline);
        _mm_storeu_si128(ptr.add(3), newline);

        core::ptr::copy_nonoverlapping(ptr as *const u8, dest, len);
    }

    #[cfg(target_feature = "neon")]
    {
        use core::arch::aarch64::{int8x16_t, vdupq_n_s8, vst1q_s8};

        let mut buf: MaybeUninit<[int8x16_t; 4]> = MaybeUninit::uninit();
        let newline = vdupq_n_s8(b'\n' as i8);
        let ptr = buf.as_mut_ptr() as *mut i8;

        debug_assert_eq!(ptr.align_offset(align_of::<int8x16_t>()), 0);

        vst1q_s8(ptr.add(0), newline);
        vst1q_s8(ptr.add(16), newline);
        vst1q_s8(ptr.add(32), newline);
        vst1q_s8(ptr.add(48), newline);

        core::ptr::copy_nonoverlapping(ptr as *const u8, dest, len);
    }

    #[cfg(not(any(target_feature = "sse2", target_feature = "neon")))]
    {
        // We don't have access to SIMD, so do eight 64-bit writes instead of four 128-bit writes.
        let mut buf: MaybeUninit<[u64; 8]> = MaybeUninit::uninit();
        let newline_repeated = (b'\n' as u64) * 0x0101010101010101;
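        // (b'\n' is 0x0A, so the multiplication above broadcasts it into every byte of the u64,
        // producing 0x0A0A_0A0A_0A0A_0A0A, i.e. eight newline bytes written at once.)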
        let ptr = buf.as_mut_ptr() as *mut u64;

        debug_assert_eq!(ptr.align_offset(align_of::<u64>()), 0);

        *ptr.add(0) = newline_repeated;
        *ptr.add(1) = newline_repeated;
        *ptr.add(2) = newline_repeated;
        *ptr.add(3) = newline_repeated;
        *ptr.add(4) = newline_repeated;
        *ptr.add(5) = newline_repeated;
        *ptr.add(6) = newline_repeated;
        *ptr.add(7) = newline_repeated;

        core::ptr::copy_nonoverlapping(ptr as *const u8, dest, len);
    }
}

#[inline(always)]
fn prefetch_read<T>(non_null_ptr: NonNull<T>, offset: usize) {
    // Use inline asm until this is stabilized:
    // https://doc.rust-lang.org/std/intrinsics/fn.prefetch_read_data.html

    #[cfg(target_arch = "x86_64")]
    unsafe {
        core::arch::asm!(
            "prefetcht0 [{}]",
            in(reg) non_null_ptr.as_ptr().add(offset)
        );
    }

    #[cfg(target_arch = "aarch64")]
    unsafe {
        core::arch::asm!(
            "prfm PLDL1KEEP, [{}]",
            in(reg) non_null_ptr.as_ptr().add(offset)
        );
    }

    // If we're not on x64 or aarch64, just do nothing!
}

#[cfg(test)]
mod src64_tests {
    use super::{FileErr, Src64, MAX_ROC_SOURCE_FILE_SIZE};
    use bumpalo::Bump;
    use quickcheck::{quickcheck, Arbitrary, Gen};
    use std::fs::File;
    use std::io::Write;
    use tempfile::tempdir;

    fn expect_from_str(arena: &Bump, contents: &str, expected: &Result<Vec<u8>, FileErr>) {
        match Src64::from_str(arena, contents) {
            Some(actual) => {
                assert_eq!(actual.len() % 64, 0);
                assert_eq!(
                    expected.as_ref().ok(),
                    Some(&actual.bytes().into()),
                    "Src64::from_str had unexpected output"
                )
            }
            None => {
                assert_eq!(
                    expected.as_ref().ok(),
                    None,
                    "Src64::from_str had unexpected output"
                )
            }
        }
    }

    fn expect_from_file(arena: &Bump, contents: &str, expected: &Result<Vec<u8>, FileErr>) {
        let dir = tempdir().expect("Failed to create temp dir");
        let file_path = dir.path().join("temp_file");

        // Write contents to the temp file
        {
            let mut file = File::create(&file_path).expect("Failed to create temp file");
            file.write_all(contents.as_bytes())
                .expect("Failed to write to temp file");
        }

        match Src64::from_file(arena, &file_path) {
            Ok(actual) => {
                assert_eq!(actual.len() % 64, 0);
                assert_eq!(
                    expected,
                    &Ok(actual.bytes().into()),
                    "Src64::from_file had unexpected output"
                )
            }
            Err(err) => assert_eq!(
                expected,
                &Err(err),
                "Src64::from_file had unexpected output"
            ),
        }
    }

    /// Runs both Src64::from_str and Src64::from_file on the given str, then
    /// asserts the output of both of those functions is equal to `expected`.
    /// (Since from_str returns an Option, we call .ok() on `expected` before comparing it.)
    fn expect_from(contents: &str, expected: Result<Vec<u8>, FileErr>) {
        let arena = Bump::new();

        expect_from_str(&arena, contents, &expected);
        expect_from_file(&arena, contents, &expected);
    }

    #[test]
    fn empty() {
        expect_from("", Err(FileErr::FileWasEmpty));
    }

    #[test]
    fn one_newline() {
        expect_from("\n", Ok([b'\n'; 64].into()));
    }

    #[test]
    fn one_byte() {
        expect_from(
            "x",
            Ok({
                let mut vec: Vec<u8> = [b'\n'; 64].as_mut_slice().into();

                vec[0] = b'x';

                vec
            }),
        );
    }

    #[test]
    fn two_bytes() {
        expect_from(
            "xy",
            Ok({
                let mut vec: Vec<u8> = [b'\n'; 64].as_mut_slice().into();

                vec[0] = b'x';
                vec[1] = b'y';

                vec
            }),
        );
    }

    #[test]
    fn max_file_size() {
        let bytes = [b'z'; MAX_ROC_SOURCE_FILE_SIZE];

        expect_from(
            core::str::from_utf8(bytes.as_slice()).unwrap(),
            Ok(bytes.into()),
        );
    }

    #[test]
    fn too_big() {
        let bytes = [b'z'; MAX_ROC_SOURCE_FILE_SIZE + 1];

        expect_from(
            core::str::from_utf8(bytes.as_slice()).unwrap(),
            Err(FileErr::FileWasTooBig(bytes.len())),
        );
    }

    #[derive(Debug, Clone)]
    struct FileBytes(Vec<u8>);

    impl Arbitrary for FileBytes {
        fn arbitrary(g: &mut Gen) -> Self {
            let len = g.size() % (MAX_ROC_SOURCE_FILE_SIZE + 1); // Wrap around to avoid clustering

            FileBytes((0..len).map(|_| u8::arbitrary(g)).collect())
        }
    }

    quickcheck! {
        /// Creates a tempfile containing arbitrary bytes, then reads it with Src64::from_file. Asserts that:
        /// - the returned Result<Src64> is Ok
        /// - its length is a multiple of 64
        /// - it's at least as long as the input bytes were
        /// - it starts_with the input bytes
        fn from_arb_file(bytes: FileBytes) -> bool {
            let FileBytes(bytes) = bytes;

            let dir = tempdir().expect("Failed to create temp dir");
            let file_path = dir.path().join("temp_file");

            // Write random bytes to the temp file
            {
                let mut file = File::create(&file_path).expect("Failed to create temp file");
                file.write_all(&bytes).expect("Failed to write to temp file");
            }

            let arena = Bump::new();

            match Src64::from_file(&arena, &file_path) {
                Ok(src64) => {
                    let len = src64.len();

                    len % 64 == 0 && len >= bytes.len() && src64.bytes().starts_with(&bytes)
                }
                Err(_) => false
            }
        }
    }
}