diff --git a/Cargo.lock b/Cargo.lock index 1414f20e6f..d473e23664 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3770,6 +3770,7 @@ name = "roc_std" version = "0.1.0" dependencies = [ "indoc", + "libc", "pretty_assertions", "quickcheck", "quickcheck_macros", diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index 7e03f8e3b9..0e8a416627 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -423,7 +423,7 @@ pub fn sort_can_defs( let mut defined_symbols: Vec = Vec::new(); let mut defined_symbols_set: ImSet = ImSet::default(); - for symbol in can_defs_by_symbol.keys().into_iter() { + for symbol in can_defs_by_symbol.keys() { defined_symbols.push(*symbol); defined_symbols_set.insert(*symbol); } diff --git a/compiler/can/src/pattern.rs b/compiler/can/src/pattern.rs index 41aa49ca9a..7fa010d419 100644 --- a/compiler/can/src/pattern.rs +++ b/compiler/can/src/pattern.rs @@ -254,7 +254,7 @@ pub fn canonicalize_pattern<'a>( } Ok((int, bound)) => { let sign_str = if is_negative { "-" } else { "" }; - let int_str = format!("{}{}", sign_str, int.to_string()).into_boxed_str(); + let int_str = format!("{}{}", sign_str, int).into_boxed_str(); let i = match int { // Safety: this is fine because I128::MAX = |I128::MIN| - 1 IntValue::I128(n) if is_negative => IntValue::I128(-n), diff --git a/compiler/mono/src/ir.rs b/compiler/mono/src/ir.rs index 0e0c39019a..6a4c1e5449 100644 --- a/compiler/mono/src/ir.rs +++ b/compiler/mono/src/ir.rs @@ -3430,6 +3430,7 @@ pub fn with_hole<'a>( let mut field_symbols = Vec::with_capacity_in(fields.len(), env.arena); let mut can_fields = Vec::with_capacity_in(fields.len(), env.arena); + #[allow(clippy::enum_variant_names)] enum Field { // TODO: rename this since it can handle unspecialized expressions now too Function(Symbol, Variable), diff --git a/roc_std/Cargo.toml b/roc_std/Cargo.toml index ebdd072f11..d005f846f5 100644 --- a/roc_std/Cargo.toml +++ b/roc_std/Cargo.toml @@ -13,3 +13,4 @@ indoc = "1.0.3" pretty_assertions = "1.0.0" quickcheck = "1.0.3" quickcheck_macros = "1.0.0" +libc = "0.2" diff --git a/roc_std/src/lib.rs b/roc_std/src/lib.rs index 0880c83038..4eecf04adb 100644 --- a/roc_std/src/lib.rs +++ b/roc_std/src/lib.rs @@ -2,10 +2,15 @@ #![no_std] use core::convert::From; use core::ffi::c_void; -use core::fmt::{self, Display, Formatter}; +use core::fmt; use core::mem::{ManuallyDrop, MaybeUninit}; -use core::ops::{Deref, DerefMut, Drop}; -use core::{mem, ptr, slice}; +use core::ops::Drop; + +mod roc_list; +mod roc_str; + +pub use roc_list::RocList; +pub use roc_str::RocStr; // A list of C functions that are being imported extern "C" { @@ -29,28 +34,6 @@ pub enum RocOrder { Lt = 2, } -//#[macro_export] -//macro_rules! roclist { -// () => ( -// $crate::RocList::default() -// ); -// ($($x:expr),+ $(,)?) => ( -// $crate::RocList::from_slice(&[$($x),+]) -// ); -//} - -#[repr(C)] -pub struct RocList { - elements: *mut T, - length: usize, -} - -impl Clone for RocList { - fn clone(&self) -> Self { - Self::from_slice(self.as_slice()) - } -} - #[derive(Clone, Copy, Debug)] pub enum Storage { ReadOnly, @@ -58,752 +41,6 @@ pub enum Storage { Capacity(usize), } -impl RocList { - pub fn len(&self) -> usize { - self.length - } - - pub fn is_empty(&self) -> bool { - self.length == 0 - } - - pub fn get(&self, index: usize) -> Option<&T> { - if index < self.len() { - Some(unsafe { - let raw = self.elements.add(index); - - &*raw - }) - } else { - None - } - } - - pub fn storage(&self) -> Option { - use core::cmp::Ordering::*; - - if self.length == 0 { - return None; - } - - unsafe { - let value = *self.get_storage_ptr(); - - // NOTE doesn't work with elements of 16 or more bytes - match isize::cmp(&value, &0) { - Equal => Some(Storage::ReadOnly), - Less => Some(Storage::Refcounted(value)), - Greater => Some(Storage::Capacity(value as usize)), - } - } - } - - fn get_storage_ptr_help(elements: *mut T) -> *mut isize { - let ptr = elements as *mut isize; - - unsafe { ptr.offset(-1) } - } - - fn get_storage_ptr(&self) -> *const isize { - Self::get_storage_ptr_help(self.elements) - } - - fn get_storage_ptr_mut(&mut self) -> *mut isize { - self.get_storage_ptr() as *mut isize - } - - fn set_storage_ptr(&mut self, ptr: *const isize) { - self.elements = unsafe { ptr.offset(1) as *mut T }; - } - - fn get_element_ptr(elements: *const T) -> *const T { - let elem_alignment = core::mem::align_of::(); - let ptr = elements as *const usize; - - unsafe { - if elem_alignment <= core::mem::align_of::() { - ptr.add(1) as *const T - } else { - // If elements have an alignment bigger than usize (e.g. an i128), - // we will have necessarily allocated two usize slots worth of - // space for the storage value (with the first usize slot being - // padding for alignment's sake), and we need to skip past both. - ptr.add(2) as *const T - } - } - } - - pub fn from_slice_with_capacity(slice: &[T], capacity: usize) -> Self - where - T: Clone, - { - assert!(capacity > 0); - assert!(slice.len() <= capacity); - - let element_bytes = capacity * core::mem::size_of::(); - - let padding = { - if core::mem::align_of::() <= core::mem::align_of::() { - // aligned on usize (8 bytes on 64-bit systems) - 0 - } else { - // aligned on 2*usize (16 bytes on 64-bit systems) - core::mem::size_of::() - } - }; - - let num_bytes = core::mem::size_of::() + padding + element_bytes; - - let elements = unsafe { - let raw_ptr = roc_alloc(num_bytes, core::mem::size_of::() as u32) as *mut u8; - - // pointer to the first element - let raw_ptr = Self::get_element_ptr(raw_ptr as *mut T) as *mut T; - - // write the refcount - let refcount_ptr = raw_ptr as *mut isize; - *(refcount_ptr.offset(-1)) = isize::MIN; - - // Clone the elements into the new array. - let target_ptr = raw_ptr; - for (i, value) in slice.iter().cloned().enumerate() { - let target_ptr = target_ptr.add(i); - target_ptr.write(value); - } - - raw_ptr - }; - - Self { - length: slice.len(), - elements, - } - } - - pub fn from_slice(slice: &[T]) -> Self - where - T: Clone, - { - // Avoid allocation with empty list. - if slice.is_empty() { - Self::default() - } else { - Self::from_slice_with_capacity(slice, slice.len()) - } - } - - pub fn as_slice(&self) -> &[T] { - unsafe { core::slice::from_raw_parts(self.elements, self.length) } - } - - pub fn as_mut_slice(&mut self) -> &mut [T] { - unsafe { core::slice::from_raw_parts_mut(self.elements, self.length) } - } - - /// Copy the contents of the given slice into the end of this list, - /// reallocating and resizing as necessary. - pub fn append_slice(&mut self, slice: &[T]) { - let new_len = self.len() + slice.len(); - let storage_ptr = self.get_storage_ptr_mut(); - - // First, ensure that there's enough storage space. - unsafe { - let storage_val = *storage_ptr as isize; - - // Check if this is refcounted, readonly, or has a capcacity. - // (Capacity will be positive if it has a capacity.) - if storage_val > 0 { - let capacity = storage_val as usize; - - // We don't have enough capacity, so we need to get some more. - if capacity < new_len { - // Double our capacity using realloc - let new_cap = 2 * capacity; - let new_ptr = roc_realloc( - storage_ptr as *mut c_void, - new_cap, - capacity, - Self::align_of_storage_ptr(), - ) as *mut isize; - - // Write the new capacity into the new memory - *new_ptr = new_cap as isize; - - // Copy all the existing elements into the new allocation. - ptr::copy_nonoverlapping(self.elements, new_ptr as *mut T, self.len()); - - // Update our storage pointer to be the new one - self.set_storage_ptr(new_ptr); - } - } else { - // If this was reference counted, decrement the refcount! - if storage_val < 0 { - let refcount = storage_val; - - // Either deallocate or decrement. - if refcount == REFCOUNT_1 { - roc_dealloc(storage_ptr as *mut c_void, Self::align_of_storage_ptr()); - } else { - *storage_ptr = refcount - 1; - } - } - - // This is either refcounted or readonly; either way, we need - // to clone the elements! - - // Double the capacity we need, in case there are future additions. - let new_cap = new_len * 2; - let new_ptr = roc_alloc(new_cap, Self::align_of_storage_ptr()) as *mut isize; - - // Write the new capacity into the new memory; this list is - // now unique, and gets its own capacity! - *new_ptr = new_cap as isize; - - // Copy all the existing elements into the new allocation. - ptr::copy_nonoverlapping(self.elements, new_ptr as *mut T, self.len()); - - // Update our storage pointer to be the new one - self.set_storage_ptr(new_ptr); - } - - // Since this is an append, we want to start writing new elements - // into the memory immediately after the current last element. - let dest = self.elements.add(self.len()); - - // There's now enough storage to append the contents of the slice - // in-place, so do that! - ptr::copy_nonoverlapping(slice.as_ptr(), dest, self.len()); - } - - self.length = new_len; - } - - /// The alignment we need is either the alignment of T, or else - /// the alignment of usize, whichever is higher. That's because we need - /// to store both T values as well as the refcount/capacity storage slot. - fn align_of_storage_ptr() -> u32 { - mem::align_of::().max(mem::align_of::()) as u32 - } - - unsafe fn drop_pointer_to_first_argument(ptr: *mut T) { - let storage_ptr = Self::get_storage_ptr_help(ptr); - let storage_val = *storage_ptr; - - if storage_val == REFCOUNT_1 || storage_val > 0 { - // If we have no more references, or if this was unique, - // deallocate it. - roc_dealloc(storage_ptr as *mut c_void, Self::align_of_storage_ptr()); - } else if storage_val < 0 { - // If this still has more references, decrement one. - *storage_ptr = storage_val - 1; - } - - // The only remaining option is that this is in readonly memory, - // in which case we shouldn't attempt to do anything to it. - } -} - -impl Deref for RocList { - type Target = [T]; - - fn deref(&self) -> &[T] { - self.as_slice() - } -} - -impl DerefMut for RocList { - fn deref_mut(&mut self) -> &mut [T] { - self.as_mut_slice() - } -} - -impl<'a, T> IntoIterator for &'a RocList { - type Item = &'a T; - - type IntoIter = <&'a [T] as IntoIterator>::IntoIter; - - fn into_iter(self) -> Self::IntoIter { - self.as_slice().iter() - } -} - -impl IntoIterator for RocList { - type Item = T; - - type IntoIter = IntoIter; - - fn into_iter(self) -> Self::IntoIter { - let remaining = self.len(); - - let buf = unsafe { NonNull::new_unchecked(self.elements as _) }; - let ptr = self.elements; - - IntoIter { - buf, - ptr, - remaining, - } - } -} - -use core::ptr::NonNull; - -pub struct IntoIter { - buf: NonNull, - // pub cap: usize, - ptr: *const T, - remaining: usize, -} - -impl Iterator for IntoIter { - type Item = T; - - fn next(&mut self) -> Option { - next_help(self) - } -} - -fn next_help(this: &mut IntoIter) -> Option { - if this.remaining == 0 { - None - } else if mem::size_of::() == 0 { - // purposefully don't use 'ptr.offset' because for - // vectors with 0-size elements this would return the - // same pointer. - this.remaining -= 1; - - // Make up a value of this ZST. - Some(unsafe { mem::zeroed() }) - } else { - let old = this.ptr; - this.ptr = unsafe { this.ptr.offset(1) }; - this.remaining -= 1; - - Some(unsafe { ptr::read(old) }) - } -} - -impl Drop for IntoIter { - fn drop(&mut self) { - // drop the elements that we have not yet returned. - while let Some(item) = next_help(self) { - drop(item); - } - - // deallocate the whole buffer - unsafe { - RocList::drop_pointer_to_first_argument(self.buf.as_mut()); - } - } -} - -impl Default for RocList { - fn default() -> Self { - Self { - length: 0, - elements: core::ptr::null_mut(), - } - } -} - -impl fmt::Debug for RocList { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - // RocList { storage: Refcounted(3), elements: [ 1,2,3,4] } - f.debug_struct("RocList") - .field("storage", &self.storage()) - .field("elements", &self.as_slice()) - .finish() - } -} - -impl PartialEq for RocList { - fn eq(&self, other: &Self) -> bool { - if self.length != other.length { - return false; - } - - for i in 0..self.length { - unsafe { - if *self.elements.add(i) != *other.elements.add(i) { - return false; - } - } - } - - true - } -} - -impl Eq for RocList {} - -impl Drop for RocList { - fn drop(&mut self) { - if !self.is_empty() { - let storage_ptr = self.get_storage_ptr_mut(); - - unsafe { - let storage_val = *storage_ptr; - - if storage_val == REFCOUNT_1 || storage_val > 0 { - // If we have no more references, or if this was unique, - // deallocate it. - roc_dealloc(storage_ptr as *mut c_void, Self::align_of_storage_ptr()); - } else if storage_val < 0 { - // If this still has more references, decrement one. - *storage_ptr = storage_val - 1; - } - - // The only remaining option is that this is in readonly memory, - // in which case we shouldn't attempt to do anything to it. - } - } - } -} - -#[repr(C)] -pub struct RocStr { - elements: *mut u8, - length: usize, -} - -impl RocStr { - pub fn len(&self) -> usize { - if self.is_small_str() { - let bytes = self.length.to_ne_bytes(); - let last_byte = bytes[mem::size_of::() - 1]; - - (last_byte ^ 0b1000_0000) as usize - } else { - self.length - } - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - pub fn is_small_str(&self) -> bool { - (self.length as isize) < 0 - } - - pub fn get(&self, index: usize) -> Option<&u8> { - if index < self.len() { - Some(unsafe { - let raw = if self.is_small_str() { - self.get_small_str_ptr().add(index) - } else { - self.elements.add(index) - }; - - &*raw - }) - } else { - None - } - } - - pub fn get_bytes(&self) -> *const u8 { - if self.is_small_str() { - self.get_small_str_ptr() - } else { - self.elements - } - } - - pub fn storage(&self) -> Option { - use core::cmp::Ordering::*; - - if self.is_small_str() { - return None; - } - - unsafe { - let value = *self.get_storage_ptr(); - - // NOTE doesn't work with elements of 16 or more bytes - match isize::cmp(&(value as isize), &0) { - Equal => Some(Storage::ReadOnly), - Less => Some(Storage::Refcounted(value)), - Greater => Some(Storage::Capacity(value as usize)), - } - } - } - - fn get_storage_ptr(&self) -> *const isize { - let ptr = self.elements as *const isize; - - unsafe { ptr.offset(-1) } - } - - fn get_storage_ptr_mut(&mut self) -> *mut isize { - self.get_storage_ptr() as *mut isize - } - - fn get_element_ptr(elements: *const u8) -> *const usize { - let elem_alignment = core::mem::align_of::(); - let ptr = elements as *const usize; - - unsafe { - if elem_alignment <= core::mem::align_of::() { - ptr.add(1) - } else { - // If elements have an alignment bigger than usize (e.g. an i128), - // we will have necessarily allocated two usize slots worth of - // space for the storage value (with the first usize slot being - // padding for alignment's sake), and we need to skip past both. - ptr.add(2) - } - } - } - - fn get_small_str_ptr(&self) -> *const u8 { - (self as *const Self).cast() - } - - fn get_small_str_ptr_mut(&mut self) -> *mut u8 { - (self as *mut Self).cast() - } - - fn from_slice_with_capacity_str(slice: &[u8], capacity: usize) -> Self { - assert!( - slice.len() <= capacity, - "RocStr::from_slice_with_capacity_str length bigger than capacity {} {}", - slice.len(), - capacity - ); - if capacity < core::mem::size_of::() { - let mut rocstr = Self::default(); - let target_ptr = rocstr.get_small_str_ptr_mut(); - let source_ptr = slice.as_ptr() as *const u8; - for index in 0..slice.len() { - unsafe { - *target_ptr.add(index) = *source_ptr.add(index); - } - } - // Write length and small string bit to last byte of length. - let mut bytes = rocstr.length.to_ne_bytes(); - bytes[mem::size_of::() - 1] = capacity as u8 ^ 0b1000_0000; - rocstr.length = usize::from_ne_bytes(bytes); - - rocstr - } else { - let ptr = slice.as_ptr(); - let element_bytes = capacity; - - let num_bytes = core::mem::size_of::() + element_bytes; - - let elements = unsafe { - let raw_ptr = roc_alloc(num_bytes, core::mem::size_of::() as u32) as *mut u8; - // write the capacity - let capacity_ptr = raw_ptr as *mut usize; - *capacity_ptr = capacity; - - let raw_ptr = Self::get_element_ptr(raw_ptr as *mut u8); - - // write the refcount - let refcount_ptr = raw_ptr as *mut isize; - *(refcount_ptr.offset(-1)) = isize::MIN; - - { - // NOTE: using a memcpy here causes weird issues - let target_ptr = raw_ptr as *mut u8; - let source_ptr = ptr as *const u8; - let length = slice.len(); - - for index in 0..length { - *target_ptr.add(index) = *source_ptr.add(index); - } - } - - raw_ptr as *mut u8 - }; - - Self { - length: slice.len(), - elements, - } - } - } - - pub fn from_slice(slice: &[u8]) -> Self { - Self::from_slice_with_capacity_str(slice, slice.len()) - } - - pub fn as_slice(&self) -> &[u8] { - if self.is_empty() { - &[] - } else if self.is_small_str() { - unsafe { core::slice::from_raw_parts(self.get_small_str_ptr(), self.len()) } - } else { - unsafe { core::slice::from_raw_parts(self.elements, self.length) } - } - } - - pub fn as_mut_slice(&mut self) -> &mut [u8] { - if self.is_empty() { - &mut [] - } else if self.is_small_str() { - unsafe { core::slice::from_raw_parts_mut(self.get_small_str_ptr_mut(), self.len()) } - } else { - unsafe { core::slice::from_raw_parts_mut(self.elements, self.length) } - } - } - - pub fn as_str(&self) -> &str { - let slice = self.as_slice(); - - unsafe { core::str::from_utf8_unchecked(slice) } - } - - pub fn as_mut_str(&mut self) -> &mut str { - let slice = self.as_mut_slice(); - - unsafe { core::str::from_utf8_unchecked_mut(slice) } - } - - /// Write a CStr (null-terminated) representation of this RocStr into - /// the given buffer. - /// - /// # Safety - /// This assumes the given buffer has enough space, so make sure you only - /// pass in a pointer to an allocation that's at least as long as this Str! - pub unsafe fn write_c_str(&self, buf: *mut char) { - if self.is_small_str() { - ptr::copy_nonoverlapping(self.get_small_str_ptr(), buf as *mut u8, self.len()); - } else { - ptr::copy_nonoverlapping(self.elements, buf as *mut u8, self.len()); - } - - // null-terminate - *(buf.add(self.len())) = '\0'; - } -} - -impl Deref for RocStr { - type Target = str; - - fn deref(&self) -> &str { - self.as_str() - } -} - -impl DerefMut for RocStr { - fn deref_mut(&mut self) -> &mut str { - self.as_mut_str() - } -} - -impl Default for RocStr { - fn default() -> Self { - Self { - length: isize::MIN as usize, - elements: core::ptr::null_mut(), - } - } -} - -impl From<&str> for RocStr { - fn from(str: &str) -> Self { - Self::from_slice(str.as_bytes()) - } -} - -impl Display for RocStr { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - self.as_str().fmt(f) - } -} - -impl fmt::Debug for RocStr { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - // RocStr { is_small_str: false, storage: Refcounted(3), elements: [ 1,2,3,4] } - - match core::str::from_utf8(self.as_slice()) { - Ok(string) => f - .debug_struct("RocStr") - .field("is_small_str", &self.is_small_str()) - .field("storage", &self.storage()) - .field("string_contents", &string) - .finish(), - Err(_) => f - .debug_struct("RocStr") - .field("is_small_str", &self.is_small_str()) - .field("storage", &self.storage()) - .field("byte_contents", &self.as_slice()) - .finish(), - } - } -} - -impl PartialEq for RocStr { - fn eq(&self, other: &Self) -> bool { - self.as_slice() == other.as_slice() - } -} - -impl Eq for RocStr {} - -impl Clone for RocStr { - fn clone(&self) -> Self { - if self.is_small_str() { - Self { - elements: self.elements, - length: self.length, - } - } else { - let capacity_size = core::mem::size_of::(); - let copy_length = self.length + capacity_size; - let elements = unsafe { - // We use *mut u8 here even though technically these are - // usize-aligned (due to the refcount slot). - // This avoids any potential edge cases around there somehow - // being unreadable memory after the last byte, which would - // potentially get read when reading bytes at a time. - let raw_ptr = - roc_alloc(copy_length, core::mem::size_of::() as u32) as *mut u8; - let dest_slice = slice::from_raw_parts_mut(raw_ptr, copy_length); - let src_ptr = self.elements.offset(-(capacity_size as isize)) as *mut u8; - let src_slice = slice::from_raw_parts(src_ptr, copy_length); - - dest_slice.copy_from_slice(src_slice); - - *(raw_ptr as *mut usize) = self.length; - - (raw_ptr as *mut u8).add(capacity_size) - }; - - Self { - elements, - length: self.length, - } - } - } -} - -impl Drop for RocStr { - fn drop(&mut self) { - if !self.is_small_str() { - let storage_ptr = self.get_storage_ptr_mut(); - - unsafe { - let storage_val = *storage_ptr; - - if storage_val == REFCOUNT_1 || storage_val > 0 { - // If we have no more references, or if this was unique, - // deallocate it. - roc_dealloc(storage_ptr as *mut c_void, mem::align_of::() as u32); - } else if storage_val < 0 { - // If this still has more references, decrement one. - *storage_ptr = storage_val - 1; - } - - // The only remaining option is that this is in readonly memory, - // in which case we shouldn't attempt to do anything to it. - } - } - } -} - /// Like a Rust `Result`, but following Roc's ABI instead of Rust's. /// (Using Rust's `Result` instead of this will not work properly with Roc code!) /// diff --git a/roc_std/src/roc_list.rs b/roc_std/src/roc_list.rs new file mode 100644 index 0000000000..796a9115e7 --- /dev/null +++ b/roc_std/src/roc_list.rs @@ -0,0 +1,420 @@ +use core::ffi::c_void; +use core::fmt; +use core::ops::{Deref, DerefMut, Drop}; +use core::{mem, ptr}; + +use crate::{roc_alloc, roc_dealloc, roc_realloc, Storage, REFCOUNT_1}; + +#[repr(C)] +pub struct RocList { + elements: *mut T, + length: usize, +} + +impl Clone for RocList { + fn clone(&self) -> Self { + Self::from_slice(self.as_slice()) + } +} + +impl RocList { + pub fn len(&self) -> usize { + self.length + } + + pub fn is_empty(&self) -> bool { + self.length == 0 + } + + pub fn get(&self, index: usize) -> Option<&T> { + if index < self.len() { + Some(unsafe { + let raw = self.elements.add(index); + + &*raw + }) + } else { + None + } + } + + pub fn storage(&self) -> Option { + use core::cmp::Ordering::*; + + if self.length == 0 { + return None; + } + + unsafe { + let value = *self.get_storage_ptr(); + + // NOTE doesn't work with elements of 16 or more bytes + match isize::cmp(&value, &0) { + Equal => Some(Storage::ReadOnly), + Less => Some(Storage::Refcounted(value)), + Greater => Some(Storage::Capacity(value as usize)), + } + } + } + + fn get_storage_ptr_help(elements: *mut T) -> *mut isize { + let ptr = elements as *mut isize; + + unsafe { ptr.offset(-1) } + } + + fn get_storage_ptr(&self) -> *const isize { + Self::get_storage_ptr_help(self.elements) + } + + fn get_storage_ptr_mut(&mut self) -> *mut isize { + self.get_storage_ptr() as *mut isize + } + + fn set_storage_ptr(&mut self, ptr: *const isize) { + self.elements = unsafe { ptr.offset(1) as *mut T }; + } + + fn get_element_ptr(elements: *const T) -> *const T { + let elem_alignment = core::mem::align_of::(); + let ptr = elements as *const usize; + + unsafe { + if elem_alignment <= core::mem::align_of::() { + ptr.add(1) as *const T + } else { + // If elements have an alignment bigger than usize (e.g. an i128), + // we will have necessarily allocated two usize slots worth of + // space for the storage value (with the first usize slot being + // padding for alignment's sake), and we need to skip past both. + ptr.add(2) as *const T + } + } + } + + pub fn from_slice_with_capacity(slice: &[T], capacity: usize) -> Self + where + T: Clone, + { + assert!(capacity > 0); + assert!(slice.len() <= capacity); + + let element_bytes = capacity * core::mem::size_of::(); + + let padding = { + if core::mem::align_of::() <= core::mem::align_of::() { + // aligned on usize (8 bytes on 64-bit systems) + 0 + } else { + // aligned on 2*usize (16 bytes on 64-bit systems) + core::mem::size_of::() + } + }; + + let num_bytes = core::mem::size_of::() + padding + element_bytes; + + let elements = unsafe { + let raw_ptr = roc_alloc(num_bytes, core::mem::size_of::() as u32) as *mut u8; + + // pointer to the first element + let raw_ptr = Self::get_element_ptr(raw_ptr as *mut T) as *mut T; + + // write the refcount + let refcount_ptr = raw_ptr as *mut isize; + *(refcount_ptr.offset(-1)) = isize::MIN; + + // Clone the elements into the new array. + let target_ptr = raw_ptr; + for (i, value) in slice.iter().cloned().enumerate() { + let target_ptr = target_ptr.add(i); + target_ptr.write(value); + } + + raw_ptr + }; + + Self { + length: slice.len(), + elements, + } + } + + pub fn from_slice(slice: &[T]) -> Self + where + T: Clone, + { + // Avoid allocation with empty list. + if slice.is_empty() { + Self::default() + } else { + Self::from_slice_with_capacity(slice, slice.len()) + } + } + + pub fn as_slice(&self) -> &[T] { + unsafe { core::slice::from_raw_parts(self.elements, self.length) } + } + + pub fn as_mut_slice(&mut self) -> &mut [T] { + unsafe { core::slice::from_raw_parts_mut(self.elements, self.length) } + } + + /// Copy the contents of the given slice into the end of this list, + /// reallocating and resizing as necessary. + pub fn append_slice(&mut self, slice: &[T]) { + let new_len = self.len() + slice.len(); + let storage_ptr = self.get_storage_ptr_mut(); + + // First, ensure that there's enough storage space. + unsafe { + let storage_val = *storage_ptr as isize; + + // Check if this is refcounted, readonly, or has a capcacity. + // (Capacity will be positive if it has a capacity.) + if storage_val > 0 { + let capacity = storage_val as usize; + + // We don't have enough capacity, so we need to get some more. + if capacity < new_len { + // Double our capacity using realloc + let new_cap = 2 * capacity; + let new_ptr = roc_realloc( + storage_ptr as *mut c_void, + new_cap, + capacity, + Self::align_of_storage_ptr(), + ) as *mut isize; + + // Write the new capacity into the new memory + *new_ptr = new_cap as isize; + + // Copy all the existing elements into the new allocation. + ptr::copy_nonoverlapping(self.elements, new_ptr as *mut T, self.len()); + + // Update our storage pointer to be the new one + self.set_storage_ptr(new_ptr); + } + } else { + // If this was reference counted, decrement the refcount! + if storage_val < 0 { + let refcount = storage_val; + + // Either deallocate or decrement. + if refcount == REFCOUNT_1 { + roc_dealloc(storage_ptr as *mut c_void, Self::align_of_storage_ptr()); + } else { + *storage_ptr = refcount - 1; + } + } + + // This is either refcounted or readonly; either way, we need + // to clone the elements! + + // Double the capacity we need, in case there are future additions. + let new_cap = new_len * 2; + let new_ptr = roc_alloc(new_cap, Self::align_of_storage_ptr()) as *mut isize; + + // Write the new capacity into the new memory; this list is + // now unique, and gets its own capacity! + *new_ptr = new_cap as isize; + + // Copy all the existing elements into the new allocation. + ptr::copy_nonoverlapping(self.elements, new_ptr as *mut T, self.len()); + + // Update our storage pointer to be the new one + self.set_storage_ptr(new_ptr); + } + + // Since this is an append, we want to start writing new elements + // into the memory immediately after the current last element. + let dest = self.elements.add(self.len()); + + // There's now enough storage to append the contents of the slice + // in-place, so do that! + ptr::copy_nonoverlapping(slice.as_ptr(), dest, self.len()); + } + + self.length = new_len; + } + + /// The alignment we need is either the alignment of T, or else + /// the alignment of usize, whichever is higher. That's because we need + /// to store both T values as well as the refcount/capacity storage slot. + fn align_of_storage_ptr() -> u32 { + mem::align_of::().max(mem::align_of::()) as u32 + } + + unsafe fn drop_pointer_to_first_argument(ptr: *mut T) { + let storage_ptr = Self::get_storage_ptr_help(ptr); + let storage_val = *storage_ptr; + + if storage_val == REFCOUNT_1 || storage_val > 0 { + // If we have no more references, or if this was unique, + // deallocate it. + roc_dealloc(storage_ptr as *mut c_void, Self::align_of_storage_ptr()); + } else if storage_val < 0 { + // If this still has more references, decrement one. + *storage_ptr = storage_val - 1; + } + + // The only remaining option is that this is in readonly memory, + // in which case we shouldn't attempt to do anything to it. + } +} + +impl Deref for RocList { + type Target = [T]; + + fn deref(&self) -> &[T] { + self.as_slice() + } +} + +impl DerefMut for RocList { + fn deref_mut(&mut self) -> &mut [T] { + self.as_mut_slice() + } +} + +impl<'a, T> IntoIterator for &'a RocList { + type Item = &'a T; + + type IntoIter = <&'a [T] as IntoIterator>::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.as_slice().iter() + } +} + +impl IntoIterator for RocList { + type Item = T; + + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + let remaining = self.len(); + + let buf = unsafe { NonNull::new_unchecked(self.elements as _) }; + let ptr = self.elements; + + IntoIter { + buf, + ptr, + remaining, + } + } +} + +use core::ptr::NonNull; + +pub struct IntoIter { + buf: NonNull, + // pub cap: usize, + ptr: *const T, + remaining: usize, +} + +impl Iterator for IntoIter { + type Item = T; + + fn next(&mut self) -> Option { + next_help(self) + } +} + +fn next_help(this: &mut IntoIter) -> Option { + if this.remaining == 0 { + None + } else if mem::size_of::() == 0 { + // purposefully don't use 'ptr.offset' because for + // vectors with 0-size elements this would return the + // same pointer. + this.remaining -= 1; + + // Make up a value of this ZST. + Some(unsafe { mem::zeroed() }) + } else { + let old = this.ptr; + this.ptr = unsafe { this.ptr.offset(1) }; + this.remaining -= 1; + + Some(unsafe { ptr::read(old) }) + } +} + +impl Drop for IntoIter { + fn drop(&mut self) { + // drop the elements that we have not yet returned. + while let Some(item) = next_help(self) { + drop(item); + } + + // deallocate the whole buffer + unsafe { + RocList::drop_pointer_to_first_argument(self.buf.as_mut()); + } + } +} + +impl Default for RocList { + fn default() -> Self { + Self { + length: 0, + elements: core::ptr::null_mut(), + } + } +} + +impl fmt::Debug for RocList { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // RocList { storage: Refcounted(3), elements: [ 1,2,3,4] } + f.debug_struct("RocList") + .field("storage", &self.storage()) + .field("elements", &self.as_slice()) + .finish() + } +} + +impl PartialEq for RocList { + fn eq(&self, other: &Self) -> bool { + if self.length != other.length { + return false; + } + + for i in 0..self.length { + unsafe { + if *self.elements.add(i) != *other.elements.add(i) { + return false; + } + } + } + + true + } +} + +impl Eq for RocList {} + +impl Drop for RocList { + fn drop(&mut self) { + if !self.is_empty() { + let storage_ptr = self.get_storage_ptr_mut(); + + unsafe { + let storage_val = *storage_ptr; + + if storage_val == REFCOUNT_1 || storage_val > 0 { + // If we have no more references, or if this was unique, + // deallocate it. + roc_dealloc(storage_ptr as *mut c_void, Self::align_of_storage_ptr()); + } else if storage_val < 0 { + // If this still has more references, decrement one. + *storage_ptr = storage_val - 1; + } + + // The only remaining option is that this is in readonly memory, + // in which case we shouldn't attempt to do anything to it. + } + } + } +} diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs new file mode 100644 index 0000000000..e2c1c343e1 --- /dev/null +++ b/roc_std/src/roc_str.rs @@ -0,0 +1,362 @@ +use core::ffi::c_void; +use core::fmt::{self, Display, Formatter}; +use core::ops::{Deref, DerefMut, Drop}; +use core::{mem, ptr, slice}; + +use crate::{roc_alloc, roc_dealloc, Storage, REFCOUNT_1}; + +#[repr(C)] +pub struct RocStr { + elements: *mut u8, + length: usize, +} + +impl RocStr { + pub const SIZE: usize = core::mem::size_of::(); + pub const MASK: u8 = 0b1000_0000; + + pub fn len(&self) -> usize { + if self.is_small_str() { + let bytes = self.length.to_ne_bytes(); + let last_byte = bytes[mem::size_of::() - 1]; + + (last_byte ^ Self::MASK) as usize + } else { + self.length + } + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn is_small_str(&self) -> bool { + (self.length as isize) < 0 + } + + pub const fn empty() -> Self { + Self { + length: isize::MIN as usize, + elements: core::ptr::null_mut(), + } + } + + pub fn get(&self, index: usize) -> Option<&u8> { + if index < self.len() { + Some(unsafe { + let raw = if self.is_small_str() { + self.get_small_str_ptr().add(index) + } else { + self.elements.add(index) + }; + + &*raw + }) + } else { + None + } + } + + pub fn get_bytes(&self) -> *const u8 { + if self.is_small_str() { + self.get_small_str_ptr() + } else { + self.elements + } + } + + pub fn storage(&self) -> Option { + use core::cmp::Ordering::*; + + if self.is_small_str() { + return None; + } + + unsafe { + let value = *self.get_storage_ptr(); + + // NOTE doesn't work with elements of 16 or more bytes + match isize::cmp(&(value as isize), &0) { + Equal => Some(Storage::ReadOnly), + Less => Some(Storage::Refcounted(value)), + Greater => Some(Storage::Capacity(value as usize)), + } + } + } + + fn get_storage_ptr(&self) -> *const isize { + let ptr = self.elements as *const isize; + + unsafe { ptr.offset(-1) } + } + + fn get_storage_ptr_mut(&mut self) -> *mut isize { + self.get_storage_ptr() as *mut isize + } + + fn get_element_ptr(elements: *const u8) -> *const usize { + let elem_alignment = core::mem::align_of::(); + let ptr = elements as *const usize; + + unsafe { + if elem_alignment <= core::mem::align_of::() { + ptr.add(1) + } else { + // If elements have an alignment bigger than usize (e.g. an i128), + // we will have necessarily allocated two usize slots worth of + // space for the storage value (with the first usize slot being + // padding for alignment's sake), and we need to skip past both. + ptr.add(2) + } + } + } + + fn get_small_str_ptr(&self) -> *const u8 { + (self as *const Self).cast() + } + + fn get_small_str_ptr_mut(&mut self) -> *mut u8 { + (self as *mut Self).cast() + } + + const fn from_slice_small_str(slice: &[u8]) -> Self { + assert!(slice.len() < Self::SIZE); + + let mut array = [0u8; Self::SIZE]; + + // while loop because for uses Iterator and is not available in const contexts + let mut i = 0; + while i < slice.len() { + array[i] = slice[i]; + i += 1; + } + + let highest_index = Self::SIZE - 1; + array[highest_index] = slice.len() as u8 | Self::MASK; + + unsafe { core::mem::transmute(array) } + } + + fn from_slice_with_capacity_str(slice: &[u8], capacity: usize) -> Self { + assert!( + slice.len() <= capacity, + "RocStr::from_slice_with_capacity_str length bigger than capacity {} {}", + slice.len(), + capacity + ); + if capacity < core::mem::size_of::() { + Self::from_slice_small_str(slice) + } else { + let ptr = slice.as_ptr(); + let element_bytes = capacity; + + let num_bytes = core::mem::size_of::() + element_bytes; + + let elements = unsafe { + let raw_ptr = roc_alloc(num_bytes, core::mem::size_of::() as u32) as *mut u8; + // write the capacity + let capacity_ptr = raw_ptr as *mut usize; + *capacity_ptr = capacity; + + let raw_ptr = Self::get_element_ptr(raw_ptr as *mut u8); + + // write the refcount + let refcount_ptr = raw_ptr as *mut isize; + *(refcount_ptr.offset(-1)) = isize::MIN; + + { + // NOTE: using a memcpy here causes weird issues + let target_ptr = raw_ptr as *mut u8; + let source_ptr = ptr as *const u8; + let length = slice.len(); + + for index in 0..length { + *target_ptr.add(index) = *source_ptr.add(index); + } + } + + raw_ptr as *mut u8 + }; + + Self { + length: slice.len(), + elements, + } + } + } + + pub fn from_slice(slice: &[u8]) -> Self { + Self::from_slice_with_capacity_str(slice, slice.len()) + } + + pub fn as_slice(&self) -> &[u8] { + if self.is_empty() { + &[] + } else if self.is_small_str() { + unsafe { core::slice::from_raw_parts(self.get_small_str_ptr(), self.len()) } + } else { + unsafe { core::slice::from_raw_parts(self.elements, self.length) } + } + } + + pub fn as_mut_slice(&mut self) -> &mut [u8] { + if self.is_empty() { + &mut [] + } else if self.is_small_str() { + unsafe { core::slice::from_raw_parts_mut(self.get_small_str_ptr_mut(), self.len()) } + } else { + unsafe { core::slice::from_raw_parts_mut(self.elements, self.length) } + } + } + + pub fn as_str(&self) -> &str { + let slice = self.as_slice(); + + unsafe { core::str::from_utf8_unchecked(slice) } + } + + pub fn as_mut_str(&mut self) -> &mut str { + let slice = self.as_mut_slice(); + + unsafe { core::str::from_utf8_unchecked_mut(slice) } + } + + /// Write a CStr (null-terminated) representation of this RocStr into + /// the given buffer. + /// + /// # Safety + /// This assumes the given buffer has enough space, so make sure you only + /// pass in a pointer to an allocation that's at least as long as this Str! + pub unsafe fn write_c_str(&self, buf: *mut char) { + if self.is_small_str() { + ptr::copy_nonoverlapping(self.get_small_str_ptr(), buf as *mut u8, self.len()); + } else { + ptr::copy_nonoverlapping(self.elements, buf as *mut u8, self.len()); + } + + // null-terminate + *(buf.add(self.len())) = '\0'; + } +} + +impl Deref for RocStr { + type Target = str; + + fn deref(&self) -> &str { + self.as_str() + } +} + +impl DerefMut for RocStr { + fn deref_mut(&mut self) -> &mut str { + self.as_mut_str() + } +} + +impl Default for RocStr { + fn default() -> Self { + Self::empty() + } +} + +impl From<&str> for RocStr { + fn from(str: &str) -> Self { + Self::from_slice(str.as_bytes()) + } +} + +impl Display for RocStr { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + self.as_str().fmt(f) + } +} + +impl fmt::Debug for RocStr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // RocStr { is_small_str: false, storage: Refcounted(3), elements: [ 1,2,3,4] } + + match core::str::from_utf8(self.as_slice()) { + Ok(string) => f + .debug_struct("RocStr") + .field("is_small_str", &self.is_small_str()) + .field("storage", &self.storage()) + .field("string_contents", &string) + .finish(), + Err(_) => f + .debug_struct("RocStr") + .field("is_small_str", &self.is_small_str()) + .field("storage", &self.storage()) + .field("byte_contents", &self.as_slice()) + .finish(), + } + } +} + +impl PartialEq for RocStr { + fn eq(&self, other: &Self) -> bool { + self.as_slice() == other.as_slice() + } +} + +impl Eq for RocStr {} + +impl Clone for RocStr { + fn clone(&self) -> Self { + if self.is_small_str() { + Self { + elements: self.elements, + length: self.length, + } + } else { + let capacity_size = core::mem::size_of::(); + let copy_length = self.length + capacity_size; + let elements = unsafe { + // We use *mut u8 here even though technically these are + // usize-aligned (due to the refcount slot). + // This avoids any potential edge cases around there somehow + // being unreadable memory after the last byte, which would + // potentially get read when reading bytes at a time. + let raw_ptr = + roc_alloc(copy_length, core::mem::size_of::() as u32) as *mut u8; + let dest_slice = slice::from_raw_parts_mut(raw_ptr, copy_length); + let src_ptr = self.elements.offset(-(capacity_size as isize)) as *mut u8; + let src_slice = slice::from_raw_parts(src_ptr, copy_length); + + dest_slice.copy_from_slice(src_slice); + + *(raw_ptr as *mut usize) = self.length; + + (raw_ptr as *mut u8).add(capacity_size) + }; + + Self { + elements, + length: self.length, + } + } + } +} + +impl Drop for RocStr { + fn drop(&mut self) { + if !self.is_small_str() { + let storage_ptr = self.get_storage_ptr_mut(); + + unsafe { + let storage_val = *storage_ptr; + + if storage_val == REFCOUNT_1 || storage_val > 0 { + // If we have no more references, or if this was unique, + // deallocate it. + roc_dealloc(storage_ptr as *mut c_void, mem::align_of::() as u32); + } else if storage_val < 0 { + // If this still has more references, decrement one. + *storage_ptr = storage_val - 1; + } + + // The only remaining option is that this is in readonly memory, + // in which case we shouldn't attempt to do anything to it. + } + } + } +} diff --git a/roc_std/tests/test_roc_std.rs b/roc_std/tests/test_roc_std.rs index 4d59fdbd63..a8ee28eaf8 100644 --- a/roc_std/tests/test_roc_std.rs +++ b/roc_std/tests/test_roc_std.rs @@ -5,9 +5,69 @@ extern crate pretty_assertions; extern crate quickcheck; extern crate roc_std; +use core::ffi::c_void; + +#[no_mangle] +pub unsafe extern "C" fn roc_alloc(size: usize, _alignment: u32) -> *mut c_void { + libc::malloc(size) +} + +#[no_mangle] +pub unsafe extern "C" fn roc_realloc( + c_ptr: *mut c_void, + new_size: usize, + _old_size: usize, + _alignment: u32, +) -> *mut c_void { + libc::realloc(c_ptr, new_size) +} + +#[no_mangle] +pub unsafe extern "C" fn roc_dealloc(c_ptr: *mut c_void, _alignment: u32) { + libc::free(c_ptr) +} + #[cfg(test)] mod test_roc_std { use roc_std::RocResult; + use roc_std::RocStr; + + fn roc_str_byte_representation(string: &RocStr) -> [u8; RocStr::SIZE] { + unsafe { core::mem::transmute_copy(string) } + } + + #[test] + fn roc_str_empty() { + let actual = roc_str_byte_representation(&RocStr::empty()); + + let mut expected = [0u8; RocStr::SIZE]; + expected[RocStr::SIZE - 1] = RocStr::MASK; + + assert_eq!(actual, expected); + } + + #[test] + fn roc_str_single_char() { + let actual = roc_str_byte_representation(&RocStr::from_slice(b"a")); + + let mut expected = [0u8; RocStr::SIZE]; + expected[0] = b'a'; + expected[RocStr::SIZE - 1] = RocStr::MASK | 1; + + assert_eq!(actual, expected); + } + + #[test] + fn roc_str_max_small_string() { + let bytes: Vec<_> = std::iter::repeat(b'a').take(RocStr::SIZE - 1).collect(); + let actual = roc_str_byte_representation(&RocStr::from_slice(&bytes)); + + let mut expected = [0u8; RocStr::SIZE]; + expected[..RocStr::SIZE - 1].copy_from_slice(&bytes); + expected[RocStr::SIZE - 1] = RocStr::MASK | bytes.len() as u8; + + assert_eq!(actual, expected); + } #[test] fn roc_result_to_rust_result() {