mirror of
https://github.com/roc-lang/roc.git
synced 2025-10-02 16:21:11 +00:00
expose list/string slices to rust users
This commit is contained in:
parent
95a4bb988c
commit
9865096c24
3 changed files with 535 additions and 126 deletions
|
@ -12,6 +12,7 @@ use core::{
|
||||||
ops::{Deref, DerefMut},
|
ops::{Deref, DerefMut},
|
||||||
ptr::{self, NonNull},
|
ptr::{self, NonNull},
|
||||||
};
|
};
|
||||||
|
use std::ops::Range;
|
||||||
|
|
||||||
use crate::{roc_alloc, roc_dealloc, roc_realloc, storage::Storage};
|
use crate::{roc_alloc, roc_dealloc, roc_realloc, storage::Storage};
|
||||||
|
|
||||||
|
@ -208,7 +209,7 @@ impl<T> RocList<T> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Useful for doing memcpy on the elements. Returns NULL if list is empty.
|
/// Useful for doing memcpy on the elements. Returns NULL if list is empty.
|
||||||
pub(crate) unsafe fn ptr_to_first_elem(&self) -> *const T {
|
pub(crate) fn ptr_to_first_elem(&self) -> *const T {
|
||||||
unsafe { core::mem::transmute(self.elements) }
|
unsafe { core::mem::transmute(self.elements) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -222,6 +223,15 @@ impl<T> RocList<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(unused)]
|
||||||
|
pub(crate) fn ptr_to_refcount(&self) -> *mut c_void {
|
||||||
|
if self.is_seamless_slice() {
|
||||||
|
((self.capacity_or_ref_ptr << 1) - std::mem::size_of::<usize>()) as *mut _
|
||||||
|
} else {
|
||||||
|
unsafe { self.ptr_to_first_elem().cast::<usize>().sub(1) as *mut _ }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
unsafe fn elem_ptr_from_alloc_ptr(alloc_ptr: *mut c_void) -> *mut c_void {
|
unsafe fn elem_ptr_from_alloc_ptr(alloc_ptr: *mut c_void) -> *mut c_void {
|
||||||
unsafe {
|
unsafe {
|
||||||
alloc_ptr
|
alloc_ptr
|
||||||
|
@ -230,6 +240,44 @@ impl<T> RocList<T> {
|
||||||
.cast()
|
.cast()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn append(&mut self, value: T) {
|
||||||
|
self.push(value)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn push(&mut self, value: T) {
|
||||||
|
if self.capacity() <= self.len() {
|
||||||
|
// reserve space for (at least!) one more element
|
||||||
|
self.reserve(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
let elements = self.elements.unwrap().as_ptr();
|
||||||
|
let append_ptr = unsafe { elements.add(self.len()) };
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
// Write the element into the slot, without dropping it.
|
||||||
|
ptr::write(append_ptr, ManuallyDrop::new(value));
|
||||||
|
}
|
||||||
|
|
||||||
|
// It's important that the length is increased one by one, to
|
||||||
|
// make sure that we don't drop uninitialized elements, even when
|
||||||
|
// a incrementing the reference count panics.
|
||||||
|
self.length += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// - `bytes` must be allocated for `cap` elements
|
||||||
|
/// - `bytes` must be initialized for `len` elements
|
||||||
|
/// - `bytes` must be preceded by a correctly-aligned refcount (usize)
|
||||||
|
/// - `cap` >= `len`
|
||||||
|
pub unsafe fn from_raw_parts(bytes: *mut T, len: usize, cap: usize) -> Self {
|
||||||
|
Self {
|
||||||
|
elements: NonNull::new(bytes.cast()),
|
||||||
|
length: len,
|
||||||
|
capacity_or_ref_ptr: cap,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T> RocList<T>
|
impl<T> RocList<T>
|
||||||
|
@ -323,6 +371,38 @@ where
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T> RocList<T> {
|
impl<T> RocList<T> {
|
||||||
|
#[track_caller]
|
||||||
|
pub fn slice_range(&self, range: Range<usize>) -> Self {
|
||||||
|
match self.try_slice_range(range) {
|
||||||
|
Some(x) => x,
|
||||||
|
None => panic!("slice index out of range"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn try_slice_range(&self, range: Range<usize>) -> Option<Self> {
|
||||||
|
if self.as_slice().get(range.start..range.end).is_none() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
// increment the refcount
|
||||||
|
std::mem::forget(self.clone());
|
||||||
|
|
||||||
|
let element_ptr = self.as_slice()[range.start..]
|
||||||
|
.as_ptr()
|
||||||
|
.cast::<ManuallyDrop<T>>();
|
||||||
|
|
||||||
|
let capacity_or_ref_ptr =
|
||||||
|
(self.ptr_to_first_elem() as usize) >> 1 | isize::MIN as usize;
|
||||||
|
|
||||||
|
let roc_list = RocList {
|
||||||
|
elements: NonNull::new(element_ptr as *mut ManuallyDrop<T>),
|
||||||
|
length: range.end - range.start,
|
||||||
|
capacity_or_ref_ptr,
|
||||||
|
};
|
||||||
|
|
||||||
|
Some(roc_list)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Increase a RocList's capacity by at least the requested number of elements (possibly more).
|
/// Increase a RocList's capacity by at least the requested number of elements (possibly more).
|
||||||
///
|
///
|
||||||
/// May return a new RocList, if the provided one was not unique.
|
/// May return a new RocList, if the provided one was not unique.
|
||||||
|
|
|
@ -19,8 +19,9 @@ use core::{
|
||||||
|
|
||||||
#[cfg(feature = "std")]
|
#[cfg(feature = "std")]
|
||||||
use std::ffi::{CStr, CString};
|
use std::ffi::{CStr, CString};
|
||||||
|
use std::{ops::Range, ptr::NonNull};
|
||||||
|
|
||||||
use crate::RocList;
|
use crate::{roc_realloc, RocList};
|
||||||
|
|
||||||
#[repr(transparent)]
|
#[repr(transparent)]
|
||||||
pub struct RocStr(RocStrInner);
|
pub struct RocStr(RocStrInner);
|
||||||
|
@ -73,8 +74,34 @@ impl RocStr {
|
||||||
Self(RocStrInner { small_string })
|
Self(RocStrInner { small_string })
|
||||||
} else {
|
} else {
|
||||||
let heap_allocated = RocList::from_slice(slice);
|
let heap_allocated = RocList::from_slice(slice);
|
||||||
|
let big_string = unsafe { std::mem::transmute(heap_allocated) };
|
||||||
Self(RocStrInner {
|
Self(RocStrInner {
|
||||||
heap_allocated: ManuallyDrop::new(heap_allocated),
|
heap_allocated: ManuallyDrop::new(big_string),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// - `bytes` must be allocated for `cap` bytes
|
||||||
|
/// - `bytes` must be initialized for `len` bytes
|
||||||
|
/// - `bytes` must be preceded by a correctly-aligned refcount (usize)
|
||||||
|
/// - `bytes` must represent valid UTF-8
|
||||||
|
/// - `cap` >= `len`
|
||||||
|
pub unsafe fn from_raw_parts(bytes: *mut u8, len: usize, cap: usize) -> Self {
|
||||||
|
if len <= SmallString::CAPACITY {
|
||||||
|
unsafe {
|
||||||
|
let slice = std::slice::from_raw_parts(bytes, len);
|
||||||
|
let small_string = SmallString::try_from_utf8_bytes(slice).unwrap_unchecked();
|
||||||
|
Self(RocStrInner { small_string })
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Self(RocStrInner {
|
||||||
|
heap_allocated: ManuallyDrop::new(BigString {
|
||||||
|
elements: unsafe { NonNull::new_unchecked(bytes) },
|
||||||
|
length: len,
|
||||||
|
capacity_or_ref_ptr: cap,
|
||||||
|
}),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -93,7 +120,7 @@ impl RocStr {
|
||||||
|
|
||||||
pub fn capacity(&self) -> usize {
|
pub fn capacity(&self) -> usize {
|
||||||
match self.as_enum_ref() {
|
match self.as_enum_ref() {
|
||||||
RocStrInnerRef::HeapAllocated(roc_list) => roc_list.capacity(),
|
RocStrInnerRef::HeapAllocated(big_string) => big_string.capacity(),
|
||||||
RocStrInnerRef::SmallString(_) => SmallString::CAPACITY,
|
RocStrInnerRef::SmallString(_) => SmallString::CAPACITY,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -137,10 +164,12 @@ impl RocStr {
|
||||||
/// There is no way to tell how many references it has and if it is safe to free.
|
/// There is no way to tell how many references it has and if it is safe to free.
|
||||||
/// As such, only values that should have a static lifetime for the entire application run
|
/// As such, only values that should have a static lifetime for the entire application run
|
||||||
/// should be considered for marking read-only.
|
/// should be considered for marking read-only.
|
||||||
pub unsafe fn set_readonly(&self) {
|
pub unsafe fn set_readonly(&mut self) {
|
||||||
match self.as_enum_ref() {
|
if self.is_small_str() {
|
||||||
RocStrInnerRef::HeapAllocated(roc_list) => unsafe { roc_list.set_readonly() },
|
/* do nothing */
|
||||||
RocStrInnerRef::SmallString(_) => {}
|
} else {
|
||||||
|
let big = unsafe { &mut self.0.heap_allocated };
|
||||||
|
big.set_readonly()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -167,7 +196,7 @@ impl RocStr {
|
||||||
} else {
|
} else {
|
||||||
// The requested capacity won't fit in a small string; we need to go big.
|
// The requested capacity won't fit in a small string; we need to go big.
|
||||||
RocStr(RocStrInner {
|
RocStr(RocStrInner {
|
||||||
heap_allocated: ManuallyDrop::new(RocList::with_capacity(bytes)),
|
heap_allocated: ManuallyDrop::new(BigString::with_capacity(bytes)),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -182,21 +211,33 @@ impl RocStr {
|
||||||
|
|
||||||
if target_cap > SmallString::CAPACITY {
|
if target_cap > SmallString::CAPACITY {
|
||||||
// The requested capacity won't fit in a small string; we need to go big.
|
// The requested capacity won't fit in a small string; we need to go big.
|
||||||
let mut roc_list = RocList::with_capacity(target_cap);
|
let mut big_string = BigString::with_capacity(target_cap);
|
||||||
|
|
||||||
roc_list.extend_from_slice(small_str.as_bytes());
|
unsafe {
|
||||||
|
std::ptr::copy_nonoverlapping(
|
||||||
|
self.as_bytes().as_ptr(),
|
||||||
|
big_string.ptr_to_first_elem(),
|
||||||
|
self.len(),
|
||||||
|
)
|
||||||
|
};
|
||||||
|
|
||||||
*self = RocStr(RocStrInner {
|
big_string.length = self.len();
|
||||||
heap_allocated: ManuallyDrop::new(roc_list),
|
big_string.capacity_or_ref_ptr = target_cap;
|
||||||
|
|
||||||
|
let mut updated = RocStr(RocStrInner {
|
||||||
|
heap_allocated: ManuallyDrop::new(big_string),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
mem::swap(self, &mut updated);
|
||||||
|
mem::forget(updated);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let mut roc_list = unsafe { ManuallyDrop::take(&mut self.0.heap_allocated) };
|
let mut big_string = unsafe { ManuallyDrop::take(&mut self.0.heap_allocated) };
|
||||||
|
|
||||||
roc_list.reserve(bytes);
|
big_string.reserve(bytes);
|
||||||
|
|
||||||
let mut updated = RocStr(RocStrInner {
|
let mut updated = RocStr(RocStrInner {
|
||||||
heap_allocated: ManuallyDrop::new(roc_list),
|
heap_allocated: ManuallyDrop::new(big_string),
|
||||||
});
|
});
|
||||||
|
|
||||||
mem::swap(self, &mut updated);
|
mem::swap(self, &mut updated);
|
||||||
|
@ -204,12 +245,57 @@ impl RocStr {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[track_caller]
|
||||||
|
pub fn slice_range(&self, range: Range<usize>) -> Self {
|
||||||
|
match self.try_slice_range(range) {
|
||||||
|
Some(x) => x,
|
||||||
|
None => panic!("slice index out of range"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn try_slice_range(&self, range: Range<usize>) -> Option<Self> {
|
||||||
|
if self.as_str().get(range.start..range.end).is_none() {
|
||||||
|
None
|
||||||
|
} else if range.end - range.start <= SmallString::CAPACITY {
|
||||||
|
let slice = &self.as_bytes()[range];
|
||||||
|
let small_string =
|
||||||
|
unsafe { SmallString::try_from_utf8_bytes(slice).unwrap_unchecked() };
|
||||||
|
|
||||||
|
// NOTE decrements `self`
|
||||||
|
Some(RocStr(RocStrInner { small_string }))
|
||||||
|
} else {
|
||||||
|
// increment the refcount
|
||||||
|
std::mem::forget(self.clone());
|
||||||
|
|
||||||
|
let big = unsafe { &self.0.heap_allocated };
|
||||||
|
let ptr = unsafe { (self.as_bytes().as_ptr() as *mut u8).add(range.start) };
|
||||||
|
|
||||||
|
let heap_allocated = ManuallyDrop::new(BigString {
|
||||||
|
elements: unsafe { NonNull::new_unchecked(ptr) },
|
||||||
|
length: (isize::MIN as usize) | (range.end - range.start),
|
||||||
|
capacity_or_ref_ptr: (big.ptr_to_first_elem() as usize) >> 1,
|
||||||
|
});
|
||||||
|
|
||||||
|
Some(RocStr(RocStrInner { heap_allocated }))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Splits the string at the first occurrence of `delimiter`, returning the
/// text before and after it, or `None` when the delimiter does not occur.
pub fn split_once(&self, delimiter: &str) -> Option<(Self, Self)> {
    let (head, tail) = self.as_str().split_once(delimiter)?;

    // `head` starts at offset 0 and `tail` ends at the end of the string, so
    // their lengths are enough to recover both byte ranges.
    let before = self.slice_range(0..head.len());
    let after = self.slice_range(self.len() - tail.len()..self.len());

    Some((before, after))
}
|
||||||
|
|
||||||
|
pub fn split_whitespace(&self) -> SplitWhitespace<'_> {
|
||||||
|
SplitWhitespace(self.as_str().char_indices().peekable(), self)
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns the index of the first interior \0 byte in the string, or None if there are none.
|
/// Returns the index of the first interior \0 byte in the string, or None if there are none.
|
||||||
fn first_nul_byte(&self) -> Option<usize> {
|
fn first_nul_byte(&self) -> Option<usize> {
|
||||||
match self.as_enum_ref() {
|
self.as_bytes().iter().position(|byte| *byte == 0)
|
||||||
RocStrInnerRef::HeapAllocated(roc_list) => roc_list.iter().position(|byte| *byte == 0),
|
|
||||||
RocStrInnerRef::SmallString(small_string) => small_string.first_nul_byte(),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the string is under this many bytes, the with_terminator family
|
// If the string is under this many bytes, the with_terminator family
|
||||||
|
@ -267,60 +353,49 @@ impl RocStr {
|
||||||
};
|
};
|
||||||
|
|
||||||
match self.as_enum_ref() {
|
match self.as_enum_ref() {
|
||||||
RocStrInnerRef::HeapAllocated(roc_list) => {
|
RocStrInnerRef::HeapAllocated(big_string) => {
|
||||||
unsafe {
|
unsafe {
|
||||||
match roc_list.storage() {
|
if big_string.is_unique() {
|
||||||
Some(storage) if storage.is_unique() => {
|
// The backing RocList was unique, so we can mutate it in-place.
|
||||||
// The backing RocList was unique, so we can mutate it in-place.
|
let len = big_string.len();
|
||||||
let len = roc_list.len();
|
let ptr = if len < big_string.capacity() {
|
||||||
let ptr = if len < roc_list.capacity() {
|
// We happen to have excess capacity already, so we will be able
|
||||||
// We happen to have excess capacity already, so we will be able
|
// to write the terminator into the first byte of excess capacity.
|
||||||
// to write the terminator into the first byte of excess capacity.
|
big_string.ptr_to_first_elem() as *mut u8
|
||||||
roc_list.ptr_to_first_elem() as *mut u8
|
} else {
|
||||||
} else {
|
// We always have an allocation that's even bigger than necessary,
|
||||||
// We always have an allocation that's even bigger than necessary,
|
// because the refcount bytes take up more than the 1B needed for
|
||||||
// because the refcount bytes take up more than the 1B needed for
|
// the terminator. We just need to shift the bytes over on top
|
||||||
// the terminator. We just need to shift the bytes over on top
|
// of the refcount.
|
||||||
// of the refcount.
|
let alloc_ptr = big_string.ptr_to_allocation() as *mut u8;
|
||||||
let alloc_ptr = roc_list.ptr_to_allocation() as *mut u8;
|
|
||||||
|
|
||||||
// First, copy the bytes over the original allocation - effectively
|
// First, copy the bytes over the original allocation - effectively
|
||||||
// shifting everything over by one `usize`. Now we no longer have a
|
// shifting everything over by one `usize`. Now we no longer have a
|
||||||
// refcount (but the terminated won't use that anyway), but we do
|
// refcount (but the terminated won't use that anyway), but we do
|
||||||
// have a free `usize` at the end.
|
// have a free `usize` at the end.
|
||||||
//
|
|
||||||
// IMPORTANT: Must use ptr::copy instead of ptr::copy_nonoverlapping
|
|
||||||
// because the regions definitely overlap!
|
|
||||||
ptr::copy(roc_list.ptr_to_first_elem() as *mut u8, alloc_ptr, len);
|
|
||||||
|
|
||||||
alloc_ptr
|
|
||||||
};
|
|
||||||
|
|
||||||
terminate(ptr, len)
|
|
||||||
}
|
|
||||||
Some(_) => {
|
|
||||||
let len = roc_list.len();
|
|
||||||
|
|
||||||
// The backing list was not unique, so we can't mutate it in-place.
|
|
||||||
// ask for `len + 1` to store the original string and the terminator
|
|
||||||
with_stack_bytes(len + 1, |alloc_ptr: *mut u8| {
|
|
||||||
let alloc_ptr = alloc_ptr as *mut u8;
|
|
||||||
let elem_ptr = roc_list.ptr_to_first_elem() as *mut u8;
|
|
||||||
|
|
||||||
// memcpy the bytes into the stack allocation
|
|
||||||
ptr::copy_nonoverlapping(elem_ptr, alloc_ptr, len);
|
|
||||||
|
|
||||||
terminate(alloc_ptr, len)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
None => {
|
|
||||||
// The backing list was empty.
|
|
||||||
//
|
//
|
||||||
// No need to do a heap allocation for an empty string - we
|
// IMPORTANT: Must use ptr::copy instead of ptr::copy_nonoverlapping
|
||||||
// can just do a stack allocation that will live for the
|
// because the regions definitely overlap!
|
||||||
// duration of the function.
|
ptr::copy(big_string.ptr_to_first_elem() as *mut u8, alloc_ptr, len);
|
||||||
func([terminator].as_mut_ptr(), 0)
|
|
||||||
}
|
alloc_ptr
|
||||||
|
};
|
||||||
|
|
||||||
|
terminate(ptr, len)
|
||||||
|
} else {
|
||||||
|
let len = big_string.len();
|
||||||
|
|
||||||
|
// The backing list was not unique, so we can't mutate it in-place.
|
||||||
|
// ask for `len + 1` to store the original string and the terminator
|
||||||
|
with_stack_bytes(len + 1, |alloc_ptr: *mut u8| {
|
||||||
|
let alloc_ptr = alloc_ptr as *mut u8;
|
||||||
|
let elem_ptr = big_string.ptr_to_first_elem() as *mut u8;
|
||||||
|
|
||||||
|
// memcpy the bytes into the stack allocation
|
||||||
|
std::ptr::copy_nonoverlapping(elem_ptr, alloc_ptr, len);
|
||||||
|
|
||||||
|
terminate(alloc_ptr, len)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -485,57 +560,46 @@ impl RocStr {
|
||||||
};
|
};
|
||||||
|
|
||||||
match self.as_enum_ref() {
|
match self.as_enum_ref() {
|
||||||
RocStrInnerRef::HeapAllocated(roc_list) => {
|
RocStrInnerRef::HeapAllocated(big_string) => {
|
||||||
let len = roc_list.len();
|
let len = big_string.len();
|
||||||
|
|
||||||
unsafe {
|
unsafe {
|
||||||
match roc_list.storage() {
|
if big_string.is_unique() {
|
||||||
Some(storage) if storage.is_unique() => {
|
// The backing RocList was unique, so we can mutate it in-place.
|
||||||
// The backing RocList was unique, so we can mutate it in-place.
|
|
||||||
|
|
||||||
// We need 1 extra elem for the terminator. It must be an elem,
|
// We need 1 extra elem for the terminator. It must be an elem,
|
||||||
// not a byte, because we'll be providing a pointer to elems.
|
// not a byte, because we'll be providing a pointer to elems.
|
||||||
let needed_bytes = (len + 1) * size_of::<E>();
|
let needed_bytes = (len + 1) * size_of::<E>();
|
||||||
|
|
||||||
// We can use not only the capacity on the heap, but also
|
// We can use not only the capacity on the heap, but also
|
||||||
// the bytes originally used for the refcount.
|
// the bytes originally used for the refcount.
|
||||||
let available_bytes = roc_list.capacity() + size_of::<Storage>();
|
let available_bytes = big_string.capacity() + size_of::<Storage>();
|
||||||
|
|
||||||
if needed_bytes < available_bytes {
|
if needed_bytes < available_bytes {
|
||||||
debug_assert!(align_of::<Storage>() >= align_of::<E>());
|
debug_assert!(align_of::<Storage>() >= align_of::<E>());
|
||||||
|
|
||||||
// We happen to have sufficient excess capacity already,
|
// We happen to have sufficient excess capacity already,
|
||||||
// so we will be able to write the new elements as well as
|
// so we will be able to write the new elements as well as
|
||||||
// the terminator into the existing allocation.
|
// the terminator into the existing allocation.
|
||||||
let ptr = roc_list.ptr_to_allocation() as *mut E;
|
let ptr = big_string.ptr_to_allocation() as *mut E;
|
||||||
let answer = terminate(ptr, self.as_str());
|
let answer = terminate(ptr, self.as_str());
|
||||||
|
|
||||||
// We cannot rely on the RocStr::drop implementation, because
|
// We cannot rely on the RocStr::drop implementation, because
|
||||||
// it tries to use the refcount - which we just overwrote
|
// it tries to use the refcount - which we just overwrote
|
||||||
// with string bytes.
|
// with string bytes.
|
||||||
mem::forget(self);
|
mem::forget(self);
|
||||||
crate::roc_dealloc(ptr.cast(), mem::align_of::<E>() as u32);
|
crate::roc_dealloc(ptr.cast(), mem::align_of::<E>() as u32);
|
||||||
|
|
||||||
answer
|
answer
|
||||||
} else {
|
} else {
|
||||||
// We didn't have sufficient excess capacity already,
|
// We didn't have sufficient excess capacity already,
|
||||||
// so we need to do either a new stack allocation or a new
|
// so we need to do either a new stack allocation or a new
|
||||||
// heap allocation.
|
// heap allocation.
|
||||||
fallback(self.as_str())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Some(_) => {
|
|
||||||
// The backing list was not unique, so we can't mutate it in-place.
|
|
||||||
fallback(self.as_str())
|
fallback(self.as_str())
|
||||||
}
|
}
|
||||||
None => {
|
} else {
|
||||||
// The backing list was empty.
|
// The backing list was not unique, so we can't mutate it in-place.
|
||||||
//
|
fallback(self.as_str())
|
||||||
// No need to do a heap allocation for an empty string - we
|
|
||||||
// can just do a stack allocation that will live for the
|
|
||||||
// duration of the function.
|
|
||||||
func([terminator].as_mut_ptr() as *mut E, "")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -558,12 +622,44 @@ impl RocStr {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub struct SplitWhitespace<'a>(std::iter::Peekable<std::str::CharIndices<'a>>, &'a RocStr);
|
||||||
|
|
||||||
|
impl Iterator for SplitWhitespace<'_> {
|
||||||
|
type Item = RocStr;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
let start = 'blk: {
|
||||||
|
while let Some((pos, c)) = self.0.peek() {
|
||||||
|
if c.is_whitespace() {
|
||||||
|
self.0.next();
|
||||||
|
} else {
|
||||||
|
break 'blk *pos;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return None;
|
||||||
|
};
|
||||||
|
|
||||||
|
let end = 'blk: {
|
||||||
|
for (pos, c) in self.0.by_ref() {
|
||||||
|
if c.is_whitespace() {
|
||||||
|
break 'blk pos;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
break 'blk self.1.len();
|
||||||
|
};
|
||||||
|
|
||||||
|
self.1.try_slice_range(start..end)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Deref for RocStr {
|
impl Deref for RocStr {
|
||||||
type Target = str;
|
type Target = str;
|
||||||
|
|
||||||
fn deref(&self) -> &Self::Target {
|
fn deref(&self) -> &Self::Target {
|
||||||
match self.as_enum_ref() {
|
match self.as_enum_ref() {
|
||||||
RocStrInnerRef::HeapAllocated(h) => unsafe { core::str::from_utf8_unchecked(h) },
|
RocStrInnerRef::HeapAllocated(h) => h.as_str(),
|
||||||
RocStrInnerRef::SmallString(s) => s,
|
RocStrInnerRef::SmallString(s) => s,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -697,6 +793,203 @@ impl From<SendSafeRocStr> for RocStr {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[repr(C)]
|
||||||
|
struct BigString {
|
||||||
|
elements: NonNull<u8>,
|
||||||
|
length: usize,
|
||||||
|
capacity_or_ref_ptr: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
const SEAMLESS_SLICE_BIT: usize = isize::MIN as usize;
|
||||||
|
|
||||||
|
impl BigString {
|
||||||
|
fn len(&self) -> usize {
|
||||||
|
self.length & !SEAMLESS_SLICE_BIT
|
||||||
|
}
|
||||||
|
|
||||||
|
fn capacity(&self) -> usize {
|
||||||
|
if self.is_seamless_slice() {
|
||||||
|
self.len()
|
||||||
|
} else {
|
||||||
|
self.capacity_or_ref_ptr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_seamless_slice(&self) -> bool {
|
||||||
|
(self.length as isize) < 0
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ptr_to_first_elem(&self) -> *mut u8 {
|
||||||
|
unsafe { core::mem::transmute(self.elements) }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ptr_to_allocation(&self) -> *mut usize {
|
||||||
|
// these are the same because the alignment of u8 is just 1
|
||||||
|
self.ptr_to_refcount()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ptr_to_refcount(&self) -> *mut usize {
|
||||||
|
if self.is_seamless_slice() {
|
||||||
|
unsafe { ((self.capacity_or_ref_ptr << 1) as *mut usize).sub(1) }
|
||||||
|
} else {
|
||||||
|
unsafe { self.ptr_to_first_elem().cast::<usize>().sub(1) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_bytes(&self) -> &[u8] {
|
||||||
|
unsafe { std::slice::from_raw_parts(self.ptr_to_first_elem(), self.len()) }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_str(&self) -> &str {
|
||||||
|
unsafe { std::str::from_utf8_unchecked(self.as_bytes()) }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_unique(&self) -> bool {
|
||||||
|
if self.capacity() == 0 {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
let ptr = self.ptr_to_refcount();
|
||||||
|
let rc = unsafe { std::ptr::read(ptr) as isize };
|
||||||
|
|
||||||
|
rc == isize::MIN
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_readonly(&self) -> bool {
|
||||||
|
if self.capacity() == 0 {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
let ptr = self.ptr_to_refcount();
|
||||||
|
let rc = unsafe { std::ptr::read(ptr) as isize };
|
||||||
|
|
||||||
|
rc == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
fn set_readonly(&mut self) {
|
||||||
|
assert_ne!(self.capacity(), 0);
|
||||||
|
|
||||||
|
let ptr = self.ptr_to_refcount();
|
||||||
|
unsafe { std::ptr::write(ptr, 0) }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn inc(&mut self, n: usize) {
|
||||||
|
let ptr = self.ptr_to_refcount();
|
||||||
|
unsafe {
|
||||||
|
let value = std::ptr::read(ptr);
|
||||||
|
std::ptr::write(ptr, Ord::max(0, ((value as isize) + n as isize) as usize));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dec(&mut self) {
|
||||||
|
if self.capacity() == 0 {
|
||||||
|
// no valid allocation, elements pointer is dangling
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let ptr = self.ptr_to_refcount();
|
||||||
|
unsafe {
|
||||||
|
let value = std::ptr::read(ptr) as isize;
|
||||||
|
match value {
|
||||||
|
0 => {
|
||||||
|
// static lifetime, do nothing
|
||||||
|
}
|
||||||
|
isize::MIN => {
|
||||||
|
// refcount becomes zero; free allocation
|
||||||
|
crate::roc_dealloc(self.ptr_to_allocation().cast(), 1);
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
std::ptr::write(ptr, (value - 1) as usize);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn with_capacity(cap: usize) -> Self {
|
||||||
|
let mut this = Self {
|
||||||
|
elements: NonNull::dangling(),
|
||||||
|
length: 0,
|
||||||
|
capacity_or_ref_ptr: 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
this.reserve(cap);
|
||||||
|
|
||||||
|
this
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Increase a BigString's capacity by at least the requested number of elements (possibly more).
|
||||||
|
///
|
||||||
|
/// May return a new BigString, if the provided one was not unique.
|
||||||
|
fn reserve(&mut self, n: usize) {
|
||||||
|
let align = std::mem::size_of::<usize>();
|
||||||
|
let desired_cap = self.len() + n;
|
||||||
|
let desired_alloc = align + desired_cap;
|
||||||
|
|
||||||
|
if self.is_unique() {
|
||||||
|
if self.capacity() >= desired_cap {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let new_alloc = unsafe {
|
||||||
|
roc_realloc(
|
||||||
|
self.ptr_to_allocation().cast(),
|
||||||
|
desired_alloc as _,
|
||||||
|
align + self.capacity(),
|
||||||
|
align as _,
|
||||||
|
)
|
||||||
|
};
|
||||||
|
|
||||||
|
let elements = unsafe { NonNull::new_unchecked(new_alloc.cast::<u8>().add(align)) };
|
||||||
|
|
||||||
|
let mut this = Self {
|
||||||
|
elements,
|
||||||
|
length: self.len(),
|
||||||
|
capacity_or_ref_ptr: desired_cap,
|
||||||
|
};
|
||||||
|
|
||||||
|
std::mem::swap(&mut this, self);
|
||||||
|
std::mem::forget(this);
|
||||||
|
} else {
|
||||||
|
let ptr = unsafe { crate::roc_alloc(desired_alloc, align as _) } as *mut u8;
|
||||||
|
let elements = unsafe { NonNull::new_unchecked(ptr.cast::<u8>().add(align)) };
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
// Copy the old elements to the new allocation.
|
||||||
|
std::ptr::copy_nonoverlapping(self.ptr_to_first_elem(), ptr.add(align), self.len());
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut this = Self {
|
||||||
|
elements,
|
||||||
|
length: self.len(),
|
||||||
|
capacity_or_ref_ptr: desired_cap,
|
||||||
|
};
|
||||||
|
|
||||||
|
std::mem::swap(&mut this, self);
|
||||||
|
std::mem::drop(this);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Clone for BigString {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
let mut this = Self {
|
||||||
|
elements: self.elements,
|
||||||
|
length: self.length,
|
||||||
|
capacity_or_ref_ptr: self.capacity_or_ref_ptr,
|
||||||
|
};
|
||||||
|
|
||||||
|
this.inc(1);
|
||||||
|
|
||||||
|
this
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for BigString {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
self.dec()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[repr(C)]
|
#[repr(C)]
|
||||||
union RocStrInner {
|
union RocStrInner {
|
||||||
// TODO: this really should be separated from the List type.
|
// TODO: this really should be separated from the List type.
|
||||||
|
@ -704,12 +997,12 @@ union RocStrInner {
|
||||||
// Currently, there are work arounds in RocList to handle both via removing the highest bit of length in many cases.
|
// Currently, there are work arounds in RocList to handle both via removing the highest bit of length in many cases.
|
||||||
// With glue changes, we should probably rewrite these cleanly to match what is in the zig bitcode.
|
// With glue changes, we should probably rewrite these cleanly to match what is in the zig bitcode.
|
||||||
// It is definitely a bit stale now and I think the storage mechanism can be quite confusing with our extra pieces of state.
|
// It is definitely a bit stale now and I think the storage mechanism can be quite confusing with our extra pieces of state.
|
||||||
heap_allocated: ManuallyDrop<RocList<u8>>,
|
heap_allocated: ManuallyDrop<BigString>,
|
||||||
small_string: SmallString,
|
small_string: SmallString,
|
||||||
}
|
}
|
||||||
|
|
||||||
enum RocStrInnerRef<'a> {
|
enum RocStrInnerRef<'a> {
|
||||||
HeapAllocated(&'a RocList<u8>),
|
HeapAllocated(&'a BigString),
|
||||||
SmallString(&'a SmallString),
|
SmallString(&'a SmallString),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -756,17 +1049,6 @@ impl SmallString {
|
||||||
fn len(&self) -> usize {
|
fn len(&self) -> usize {
|
||||||
usize::from(self.len & !RocStr::MASK)
|
usize::from(self.len & !RocStr::MASK)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the index of the first interior \0 byte in the string, or None if there are none.
|
|
||||||
fn first_nul_byte(&self) -> Option<usize> {
|
|
||||||
for (index, byte) in self.bytes[0..self.len()].iter().enumerate() {
|
|
||||||
if *byte == 0 {
|
|
||||||
return Some(index);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Deref for SmallString {
|
impl Deref for SmallString {
|
||||||
|
|
|
@ -358,6 +358,53 @@ mod test_roc_std {
|
||||||
let roc_list = RocList::<RocStr>::empty();
|
let roc_list = RocList::<RocStr>::empty();
|
||||||
assert!(roc_list.is_unique());
|
assert!(roc_list.is_unique());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Slicing a `RocList`: a prefix, an interior range, and a slice of a slice.
#[test]
fn slicing_and_dicing_list() {
    let example = RocList::from_slice(b"chaos is a ladder");

    // a prefix of the list
    let prefix = example.slice_range(0..5);
    assert_eq!(prefix.as_slice(), b"chaos");
    drop(prefix);

    // a range in the interior
    let middle = example.slice_range(6..10);
    assert_eq!(middle.as_slice(), b"is a");
    drop(middle);

    // a slice taken from another slice
    let first = example.slice_range(0..5);
    let nested = first.slice_range(0..3);
    assert_eq!(nested.as_slice(), b"cha");
}
|
||||||
|
|
||||||
|
/// Slicing a `RocStr`: a prefix, an interior range, and a slice of a slice.
#[test]
fn slicing_and_dicing_str() {
    let example = RocStr::from("chaos is a ladder");

    // a prefix of the string
    let prefix = example.slice_range(0..5);
    assert_eq!(prefix.as_str(), "chaos");
    drop(prefix);

    // a range in the interior
    let middle = example.slice_range(6..10);
    assert_eq!(middle.as_str(), "is a");
    drop(middle);

    // a slice taken from another slice
    let first = example.slice_range(0..5);
    let nested = first.slice_range(0..3);
    assert_eq!(nested.as_str(), "cha");
}
|
||||||
|
|
||||||
|
/// `RocStr::split_whitespace` yields each space-separated word in order.
#[test]
fn split_whitespace() {
    let example = RocStr::from("chaos is a ladder");

    let expected = vec![
        RocStr::from("chaos"),
        RocStr::from("is"),
        RocStr::from("a"),
        RocStr::from("ladder"),
    ];

    let split: Vec<_> = example.split_whitespace().collect();

    assert_eq!(split, expected);
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue