Expose list/string slices to Rust users

Folkert 2023-12-09 16:21:33 +01:00
parent 95a4bb988c
commit 9865096c24
No known key found for this signature in database
GPG key ID: 1F17F6FFD112B97C
3 changed files with 535 additions and 126 deletions

View file

@@ -12,6 +12,7 @@ use core::{
     ops::{Deref, DerefMut},
     ptr::{self, NonNull},
 };
+use std::ops::Range;

 use crate::{roc_alloc, roc_dealloc, roc_realloc, storage::Storage};
@@ -208,7 +209,7 @@ impl<T> RocList<T> {
     }

     /// Useful for doing memcpy on the elements. Returns NULL if list is empty.
-    pub(crate) unsafe fn ptr_to_first_elem(&self) -> *const T {
+    pub(crate) fn ptr_to_first_elem(&self) -> *const T {
         unsafe { core::mem::transmute(self.elements) }
     }
@@ -222,6 +223,15 @@ impl<T> RocList<T> {
         }
     }

+    #[allow(unused)]
+    pub(crate) fn ptr_to_refcount(&self) -> *mut c_void {
+        if self.is_seamless_slice() {
+            ((self.capacity_or_ref_ptr << 1) - std::mem::size_of::<usize>()) as *mut _
+        } else {
+            unsafe { self.ptr_to_first_elem().cast::<usize>().sub(1) as *mut _ }
+        }
+    }
+
     unsafe fn elem_ptr_from_alloc_ptr(alloc_ptr: *mut c_void) -> *mut c_void {
         unsafe {
             alloc_ptr
@@ -230,6 +240,44 @@ impl<T> RocList<T> {
                 .cast()
         }
     }

+    pub fn append(&mut self, value: T) {
+        self.push(value)
+    }
+
+    pub fn push(&mut self, value: T) {
+        if self.capacity() <= self.len() {
+            // reserve space for (at least!) one more element
+            self.reserve(1);
+        }
+
+        let elements = self.elements.unwrap().as_ptr();
+
+        let append_ptr = unsafe { elements.add(self.len()) };
+
+        unsafe {
+            // Write the element into the slot, without dropping it.
+            ptr::write(append_ptr, ManuallyDrop::new(value));
+        }
+
+        // It's important that the length is increased one by one, to
+        // make sure that we don't drop uninitialized elements, even when
+        // incrementing the reference count panics.
+        self.length += 1;
+    }
+
+    /// # Safety
+    ///
+    /// - `bytes` must be allocated for `cap` elements
+    /// - `bytes` must be initialized for `len` elements
+    /// - `bytes` must be preceded by a correctly-aligned refcount (usize)
+    /// - `cap` >= `len`
+    pub unsafe fn from_raw_parts(bytes: *mut T, len: usize, cap: usize) -> Self {
+        Self {
+            elements: NonNull::new(bytes.cast()),
+            length: len,
+            capacity_or_ref_ptr: cap,
+        }
+    }
 }

 impl<T> RocList<T>
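
For context, a minimal sketch of how host code might use the new `push`/`append` API (hypothetical usage; like the crate's own tests, it assumes a host that provides the `roc_alloc`/`roc_realloc`/`roc_dealloc` symbols):

    use roc_std::RocList;

    // `push` reserves room for at least one more element whenever the
    // list is full; `append` is a plain alias for `push`.
    let mut list: RocList<u8> = RocList::empty();
    list.push(1);
    list.append(2);
    assert_eq!(list.as_slice(), &[1, 2]);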
@@ -323,6 +371,38 @@ where
 }

 impl<T> RocList<T> {
+    #[track_caller]
+    pub fn slice_range(&self, range: Range<usize>) -> Self {
+        match self.try_slice_range(range) {
+            Some(x) => x,
+            None => panic!("slice index out of range"),
+        }
+    }
+
+    pub fn try_slice_range(&self, range: Range<usize>) -> Option<Self> {
+        if self.as_slice().get(range.start..range.end).is_none() {
+            None
+        } else {
+            // increment the refcount
+            std::mem::forget(self.clone());
+
+            let element_ptr = self.as_slice()[range.start..]
+                .as_ptr()
+                .cast::<ManuallyDrop<T>>();
+
+            let capacity_or_ref_ptr =
+                (self.ptr_to_first_elem() as usize) >> 1 | isize::MIN as usize;
+
+            let roc_list = RocList {
+                elements: NonNull::new(element_ptr as *mut ManuallyDrop<T>),
+                length: range.end - range.start,
+                capacity_or_ref_ptr,
+            };
+
+            Some(roc_list)
+        }
+    }
+
     /// Increase a RocList's capacity by at least the requested number of elements (possibly more).
     ///
     /// May return a new RocList, if the provided one was not unique.
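
A note on the seamless-slice encoding introduced here (an illustration of the invariant, not code from the diff): the slice stores the original list's element pointer shifted right by one in `capacity_or_ref_ptr`, with the high bit set as the slice tag. Because allocations are at least word-aligned, shifting left by one recovers the pointer, and the refcount sits one `usize` before it:

    // Hypothetical round-trip of the tag/pointer packing.
    let elem_ptr: usize = 0x1000; // refcount lives at elem_ptr - size_of::<usize>()
    let tagged = (elem_ptr >> 1) | (isize::MIN as usize); // high bit = seamless slice
    assert!((tagged as isize) < 0); // what is_seamless_slice() checks
    assert_eq!(tagged << 1, elem_ptr); // the tag bit shifts out, recovering the pointer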

View file

@@ -19,8 +19,9 @@ use core::{
 #[cfg(feature = "std")]
 use std::ffi::{CStr, CString};
+use std::{ops::Range, ptr::NonNull};

-use crate::RocList;
+use crate::{roc_realloc, RocList};

 #[repr(transparent)]
 pub struct RocStr(RocStrInner);
@@ -73,8 +74,34 @@ impl RocStr {
             Self(RocStrInner { small_string })
         } else {
             let heap_allocated = RocList::from_slice(slice);
+            let big_string = unsafe { std::mem::transmute(heap_allocated) };

             Self(RocStrInner {
-                heap_allocated: ManuallyDrop::new(heap_allocated),
+                heap_allocated: ManuallyDrop::new(big_string),
+            })
+        }
+    }
+
+    /// # Safety
+    ///
+    /// - `bytes` must be allocated for `cap` bytes
+    /// - `bytes` must be initialized for `len` bytes
+    /// - `bytes` must be preceded by a correctly-aligned refcount (usize)
+    /// - `bytes` must represent valid UTF-8
+    /// - `cap` >= `len`
+    pub unsafe fn from_raw_parts(bytes: *mut u8, len: usize, cap: usize) -> Self {
+        if len <= SmallString::CAPACITY {
+            unsafe {
+                let slice = std::slice::from_raw_parts(bytes, len);
+                let small_string = SmallString::try_from_utf8_bytes(slice).unwrap_unchecked();
+                Self(RocStrInner { small_string })
+            }
+        } else {
+            Self(RocStrInner {
+                heap_allocated: ManuallyDrop::new(BigString {
+                    elements: unsafe { NonNull::new_unchecked(bytes) },
+                    length: len,
+                    capacity_or_ref_ptr: cap,
+                }),
             })
         }
     }
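
The `transmute` in `from_slice` leans on `RocList<u8>` and the new `BigString` having the same `#[repr(C)]` layout: three machine words (element pointer, length, capacity-or-refcount-pointer). `Option<NonNull<u8>>` and `NonNull<u8>` have the same size because null is the `None` niche; a size check one could add as a test (hypothetical, not part of the diff):

    #[test]
    fn option_nonnull_same_size() {
        use core::mem::size_of;
        use core::ptr::NonNull;

        // Null is the None niche, so RocList's Option-wrapped pointer and
        // BigString's bare NonNull occupy the same bytes.
        assert_eq!(size_of::<Option<NonNull<u8>>>(), size_of::<NonNull<u8>>());
    }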
@@ -93,7 +120,7 @@ impl RocStr {
     pub fn capacity(&self) -> usize {
         match self.as_enum_ref() {
-            RocStrInnerRef::HeapAllocated(roc_list) => roc_list.capacity(),
+            RocStrInnerRef::HeapAllocated(big_string) => big_string.capacity(),
             RocStrInnerRef::SmallString(_) => SmallString::CAPACITY,
         }
     }
@@ -137,10 +164,12 @@ impl RocStr {
     /// There is no way to tell how many references it has and if it is safe to free.
     /// As such, only values that should have a static lifetime for the entire application run
     /// should be considered for marking read-only.
-    pub unsafe fn set_readonly(&self) {
-        match self.as_enum_ref() {
-            RocStrInnerRef::HeapAllocated(roc_list) => unsafe { roc_list.set_readonly() },
-            RocStrInnerRef::SmallString(_) => {}
+    pub unsafe fn set_readonly(&mut self) {
+        if self.is_small_str() {
+            /* do nothing */
+        } else {
+            let big = unsafe { &mut self.0.heap_allocated };
+            big.set_readonly()
         }
     }
@@ -167,7 +196,7 @@ impl RocStr {
         } else {
             // The requested capacity won't fit in a small string; we need to go big.
             RocStr(RocStrInner {
-                heap_allocated: ManuallyDrop::new(RocList::with_capacity(bytes)),
+                heap_allocated: ManuallyDrop::new(BigString::with_capacity(bytes)),
             })
         }
     }
@@ -182,21 +211,33 @@ impl RocStr {
             if target_cap > SmallString::CAPACITY {
                 // The requested capacity won't fit in a small string; we need to go big.
-                let mut roc_list = RocList::with_capacity(target_cap);
+                let mut big_string = BigString::with_capacity(target_cap);

-                roc_list.extend_from_slice(small_str.as_bytes());
+                unsafe {
+                    std::ptr::copy_nonoverlapping(
+                        self.as_bytes().as_ptr(),
+                        big_string.ptr_to_first_elem(),
+                        self.len(),
+                    )
+                };

-                *self = RocStr(RocStrInner {
-                    heap_allocated: ManuallyDrop::new(roc_list),
+                big_string.length = self.len();
+                big_string.capacity_or_ref_ptr = target_cap;
+
+                let mut updated = RocStr(RocStrInner {
+                    heap_allocated: ManuallyDrop::new(big_string),
                 });
+
+                mem::swap(self, &mut updated);
+                mem::forget(updated);
             }
         } else {
-            let mut roc_list = unsafe { ManuallyDrop::take(&mut self.0.heap_allocated) };
-            roc_list.reserve(bytes);
+            let mut big_string = unsafe { ManuallyDrop::take(&mut self.0.heap_allocated) };
+            big_string.reserve(bytes);

             let mut updated = RocStr(RocStrInner {
-                heap_allocated: ManuallyDrop::new(roc_list),
+                heap_allocated: ManuallyDrop::new(big_string),
             });

             mem::swap(self, &mut updated);
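
The `swap`/`forget` sequence above replaces `*self` without running the old value's destructor. A standalone sketch of the pattern (illustrative only, not code from the diff):

    use std::mem;

    fn replace_without_drop<T>(slot: &mut T, new: T) {
        let mut new = new;
        mem::swap(slot, &mut new);
        // `new` now holds the old value; skip its destructor.
        mem::forget(new);
    }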
@@ -204,12 +245,57 @@ impl RocStr {
         }
     }

+    #[track_caller]
+    pub fn slice_range(&self, range: Range<usize>) -> Self {
+        match self.try_slice_range(range) {
+            Some(x) => x,
+            None => panic!("slice index out of range"),
+        }
+    }
+
+    pub fn try_slice_range(&self, range: Range<usize>) -> Option<Self> {
+        if self.as_str().get(range.start..range.end).is_none() {
+            None
+        } else if range.end - range.start <= SmallString::CAPACITY {
+            let slice = &self.as_bytes()[range];
+            let small_string =
+                unsafe { SmallString::try_from_utf8_bytes(slice).unwrap_unchecked() };
+
+            // NOTE decrements `self`
+            Some(RocStr(RocStrInner { small_string }))
+        } else {
+            // increment the refcount
+            std::mem::forget(self.clone());
+
+            let big = unsafe { &self.0.heap_allocated };
+            let ptr = unsafe { (self.as_bytes().as_ptr() as *mut u8).add(range.start) };
+
+            let heap_allocated = ManuallyDrop::new(BigString {
+                elements: unsafe { NonNull::new_unchecked(ptr) },
+                length: (isize::MIN as usize) | (range.end - range.start),
+                capacity_or_ref_ptr: (big.ptr_to_first_elem() as usize) >> 1,
+            });
+
+            Some(RocStr(RocStrInner { heap_allocated }))
+        }
+    }
+
+    pub fn split_once(&self, delimiter: &str) -> Option<(Self, Self)> {
+        let (a, b) = self.as_str().split_once(delimiter)?;
+
+        let x = self.slice_range(0..a.len());
+        let y = self.slice_range(self.len() - b.len()..self.len());
+
+        Some((x, y))
+    }
+
+    pub fn split_whitespace(&self) -> SplitWhitespace<'_> {
+        SplitWhitespace(self.as_str().char_indices().peekable(), self)
+    }
+
     /// Returns the index of the first interior \0 byte in the string, or None if there are none.
     fn first_nul_byte(&self) -> Option<usize> {
-        match self.as_enum_ref() {
-            RocStrInnerRef::HeapAllocated(roc_list) => roc_list.iter().position(|byte| *byte == 0),
-            RocStrInnerRef::SmallString(small_string) => small_string.first_nul_byte(),
-        }
+        self.as_bytes().iter().position(|byte| *byte == 0)
     }

     // If the string is under this many bytes, the with_terminator family
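
A feel for the new `RocStr` API (hypothetical usage; ranges at or under `SmallString::CAPACITY` bytes come back as small strings, longer ones share the original allocation):

    let s = RocStr::from("chaos is a ladder");

    // Panicking and fallible variants of the same operation:
    assert_eq!(s.slice_range(0..5).as_str(), "chaos");
    assert!(s.try_slice_range(0..999).is_none());

    // split_once slices around the first occurrence of the delimiter.
    let (before, after) = s.split_once(" is ").unwrap();
    assert_eq!((before.as_str(), after.as_str()), ("chaos", "a ladder"));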
@@ -267,60 +353,49 @@ impl RocStr {
         };

         match self.as_enum_ref() {
-            RocStrInnerRef::HeapAllocated(roc_list) => {
+            RocStrInnerRef::HeapAllocated(big_string) => {
                 unsafe {
-                    match roc_list.storage() {
-                        Some(storage) if storage.is_unique() => {
-                            // The backing RocList was unique, so we can mutate it in-place.
-                            let len = roc_list.len();
-                            let ptr = if len < roc_list.capacity() {
-                                // We happen to have excess capacity already, so we will be able
-                                // to write the terminator into the first byte of excess capacity.
-                                roc_list.ptr_to_first_elem() as *mut u8
-                            } else {
-                                // We always have an allocation that's even bigger than necessary,
-                                // because the refcount bytes take up more than the 1B needed for
-                                // the terminator. We just need to shift the bytes over on top
-                                // of the refcount.
-                                let alloc_ptr = roc_list.ptr_to_allocation() as *mut u8;
-
-                                // First, copy the bytes over the original allocation - effectively
-                                // shifting everything over by one `usize`. Now we no longer have a
-                                // refcount (but the terminated won't use that anyway), but we do
-                                // have a free `usize` at the end.
-                                //
-                                // IMPORTANT: Must use ptr::copy instead of ptr::copy_nonoverlapping
-                                // because the regions definitely overlap!
-                                ptr::copy(roc_list.ptr_to_first_elem() as *mut u8, alloc_ptr, len);
-
-                                alloc_ptr
-                            };
-
-                            terminate(ptr, len)
-                        }
-                        Some(_) => {
-                            let len = roc_list.len();
-
-                            // The backing list was not unique, so we can't mutate it in-place.
-                            // ask for `len + 1` to store the original string and the terminator
-                            with_stack_bytes(len + 1, |alloc_ptr: *mut u8| {
-                                let alloc_ptr = alloc_ptr as *mut u8;
-                                let elem_ptr = roc_list.ptr_to_first_elem() as *mut u8;
-
-                                // memcpy the bytes into the stack allocation
-                                ptr::copy_nonoverlapping(elem_ptr, alloc_ptr, len);
-
-                                terminate(alloc_ptr, len)
-                            })
-                        }
-                        None => {
-                            // The backing list was empty.
-                            //
-                            // No need to do a heap allocation for an empty string - we
-                            // can just do a stack allocation that will live for the
-                            // duration of the function.
-                            func([terminator].as_mut_ptr(), 0)
-                        }
+                    if big_string.is_unique() {
+                        // The backing RocList was unique, so we can mutate it in-place.
+                        let len = big_string.len();
+                        let ptr = if len < big_string.capacity() {
+                            // We happen to have excess capacity already, so we will be able
+                            // to write the terminator into the first byte of excess capacity.
+                            big_string.ptr_to_first_elem() as *mut u8
+                        } else {
+                            // We always have an allocation that's even bigger than necessary,
+                            // because the refcount bytes take up more than the 1B needed for
+                            // the terminator. We just need to shift the bytes over on top
+                            // of the refcount.
+                            let alloc_ptr = big_string.ptr_to_allocation() as *mut u8;
+
+                            // First, copy the bytes over the original allocation - effectively
+                            // shifting everything over by one `usize`. Now we no longer have a
+                            // refcount (but the terminated won't use that anyway), but we do
+                            // have a free `usize` at the end.
+                            //
+                            // IMPORTANT: Must use ptr::copy instead of ptr::copy_nonoverlapping
+                            // because the regions definitely overlap!
+                            ptr::copy(big_string.ptr_to_first_elem() as *mut u8, alloc_ptr, len);
+
+                            alloc_ptr
+                        };
+
+                        terminate(ptr, len)
+                    } else {
+                        let len = big_string.len();
+
+                        // The backing list was not unique, so we can't mutate it in-place.
+                        // ask for `len + 1` to store the original string and the terminator
+                        with_stack_bytes(len + 1, |alloc_ptr: *mut u8| {
+                            let alloc_ptr = alloc_ptr as *mut u8;
+                            let elem_ptr = big_string.ptr_to_first_elem() as *mut u8;

+                            // memcpy the bytes into the stack allocation
+                            std::ptr::copy_nonoverlapping(elem_ptr, alloc_ptr, len);
+
+                            terminate(alloc_ptr, len)
+                        })
                     }
                 }
             }
@@ -485,57 +560,46 @@ impl RocStr {
         };

         match self.as_enum_ref() {
-            RocStrInnerRef::HeapAllocated(roc_list) => {
-                let len = roc_list.len();
+            RocStrInnerRef::HeapAllocated(big_string) => {
+                let len = big_string.len();

                 unsafe {
-                    match roc_list.storage() {
-                        Some(storage) if storage.is_unique() => {
-                            // The backing RocList was unique, so we can mutate it in-place.
-
-                            // We need 1 extra elem for the terminator. It must be an elem,
-                            // not a byte, because we'll be providing a pointer to elems.
-                            let needed_bytes = (len + 1) * size_of::<E>();
-
-                            // We can use not only the capacity on the heap, but also
-                            // the bytes originally used for the refcount.
-                            let available_bytes = roc_list.capacity() + size_of::<Storage>();
-
-                            if needed_bytes < available_bytes {
-                                debug_assert!(align_of::<Storage>() >= align_of::<E>());
-
-                                // We happen to have sufficient excess capacity already,
-                                // so we will be able to write the new elements as well as
-                                // the terminator into the existing allocation.
-                                let ptr = roc_list.ptr_to_allocation() as *mut E;
-                                let answer = terminate(ptr, self.as_str());
-
-                                // We cannot rely on the RocStr::drop implementation, because
-                                // it tries to use the refcount - which we just overwrote
-                                // with string bytes.
-                                mem::forget(self);
-                                crate::roc_dealloc(ptr.cast(), mem::align_of::<E>() as u32);
-
-                                answer
-                            } else {
-                                // We didn't have sufficient excess capacity already,
-                                // so we need to do either a new stack allocation or a new
-                                // heap allocation.
-                                fallback(self.as_str())
-                            }
-                        }
-                        Some(_) => {
-                            // The backing list was not unique, so we can't mutate it in-place.
-                            fallback(self.as_str())
-                        }
-                        None => {
-                            // The backing list was empty.
-                            //
-                            // No need to do a heap allocation for an empty string - we
-                            // can just do a stack allocation that will live for the
-                            // duration of the function.
-                            func([terminator].as_mut_ptr() as *mut E, "")
-                        }
+                    if big_string.is_unique() {
+                        // The backing RocList was unique, so we can mutate it in-place.
+
+                        // We need 1 extra elem for the terminator. It must be an elem,
+                        // not a byte, because we'll be providing a pointer to elems.
+                        let needed_bytes = (len + 1) * size_of::<E>();
+
+                        // We can use not only the capacity on the heap, but also
+                        // the bytes originally used for the refcount.
+                        let available_bytes = big_string.capacity() + size_of::<Storage>();
+
+                        if needed_bytes < available_bytes {
+                            debug_assert!(align_of::<Storage>() >= align_of::<E>());
+
+                            // We happen to have sufficient excess capacity already,
+                            // so we will be able to write the new elements as well as
+                            // the terminator into the existing allocation.
+                            let ptr = big_string.ptr_to_allocation() as *mut E;
+                            let answer = terminate(ptr, self.as_str());
+
+                            // We cannot rely on the RocStr::drop implementation, because
+                            // it tries to use the refcount - which we just overwrote
+                            // with string bytes.
+                            mem::forget(self);
+                            crate::roc_dealloc(ptr.cast(), mem::align_of::<E>() as u32);
+
+                            answer
+                        } else {
+                            // We didn't have sufficient excess capacity already,
+                            // so we need to do either a new stack allocation or a new
+                            // heap allocation.
+                            fallback(self.as_str())
+                        }
+                    } else {
+                        // The backing list was not unique, so we can't mutate it in-place.
+                        fallback(self.as_str())
                     }
                 }
             }
@@ -558,12 +622,44 @@ impl RocStr {
     }
 }

+pub struct SplitWhitespace<'a>(std::iter::Peekable<std::str::CharIndices<'a>>, &'a RocStr);
+
+impl Iterator for SplitWhitespace<'_> {
+    type Item = RocStr;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let start = 'blk: {
+            while let Some((pos, c)) = self.0.peek() {
+                if c.is_whitespace() {
+                    self.0.next();
+                } else {
+                    break 'blk *pos;
+                }
+            }
+
+            return None;
+        };
+
+        let end = 'blk: {
+            for (pos, c) in self.0.by_ref() {
+                if c.is_whitespace() {
+                    break 'blk pos;
+                }
+            }
+
+            break 'blk self.1.len();
+        };
+
+        self.1.try_slice_range(start..end)
+    }
+}
+
 impl Deref for RocStr {
     type Target = str;

     fn deref(&self) -> &Self::Target {
         match self.as_enum_ref() {
-            RocStrInnerRef::HeapAllocated(h) => unsafe { core::str::from_utf8_unchecked(h) },
+            RocStrInnerRef::HeapAllocated(h) => h.as_str(),
             RocStrInnerRef::SmallString(s) => s,
         }
     }
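
In the iterator above, the first labeled block skips leading whitespace and returns `None` once the input is exhausted; the second scans to the next whitespace (or the end of the string), so every yielded item is a slice of the original `RocStr`. Hypothetical usage:

    let s = RocStr::from("  two  words ");
    let words: Vec<RocStr> = s.split_whitespace().collect();
    assert_eq!(words, vec![RocStr::from("two"), RocStr::from("words")]);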
@@ -697,6 +793,203 @@ impl From<SendSafeRocStr> for RocStr {
     }
 }

+#[repr(C)]
+struct BigString {
+    elements: NonNull<u8>,
+    length: usize,
+    capacity_or_ref_ptr: usize,
+}
+
+const SEAMLESS_SLICE_BIT: usize = isize::MIN as usize;
+
+impl BigString {
+    fn len(&self) -> usize {
+        self.length & !SEAMLESS_SLICE_BIT
+    }
+
+    fn capacity(&self) -> usize {
+        if self.is_seamless_slice() {
+            self.len()
+        } else {
+            self.capacity_or_ref_ptr
+        }
+    }
+
+    fn is_seamless_slice(&self) -> bool {
+        (self.length as isize) < 0
+    }
+
+    fn ptr_to_first_elem(&self) -> *mut u8 {
+        unsafe { core::mem::transmute(self.elements) }
+    }
+
+    fn ptr_to_allocation(&self) -> *mut usize {
+        // these are the same because the alignment of u8 is just 1
+        self.ptr_to_refcount()
+    }
+
+    fn ptr_to_refcount(&self) -> *mut usize {
+        if self.is_seamless_slice() {
+            unsafe { ((self.capacity_or_ref_ptr << 1) as *mut usize).sub(1) }
+        } else {
+            unsafe { self.ptr_to_first_elem().cast::<usize>().sub(1) }
+        }
+    }
+
+    fn as_bytes(&self) -> &[u8] {
+        unsafe { std::slice::from_raw_parts(self.ptr_to_first_elem(), self.len()) }
+    }
+
+    fn as_str(&self) -> &str {
+        unsafe { std::str::from_utf8_unchecked(self.as_bytes()) }
+    }
+
+    fn is_unique(&self) -> bool {
+        if self.capacity() == 0 {
+            return false;
+        }
+
+        let ptr = self.ptr_to_refcount();
+        let rc = unsafe { std::ptr::read(ptr) as isize };
+
+        rc == isize::MIN
+    }
+
+    fn is_readonly(&self) -> bool {
+        if self.capacity() == 0 {
+            return true;
+        }
+
+        let ptr = self.ptr_to_refcount();
+        let rc = unsafe { std::ptr::read(ptr) as isize };
+
+        rc == 0
+    }
+
+    fn set_readonly(&mut self) {
+        assert_ne!(self.capacity(), 0);
+
+        let ptr = self.ptr_to_refcount();
+        unsafe { std::ptr::write(ptr, 0) }
+    }
+
+    fn inc(&mut self, n: usize) {
+        let ptr = self.ptr_to_refcount();
+        unsafe {
+            let value = std::ptr::read(ptr);
+            std::ptr::write(ptr, Ord::max(0, ((value as isize) + n as isize) as usize));
+        }
+    }
+
+    fn dec(&mut self) {
+        if self.capacity() == 0 {
+            // no valid allocation, elements pointer is dangling
+            return;
+        }
+
+        let ptr = self.ptr_to_refcount();
+        unsafe {
+            let value = std::ptr::read(ptr) as isize;
+
+            match value {
+                0 => {
+                    // static lifetime, do nothing
+                }
+                isize::MIN => {
+                    // refcount becomes zero; free allocation
+                    crate::roc_dealloc(self.ptr_to_allocation().cast(), 1);
+                }
+                _ => {
+                    std::ptr::write(ptr, (value - 1) as usize);
+                }
+            }
+        }
+    }
+
+    fn with_capacity(cap: usize) -> Self {
+        let mut this = Self {
+            elements: NonNull::dangling(),
+            length: 0,
+            capacity_or_ref_ptr: 0,
+        };
+
+        this.reserve(cap);
+
+        this
+    }
+
+    /// Increase a BigString's capacity by at least the requested number of elements (possibly more).
+    ///
+    /// May return a new BigString, if the provided one was not unique.
+    fn reserve(&mut self, n: usize) {
+        let align = std::mem::size_of::<usize>();
+        let desired_cap = self.len() + n;
+        let desired_alloc = align + desired_cap;
+
+        if self.is_unique() {
+            if self.capacity() >= desired_cap {
+                return;
+            }
+
+            let new_alloc = unsafe {
+                roc_realloc(
+                    self.ptr_to_allocation().cast(),
+                    desired_alloc as _,
+                    align + self.capacity(),
+                    align as _,
+                )
+            };
+
+            let elements = unsafe { NonNull::new_unchecked(new_alloc.cast::<u8>().add(align)) };
+
+            let mut this = Self {
+                elements,
+                length: self.len(),
+                capacity_or_ref_ptr: desired_cap,
+            };
+
+            std::mem::swap(&mut this, self);
+            std::mem::forget(this);
+        } else {
+            let ptr = unsafe { crate::roc_alloc(desired_alloc, align as _) } as *mut u8;
+            let elements = unsafe { NonNull::new_unchecked(ptr.cast::<u8>().add(align)) };
+
+            unsafe {
+                // Copy the old elements to the new allocation.
+                std::ptr::copy_nonoverlapping(self.ptr_to_first_elem(), ptr.add(align), self.len());
+            }
+
+            let mut this = Self {
+                elements,
+                length: self.len(),
+                capacity_or_ref_ptr: desired_cap,
+            };
+
+            std::mem::swap(&mut this, self);
+            std::mem::drop(this);
+        }
+    }
+}
+
+impl Clone for BigString {
+    fn clone(&self) -> Self {
+        let mut this = Self {
+            elements: self.elements,
+            length: self.length,
+            capacity_or_ref_ptr: self.capacity_or_ref_ptr,
+        };
+
+        this.inc(1);
+
+        this
+    }
+}
+
+impl Drop for BigString {
+    fn drop(&mut self) {
+        self.dec()
+    }
+}
+
 #[repr(C)]
 union RocStrInner {
     // TODO: this really should be separated from the List type.
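
The refcount convention `BigString` reads and writes (as laid out in this diff): the `usize` directly before the first byte holds `0` for read-only/static data, and otherwise counts references starting at `isize::MIN`, so `isize::MIN` means unique. A sketch of the arithmetic:

    // Refcounts start at isize::MIN and count up toward zero.
    let mut rc: isize = isize::MIN; // freshly allocated: unique
    rc += 1;                        // clone(): two references
    rc -= 1;                        // dec(): back to unique
    assert_eq!(rc, isize::MIN);     // the next dec() frees the allocation
    // A stored value of 0 is the read-only sentinel and is never freed.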
@@ -704,12 +997,12 @@ union RocStrInner {
     // Currently, there are work arounds in RocList to handle both via removing the highest bit of length in many cases.
     // With glue changes, we should probably rewrite these cleanly to match what is in the zig bitcode.
     // It is definitely a bit stale now and I think the storage mechanism can be quite confusing with our extra pieces of state.
-    heap_allocated: ManuallyDrop<RocList<u8>>,
+    heap_allocated: ManuallyDrop<BigString>,
     small_string: SmallString,
 }

 enum RocStrInnerRef<'a> {
-    HeapAllocated(&'a RocList<u8>),
+    HeapAllocated(&'a BigString),
     SmallString(&'a SmallString),
 }
@@ -756,17 +1049,6 @@ impl SmallString {
     fn len(&self) -> usize {
         usize::from(self.len & !RocStr::MASK)
     }
-
-    /// Returns the index of the first interior \0 byte in the string, or None if there are none.
-    fn first_nul_byte(&self) -> Option<usize> {
-        for (index, byte) in self.bytes[0..self.len()].iter().enumerate() {
-            if *byte == 0 {
-                return Some(index);
-            }
-        }
-
-        None
-    }
 }

 impl Deref for SmallString {

View file

@@ -358,6 +358,53 @@ mod test_roc_std {
         let roc_list = RocList::<RocStr>::empty();
         assert!(roc_list.is_unique());
     }

+    #[test]
+    fn slicing_and_dicing_list() {
+        let example = RocList::from_slice(b"chaos is a ladder");
+
+        // basic slice from the start
+        assert_eq!(example.slice_range(0..5).as_slice(), b"chaos");
+
+        // slice in the middle
+        assert_eq!(example.slice_range(6..10).as_slice(), b"is a");
+
+        // slice of slice
+        let first = example.slice_range(0..5);
+        assert_eq!(first.slice_range(0..3).as_slice(), b"cha");
+    }
+
+    #[test]
+    fn slicing_and_dicing_str() {
+        let example = RocStr::from("chaos is a ladder");
+
+        // basic slice from the start
+        assert_eq!(example.slice_range(0..5).as_str(), "chaos");
+
+        // slice in the middle
+        assert_eq!(example.slice_range(6..10).as_str(), "is a");
+
+        // slice of slice
+        let first = example.slice_range(0..5);
+        assert_eq!(first.slice_range(0..3).as_str(), "cha");
+    }
+
+    #[test]
+    fn split_whitespace() {
+        let example = RocStr::from("chaos is a ladder");
+
+        let split: Vec<_> = example.split_whitespace().collect();
+
+        assert_eq!(
+            split,
+            vec![
+                RocStr::from("chaos"),
+                RocStr::from("is"),
+                RocStr::from("a"),
+                RocStr::from("ladder"),
+            ]
+        );
+    }
 }

 #[cfg(test)]