roc/roc_std/src/roc_str.rs
2022-05-06 15:08:08 -04:00

233 lines
5.4 KiB
Rust

#![deny(unsafe_op_in_unsafe_fn)]
use core::{
convert::TryFrom,
fmt::Debug,
mem::{size_of, ManuallyDrop},
ops::{Deref, DerefMut},
};
use crate::{rc::ReferenceCount, RocList};
/// A Roc string value.
///
/// Wraps `RocStrInner`, a union of a heap-allocated `RocList<u8>` and an
/// inline `SmallString`. `#[repr(transparent)]` gives this type the same
/// layout as that union.
#[repr(transparent)]
pub struct RocStr(RocStrInner);
impl RocStr {
pub const SIZE: usize = core::mem::size_of::<Self>();
pub const MASK: u8 = 0b1000_0000;
pub const fn empty() -> Self {
Self(RocStrInner {
small_string: SmallString::empty(),
})
}
/// Create a string from bytes.
///
/// # Safety
///
/// `slice` must be valid UTF-8.
pub unsafe fn from_slice(slice: &[u8]) -> Self {
if let Some(small_string) = unsafe { SmallString::try_from(slice) } {
Self(RocStrInner { small_string })
} else {
let heap_allocated = RocList::from_slice(slice);
Self(RocStrInner {
heap_allocated: ManuallyDrop::new(heap_allocated),
})
}
}
fn is_small_str(&self) -> bool {
unsafe { self.0.small_string.is_small_str() }
}
fn as_enum_ref(&self) -> RocStrInnerRef {
if self.is_small_str() {
unsafe { RocStrInnerRef::SmallString(&self.0.small_string) }
} else {
unsafe { RocStrInnerRef::HeapAllocated(&self.0.heap_allocated) }
}
}
pub fn len(&self) -> usize {
match self.as_enum_ref() {
RocStrInnerRef::HeapAllocated(h) => h.len(),
RocStrInnerRef::SmallString(s) => s.len(),
}
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn as_str(&self) -> &str {
&*self
}
}
impl Deref for RocStr {
    type Target = str;

    /// Borrows the string contents as a `str`, whichever representation is
    /// currently active.
    fn deref(&self) -> &Self::Target {
        match self.as_enum_ref() {
            RocStrInnerRef::SmallString(small) => small.deref(),
            RocStrInnerRef::HeapAllocated(list) => {
                let bytes: &[u8] = &*list;
                // SAFETY: relies on the invariant, stated on
                // `RocStr::from_slice`, that the stored bytes are valid UTF-8.
                unsafe { core::str::from_utf8_unchecked(bytes) }
            }
        }
    }
}
impl Default for RocStr {
fn default() -> Self {
Self::empty()
}
}
impl From<&str> for RocStr {
fn from(s: &str) -> Self {
unsafe { Self::from_slice(s.as_bytes()) }
}
}
impl PartialEq for RocStr {
    /// Two strings are equal when their `str` contents are equal.
    fn eq(&self, other: &Self) -> bool {
        self.as_str() == other.as_str()
    }
}
// Marker impl: `PartialEq` for `RocStr` compares `str` contents, which is a
// full equivalence relation.
impl Eq for RocStr {}
impl PartialOrd for RocStr {
    /// Compares two strings; always returns `Some`.
    ///
    /// Delegates to the `Ord` implementation so the two orderings cannot
    /// drift apart. The `core` path is used for `Ordering` to match the rest
    /// of this file, which imports only from `core`.
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
impl Ord for RocStr {
    /// Orders two strings using `str`'s ordering on their contents.
    ///
    /// `Ordering` is named through `core` to match the rest of this file,
    /// which imports only from `core`.
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        self.as_str().cmp(other.as_str())
    }
}
impl Debug for RocStr {
    /// Formats the string the same way `str`'s `Debug` implementation does.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        Debug::fmt(self.as_str(), f)
    }
}
unsafe impl ReferenceCount for RocStr {
    /// Forwards to the underlying list's `increment` when the string is
    /// heap-allocated; does nothing for an inline small string.
    fn increment(&self) {
        if let RocStrInnerRef::HeapAllocated(list) = self.as_enum_ref() {
            list.increment();
        }
    }

    /// Forwards to `RocList::<u8>::decrement` when the string is
    /// heap-allocated; does nothing for an inline small string.
    ///
    /// # Safety
    ///
    /// `ptr` is dereferenced, so it must point to a valid `RocStr`. Any further
    /// requirements are those of `ReferenceCount::decrement`.
    unsafe fn decrement(ptr: *const Self) {
        // SAFETY: the caller guarantees `ptr` points to a valid `RocStr`.
        let this = unsafe { &*ptr };
        if this.is_small_str() {
            return;
        }
        // SAFETY: the small-string flag is clear, so the bytes behind `ptr`
        // hold the heap variant. `RocStr` is `repr(transparent)` over a
        // `repr(C)` union whose heap field is `ManuallyDrop<RocList<u8>>`, so
        // the pointer is cast to the type `RocList::<u8>::decrement` expects.
        unsafe {
            RocList::<u8>::decrement(ptr.cast());
        }
    }
}
impl Clone for RocStr {
    /// Clones the string.
    ///
    /// An inline small string is copied by value. A heap-allocated string is
    /// cloned through `RocList`'s own `Clone` implementation.
    fn clone(&self) -> Self {
        let inner = match self.as_enum_ref() {
            RocStrInnerRef::SmallString(small) => RocStrInner {
                small_string: *small,
            },
            RocStrInnerRef::HeapAllocated(list) => RocStrInner {
                heap_allocated: ManuallyDrop::new(list.clone()),
            },
        };
        Self(inner)
    }
}
impl Drop for RocStr {
    /// Drops the heap-allocated list, if there is one. Inline small strings
    /// own no resources and need no cleanup.
    fn drop(&mut self) {
        if !self.is_small_str() {
            // SAFETY: the small-string flag is clear, so the heap variant is
            // active. It is dropped only here, as `self` is being destroyed.
            unsafe {
                ManuallyDrop::drop(&mut self.0.heap_allocated);
            }
        }
    }
}
/// Raw storage behind a `RocStr`: either a heap-allocated byte list or an
/// inline small string, sharing the same memory.
///
/// Which variant is active is determined by `SmallString::is_small_str`, which
/// tests the `RocStr::MASK` bit of the `len` byte.
#[repr(C)]
union RocStrInner {
/// Heap-backed representation. Wrapped in `ManuallyDrop` because union fields
/// are never dropped automatically; `RocStr`'s `Drop` impl drops it explicitly.
heap_allocated: ManuallyDrop<RocList<u8>>,
/// Inline representation for strings of at most `SmallString::CAPACITY` bytes.
small_string: SmallString,
}
/// Safe, borrowed view of whichever `RocStrInner` variant is active, as
/// produced by `RocStr::as_enum_ref`.
enum RocStrInnerRef<'a> {
/// The string is stored in a heap-allocated `RocList<u8>`.
HeapAllocated(&'a RocList<u8>),
/// The string is stored inline as a `SmallString`.
SmallString(&'a SmallString),
}
/// Inline string storage occupying the same number of bytes as a `RocList<u8>`.
#[derive(Debug, Clone, Copy)]
#[repr(C)]
struct SmallString {
/// Inline byte storage; only the first `len()` bytes are string content.
bytes: [u8; Self::CAPACITY],
/// Length of the string with the `RocStr::MASK` bit OR-ed in as the
/// small-string flag.
len: u8,
}
impl SmallString {
    /// Number of bytes that can be stored inline: the size of a `RocList<u8>`
    /// minus the one byte used by `len`.
    const CAPACITY: usize = size_of::<RocList<u8>>() - 1;

    /// Returns a zero-length small string (all bytes zero, flag bit set).
    const fn empty() -> Self {
        Self {
            len: RocStr::MASK,
            bytes: [0; Self::CAPACITY],
        }
    }

    /// Copies `slice` into a new small string, or returns `None` when the
    /// slice is longer than `CAPACITY`.
    ///
    /// # Safety
    ///
    /// `slice` must be valid UTF-8.
    unsafe fn try_from(slice: &[u8]) -> Option<Self> {
        // The length must fit in a `u8` and must not exceed the inline capacity.
        let len_as_u8 = match u8::try_from(slice.len()) {
            Ok(n) if usize::from(n) <= Self::CAPACITY => n,
            _ => return None,
        };

        // Unused trailing bytes stay zeroed.
        let mut bytes = [0u8; Self::CAPACITY];
        bytes[..slice.len()].copy_from_slice(slice);

        Some(Self {
            bytes,
            len: RocStr::MASK | len_as_u8,
        })
    }

    /// Returns `true` when the small-string flag bit is set in `len`.
    fn is_small_str(&self) -> bool {
        (self.len & RocStr::MASK) != 0
    }

    /// Returns the string length in bytes, with the flag bit masked off.
    fn len(&self) -> usize {
        let without_flag = self.len & !RocStr::MASK;
        usize::from(without_flag)
    }
}
impl Deref for SmallString {
    type Target = str;

    /// Borrows the first `len()` inline bytes as a `str`.
    fn deref(&self) -> &Self::Target {
        let used = self.len();
        // SAFETY: relies on the invariants set up by `SmallString::empty` and
        // `SmallString::try_from`: the stored length never exceeds `CAPACITY`,
        // and the stored bytes are valid UTF-8.
        unsafe {
            let contents = self.bytes.get_unchecked(..used);
            core::str::from_utf8_unchecked(contents)
        }
    }
}
impl DerefMut for SmallString {
    /// Mutably borrows the first `len()` inline bytes as a `str`.
    fn deref_mut(&mut self) -> &mut Self::Target {
        let used = self.len();
        // SAFETY: relies on the invariants set up by `SmallString::empty` and
        // `SmallString::try_from`: the stored length never exceeds `CAPACITY`,
        // and the stored bytes are valid UTF-8.
        unsafe {
            let contents = self.bytes.get_unchecked_mut(..used);
            core::str::from_utf8_unchecked_mut(contents)
        }
    }
}