diff --git a/crates/compiler/builtins/bitcode/src/main.zig b/crates/compiler/builtins/bitcode/src/main.zig
index d6b5a5fecd..90fde2879c 100644
--- a/crates/compiler/builtins/bitcode/src/main.zig
+++ b/crates/compiler/builtins/bitcode/src/main.zig
@@ -155,6 +155,7 @@ comptime {
     exportStrFn(str.strNumberOfBytes, "number_of_bytes");
     exportStrFn(str.strFromFloatC, "from_float");
     exportStrFn(str.strEqual, "equal");
+    exportStrFn(str.substringUnsafe, "substring_unsafe");
     exportStrFn(str.strToUtf8C, "to_utf8");
     exportStrFn(str.fromUtf8C, "from_utf8");
     exportStrFn(str.fromUtf8RangeC, "from_utf8_range");
diff --git a/crates/compiler/builtins/roc/Str.roc b/crates/compiler/builtins/roc/Str.roc
index 4ce043b1ce..ec7a97d739 100644
--- a/crates/compiler/builtins/roc/Str.roc
+++ b/crates/compiler/builtins/roc/Str.roc
@@ -224,3 +224,9 @@ toI8 : Str -> Result I8 [InvalidNumStr]*
 
 ## Gets the byte at the given index, without performing a bounds check
 getUnsafe : Str, Nat -> U8
+
+## Gives the number of string bytes
+countBytes : Str -> Nat
+
+## String slice that does not do bounds checking or UTF-8 verification
+substringUnsafe : Str, Nat, Nat -> Str
diff --git a/crates/compiler/builtins/src/bitcode.rs b/crates/compiler/builtins/src/bitcode.rs
index fcd25b48c6..d6fc53d0fd 100644
--- a/crates/compiler/builtins/src/bitcode.rs
+++ b/crates/compiler/builtins/src/bitcode.rs
@@ -324,6 +324,7 @@ pub const STR_TO_INT: IntrinsicName = int_intrinsic!("roc_builtins.str.to_int");
 pub const STR_TO_FLOAT: IntrinsicName = float_intrinsic!("roc_builtins.str.to_float");
 pub const STR_TO_DECIMAL: &str = "roc_builtins.str.to_decimal";
 pub const STR_EQUAL: &str = "roc_builtins.str.equal";
+pub const STR_SUBSTRING_UNSAFE: &str = "roc_builtins.str.substring_unsafe";
 pub const STR_TO_UTF8: &str = "roc_builtins.str.to_utf8";
 pub const STR_FROM_UTF8: &str = "roc_builtins.str.from_utf8";
 pub const STR_FROM_UTF8_RANGE: &str = "roc_builtins.str.from_utf8_range";
diff --git a/crates/compiler/can/src/builtins.rs b/crates/compiler/can/src/builtins.rs
index 42e81b7d9f..37ebeac1f8 100644
--- a/crates/compiler/can/src/builtins.rs
+++ b/crates/compiler/can/src/builtins.rs
@@ -82,6 +82,7 @@ pub fn builtin_defs_map(symbol: Symbol, var_store: &mut VarStore) -> Option
     STR_ENDS_WITH => str_ends_with,
     STR_COUNT_GRAPHEMES => str_count_graphemes,
     STR_COUNT_BYTES=> str_count_bytes,
+    STR_SUBSTRING_UNSAFE => str_substring_unsafe,
     STR_FROM_UTF8 => str_from_utf8,
     STR_FROM_UTF8_RANGE => str_from_utf8_range,
     STR_TO_UTF8 => str_to_utf8,
@@ -1731,6 +1732,11 @@ fn str_count_bytes(symbol: Symbol, var_store: &mut VarStore) -> Def {
     lowlevel_1(symbol, LowLevel::StrCountBytes, var_store)
 }
 
+/// Str.substringUnsafe : Str, Nat, Nat -> Str
+fn str_substring_unsafe(symbol: Symbol, var_store: &mut VarStore) -> Def {
+    lowlevel_3(symbol, LowLevel::StrSubstringUnsafe, var_store)
+}
+
 /// Str.fromUtf8 : List U8 -> Result Str [BadUtf8 { byteIndex : Nat, problem : Utf8Problem } }]*
 fn str_from_utf8(symbol: Symbol, var_store: &mut VarStore) -> Def {
     let bytes_var = var_store.fresh();
diff --git a/crates/compiler/gen_llvm/src/llvm/build.rs b/crates/compiler/gen_llvm/src/llvm/build.rs
index 82ba19e6a0..199608b99b 100644
--- a/crates/compiler/gen_llvm/src/llvm/build.rs
+++ b/crates/compiler/gen_llvm/src/llvm/build.rs
@@ -5347,6 +5347,15 @@ fn run_low_level<'a, 'ctx, 'env>(
             let string = load_symbol(scope, &args[0]);
             call_bitcode_fn(env, &[string], bitcode::STR_COUNT_BYTES)
         }
+        StrSubstringUnsafe => {
+            // Str.substringUnsafe : Str, Nat, Nat -> Str
+            debug_assert_eq!(args.len(), 3);
+
+            let string = load_symbol(scope, &args[0]);
+            let start = load_symbol(scope, &args[1]);
+            let length = load_symbol(scope, &args[2]);
+            call_str_bitcode_fn(env, &[string, start, length], bitcode::STR_SUBSTRING_UNSAFE)
+        }
         StrTrim => {
             // Str.trim : Str -> Str
             debug_assert_eq!(args.len(), 1);
diff --git a/crates/compiler/gen_wasm/src/low_level.rs b/crates/compiler/gen_wasm/src/low_level.rs
index c851a4cc19..df71f55988 100644
--- a/crates/compiler/gen_wasm/src/low_level.rs
+++ b/crates/compiler/gen_wasm/src/low_level.rs
@@ -277,6 +277,9 @@ impl<'a> LowLevelCall<'a> {
             StrToUtf8 => self.load_args_and_call_zig(backend, bitcode::STR_TO_UTF8),
             StrRepeat => self.load_args_and_call_zig(backend, bitcode::STR_REPEAT),
             StrTrim => self.load_args_and_call_zig(backend, bitcode::STR_TRIM),
+            StrSubstringUnsafe => {
+                self.load_args_and_call_zig(backend, bitcode::STR_SUBSTRING_UNSAFE)
+            }
 
             // List
             ListLen => match backend.storage.get(&self.arguments[0]) {
diff --git a/crates/compiler/module/src/low_level.rs b/crates/compiler/module/src/low_level.rs
index 9ef3aebe44..2ae3cd16b6 100644
--- a/crates/compiler/module/src/low_level.rs
+++ b/crates/compiler/module/src/low_level.rs
@@ -26,6 +26,7 @@ pub enum LowLevel {
     StrToNum,
     StrToScalars,
     StrGetUnsafe,
+    StrSubstringUnsafe,
     ListLen,
     ListWithCapacity,
     ListGetUnsafe,
diff --git a/crates/compiler/module/src/symbol.rs b/crates/compiler/module/src/symbol.rs
index 163e71061a..87f06b9545 100644
--- a/crates/compiler/module/src/symbol.rs
+++ b/crates/compiler/module/src/symbol.rs
@@ -1192,6 +1192,7 @@ define_builtins! {
         34 STR_TO_SCALARS: "toScalars"
         35 STR_GET_UNSAFE: "getUnsafe"
         36 STR_COUNT_BYTES: "countBytes"
+        37 STR_SUBSTRING_UNSAFE: "substringUnsafe"
     }
     5 LIST: "List" => {
         0 LIST_LIST: "List" imported // the List.List type alias
diff --git a/crates/compiler/mono/src/borrow.rs b/crates/compiler/mono/src/borrow.rs
index fda9d33735..180b7c1cfd 100644
--- a/crates/compiler/mono/src/borrow.rs
+++ b/crates/compiler/mono/src/borrow.rs
@@ -895,6 +895,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] {
         StrGetUnsafe | ListGetUnsafe => arena.alloc_slice_copy(&[borrowed, irrelevant]),
         ListConcat => arena.alloc_slice_copy(&[owned, owned]),
         StrConcat => arena.alloc_slice_copy(&[owned, borrowed]),
+        StrSubstringUnsafe => arena.alloc_slice_copy(&[owned, irrelevant, irrelevant]),
         StrTrim => arena.alloc_slice_copy(&[owned]),
         StrTrimLeft => arena.alloc_slice_copy(&[owned]),
         StrTrimRight => arena.alloc_slice_copy(&[owned]),