diff --git a/src/build/builtin_compiler/main.zig b/src/build/builtin_compiler/main.zig index 5a5f74cb82..e885737761 100644 --- a/src/build/builtin_compiler/main.zig +++ b/src/build/builtin_compiler/main.zig @@ -140,6 +140,9 @@ fn replaceStrIsEmptyWithLowLevel(env: *ModuleEnv) !std.ArrayList(CIR.Def.Idx) { if (env.common.findIdent("Builtin.Str.drop_suffix")) |str_drop_suffix_ident| { try low_level_map.put(str_drop_suffix_ident, .str_drop_suffix); } + if (env.common.findIdent("Builtin.Str.count_utf8_bytes")) |str_count_utf8_bytes_ident| { + try low_level_map.put(str_count_utf8_bytes_ident, .str_count_utf8_bytes); + } if (env.common.findIdent("Builtin.List.len")) |list_len_ident| { try low_level_map.put(list_len_ident, .list_len); } diff --git a/src/build/roc/Builtin.roc b/src/build/roc/Builtin.roc index c2d7498635..634f376173 100644 --- a/src/build/roc/Builtin.roc +++ b/src/build/roc/Builtin.roc @@ -15,6 +15,7 @@ Builtin :: [].{ with_prefix : Str, Str -> Str drop_prefix : Str, Str -> Str drop_suffix : Str, Str -> Str + count_utf8_bytes : Str -> U64 } List(_item) :: [ProvidedByCompiler].{ diff --git a/src/canonicalize/Expression.zig b/src/canonicalize/Expression.zig index b4e1f813f8..82cd181929 100644 --- a/src/canonicalize/Expression.zig +++ b/src/canonicalize/Expression.zig @@ -415,6 +415,7 @@ pub const Expr = union(enum) { str_with_prefix, str_drop_prefix, str_drop_suffix, + str_count_utf8_bytes, // Numeric to_str operations u8_to_str, diff --git a/src/eval/interpreter.zig b/src/eval/interpreter.zig index ee10ee22b3..431a1c6c1e 100644 --- a/src/eval/interpreter.zig +++ b/src/eval/interpreter.zig @@ -3091,6 +3091,23 @@ pub const Interpreter = struct { out.is_initialized = true; return out; }, + .str_count_utf8_bytes => { + // Str.count_utf8_bytes : Str -> U64 + std.debug.assert(args.len == 1); + + const string_arg = args[0]; + std.debug.assert(string_arg.ptr != null); + + const string: *const RocStr = @ptrCast(@alignCast(string_arg.ptr.?)); + const byte_count = builtins.str.countUtf8Bytes(string.*); + + const result_layout = layout.Layout.int(.u64); + var out = try self.pushRaw(result_layout, 0); + out.is_initialized = false; + try out.setInt(@intCast(byte_count)); + out.is_initialized = true; + return out; + }, .list_len => { // List.len : List(a) -> U64 // Note: listLen returns usize, but List.len always returns U64. diff --git a/test/snapshots/repl/str_count_utf8_bytes.md b/test/snapshots/repl/str_count_utf8_bytes.md new file mode 100644 index 0000000000..ba8272e34a --- /dev/null +++ b/test/snapshots/repl/str_count_utf8_bytes.md @@ -0,0 +1,25 @@ +# META +~~~ini +description=Str.count_utf8_bytes should return the number of bytes in the string +type=repl +~~~ +# SOURCE +~~~roc +» Str.count_utf8_bytes("") +» Str.count_utf8_bytes("hello") +» Str.count_utf8_bytes("hello world") +» Str.count_utf8_bytes("é") +» Str.count_utf8_bytes("🎉") +~~~ +# OUTPUT +0 +--- +5 +--- +11 +--- +2 +--- +4 +# PROBLEMS +NIL