From 8fcd482901902a3d85a5bbfc3bfeed2732c9e32e Mon Sep 17 00:00:00 2001
From: Luke Boswell
Date: Wed, 26 Nov 2025 12:38:54 +1100
Subject: [PATCH] add Str.to_utf8 builtin
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 src/build/builtin_compiler/main.zig |  3 +++
 src/build/roc/Builtin.roc           |  1 +
 src/canonicalize/Expression.zig     |  1 +
 src/eval/interpreter.zig            | 25 +++++++++++++++++++++++++
 test/snapshots/repl/str_to_utf8.md  | 22 ++++++++++++++++++++++
 5 files changed, 52 insertions(+)
 create mode 100644 test/snapshots/repl/str_to_utf8.md

diff --git a/src/build/builtin_compiler/main.zig b/src/build/builtin_compiler/main.zig
index c82ce1a044..029506ebf7 100644
--- a/src/build/builtin_compiler/main.zig
+++ b/src/build/builtin_compiler/main.zig
@@ -152,6 +152,9 @@ fn replaceStrIsEmptyWithLowLevel(env: *ModuleEnv) !std.ArrayList(CIR.Def.Idx) {
     if (env.common.findIdent("Builtin.Str.release_excess_capacity")) |str_release_excess_capacity_ident| {
         try low_level_map.put(str_release_excess_capacity_ident, .str_release_excess_capacity);
     }
+    if (env.common.findIdent("Builtin.Str.to_utf8")) |str_to_utf8_ident| {
+        try low_level_map.put(str_to_utf8_ident, .str_to_utf8);
+    }
     if (env.common.findIdent("Builtin.List.len")) |list_len_ident| {
         try low_level_map.put(list_len_ident, .list_len);
     }
diff --git a/src/build/roc/Builtin.roc b/src/build/roc/Builtin.roc
index 43ec70b67c..1160c81bcc 100644
--- a/src/build/roc/Builtin.roc
+++ b/src/build/roc/Builtin.roc
@@ -19,6 +19,7 @@ Builtin :: [].{
 		with_capacity : U64 -> Str
 		reserve : Str, U64 -> Str
 		release_excess_capacity : Str -> Str
+		to_utf8 : Str -> List(U8)
 	}
 
 	List(_item) :: [ProvidedByCompiler].{
diff --git a/src/canonicalize/Expression.zig b/src/canonicalize/Expression.zig
index bb212db217..f1228da0e4 100644
--- a/src/canonicalize/Expression.zig
+++ b/src/canonicalize/Expression.zig
@@ -419,6 +419,7 @@ pub const Expr = union(enum) {
         str_with_capacity,
         str_reserve,
         str_release_excess_capacity,
+        str_to_utf8,
 
         // Numeric to_str operations
         u8_to_str,
diff --git a/src/eval/interpreter.zig b/src/eval/interpreter.zig
index cb51dbd82b..aaf1c949d9 100644
--- a/src/eval/interpreter.zig
+++ b/src/eval/interpreter.zig
@@ -3173,6 +3173,31 @@ pub const Interpreter = struct {
                 out.is_initialized = true;
                 return out;
             },
+            .str_to_utf8 => {
+                // Str.to_utf8 : Str -> List(U8)
+                std.debug.assert(args.len == 1);
+
+                // Resolve the result layout first: a missing return type then
+                // crashes before the string is converted (which may allocate).
+                const result_rt_var = return_rt_var orelse {
+                    self.triggerCrash("str_to_utf8 requires return type info", false, roc_ops);
+                    return error.Crash;
+                };
+                const result_layout = try self.getRuntimeLayout(result_rt_var);
+
+                const string_arg = args[0];
+                std.debug.assert(string_arg.ptr != null);
+                const string: *const RocStr = @ptrCast(@alignCast(string_arg.ptr.?));
+                const result_list = builtins.str.strToUtf8C(string.*, roc_ops);
+
+                var out = try self.pushRaw(result_layout, 0);
+                out.is_initialized = false;
+
+                const result_ptr: *builtins.list.RocList = @ptrCast(@alignCast(out.ptr.?));
+                result_ptr.* = result_list;
+                out.is_initialized = true;
+                return out;
+            },
             .list_len => {
                 // List.len : List(a) -> U64
                 // Note: listLen returns usize, but List.len always returns U64.
diff --git a/test/snapshots/repl/str_to_utf8.md b/test/snapshots/repl/str_to_utf8.md
new file mode 100644
index 0000000000..57f308e325
--- /dev/null
+++ b/test/snapshots/repl/str_to_utf8.md
@@ -0,0 +1,22 @@
+# META
+~~~ini
+description=Str.to_utf8 should convert a string to a list of UTF-8 bytes
+type=repl
+~~~
+# SOURCE
+~~~roc
+» List.len(Str.to_utf8(""))
+» List.len(Str.to_utf8("hello"))
+» List.len(Str.to_utf8("é"))
+» List.len(Str.to_utf8("🎉"))
+~~~
+# OUTPUT
+0
+---
+5
+---
+2
+---
+4
+# PROBLEMS
+NIL