define Str.concat in zig

2025-09-28 14:24:45 +00:00 · 2020-12-01 21:36:33 +01:00 · 2020-12-01 21:36:33 +01:00 · 1a71b8bac7
commit 1a71b8bac7
parent 1589e688b3
3 changed files with 140 additions and 1 deletions
--- a/compiler/builtins/bitcode/src/str.zig
+++ b/compiler/builtins/bitcode/src/str.zig
@ -7,7 +7,7 @@ const expect = testing.expect;
 extern fn malloc(size: usize) ?*u8;
 extern fn free([*]u8) void;

-const RocStr = struct {
+const RocStr = extern struct {
    str_bytes: ?[*]u8,
    str_len: usize,

@ -119,6 +119,10 @@ const RocStr = struct {
        return if (self.is_small_str()) small_len else big_len;
    }

+    /// Reports whether this string holds zero bytes, as measured by `len()`.
+    pub fn is_empty(self: RocStr) bool {
+        const length = self.len();
+        return length == 0;
+    }
+
    // Given a pointer to some memory of length (self.len() + 1) bytes,
    // write this RocStr's contents into it as a nul-terminated C string.
    //
@ -587,3 +591,136 @@ test "startsWith: 12345678912345678910 starts with 123456789123456789" {

    expect(startsWith(str_ptr, str_len, prefix_ptr, prefix_len));
 }
+
+// Str.concat
+
+// Concatenating two 3-byte strings must equal the 6-byte string built
+// directly from the joined bytes.
+test "RocStr.concat: small concat small" {
+    // Left operand: "foo"
+    var left_buf: [3]u8 = "foo".*;
+    var left = RocStr.init(@as([*]u8, &left_buf), 3);
+
+    // Right operand: "abc"
+    var right_buf: [3]u8 = "abc".*;
+    var right = RocStr.init(@as([*]u8, &right_buf), 3);
+
+    // Expected outcome: "fooabc"
+    var expected_buf: [6]u8 = "fooabc".*;
+    var expected = RocStr.init(@as([*]u8, &expected_buf), 6);
+
+    // 8 selects the 64-bit pointer-size variant of the helper.
+    const actual = strConcat(8, left, right);
+
+    expect(expected.eq(actual));
+
+    left.drop();
+    right.drop();
+    expected.drop();
+    actual.drop();
+}
+
+/// C-ABI entry point for Str.concat.
+///
+/// `ptr_size` is the pointer width in bytes and picks the integer type the
+/// helper is instantiated with: 4 selects `i32`, 8 selects `i64`. Any other
+/// value is treated as unreachable.
+pub fn strConcat(ptr_size: u32, arg1: RocStr, arg2: RocStr) callconv(.C) RocStr {
+    if (ptr_size == 4) {
+        return strConcatHelp(i32, arg1, arg2);
+    }
+
+    if (ptr_size == 8) {
+        return strConcatHelp(i64, arg1, arg2);
+    }
+
+    unreachable;
+}
+
+/// Concatenates `arg1` and `arg2` into a new RocStr.
+///
+/// `T` is the pointer-sized integer type of the target; a small string can
+/// hold fewer than `2 * @sizeOf(T)` bytes.
+///
+/// - If either argument is empty, the other one is cloned and returned.
+/// - If the combined length does not fit in a small string, a fresh buffer is
+///   obtained from `allocate_str` and both arguments' bytes are copied into it.
+/// - Otherwise the bytes are packed into a zeroed 16-byte array whose byte at
+///   index `2 * @sizeOf(T) - 1` is set to `length | 0b1000_0000`, and that
+///   array is reinterpreted as a RocStr.
+///
+/// NOTE(review): the `[16]u8` bit-casts require RocStr to be exactly 16 bytes,
+/// while for `T = i32` the length byte is written at index 7 — confirm this
+/// is the intended layout when the host and target pointer sizes differ.
+fn strConcatHelp(comptime T: type, arg1: RocStr, arg2: RocStr) RocStr {
+    if (arg1.is_empty()) {
+        return cloneNonemptyStr(T, arg2);
+    } else if (arg2.is_empty()) {
+        return cloneNonemptyStr(T, arg1);
+    } else {
+        const combined_length = arg1.len() + arg2.len();
+
+        // The last byte of a small string is reserved for its length, so a
+        // combined length equal to `small_str_bytes` is already too big.
+        const small_str_bytes = 2 * @sizeOf(T);
+        const result_is_big = combined_length >= small_str_bytes;
+
+        if (result_is_big) {
+            var result = allocate_str(T, combined_length);
+
+            // Copy arg1 to the start of the new buffer. A small string's
+            // bytes live inside the RocStr value itself; a big string's bytes
+            // live behind `str_bytes`.
+            {
+                const old_if_small = &@bitCast([16]u8, arg1);
+                const old_if_big = @ptrCast([*]u8, arg1.str_bytes);
+                const old_bytes = if (arg1.is_small_str()) old_if_small else old_if_big;
+
+                const new_bytes: [*]u8 = @ptrCast([*]u8, result.str_bytes);
+
+                @memcpy(new_bytes, old_bytes, arg1.len());
+            }
+
+            // Copy arg2 directly after arg1's bytes.
+            {
+                const old_if_small = &@bitCast([16]u8, arg2);
+                const old_if_big = @ptrCast([*]u8, arg2.str_bytes);
+                const old_bytes = if (arg2.is_small_str()) old_if_small else old_if_big;
+
+                const new_bytes = @ptrCast([*]u8, result.str_bytes) + arg1.len();
+
+                @memcpy(new_bytes, old_bytes, arg2.len());
+            }
+
+            return result;
+        } else {
+            var result = [16]u8{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+            // if the result is small, then for sure arg1 and arg2 are also small
+
+            {
+                var old_bytes: [*]u8 = @ptrCast([*]u8, &@bitCast([16]u8, arg1));
+                var new_bytes: [*]u8 = @ptrCast([*]u8, &result);
+
+                @memcpy(new_bytes, old_bytes, arg1.len());
+            }
+
+            {
+                var old_bytes: [*]u8 = @ptrCast([*]u8, &@bitCast([16]u8, arg2));
+                var new_bytes = @ptrCast([*]u8, &result) + arg1.len();
+
+                @memcpy(new_bytes, old_bytes, arg2.len());
+            }
+
+            // Tag the final small-string byte: high bit set, low bits hold
+            // the combined length.
+            const mask: u8 = 0b1000_0000;
+            const final_byte = @truncate(u8, combined_length) | mask;
+
+            result[small_str_bytes - 1] = final_byte;
+
+            return @bitCast(RocStr, result);
+        }
+    }
+}
+
+/// Produces a copy of `str`.
+///
+/// Small or empty strings are returned unchanged, because the value itself is
+/// the copy. Any other string gets a new buffer from `allocate_str` of
+/// `str.str_len` bytes, filled with the bytes behind `str.str_bytes`.
+fn cloneNonemptyStr(comptime T: type, str: RocStr) RocStr {
+    // Nothing to allocate: hand the value straight back.
+    if (str.is_small_str() or str.is_empty()) {
+        return str;
+    }
+
+    const byte_count = str.str_len;
+    const copy = allocate_str(T, byte_count);
+
+    const source: [*]u8 = @ptrCast([*]u8, str.str_bytes);
+    const destination: [*]u8 = @ptrCast([*]u8, copy.str_bytes);
+
+    @memcpy(destination, source, byte_count);
+
+    return copy;
+}
+
+/// Allocates storage for a big RocStr of `number_of_chars` bytes.
+///
+/// The allocation consists of one `T`-sized header slot, initialised to
+/// `std.math.maxInt(T)`, followed by `number_of_chars` bytes for the string
+/// contents. The returned RocStr's `str_bytes` points at the first content
+/// byte (just past the header slot) and its `str_len` is `number_of_chars`.
+/// The content bytes are not initialised here; the caller fills them in.
+///
+/// NOTE(review): the header slot is presumably a reference count — confirm.
+/// NOTE(review): a null return from `malloc` is not handled — confirm whether
+/// an out-of-memory path is expected.
+/// NOTE(review): `str_bytes` is offset from the pointer `malloc` returned;
+/// verify that whatever frees these strings accounts for the header slot.
+fn allocate_str(comptime T: type, number_of_chars: u64) RocStr {
+    const length = @sizeOf(T) + number_of_chars;
+    var new_bytes: [*]T = @ptrCast([*]T, @alignCast(@alignOf(T), malloc(length)));
+
+    new_bytes[0] = std.math.maxInt(T);
+
+    // Skip exactly one header slot. This was previously a hard-coded 8, which
+    // for T = i32 (a 4-byte header) placed the string 4 bytes too far into
+    // the allocation, so writing `number_of_chars` bytes ran past its end.
+    var first_element = @ptrCast([*]align(@alignOf(T)) u8, new_bytes);
+    first_element += @sizeOf(T);
+
+    return RocStr{
+        .str_bytes = first_element,
+        .str_len = number_of_chars,
+    };
+}