Add Str.countGraphemes

This commit is contained in:
Jared Ramirez 2020-11-02 10:22:36 -06:00
parent 00130c6dc0
commit 63308d90e1
18 changed files with 11345 additions and 44 deletions

View file

@ -1,2 +1,4 @@
zig-cache
src/zig-cache
builtins.ll
builtins.bc

View file

@ -1,5 +1,7 @@
# Bitcode for Builtins
## How it works
Roc's builtins are implemented in the compiler using LLVM only.
When their implementations are simple enough (e.g. addition), they
can be implemented directly in Inkwell.
@ -22,3 +24,10 @@ There will be two directories like `roc_builtins-[some random characters]`, look
## Calling bitcode functions
Use the `call_bitcode_fn` function defined in `llvm/src/build.rs` to call bitcode functions.
## Developing
To add a builtin, just add it to `src/main.zig`. For anything you add, you must add tests for it! Not only does this make the builtins more maintainable, it's also the easiest way to test these functions in Zig. To run the tests, run
```
zig build test
```

View file

@ -0,0 +1,59 @@
const builtin = @import("builtin");
const std = @import("std");
const mem = std.mem;
const Builder = std.build.Builder;
/// Configures the build graph for the builtins.
///
/// Steps registered here:
///   * `test` - builds and runs the tests of the main source file.
///   * `ir`   - emits LLVM IR for the main source file (the default step).
///   * `bc`   - emits the IR, then converts it to bitcode with `llvm-as-10`.
///
/// The `main-path` and `bc-path` options override the main source file and
/// the generated bitcode path respectively.
pub fn build(b: *Builder) void {
    b.setPreferredReleaseMode(builtin.Mode.ReleaseFast);
    const mode = b.standardReleaseOptions();

    // Options
    const fallback_main_path = "./src/main.zig";
    const main_path_desc = b.fmt("Override path to main.zig. Used by \"ir\", \"bc\", and \"test\". Defaults to \"{}\". ", .{fallback_main_path});
    const main_path = b.option([]const u8, "main-path", main_path_desc) orelse fallback_main_path;

    const fallback_bitcode_path = "./builtins.bc";
    const bitcode_path_desc = b.fmt("Override path to generated bitcode file. Used by \"ir\" and \"bc\". Defaults to \"{}\". ", .{fallback_bitcode_path});
    const bitcode_path = b.option([]const u8, "bc-path", bitcode_path_desc) orelse fallback_bitcode_path;

    // Tests
    var main_tests = b.addTest(main_path);
    main_tests.setBuildMode(mode);
    const test_step = b.step("test", "Run tests");
    test_step.dependOn(&main_tests.step);

    // Lib
    const obj_name = "builtins";
    const obj = b.addObject(obj_name, main_path);
    obj.setBuildMode(mode);
    obj.strip = true;
    obj.emit_llvm_ir = true;
    obj.emit_bin = false;
    const ir = b.step("ir", "Build LLVM ir");
    ir.dependOn(&obj.step);

    // IR to Bitcode
    const bitcode_path_arg = b.fmt("-o={}", .{bitcode_path});
    // NOTE(review): assumes the object step writes its IR to `<obj_name>.ll`
    // in the directory the command runs from - confirm.
    const ir_out_file = b.fmt("{}.ll", .{obj_name});
    const ir_to_bitcode = b.addSystemCommand(&[_][]const u8{
        "llvm-as-10",
        ir_out_file,
        bitcode_path_arg,
    });
    // The assembler consumes the emitted IR, so state that dependency
    // explicitly instead of relying on the order of the `dependOn` calls below.
    ir_to_bitcode.step.dependOn(ir);

    const bitcode = b.step("bc", "Build LLVM ir and convert to bitcode");
    bitcode.dependOn(ir);
    bitcode.dependOn(&ir_to_bitcode.step);

    b.default_step = ir;
    removeInstallSteps(b);
}
/// Removes every top-level step named "install" or "uninstall" from `b`.
///
/// The list is walked with an explicit index because `swapRemove` shrinks the
/// list and moves its last element into the vacated slot; iterating a slice
/// captured before the removal would skip that element and index past the
/// new length.
fn removeInstallSteps(b: *Builder) void {
    var i: usize = 0;
    while (i < b.top_level_steps.items.len) {
        const name = b.top_level_steps.items[i].step.name;
        if (mem.eql(u8, name, "install") or mem.eql(u8, name, "uninstall")) {
            // Do not advance: slot `i` now holds the element that was last,
            // and it still has to be examined.
            _ = b.top_level_steps.swapRemove(i);
        } else {
            i += 1;
        }
    }
}

File diff suppressed because it is too large Load diff

View file

@ -1,10 +1,14 @@
const std = @import("std");
const math = std.math;
const expect = std.testing.expect;
const unicode = std.unicode;
const testing = std.testing;
const expectEqual = testing.expectEqual;
const expect = testing.expect;
const roc_builtins_namespace = "roc_builtins";
// MATH
const math_namespace = roc_builtins_namespace ++ ".math";
const str_namespace = roc_builtins_namespace ++ ".str";
comptime { @export(atan, .{ .name = math_namespace ++ ".atan", .linkage = .Strong }); }
fn atan(num: f64) callconv(.C) f64 {
@ -23,7 +27,7 @@ fn powInt(base: i64, exp: i64) callconv(.C) i64 {
comptime { @export(acos, .{ .name = math_namespace ++ ".acos", .linkage = .Strong }); }
fn acos(num: f64) callconv(.C) f64 {
return math.acos(num);
return math.acos(num);
}
comptime { @export(asin, .{ .name = math_namespace ++ ".asin", .linkage = .Strong }); }
@ -31,6 +35,8 @@ fn asin(num: f64) callconv(.C) f64 {
return math.asin(num);
}
// STR
const str_namespace = roc_builtins_namespace ++ ".str";
// Str.split
@ -45,7 +51,7 @@ const RocStr = struct {
};
}
pub fn eq(self: RocStr, other: RocStr) bool {
pub fn eq(self: *RocStr, other: RocStr) bool {
if (self.str_len != other.str_len) {
return false;
}
@ -71,7 +77,7 @@ const RocStr = struct {
const str2_ptr: [*]u8 = &str2;
var roc_str2 = RocStr.init(str2_ptr, str2_len);
expect(RocStr.eq(roc_str1, roc_str2));
expect(roc_str1.eq(roc_str2));
}
test "RocStr.eq: not equal different length" {
@ -85,7 +91,7 @@ const RocStr = struct {
const str2_ptr: [*]u8 = &str2;
var roc_str2 = RocStr.init(str2_ptr, str2_len);
expect(!RocStr.eq(roc_str1, roc_str2));
expect(!roc_str1.eq(roc_str2));
}
test "RocStr.eq: not equal same length" {
@ -99,7 +105,7 @@ const RocStr = struct {
const str2_ptr: [*]u8 = &str2;
var roc_str2 = RocStr.init(str2_ptr, str2_len);
expect(!RocStr.eq(roc_str1, roc_str2));
expect(!roc_str1.eq(roc_str2));
}
};
@ -175,8 +181,8 @@ test "strSplitInPlace: no delimiter" {
RocStr.init(str_ptr, 3),
};
expect(array.len == expected.len);
expect(RocStr.eq(array[0], expected[0]));
expectEqual(array.len, expected.len);
expect(array[0].eq(expected[0]));
}
test "strSplitInPlace: delimiter on sides" {
@ -212,10 +218,10 @@ test "strSplitInPlace: delimiter on sides" {
const expected_str_ptr: [*]u8 = &expected_str;
var expectedRocStr = RocStr.init(expected_str_ptr, expected_str_len);
expect(array.len == 3);
expect(array[0].str_len == 0);
expect(RocStr.eq(array[1], expectedRocStr));
expect(array[2].str_len == 0);
expectEqual(array.len, 3);
expectEqual(array[0].str_len, 0);
expect(array[1].eq(expectedRocStr));
expectEqual(array[2].str_len, 0);
}
test "strSplitInPlace: three pieces" {
@ -266,10 +272,10 @@ test "strSplitInPlace: three pieces" {
}
};
expect(expected_array.len == array.len);
expect(RocStr.eq(array[0], expected_array[0]));
expect(RocStr.eq(array[1], expected_array[1]));
expect(RocStr.eq(array[2], expected_array[2]));
expectEqual(expected_array.len, array.len);
expect(array[0].eq(expected_array[0]));
expect(array[1].eq(expected_array[1]));
expect(array[2].eq(expected_array[2]));
}
// This is used for `Str.split : Str, Str -> Array Str
@ -336,7 +342,7 @@ test "countSegments: long delimiter" {
delimiter_len
);
expect(segments_count == 1);
expectEqual(segments_count, 1);
}
test "countSegments: delimiter at start" {
@ -358,7 +364,7 @@ test "countSegments: delimiter at start" {
delimiter_len
);
expect(segments_count == 2);
expectEqual(segments_count, 2);
}
test "countSegments: delimiter interspered" {
@ -380,5 +386,92 @@ test "countSegments: delimiter interspered" {
delimiter_len
);
expect(segments_count == 3);
expectEqual(segments_count, 3);
}
// Str.countGraphemeClusters
const grapheme = @import("grapheme.zig");
comptime { @export(countGraphemeClusters, .{ .name = str_namespace ++ ".count_grapheme_clusters", .linkage = .Strong }); }
/// Counts the grapheme clusters in the `bytes_len` bytes starting at `bytes_ptr`.
///
/// The bytes are decoded as UTF-8; a decoding failure is treated as
/// `unreachable`, so callers must pass valid UTF-8. Returns 0 when
/// `bytes_len` is 0.
fn countGraphemeClusters(bytes_ptr: [*]u8, bytes_len: usize) callconv(.C) usize {
    const utf8_bytes = bytes_ptr[0..bytes_len];
    const view = unicode.Utf8View.init(utf8_bytes) catch unreachable;
    var codepoints = view.iterator();

    // State threaded through successive `isGraphemeBreak` calls; it is reset
    // each time a break is reported.
    var break_state: ?grapheme.BoundClass = null;
    var previous: ?u21 = null;
    var boundaries: usize = 0;

    while (codepoints.nextCodepoint()) |current| {
        if (previous) |prev| {
            if (grapheme.isGraphemeBreak(prev, current, &break_state)) {
                boundaries += 1;
                break_state = null;
            }
        }
        previous = current;
    }

    // Breaks separate clusters, so non-empty input has one more cluster than
    // it has breaks.
    return if (bytes_len == 0) boundaries else boundaries + 1;
}
// Zero-length input yields a count of zero.
test "countGraphemeClusters: empty string" {
    var input = "".*;
    const input_ptr: [*]u8 = &input;
    expectEqual(countGraphemeClusters(input_ptr, input.len), 0);
}
// Four ASCII letters are counted as four clusters.
test "countGraphemeClusters: ascii characters" {
    var input = "abcd".*;
    const input_ptr: [*]u8 = &input;
    expectEqual(countGraphemeClusters(input_ptr, input.len), 4);
}
// Multi-byte characters mixed with ASCII: expects one cluster per character.
test "countGraphemeClusters: utf8 characters" {
    var input = "ãxā".*;
    const input_ptr: [*]u8 = &input;
    expectEqual(countGraphemeClusters(input_ptr, input.len), 3);
}
// Three emoji are counted as three clusters.
test "countGraphemeClusters: emojis" {
    var input = "🤔🤔🤔".*;
    const input_ptr: [*]u8 = &input;
    expectEqual(countGraphemeClusters(input_ptr, input.len), 3);
}
// Emoji interleaved with other multi-byte characters: six clusters expected.
test "countGraphemeClusters: emojis and ut8 characters" {
    var input = "🤔å🤔¥🤔ç".*;
    const input_ptr: [*]u8 = &input;
    expectEqual(countGraphemeClusters(input_ptr, input.len), 6);
}
// Emoji, other multi-byte characters and ASCII together: ten clusters expected.
test "countGraphemeClusters: emojis, ut8, and ascii characters" {
    var input = "6🤔å🤔e¥🤔çpp".*;
    const input_ptr: [*]u8 = &input;
    expectEqual(countGraphemeClusters(input_ptr, input.len), 10);
}
// References every declaration of this file so that tests nested inside its
// containers are reached by the test runner. Same pattern as the Zig standard
// library uses:
// https://github.com/ziglang/zig/blob/master/lib/std/std.zig#L94
test "" {
testing.refAllDecls(@This());
}