mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-28 06:14:46 +00:00
Add Str.countGraphemes
This commit is contained in:
parent
00130c6dc0
commit
63308d90e1
18 changed files with 11345 additions and 44 deletions
|
@ -64,6 +64,34 @@ One of the cool things about Roc is that it evaluates if a value in memory is sh
|
|||
|
||||
We have to define the uniqueness constraints of a function just like we have to define a type signature. That is what happens in `unique.rs`. This can be tricky so it would be a good step to ask for help on if it is confusing.
|
||||
|
||||
## Testing it
|
||||
### solve/tests/solve_expr.rs
|
||||
To make sure that Roc is properly inferring the type of the new builting, add a test to this file simlar to:
|
||||
```
|
||||
#[test]
|
||||
fn atan() {
|
||||
infer_eq_without_problem(
|
||||
indoc!(
|
||||
r#"
|
||||
Num.atan
|
||||
"#
|
||||
),
|
||||
"Float -> Float",
|
||||
);
|
||||
}
|
||||
```
|
||||
But replace `Num.atan` and the type signature with the new builtin.
|
||||
|
||||
### gen/test/*.rs
|
||||
In this directory, there are a couple files like `gen_num.rs`, `gen_str.rs`, etc. For the `Str` module builtins, put the test in `gen_str.rs`, etc. Find the one for the new builtin, and add a test like:
|
||||
```
|
||||
#[test]
|
||||
fn atan() {
|
||||
assert_evals_to!("Num.atan 10", 1.4711276743037347, f64);
|
||||
}
|
||||
```
|
||||
But replace `Num.atan`, the return value, and the return type with your new builtin.
|
||||
|
||||
# Mistakes that are easy to make!!
|
||||
|
||||
When implementing a new builtin, it is often easy to copy and paste the implementation for an existing builtin. This can take you quite far since many builtins are very similar, but it also risks forgetting to change one small part of what you copy and pasted and losing a lot of time later on when you cant figure out why things dont work. So, speaking from experience, even if you are copying an existing builtin, try and implement it manually without copying and pasting. Two recent instances of this (as of September 7th, 2020):
|
||||
|
|
2
compiler/builtins/bitcode/.gitignore
vendored
2
compiler/builtins/bitcode/.gitignore
vendored
|
@ -1,2 +1,4 @@
|
|||
zig-cache
|
||||
src/zig-cache
|
||||
builtins.ll
|
||||
builtins.bc
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
# Bitcode for Builtins
|
||||
|
||||
## How it works
|
||||
|
||||
Roc's builtins are implemented in the compiler using LLVM only.
|
||||
When their implementations are simple enough (e.g. addition), they
|
||||
can be implemented directly in Inkwell.
|
||||
|
@ -22,3 +24,10 @@ There will be two directories like `roc_builtins-[some random characters]`, look
|
|||
## Calling bitcode functions
|
||||
|
||||
Use the `call_bitcode_fn` function defined in `llvm/src/build.rs` to call bitcode funcitons.
|
||||
|
||||
## Developing
|
||||
|
||||
To add a builtin, just add it to `src/main.zig`. For anything you add, you must add tests for it! Not only does to make the builtins more maintainable, it's the the easiest way to test these functions on Zig. To run the test, run
|
||||
```
|
||||
zig build test
|
||||
```
|
||||
|
|
59
compiler/builtins/bitcode/build.zig
Normal file
59
compiler/builtins/bitcode/build.zig
Normal file
|
@ -0,0 +1,59 @@
|
|||
const builtin = @import("builtin");
|
||||
const std = @import("std");
|
||||
const mem = std.mem;
|
||||
const Builder = std.build.Builder;
|
||||
|
||||
pub fn build(b: *Builder) void {
|
||||
b.setPreferredReleaseMode(builtin.Mode.ReleaseFast);
|
||||
const mode = b.standardReleaseOptions();
|
||||
|
||||
// Options
|
||||
const fallback_main_path = "./src/main.zig";
|
||||
const main_path_desc = b.fmt("Override path to main.zig. Used by \"ir\", \"bc\", and \"test\". Defaults to \"{}\". ", .{fallback_main_path});
|
||||
const main_path = b.option([]const u8, "main-path", main_path_desc) orelse fallback_main_path;
|
||||
|
||||
const fallback_bitcode_path = "./builtins.bc";
|
||||
const bitcode_path_desc = b.fmt("Override path to generated bitcode file. Used by \"ir\" and \"bc\". Defaults to \"{}\". ", .{fallback_bitcode_path});
|
||||
const bitcode_path = b.option([]const u8, "bc-path", bitcode_path_desc) orelse fallback_bitcode_path;
|
||||
|
||||
// Tests
|
||||
var main_tests = b.addTest(main_path);
|
||||
main_tests.setBuildMode(mode);
|
||||
const test_step = b.step("test", "Run tests");
|
||||
test_step.dependOn(&main_tests.step);
|
||||
|
||||
// Lib
|
||||
const obj_name = "builtins";
|
||||
const obj = b.addObject(obj_name, main_path);
|
||||
obj.setBuildMode(mode);
|
||||
obj.strip = true;
|
||||
obj.emit_llvm_ir = true;
|
||||
obj.emit_bin = false;
|
||||
const ir = b.step("ir", "Build LLVM ir");
|
||||
ir.dependOn(&obj.step);
|
||||
|
||||
// IR to Bitcode
|
||||
const bitcode_path_arg = b.fmt("-o={}", .{bitcode_path});
|
||||
const ir_out_file = b.fmt("{}.ll", .{obj_name});
|
||||
const ir_to_bitcode = b.addSystemCommand(&[_][]const u8{
|
||||
"llvm-as-10",
|
||||
ir_out_file,
|
||||
bitcode_path_arg
|
||||
});
|
||||
|
||||
const bicode = b.step("bc", "Build LLVM ir and convert to bitcode");
|
||||
bicode.dependOn(ir);
|
||||
bicode.dependOn(&ir_to_bitcode.step);
|
||||
|
||||
b.default_step = ir;
|
||||
removeInstallSteps(b);
|
||||
}
|
||||
|
||||
fn removeInstallSteps(b: *Builder) void {
|
||||
for (b.top_level_steps.items) |top_level_step, i| {
|
||||
if (mem.eql(u8, top_level_step.step.name, "install") or mem.eql(u8, top_level_step.step.name, "uninstall")) {
|
||||
const name = top_level_step.step.name;
|
||||
_ = b.top_level_steps.swapRemove(i);
|
||||
}
|
||||
}
|
||||
}
|
10986
compiler/builtins/bitcode/src/grapheme.zig
Normal file
10986
compiler/builtins/bitcode/src/grapheme.zig
Normal file
File diff suppressed because it is too large
Load diff
|
@ -1,10 +1,14 @@
|
|||
const std = @import("std");
|
||||
const math = std.math;
|
||||
const expect = std.testing.expect;
|
||||
const unicode = std.unicode;
|
||||
const testing = std.testing;
|
||||
const expectEqual = testing.expectEqual;
|
||||
const expect = testing.expect;
|
||||
|
||||
const roc_builtins_namespace = "roc_builtins";
|
||||
|
||||
// MATH
|
||||
const math_namespace = roc_builtins_namespace ++ ".math";
|
||||
const str_namespace = roc_builtins_namespace ++ ".str";
|
||||
|
||||
comptime { @export(atan, .{ .name = math_namespace ++ ".atan", .linkage = .Strong }); }
|
||||
fn atan(num: f64) callconv(.C) f64 {
|
||||
|
@ -31,6 +35,8 @@ fn asin(num: f64) callconv(.C) f64 {
|
|||
return math.asin(num);
|
||||
}
|
||||
|
||||
// STR
|
||||
const str_namespace = roc_builtins_namespace ++ ".str";
|
||||
|
||||
// Str.split
|
||||
|
||||
|
@ -45,7 +51,7 @@ const RocStr = struct {
|
|||
};
|
||||
}
|
||||
|
||||
pub fn eq(self: RocStr, other: RocStr) bool {
|
||||
pub fn eq(self: *RocStr, other: RocStr) bool {
|
||||
if (self.str_len != other.str_len) {
|
||||
return false;
|
||||
}
|
||||
|
@ -71,7 +77,7 @@ const RocStr = struct {
|
|||
const str2_ptr: [*]u8 = &str2;
|
||||
var roc_str2 = RocStr.init(str2_ptr, str2_len);
|
||||
|
||||
expect(RocStr.eq(roc_str1, roc_str2));
|
||||
expect(roc_str1.eq(roc_str2));
|
||||
}
|
||||
|
||||
test "RocStr.eq: not equal different length" {
|
||||
|
@ -85,7 +91,7 @@ const RocStr = struct {
|
|||
const str2_ptr: [*]u8 = &str2;
|
||||
var roc_str2 = RocStr.init(str2_ptr, str2_len);
|
||||
|
||||
expect(!RocStr.eq(roc_str1, roc_str2));
|
||||
expect(!roc_str1.eq(roc_str2));
|
||||
}
|
||||
|
||||
test "RocStr.eq: not equal same length" {
|
||||
|
@ -99,7 +105,7 @@ const RocStr = struct {
|
|||
const str2_ptr: [*]u8 = &str2;
|
||||
var roc_str2 = RocStr.init(str2_ptr, str2_len);
|
||||
|
||||
expect(!RocStr.eq(roc_str1, roc_str2));
|
||||
expect(!roc_str1.eq(roc_str2));
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -175,8 +181,8 @@ test "strSplitInPlace: no delimiter" {
|
|||
RocStr.init(str_ptr, 3),
|
||||
};
|
||||
|
||||
expect(array.len == expected.len);
|
||||
expect(RocStr.eq(array[0], expected[0]));
|
||||
expectEqual(array.len, expected.len);
|
||||
expect(array[0].eq(expected[0]));
|
||||
}
|
||||
|
||||
test "strSplitInPlace: delimiter on sides" {
|
||||
|
@ -212,10 +218,10 @@ test "strSplitInPlace: delimiter on sides" {
|
|||
const expected_str_ptr: [*]u8 = &expected_str;
|
||||
var expectedRocStr = RocStr.init(expected_str_ptr, expected_str_len);
|
||||
|
||||
expect(array.len == 3);
|
||||
expect(array[0].str_len == 0);
|
||||
expect(RocStr.eq(array[1], expectedRocStr));
|
||||
expect(array[2].str_len == 0);
|
||||
expectEqual(array.len, 3);
|
||||
expectEqual(array[0].str_len, 0);
|
||||
expect(array[1].eq(expectedRocStr));
|
||||
expectEqual(array[2].str_len, 0);
|
||||
}
|
||||
|
||||
test "strSplitInPlace: three pieces" {
|
||||
|
@ -266,10 +272,10 @@ test "strSplitInPlace: three pieces" {
|
|||
}
|
||||
};
|
||||
|
||||
expect(expected_array.len == array.len);
|
||||
expect(RocStr.eq(array[0], expected_array[0]));
|
||||
expect(RocStr.eq(array[1], expected_array[1]));
|
||||
expect(RocStr.eq(array[2], expected_array[2]));
|
||||
expectEqual(expected_array.len, array.len);
|
||||
expect(array[0].eq(expected_array[0]));
|
||||
expect(array[1].eq(expected_array[1]));
|
||||
expect(array[2].eq(expected_array[2]));
|
||||
}
|
||||
|
||||
// This is used for `Str.split : Str, Str -> Array Str
|
||||
|
@ -336,7 +342,7 @@ test "countSegments: long delimiter" {
|
|||
delimiter_len
|
||||
);
|
||||
|
||||
expect(segments_count == 1);
|
||||
expectEqual(segments_count, 1);
|
||||
}
|
||||
|
||||
test "countSegments: delimiter at start" {
|
||||
|
@ -358,7 +364,7 @@ test "countSegments: delimiter at start" {
|
|||
delimiter_len
|
||||
);
|
||||
|
||||
expect(segments_count == 2);
|
||||
expectEqual(segments_count, 2);
|
||||
}
|
||||
|
||||
test "countSegments: delimiter interspered" {
|
||||
|
@ -380,5 +386,92 @@ test "countSegments: delimiter interspered" {
|
|||
delimiter_len
|
||||
);
|
||||
|
||||
expect(segments_count == 3);
|
||||
expectEqual(segments_count, 3);
|
||||
}
|
||||
|
||||
// Str.countGraphemeClusters
|
||||
const grapheme = @import("grapheme.zig");
|
||||
|
||||
comptime { @export(countGraphemeClusters, .{ .name = str_namespace ++ ".count_grapheme_clusters", .linkage = .Strong }); }
|
||||
fn countGraphemeClusters(bytes_ptr: [*]u8, bytes_len: usize) callconv(.C) usize {
|
||||
var bytes = bytes_ptr[0..bytes_len];
|
||||
var iter = (unicode.Utf8View.init(bytes) catch unreachable).iterator();
|
||||
|
||||
var count: usize = 0;
|
||||
var grapheme_break_state: ?grapheme.BoundClass = null;
|
||||
var grapheme_break_state_ptr = &grapheme_break_state;
|
||||
var opt_last_codepoint: ?u21 = null;
|
||||
while (iter.nextCodepoint()) |cur_codepoint| {
|
||||
if (opt_last_codepoint) |last_codepoint| {
|
||||
var did_break = grapheme.isGraphemeBreak(
|
||||
last_codepoint,
|
||||
cur_codepoint,
|
||||
grapheme_break_state_ptr
|
||||
);
|
||||
if (did_break) {
|
||||
count += 1;
|
||||
grapheme_break_state = null;
|
||||
}
|
||||
}
|
||||
opt_last_codepoint = cur_codepoint;
|
||||
}
|
||||
|
||||
if (bytes_len != 0) {
|
||||
count += 1;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
test "countGraphemeClusters: empty string" {
|
||||
var bytes_arr = "".*;
|
||||
var bytes_len = bytes_arr.len;
|
||||
var bytes_ptr: [*]u8 = &bytes_arr;
|
||||
var count = countGraphemeClusters(bytes_ptr, bytes_len);
|
||||
expectEqual(count, 0);
|
||||
}
|
||||
|
||||
test "countGraphemeClusters: ascii characters" {
|
||||
var bytes_arr = "abcd".*;
|
||||
var bytes_len = bytes_arr.len;
|
||||
var bytes_ptr: [*]u8 = &bytes_arr;
|
||||
var count = countGraphemeClusters(bytes_ptr, bytes_len);
|
||||
expectEqual(count, 4);
|
||||
}
|
||||
|
||||
test "countGraphemeClusters: utf8 characters" {
|
||||
var bytes_arr = "ãxā".*;
|
||||
var bytes_len = bytes_arr.len;
|
||||
var bytes_ptr: [*]u8 = &bytes_arr;
|
||||
var count = countGraphemeClusters(bytes_ptr, bytes_len);
|
||||
expectEqual(count, 3);
|
||||
}
|
||||
|
||||
test "countGraphemeClusters: emojis" {
|
||||
var bytes_arr = "🤔🤔🤔".*;
|
||||
var bytes_len = bytes_arr.len;
|
||||
var bytes_ptr: [*]u8 = &bytes_arr;
|
||||
var count = countGraphemeClusters(bytes_ptr, bytes_len);
|
||||
expectEqual(count, 3);
|
||||
}
|
||||
|
||||
test "countGraphemeClusters: emojis and ut8 characters" {
|
||||
var bytes_arr = "🤔å🤔¥🤔ç".*;
|
||||
var bytes_len = bytes_arr.len;
|
||||
var bytes_ptr: [*]u8 = &bytes_arr;
|
||||
var count = countGraphemeClusters(bytes_ptr, bytes_len);
|
||||
expectEqual(count, 6);
|
||||
}
|
||||
|
||||
test "countGraphemeClusters: emojis, ut8, and ascii characters" {
|
||||
var bytes_arr = "6🤔å🤔e¥🤔çpp".*;
|
||||
var bytes_len = bytes_arr.len;
|
||||
var bytes_ptr: [*]u8 = &bytes_arr;
|
||||
var count = countGraphemeClusters(bytes_ptr, bytes_len);
|
||||
expectEqual(count, 10);
|
||||
}
|
||||
|
||||
// https://github.com/ziglang/zig/blob/master/lib/std/std.zig#L94
|
||||
test "" {
|
||||
testing.refAllDecls(@This());
|
||||
}
|
||||
|
|
|
@ -5,26 +5,7 @@ use std::path::Path;
|
|||
use std::process::Command;
|
||||
use std::str;
|
||||
|
||||
fn run_command<S, I>(command: &str, args: I)
|
||||
where
|
||||
I: IntoIterator<Item = S>,
|
||||
S: AsRef<OsStr>,
|
||||
{
|
||||
let output_result = Command::new(OsStr::new(&command)).args(args).output();
|
||||
match output_result {
|
||||
Ok(output) => match output.status.success() {
|
||||
true => (),
|
||||
false => {
|
||||
let error_str = match str::from_utf8(&output.stderr) {
|
||||
Ok(stderr) => stderr.to_string(),
|
||||
Err(_) => format!("Failed to run \"{}\"", command),
|
||||
};
|
||||
panic!("{} failed: {}", command, error_str);
|
||||
}
|
||||
},
|
||||
Err(reason) => panic!("{} failed: {}", command, reason),
|
||||
}
|
||||
}
|
||||
// TODO: Use zig build system command instead
|
||||
|
||||
fn main() {
|
||||
let out_dir = env::var_os("OUT_DIR").unwrap();
|
||||
|
@ -68,3 +49,24 @@ fn main() {
|
|||
println!("cargo:rerun-if-changed={}", src_path_str);
|
||||
println!("cargo:rustc-env=BUILTINS_BC={}", dest_bc);
|
||||
}
|
||||
|
||||
fn run_command<S, I>(command: &str, args: I)
|
||||
where
|
||||
I: IntoIterator<Item = S>,
|
||||
S: AsRef<OsStr>,
|
||||
{
|
||||
let output_result = Command::new(OsStr::new(&command)).args(args).output();
|
||||
match output_result {
|
||||
Ok(output) => match output.status.success() {
|
||||
true => (),
|
||||
false => {
|
||||
let error_str = match str::from_utf8(&output.stderr) {
|
||||
Ok(stderr) => stderr.to_string(),
|
||||
Err(_) => format!("Failed to run \"{}\"", command),
|
||||
};
|
||||
panic!("{} failed: {}", command, error_str);
|
||||
}
|
||||
},
|
||||
Err(reason) => panic!("{} failed: {}", command, reason),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,4 +24,5 @@ pub const MATH_IS_FINITE: &str = "roc_builtins.math.is_finite";
|
|||
pub const MATH_POW_INT: &str = "roc_builtins.math.pow_int";
|
||||
|
||||
pub const STR_COUNT_SEGEMENTS: &str = "roc_builtins.str.count_segements";
|
||||
pub const STR_STR_SPLIT_IN_PLACE: &str = "roc_builtins.str.str_split_in_place";
|
||||
pub const STR_SPLIT_IN_PLACE: &str = "roc_builtins.str.str_split_in_place";
|
||||
pub const STR_COUNT_GRAPEHEME_CLUSTERS: &str = "roc_builtins.str.count_grapheme_clusters";
|
||||
|
|
|
@ -402,6 +402,12 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
|
|||
top_level_function(vec![str_type()], Box::new(bool_type())),
|
||||
);
|
||||
|
||||
// countGraphemes : Str -> Int
|
||||
add_type(
|
||||
Symbol::STR_COUNT_GRAPHEMES,
|
||||
top_level_function(vec![str_type()], Box::new(int_type())),
|
||||
);
|
||||
|
||||
// List module
|
||||
|
||||
// get : List elem, Int -> Result elem [ OutOfBounds ]*
|
||||
|
|
|
@ -1028,6 +1028,12 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
|
|||
unique_function(vec![str_type(star1), str_type(star2)], str_type(star3))
|
||||
});
|
||||
|
||||
// Str.countGraphemes : Attr * Str, -> Attr * Int
|
||||
add_type(Symbol::STR_COUNT_GRAPHEMES, {
|
||||
let_tvars! { star1, star2 };
|
||||
unique_function(vec![str_type(star1)], int_type(star2))
|
||||
});
|
||||
|
||||
// Result module
|
||||
|
||||
// map : Attr * (Result (Attr a e))
|
||||
|
|
|
@ -52,6 +52,7 @@ pub fn builtin_defs(var_store: &mut VarStore) -> MutMap<Symbol, Def> {
|
|||
Symbol::BOOL_NOT => bool_not,
|
||||
Symbol::STR_CONCAT => str_concat,
|
||||
Symbol::STR_IS_EMPTY => str_is_empty,
|
||||
Symbol::STR_COUNT_GRAPHEMES => str_count_graphemes,
|
||||
Symbol::LIST_LEN => list_len,
|
||||
Symbol::LIST_GET => list_get,
|
||||
Symbol::LIST_SET => list_set,
|
||||
|
@ -924,7 +925,7 @@ fn str_concat(symbol: Symbol, var_store: &mut VarStore) -> Def {
|
|||
)
|
||||
}
|
||||
|
||||
/// Str.isEmpty : List * -> Bool
|
||||
/// Str.isEmpty : Str -> Bool
|
||||
fn str_is_empty(symbol: Symbol, var_store: &mut VarStore) -> Def {
|
||||
let str_var = var_store.fresh();
|
||||
let bool_var = var_store.fresh();
|
||||
|
@ -944,6 +945,26 @@ fn str_is_empty(symbol: Symbol, var_store: &mut VarStore) -> Def {
|
|||
)
|
||||
}
|
||||
|
||||
/// Str.countGraphemes : Str -> Int
|
||||
fn str_count_graphemes(symbol: Symbol, var_store: &mut VarStore) -> Def {
|
||||
let str_var = var_store.fresh();
|
||||
let int_var = var_store.fresh();
|
||||
|
||||
let body = RunLowLevel {
|
||||
op: LowLevel::StrCountGraphemes,
|
||||
args: vec![(str_var, Var(Symbol::ARG_1))],
|
||||
ret_var: int_var,
|
||||
};
|
||||
|
||||
defn(
|
||||
symbol,
|
||||
vec![(str_var, Symbol::ARG_1)],
|
||||
var_store,
|
||||
body,
|
||||
int_var,
|
||||
)
|
||||
}
|
||||
|
||||
/// List.concat : List elem, List elem -> List elem
|
||||
fn list_concat(symbol: Symbol, var_store: &mut VarStore) -> Def {
|
||||
let list_var = var_store.fresh();
|
||||
|
|
21
compiler/gen/src/llvm/bitcode.rs
Normal file
21
compiler/gen/src/llvm/bitcode.rs
Normal file
|
@ -0,0 +1,21 @@
|
|||
use inkwell::types::BasicTypeEnum;
|
||||
use roc_module::low_level::LowLevel;
|
||||
|
||||
fn call_bitcode_fn<'a, 'ctx, 'env>(
|
||||
op: LowLevel,
|
||||
env: &Env<'a, 'ctx, 'env>,
|
||||
args: &[BasicValueEnum<'ctx>],
|
||||
fn_name: &str,
|
||||
) -> BasicValueEnum<'ctx> {
|
||||
let fn_val = env
|
||||
.module
|
||||
.get_function(fn_name)
|
||||
.unwrap_or_else(|| panic!("Unrecognized builtin function: {:?} - if you're working on the Roc compiler, do you need to rebuild the bitcode? See compiler/builtins/bitcode/README.md", fn_name));
|
||||
let call = env.builder.build_call(fn_val, args, "call_builtin");
|
||||
|
||||
call.set_call_convention(fn_val.get_call_conventions());
|
||||
|
||||
call.try_as_basic_value()
|
||||
.left()
|
||||
.unwrap_or_else(|| panic!("LLVM error: Invalid call for low-level op {:?}", op))
|
||||
}
|
|
@ -4,7 +4,7 @@ use crate::llvm::build_list::{
|
|||
list_get_unsafe, list_join, list_keep_if, list_len, list_map, list_prepend, list_repeat,
|
||||
list_reverse, list_set, list_single, list_walk_right,
|
||||
};
|
||||
use crate::llvm::build_str::{str_concat, str_len, CHAR_LAYOUT};
|
||||
use crate::llvm::build_str::{str_concat, str_count_graphemes, str_len, CHAR_LAYOUT};
|
||||
use crate::llvm::compare::{build_eq, build_neq};
|
||||
use crate::llvm::convert::{
|
||||
basic_type_from_layout, block_of_memory, collection, get_fn_type, get_ptr_type, ptr_int,
|
||||
|
@ -2527,6 +2527,12 @@ fn run_low_level<'a, 'ctx, 'env>(
|
|||
);
|
||||
BasicValueEnum::IntValue(is_zero)
|
||||
}
|
||||
StrCountGraphemes => {
|
||||
// Str.countGraphemes : Str -> Int
|
||||
debug_assert_eq!(args.len(), 1);
|
||||
|
||||
str_count_graphemes(env, scope, parent, args[0])
|
||||
}
|
||||
ListLen => {
|
||||
// List.len : List * -> Int
|
||||
debug_assert_eq!(args.len(), 1);
|
||||
|
|
|
@ -7,6 +7,8 @@ use inkwell::builder::Builder;
|
|||
use inkwell::types::BasicTypeEnum;
|
||||
use inkwell::values::{BasicValueEnum, FunctionValue, IntValue, PointerValue, StructValue};
|
||||
use inkwell::{AddressSpace, IntPredicate};
|
||||
use roc_builtins::bitcode;
|
||||
use roc_module::low_level::LowLevel;
|
||||
use roc_module::symbol::Symbol;
|
||||
use roc_mono::layout::{Builtin, Layout};
|
||||
|
||||
|
@ -591,3 +593,55 @@ fn str_is_not_empty<'ctx>(env: &Env<'_, 'ctx, '_>, len: IntValue<'ctx>) -> IntVa
|
|||
"str_len_is_nonzero",
|
||||
)
|
||||
}
|
||||
|
||||
/// Str.countGraphemes : Str -> Int
|
||||
pub fn str_count_graphemes<'a, 'ctx, 'env>(
|
||||
env: &Env<'a, 'ctx, 'env>,
|
||||
scope: &Scope<'a, 'ctx>,
|
||||
parent: FunctionValue<'ctx>,
|
||||
str_symbol: Symbol,
|
||||
) -> BasicValueEnum<'ctx> {
|
||||
let ctx = env.context;
|
||||
|
||||
let sym_str_ptr = ptr_from_symbol(scope, str_symbol);
|
||||
let str_wrapper_type = BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes));
|
||||
|
||||
load_str(
|
||||
env,
|
||||
parent,
|
||||
*sym_str_ptr,
|
||||
str_wrapper_type,
|
||||
|str_ptr, str_len, _str_smallness| {
|
||||
call_bitcode_fn(
|
||||
LowLevel::StrCountGraphemes,
|
||||
env,
|
||||
&[
|
||||
BasicValueEnum::PointerValue(str_ptr).into(),
|
||||
BasicValueEnum::IntValue(str_len).into(),
|
||||
],
|
||||
&bitcode::STR_COUNT_GRAPEHEME_CLUSTERS,
|
||||
)
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
// Duplicated from build.rs for now, once it's all working I'll delete this and import it form a
|
||||
// common place
|
||||
fn call_bitcode_fn<'a, 'ctx, 'env>(
|
||||
op: LowLevel,
|
||||
env: &Env<'a, 'ctx, 'env>,
|
||||
args: &[BasicValueEnum<'ctx>],
|
||||
fn_name: &str,
|
||||
) -> BasicValueEnum<'ctx> {
|
||||
let fn_val = env
|
||||
.module
|
||||
.get_function(fn_name)
|
||||
.unwrap_or_else(|| panic!("Unrecognized builtin function: {:?} - if you're working on the Roc compiler, do you need to rebuild the bitcode? See compiler/builtins/bitcode/README.md", fn_name));
|
||||
let call = env.builder.build_call(fn_val, args, "call_builtin");
|
||||
|
||||
call.set_call_convention(fn_val.get_call_conventions());
|
||||
|
||||
call.try_as_basic_value()
|
||||
.left()
|
||||
.unwrap_or_else(|| panic!("LLVM error: Invalid call for low-level op {:?}", op))
|
||||
}
|
||||
|
|
|
@ -202,4 +202,9 @@ mod gen_str {
|
|||
fn empty_str_is_empty() {
|
||||
assert_evals_to!(r#"Str.isEmpty """#, true, bool);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn str_count_graphemes() {
|
||||
assert_evals_to!(r#"Str.countGraphemes "6🤔å🤔e¥🤔çpp""#, 10, usize);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
pub enum LowLevel {
|
||||
StrConcat,
|
||||
StrIsEmpty,
|
||||
StrCountGraphemes,
|
||||
ListLen,
|
||||
ListGetUnsafe,
|
||||
ListSet,
|
||||
|
|
|
@ -670,6 +670,7 @@ define_builtins! {
|
|||
2 STR_IS_EMPTY: "isEmpty"
|
||||
3 STR_APPEND: "append"
|
||||
4 STR_CONCAT: "concat"
|
||||
5 STR_COUNT_GRAPHEMES: "countGraphemes"
|
||||
}
|
||||
4 LIST: "List" => {
|
||||
0 LIST_LIST: "List" imported // the List.List type alias
|
||||
|
|
|
@ -510,7 +510,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] {
|
|||
ListSet => arena.alloc_slice_copy(&[owned, irrelevant, irrelevant]),
|
||||
ListSetInPlace => arena.alloc_slice_copy(&[owned, irrelevant, irrelevant]),
|
||||
ListGetUnsafe => arena.alloc_slice_copy(&[borrowed, irrelevant]),
|
||||
ListConcat | StrConcat => arena.alloc_slice_copy(&[owned, borrowed]),
|
||||
ListConcat | StrConcat | StrCountGraphemes => arena.alloc_slice_copy(&[owned, borrowed]),
|
||||
|
||||
ListSingle => arena.alloc_slice_copy(&[irrelevant]),
|
||||
ListRepeat => arena.alloc_slice_copy(&[irrelevant, irrelevant]),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue