mirror of
https://github.com/roc-lang/roc.git
synced 2025-08-03 19:58:18 +00:00
simplify string splitting LLVM codegen
This commit is contained in:
parent
2455819362
commit
52605fcca9
5 changed files with 19 additions and 66 deletions
|
@ -143,7 +143,6 @@ comptime {
|
||||||
exportStrFn(str.init, "init");
|
exportStrFn(str.init, "init");
|
||||||
exportStrFn(str.strToScalarsC, "to_scalars");
|
exportStrFn(str.strToScalarsC, "to_scalars");
|
||||||
exportStrFn(str.strSplit, "str_split");
|
exportStrFn(str.strSplit, "str_split");
|
||||||
exportStrFn(str.strSplitInPlaceC, "str_split_in_place");
|
|
||||||
exportStrFn(str.countSegments, "count_segments");
|
exportStrFn(str.countSegments, "count_segments");
|
||||||
exportStrFn(str.countGraphemeClusters, "count_grapheme_clusters");
|
exportStrFn(str.countGraphemeClusters, "count_grapheme_clusters");
|
||||||
exportStrFn(str.countUtf8Bytes, "count_utf8_bytes");
|
exportStrFn(str.countUtf8Bytes, "count_utf8_bytes");
|
||||||
|
|
|
@ -753,29 +753,19 @@ fn strFromFloatHelp(comptime T: type, float: T) RocStr {
|
||||||
|
|
||||||
// Str.split
|
// Str.split
|
||||||
|
|
||||||
// For dev backends
|
|
||||||
pub fn strSplit(string: RocStr, delimiter: RocStr) callconv(.C) RocList {
|
pub fn strSplit(string: RocStr, delimiter: RocStr) callconv(.C) RocList {
|
||||||
const segment_count = countSegments(string, delimiter);
|
const segment_count = countSegments(string, delimiter);
|
||||||
const list = RocList.allocate(@alignOf(RocStr), segment_count, @sizeOf(RocStr));
|
const list = RocList.allocate(@alignOf(RocStr), segment_count, @sizeOf(RocStr));
|
||||||
|
|
||||||
if (list.bytes) |bytes| {
|
if (list.bytes) |bytes| {
|
||||||
const strings = @ptrCast([*]RocStr, @alignCast(@alignOf(RocStr), bytes));
|
const strings = @ptrCast([*]RocStr, @alignCast(@alignOf(RocStr), bytes));
|
||||||
strSplitInPlace(strings, string, delimiter);
|
strSplitHelp(strings, string, delimiter);
|
||||||
}
|
}
|
||||||
|
|
||||||
return list;
|
return list;
|
||||||
}
|
}
|
||||||
|
|
||||||
// For LLVM backend
|
fn strSplitHelp(array: [*]RocStr, string: RocStr, delimiter: RocStr) void {
|
||||||
pub fn strSplitInPlaceC(opt_array: ?[*]RocStr, string: RocStr, delimiter: RocStr) callconv(.C) void {
|
|
||||||
if (opt_array) |array| {
|
|
||||||
return @call(.{ .modifier = always_inline }, strSplitInPlace, .{ array, string, delimiter });
|
|
||||||
} else {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn strSplitInPlace(array: [*]RocStr, string: RocStr, delimiter: RocStr) void {
|
|
||||||
var ret_array_index: usize = 0;
|
var ret_array_index: usize = 0;
|
||||||
var slice_start_index: usize = 0;
|
var slice_start_index: usize = 0;
|
||||||
var str_index: usize = 0;
|
var str_index: usize = 0;
|
||||||
|
@ -820,7 +810,7 @@ fn strSplitInPlace(array: [*]RocStr, string: RocStr, delimiter: RocStr) void {
|
||||||
array[ret_array_index] = RocStr.init(str_bytes + slice_start_index, str_len - slice_start_index);
|
array[ret_array_index] = RocStr.init(str_bytes + slice_start_index, str_len - slice_start_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
test "strSplitInPlace: empty delimiter" {
|
test "strSplitHelp: empty delimiter" {
|
||||||
// Str.split "abc" "" == ["abc"]
|
// Str.split "abc" "" == ["abc"]
|
||||||
const str_arr = "abc";
|
const str_arr = "abc";
|
||||||
const str = RocStr.init(str_arr, str_arr.len);
|
const str = RocStr.init(str_arr, str_arr.len);
|
||||||
|
@ -831,7 +821,7 @@ test "strSplitInPlace: empty delimiter" {
|
||||||
var array: [1]RocStr = undefined;
|
var array: [1]RocStr = undefined;
|
||||||
const array_ptr: [*]RocStr = &array;
|
const array_ptr: [*]RocStr = &array;
|
||||||
|
|
||||||
strSplitInPlace(array_ptr, str, delimiter);
|
strSplitHelp(array_ptr, str, delimiter);
|
||||||
|
|
||||||
var expected = [1]RocStr{
|
var expected = [1]RocStr{
|
||||||
str,
|
str,
|
||||||
|
@ -854,7 +844,7 @@ test "strSplitInPlace: empty delimiter" {
|
||||||
try expect(array[0].eq(expected[0]));
|
try expect(array[0].eq(expected[0]));
|
||||||
}
|
}
|
||||||
|
|
||||||
test "strSplitInPlace: no delimiter" {
|
test "strSplitHelp: no delimiter" {
|
||||||
// Str.split "abc" "!" == ["abc"]
|
// Str.split "abc" "!" == ["abc"]
|
||||||
const str_arr = "abc";
|
const str_arr = "abc";
|
||||||
const str = RocStr.init(str_arr, str_arr.len);
|
const str = RocStr.init(str_arr, str_arr.len);
|
||||||
|
@ -865,7 +855,7 @@ test "strSplitInPlace: no delimiter" {
|
||||||
var array: [1]RocStr = undefined;
|
var array: [1]RocStr = undefined;
|
||||||
const array_ptr: [*]RocStr = &array;
|
const array_ptr: [*]RocStr = &array;
|
||||||
|
|
||||||
strSplitInPlace(array_ptr, str, delimiter);
|
strSplitHelp(array_ptr, str, delimiter);
|
||||||
|
|
||||||
var expected = [1]RocStr{
|
var expected = [1]RocStr{
|
||||||
str,
|
str,
|
||||||
|
@ -888,7 +878,7 @@ test "strSplitInPlace: no delimiter" {
|
||||||
try expect(array[0].eq(expected[0]));
|
try expect(array[0].eq(expected[0]));
|
||||||
}
|
}
|
||||||
|
|
||||||
test "strSplitInPlace: empty end" {
|
test "strSplitHelp: empty end" {
|
||||||
const str_arr = "1---- ---- ---- ---- ----2---- ---- ---- ---- ----";
|
const str_arr = "1---- ---- ---- ---- ----2---- ---- ---- ---- ----";
|
||||||
const str = RocStr.init(str_arr, str_arr.len);
|
const str = RocStr.init(str_arr, str_arr.len);
|
||||||
|
|
||||||
|
@ -903,7 +893,7 @@ test "strSplitInPlace: empty end" {
|
||||||
};
|
};
|
||||||
const array_ptr: [*]RocStr = &array;
|
const array_ptr: [*]RocStr = &array;
|
||||||
|
|
||||||
strSplitInPlace(array_ptr, str, delimiter);
|
strSplitHelp(array_ptr, str, delimiter);
|
||||||
|
|
||||||
const one = RocStr.init("1", 1);
|
const one = RocStr.init("1", 1);
|
||||||
const two = RocStr.init("2", 1);
|
const two = RocStr.init("2", 1);
|
||||||
|
@ -931,7 +921,7 @@ test "strSplitInPlace: empty end" {
|
||||||
try expect(array[2].eq(expected[2]));
|
try expect(array[2].eq(expected[2]));
|
||||||
}
|
}
|
||||||
|
|
||||||
test "strSplitInPlace: delimiter on sides" {
|
test "strSplitHelp: delimiter on sides" {
|
||||||
const str_arr = "tttghittt";
|
const str_arr = "tttghittt";
|
||||||
const str = RocStr.init(str_arr, str_arr.len);
|
const str = RocStr.init(str_arr, str_arr.len);
|
||||||
|
|
||||||
|
@ -945,7 +935,7 @@ test "strSplitInPlace: delimiter on sides" {
|
||||||
undefined,
|
undefined,
|
||||||
};
|
};
|
||||||
const array_ptr: [*]RocStr = &array;
|
const array_ptr: [*]RocStr = &array;
|
||||||
strSplitInPlace(array_ptr, str, delimiter);
|
strSplitHelp(array_ptr, str, delimiter);
|
||||||
|
|
||||||
const ghi_arr = "ghi";
|
const ghi_arr = "ghi";
|
||||||
const ghi = RocStr.init(ghi_arr, ghi_arr.len);
|
const ghi = RocStr.init(ghi_arr, ghi_arr.len);
|
||||||
|
@ -973,7 +963,7 @@ test "strSplitInPlace: delimiter on sides" {
|
||||||
try expect(array[2].eq(expected[2]));
|
try expect(array[2].eq(expected[2]));
|
||||||
}
|
}
|
||||||
|
|
||||||
test "strSplitInPlace: three pieces" {
|
test "strSplitHelp: three pieces" {
|
||||||
// Str.split "a!b!c" "!" == ["a", "b", "c"]
|
// Str.split "a!b!c" "!" == ["a", "b", "c"]
|
||||||
const str_arr = "a!b!c";
|
const str_arr = "a!b!c";
|
||||||
const str = RocStr.init(str_arr, str_arr.len);
|
const str = RocStr.init(str_arr, str_arr.len);
|
||||||
|
@ -985,7 +975,7 @@ test "strSplitInPlace: three pieces" {
|
||||||
var array: [array_len]RocStr = undefined;
|
var array: [array_len]RocStr = undefined;
|
||||||
const array_ptr: [*]RocStr = &array;
|
const array_ptr: [*]RocStr = &array;
|
||||||
|
|
||||||
strSplitInPlace(array_ptr, str, delimiter);
|
strSplitHelp(array_ptr, str, delimiter);
|
||||||
|
|
||||||
const a = RocStr.init("a", 1);
|
const a = RocStr.init("a", 1);
|
||||||
const b = RocStr.init("b", 1);
|
const b = RocStr.init("b", 1);
|
||||||
|
|
|
@ -311,7 +311,6 @@ pub const STR_COUNT_SEGMENTS: &str = "roc_builtins.str.count_segments";
|
||||||
pub const STR_CONCAT: &str = "roc_builtins.str.concat";
|
pub const STR_CONCAT: &str = "roc_builtins.str.concat";
|
||||||
pub const STR_JOIN_WITH: &str = "roc_builtins.str.joinWith";
|
pub const STR_JOIN_WITH: &str = "roc_builtins.str.joinWith";
|
||||||
pub const STR_STR_SPLIT: &str = "roc_builtins.str.str_split";
|
pub const STR_STR_SPLIT: &str = "roc_builtins.str.str_split";
|
||||||
pub const STR_STR_SPLIT_IN_PLACE: &str = "roc_builtins.str.str_split_in_place";
|
|
||||||
pub const STR_TO_SCALARS: &str = "roc_builtins.str.to_scalars";
|
pub const STR_TO_SCALARS: &str = "roc_builtins.str.to_scalars";
|
||||||
pub const STR_COUNT_GRAPEHEME_CLUSTERS: &str = "roc_builtins.str.count_grapheme_clusters";
|
pub const STR_COUNT_GRAPEHEME_CLUSTERS: &str = "roc_builtins.str.count_grapheme_clusters";
|
||||||
pub const STR_COUNT_UTF8_BYTES: &str = "roc_builtins.str.count_utf8_bytes";
|
pub const STR_COUNT_UTF8_BYTES: &str = "roc_builtins.str.count_utf8_bytes";
|
||||||
|
|
|
@ -13,9 +13,7 @@ use crate::llvm::build_list::{
|
||||||
list_replace_unsafe, list_sort_with, list_sublist, list_swap, list_symbol_to_c_abi,
|
list_replace_unsafe, list_sort_with, list_sublist, list_swap, list_symbol_to_c_abi,
|
||||||
list_to_c_abi, list_with_capacity,
|
list_to_c_abi, list_with_capacity,
|
||||||
};
|
};
|
||||||
use crate::llvm::build_str::{
|
use crate::llvm::build_str::{str_from_float, str_from_int, str_from_utf8, str_from_utf8_range};
|
||||||
str_from_float, str_from_int, str_from_utf8, str_from_utf8_range, str_split,
|
|
||||||
};
|
|
||||||
use crate::llvm::compare::{generic_eq, generic_neq};
|
use crate::llvm::compare::{generic_eq, generic_neq};
|
||||||
use crate::llvm::convert::{
|
use crate::llvm::convert::{
|
||||||
self, argument_type_from_layout, basic_type_from_builtin, basic_type_from_layout,
|
self, argument_type_from_layout, basic_type_from_builtin, basic_type_from_layout,
|
||||||
|
@ -5311,7 +5309,10 @@ fn run_low_level<'a, 'ctx, 'env>(
|
||||||
// Str.split : Str, Str -> List Str
|
// Str.split : Str, Str -> List Str
|
||||||
debug_assert_eq!(args.len(), 2);
|
debug_assert_eq!(args.len(), 2);
|
||||||
|
|
||||||
str_split(env, scope, args[0], args[1])
|
let string = load_symbol(scope, &args[0]);
|
||||||
|
let delimiter = load_symbol(scope, &args[1]);
|
||||||
|
|
||||||
|
call_list_bitcode_fn(env, &[string, delimiter], bitcode::STR_STR_SPLIT)
|
||||||
}
|
}
|
||||||
StrIsEmpty => {
|
StrIsEmpty => {
|
||||||
// Str.isEmpty : Str -> Str
|
// Str.isEmpty : Str -> Str
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
use crate::llvm::bitcode::{call_bitcode_fn, call_str_bitcode_fn, call_void_bitcode_fn};
|
use crate::llvm::bitcode::{call_bitcode_fn, call_str_bitcode_fn, call_void_bitcode_fn};
|
||||||
use crate::llvm::build::{Env, Scope};
|
use crate::llvm::build::{Env, Scope};
|
||||||
use crate::llvm::build_list::{allocate_list, pass_update_mode, store_list};
|
use crate::llvm::build_list::pass_update_mode;
|
||||||
use inkwell::builder::Builder;
|
use inkwell::builder::Builder;
|
||||||
use inkwell::values::{BasicValueEnum, IntValue, PointerValue, StructValue};
|
use inkwell::values::{BasicValueEnum, IntValue, PointerValue, StructValue};
|
||||||
use inkwell::AddressSpace;
|
use inkwell::AddressSpace;
|
||||||
|
@ -10,48 +10,12 @@ use roc_module::symbol::Symbol;
|
||||||
use roc_mono::layout::{Builtin, Layout};
|
use roc_mono::layout::{Builtin, Layout};
|
||||||
use roc_target::PtrWidth;
|
use roc_target::PtrWidth;
|
||||||
|
|
||||||
|
use super::bitcode::call_list_bitcode_fn;
|
||||||
use super::build::{create_entry_block_alloca, load_symbol};
|
use super::build::{create_entry_block_alloca, load_symbol};
|
||||||
use super::build_list::list_symbol_to_c_abi;
|
use super::build_list::list_symbol_to_c_abi;
|
||||||
|
|
||||||
pub static CHAR_LAYOUT: Layout = Layout::u8();
|
pub static CHAR_LAYOUT: Layout = Layout::u8();
|
||||||
|
|
||||||
/// Str.split : Str, Str -> List Str
|
|
||||||
pub fn str_split<'a, 'ctx, 'env>(
|
|
||||||
env: &Env<'a, 'ctx, 'env>,
|
|
||||||
scope: &Scope<'a, 'ctx>,
|
|
||||||
str_symbol: Symbol,
|
|
||||||
delimiter_symbol: Symbol,
|
|
||||||
) -> BasicValueEnum<'ctx> {
|
|
||||||
let builder = env.builder;
|
|
||||||
|
|
||||||
let string = load_symbol(scope, &str_symbol);
|
|
||||||
let delimiter = load_symbol(scope, &delimiter_symbol);
|
|
||||||
|
|
||||||
let segment_count =
|
|
||||||
call_bitcode_fn(env, &[string, delimiter], bitcode::STR_COUNT_SEGMENTS).into_int_value();
|
|
||||||
|
|
||||||
// a pointer to the elements
|
|
||||||
let ret_list_ptr = allocate_list(env, &Layout::Builtin(Builtin::Str), segment_count);
|
|
||||||
|
|
||||||
// get the RocStr type defined by zig
|
|
||||||
let roc_str_type = env.module.get_struct_type("str.RocStr").unwrap();
|
|
||||||
|
|
||||||
// convert `*mut { *mut u8, i64 }` to `*mut RocStr`
|
|
||||||
let ret_list_ptr_zig_rocstr = builder.build_bitcast(
|
|
||||||
ret_list_ptr,
|
|
||||||
roc_str_type.ptr_type(AddressSpace::Generic),
|
|
||||||
"convert_to_zig_rocstr",
|
|
||||||
);
|
|
||||||
|
|
||||||
call_void_bitcode_fn(
|
|
||||||
env,
|
|
||||||
&[ret_list_ptr_zig_rocstr, string, delimiter],
|
|
||||||
bitcode::STR_STR_SPLIT_IN_PLACE,
|
|
||||||
);
|
|
||||||
|
|
||||||
store_list(env, ret_list_ptr, segment_count)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn str_symbol_to_c_abi<'a, 'ctx, 'env>(
|
pub fn str_symbol_to_c_abi<'a, 'ctx, 'env>(
|
||||||
env: &Env<'a, 'ctx, 'env>,
|
env: &Env<'a, 'ctx, 'env>,
|
||||||
scope: &Scope<'a, 'ctx>,
|
scope: &Scope<'a, 'ctx>,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue