Merge remote-tracking branch 'origin/trunk' into big-nested-pattern-match

This commit is contained in:
Folkert 2020-11-14 03:07:34 +01:00
commit e2521c1011
15 changed files with 387 additions and 48 deletions

View file

@ -13,7 +13,7 @@ comptime { exportNumFn(num.asin, "asin"); }
// Str Module
const str = @import("str.zig");
comptime { exportStrFn(str.strSplitInPlace, "str_split_in_place"); }
comptime { exportStrFn(str.countSegments, "count_segements"); }
comptime { exportStrFn(str.countSegments, "count_segments"); }
comptime { exportStrFn(str.countGraphemeClusters, "count_grapheme_clusters"); }
// Export helpers - Must be run inside a comptime

View file

@ -76,13 +76,16 @@ const RocStr = struct {
// Str.split
pub fn strSplitInPlace(
array: [*]RocStr,
bytes_array: [*]u128,
array_len: usize,
str_bytes_ptrs: [*]u8,
str_len: usize,
delimiter_bytes: [*]u8,
delimiter_bytes_ptrs: [*]u8,
delimiter_len: usize
) callconv(.C) void {
var array = @ptrCast([*]RocStr, bytes_array);
var ret_array_index : usize = 0;
var sliceStart_index : usize = 0;
@ -96,7 +99,7 @@ pub fn strSplitInPlace(
var matches_delimiter = true;
while (delimiter_index < delimiter_len) {
var delimiterChar = delimiter_bytes[delimiter_index];
var delimiterChar = delimiter_bytes_ptrs[delimiter_index];
var strChar = str_bytes_ptrs[str_index + delimiter_index];
if (delimiterChar != strChar) {
@ -250,10 +253,10 @@ test "strSplitInPlace: three pieces" {
pub fn countSegments(
str_bytes_ptrs: [*]u8,
str_len: usize,
delimiter_bytes: [*]u8,
delimiter_bytes_ptrs: [*]u8,
delimiter_len: usize
) callconv(.C) i64 {
var count: i64 = 1;
) callconv(.C) usize {
var count: usize = 1;
if (str_len > delimiter_len) {
var str_index: usize = 0;
@ -265,7 +268,7 @@ pub fn countSegments(
var matches_delimiter = true;
while (delimiter_index < delimiter_len) {
const delimiterChar = delimiter_bytes[delimiter_index];
const delimiterChar = delimiter_bytes_ptrs[delimiter_index];
const strChar = str_bytes_ptrs[str_index + delimiter_index];
if (delimiterChar != strChar) {

View file

@ -0,0 +1,64 @@
#[macro_use]
extern crate pretty_assertions;
#[cfg(test)]
mod bitcode {
use roc_builtins_bitcode::{count_segments_, str_split_};
#[test]
fn count_segments() {
assert_eq!(
count_segments_((&"hello there").as_bytes(), (&"hello").as_bytes()),
2
);
assert_eq!(
count_segments_((&"a\nb\nc").as_bytes(), (&"\n").as_bytes()),
3
);
assert_eq!(
count_segments_((&"str").as_bytes(), (&"delimiter").as_bytes()),
1
);
}
#[test]
fn str_split() {
fn splits_to(string: &str, delimiter: &str, expectation: &[&[u8]]) {
assert_eq!(
str_split_(
&mut [(&"").as_bytes()].repeat(expectation.len()),
&string.as_bytes(),
&delimiter.as_bytes()
),
expectation
);
}
splits_to(
"a!b!c",
"!",
&[(&"a").as_bytes(), (&"b").as_bytes(), (&"c").as_bytes()],
);
splits_to(
"a!?b!?c!?",
"!?",
&[
(&"a").as_bytes(),
(&"b").as_bytes(),
(&"c").as_bytes(),
(&"").as_bytes(),
],
);
splits_to("abc", "!", &[(&"abc").as_bytes()]);
splits_to(
"tttttghittttt",
"ttttt",
&[(&"").as_bytes(), (&"ghi").as_bytes(), (&"").as_bytes()],
);
splits_to("def", "!!!!!!", &[(&"def").as_bytes()]);
}
}

View file

@ -11,7 +11,9 @@ fn main() {
let out_dir = env::var_os("OUT_DIR").unwrap();
// "." is relative to where "build.rs" is
let src_path = Path::new(".").join("bitcode").join("src").join("main.zig");
let src_path = Path::new(".").join("bitcode").join("src");
let main_zig_path = src_path.join("main.zig");
let src_path_str = src_path.to_str().expect("Invalid src path");
println!("Building main.zig from: {}", src_path_str);
@ -28,7 +30,7 @@ fn main() {
"zig",
&[
"build-obj",
src_path_str,
main_zig_path.to_str().unwrap(),
&emit_ir_arg,
"-fno-emit-bin",
"--strip",

View file

@ -23,6 +23,6 @@ pub const NUM_ATAN: &str = "roc_builtins.num.atan";
pub const NUM_IS_FINITE: &str = "roc_builtins.num.is_finite";
pub const NUM_POW_INT: &str = "roc_builtins.num.pow_int";
pub const STR_COUNT_SEGEMENTS: &str = "roc_builtins.str.count_segements";
pub const STR_SPLIT_IN_PLACE: &str = "roc_builtins.str.str_split_in_place";
pub const STR_COUNT_SEGMENTS: &str = "roc_builtins.str.count_segments";
pub const STR_STR_SPLIT_IN_PLACE: &str = "roc_builtins.str.str_split_in_place";
pub const STR_COUNT_GRAPEHEME_CLUSTERS: &str = "roc_builtins.str.count_grapheme_clusters";

View file

@ -390,6 +390,15 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
// Str module
// Str.split : Str, Str -> List Str
add_type(
Symbol::STR_SPLIT,
top_level_function(
vec![str_type(), str_type()],
Box::new(list_type(str_type())),
),
);
// Str.concat : Str, Str -> Str
add_type(
Symbol::STR_CONCAT,

View file

@ -1016,6 +1016,24 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
// Str module
// Str.split :
// Attr * Str,
// Attr * Str
// -> Attr * (List (Attr * Str))
add_type(Symbol::STR_SPLIT, {
let_tvars! { star1, star2, star3, star4 };
unique_function(
vec![str_type(star1), str_type(star2)],
SolvedType::Apply(
Symbol::ATTR_ATTR,
vec![
flex(star3),
SolvedType::Apply(Symbol::LIST_LIST, vec![str_type(star4)]),
],
),
)
});
// isEmpty : Attr * Str -> Attr * Bool
add_type(Symbol::STR_IS_EMPTY, {
let_tvars! { star1, star2 };

View file

@ -51,6 +51,7 @@ pub fn builtin_defs(var_store: &mut VarStore) -> MutMap<Symbol, Def> {
Symbol::BOOL_OR => bool_or,
Symbol::BOOL_NOT => bool_not,
Symbol::STR_CONCAT => str_concat,
Symbol::STR_SPLIT => str_split,
Symbol::STR_IS_EMPTY => str_is_empty,
Symbol::STR_COUNT_GRAPHEMES => str_count_graphemes,
Symbol::LIST_LEN => list_len,
@ -906,6 +907,26 @@ fn list_reverse(symbol: Symbol, var_store: &mut VarStore) -> Def {
)
}
/// Str.split : Str, Str -> List Str
fn str_split(symbol: Symbol, var_store: &mut VarStore) -> Def {
let str_var = var_store.fresh();
let ret_list_var = var_store.fresh();
let body = RunLowLevel {
op: LowLevel::StrSplit,
args: vec![(str_var, Var(Symbol::ARG_1)), (str_var, Var(Symbol::ARG_2))],
ret_var: ret_list_var,
};
defn(
symbol,
vec![(str_var, Symbol::ARG_1), (str_var, Symbol::ARG_2)],
var_store,
body,
ret_list_var,
)
}
/// Str.concat : Str, Str -> Str
fn str_concat(symbol: Symbol, var_store: &mut VarStore) -> Def {
let str_var = var_store.fresh();

View file

@ -4,7 +4,7 @@ use crate::llvm::build_list::{
list_get_unsafe, list_join, list_keep_if, list_len, list_map, list_prepend, list_repeat,
list_reverse, list_set, list_single, list_walk_right,
};
use crate::llvm::build_str::{str_concat, str_count_graphemes, str_len, CHAR_LAYOUT};
use crate::llvm::build_str::{str_concat, str_count_graphemes, str_len, str_split, CHAR_LAYOUT};
use crate::llvm::compare::{build_eq, build_neq};
use crate::llvm::convert::{
basic_type_from_layout, block_of_memory, collection, get_fn_type, get_ptr_type, ptr_int,
@ -24,8 +24,8 @@ use inkwell::passes::{PassManager, PassManagerBuilder};
use inkwell::types::{BasicTypeEnum, FunctionType, IntType, StructType};
use inkwell::values::BasicValueEnum::{self, *};
use inkwell::values::{
BasicValue, CallSiteValue, FloatValue, FunctionValue, InstructionOpcode, IntValue,
PointerValue, StructValue,
BasicValue, CallSiteValue, FloatValue, FunctionValue, InstructionOpcode, InstructionValue,
IntValue, PointerValue, StructValue,
};
use inkwell::OptimizationLevel;
use inkwell::{AddressSpace, IntPredicate};
@ -2229,6 +2229,14 @@ fn run_low_level<'a, 'ctx, 'env>(
str_concat(env, inplace, scope, parent, args[0], args[1])
}
StrSplit => {
// Str.split : Str, Str -> List Str
debug_assert_eq!(args.len(), 2);
let inplace = get_inplace_from_layout(layout);
str_split(env, scope, parent, inplace, args[0], args[1])
}
StrIsEmpty => {
// Str.isEmpty : Str -> Str
debug_assert_eq!(args.len(), 1);
@ -2745,12 +2753,7 @@ fn build_int_binop<'a, 'ctx, 'env>(
NumLte => bd.build_int_compare(SLE, lhs, rhs, "int_lte").into(),
NumRemUnchecked => bd.build_int_signed_rem(lhs, rhs, "rem_int").into(),
NumDivUnchecked => bd.build_int_signed_div(lhs, rhs, "div_int").into(),
NumPowInt => call_bitcode_fn(
NumPowInt,
env,
&[lhs.into(), rhs.into()],
&bitcode::NUM_POW_INT,
),
NumPowInt => call_bitcode_fn(env, &[lhs.into(), rhs.into()], &bitcode::NUM_POW_INT),
_ => {
unreachable!("Unrecognized int binary operation: {:?}", op);
}
@ -2758,22 +2761,48 @@ fn build_int_binop<'a, 'ctx, 'env>(
}
pub fn call_bitcode_fn<'a, 'ctx, 'env>(
op: LowLevel,
env: &Env<'a, 'ctx, 'env>,
args: &[BasicValueEnum<'ctx>],
fn_name: &str,
) -> BasicValueEnum<'ctx> {
call_bitcode_fn_help(env, args, fn_name)
.try_as_basic_value()
.left()
.unwrap_or_else(|| {
panic!(
"LLVM error: Did not get return value from bitcode function {:?}",
fn_name
)
})
}
pub fn call_void_bitcode_fn<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
args: &[BasicValueEnum<'ctx>],
fn_name: &str,
) -> InstructionValue<'ctx> {
call_bitcode_fn_help(env, args, fn_name)
.try_as_basic_value()
.right()
.unwrap_or_else(|| panic!("LLVM error: Tried to call void bitcode function, but got return value from bitcode function, {:?}", fn_name))
}
fn call_bitcode_fn_help<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
args: &[BasicValueEnum<'ctx>],
fn_name: &str,
) -> CallSiteValue<'ctx> {
let fn_val = env
.module
.get_function(fn_name)
.unwrap_or_else(|| panic!("Unrecognized builtin function: {:?} - if you're working on the Roc compiler, do you need to rebuild the bitcode? See compiler/builtins/bitcode/README.md", fn_name));
dbg!(fn_val);
let call = env.builder.build_call(fn_val, args, "call_builtin");
call.set_call_convention(fn_val.get_call_conventions());
call.try_as_basic_value()
.left()
.unwrap_or_else(|| panic!("LLVM error: Invalid call for low-level op {:?}", op))
call
}
fn build_float_binop<'a, 'ctx, 'env>(
@ -2798,8 +2827,7 @@ fn build_float_binop<'a, 'ctx, 'env>(
let result = bd.build_float_add(lhs, rhs, "add_float");
let is_finite =
call_bitcode_fn(NumIsFinite, env, &[result.into()], &bitcode::NUM_IS_FINITE)
.into_int_value();
call_bitcode_fn(env, &[result.into()], &bitcode::NUM_IS_FINITE).into_int_value();
let then_block = context.append_basic_block(parent, "then_block");
let throw_block = context.append_basic_block(parent, "throw_block");
@ -2820,8 +2848,7 @@ fn build_float_binop<'a, 'ctx, 'env>(
let result = bd.build_float_add(lhs, rhs, "add_float");
let is_finite =
call_bitcode_fn(NumIsFinite, env, &[result.into()], &bitcode::NUM_IS_FINITE)
.into_int_value();
call_bitcode_fn(env, &[result.into()], &bitcode::NUM_IS_FINITE).into_int_value();
let is_infinite = bd.build_not(is_finite, "negate");
let struct_type = context.struct_type(
@ -2950,10 +2977,10 @@ fn build_float_unary_op<'a, 'ctx, 'env>(
env.context.i64_type(),
"num_floor",
),
NumIsFinite => call_bitcode_fn(NumIsFinite, env, &[arg.into()], &bitcode::NUM_IS_FINITE),
NumAtan => call_bitcode_fn(NumAtan, env, &[arg.into()], &bitcode::NUM_ATAN),
NumAcos => call_bitcode_fn(NumAcos, env, &[arg.into()], &bitcode::NUM_ACOS),
NumAsin => call_bitcode_fn(NumAsin, env, &[arg.into()], &bitcode::NUM_ASIN),
NumIsFinite => call_bitcode_fn(env, &[arg.into()], &bitcode::NUM_IS_FINITE),
NumAtan => call_bitcode_fn(env, &[arg.into()], &bitcode::NUM_ATAN),
NumAcos => call_bitcode_fn(env, &[arg.into()], &bitcode::NUM_ACOS),
NumAsin => call_bitcode_fn(env, &[arg.into()], &bitcode::NUM_ASIN),
_ => {
unreachable!("Unrecognized int unary operation: {:?}", op);
}

View file

@ -1503,17 +1503,12 @@ where
// This helper simulates a basic for loop, where
// and index increments up from 0 to some end value
fn incrementing_index_loop<'ctx, LoopFn>(
pub fn incrementing_index_loop<'ctx, LoopFn>(
builder: &Builder<'ctx>,
ctx: &'ctx Context,
parent: FunctionValue<'ctx>,
end: IntValue<'ctx>,
index_name: &str,
// allocating memory for an index is costly, so sometimes
// we want to reuse an index if multiple loops happen in a
// series, such as the case in List.concat. A memory
// allocation cab be passed in to be used, and the memory
// allocation that _is_ used is the return value.
mut loop_fn: LoopFn,
) -> PointerValue<'ctx>
where

View file

@ -1,4 +1,6 @@
use crate::llvm::build::{call_bitcode_fn, ptr_from_symbol, Env, InPlace, Scope};
use crate::llvm::build::{
call_bitcode_fn, call_void_bitcode_fn, ptr_from_symbol, Env, InPlace, Scope,
};
use crate::llvm::build_list::{
allocate_list, build_basic_phi2, empty_list, incrementing_elem_loop, load_list_ptr, store_list,
};
@ -8,12 +10,81 @@ use inkwell::types::BasicTypeEnum;
use inkwell::values::{BasicValueEnum, FunctionValue, IntValue, PointerValue, StructValue};
use inkwell::{AddressSpace, IntPredicate};
use roc_builtins::bitcode;
use roc_module::low_level::LowLevel;
use roc_module::symbol::Symbol;
use roc_mono::layout::{Builtin, Layout};
pub static CHAR_LAYOUT: Layout = Layout::Builtin(Builtin::Int8);
/// Str.split : Str, Str -> List Str
pub fn str_split<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
scope: &Scope<'a, 'ctx>,
parent: FunctionValue<'ctx>,
inplace: InPlace,
str_symbol: Symbol,
delimiter_symbol: Symbol,
) -> BasicValueEnum<'ctx> {
let builder = env.builder;
let ctx = env.context;
let str_ptr = ptr_from_symbol(scope, str_symbol);
let delimiter_ptr = ptr_from_symbol(scope, delimiter_symbol);
let str_wrapper_type = BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes));
load_str(
env,
parent,
*str_ptr,
str_wrapper_type,
|str_bytes_ptr, str_len, _str_smallness| {
load_str(
env,
parent,
*delimiter_ptr,
str_wrapper_type,
|delimiter_bytes_ptr, delimiter_len, _delimiter_smallness| {
let segment_count = call_bitcode_fn(
env,
&[
BasicValueEnum::PointerValue(str_bytes_ptr),
BasicValueEnum::IntValue(str_len),
BasicValueEnum::PointerValue(delimiter_bytes_ptr),
BasicValueEnum::IntValue(delimiter_len),
],
&bitcode::STR_COUNT_SEGMENTS,
)
.into_int_value();
let ret_list_ptr =
allocate_list(env, inplace, &Layout::Builtin(Builtin::Str), segment_count);
let ret_list_ptr_u128s = builder.build_bitcast(
ret_list_ptr,
ctx.i128_type().ptr_type(AddressSpace::Generic),
"ret_u128_list",
);
call_void_bitcode_fn(
env,
&[
ret_list_ptr_u128s,
BasicValueEnum::IntValue(segment_count),
BasicValueEnum::PointerValue(str_bytes_ptr),
BasicValueEnum::IntValue(str_len),
BasicValueEnum::PointerValue(delimiter_bytes_ptr),
BasicValueEnum::IntValue(delimiter_len),
],
&bitcode::STR_STR_SPLIT_IN_PLACE,
);
store_list(env, ret_list_ptr, segment_count)
},
)
},
)
}
/// Str.concat : Str, Str -> Str
pub fn str_concat<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
@ -613,7 +684,6 @@ pub fn str_count_graphemes<'a, 'ctx, 'env>(
ret_type,
|str_ptr, str_len, _str_smallness| {
call_bitcode_fn(
LowLevel::StrCountGraphemes,
env,
&[
BasicValueEnum::PointerValue(str_ptr),

View file

@ -13,6 +13,134 @@ mod helpers;
#[cfg(test)]
mod gen_str {
#[test]
fn str_split_bigger_delimiter_small_str() {
assert_evals_to!(
indoc!(
r#"
List.len (Str.split "hello" "JJJJ there")
"#
),
1,
i64
);
assert_evals_to!(
indoc!(
r#"
when List.first (Str.split "JJJ" "JJJJ there") is
Ok str ->
Str.countGraphemes str
_ ->
-1
"#
),
3,
i64
);
// assert_evals_to!(
// indoc!(
// r#"
// when List.first (Str.split "JJJJJ" "JJJJ there") is
// Ok str ->
// str
// |> Str.concat str
// |> Str.concat str
// |> Str.concat str
// |> Str.concat str
//
// _ ->
// "Not Str!"
//
// "#
// ),
// "JJJJJJJJJJJJJJJJJJJJJJJJJ",
// &'static str
// );
}
// #[test]
// fn str_split_small_str_big_delimiter() {
// assert_evals_to!(
// indoc!(
// r#"
// Str.split "JJJ" "0123456789abcdefghi"
// "#
// ),
// &["JJJ"],
// &'static [&'static str]
// );
// }
#[test]
fn str_split_big_str_small_delimiter() {
assert_evals_to!(
indoc!(
r#"
Str.split "01234567789abcdefghi?01234567789abcdefghi" "?"
"#
),
&["01234567789abcdefghi", "01234567789abcdefghi"],
&'static [&'static str]
);
assert_evals_to!(
indoc!(
r#"
Str.split "01234567789abcdefghi 3ch 01234567789abcdefghi" "3ch"
"#
),
&["01234567789abcdefghi ", " 01234567789abcdefghi"],
&'static [&'static str]
);
}
// #[test]
// fn str_split_small_str_small_delimiter() {
// assert_evals_to!(
// indoc!(
// r#"
// Str.split "J!J!J" "!"
// "#
// ),
// &["J", "J", "J"],
// &'static [&'static str]
// );
// }
#[test]
fn str_split_bigger_delimiter_big_str() {
assert_evals_to!(
indoc!(
r#"
Str.split
"string to split is shorter"
"than the delimiter which happens to be very very long"
"#
),
&["string to split is shorter"],
&'static [&'static str]
);
}
// #[test]
// fn str_split_big_str() {
// assert_evals_to!(
// indoc!(
// r#"
// Str.split
// "hello 0123456789abcdef there 0123456789abcdef "
// " 0123456789abcdef "
// "#
// ),
// &["hello", "there"],
// &'static [&'static str]
// );
// }
#[test]
fn str_concat_big_to_big() {
assert_evals_to!(

View file

@ -5,6 +5,7 @@
pub enum LowLevel {
StrConcat,
StrIsEmpty,
StrSplit,
StrCountGraphemes,
ListLen,
ListGetUnsafe,

View file

@ -670,7 +670,8 @@ define_builtins! {
2 STR_IS_EMPTY: "isEmpty"
3 STR_APPEND: "append"
4 STR_CONCAT: "concat"
5 STR_COUNT_GRAPHEMES: "countGraphemes"
5 STR_SPLIT: "split"
6 STR_COUNT_GRAPHEMES: "countGraphemes"
}
4 LIST: "List" => {
0 LIST_LIST: "List" imported // the List.List type alias

View file

@ -525,7 +525,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] {
ListSetInPlace => arena.alloc_slice_copy(&[owned, irrelevant, irrelevant]),
ListGetUnsafe => arena.alloc_slice_copy(&[borrowed, irrelevant]),
ListConcat | StrConcat => arena.alloc_slice_copy(&[owned, borrowed]),
StrSplit => arena.alloc_slice_copy(&[borrowed, borrowed]),
ListSingle => arena.alloc_slice_copy(&[irrelevant]),
ListRepeat => arena.alloc_slice_copy(&[irrelevant, irrelevant]),
ListReverse => arena.alloc_slice_copy(&[owned]),