apply optimization passes

This commit is contained in:
Folkert 2020-08-10 13:34:28 +02:00
parent 9e75252ddb
commit 8f7d842d41
5 changed files with 211 additions and 86 deletions

View file

@ -1,7 +1,6 @@
use bumpalo::Bump;
use inkwell::context::Context;
use inkwell::execution_engine::JitFunction;
use inkwell::passes::PassManager;
use inkwell::types::BasicType;
use inkwell::OptimizationLevel;
use roc_builtins::unique::uniq_stdlib;
@ -209,13 +208,9 @@ pub fn gen(src: &[u8], target: Triple, opt_level: OptLevel) -> Result<(String, S
}
let context = Context::create();
let module = roc_gen::llvm::build::module_from_builtins(&context, "app");
let module = arena.alloc(roc_gen::llvm::build::module_from_builtins(&context, "app"));
let builder = context.create_builder();
let fpm = PassManager::create(&module);
roc_gen::llvm::build::add_passes(&fpm, opt_level);
fpm.initialize();
let (mpm, fpm) = roc_gen::llvm::build::construct_optimization_passes(module, opt_level);
// pretty-print the expr type string for later.
name_all_type_vars(var, &mut subs);
@ -243,7 +238,7 @@ pub fn gen(src: &[u8], target: Triple, opt_level: OptLevel) -> Result<(String, S
builder: &builder,
context: &context,
interns,
module: arena.alloc(module),
module,
ptr_bytes,
leak: false,
};
@ -355,6 +350,8 @@ pub fn gen(src: &[u8], target: Triple, opt_level: OptLevel) -> Result<(String, S
panic!("Main function {} failed LLVM verification. Uncomment things near this error message for more details.", main_fn_name);
}
mpm.run_on(module);
// Verify the module
if let Err(errors) = env.module.verify() {
panic!("Errors defining module: {:?}", errors);

View file

@ -1,7 +1,6 @@
use bumpalo::Bump;
use inkwell::context::Context;
use inkwell::module::Linkage;
use inkwell::passes::PassManager;
use inkwell::types::BasicType;
use inkwell::OptimizationLevel;
use roc_gen::layout_id::LayoutIds;
@ -127,13 +126,9 @@ pub fn gen(
// Generate the binary
let context = Context::create();
let module = module_from_builtins(&context, "app");
let module = arena.alloc(module_from_builtins(&context, "app"));
let builder = context.create_builder();
let fpm = PassManager::create(&module);
roc_gen::llvm::build::add_passes(&fpm, opt_level);
fpm.initialize();
let (mpm, fpm) = roc_gen::llvm::build::construct_optimization_passes(module, opt_level);
// Compute main_fn_type before moving subs to Env
let layout = Layout::new(&arena, content, &subs).unwrap_or_else(|err| {
@ -154,7 +149,7 @@ pub fn gen(
builder: &builder,
context: &context,
interns: loaded.interns,
module: arena.alloc(module),
module,
ptr_bytes,
leak: false,
};
@ -317,6 +312,8 @@ pub fn gen(
panic!("Function {} failed LLVM verification.", main_fn_name);
}
mpm.run_on(module);
// Verify the module
if let Err(errors) = env.module.verify() {
panic!("😱 LLVM errors when defining module: {:?}", errors);

View file

@ -31,6 +31,7 @@ const PRINT_FN_VERIFICATION_OUTPUT: bool = true;
#[cfg(not(debug_assertions))]
const PRINT_FN_VERIFICATION_OUTPUT: bool = false;
#[derive(Debug, Clone, Copy)]
pub enum OptLevel {
Normal,
Optimize,
@ -154,14 +155,18 @@ fn add_intrinsic<'ctx>(
fn_val
}
pub fn add_passes(fpm: &PassManager<FunctionValue<'_>>, opt_level: OptLevel) {
pub fn construct_optimization_passes<'a>(
module: &'a Module,
opt_level: OptLevel,
) -> (PassManager<Module<'a>>, PassManager<FunctionValue<'a>>) {
let mpm = PassManager::create(());
let fpm = PassManager::create(module);
// tail-call elimination is always on
fpm.add_instruction_combining_pass();
fpm.add_tail_call_elimination_pass();
let pmb = PassManagerBuilder::create();
// Enable more optimizations when running cargo test --release
match opt_level {
OptLevel::Normal => {
pmb.set_optimization_level(OptimizationLevel::None);
@ -171,24 +176,35 @@ pub fn add_passes(fpm: &PassManager<FunctionValue<'_>>, opt_level: OptLevel) {
//
// See https://llvm.org/doxygen/CodeGen_8h_source.html
pmb.set_optimization_level(OptimizationLevel::Aggressive);
pmb.set_inliner_with_threshold(4);
// TODO figure out how enabling these individually differs from
// the broad "aggressive optimizations" setting.
// TODO figure out which of these actually help
// fpm.add_reassociate_pass();
// fpm.add_basic_alias_analysis_pass();
// fpm.add_promote_memory_to_register_pass();
// fpm.add_cfg_simplification_pass();
// fpm.add_gvn_pass();
// TODO figure out why enabling any of these (even alone) causes LLVM to segfault
// fpm.add_strip_dead_prototypes_pass();
// fpm.add_dead_arg_elimination_pass();
// fpm.add_function_inlining_pass();
// pmb.set_inliner_with_threshold(4);
// function passes
fpm.add_basic_alias_analysis_pass();
fpm.add_memcpy_optimize_pass();
fpm.add_jump_threading_pass();
fpm.add_instruction_combining_pass();
fpm.add_licm_pass();
fpm.add_loop_unroll_pass();
fpm.add_scalar_repl_aggregates_pass_ssa();
// module passes
mpm.add_cfg_simplification_pass();
mpm.add_jump_threading_pass();
mpm.add_instruction_combining_pass();
mpm.add_memcpy_optimize_pass();
mpm.add_promote_memory_to_register_pass();
}
}
pmb.populate_module_pass_manager(&mpm);
pmb.populate_function_pass_manager(&fpm);
fpm.initialize();
// For now, we have just one of each
(mpm, fpm)
}
pub fn build_exp_literal<'a, 'ctx, 'env>(
@ -493,23 +509,14 @@ pub fn build_exp_expr<'a, 'ctx, 'env>(
// This trick comes from
// https://github.com/raviqqe/ssf/blob/bc32aae68940d5bddf5984128e85af75ca4f4686/ssf-llvm/src/expression_compiler.rs#L116
let array_type = ctx.i8_type().array_type(whole_size);
let internal_type =
basic_type_from_layout(env.arena, env.context, tag_layout, env.ptr_bytes);
let result = cast_basic_basic(
cast_basic_basic(
builder,
struct_val.into_struct_value().into(),
array_type.into(),
);
// For unclear reasons, we can't cast an array to a struct on the other side.
// the solution is to wrap the array in a struct (yea...)
let wrapper_type = ctx.struct_type(&[array_type.into()], false);
let mut wrapper_val = wrapper_type.const_zero().into();
wrapper_val = builder
.build_insert_value(wrapper_val, result, 0, "insert_field")
.unwrap();
wrapper_val.into_struct_value().into()
internal_type,
)
}
AccessAtIndex {
index,
@ -805,18 +812,14 @@ pub fn build_exp_stmt<'a, 'ctx, 'env>(
Dec(symbol, cont) => {
let (value, layout) = load_symbol_and_layout(env, scope, symbol);
let layout = layout.clone();
// TODO exclude unique lists in the future
match layout {
Layout::Builtin(Builtin::List(_, _)) => decrement_refcount_list(
env,
layout_ids,
scope,
parent,
cont,
value.into_struct_value(),
),
_ => build_exp_stmt(env, layout_ids, scope, parent, cont),
/*
if layout.contains_refcounted() {
decrement_refcount_layout(env, parent, value, &layout);
}
*/
build_exp_stmt(env, layout_ids, scope, parent, cont)
}
_ => todo!("unsupported expr {:?}", stmt),
}
@ -874,7 +877,113 @@ fn list_get_refcount_ptr<'a, 'ctx, 'env>(
)
}
/// Emit LLVM IR that decrements the reference counts reachable from `value`,
/// dispatching on the shape described by `layout`.
///
/// * `Builtin` layouts are forwarded to `decrement_refcount_builtin`.
/// * `Struct` layouts recurse into every field whose layout reports
///   `contains_refcounted()`.
/// * `Union` layouts read the tag id from field 0 of the wrapper struct and
///   switch to one freshly appended block per tag; every such block recurses
///   into its refcounted fields and then branches to a shared merge block.
/// * `FunctionPointer` and `Pointer` layouts emit nothing.
///
/// `parent` is the function that receives any newly appended basic blocks.
/// After the `Union` case the builder is left positioned at the end of the
/// merge block, so callers can keep emitting instructions.
// Kept as allow(dead_code): this helper is not necessarily called yet.
#[allow(dead_code)]
fn decrement_refcount_layout<'a, 'ctx, 'env>(
    env: &Env<'a, 'ctx, 'env>,
    parent: FunctionValue<'ctx>,
    value: BasicValueEnum<'ctx>,
    layout: &Layout<'a>,
) {
    use Layout::*;

    match layout {
        Builtin(builtin) => decrement_refcount_builtin(env, parent, value, builtin),
        Struct(layouts) => {
            let wrapper_struct = value.into_struct_value();

            for (i, field_layout) in layouts.iter().enumerate() {
                if field_layout.contains_refcounted() {
                    let field_value = env
                        .builder
                        .build_extract_value(wrapper_struct, i as u32, "decrement_struct_field")
                        .unwrap();

                    decrement_refcount_layout(env, parent, field_value, field_layout)
                }
            }
        }
        Union(tags) => {
            debug_assert!(!tags.is_empty());

            let wrapper_struct = value.into_struct_value();

            // read the tag_id
            // NOTE(review): assumes field 0 of the wrapper is an integer tag id,
            // and that each tag's fields can be extracted straight from the
            // wrapper struct without a cast -- confirm against the union's
            // LLVM representation.
            let tag_id = env
                .builder
                .build_extract_value(wrapper_struct, 0, "read_tag_id")
                .unwrap()
                .into_int_value();

            // Remember the block that holds the tag read. The loop below moves
            // the builder into the per-tag blocks, but the switch has to be the
            // terminator of *this* block.
            let switch_block = env
                .builder
                .get_insert_block()
                .expect("builder must be positioned inside a basic block");

            // next, make a jump table for all possible values of the tag_id
            let mut cases = Vec::with_capacity_in(tags.len(), env.arena);

            let merge_block = env.context.append_basic_block(parent, "decrement_merge");

            for (tag_index, field_layouts) in tags.iter().enumerate() {
                let block = env.context.append_basic_block(parent, "tag_id_decrement");
                env.builder.position_at_end(block);

                for (i, field_layout) in field_layouts.iter().enumerate() {
                    if field_layout.contains_refcounted() {
                        let field_value = env
                            .builder
                            .build_extract_value(wrapper_struct, i as u32, "decrement_struct_field")
                            .unwrap();

                        decrement_refcount_layout(env, parent, field_value, field_layout)
                    }
                }

                // A nested union may have moved the builder to its own merge
                // block; wherever we ended up, continue at our merge block.
                env.builder.build_unconditional_branch(merge_block);

                cases.push((
                    env.context.i8_type().const_int(tag_index as u64, false),
                    block,
                ));
            }

            // The last tag doubles as the switch's default destination.
            let (_, default_block) = cases.pop().unwrap();

            // Go back to the block that read the tag before emitting the switch;
            // otherwise it would be appended after the branch that already
            // terminates the last per-tag block.
            env.builder.position_at_end(switch_block);
            env.builder.build_switch(tag_id, default_block, &cases);

            env.builder.position_at_end(merge_block);
        }

        FunctionPointer(_, _) | Pointer(_) => {}
    }
}
#[inline(always)]
fn decrement_refcount_builtin<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
parent: FunctionValue<'ctx>,
value: BasicValueEnum<'ctx>,
builtin: &Builtin<'a>,
) {
use Builtin::*;
match builtin {
List(_, element_layout) => {
if element_layout.contains_refcounted() {
// TODO decrement all values
}
let wrapper_struct = value.into_struct_value();
decrement_refcount_list(env, parent, wrapper_struct);
}
Set(element_layout) => {
if element_layout.contains_refcounted() {
// TODO decrement all values
}
let wrapper_struct = value.into_struct_value();
decrement_refcount_list(env, parent, wrapper_struct);
}
Map(key_layout, value_layout) => {
if key_layout.contains_refcounted() || value_layout.contains_refcounted() {
// TODO decrement all values
}
let wrapper_struct = value.into_struct_value();
decrement_refcount_list(env, parent, wrapper_struct);
}
_ => {}
}
}
fn increment_refcount_list<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
original_wrapper: StructValue<'ctx>,
@ -900,15 +1009,11 @@ fn increment_refcount_list<'a, 'ctx, 'env>(
builder.build_store(refcount_ptr, decremented);
}
#[allow(dead_code)]
fn decrement_refcount_list<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
layout_ids: &mut LayoutIds<'a>,
scope: &mut Scope<'a, 'ctx>,
parent: FunctionValue<'ctx>,
stmt: &roc_mono::ir::Stmt<'a>,
original_wrapper: StructValue<'ctx>,
) -> BasicValueEnum<'ctx> {
) {
let builder = env.builder;
let ctx = env.context;
@ -956,7 +1061,6 @@ fn decrement_refcount_list<'a, 'ctx, 'env>(
// emit merge block
builder.position_at_end(cont_block);
build_exp_stmt(env, layout_ids, scope, parent, stmt)
}
fn load_symbol<'a, 'ctx, 'env>(
@ -987,6 +1091,17 @@ fn load_symbol_and_layout<'a, 'ctx, 'env, 'b>(
}
}
/// Look up `symbol` in `scope` and return its pointer together with a
/// reference to its layout.
///
/// Unlike a load, this emits no instructions; `env` is accepted but not used.
///
/// # Panics
///
/// Panics if `scope` has no entry for `symbol`.
fn get_symbol_and_layout<'a, 'ctx, 'env, 'b>(
    env: &Env<'a, 'ctx, 'env>,
    scope: &'b Scope<'a, 'ctx>,
    symbol: &Symbol,
) -> (PointerValue<'ctx>, &'b Layout<'a>) {
    let (layout, ptr) = scope
        .get(symbol)
        .unwrap_or_else(|| panic!("There was no entry for {:?} in scope {:?}", symbol, scope));

    (*ptr, layout)
}
/// Cast a struct to another struct of the same (or smaller?) size
fn cast_struct_struct<'ctx>(
builder: &Builder<'ctx>,
@ -1012,7 +1127,7 @@ fn cast_basic_basic<'ctx>(
.build_bitcast(
argument_pointer,
to_type.ptr_type(inkwell::AddressSpace::Generic),
"",
"cast_basic_basic",
)
.into_pointer_value();

View file

@ -112,12 +112,31 @@ pub fn basic_type_from_layout<'ctx>(
let ptr_size = std::mem::size_of::<i64>();
let union_size = layout.stack_size(ptr_size as u32);
let array_type = context
.i8_type()
.array_type(union_size)
.as_basic_type_enum();
// The memory layout of Union is a bit tricky.
// We have tags with different memory layouts, that are part of the same type.
// For llvm, all tags must have the same memory layout.
//
// So, we convert all tags to a layout of bytes of some size.
// It turns out that encoding to i64 for as many elements as possible is
// a nice optimization, the remainder is encoded as bytes.
context.struct_type(&[array_type], false).into()
let num_i64 = union_size / 8;
let num_i8 = union_size % 8;
let i64_array_type = context.i64_type().array_type(num_i64).as_basic_type_enum();
if num_i8 == 0 {
// the object fits perfectly in some number of i64's
// (i.e. the size is a multiple of 8 bytes)
context.struct_type(&[i64_array_type], false).into()
} else {
// there are some trailing bytes at the end
let i8_array_type = context.i8_type().array_type(num_i8).as_basic_type_enum();
context
.struct_type(&[i64_array_type, i8_array_type], false)
.into()
}
}
Builtin(builtin) => match builtin {

View file

@ -7,7 +7,6 @@ pub fn helper_without_uniqueness<'a>(
context: &'a inkwell::context::Context,
) -> (&'static str, inkwell::execution_engine::ExecutionEngine<'a>) {
use crate::helpers::{can_expr, infer_expr, CanExprOut};
use inkwell::passes::PassManager;
use inkwell::types::BasicType;
use inkwell::OptimizationLevel;
use roc_gen::llvm::build::Scope;
@ -60,11 +59,10 @@ pub fn helper_without_uniqueness<'a>(
} else {
roc_gen::llvm::build::OptLevel::Optimize
};
let fpm = PassManager::create(&module);
roc_gen::llvm::build::add_passes(&fpm, opt_level);
fpm.initialize();
let module = arena.alloc(module);
let (module_pass, function_pass) =
roc_gen::llvm::build::construct_optimization_passes(module, opt_level);
// Compute main_fn_type before moving subs to Env
let layout = Layout::new(&arena, content, &subs).unwrap_or_else(|err| {
@ -87,7 +85,7 @@ pub fn helper_without_uniqueness<'a>(
builder: &builder,
context: context,
interns,
module: arena.alloc(module),
module,
ptr_bytes,
leak: leak,
};
@ -143,7 +141,7 @@ pub fn helper_without_uniqueness<'a>(
build_proc(&env, &mut layout_ids, proc, fn_val, arg_basic_types);
if fn_val.verify(true) {
fpm.run_on(&fn_val);
function_pass.run_on(&fn_val);
} else {
eprintln!(
"\n\nFunction {:?} failed LLVM verification in NON-OPTIMIZED build. Its content was:\n", fn_val.get_name().to_str().unwrap()
@ -183,18 +181,20 @@ pub fn helper_without_uniqueness<'a>(
// env.module.print_to_stderr();
if main_fn.verify(true) {
fpm.run_on(&main_fn);
function_pass.run_on(&main_fn);
} else {
panic!("Main function {} failed LLVM verification in NON-OPTIMIZED build. Uncomment things nearby to see more details.", main_fn_name);
}
module_pass.run_on(env.module);
// Verify the module
if let Err(errors) = env.module.verify() {
panic!("Errors defining module: {:?}", errors);
}
// Uncomment this to see the module's optimized LLVM instruction output:
// env.module.print_to_stderr();
env.module.print_to_stderr();
(main_fn_name, execution_engine.clone())
}
@ -206,7 +206,6 @@ pub fn helper_with_uniqueness<'a>(
context: &'a inkwell::context::Context,
) -> (&'static str, inkwell::execution_engine::ExecutionEngine<'a>) {
use crate::helpers::{infer_expr, uniq_expr};
use inkwell::passes::PassManager;
use inkwell::types::BasicType;
use inkwell::OptimizationLevel;
use roc_gen::llvm::build::Scope;
@ -242,18 +241,14 @@ pub fn helper_with_uniqueness<'a>(
unify_problems
);
let module = roc_gen::llvm::build::module_from_builtins(context, "app");
let module = arena.alloc(roc_gen::llvm::build::module_from_builtins(context, "app"));
let builder = context.create_builder();
let opt_level = if cfg!(debug_assertions) {
roc_gen::llvm::build::OptLevel::Normal
} else {
roc_gen::llvm::build::OptLevel::Optimize
};
let fpm = PassManager::create(&module);
roc_gen::llvm::build::add_passes(&fpm, opt_level);
fpm.initialize();
let (mpm, fpm) = roc_gen::llvm::build::construct_optimization_passes(module, opt_level);
// Compute main_fn_type before moving subs to Env
let layout = Layout::new(&arena, content, &subs).unwrap_or_else(|err| {
@ -278,7 +273,7 @@ pub fn helper_with_uniqueness<'a>(
builder: &builder,
context: context,
interns,
module: arena.alloc(module),
module,
ptr_bytes,
leak: leak,
};
@ -379,6 +374,8 @@ pub fn helper_with_uniqueness<'a>(
panic!("main function {} failed LLVM verification in OPTIMIZED build. Uncomment nearby statements to see more details.", main_fn_name);
}
mpm.run_on(module);
// Verify the module
if let Err(errors) = env.module.verify() {
panic!("Errors defining module: {:?}", errors);