Optimize literal list construction in LLVM backend

Currently, list literals are always heap-allocated, and their elements
are stored by emitting a GEP (getelementptr) and a store for each item
in the literal. This produces huge quantities of IR, causing compile
times to blow up for, e.g., programs with large literals or ingested files.

Instead, if a list literal consists entirely of literal values, emit it
as a constant global (a zeroed refcount slot followed by the elements)
and return a pointer to its first element.
This commit is contained in:
Ayaz Hafiz 2024-06-22 19:07:18 -05:00 committed by Brendan Hansknecht
parent 08019951be
commit 9abbcfaafc
No known key found for this signature in database
GPG key ID: 0EA784685083E75B
2 changed files with 51 additions and 109 deletions

View file

@ -2915,130 +2915,61 @@ fn list_literal<'a, 'ctx>(
let list_length = elems.len(); let list_length = elems.len();
let list_length_intval = env.ptr_int().const_int(list_length as _, false); let list_length_intval = env.ptr_int().const_int(list_length as _, false);
// TODO re-enable, currently causes morphic segfaults because it tries to update let is_all_constant = elems.iter().all(|e| e.is_literal());
// constants in-place...
// if element_type.is_int_type() { if is_all_constant {
if false {
let element_type = element_type.into_int_type(); let element_type = element_type.into_int_type();
let element_width = layout_interner.stack_size(element_layout); let element_width = layout_interner.stack_size(element_layout);
let size = list_length * element_width as usize;
let alignment = layout_interner let refcount_slot_bytes = layout_interner
.alignment_bytes(element_layout) .alignment_bytes(element_layout)
.max(env.target.ptr_width() as u32); .max(env.target.ptr_width() as u32) as usize;
let mut is_all_constant = true; let refcount_slot_elements =
let zero_elements = (refcount_slot_bytes as f64 / element_width as f64).ceil() as usize;
(env.target.ptr_width() as u8 as f64 / element_width as f64).ceil() as usize;
// runtime-evaluated elements let data_bytes = list_length * element_width as usize;
let mut runtime_evaluated_elements = Vec::with_capacity_in(list_length, env.arena);
// set up a global that contains all the literal elements of the array assert!(refcount_slot_elements > 0);
// any variables or expressions are represented as `undef`
let global = {
let mut global_elements = Vec::with_capacity_in(list_length, env.arena);
// Add zero bytes that represent the refcount let mut bytes = Vec::with_capacity_in(refcount_slot_elements + data_bytes, env.arena);
//
// - if all elements are const, then we store the whole list as a constant. // Fill the refcount slot with nulls
// It then needs a refcount before the first element. for _ in 0..(refcount_slot_elements) {
// - but if the list is not all constants, then we will just copy the constant values, bytes.push(element_type.const_zero());
// and we do not need that refcount at the start
//
// In the latter case, we won't store the zeros in the globals
// (we slice them off again below)
for _ in 0..zero_elements {
global_elements.push(element_type.const_zero());
} }
// Copy the elements from the list literal into the array // Copy the elements from the list literal into the array
for (index, element) in elems.iter().enumerate() { for element in elems.iter() {
match element { let literal = element.get_literal().expect("is_all_constant is true");
ListLiteralElement::Literal(literal) => { let val = build_exp_literal(env, layout_interner, element_layout, &literal);
let val = build_exp_literal(env, layout_interner, element_layout, literal); bytes.push(val.into_int_value());
global_elements.push(val.into_int_value());
}
ListLiteralElement::Symbol(symbol) => {
let val = scope.load_symbol(symbol);
// here we'd like to furthermore check for intval.is_const().
// if all elements are const for LLVM, we could make the array a constant.
// BUT morphic does not know about this, and could allow us to modify that
// array in-place. That would cause a segfault. So, we'll have to find
// constants ourselves and cannot lean on LLVM here.
is_all_constant = false;
runtime_evaluated_elements.push((index, val));
global_elements.push(element_type.get_undef());
}
};
} }
let const_elements = if is_all_constant { let typ = element_type.array_type(bytes.len() as u32);
global_elements.into_bump_slice()
} else {
&global_elements[zero_elements..]
};
// use None for the address space (e.g. Const does not work)
let typ = element_type.array_type(const_elements.len() as u32);
let global = env.module.add_global(typ, None, "roc__list_literal"); let global = env.module.add_global(typ, None, "roc__list_literal");
global.set_initializer(&element_type.const_array(bytes.into_bump_slice()));
global.set_constant(true); global.set_constant(true);
global.set_alignment(alignment); global.set_alignment(layout_interner.alignment_bytes(element_layout));
global.set_unnamed_addr(true); global.set_unnamed_addr(true);
global.set_linkage(inkwell::module::Linkage::Private); global.set_linkage(inkwell::module::Linkage::Private);
global.set_initializer(&element_type.const_array(const_elements)); let with_rc_ptr = global.as_pointer_value();
global.as_pointer_value()
};
if is_all_constant { let data_ptr = unsafe {
// all elements are constants, so we can use the memory in the constants section directly
// here we make a pointer to the first actual element (skipping the 0 bytes that
// represent the refcount)
let zero = env.ptr_int().const_zero();
let offset = env.ptr_int().const_int(zero_elements as _, false);
let ptr = unsafe {
env.builder.new_build_in_bounds_gep( env.builder.new_build_in_bounds_gep(
element_type, element_type,
global, with_rc_ptr,
&[zero, offset], &[env
"first_element_pointer", .ptr_int()
.const_int(refcount_slot_elements as u64, false)],
"get_data_ptr",
) )
}; };
super::build_list::store_list(env, ptr, list_length_intval).into() super::build_list::store_list(env, data_ptr, list_length_intval).into()
} else {
// some of our elements are non-constant, so we must allocate space on the heap
let ptr = allocate_list(env, layout_interner, element_layout, list_length_intval);
// then, copy the relevant segment from the constant section into the heap
env.builder
.build_memcpy(
ptr,
alignment,
global,
alignment,
env.ptr_int().const_int(size as _, false),
)
.unwrap();
// then replace the `undef`s with the values that we evaluate at runtime
for (index, val) in runtime_evaluated_elements {
let index_val = ctx.i64_type().const_int(index as u64, false);
let elem_ptr = unsafe {
builder.new_build_in_bounds_gep(element_type, ptr, &[index_val], "index")
};
builder.new_build_store(elem_ptr, val);
}
super::build_list::store_list(env, ptr, list_length_intval).into()
}
} else { } else {
let ptr = allocate_list(env, layout_interner, element_layout, list_length_intval); let ptr = allocate_list(env, layout_interner, element_layout, list_length_intval);

View file

@ -49,6 +49,17 @@ impl<'a> ListLiteralElement<'a> {
_ => None, _ => None,
} }
} }
pub fn get_literal(&self) -> Option<Literal<'a>> {
match self {
Self::Literal(l) => Some(*l),
_ => None,
}
}
pub fn is_literal(&self) -> bool {
matches!(self, Self::Literal(_))
}
} }
pub enum NumLiteral { pub enum NumLiteral {