mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-27 22:09:09 +00:00
Optimize literal list construction in LLVM backend
Currently, list literals are always heap-allocated, and their elements are stored by emitting a GEP and a store for each item in the literal. This produces huge quantities of LLVM IR, which causes compile times to blow up for programs with large literals or ingested files. Instead, if a list literal consists entirely of literal values, emit a constant global containing the literal's data and return a pointer to it.
This commit is contained in:
parent
08019951be
commit
9abbcfaafc
2 changed files with 51 additions and 109 deletions
|
@ -2915,130 +2915,61 @@ fn list_literal<'a, 'ctx>(
|
||||||
let list_length = elems.len();
|
let list_length = elems.len();
|
||||||
let list_length_intval = env.ptr_int().const_int(list_length as _, false);
|
let list_length_intval = env.ptr_int().const_int(list_length as _, false);
|
||||||
|
|
||||||
// TODO re-enable, currently causes morphic segfaults because it tries to update
|
let is_all_constant = elems.iter().all(|e| e.is_literal());
|
||||||
// constants in-place...
|
|
||||||
// if element_type.is_int_type() {
|
if is_all_constant {
|
||||||
if false {
|
|
||||||
let element_type = element_type.into_int_type();
|
let element_type = element_type.into_int_type();
|
||||||
|
|
||||||
let element_width = layout_interner.stack_size(element_layout);
|
let element_width = layout_interner.stack_size(element_layout);
|
||||||
let size = list_length * element_width as usize;
|
|
||||||
let alignment = layout_interner
|
let refcount_slot_bytes = layout_interner
|
||||||
.alignment_bytes(element_layout)
|
.alignment_bytes(element_layout)
|
||||||
.max(env.target.ptr_width() as u32);
|
.max(env.target.ptr_width() as u32) as usize;
|
||||||
|
|
||||||
let mut is_all_constant = true;
|
let refcount_slot_elements =
|
||||||
let zero_elements =
|
(refcount_slot_bytes as f64 / element_width as f64).ceil() as usize;
|
||||||
(env.target.ptr_width() as u8 as f64 / element_width as f64).ceil() as usize;
|
|
||||||
|
|
||||||
// runtime-evaluated elements
|
let data_bytes = list_length * element_width as usize;
|
||||||
let mut runtime_evaluated_elements = Vec::with_capacity_in(list_length, env.arena);
|
|
||||||
|
|
||||||
// set up a global that contains all the literal elements of the array
|
assert!(refcount_slot_elements > 0);
|
||||||
// any variables or expressions are represented as `undef`
|
|
||||||
let global = {
|
|
||||||
let mut global_elements = Vec::with_capacity_in(list_length, env.arena);
|
|
||||||
|
|
||||||
// Add zero bytes that represent the refcount
|
let mut bytes = Vec::with_capacity_in(refcount_slot_elements + data_bytes, env.arena);
|
||||||
//
|
|
||||||
// - if all elements are const, then we store the whole list as a constant.
|
|
||||||
// It then needs a refcount before the first element.
|
|
||||||
// - but if the list is not all constants, then we will just copy the constant values,
|
|
||||||
// and we do not need that refcount at the start
|
|
||||||
//
|
|
||||||
// In the latter case, we won't store the zeros in the globals
|
|
||||||
// (we slice them off again below)
|
|
||||||
for _ in 0..zero_elements {
|
|
||||||
global_elements.push(element_type.const_zero());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy the elements from the list literal into the array
|
// Fill the refcount slot with nulls
|
||||||
for (index, element) in elems.iter().enumerate() {
|
for _ in 0..(refcount_slot_elements) {
|
||||||
match element {
|
bytes.push(element_type.const_zero());
|
||||||
ListLiteralElement::Literal(literal) => {
|
}
|
||||||
let val = build_exp_literal(env, layout_interner, element_layout, literal);
|
|
||||||
global_elements.push(val.into_int_value());
|
|
||||||
}
|
|
||||||
ListLiteralElement::Symbol(symbol) => {
|
|
||||||
let val = scope.load_symbol(symbol);
|
|
||||||
|
|
||||||
// here we'd like to furthermore check for intval.is_const().
|
// Copy the elements from the list literal into the array
|
||||||
// if all elements are const for LLVM, we could make the array a constant.
|
for element in elems.iter() {
|
||||||
// BUT morphic does not know about this, and could allow us to modify that
|
let literal = element.get_literal().expect("is_all_constant is true");
|
||||||
// array in-place. That would cause a segfault. So, we'll have to find
|
let val = build_exp_literal(env, layout_interner, element_layout, &literal);
|
||||||
// constants ourselves and cannot lean on LLVM here.
|
bytes.push(val.into_int_value());
|
||||||
|
}
|
||||||
|
|
||||||
is_all_constant = false;
|
let typ = element_type.array_type(bytes.len() as u32);
|
||||||
|
let global = env.module.add_global(typ, None, "roc__list_literal");
|
||||||
|
|
||||||
runtime_evaluated_elements.push((index, val));
|
global.set_initializer(&element_type.const_array(bytes.into_bump_slice()));
|
||||||
|
global.set_constant(true);
|
||||||
|
global.set_alignment(layout_interner.alignment_bytes(element_layout));
|
||||||
|
global.set_unnamed_addr(true);
|
||||||
|
global.set_linkage(inkwell::module::Linkage::Private);
|
||||||
|
|
||||||
global_elements.push(element_type.get_undef());
|
let with_rc_ptr = global.as_pointer_value();
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
let const_elements = if is_all_constant {
|
let data_ptr = unsafe {
|
||||||
global_elements.into_bump_slice()
|
env.builder.new_build_in_bounds_gep(
|
||||||
} else {
|
element_type,
|
||||||
&global_elements[zero_elements..]
|
with_rc_ptr,
|
||||||
};
|
&[env
|
||||||
|
.ptr_int()
|
||||||
// use None for the address space (e.g. Const does not work)
|
.const_int(refcount_slot_elements as u64, false)],
|
||||||
let typ = element_type.array_type(const_elements.len() as u32);
|
"get_data_ptr",
|
||||||
let global = env.module.add_global(typ, None, "roc__list_literal");
|
)
|
||||||
|
|
||||||
global.set_constant(true);
|
|
||||||
global.set_alignment(alignment);
|
|
||||||
global.set_unnamed_addr(true);
|
|
||||||
global.set_linkage(inkwell::module::Linkage::Private);
|
|
||||||
|
|
||||||
global.set_initializer(&element_type.const_array(const_elements));
|
|
||||||
global.as_pointer_value()
|
|
||||||
};
|
};
|
||||||
|
|
||||||
if is_all_constant {
|
super::build_list::store_list(env, data_ptr, list_length_intval).into()
|
||||||
// all elements are constants, so we can use the memory in the constants section directly
|
|
||||||
// here we make a pointer to the first actual element (skipping the 0 bytes that
|
|
||||||
// represent the refcount)
|
|
||||||
let zero = env.ptr_int().const_zero();
|
|
||||||
let offset = env.ptr_int().const_int(zero_elements as _, false);
|
|
||||||
|
|
||||||
let ptr = unsafe {
|
|
||||||
env.builder.new_build_in_bounds_gep(
|
|
||||||
element_type,
|
|
||||||
global,
|
|
||||||
&[zero, offset],
|
|
||||||
"first_element_pointer",
|
|
||||||
)
|
|
||||||
};
|
|
||||||
|
|
||||||
super::build_list::store_list(env, ptr, list_length_intval).into()
|
|
||||||
} else {
|
|
||||||
// some of our elements are non-constant, so we must allocate space on the heap
|
|
||||||
let ptr = allocate_list(env, layout_interner, element_layout, list_length_intval);
|
|
||||||
|
|
||||||
// then, copy the relevant segment from the constant section into the heap
|
|
||||||
env.builder
|
|
||||||
.build_memcpy(
|
|
||||||
ptr,
|
|
||||||
alignment,
|
|
||||||
global,
|
|
||||||
alignment,
|
|
||||||
env.ptr_int().const_int(size as _, false),
|
|
||||||
)
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
// then replace the `undef`s with the values that we evaluate at runtime
|
|
||||||
for (index, val) in runtime_evaluated_elements {
|
|
||||||
let index_val = ctx.i64_type().const_int(index as u64, false);
|
|
||||||
let elem_ptr = unsafe {
|
|
||||||
builder.new_build_in_bounds_gep(element_type, ptr, &[index_val], "index")
|
|
||||||
};
|
|
||||||
|
|
||||||
builder.new_build_store(elem_ptr, val);
|
|
||||||
}
|
|
||||||
|
|
||||||
super::build_list::store_list(env, ptr, list_length_intval).into()
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
let ptr = allocate_list(env, layout_interner, element_layout, list_length_intval);
|
let ptr = allocate_list(env, layout_interner, element_layout, list_length_intval);
|
||||||
|
|
||||||
|
|
|
@ -49,6 +49,17 @@ impl<'a> ListLiteralElement<'a> {
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn get_literal(&self) -> Option<Literal<'a>> {
|
||||||
|
match self {
|
||||||
|
Self::Literal(l) => Some(*l),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_literal(&self) -> bool {
|
||||||
|
matches!(self, Self::Literal(_))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum NumLiteral {
|
pub enum NumLiteral {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue