Hash record field name order in generated layouts

Closes #2535 See the referenced issue for longer discussion - here's the synopsis. Consider this program ``` app "test" provides [ nums ] to "./platform" alpha = { a: 1, b: 2 } nums : List U8 nums = [ alpha.a, alpha.b, ] ``` Here's its IR: ``` procedure : `#UserApp.alpha` {I64, U8} procedure = `#UserApp.alpha` (): let `#UserApp.5` : Builtin(Int(I64)) = 1i64; let `#UserApp.6` : Builtin(Int(U8)) = 2i64; let `#UserApp.4` : Struct([Builtin(Int(I64)), Builtin(Int(U8))]) = Struct {`#UserApp.5`, `#UserApp.6`}; ret `#UserApp.4`; procedure : `#UserApp.nums` List U8 procedure = `#UserApp.nums` (): let `#UserApp.7` : Struct([Builtin(Int(I64)), Builtin(Int(U8))]) = CallByName `#UserApp.alpha`; let `#UserApp.1` : Builtin(Int(U8)) = StructAtIndex 1 `#UserApp.7`; let `#UserApp.3` : Struct([Builtin(Int(I64)), Builtin(Int(U8))]) = CallByName `#UserApp.alpha`; let `#UserApp.2` : Builtin(Int(U8)) = StructAtIndex 1 `#UserApp.3`; let `#UserApp.0` : Builtin(List(Builtin(Int(U8)))) = Array [`#UserApp.1`, `#UserApp.2`]; ret `#UserApp.0`; ``` What's happening is that we need to specialize `alpha` twice - once for the type of a narrowed to a U8, another time for the type of b narrowed to a U8. We do the specialization for alpha.b first - record fields are sorted by layout, so we generate a record of type {i64, u8}. But then we go to specialize alpha.a, but this has the same layout - {i64, u8} - so we reuse the existing one! So (at least for records), we need to include record field order associated with the sorted layout fields, so that we don't reuse monomorphizations like this incorrectly!
2025-09-28 14:24:45 +00:00 · 2022-02-20 19:51:21 -05:00 · 2022-02-20 19:51:21 -05:00 · e52d427ac8
commit e52d427ac8
parent 74daec84df
22 changed files with 225 additions and 105 deletions
--- a/compiler/mono/src/layout.rs
+++ b/compiler/mono/src/layout.rs
@ -11,8 +11,9 @@ use roc_types::subs::{
    Content, FlatType, RecordFields, Subs, UnionTags, UnsortedUnionTags, Variable,
 };
 use roc_types::types::{gather_fields_unsorted_iter, RecordField};
-use std::collections::hash_map::Entry;
+use std::collections::hash_map::{DefaultHasher, Entry};
 use std::collections::HashMap;
+use std::hash::{Hash, Hasher};
 use ven_pretty::{DocAllocator, DocBuilder};

 // if your changes cause this number to go down, great!
@ -26,6 +27,7 @@ static_assertions::assert_eq_size!([usize; 3], LambdaSet);
 pub type TagIdIntType = u16;
 pub const MAX_ENUM_SIZE: usize = (std::mem::size_of::<TagIdIntType>() * 8) as usize;
 const GENERATE_NULLABLE: bool = true;
+const IRRELEVANT_FIELD_HASH: u64 = 0;

 #[derive(Debug, Clone)]
 pub enum LayoutProblem {
@ -205,10 +207,19 @@ impl<'a> RawFunctionLayout<'a> {
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
 pub enum Layout<'a> {
    Builtin(Builtin<'a>),
-    /// A layout that is empty (turns into the empty struct in LLVM IR
-    /// but for our purposes, not zero-sized, so it does not get dropped from data structures
-    /// this is important for closures that capture zero-sized values
-    Struct(&'a [Layout<'a>]),
+    Struct {
+        /// Two different struct types can have the same layout, for example
+        ///   { a: U8,  b: I64 }
+        ///   { a: I64, b: U8 }
+        /// both have the layout {I64, U8}. Not distinguishing the order of record fields can cause
+        /// us problems during monomorphization when we specialize the same type in different ways,
+        /// so keep a hash of the record order for disambiguation. This still of course may result
+        /// in collisions, but it's unlikely.
+        ///
+        /// See also https://github.com/rtfeldman/roc/issues/2535.
+        field_order_hash: u64,
+        field_layouts: &'a [Layout<'a>],
+    },
    Union(UnionLayout<'a>),
    LambdaSet(LambdaSet<'a>),
    RecursivePointer,
@ -417,7 +428,9 @@ impl<'a> UnionLayout<'a> {

    fn tags_alignment_bytes(tags: &[&[Layout]], target_info: TargetInfo) -> u32 {
        tags.iter()
-            .map(|fields| Layout::Struct(fields).alignment_bytes(target_info))
+            .map(|field_layouts| {
+                Layout::struct_no_name_order(field_layouts).alignment_bytes(target_info)
+            })
            .max()
            .unwrap_or(0)
    }
@ -426,14 +439,14 @@ impl<'a> UnionLayout<'a> {
        let allocation = match self {
            UnionLayout::NonRecursive(_) => unreachable!("not heap-allocated"),
            UnionLayout::Recursive(tags) => Self::tags_alignment_bytes(tags, target_info),
-            UnionLayout::NonNullableUnwrapped(fields) => {
-                Layout::Struct(fields).alignment_bytes(target_info)
+            UnionLayout::NonNullableUnwrapped(field_layouts) => {
+                Layout::struct_no_name_order(field_layouts).alignment_bytes(target_info)
            }
            UnionLayout::NullableWrapped { other_tags, .. } => {
                Self::tags_alignment_bytes(other_tags, target_info)
            }
            UnionLayout::NullableUnwrapped { other_fields, .. } => {
-                Layout::Struct(other_fields).alignment_bytes(target_info)
+                Layout::struct_no_name_order(other_fields).alignment_bytes(target_info)
            }
        };

@ -495,12 +508,12 @@ impl<'a> UnionLayout<'a> {
        let mut alignment_bytes = 0;

        for field_layouts in variant_field_layouts {
-            let mut data = Layout::Struct(field_layouts);
+            let mut data = Layout::struct_no_name_order(field_layouts);

            let fields_and_id;
            if let Some(id_layout) = id_data_layout {
                fields_and_id = [data, id_layout];
-                data = Layout::Struct(&fields_and_id);
+                data = Layout::struct_no_name_order(&fields_and_id);
            }

            let (variant_size, variant_alignment) = data.stack_size_and_alignment(target_info);
@ -590,7 +603,10 @@ impl<'a> LambdaSet<'a> {
    }

    pub fn is_represented(&self) -> Option<Layout<'a>> {
-        if let Layout::Struct(&[]) = self.representation {
+        if let Layout::Struct {
+            field_layouts: &[], ..
+        } = self.representation
+        {
            None
        } else {
            Some(*self.representation)
@ -648,7 +664,7 @@ impl<'a> LambdaSet<'a> {
                    } => todo!("recursive closures"),
                }
            }
-            Layout::Struct(_) => {
+            Layout::Struct { .. } => {
                // get the fields from the set, where they are sorted in alphabetic order
                // (and not yet sorted by their alignment)
                let (_, fields) = self
@ -673,7 +689,9 @@ impl<'a> LambdaSet<'a> {
            argument_layouts
        } else {
            match self.representation {
-                Layout::Struct(&[]) => {
+                Layout::Struct {
+                    field_layouts: &[], ..
+                } => {
                    // this function does not have anything in its closure, and the lambda set is a
                    // singleton, so we pass no extra argument
                    argument_layouts
@ -769,7 +787,7 @@ impl<'a> LambdaSet<'a> {
            }
            Newtype {
                arguments: layouts, ..
-            } => Layout::Struct(layouts.into_bump_slice()),
+            } => Layout::struct_no_name_order(layouts.into_bump_slice()),
            Wrapped(variant) => {
                use WrappedVariant::*;

@ -865,7 +883,10 @@ pub const fn round_up_to_alignment(width: u32, alignment: u32) -> u32 {

 impl<'a> Layout<'a> {
    pub const VOID: Self = Layout::Union(UnionLayout::NonRecursive(&[]));
-    pub const UNIT: Self = Layout::Struct(&[]);
+    pub const UNIT: Self = Layout::Struct {
+        field_layouts: &[],
+        field_order_hash: IRRELEVANT_FIELD_HASH,
+    };

    fn new_help<'b>(
        env: &mut Env<'a, 'b>,
@ -926,7 +947,7 @@ impl<'a> Layout<'a> {

        match self {
            Builtin(builtin) => builtin.safe_to_memcpy(),
-            Struct(fields) => fields
+            Struct { field_layouts, .. } => field_layouts
                .iter()
                .all(|field_layout| field_layout.safe_to_memcpy()),
            Union(variant) => {
@ -990,10 +1011,10 @@ impl<'a> Layout<'a> {

        match self {
            Builtin(builtin) => builtin.stack_size(target_info),
-            Struct(fields) => {
+            Struct { field_layouts, .. } => {
                let mut sum = 0;

-                for field_layout in *fields {
+                for field_layout in *field_layouts {
                    sum += field_layout.stack_size(target_info);
                }

@ -1020,7 +1041,7 @@ impl<'a> Layout<'a> {

    pub fn alignment_bytes(&self, target_info: TargetInfo) -> u32 {
        match self {
-            Layout::Struct(fields) => fields
+            Layout::Struct { field_layouts, .. } => field_layouts
                .iter()
                .map(|x| x.alignment_bytes(target_info))
                .max()
@ -1069,7 +1090,7 @@ impl<'a> Layout<'a> {
    pub fn allocation_alignment_bytes(&self, target_info: TargetInfo) -> u32 {
        match self {
            Layout::Builtin(builtin) => builtin.allocation_alignment_bytes(target_info),
-            Layout::Struct(_) => unreachable!("not heap-allocated"),
+            Layout::Struct { .. } => unreachable!("not heap-allocated"),
            Layout::Union(union_layout) => union_layout.allocation_alignment_bytes(target_info),
            Layout::LambdaSet(lambda_set) => lambda_set
                .runtime_representation()
@ -1103,7 +1124,7 @@ impl<'a> Layout<'a> {

        match self {
            Builtin(builtin) => builtin.is_refcounted(),
-            Struct(fields) => fields.iter().any(|f| f.contains_refcounted()),
+            Struct { field_layouts, .. } => field_layouts.iter().any(|f| f.contains_refcounted()),
            Union(variant) => {
                use UnionLayout::*;

@ -1134,8 +1155,8 @@ impl<'a> Layout<'a> {

        match self {
            Builtin(builtin) => builtin.to_doc(alloc, parens),
-            Struct(fields) => {
-                let fields_doc = fields.iter().map(|x| x.to_doc(alloc, parens));
+            Struct { field_layouts, .. } => {
+                let fields_doc = field_layouts.iter().map(|x| x.to_doc(alloc, parens));

                alloc
                    .text("{")
@ -1147,6 +1168,14 @@ impl<'a> Layout<'a> {
            RecursivePointer => alloc.text("*self"),
        }
    }
+
+    /// Used to build a `Layout::Struct` where the field name order is irrelevant.
+    pub fn struct_no_name_order(field_layouts: &'a [Layout]) -> Self {
+        Self::Struct {
+            field_layouts,
+            field_order_hash: IRRELEVANT_FIELD_HASH,
+        }
+    }
 }

 /// Avoid recomputing Layout from Variable multiple times.
@ -1590,6 +1619,14 @@ fn layout_from_flat_type<'a>(
                size2.cmp(&size1).then(label1.cmp(label2))
            });

+            let field_order_hash = {
+                let mut field_order_hasher = DefaultHasher::new();
+                pairs
+                    .iter()
+                    .for_each(|(label, _)| label.hash(&mut field_order_hasher));
+                field_order_hasher.finish()
+            };
+
            let mut layouts = Vec::from_iter_in(pairs.into_iter().map(|t| t.1), arena);

            if layouts.len() == 1 {
@ -1597,7 +1634,10 @@ fn layout_from_flat_type<'a>(
                // unwrap it.
                Ok(layouts.pop().unwrap())
            } else {
-                Ok(Layout::Struct(layouts.into_bump_slice()))
+                Ok(Layout::Struct {
+                    field_order_hash,
+                    field_layouts: layouts.into_bump_slice(),
+                })
            }
        }
        TagUnion(tags, ext_var) => {
@ -2224,6 +2264,9 @@ pub fn union_sorted_tags_help<'a>(
            } else {
                UnionVariant::Newtype {
                    tag_name,
+                    // NB: It may be the case that the order of the layouts chosen here must be
+                    // hashed, in case they differ for two specializations of the same type. If
+                    // there is a problem with newtypes not specializing correctly, this might be why!
                    arguments: layouts,
                }
            }
@ -2430,7 +2473,8 @@ fn layout_from_tag_union<'a>(
                    let answer1 = if field_layouts.len() == 1 {
                        field_layouts[0]
                    } else {
-                        Layout::Struct(field_layouts.into_bump_slice())
+                        // NB: does the field name order here matter, when it comes from the tag payload order?
+                        Layout::struct_no_name_order(field_layouts.into_bump_slice())
                    };

                    answer1