Lambda set compaction must preserve unique specializations of concrete types

There are times when multiple concrete types may appear in
unspecialized lambda sets that are being unified. The primary case is
during monomorphization, when unspecialized lambda sets are joined at
the same time that concrete types are instantiated. Since lambda set
specialization and compaction happen only after unification is
complete, unifications that monomorphize can induce the situation
described above.

In these cases,

- unspecialized lambda sets that are due to equivalent type variables
  can be compacted, since they are in fact the same specialization.
- unspecialized lambda sets that are due to different type variables
  cannot be compacted, even if their types unify, since they may point
  to different specializations. For example, consider the unspecialized
  lambda set `[[] + [A]:toEncoder:1 + [B]:toEncoder:1]` - this set wants
  two encoders, one for `[A]` and one for `[B]`, which is materially
  different from the set `[[] + [A, B]:toEncoder:1]`.
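
The two rules above can be sketched with a toy model (hypothetical, simplified types — not the Roc compiler's real data structures): an unspecialized lambda set entry is keyed by the type variable it is due to, and entries compact only when their variables share a root, never merely because their types would unify.

```rust
// Toy model of the compaction rule: an entry `Var:member:region` is keyed by
// the type variable it is due to. Entries whose variables share a root are
// the same specialization and compact to one; entries due to distinct
// variables are both kept, even if their concrete types would unify.
#[derive(Clone, Copy, Debug, PartialEq)]
struct Uls {
    var: usize,           // type variable the specialization is due to
    member: &'static str, // ability member, e.g. "toEncoder"
    region: u8,
}

// Toy union-find lookup standing in for a substitution's root-key query.
fn root(parents: &[usize], mut v: usize) -> usize {
    while parents[v] != v {
        v = parents[v];
    }
    v
}

// Merge two entry lists, compacting only entries with equivalent variables.
fn merge_uls(parents: &[usize], left: &[Uls], right: &[Uls]) -> Vec<Uls> {
    let mut out: Vec<Uls> = left.to_vec();
    for r in right {
        let duplicate = out
            .iter()
            .any(|l| root(parents, l.var) == root(parents, r.var));
        if !duplicate {
            out.push(*r);
        }
    }
    out
}

fn main() {
    // Var 0 stands for `[A]`, var 1 for `[B]`; var 2 is equivalent to var 0.
    let parents = vec![0, 1, 0];
    let a = Uls { var: 0, member: "toEncoder", region: 1 };
    let b = Uls { var: 1, member: "toEncoder", region: 1 };
    let a_alias = Uls { var: 2, member: "toEncoder", region: 1 };

    // Distinct variables: both kept, as in `[[] + [A]:toEncoder:1 + [B]:toEncoder:1]`.
    assert_eq!(merge_uls(&parents, &[a], &[b]).len(), 2);
    // Equivalent variables: compacted to a single entry.
    assert_eq!(merge_uls(&parents, &[a], &[a_alias]).len(), 1);
    println!("ok");
}
```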
This commit is contained in:
Ayaz Hafiz 2023-02-20 18:50:07 -06:00
parent f7aa7e734d
commit d7a069675b
2 changed files with 197 additions and 14 deletions

@@ -2571,3 +2571,155 @@ fn recursive_lambda_set_has_nested_non_recursive_lambda_sets_issue_5026() {
"#
)
}
#[mono_test]
fn unspecialized_lambda_set_unification_keeps_all_concrete_types_without_unification() {
// This is a regression test for the ambient lambda set specialization algorithm.
//
// In the program below, monomorphization of `toEncoderQ` with the `Q` in `main` induces the
// resolution of `t.a` and `t.b`, and the unification of their pending unspecialization lambda
// sets, when `t.a` and `t.b` have been resolved to concrete types, but before the
// specialization procedure steps in to resolve the lambda sets concretely. That's because
// monomorphization unifies the general type of `toEncoderQ` with the concrete type, forcing
// concretization of `t`, but the specialization procedure runs only after the unification is
// complete.
//
// In this case, it's imperative that the unspecialized lambda sets of `toEncoder t.a` and
// `toEncoder t.b` wind up in the same lambda set, that is in
//
// tag : @MEncoder (Bytes, Linear -[[] + @MU8:toEncoder:1 + @MStr:toEncoder:1] -> Bytes)
// -[lTag]->
// @MEncoder (Bytes, Linear -[[Linear:lTag:3 { @MEncoder (Bytes, Linear -[[] + @MU8:toEncoder:1 + @MStr:toEncoder:1] -> Bytes) }]] -> Bytes)
//
// rather than forcing the lambda set inside to `tag` to become disjoint, as e.g.
//
// tag : @MEncoder (Bytes, Linear -[[] + @MU8:toEncoder:1 + @MStr:toEncoder:1] -> Bytes)
// -[lTag]->
// @MEncoder (Bytes, Linear -[[
// Linear:lTag:3 { @MEncoder (Bytes, Linear -[[] + @MU8:toEncoder:1] -> Bytes) },
// Linear:lTag:3 { @MEncoder (Bytes, Linear -[[] + @MStr:toEncoder:1] -> Bytes) },
// ]] -> Bytes)
indoc!(
r#"
app "test" provides [main] to "./platform"
MEncoder fmt := List U8, fmt -> List U8 | fmt has Format
MEncoding has
toEncoder : val -> MEncoder fmt | val has MEncoding, fmt has Format
Format has
u8 : {} -> MEncoder fmt | fmt has Format
str : {} -> MEncoder fmt | fmt has Format
tag : MEncoder fmt -> MEncoder fmt | fmt has Format
Linear := {} has [Format {u8: lU8, str: lStr, tag: lTag}]
MU8 := U8 has [MEncoding {toEncoder: toEncoderU8}]
MStr := Str has [MEncoding {toEncoder: toEncoderStr}]
Q a b := { a: a, b: b }
lU8 = \{} -> @MEncoder (\lst, @Linear {} -> lst)
lStr = \{} -> @MEncoder (\lst, @Linear {} -> lst)
lTag = \@MEncoder doFormat -> @MEncoder (\lst, @Linear {} ->
doFormat lst (@Linear {})
)
toEncoderU8 = \@MU8 _ -> u8 {}
toEncoderStr = \@MStr _ -> str {}
toEncoderQ =
\@Q t -> \fmt ->
@MEncoder doit = if Bool.true
then tag (toEncoder t.a)
else tag (toEncoder t.b)
doit [] fmt
main =
fmt = toEncoderQ (@Q {a : @MStr "", b: @MU8 7})
fmt (@Linear {})
"#
)
}
#[mono_test]
fn unspecialized_lambda_set_unification_keeps_all_concrete_types_without_unification_of_unifiable()
{
// This is a regression test for the ambient lambda set specialization algorithm.
//
// The principle of the test is equivalent to that of `unspecialized_lambda_set_unification_keeps_all_concrete_types_without_unification`.
//
// However, this test requires a larger reproduction, because the negative behavior is only
// visible in the presence of builtin ability usage (in this case, `Encoding` and
// `EncoderFormatting`).
//
// In this test, the payload types `[A]*` and `[B]*` of the encoded type `Q` are unifiable in
// their unspecialized lambda set representations under `toEncoderQ`; however, they must not
// be, because they in fact represent two different specializations of the needed encoders. In
// particular, the lambda set `[[] + [A]:toEncoder:1 + [B]:toEncoder:1]` must be preserved,
// rather than collapsing to `[[] + [A, B]:toEncoder:1]`.
indoc!(
r#"
app "test" imports [Json] provides [main] to "./platform"
Q a b := { a: a, b: b } has [Encoding {toEncoder: toEncoderQ}]
toEncoderQ =
\@Q t -> Encode.custom \bytes, fmt ->
f = if Bool.true
then Encode.tag "A" [Encode.toEncoder t.a]
else Encode.tag "B" [Encode.toEncoder t.b]
Encode.appendWith bytes f fmt
accessor = @Q {a : A, b: B}
main =
Encode.toBytes accessor Json.toUtf8
"#
)
}
#[mono_test]
fn unspecialized_lambda_set_unification_does_not_duplicate_identical_concrete_types() {
// This is a regression test for the ambient lambda set specialization algorithm.
//
// The principle of the test is equivalent to that of `unspecialized_lambda_set_unification_keeps_all_concrete_types_without_unification`.
//
// However, this test requires a larger reproduction, because the negative behavior is only
// visible in the presence of builtin ability usage (in this case, `Encoding` and
// `EncoderFormatting`).
//
// In this test, the payload types `Str` and `Str` of the encoded type `Q` are unifiable in
// their unspecialized lambda set representations under `toEncoderQ`, and moreover they are
// equivalent specializations, since they both come from the same root variable `x`. As
// such, the lambda set `[[] + Str:toEncoder:1]` should be produced during compaction, rather
// than staying as the expanded `[[] + Str:toEncoder:1 + Str:toEncoder:1]` after the types of
// `t.a` and `t.b` are filled in.
indoc!(
r#"
app "test" imports [Json] provides [main] to "./platform"
Q a b := { a: a, b: b } has [Encoding {toEncoder: toEncoderQ}]
toEncoderQ =
\@Q t -> Encode.custom \bytes, fmt ->
f = if Bool.true
then Encode.tag "A" [Encode.toEncoder t.a]
else Encode.tag "B" [Encode.toEncoder t.b]
Encode.appendWith bytes f fmt
accessor =
x = ""
@Q {a : x, b: x}
main =
Encode.toBytes accessor Json.toUtf8
"#
)
}

@@ -1568,8 +1568,10 @@ fn unspecialized_lambda_set_sorter(subs: &Subs, uls1: Uls, uls2: Uls) -> std::cm
}
(FlexAbleVar(..), _) => Greater,
(_, FlexAbleVar(..)) => Less,
// For everything else, the order is irrelevant
(_, _) => Less,
// For everything else, sort by the root key
(_, _) => subs
.get_root_key_without_compacting(var1)
.cmp(&subs.get_root_key_without_compacting(var2)),
}
}
ord => ord,
@@ -1731,6 +1733,8 @@ fn unify_unspecialized_lambdas<M: MetaCollector>(
let kept = uls_left.next().unwrap();
merged_uls.push(*kept);
} else {
// CASE: disjoint_flex_specializations
//
// ... a1 ...
// ... b1 ...
// => ... a1, b1 ...
@@ -1800,20 +1804,47 @@ fn unify_unspecialized_lambdas<M: MetaCollector>(
let _dropped = uls_left.next().unwrap();
}
(_, _) => {
// ... {foo: _} ...
// ... {foo: _} ...
// => ... {foo: _} ...
//
// Unify them, then advance one.
// (the choice is arbitrary, so we choose the left)
if env.subs.equivalent_without_compacting(var_l, var_r) {
// ... a1 ...
// ... b1=a1 ...
// => ... a1 ...
//
// Keep the one on the left, drop the one on the right. Then progress
// both: the invariant tells us that the next variable on the left must
// be disjoint from the current one on the right (resp. the next on the
// right vs. the current left) - if they aren't, the invariant was
// broken. Any concrete variables would have appeared earlier.
let _dropped = uls_right.next().unwrap();
let kept = uls_left.next().unwrap();
merged_uls.push(*kept);
let outcome = unify_pool(env, pool, var_l, var_r, mode);
if !outcome.mismatches.is_empty() {
return Err(outcome);
debug_assert!(uls_right
.peek()
.map(|r| env.subs.equivalent_without_compacting(var_l, r.0))
.unwrap_or(true));
debug_assert!(uls_left
.peek()
.map(|l| env.subs.equivalent_without_compacting(l.0, var_r))
.unwrap_or(true));
} else {
// Even if these two variables unify, since they are not equivalent,
// they correspond to different specializations! As such we must not
// merge them.
//
// Instead, keep both, but do so by adding and advancing the side with
// the lower root. See CASE disjoint_flex_specializations for
// reasoning.
if env.subs.get_root_key(var_l) < env.subs.get_root_key(var_r) {
let kept = uls_left.next().unwrap();
merged_uls.push(*kept);
} else {
let kept = uls_right.next().unwrap();
merged_uls.push(*kept);
}
}
whole_outcome.union(outcome);
let _dropped = uls_left.next().unwrap();
}
}
}
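
The merge strategy in the patched `(_, _)` case can be sketched as a runnable two-pointer merge (hypothetical, simplified types — variables are plain indices and a `roots` slice stands in for `get_root_key`): equivalent heads collapse into one entry and both sides advance, while non-equivalent heads are both kept, emitting the lower root first so the merged list stays sorted by root key.

```rust
// Two-pointer merge over lists sorted by root key, mirroring the case logic:
// equal roots => same specialization, keep one and advance both sides;
// distinct roots => distinct specializations, keep both, lower root first.
fn merge_by_root(roots: &[usize], mut left: &[usize], mut right: &[usize]) -> Vec<usize> {
    let mut out = Vec::new();
    while let (Some(&l), Some(&r)) = (left.first(), right.first()) {
        if roots[l] == roots[r] {
            // Equivalent variables: keep the left entry, drop the right.
            out.push(l);
            left = &left[1..];
            right = &right[1..];
        } else if roots[l] < roots[r] {
            out.push(l);
            left = &left[1..];
        } else {
            out.push(r);
            right = &right[1..];
        }
    }
    // One side is exhausted; the remainder is already sorted and disjoint.
    out.extend_from_slice(left);
    out.extend_from_slice(right);
    out
}

fn main() {
    // Vars 0 and 2 are equivalent (root 0); var 1 is its own root.
    let roots = vec![0, 1, 0];
    // Left wants a specialization for var 0; right for vars 2 and 1.
    let merged = merge_by_root(&roots, &[0], &[2, 1]);
    // Var 2 collapses into var 0; var 1 survives as a distinct specialization.
    assert_eq!(merged, vec![0, 1]);
    println!("{:?}", merged);
}
```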