Merge remote-tracking branch 'origin/main' into serialize-idents

This commit is contained in:
Richard Feldman 2025-07-26 22:45:51 -04:00
commit dbc99059d4
No known key found for this signature in database
10 changed files with 948 additions and 131 deletions

View file

@ -23,18 +23,9 @@ attributes: Attributes,
/// Create a new identifier from a string.
pub fn for_text(text: []const u8) Ident {
// Parse identifier attributes from the text
const is_ignored = std.mem.startsWith(u8, text, "_");
const is_effectful = std.mem.endsWith(u8, text, "!");
// TODO: parse reassignable attribute (var keyword handling)
return Ident{
.raw_text = text,
.attributes = Attributes{
.effectful = is_effectful,
.ignored = is_ignored,
.reassignable = false,
},
.attributes = Attributes.fromString(text),
};
}
@ -68,18 +59,9 @@ pub fn from_bytes(bytes: []const u8) Error!Ident {
}
}
// Parse identifier attributes from the bytes
const is_ignored = std.mem.startsWith(u8, bytes, "_");
const is_effectful = std.mem.endsWith(u8, bytes, "!");
// TODO: parse reassignable attribute (var keyword handling)
return Ident{
.raw_text = bytes,
.attributes = Attributes{
.effectful = is_effectful,
.ignored = is_ignored,
.reassignable = false,
},
.attributes = Attributes.fromString(bytes),
};
}
@ -96,6 +78,14 @@ pub const Attributes = packed struct(u3) {
effectful: bool,
ignored: bool,
reassignable: bool,
/// Derive identifier attributes from its raw text: a trailing "!" marks it
/// effectful, a leading "_" marks it ignored. The reassignable attribute
/// cannot be inferred from the text alone, so it is always false here.
pub fn fromString(text: []const u8) Attributes {
    const ends_in_bang = std.mem.endsWith(u8, text, "!");
    const starts_with_underscore = std.mem.startsWith(u8, text, "_");
    return .{
        .effectful = ends_in_bang,
        .ignored = starts_with_underscore,
        .reassignable = false,
    };
}
};
/// An interner for identifier names.
@ -179,13 +169,23 @@ pub const Store = struct {
return self.interner.contains(text);
}
/// Find an identifier by its string, returning its index if it exists.
/// This is different from insert in that it's guaranteed not to modify the store.
pub fn findByString(self: *const Store, text: []const u8) ?Idx {
    // Read-only lookup in the interner; absence means the text was never interned.
    if (self.interner.strings.get(text)) |interner_idx| {
        // The interner only stores the string, so the attribute bits are
        // re-derived from the text itself to rebuild the full Idx.
        return Idx{
            .idx = @as(u29, @intCast(@intFromEnum(interner_idx))),
            .attributes = Attributes.fromString(text),
        };
    }
    return null;
}
/// Freeze the identifier store, preventing any new entries from being added.
pub fn freeze(self: *Store) void {
self.interner.freeze();
}
/// Calculate the size needed to serialize this Ident.Store
pub fn serializedSize(self: *const Store) usize {
var size: usize = 0;

View file

@ -55,8 +55,7 @@ pub const Header = struct {
ident_store: ComponentInfo,
line_starts: ComponentInfo,
types_store: ComponentInfo,
exposed_by_str: ComponentInfo,
exposed_nodes: ComponentInfo,
exposed_items: ComponentInfo,
external_decls: ComponentInfo,
/// Spans can be stored directly since they're small
@ -68,7 +67,7 @@ pub const Header = struct {
warning_count: u32,
/// Fixed padding to ensure alignment
_padding: [16]u8 = [_]u8{0} ** 16,
_padding: [24]u8 = [_]u8{0} ** 24,
/// Error specific to initializing a Header from bytes
pub const InitError = error{
@ -121,8 +120,7 @@ pub const CacheModule = struct {
const ident_store_size = module_env.idents.serializedSize();
const line_starts_size = module_env.line_starts.serializedSize();
const types_store_size = module_env.types.serializedSize();
const exposed_by_str_size = module_env.exposed_by_str.serializedSize();
const exposed_nodes_size = module_env.exposed_nodes.serializedSize();
const exposed_items_size = module_env.exposed_items.serializedSize();
const external_decls_size = module_env.external_decls.serializedSize();
// Calculate aligned offsets
@ -153,12 +151,8 @@ pub const CacheModule = struct {
offset += @intCast(types_store_size);
offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT));
const exposed_by_str_offset = offset;
offset += @intCast(exposed_by_str_size);
offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT));
const exposed_nodes_offset = offset;
offset += @intCast(exposed_nodes_size);
const exposed_items_offset = offset;
offset += @intCast(exposed_items_size);
offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT));
const external_decls_offset = offset;
@ -188,8 +182,7 @@ pub const CacheModule = struct {
.ident_store = .{ .offset = ident_store_offset, .length = @intCast(ident_store_size) },
.line_starts = .{ .offset = line_starts_offset, .length = @intCast(line_starts_size) },
.types_store = .{ .offset = types_store_offset, .length = @intCast(types_store_size) },
.exposed_by_str = .{ .offset = exposed_by_str_offset, .length = @intCast(exposed_by_str_size) },
.exposed_nodes = .{ .offset = exposed_nodes_offset, .length = @intCast(exposed_nodes_size) },
.exposed_items = .{ .offset = exposed_items_offset, .length = @intCast(exposed_items_size) },
.external_decls = .{ .offset = external_decls_offset, .length = @intCast(external_decls_size) },
.all_defs = module_env.all_defs,
.all_statements = module_env.all_statements,
@ -207,8 +200,7 @@ pub const CacheModule = struct {
std.debug.assert(ident_store_offset % SERIALIZATION_ALIGNMENT == 0);
std.debug.assert(line_starts_offset % SERIALIZATION_ALIGNMENT == 0);
std.debug.assert(types_store_offset % SERIALIZATION_ALIGNMENT == 0);
std.debug.assert(exposed_by_str_offset % SERIALIZATION_ALIGNMENT == 0);
std.debug.assert(exposed_nodes_offset % SERIALIZATION_ALIGNMENT == 0);
std.debug.assert(exposed_items_offset % SERIALIZATION_ALIGNMENT == 0);
std.debug.assert(external_decls_offset % SERIALIZATION_ALIGNMENT == 0);
// Serialize each component
@ -219,8 +211,7 @@ pub const CacheModule = struct {
_ = try module_env.idents.serializeInto(data_section[ident_store_offset .. ident_store_offset + ident_store_size], allocator);
_ = try module_env.line_starts.serializeInto(@as([]align(SERIALIZATION_ALIGNMENT) u8, @alignCast(data_section[line_starts_offset .. line_starts_offset + line_starts_size])));
_ = try module_env.types.serializeInto(data_section[types_store_offset .. types_store_offset + types_store_size], allocator);
_ = try module_env.exposed_by_str.serializeInto(data_section[exposed_by_str_offset .. exposed_by_str_offset + exposed_by_str_size]);
_ = try module_env.exposed_nodes.serializeInto(data_section[exposed_nodes_offset .. exposed_nodes_offset + exposed_nodes_size]);
_ = try module_env.exposed_items.serializeInto(data_section[exposed_items_offset .. exposed_items_offset + exposed_items_size]);
_ = try module_env.external_decls.serializeInto(@as([]align(SERIALIZATION_ALIGNMENT) u8, @alignCast(data_section[external_decls_offset .. external_decls_offset + external_decls_size])));
// TODO Calculate and store checksum
@ -300,11 +291,8 @@ pub const CacheModule = struct {
var types_store = try TypeStore.deserializeFrom(self.getComponentData(.types_store), allocator);
errdefer types_store.deinit();
var exposed_by_str = try SafeStringHashMap(void).deserializeFrom(self.getComponentData(.exposed_by_str), allocator);
errdefer exposed_by_str.deinit(allocator);
var exposed_nodes = try SafeStringHashMap(u16).deserializeFrom(self.getComponentData(.exposed_nodes), allocator);
errdefer exposed_nodes.deinit(allocator);
var exposed_items = try collections.ExposedItems.deserializeFrom(self.getComponentData(.exposed_items), allocator);
errdefer exposed_items.deinit(allocator);
// Create ModuleEnv from deserialized components
var module_env = ModuleEnv{
@ -313,8 +301,7 @@ pub const CacheModule = struct {
.ident_ids_for_slicing = ident_ids_for_slicing,
.strings = strings,
.types = types_store,
.exposed_by_str = exposed_by_str,
.exposed_nodes = exposed_nodes,
.exposed_items = exposed_items,
.line_starts = line_starts,
.source = source,
// Module compilation fields - will be initialized after
@ -360,8 +347,7 @@ pub const CacheModule = struct {
.ident_store => self.header.ident_store,
.line_starts => self.header.line_starts,
.types_store => self.header.types_store,
.exposed_by_str => self.header.exposed_by_str,
.exposed_nodes => self.header.exposed_nodes,
.exposed_items => self.header.exposed_items,
.external_decls => self.header.external_decls,
};
return self.data[info.offset .. info.offset + info.length];
@ -381,8 +367,7 @@ pub const CacheModule = struct {
.ident_store = self.header.ident_store.length,
.line_starts = self.header.line_starts.length,
.types_store = self.header.types_store.length,
.exposed_by_str = self.header.exposed_by_str.length,
.exposed_nodes = self.header.exposed_nodes.length,
.exposed_items = self.header.exposed_items.length,
.external_decls = self.header.external_decls.length,
},
};
@ -400,8 +385,7 @@ pub const CacheModule = struct {
.ident_store => self.header.ident_store,
.line_starts => self.header.line_starts,
.types_store => self.header.types_store,
.exposed_by_str => self.header.exposed_by_str,
.exposed_nodes => self.header.exposed_nodes,
.exposed_items => self.header.exposed_items,
.external_decls => self.header.external_decls,
};
@ -568,8 +552,7 @@ const ComponentType = enum {
ident_store,
line_starts,
types_store,
exposed_by_str,
exposed_nodes,
exposed_items,
external_decls,
};
@ -586,8 +569,7 @@ pub const Diagnostics = struct {
ident_store: u32,
line_starts: u32,
types_store: u32,
exposed_by_str: u32,
exposed_nodes: u32,
exposed_items: u32,
external_decls: u32,
},
};

View file

@ -623,17 +623,20 @@ pub fn canonicalizeFile(
last_type_anno = null; // Clear after successful use
// If this declaration successfully defined an exposed value, remove it from exposed_ident_texts
// and add it to exposed_nodes
// and add the node index to exposed_items
const pattern = self.parse_ir.store.getPattern(decl.pattern);
if (pattern == .ident) {
const token_region = self.parse_ir.tokens.resolve(@intCast(pattern.ident.ident_tok));
const ident_text = self.parse_ir.env.source[token_region.start.offset..token_region.end.offset];
// If this identifier is exposed, add it to exposed_nodes
// If this identifier is exposed, add it to exposed_items
if (self.exposed_ident_texts.contains(ident_text)) {
// Store the def index as u16 in exposed_nodes
// Get the interned identifier - it should already exist from parsing
const ident = base.Ident.for_text(ident_text);
const idx = try self.env.idents.insert(self.env.gpa, ident);
// Store the def index as u16 in exposed_items
const def_idx_u16: u16 = @intCast(@intFromEnum(def_idx));
try self.env.exposed_nodes.put(self.env.gpa, ident_text, def_idx_u16);
try self.env.exposed_items.setNodeIndexById(self.env.gpa, @bitCast(idx), def_idx_u16);
}
_ = self.exposed_ident_texts.remove(ident_text);
@ -900,15 +903,15 @@ fn createExposedScope(
const exposed = self.parse_ir.store.getExposedItem(exposed_idx);
switch (exposed) {
.lower_ident => |ident| {
// Get the text of the identifier token to use as key
// Get the text for tracking redundant exposures
const token_region = self.parse_ir.tokens.resolve(@intCast(ident.ident));
const ident_text = self.parse_ir.env.source[token_region.start.offset..token_region.end.offset];
// Add to exposed_by_str for permanent storage (unconditionally)
try self.env.exposed_by_str.put(gpa, ident_text, {});
// Also build exposed_scope with proper identifiers
// Get the interned identifier
if (self.parse_ir.tokens.resolveIdentifier(ident.ident)) |ident_idx| {
// Add to exposed_items for permanent storage (unconditionally)
try self.env.exposed_items.addExposedById(gpa, @bitCast(ident_idx));
// Use a dummy pattern index - we just need to track that it's exposed
const dummy_idx = @as(Pattern.Idx, @enumFromInt(0));
try self.exposed_scope.put(gpa, .ident, ident_idx, dummy_idx);
@ -933,15 +936,15 @@ fn createExposedScope(
}
},
.upper_ident => |type_name| {
// Get the text of the identifier token to use as key
// Get the text for tracking redundant exposures
const token_region = self.parse_ir.tokens.resolve(@intCast(type_name.ident));
const type_text = self.parse_ir.env.source[token_region.start.offset..token_region.end.offset];
// Add to exposed_by_str for permanent storage (unconditionally)
try self.env.exposed_by_str.put(gpa, type_text, {});
// Also build exposed_scope with proper identifiers
// Get the interned identifier
if (self.parse_ir.tokens.resolveIdentifier(type_name.ident)) |ident_idx| {
// Add to exposed_items for permanent storage (unconditionally)
try self.env.exposed_items.addExposedById(gpa, @bitCast(ident_idx));
// Use a dummy statement index - we just need to track that it's exposed
const dummy_idx = @as(Statement.Idx, @enumFromInt(0));
try self.exposed_scope.put(gpa, .type_decl, ident_idx, dummy_idx);
@ -966,15 +969,15 @@ fn createExposedScope(
}
},
.upper_ident_star => |type_with_constructors| {
// Get the text of the identifier token to use as key
// Get the text for tracking redundant exposures
const token_region = self.parse_ir.tokens.resolve(@intCast(type_with_constructors.ident));
const type_text = self.parse_ir.env.source[token_region.start.offset..token_region.end.offset];
// Add to exposed_by_str for permanent storage (unconditionally)
try self.env.exposed_by_str.put(gpa, type_text, {});
// Also build exposed_scope with proper identifiers
// Get the interned identifier
if (self.parse_ir.tokens.resolveIdentifier(type_with_constructors.ident)) |ident_idx| {
// Add to exposed_items for permanent storage (unconditionally)
try self.env.exposed_items.addExposedById(gpa, @bitCast(ident_idx));
// Use a dummy statement index - we just need to track that it's exposed
const dummy_idx = @as(Statement.Idx, @enumFromInt(0));
try self.exposed_scope.put(gpa, .type_decl, ident_idx, dummy_idx);
@ -1368,7 +1371,7 @@ fn introduceExposedItemsIntoScope(
return;
}
// Get the module's exposed_by_str map
// Get the module's exposed_items
const module_env = envs_map.get(module_name_text).?;
// Validate each exposed item
@ -1377,7 +1380,14 @@ fn introduceExposedItemsIntoScope(
const item_name_text = self.env.idents.getText(exposed_item.name);
// Check if the item is exposed by the module
if (!module_env.exposed_by_str.contains(item_name_text)) {
// We need to look up by string because the identifiers are from different modules
// First, try to find this identifier in the target module's ident store
const is_exposed = if (module_env.idents.findByString(item_name_text)) |target_ident|
module_env.exposed_items.containsById(self.env.gpa, @bitCast(target_ident))
else
false;
if (!is_exposed) {
// Determine if it's a type or value based on capitalization
const first_char = item_name_text[0];
@ -1702,11 +1712,16 @@ pub fn canonicalizeExpr(
};
};
// Look up the target node index in the module's exposed_nodes
// Look up the target node index in the module's exposed_items
// Need to convert identifier from current module to target module
const field_text = self.env.idents.getText(ident);
const target_node_idx = if (self.module_envs) |envs_map| blk: {
if (envs_map.get(module_text)) |module_env| {
break :blk module_env.exposed_nodes.get(field_text) orelse 0;
if (module_env.idents.findByString(field_text)) |target_ident| {
break :blk module_env.exposed_items.getNodeIndexById(self.env.gpa, @bitCast(target_ident)) orelse 0;
} else {
break :blk 0;
}
} else {
break :blk 0;
}
@ -1761,11 +1776,16 @@ pub fn canonicalizeExpr(
};
};
// Look up the target node index in the module's exposed_nodes
// Look up the target node index in the module's exposed_items
// Need to convert identifier from current module to target module
const field_text = self.env.idents.getText(exposed_info.original_name);
const target_node_idx = if (self.module_envs) |envs_map| blk: {
if (envs_map.get(module_text)) |module_env| {
break :blk module_env.exposed_nodes.get(field_text) orelse 0;
if (module_env.idents.findByString(field_text)) |target_ident| {
break :blk module_env.exposed_items.getNodeIndexById(self.env.gpa, @bitCast(target_ident)) orelse 0;
} else {
break :blk 0;
}
} else {
break :blk 0;
}
@ -6541,11 +6561,16 @@ fn tryModuleQualifiedLookup(self: *Self, field_access: AST.BinOp) std.mem.Alloca
const region = self.parse_ir.tokenizedRegionToRegion(field_access.region);
// Look up the target node index in the module's exposed_nodes
// Look up the target node index in the module's exposed_items
// Need to convert identifier from current module to target module
const field_text = self.env.idents.getText(field_name);
const target_node_idx = if (self.module_envs) |envs_map| blk: {
if (envs_map.get(module_text)) |module_env| {
break :blk module_env.exposed_nodes.get(field_text) orelse 0;
if (module_env.idents.findByString(field_text)) |target_ident| {
break :blk module_env.exposed_items.getNodeIndexById(self.env.gpa, @bitCast(target_ident)) orelse 0;
} else {
break :blk 0;
}
} else {
break :blk 0;
}

View file

@ -7,6 +7,7 @@
const std = @import("std");
const compile = @import("compile");
const parse = @import("parse");
const base = @import("base");
const canonicalize = @import("../../canonicalize.zig");
@ -342,7 +343,7 @@ test "complex case with redundant, shadowing, and not implemented" {
try testing.expect(found_not_implemented);
}
test "exposed_by_str is populated correctly" {
test "exposed_items is populated correctly" {
const allocator = testing.allocator;
const source =
@ -365,16 +366,23 @@ test "exposed_by_str is populated correctly" {
try canonicalizer.canonicalizeFile();
// Check that exposed_by_str contains all exposed items
try testing.expect(env.exposed_by_str.contains("foo"));
try testing.expect(env.exposed_by_str.contains("bar"));
try testing.expect(env.exposed_by_str.contains("MyType"));
// Check that exposed_items contains the correct number of items
// The exposed items were added during canonicalization
// Should have exactly 3 entries (duplicates not stored)
try testing.expectEqual(@as(usize, 3), env.exposed_by_str.count());
try testing.expectEqual(@as(usize, 3), env.exposed_items.count());
// Check that exposed_items contains all exposed items
const foo_idx = env.idents.findByString("foo").?;
const bar_idx = env.idents.findByString("bar").?;
const mytype_idx = env.idents.findByString("MyType").?;
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(foo_idx)));
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(bar_idx)));
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(mytype_idx)));
}
test "exposed_by_str persists after canonicalization" {
test "exposed_items persists after canonicalization" {
const allocator = testing.allocator;
const source =
@ -397,16 +405,20 @@ test "exposed_by_str persists after canonicalization" {
try canonicalizer.canonicalizeFile();
// All exposed items should be in exposed_by_str, even those not implemented
try testing.expect(env.exposed_by_str.contains("x"));
try testing.expect(env.exposed_by_str.contains("y"));
try testing.expect(env.exposed_by_str.contains("z"));
// All exposed items should be in exposed_items, even those not implemented
const x_idx = env.idents.findByString("x").?;
const y_idx = env.idents.findByString("y").?;
const z_idx = env.idents.findByString("z").?;
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(x_idx)));
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(y_idx)));
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(z_idx)));
// Verify the map persists in env after canonicalization is complete
try testing.expectEqual(@as(usize, 3), env.exposed_by_str.count());
try testing.expectEqual(@as(usize, 3), env.exposed_items.count());
}
test "exposed_by_str never has entries removed" {
test "exposed_items never has entries removed" {
const allocator = testing.allocator;
const source =
@ -429,13 +441,57 @@ test "exposed_by_str never has entries removed" {
try canonicalizer.canonicalizeFile();
// All exposed items should remain in exposed_by_str
// All exposed items should remain in exposed_items
// Even though foo appears twice and baz is not implemented,
// exposed_by_str should have all unique exposed identifiers
try testing.expect(env.exposed_by_str.contains("foo"));
try testing.expect(env.exposed_by_str.contains("bar"));
try testing.expect(env.exposed_by_str.contains("baz"));
// exposed_items should have all unique exposed identifiers
const foo_idx = env.idents.findByString("foo").?;
const bar_idx = env.idents.findByString("bar").?;
const baz_idx = env.idents.findByString("baz").?;
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(foo_idx)));
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(bar_idx)));
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(baz_idx)));
// Should have exactly 3 unique entries
try testing.expectEqual(@as(usize, 3), env.exposed_by_str.count());
try testing.expectEqual(@as(usize, 3), env.exposed_items.count());
}
// Regression test: identifier attributes (e.g. the effectful "!" suffix) must be
// part of the exposed_items key, so `foo` and `foo!` remain distinct entries.
test "exposed_items handles identifiers with different attributes" {
const allocator = testing.allocator;
// Module exposing foo and foo! - these should be treated as different identifiers
// Note: Using foo and foo! to test that attributes are properly included in the key
const source =
\\module [foo, foo!]
\\
\\foo = 42
\\foo! = \x -> x + 1
;
// Parse and canonicalize the module end-to-end so exposed_items is populated
// the same way production code paths populate it.
var env = try ModuleEnv.init(allocator, source);
defer env.deinit();
try env.initCIRFields(allocator, "Test");
var ast = try parse.parse(&env);
defer ast.deinit(allocator);
var canonicalizer = try canonicalize.init(&env, &ast, null);
defer canonicalizer.deinit();
try canonicalizer.canonicalizeFile();
// Both should be in exposed_items as separate entries
const foo_idx = env.idents.findByString("foo").?;
const foo_effectful_idx = env.idents.findByString("foo!").?;
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(foo_idx)));
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(foo_effectful_idx)));
// Should have exactly 2 entries - if we only used u29 without attributes, they might incorrectly merge
try testing.expectEqual(@as(usize, 2), env.exposed_items.count());
// Verify they have different full u32 values (index + attributes)
const foo_u32 = @as(u32, @bitCast(foo_idx));
const foo_effectful_u32 = @as(u32, @bitCast(foo_effectful_idx));
try testing.expect(foo_u32 != foo_effectful_u32);
}

View file

@ -59,10 +59,15 @@ test "import validation - mix of MODULE NOT FOUND, TYPE NOT EXPOSED, VALUE NOT E
}
// Add exposed items to Json module
try json_env.exposed_by_str.put(allocator, "decode", {});
try json_env.exposed_by_str.put(allocator, "encode", {});
try json_env.exposed_by_str.put(allocator, "JsonError", {});
try json_env.exposed_by_str.put(allocator, "DecodeProblem", {});
const Ident = base.Ident;
const decode_idx = try json_env.idents.insert(allocator, Ident.for_text("decode"));
try json_env.exposed_items.addExposedById(allocator, @bitCast(decode_idx));
const encode_idx = try json_env.idents.insert(allocator, Ident.for_text("encode"));
try json_env.exposed_items.addExposedById(allocator, @bitCast(encode_idx));
const json_error_idx = try json_env.idents.insert(allocator, Ident.for_text("JsonError"));
try json_env.exposed_items.addExposedById(allocator, @bitCast(json_error_idx));
const decode_problem_idx = try json_env.idents.insert(allocator, Ident.for_text("DecodeProblem"));
try json_env.exposed_items.addExposedById(allocator, @bitCast(decode_problem_idx));
try module_envs.put("Json", json_env);
@ -75,9 +80,12 @@ test "import validation - mix of MODULE NOT FOUND, TYPE NOT EXPOSED, VALUE NOT E
}
// Add exposed items to Utils module
try utils_env.exposed_by_str.put(allocator, "map", {});
try utils_env.exposed_by_str.put(allocator, "filter", {});
try utils_env.exposed_by_str.put(allocator, "Result", {});
const map_idx = try utils_env.idents.insert(allocator, Ident.for_text("map"));
try utils_env.exposed_items.addExposedById(allocator, @bitCast(map_idx));
const filter_idx = try utils_env.idents.insert(allocator, Ident.for_text("filter"));
try utils_env.exposed_items.addExposedById(allocator, @bitCast(filter_idx));
const result_idx = try utils_env.idents.insert(allocator, Ident.for_text("Result"));
try utils_env.exposed_items.addExposedById(allocator, @bitCast(result_idx));
try module_envs.put("Utils", utils_env);
@ -502,7 +510,7 @@ test "module-qualified lookups with e_lookup_external" {
try expectEqual(true, found_dict_empty);
}
test "exposed_nodes - tracking CIR node indices for exposed items" {
test "exposed_items - tracking CIR node indices for exposed items" {
var gpa_state = std.heap.GeneralPurposeAllocator(.{ .safety = true }){};
defer std.debug.assert(gpa_state.deinit() == .ok);
const allocator = gpa_state.allocator();
@ -519,16 +527,20 @@ test "exposed_nodes - tracking CIR node indices for exposed items" {
allocator.destroy(math_env);
}
// Add exposed items
try math_env.exposed_by_str.put(allocator, "add", {});
try math_env.exposed_by_str.put(allocator, "multiply", {});
try math_env.exposed_by_str.put(allocator, "PI", {});
// Add exposed items and set their node indices
const Ident = base.Ident;
const add_idx = try math_env.idents.insert(allocator, Ident.for_text("add"));
try math_env.exposed_items.addExposedById(allocator, @bitCast(add_idx));
const multiply_idx = try math_env.idents.insert(allocator, Ident.for_text("multiply"));
try math_env.exposed_items.addExposedById(allocator, @bitCast(multiply_idx));
const pi_idx = try math_env.idents.insert(allocator, Ident.for_text("PI"));
try math_env.exposed_items.addExposedById(allocator, @bitCast(pi_idx));
// Simulate having CIR node indices for these exposed items
// In real usage, these would be set during canonicalization of MathUtils
try math_env.exposed_nodes.put(allocator, "add", 100);
try math_env.exposed_nodes.put(allocator, "multiply", 200);
try math_env.exposed_nodes.put(allocator, "PI", 300);
try math_env.exposed_items.setNodeIndexById(allocator, @bitCast(add_idx), 100);
try math_env.exposed_items.setNodeIndexById(allocator, @bitCast(multiply_idx), 200);
try math_env.exposed_items.setNodeIndexById(allocator, @bitCast(pi_idx), 300);
try module_envs.put("MathUtils", math_env);
@ -586,15 +598,16 @@ test "exposed_nodes - tracking CIR node indices for exposed items" {
try expectEqual(true, found_multiply_with_idx_200);
try expectEqual(true, found_pi_with_idx_300);
// Test case where exposed_nodes is not populated (should get 0)
// Test case where node index is not populated (should get 0)
const empty_env = try allocator.create(ModuleEnv);
empty_env.* = try ModuleEnv.init(allocator, "");
defer {
empty_env.deinit();
allocator.destroy(empty_env);
}
try empty_env.exposed_by_str.put(allocator, "undefined", {});
// Don't add to exposed_nodes - should default to 0
const undefined_idx = try empty_env.idents.insert(allocator, Ident.for_text("undefined"));
try empty_env.exposed_items.addExposedById(allocator, @bitCast(undefined_idx));
// Don't set node index - should default to 0
try module_envs.put("EmptyModule", empty_env);
const source2 =

View file

@ -860,10 +860,16 @@ pub fn checkExpr(self: *Self, expr_idx: ModuleEnv.Expr.Idx) std.mem.Allocator.Er
if (origin_module) |module| {
// Look up the method in the origin module's exports
// Need to convert identifier from current module to target module
const method_name_str = self.cir.idents.getText(dot_access.field_name);
// Search through the module's exposed nodes
if (module.exposed_nodes.get(method_name_str)) |node_idx| {
// Search through the module's exposed items
const node_idx_opt = if (module.idents.findByString(method_name_str)) |target_ident|
module.exposed_items.getNodeIndexById(self.gpa, @bitCast(target_ident))
else
null;
if (node_idx_opt) |node_idx| {
// Found the method!
const target_expr_idx = @as(ModuleEnv.Expr.Idx, @enumFromInt(node_idx));

View file

@ -0,0 +1,288 @@
//! A specialized collection for tracking exposed items in modules using interned IDs.
//!
//! This combines the functionality of exposed_by_str and exposed_nodes into a single
//! SortedArrayBuilder to save memory and simplify the API. It uses interned identifier
//! indices (u32 matching Ident.Idx) as keys and u16 values (the node indices).
//!
//! A value of 0 means "exposed but not yet defined", while non-zero values
//! are actual node indices.
//!
//! Note: This module only provides ID-based methods. String-to-ID conversion must be
//! handled by the caller who has access to the Ident.Store.
const std = @import("std");
const Allocator = std.mem.Allocator;
const SortedArrayBuilder = @import("SortedArrayBuilder.zig").SortedArrayBuilder;
// We use u32 which is the bit representation of base.Ident.Idx
// This includes both the 29-bit index AND the 3-bit attributes (effectful, ignored, reassignable)
// This is critical because foo, foo!, and _foo are different identifiers
const IdentIdx = u32;
/// A collection that tracks exposed items by their names and associated CIR node indices
pub const ExposedItems = struct {
/// Maps item name (as interned ID) -> CIR node index (0 means exposed but not defined)
items: SortedArrayBuilder(IdentIdx, u16),
const Self = @This();
/// Create an empty ExposedItems collection.
/// Allocates nothing until the first item is added.
pub fn init() Self {
    const empty_builder = SortedArrayBuilder(IdentIdx, u16).init();
    return .{ .items = empty_builder };
}
/// Release all memory owned by this collection.
/// Must be called with the same allocator that was passed to the mutating methods.
pub fn deinit(self: *Self, allocator: Allocator) void {
self.items.deinit(allocator);
}
/// Add an exposed item by its interned ID (pass @bitCast(base.Ident.Idx) to u32).
/// A fresh entry is recorded with node index 0, meaning "exposed but not yet defined".
pub fn addExposedById(self: *Self, allocator: Allocator, ident_idx: IdentIdx) !void {
    // Once the builder is sorted, re-inserting an existing key would let a
    // duplicate with value 0 clobber an already-recorded node index, so we
    // only insert when the key is genuinely absent.
    const already_present = self.items.sorted and self.items.get(allocator, ident_idx) != null;
    if (already_present) return;
    // Either the builder is still unsorted (duplicates get resolved later)
    // or the key is new; record it as exposed-but-undefined.
    try self.items.put(allocator, ident_idx, 0);
}
/// Set the node index for an exposed item by its interned ID (pass @bitCast(base.Ident.Idx) to u32).
/// If the identifier was never added as exposed, a new entry is created with this index.
pub fn setNodeIndexById(self: *Self, allocator: Allocator, ident_idx: IdentIdx, node_idx: u16) !void {
    // Sort (and dedupe) first so there is at most one live entry per key
    // before we scan for it.
    self.items.ensureSorted(allocator);
    // Linear scan over the entries, updating in place when the key is found.
    for (self.items.entries.items) |*entry| {
        if (entry.key == ident_idx) {
            entry.value = node_idx;
            return;
        }
    }
    // Key not present: append it with the given node index.
    try self.items.put(allocator, ident_idx, node_idx);
}
/// Check if an item is exposed by its interned ID (pass @bitCast(base.Ident.Idx) to u32).
/// NOTE: get() may lazily sort the underlying builder, hence the @constCast;
/// the logical contents are unchanged by the lookup.
pub fn containsById(self: *const Self, allocator: Allocator, ident_idx: IdentIdx) bool {
    return @constCast(self).items.get(allocator, ident_idx) != null;
}
/// Get the node index for an exposed item by its interned ID (pass @bitCast(base.Ident.Idx) to u32).
/// Returns null when the identifier is not exposed; a value of 0 means
/// "exposed but not yet defined".
/// NOTE: get() may lazily sort the underlying builder, hence the @constCast.
pub fn getNodeIndexById(self: *const Self, allocator: Allocator, ident_idx: IdentIdx) ?u16 {
    return @constCast(self).items.get(allocator, ident_idx);
}
/// Get the number of exposed items.
/// NOTE(review): if the builder still holds unsorted duplicate keys, this may
/// count them individually — confirm SortedArrayBuilder.count dedupes first.
pub fn count(self: *const Self) usize {
return self.items.count();
}
/// Ensure the array is sorted and deduplicated.
/// This should be called at the end of canonicalization, so that later
/// read-heavy phases don't pay the lazy-sort cost on first lookup.
pub fn ensureSorted(self: *Self, allocator: Allocator) void {
self.items.ensureSorted(allocator);
}
/// Detect duplicate exposed items for error reporting.
/// Returns the interned IDs of duplicates (caller must convert to strings).
/// NOTE(review): the caller presumably owns the returned slice and must free
/// it with `allocator` — confirm against SortedArrayBuilder.detectDuplicates.
pub fn detectDuplicates(self: *Self, allocator: Allocator) ![]IdentIdx {
return self.items.detectDuplicates(allocator);
}
/// Relocate pointers after memory movement, delegating to the underlying builder.
/// NOTE(review): `offset` is presumably a byte delta applied to internal
/// pointers — confirm against SortedArrayBuilder.relocate.
pub fn relocate(self: *Self, offset: isize) void {
self.items.relocate(offset);
}
/// Calculate the size needed to serialize this ExposedItems.
/// Layout: a u32 entry count followed by one (u32 key, u16 value) record
/// per entry, rounded up to SERIALIZATION_ALIGNMENT.
pub fn serializedSize(self: *const Self) usize {
    // Every entry contributes a fixed number of bytes, so the total is a
    // simple multiply — no need to loop over the entries.
    const entry_count = self.items.entries.items.len;
    const size = @sizeOf(u32) + entry_count * (@sizeOf(u32) + @sizeOf(u16));
    // Align to SERIALIZATION_ALIGNMENT
    const SERIALIZATION_ALIGNMENT = 16;
    return std.mem.alignForward(usize, size, SERIALIZATION_ALIGNMENT);
}
/// Serialize this ExposedItems into `buffer`.
/// Wire format: entry count (u32 LE), then for each entry its interned key
/// (u32 LE) followed by its node index (u16 LE); any remaining bytes of the
/// aligned region are zero-filled for deterministic output.
/// Returns error.BufferTooSmall if `buffer` cannot hold serializedSize() bytes;
/// otherwise returns the slice of `buffer` that was written.
pub fn serializeInto(self: *const Self, buffer: []u8) ![]u8 {
    const total = self.serializedSize();
    if (buffer.len < total) return error.BufferTooSmall;

    var cursor: usize = 0;

    // Entry count first.
    std.mem.writeInt(u32, buffer[cursor..][0..4], @intCast(self.items.entries.items.len), .little);
    cursor += @sizeOf(u32);

    // Then each (key, value) pair in storage order.
    for (self.items.entries.items) |entry| {
        std.mem.writeInt(u32, buffer[cursor..][0..4], entry.key, .little);
        cursor += @sizeOf(u32);
        std.mem.writeInt(u16, buffer[cursor..][0..2], entry.value, .little);
        cursor += @sizeOf(u16);
    }

    // Zero the alignment padding.
    if (cursor < total) {
        @memset(buffer[cursor..total], 0);
    }

    return buffer[0..total];
}
/// Deserialize an ExposedItems from `buffer` (the format written by serializeInto).
/// Returns error.BufferTooSmall if the buffer ends before the declared entries.
/// On success the caller owns the result; on error partial state is freed.
pub fn deserializeFrom(buffer: []const u8, allocator: Allocator) !Self {
    if (buffer.len < @sizeOf(u32)) return error.BufferTooSmall;

    var cursor: usize = 0;
    const entry_count = std.mem.readInt(u32, buffer[cursor..][0..4], .little);
    cursor += @sizeOf(u32);

    var result = Self.init();
    errdefer result.deinit(allocator);

    var remaining: u32 = entry_count;
    while (remaining > 0) : (remaining -= 1) {
        // Key: the full u32 interned ID (index plus attribute bits).
        if (cursor + @sizeOf(u32) > buffer.len) return error.BufferTooSmall;
        const key = std.mem.readInt(u32, buffer[cursor..][0..4], .little);
        cursor += @sizeOf(u32);

        // Value: the associated node index.
        if (cursor + @sizeOf(u16) > buffer.len) return error.BufferTooSmall;
        const value = std.mem.readInt(u16, buffer[cursor..][0..2], .little);
        cursor += @sizeOf(u16);

        try result.items.put(allocator, key, value);
    }

    return result;
}
/// Append to iovec writer for serialization
/// (delegates to the underlying SortedArrayBuilder; returns the offset of the
/// serialized struct within the writer's output)
pub fn appendToIovecs(self: *const Self, writer: anytype) !usize {
    return self.items.appendToIovecs(writer);
}
/// Iterator over all exposed items, yielding (interned ident, node index) pairs.
pub const Iterator = struct {
    items: []const SortedArrayBuilder(IdentIdx, u16).Entry,
    index: usize,

    /// Returns the next pair, or null once all entries have been yielded.
    pub fn next(self: *Iterator) ?struct { ident_idx: IdentIdx, node_idx: u16 } {
        if (self.index >= self.items.len) return null;
        const entry = self.items[self.index];
        self.index += 1;
        return .{ .ident_idx = entry.key, .node_idx = entry.value };
    }
};
/// Get an iterator over all exposed items
/// Iterates the entries in their current order (ascending by key only after
/// ensureSorted has run).
pub fn iterator(self: *const Self) Iterator {
    return .{ .items = self.items.entries.items, .index = 0 };
}
};
test "ExposedItems basic operations" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var store = ExposedItems.init();
    defer store.deinit(gpa);

    // Register some exposed names (mirrors the old exposed_by_str.put usage).
    try store.addExposed(gpa, "foo");
    try store.addExposed(gpa, "bar");
    try store.addExposed(gpa, "MyType");

    try testing.expectEqual(@as(usize, 3), store.count());

    // Membership checks (mirrors exposed_by_str.contains).
    try testing.expect(store.contains(gpa, "foo"));
    try testing.expect(store.contains(gpa, "bar"));
    try testing.expect(store.contains(gpa, "MyType"));
    try testing.expect(!store.contains(gpa, "missing"));

    // Before a node index is assigned, lookups yield the default of 0.
    try testing.expectEqual(@as(?u16, 0), store.getNodeIndex(gpa, "foo"));

    // Assign node indices (mirrors exposed_nodes.put).
    try store.setNodeIndex(gpa, "foo", 42);
    try store.setNodeIndex(gpa, "bar", 84);

    try testing.expectEqual(@as(?u16, 42), store.getNodeIndex(gpa, "foo"));
    try testing.expectEqual(@as(?u16, 84), store.getNodeIndex(gpa, "bar"));
    try testing.expectEqual(@as(?u16, 0), store.getNodeIndex(gpa, "MyType")); // never set
    try testing.expectEqual(@as(?u16, null), store.getNodeIndex(gpa, "missing")); // not exposed
}
test "ExposedItems handles duplicates" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var store = ExposedItems.init();
    defer store.deinit(gpa);

    // Expose "foo", give it a node index, then expose it again.
    try store.addExposed(gpa, "foo");
    try store.setNodeIndex(gpa, "foo", 42);
    try store.addExposed(gpa, "foo"); // duplicate — must not clobber the node index

    // Trigger sorting/deduplication explicitly.
    store.ensureSorted(gpa);

    // The assigned node index survives deduplication.
    try testing.expectEqual(@as(?u16, 42), store.getNodeIndex(gpa, "foo"));
}
test "ExposedItems detectDuplicates" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var store = ExposedItems.init();
    defer store.deinit(gpa);

    // "foo" and "bar" are each exposed twice.
    try store.addExposed(gpa, "foo");
    try store.addExposed(gpa, "bar");
    try store.addExposed(gpa, "foo"); // duplicate
    try store.setNodeIndex(gpa, "bar", 42);
    try store.addExposed(gpa, "bar"); // duplicate

    const duplicates = try store.detectDuplicates(gpa);
    defer gpa.free(duplicates);

    // Both duplicated names are reported, once each.
    try testing.expectEqual(@as(usize, 2), duplicates.len);
}

View file

@ -0,0 +1,450 @@
//! A builder for creating sorted arrays with binary search lookups.
//!
//! SortedArrayBuilder provides a way to build key-value mappings that are
//! optimized for small sizes and deterministic serialization. It maintains
//! a sorted array internally, enabling efficient binary search lookups.
//!
//! Features:
//! - Supports both string and numeric key types
//! - Maintains sorted order for deterministic serialization
//! - Efficient binary search lookups O(log n)
//! - Zero-copy deserialization
//! - Minimal memory overhead
const std = @import("std");
const Allocator = std.mem.Allocator;
// Import base for both testing and runtime
const base = @import("base");
const serialization = @import("serialization");
const IovecWriter = serialization.IovecWriter;
/// A builder for creating sorted arrays directly without using hash maps
/// This is more efficient when we know we won't have duplicates
pub fn SortedArrayBuilder(comptime K: type, comptime V: type) type {
    return struct {
        // Insertion-ordered backing storage; lazily sorted on first lookup.
        entries: std.ArrayListUnmanaged(Entry) = .{},
        // True while entries are known to be in ascending key order.
        sorted: bool = true,

        const Self = @This();

        /// One key/value pair held by the builder.
        pub const Entry = struct {
            key: K,
            value: V,

            // Sort order: lexicographic for string keys, numeric `<` for
            // integer/enum keys; any other key type is a compile error.
            fn lessThan(_: void, a: Entry, b: Entry) bool {
                if (K == []const u8) {
                    return std.mem.lessThan(u8, a.key, b.key);
                } else if (@typeInfo(K) == .int or @typeInfo(K) == .@"enum") {
                    return a.key < b.key;
                } else {
                    @compileError("Unsupported key type for SortedArrayBuilder");
                }
            }
        };

        /// Create an empty builder (no allocation until the first put).
        pub fn init() Self {
            return .{};
        }

        /// Free all owned memory. String keys were duplicated by `put`,
        /// so they are freed here too.
        pub fn deinit(self: *Self, allocator: Allocator) void {
            if (K == []const u8) {
                // Free string keys
                for (self.entries.items) |entry| {
                    allocator.free(entry.key);
                }
            }
            self.entries.deinit(allocator);
        }

        /// Add a key-value pair
        /// String keys are duplicated so the builder owns them; appending a
        /// key that is not strictly greater than the current last key clears
        /// the `sorted` flag, deferring the sort to the next lookup.
        pub fn put(self: *Self, allocator: Allocator, key: K, value: V) !void {
            const new_key = if (K == []const u8) try allocator.dupe(u8, key) else key;
            // Check if we need to maintain sorted order
            if (self.sorted and self.entries.items.len > 0) {
                const last = self.entries.items[self.entries.items.len - 1];
                if (K == []const u8) {
                    self.sorted = std.mem.lessThan(u8, last.key, new_key);
                } else {
                    self.sorted = last.key < new_key;
                }
            }
            try self.entries.append(allocator, .{ .key = new_key, .value = value });
        }

        /// Get value by key (requires sorting first if not already sorted)
        /// O(log n) binary search after the lazy sort/deduplication.
        pub fn get(self: *Self, allocator: Allocator, key: K) ?V {
            self.ensureSorted(allocator);
            var left: usize = 0;
            var right: usize = self.entries.items.len;
            while (left < right) {
                const mid = left + (right - left) / 2;
                const mid_key = self.entries.items[mid].key;
                // Three-way comparison: byte order for strings, `<`/`==` for
                // numeric keys.
                const cmp = if (K == []const u8)
                    std.mem.order(u8, mid_key, key)
                else if (mid_key == key)
                    std.math.Order.eq
                else if (mid_key < key)
                    std.math.Order.lt
                else
                    std.math.Order.gt;
                switch (cmp) {
                    .eq => return self.entries.items[mid].value,
                    .lt => left = mid + 1,
                    .gt => right = mid,
                }
            }
            return null;
        }

        /// Ensure the array is sorted and deduplicate any duplicate entries
        pub fn ensureSorted(self: *Self, allocator: Allocator) void {
            if (!self.sorted) {
                // NOTE(review): pdq is an unstable sort, so entries with equal
                // keys may be reordered before deduplication; "keep the last
                // occurrence" below then refers to post-sort order, not
                // insertion order — confirm this is acceptable.
                std.sort.pdq(Entry, self.entries.items, {}, Entry.lessThan);
                self.sorted = true;
                // Check for and report duplicates, then deduplicate
                self.deduplicateAndReport(allocator);
            }
        }

        /// Check for duplicates, report them, and remove duplicates keeping the last occurrence
        fn deduplicateAndReport(self: *Self, allocator: Allocator) void {
            if (self.entries.items.len <= 1) return;
            // First pass: detect and report duplicates
            // NOTE(review): this pass is currently a no-op (the duplicate
            // branch is empty); reporting is done separately via
            // detectDuplicates(). Consider removing the pass.
            var i: usize = 1;
            while (i < self.entries.items.len) {
                const prev_entry = self.entries.items[i - 1];
                const curr_entry = self.entries.items[i];
                const is_duplicate = if (K == []const u8)
                    std.mem.eql(u8, prev_entry.key, curr_entry.key)
                else
                    prev_entry.key == curr_entry.key;
                if (is_duplicate) {
                    // Note: Duplicate detection is handled by caller via detectDuplicates() method
                }
                i += 1;
            }
            // Second pass: deduplicate by keeping last occurrence
            // NOTE(review): this look-ahead scan is O(n^2); since the array is
            // already sorted, duplicates are adjacent and a single linear pass
            // would suffice.
            var write_index: usize = 0;
            for (self.entries.items, 0..) |entry, read_index| {
                var should_keep = true;
                // Look ahead to see if there's a duplicate later
                for (self.entries.items[read_index + 1 ..]) |future_entry| {
                    const is_duplicate = if (K == []const u8)
                        std.mem.eql(u8, entry.key, future_entry.key)
                    else
                        entry.key == future_entry.key;
                    if (is_duplicate) {
                        should_keep = false;
                        break;
                    }
                }
                if (should_keep) {
                    if (write_index != read_index) {
                        self.entries.items[write_index] = entry;
                    }
                    write_index += 1;
                } else {
                    // Free the string key that we're not keeping
                    if (K == []const u8) {
                        allocator.free(entry.key);
                    }
                }
            }
            // Update the length to reflect deduplicated entries
            self.entries.items.len = write_index;
        }

        /// Detect duplicates without modifying the array - returns list of duplicate keys
        /// Caller owns the returned slice. For string keys the returned slices
        /// alias entries owned by the builder; they are invalidated when the
        /// builder deduplicates (which frees dropped keys) or is deinitialized.
        /// Sorts the entries as a side effect if they were unsorted.
        pub fn detectDuplicates(self: *Self, allocator: Allocator) ![]K {
            var duplicates = std.ArrayList(K).init(allocator);
            if (self.entries.items.len <= 1) return duplicates.toOwnedSlice();
            // Ensure sorted first
            if (!self.sorted) {
                std.sort.pdq(Entry, self.entries.items, {}, Entry.lessThan);
                self.sorted = true;
            }
            // Track keys already reported so each duplicate key is listed once.
            var reported_keys = if (K == []const u8)
                std.StringHashMap(void).init(allocator)
            else
                std.AutoHashMap(K, void).init(allocator);
            defer reported_keys.deinit();
            var i: usize = 1;
            while (i < self.entries.items.len) {
                const prev_entry = self.entries.items[i - 1];
                const curr_entry = self.entries.items[i];
                const is_duplicate = if (K == []const u8)
                    std.mem.eql(u8, prev_entry.key, curr_entry.key)
                else
                    prev_entry.key == curr_entry.key;
                if (is_duplicate) {
                    // Report duplicate only once per unique key
                    const result = try reported_keys.getOrPut(curr_entry.key);
                    if (!result.found_existing) {
                        try duplicates.append(curr_entry.key);
                    }
                }
                i += 1;
            }
            return duplicates.toOwnedSlice();
        }

        /// Relocate pointers after memory movement
        /// Adds `offset` to the entries pointer, unless it holds the
        /// empty-array sentinel written by appendToIovecs.
        pub fn relocate(self: *Self, offset: isize) void {
            // Relocate the entries array pointer
            if (self.entries.items.len > 0) {
                const old_ptr = @intFromPtr(self.entries.items.ptr);
                // Skip relocation if this is a sentinel value
                // Define sentinel value locally since iovec_serialize is not available
                const EMPTY_ARRAY_SENTINEL: usize = 0xDEADBEEF;
                if (old_ptr != EMPTY_ARRAY_SENTINEL) {
                    const new_ptr = @as(usize, @intCast(@as(isize, @intCast(old_ptr)) + offset));
                    self.entries.items.ptr = @ptrFromInt(new_ptr);
                }
            }
        }

        /// Get the number of entries
        /// (includes duplicates until ensureSorted deduplicates them)
        pub fn count(self: *const Self) usize {
            return self.entries.items.len;
        }

        /// Append this SortedArrayBuilder to an iovec writer for serialization
        /// Panics if the builder is not sorted. Writes the entry data, then a
        /// copy of this struct whose entries pointer has been replaced by the
        /// data's offset (or the 0xDEADBEEF sentinel for an empty array).
        /// Returns the offset of the serialized struct.
        pub fn appendToIovecs(self: *const Self, writer: *IovecWriter) !usize {
            // Must be sorted before serialization
            if (!self.sorted) {
                @panic("SortedArrayBuilder must be sorted before serialization");
            }
            // Create a mutable copy of self that we can modify
            var builder_copy = self.*;
            // Create a buffer for the final serialized struct
            const builder_copy_buffer = try writer.allocator.alloc(u8, @sizeOf(Self));
            // Track this allocation so it gets freed when writer is deinitialized
            try writer.owned_buffers.append(builder_copy_buffer);
            // Serialize entries array data
            const entries_offset = if (self.entries.items.len > 0) blk: {
                const data = std.mem.sliceAsBytes(self.entries.items);
                const offset = try writer.appendBytes(Entry, data);
                break :blk offset;
            } else 0;
            // Update pointer in the copy to use offset
            // For empty arrays, use sentinel value instead of 0 to avoid null pointer
            builder_copy.entries.items.ptr = if (entries_offset == 0)
                @ptrFromInt(0xDEADBEEF) // EMPTY_ARRAY_SENTINEL
            else
                @ptrFromInt(entries_offset);
            builder_copy.entries.items.len = self.entries.items.len;
            // Note: For string keys, we don't serialize the string data separately here.
            // This assumes that the strings are already managed elsewhere (e.g., in an interner)
            // and we're just storing pointers/slices to them. If string keys need to be
            // serialized as well, that would require additional handling.
            // Copy the modified struct to the buffer
            @memcpy(builder_copy_buffer, std.mem.asBytes(&builder_copy));
            // Now that all pointers have been converted to offsets, add the copy to iovecs
            const struct_offset = try writer.appendBytes(Self, builder_copy_buffer);
            return struct_offset;
        }
    };
}
test "SortedArrayBuilder basic operations" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var builder = SortedArrayBuilder([]const u8, u32).init();
    defer builder.deinit(gpa);

    // Insert out of order so the first lookup must trigger the lazy sort.
    try builder.put(gpa, "zebra", 100);
    try builder.put(gpa, "apple", 50);
    try builder.put(gpa, "banana", 150);
    try builder.put(gpa, "cherry", 30);

    try testing.expectEqual(@as(usize, 4), builder.count());

    // Lookups: each hit returns its value; a missing key returns null.
    try testing.expectEqual(@as(?u32, 50), builder.get(gpa, "apple"));
    try testing.expectEqual(@as(?u32, 150), builder.get(gpa, "banana"));
    try testing.expectEqual(@as(?u32, 30), builder.get(gpa, "cherry"));
    try testing.expectEqual(@as(?u32, 100), builder.get(gpa, "zebra"));
    try testing.expectEqual(@as(?u32, null), builder.get(gpa, "missing"));

    // Smoke-test iovec serialization: it should emit at least one iovec.
    var writer = IovecWriter.init(gpa);
    defer writer.deinit();
    _ = try builder.appendToIovecs(&writer);
    try testing.expect(writer.iovecs.items.len > 0);
}
test "SortedArrayBuilder maintains sorted order when added in order" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var builder = SortedArrayBuilder([]const u8, u16).init();
    defer builder.deinit(gpa);

    // Keys arrive already in ascending order...
    try builder.put(gpa, "aaa", 1);
    try builder.put(gpa, "bbb", 2);
    try builder.put(gpa, "ccc", 3);
    try builder.put(gpa, "ddd", 4);

    // ...so the builder never flags itself as unsorted.
    try testing.expect(builder.sorted);

    // Lookups succeed without needing a sort pass.
    try testing.expectEqual(@as(?u16, 1), builder.get(gpa, "aaa"));
    try testing.expectEqual(@as(?u16, 4), builder.get(gpa, "ddd"));
}
test "SortedArrayBuilder detectDuplicates example usage" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var builder = SortedArrayBuilder(u32, u16).init();
    defer builder.deinit(gpa);

    // Keys 100 and 200 are each inserted twice; 300 once.
    try builder.put(gpa, 100, 1);
    try builder.put(gpa, 200, 2);
    try builder.put(gpa, 100, 3); // duplicate of 100
    try builder.put(gpa, 300, 4);
    try builder.put(gpa, 200, 5); // duplicate of 200

    // Detection runs before deduplication and lists each duplicate key once.
    const duplicates = try builder.detectDuplicates(gpa);
    defer gpa.free(duplicates);

    // Example: report duplicates. In a real compiler this loop would push a
    // diagnostic instead, e.g.:
    //   try cir.pushDiagnostic(CIR.Diagnostic{ .duplicate_exposed_item = ... });
    for (duplicates) |duplicate_key| {
        std.debug.print("Found duplicate key: {d}\n", .{duplicate_key});
    }
    try testing.expectEqual(@as(usize, 2), duplicates.len);

    // After detection, normal sorting/deduplication still works:
    builder.ensureSorted(gpa);
    try testing.expectEqual(@as(usize, 3), builder.count()); // deduplicated
    try testing.expectEqual(@as(?u16, 3), builder.get(gpa, 100)); // last value kept
    try testing.expectEqual(@as(?u16, 5), builder.get(gpa, 200)); // last value kept
}
test "SortedArrayBuilder handles duplicates with string keys" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var builder = SortedArrayBuilder([]const u8, u32).init();
    defer builder.deinit(gpa);

    // "apple" and "banana" are each inserted twice with different values.
    try builder.put(gpa, "apple", 10);
    try builder.put(gpa, "banana", 20);
    try builder.put(gpa, "apple", 30); // duplicate key
    try builder.put(gpa, "cherry", 40);
    try builder.put(gpa, "banana", 50); // duplicate key

    // Count reflects raw inserts until deduplication runs.
    try testing.expectEqual(@as(usize, 5), builder.count());

    builder.ensureSorted(gpa);

    // Deduplication keeps exactly one entry per key...
    try testing.expectEqual(@as(usize, 3), builder.count());

    // ...and the surviving value is the last one inserted for each key.
    try testing.expectEqual(@as(?u32, 30), builder.get(gpa, "apple"));
    try testing.expectEqual(@as(?u32, 50), builder.get(gpa, "banana"));
    try testing.expectEqual(@as(?u32, 40), builder.get(gpa, "cherry"));
}
test "SortedArrayBuilder handles duplicates with numeric keys" {
    const testing = std.testing;
    const allocator = testing.allocator;
    var builder = SortedArrayBuilder(u32, []const u8).init();
    defer builder.deinit(allocator);
    // Add duplicate keys with different values
    try builder.put(allocator, 100, "first");
    try builder.put(allocator, 200, "second");
    try builder.put(allocator, 100, "updated_first"); // duplicate key
    try builder.put(allocator, 300, "third");
    // Initial count includes duplicates
    try testing.expectEqual(@as(usize, 4), builder.count());
    // Force sorting and deduplication
    builder.ensureSorted(allocator);
    // Count should be reduced after deduplication
    try testing.expectEqual(@as(usize, 3), builder.count());
    // Should return the last value for each key.
    // Compare string CONTENTS, not slice identity: expectEqual on
    // ?[]const u8 compares ptr+len and only passed previously because the
    // compiler deduplicates identical string literals.
    try testing.expectEqualStrings("updated_first", builder.get(allocator, 100) orelse return error.TestExpectedValue);
    try testing.expectEqualStrings("second", builder.get(allocator, 200) orelse return error.TestExpectedValue);
    try testing.expectEqualStrings("third", builder.get(allocator, 300) orelse return error.TestExpectedValue);
}
test "SortedArrayBuilder no duplicates case" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var builder = SortedArrayBuilder([]const u8, u16).init();
    defer builder.deinit(gpa);

    // Three distinct keys, no duplicates anywhere.
    try builder.put(gpa, "unique1", 1);
    try builder.put(gpa, "unique2", 2);
    try builder.put(gpa, "unique3", 3);

    const count_before = builder.count();

    // Sorting a duplicate-free builder must not drop any entries.
    builder.ensureSorted(gpa);
    try testing.expectEqual(count_before, builder.count());

    // Every value remains retrievable after the sort.
    try testing.expectEqual(@as(?u16, 1), builder.get(gpa, "unique1"));
    try testing.expectEqual(@as(?u16, 2), builder.get(gpa, "unique2"));
    try testing.expectEqual(@as(?u16, 3), builder.get(gpa, "unique3"));
}

View file

@ -17,6 +17,9 @@ pub const SafeMultiList = @import("safe_list.zig").SafeMultiList;
pub const SafeStringHashMap = @import("safe_hash_map.zig").SafeStringHashMap;
pub const SortedArrayBuilder = @import("SortedArrayBuilder.zig").SortedArrayBuilder;
pub const ExposedItems = @import("ExposedItems.zig").ExposedItems;
/// A range that must have at least one element
pub const NonEmptyRange = struct {
/// Starting index (inclusive)

View file

@ -68,12 +68,8 @@ idents: Ident.Store,
ident_ids_for_slicing: collections.SafeList(Ident.Idx),
strings: StringLiteral.Store,
types: types_mod.Store,
/// Map of exposed items by their string representation (not interned)
/// This is built during canonicalization and preserved for later use
exposed_by_str: collections.SafeStringHashMap(void),
/// Map of exposed item names to their node indices (stored as u16)
/// This is populated during canonicalization to allow cross-module lookups
exposed_nodes: collections.SafeStringHashMap(u16),
/// The items (a combination of types and values) that this module exposes
exposed_items: collections.ExposedItems,
/// Line starts for error reporting. We retain only start and offset positions in the IR
/// and then use these line starts to calculate the line number and column number as required.
@ -130,8 +126,7 @@ pub fn init(gpa: std.mem.Allocator, source: []const u8) std.mem.Allocator.Error!
.ident_ids_for_slicing = try collections.SafeList(Ident.Idx).initCapacity(gpa, 256),
.strings = try StringLiteral.Store.initCapacityBytes(gpa, 4096),
.types = try types_mod.Store.initCapacity(gpa, 2048, 512),
.exposed_by_str = try collections.SafeStringHashMap(void).initCapacity(gpa, 64),
.exposed_nodes = try collections.SafeStringHashMap(u16).initCapacity(gpa, 64),
.exposed_items = collections.ExposedItems.init(),
.line_starts = try collections.SafeList(u32).initCapacity(gpa, 256),
.source = source,
// Initialize compilation fields with empty/default values
@ -152,8 +147,7 @@ pub fn deinit(self: *Self) void {
self.strings.deinit(self.gpa);
self.types.deinit();
self.line_starts.deinit(self.gpa);
self.exposed_by_str.deinit(self.gpa);
self.exposed_nodes.deinit(self.gpa);
self.exposed_items.deinit(self.gpa);
// Clean up compilation fields
self.external_decls.deinit(self.gpa);
self.imports.deinit(self.gpa);