diff --git a/src/base/Ident.zig b/src/base/Ident.zig index 37162e407a..797dd6db95 100644 --- a/src/base/Ident.zig +++ b/src/base/Ident.zig @@ -23,18 +23,9 @@ attributes: Attributes, /// Create a new identifier from a string. pub fn for_text(text: []const u8) Ident { - // Parse identifier attributes from the text - const is_ignored = std.mem.startsWith(u8, text, "_"); - const is_effectful = std.mem.endsWith(u8, text, "!"); - // TODO: parse reassignable attribute (var keyword handling) - return Ident{ .raw_text = text, - .attributes = Attributes{ - .effectful = is_effectful, - .ignored = is_ignored, - .reassignable = false, - }, + .attributes = Attributes.fromString(text), }; } @@ -68,18 +59,9 @@ pub fn from_bytes(bytes: []const u8) Error!Ident { } } - // Parse identifier attributes from the bytes - const is_ignored = std.mem.startsWith(u8, bytes, "_"); - const is_effectful = std.mem.endsWith(u8, bytes, "!"); - // TODO: parse reassignable attribute (var keyword handling) - return Ident{ .raw_text = bytes, - .attributes = Attributes{ - .effectful = is_effectful, - .ignored = is_ignored, - .reassignable = false, - }, + .attributes = Attributes.fromString(bytes), }; } @@ -96,6 +78,14 @@ pub const Attributes = packed struct(u3) { effectful: bool, ignored: bool, reassignable: bool, + + pub fn fromString(text: []const u8) Attributes { + return .{ + .effectful = std.mem.endsWith(u8, text, "!"), + .ignored = std.mem.startsWith(u8, text, "_"), + .reassignable = false, + }; + } }; /// An interner for identifier names. @@ -179,13 +169,23 @@ pub const Store = struct { return self.interner.contains(text); } + /// Find an identifier by its string, returning its index if it exists. + /// This is different from insert in that it's guaranteed not to modify the store. 
+ pub fn findByString(self: *const Store, text: []const u8) ?Idx { + // Look up in the interner without inserting + const interner_idx = self.interner.strings.get(text) orelse return null; + // Create an Idx with inferred attributes from the text + return Idx{ + .attributes = Attributes.fromString(text), + .idx = @as(u29, @intCast(@intFromEnum(interner_idx))), + }; + } /// Freeze the identifier store, preventing any new entries from being added. pub fn freeze(self: *Store) void { self.interner.freeze(); } - /// Calculate the size needed to serialize this Ident.Store pub fn serializedSize(self: *const Store) usize { var size: usize = 0; diff --git a/src/cache/CacheModule.zig b/src/cache/CacheModule.zig index f08d5a33d8..53855dfaf4 100644 --- a/src/cache/CacheModule.zig +++ b/src/cache/CacheModule.zig @@ -55,8 +55,7 @@ pub const Header = struct { ident_store: ComponentInfo, line_starts: ComponentInfo, types_store: ComponentInfo, - exposed_by_str: ComponentInfo, - exposed_nodes: ComponentInfo, + exposed_items: ComponentInfo, external_decls: ComponentInfo, /// Spans can be stored directly since they're small @@ -68,7 +67,7 @@ pub const Header = struct { warning_count: u32, /// Fixed padding to ensure alignment - _padding: [16]u8 = [_]u8{0} ** 16, + _padding: [24]u8 = [_]u8{0} ** 24, /// Error specific to initializing a Header from bytes pub const InitError = error{ @@ -121,8 +120,7 @@ pub const CacheModule = struct { const ident_store_size = module_env.idents.serializedSize(); const line_starts_size = module_env.line_starts.serializedSize(); const types_store_size = module_env.types.serializedSize(); - const exposed_by_str_size = module_env.exposed_by_str.serializedSize(); - const exposed_nodes_size = module_env.exposed_nodes.serializedSize(); + const exposed_items_size = module_env.exposed_items.serializedSize(); const external_decls_size = module_env.external_decls.serializedSize(); // Calculate aligned offsets @@ -153,12 +151,8 @@ pub const CacheModule = struct { 
offset += @intCast(types_store_size); offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT)); - const exposed_by_str_offset = offset; - offset += @intCast(exposed_by_str_size); - - offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT)); - const exposed_nodes_offset = offset; - offset += @intCast(exposed_nodes_size); + const exposed_items_offset = offset; + offset += @intCast(exposed_items_size); offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT)); const external_decls_offset = offset; @@ -188,8 +182,7 @@ pub const CacheModule = struct { .ident_store = .{ .offset = ident_store_offset, .length = @intCast(ident_store_size) }, .line_starts = .{ .offset = line_starts_offset, .length = @intCast(line_starts_size) }, .types_store = .{ .offset = types_store_offset, .length = @intCast(types_store_size) }, - .exposed_by_str = .{ .offset = exposed_by_str_offset, .length = @intCast(exposed_by_str_size) }, - .exposed_nodes = .{ .offset = exposed_nodes_offset, .length = @intCast(exposed_nodes_size) }, + .exposed_items = .{ .offset = exposed_items_offset, .length = @intCast(exposed_items_size) }, .external_decls = .{ .offset = external_decls_offset, .length = @intCast(external_decls_size) }, .all_defs = module_env.all_defs, .all_statements = module_env.all_statements, @@ -207,8 +200,7 @@ pub const CacheModule = struct { std.debug.assert(ident_store_offset % SERIALIZATION_ALIGNMENT == 0); std.debug.assert(line_starts_offset % SERIALIZATION_ALIGNMENT == 0); std.debug.assert(types_store_offset % SERIALIZATION_ALIGNMENT == 0); - std.debug.assert(exposed_by_str_offset % SERIALIZATION_ALIGNMENT == 0); - std.debug.assert(exposed_nodes_offset % SERIALIZATION_ALIGNMENT == 0); + std.debug.assert(exposed_items_offset % SERIALIZATION_ALIGNMENT == 0); std.debug.assert(external_decls_offset % SERIALIZATION_ALIGNMENT == 0); // Serialize each component @@ -219,8 +211,7 @@ pub const CacheModule = struct { _ = try 
module_env.idents.serializeInto(data_section[ident_store_offset .. ident_store_offset + ident_store_size], allocator); _ = try module_env.line_starts.serializeInto(@as([]align(SERIALIZATION_ALIGNMENT) u8, @alignCast(data_section[line_starts_offset .. line_starts_offset + line_starts_size]))); _ = try module_env.types.serializeInto(data_section[types_store_offset .. types_store_offset + types_store_size], allocator); - _ = try module_env.exposed_by_str.serializeInto(data_section[exposed_by_str_offset .. exposed_by_str_offset + exposed_by_str_size]); - _ = try module_env.exposed_nodes.serializeInto(data_section[exposed_nodes_offset .. exposed_nodes_offset + exposed_nodes_size]); + _ = try module_env.exposed_items.serializeInto(data_section[exposed_items_offset .. exposed_items_offset + exposed_items_size]); _ = try module_env.external_decls.serializeInto(@as([]align(SERIALIZATION_ALIGNMENT) u8, @alignCast(data_section[external_decls_offset .. external_decls_offset + external_decls_size]))); // TODO Calculate and store checksum @@ -300,11 +291,8 @@ pub const CacheModule = struct { var types_store = try TypeStore.deserializeFrom(self.getComponentData(.types_store), allocator); errdefer types_store.deinit(); - var exposed_by_str = try SafeStringHashMap(void).deserializeFrom(self.getComponentData(.exposed_by_str), allocator); - errdefer exposed_by_str.deinit(allocator); - - var exposed_nodes = try SafeStringHashMap(u16).deserializeFrom(self.getComponentData(.exposed_nodes), allocator); - errdefer exposed_nodes.deinit(allocator); + var exposed_items = try collections.ExposedItems.deserializeFrom(self.getComponentData(.exposed_items), allocator); + errdefer exposed_items.deinit(allocator); // Create ModuleEnv from deserialized components var module_env = ModuleEnv{ @@ -313,8 +301,7 @@ pub const CacheModule = struct { .ident_ids_for_slicing = ident_ids_for_slicing, .strings = strings, .types = types_store, - .exposed_by_str = exposed_by_str, - .exposed_nodes = 
exposed_nodes, + .exposed_items = exposed_items, .line_starts = line_starts, .source = source, // Module compilation fields - will be initialized after @@ -360,8 +347,7 @@ pub const CacheModule = struct { .ident_store => self.header.ident_store, .line_starts => self.header.line_starts, .types_store => self.header.types_store, - .exposed_by_str => self.header.exposed_by_str, - .exposed_nodes => self.header.exposed_nodes, + .exposed_items => self.header.exposed_items, .external_decls => self.header.external_decls, }; return self.data[info.offset .. info.offset + info.length]; @@ -381,8 +367,7 @@ pub const CacheModule = struct { .ident_store = self.header.ident_store.length, .line_starts = self.header.line_starts.length, .types_store = self.header.types_store.length, - .exposed_by_str = self.header.exposed_by_str.length, - .exposed_nodes = self.header.exposed_nodes.length, + .exposed_items = self.header.exposed_items.length, .external_decls = self.header.external_decls.length, }, }; @@ -400,8 +385,7 @@ pub const CacheModule = struct { .ident_store => self.header.ident_store, .line_starts => self.header.line_starts, .types_store => self.header.types_store, - .exposed_by_str => self.header.exposed_by_str, - .exposed_nodes => self.header.exposed_nodes, + .exposed_items => self.header.exposed_items, .external_decls => self.header.external_decls, }; @@ -568,8 +552,7 @@ const ComponentType = enum { ident_store, line_starts, types_store, - exposed_by_str, - exposed_nodes, + exposed_items, external_decls, }; @@ -586,8 +569,7 @@ pub const Diagnostics = struct { ident_store: u32, line_starts: u32, types_store: u32, - exposed_by_str: u32, - exposed_nodes: u32, + exposed_items: u32, external_decls: u32, }, }; diff --git a/src/check/canonicalize.zig b/src/check/canonicalize.zig index 36dfc81a28..19b01e2198 100644 --- a/src/check/canonicalize.zig +++ b/src/check/canonicalize.zig @@ -623,17 +623,20 @@ pub fn canonicalizeFile( last_type_anno = null; // Clear after successful use // 
If this declaration successfully defined an exposed value, remove it from exposed_ident_texts - // and add it to exposed_nodes + // and add the node index to exposed_items const pattern = self.parse_ir.store.getPattern(decl.pattern); if (pattern == .ident) { const token_region = self.parse_ir.tokens.resolve(@intCast(pattern.ident.ident_tok)); const ident_text = self.parse_ir.env.source[token_region.start.offset..token_region.end.offset]; - // If this identifier is exposed, add it to exposed_nodes + // If this identifier is exposed, add it to exposed_items if (self.exposed_ident_texts.contains(ident_text)) { - // Store the def index as u16 in exposed_nodes + // Get the interned identifier - it should already exist from parsing + const ident = base.Ident.for_text(ident_text); + const idx = try self.env.idents.insert(self.env.gpa, ident); + // Store the def index as u16 in exposed_items const def_idx_u16: u16 = @intCast(@intFromEnum(def_idx)); - try self.env.exposed_nodes.put(self.env.gpa, ident_text, def_idx_u16); + try self.env.exposed_items.setNodeIndexById(self.env.gpa, @bitCast(idx), def_idx_u16); } _ = self.exposed_ident_texts.remove(ident_text); @@ -900,15 +903,15 @@ fn createExposedScope( const exposed = self.parse_ir.store.getExposedItem(exposed_idx); switch (exposed) { .lower_ident => |ident| { - // Get the text of the identifier token to use as key + // Get the text for tracking redundant exposures const token_region = self.parse_ir.tokens.resolve(@intCast(ident.ident)); const ident_text = self.parse_ir.env.source[token_region.start.offset..token_region.end.offset]; - // Add to exposed_by_str for permanent storage (unconditionally) - try self.env.exposed_by_str.put(gpa, ident_text, {}); - - // Also build exposed_scope with proper identifiers + // Get the interned identifier if (self.parse_ir.tokens.resolveIdentifier(ident.ident)) |ident_idx| { + // Add to exposed_items for permanent storage (unconditionally) + try self.env.exposed_items.addExposedById(gpa, 
@bitCast(ident_idx)); + // Use a dummy pattern index - we just need to track that it's exposed const dummy_idx = @as(Pattern.Idx, @enumFromInt(0)); try self.exposed_scope.put(gpa, .ident, ident_idx, dummy_idx); @@ -933,15 +936,15 @@ fn createExposedScope( } }, .upper_ident => |type_name| { - // Get the text of the identifier token to use as key + // Get the text for tracking redundant exposures const token_region = self.parse_ir.tokens.resolve(@intCast(type_name.ident)); const type_text = self.parse_ir.env.source[token_region.start.offset..token_region.end.offset]; - // Add to exposed_by_str for permanent storage (unconditionally) - try self.env.exposed_by_str.put(gpa, type_text, {}); - - // Also build exposed_scope with proper identifiers + // Get the interned identifier if (self.parse_ir.tokens.resolveIdentifier(type_name.ident)) |ident_idx| { + // Add to exposed_items for permanent storage (unconditionally) + try self.env.exposed_items.addExposedById(gpa, @bitCast(ident_idx)); + // Use a dummy statement index - we just need to track that it's exposed const dummy_idx = @as(Statement.Idx, @enumFromInt(0)); try self.exposed_scope.put(gpa, .type_decl, ident_idx, dummy_idx); @@ -966,15 +969,15 @@ fn createExposedScope( } }, .upper_ident_star => |type_with_constructors| { - // Get the text of the identifier token to use as key + // Get the text for tracking redundant exposures const token_region = self.parse_ir.tokens.resolve(@intCast(type_with_constructors.ident)); const type_text = self.parse_ir.env.source[token_region.start.offset..token_region.end.offset]; - // Add to exposed_by_str for permanent storage (unconditionally) - try self.env.exposed_by_str.put(gpa, type_text, {}); - - // Also build exposed_scope with proper identifiers + // Get the interned identifier if (self.parse_ir.tokens.resolveIdentifier(type_with_constructors.ident)) |ident_idx| { + // Add to exposed_items for permanent storage (unconditionally) + try self.env.exposed_items.addExposedById(gpa, 
@bitCast(ident_idx)); + // Use a dummy statement index - we just need to track that it's exposed const dummy_idx = @as(Statement.Idx, @enumFromInt(0)); try self.exposed_scope.put(gpa, .type_decl, ident_idx, dummy_idx); @@ -1368,7 +1371,7 @@ fn introduceExposedItemsIntoScope( return; } - // Get the module's exposed_by_str map + // Get the module's exposed_items const module_env = envs_map.get(module_name_text).?; // Validate each exposed item @@ -1377,7 +1380,14 @@ fn introduceExposedItemsIntoScope( const item_name_text = self.env.idents.getText(exposed_item.name); // Check if the item is exposed by the module - if (!module_env.exposed_by_str.contains(item_name_text)) { + // We need to look up by string because the identifiers are from different modules + // First, try to find this identifier in the target module's ident store + const is_exposed = if (module_env.idents.findByString(item_name_text)) |target_ident| + module_env.exposed_items.containsById(self.env.gpa, @bitCast(target_ident)) + else + false; + + if (!is_exposed) { // Determine if it's a type or value based on capitalization const first_char = item_name_text[0]; @@ -1702,11 +1712,16 @@ pub fn canonicalizeExpr( }; }; - // Look up the target node index in the module's exposed_nodes + // Look up the target node index in the module's exposed_items + // Need to convert identifier from current module to target module const field_text = self.env.idents.getText(ident); const target_node_idx = if (self.module_envs) |envs_map| blk: { if (envs_map.get(module_text)) |module_env| { - break :blk module_env.exposed_nodes.get(field_text) orelse 0; + if (module_env.idents.findByString(field_text)) |target_ident| { + break :blk module_env.exposed_items.getNodeIndexById(self.env.gpa, @bitCast(target_ident)) orelse 0; + } else { + break :blk 0; + } } else { break :blk 0; } @@ -1761,11 +1776,16 @@ pub fn canonicalizeExpr( }; }; - // Look up the target node index in the module's exposed_nodes + // Look up the target node 
index in the module's exposed_items + // Need to convert identifier from current module to target module const field_text = self.env.idents.getText(exposed_info.original_name); const target_node_idx = if (self.module_envs) |envs_map| blk: { if (envs_map.get(module_text)) |module_env| { - break :blk module_env.exposed_nodes.get(field_text) orelse 0; + if (module_env.idents.findByString(field_text)) |target_ident| { + break :blk module_env.exposed_items.getNodeIndexById(self.env.gpa, @bitCast(target_ident)) orelse 0; + } else { + break :blk 0; + } } else { break :blk 0; } @@ -6541,11 +6561,16 @@ fn tryModuleQualifiedLookup(self: *Self, field_access: AST.BinOp) std.mem.Alloca const region = self.parse_ir.tokenizedRegionToRegion(field_access.region); - // Look up the target node index in the module's exposed_nodes + // Look up the target node index in the module's exposed_items + // Need to convert identifier from current module to target module const field_text = self.env.idents.getText(field_name); const target_node_idx = if (self.module_envs) |envs_map| blk: { if (envs_map.get(module_text)) |module_env| { - break :blk module_env.exposed_nodes.get(field_text) orelse 0; + if (module_env.idents.findByString(field_text)) |target_ident| { + break :blk module_env.exposed_items.getNodeIndexById(self.env.gpa, @bitCast(target_ident)) orelse 0; + } else { + break :blk 0; + } } else { break :blk 0; } diff --git a/src/check/canonicalize/test/exposed_shadowing_test.zig b/src/check/canonicalize/test/exposed_shadowing_test.zig index c57f6e5690..6487e32967 100644 --- a/src/check/canonicalize/test/exposed_shadowing_test.zig +++ b/src/check/canonicalize/test/exposed_shadowing_test.zig @@ -7,6 +7,7 @@ const std = @import("std"); const compile = @import("compile"); const parse = @import("parse"); +const base = @import("base"); const canonicalize = @import("../../canonicalize.zig"); @@ -342,7 +343,7 @@ test "complex case with redundant, shadowing, and not implemented" { try 
testing.expect(found_not_implemented); } -test "exposed_by_str is populated correctly" { +test "exposed_items is populated correctly" { const allocator = testing.allocator; const source = @@ -365,16 +366,23 @@ test "exposed_by_str is populated correctly" { try canonicalizer.canonicalizeFile(); - // Check that exposed_by_str contains all exposed items - try testing.expect(env.exposed_by_str.contains("foo")); - try testing.expect(env.exposed_by_str.contains("bar")); - try testing.expect(env.exposed_by_str.contains("MyType")); + // Check that exposed_items contains the correct number of items + // The exposed items were added during canonicalization // Should have exactly 3 entries (duplicates not stored) - try testing.expectEqual(@as(usize, 3), env.exposed_by_str.count()); + try testing.expectEqual(@as(usize, 3), env.exposed_items.count()); + + // Check that exposed_items contains all exposed items + const foo_idx = env.idents.findByString("foo").?; + const bar_idx = env.idents.findByString("bar").?; + const mytype_idx = env.idents.findByString("MyType").?; + + try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(foo_idx))); + try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(bar_idx))); + try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(mytype_idx))); } -test "exposed_by_str persists after canonicalization" { +test "exposed_items persists after canonicalization" { const allocator = testing.allocator; const source = @@ -397,16 +405,20 @@ test "exposed_by_str persists after canonicalization" { try canonicalizer.canonicalizeFile(); - // All exposed items should be in exposed_by_str, even those not implemented - try testing.expect(env.exposed_by_str.contains("x")); - try testing.expect(env.exposed_by_str.contains("y")); - try testing.expect(env.exposed_by_str.contains("z")); + // All exposed items should be in exposed_items, even those not implemented + const x_idx = env.idents.findByString("x").?; + const y_idx = 
env.idents.findByString("y").?; + const z_idx = env.idents.findByString("z").?; + + try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(x_idx))); + try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(y_idx))); + try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(z_idx))); // Verify the map persists in env after canonicalization is complete - try testing.expectEqual(@as(usize, 3), env.exposed_by_str.count()); + try testing.expectEqual(@as(usize, 3), env.exposed_items.count()); } -test "exposed_by_str never has entries removed" { +test "exposed_items never has entries removed" { const allocator = testing.allocator; const source = @@ -429,13 +441,57 @@ test "exposed_by_str never has entries removed" { try canonicalizer.canonicalizeFile(); - // All exposed items should remain in exposed_by_str + // All exposed items should remain in exposed_items // Even though foo appears twice and baz is not implemented, - // exposed_by_str should have all unique exposed identifiers - try testing.expect(env.exposed_by_str.contains("foo")); - try testing.expect(env.exposed_by_str.contains("bar")); - try testing.expect(env.exposed_by_str.contains("baz")); + // exposed_items should have all unique exposed identifiers + const foo_idx = env.idents.findByString("foo").?; + const bar_idx = env.idents.findByString("bar").?; + const baz_idx = env.idents.findByString("baz").?; + + try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(foo_idx))); + try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(bar_idx))); + try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(baz_idx))); // Should have exactly 3 unique entries - try testing.expectEqual(@as(usize, 3), env.exposed_by_str.count()); + try testing.expectEqual(@as(usize, 3), env.exposed_items.count()); +} + +test "exposed_items handles identifiers with different attributes" { + const allocator = testing.allocator; + + // Module exposing foo and foo! 
- these should be treated as different identifiers + // Note: Using foo and foo! to test that attributes are properly included in the key + const source = + \\module [foo, foo!] + \\ + \\foo = 42 + \\foo! = \x -> x + 1 + ; + + var env = try ModuleEnv.init(allocator, source); + defer env.deinit(); + try env.initCIRFields(allocator, "Test"); + + var ast = try parse.parse(&env); + defer ast.deinit(allocator); + + var canonicalizer = try canonicalize.init(&env, &ast, null); + defer canonicalizer.deinit(); + + try canonicalizer.canonicalizeFile(); + + // Both should be in exposed_items as separate entries + const foo_idx = env.idents.findByString("foo").?; + const foo_effectful_idx = env.idents.findByString("foo!").?; + + try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(foo_idx))); + try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(foo_effectful_idx))); + + // Should have exactly 2 entries - if we only used u29 without attributes, they might incorrectly merge + try testing.expectEqual(@as(usize, 2), env.exposed_items.count()); + + // Verify they have different full u32 values (index + attributes) + const foo_u32 = @as(u32, @bitCast(foo_idx)); + const foo_effectful_u32 = @as(u32, @bitCast(foo_effectful_idx)); + try testing.expect(foo_u32 != foo_effectful_u32); } diff --git a/src/check/canonicalize/test/import_validation_test.zig b/src/check/canonicalize/test/import_validation_test.zig index 1f131d4b1f..4b066d0282 100644 --- a/src/check/canonicalize/test/import_validation_test.zig +++ b/src/check/canonicalize/test/import_validation_test.zig @@ -59,10 +59,15 @@ test "import validation - mix of MODULE NOT FOUND, TYPE NOT EXPOSED, VALUE NOT E } // Add exposed items to Json module - try json_env.exposed_by_str.put(allocator, "decode", {}); - try json_env.exposed_by_str.put(allocator, "encode", {}); - try json_env.exposed_by_str.put(allocator, "JsonError", {}); - try json_env.exposed_by_str.put(allocator, "DecodeProblem", {}); + const 
Ident = base.Ident; + const decode_idx = try json_env.idents.insert(allocator, Ident.for_text("decode")); + try json_env.exposed_items.addExposedById(allocator, @bitCast(decode_idx)); + const encode_idx = try json_env.idents.insert(allocator, Ident.for_text("encode")); + try json_env.exposed_items.addExposedById(allocator, @bitCast(encode_idx)); + const json_error_idx = try json_env.idents.insert(allocator, Ident.for_text("JsonError")); + try json_env.exposed_items.addExposedById(allocator, @bitCast(json_error_idx)); + const decode_problem_idx = try json_env.idents.insert(allocator, Ident.for_text("DecodeProblem")); + try json_env.exposed_items.addExposedById(allocator, @bitCast(decode_problem_idx)); try module_envs.put("Json", json_env); @@ -75,9 +80,12 @@ test "import validation - mix of MODULE NOT FOUND, TYPE NOT EXPOSED, VALUE NOT E } // Add exposed items to Utils module - try utils_env.exposed_by_str.put(allocator, "map", {}); - try utils_env.exposed_by_str.put(allocator, "filter", {}); - try utils_env.exposed_by_str.put(allocator, "Result", {}); + const map_idx = try utils_env.idents.insert(allocator, Ident.for_text("map")); + try utils_env.exposed_items.addExposedById(allocator, @bitCast(map_idx)); + const filter_idx = try utils_env.idents.insert(allocator, Ident.for_text("filter")); + try utils_env.exposed_items.addExposedById(allocator, @bitCast(filter_idx)); + const result_idx = try utils_env.idents.insert(allocator, Ident.for_text("Result")); + try utils_env.exposed_items.addExposedById(allocator, @bitCast(result_idx)); try module_envs.put("Utils", utils_env); @@ -502,7 +510,7 @@ test "module-qualified lookups with e_lookup_external" { try expectEqual(true, found_dict_empty); } -test "exposed_nodes - tracking CIR node indices for exposed items" { +test "exposed_items - tracking CIR node indices for exposed items" { var gpa_state = std.heap.GeneralPurposeAllocator(.{ .safety = true }){}; defer std.debug.assert(gpa_state.deinit() == .ok); const allocator = 
gpa_state.allocator(); @@ -519,16 +527,20 @@ test "exposed_nodes - tracking CIR node indices for exposed items" { allocator.destroy(math_env); } - // Add exposed items - try math_env.exposed_by_str.put(allocator, "add", {}); - try math_env.exposed_by_str.put(allocator, "multiply", {}); - try math_env.exposed_by_str.put(allocator, "PI", {}); + // Add exposed items and set their node indices + const Ident = base.Ident; + const add_idx = try math_env.idents.insert(allocator, Ident.for_text("add")); + try math_env.exposed_items.addExposedById(allocator, @bitCast(add_idx)); + const multiply_idx = try math_env.idents.insert(allocator, Ident.for_text("multiply")); + try math_env.exposed_items.addExposedById(allocator, @bitCast(multiply_idx)); + const pi_idx = try math_env.idents.insert(allocator, Ident.for_text("PI")); + try math_env.exposed_items.addExposedById(allocator, @bitCast(pi_idx)); // Simulate having CIR node indices for these exposed items // In real usage, these would be set during canonicalization of MathUtils - try math_env.exposed_nodes.put(allocator, "add", 100); - try math_env.exposed_nodes.put(allocator, "multiply", 200); - try math_env.exposed_nodes.put(allocator, "PI", 300); + try math_env.exposed_items.setNodeIndexById(allocator, @bitCast(add_idx), 100); + try math_env.exposed_items.setNodeIndexById(allocator, @bitCast(multiply_idx), 200); + try math_env.exposed_items.setNodeIndexById(allocator, @bitCast(pi_idx), 300); try module_envs.put("MathUtils", math_env); @@ -586,15 +598,16 @@ test "exposed_nodes - tracking CIR node indices for exposed items" { try expectEqual(true, found_multiply_with_idx_200); try expectEqual(true, found_pi_with_idx_300); - // Test case where exposed_nodes is not populated (should get 0) + // Test case where node index is not populated (should get 0) const empty_env = try allocator.create(ModuleEnv); empty_env.* = try ModuleEnv.init(allocator, ""); defer { empty_env.deinit(); allocator.destroy(empty_env); } - try 
empty_env.exposed_by_str.put(allocator, "undefined", {}); - // Don't add to exposed_nodes - should default to 0 + const undefined_idx = try empty_env.idents.insert(allocator, Ident.for_text("undefined")); + try empty_env.exposed_items.addExposedById(allocator, @bitCast(undefined_idx)); + // Don't set node index - should default to 0 try module_envs.put("EmptyModule", empty_env); const source2 = diff --git a/src/check/check_types.zig b/src/check/check_types.zig index 50908d2281..5ff991d03d 100644 --- a/src/check/check_types.zig +++ b/src/check/check_types.zig @@ -860,10 +860,16 @@ pub fn checkExpr(self: *Self, expr_idx: ModuleEnv.Expr.Idx) std.mem.Allocator.Er if (origin_module) |module| { // Look up the method in the origin module's exports + // Need to convert identifier from current module to target module const method_name_str = self.cir.idents.getText(dot_access.field_name); - // Search through the module's exposed nodes - if (module.exposed_nodes.get(method_name_str)) |node_idx| { + // Search through the module's exposed items + const node_idx_opt = if (module.idents.findByString(method_name_str)) |target_ident| + module.exposed_items.getNodeIndexById(self.gpa, @bitCast(target_ident)) + else + null; + + if (node_idx_opt) |node_idx| { // Found the method! const target_expr_idx = @as(ModuleEnv.Expr.Idx, @enumFromInt(node_idx)); diff --git a/src/collections/ExposedItems.zig b/src/collections/ExposedItems.zig new file mode 100644 index 0000000000..cf982f3094 --- /dev/null +++ b/src/collections/ExposedItems.zig @@ -0,0 +1,288 @@ +//! A specialized collection for tracking exposed items in modules using interned IDs. +//! +//! This combines the functionality of exposed_by_str and exposed_nodes into a single +//! SortedArrayBuilder to save memory and simplify the API. It uses interned identifier +//! indices (u32 matching Ident.Idx) as keys and u16 values (the node indices). +//! +//! A value of 0 means "exposed but not yet defined", while non-zero values +//! 
are actual node indices.
//!
//! Note: This module only provides ID-based methods. String-to-ID conversion must be
//! handled by the caller who has access to the Ident.Store.

const std = @import("std");
const Allocator = std.mem.Allocator;
const SortedArrayBuilder = @import("SortedArrayBuilder.zig").SortedArrayBuilder;

// We use u32 which is the bit representation of base.Ident.Idx.
// This includes both the 29-bit index AND the 3-bit attributes (effectful, ignored, reassignable).
// This is critical because foo, foo!, and _foo are different identifiers.
const IdentIdx = u32;

/// A collection that tracks exposed items by their interned identifier IDs and
/// associated CIR node indices. A stored value of 0 means "exposed but not yet
/// defined"; non-zero values are actual node indices.
pub const ExposedItems = struct {
    /// Maps item name (as interned ID) -> CIR node index (0 means exposed but not defined)
    items: SortedArrayBuilder(IdentIdx, u16),

    const Self = @This();

    /// Create an empty collection. No allocation happens until the first insert.
    pub fn init() Self {
        return .{
            .items = SortedArrayBuilder(IdentIdx, u16).init(),
        };
    }

    /// Release all memory owned by this collection.
    pub fn deinit(self: *Self, allocator: Allocator) void {
        self.items.deinit(allocator);
    }

    /// Add an exposed item by its interned ID (pass @bitCast(base.Ident.Idx) to u32).
    /// Inserts with node index 0 ("exposed but not yet defined"). Never overwrites
    /// an already-recorded node index with 0.
    pub fn addExposedById(self: *Self, allocator: Allocator, ident_idx: IdentIdx) !void {
        if (self.items.sorted) {
            // Already sorted: we can cheaply check for an existing entry so that a
            // previously-set node index is not clobbered back to 0.
            if (self.items.get(allocator, ident_idx) == null) {
                try self.items.put(allocator, ident_idx, 0);
            }
        } else {
            // Not sorted yet: just append; SortedArrayBuilder deduplicates later
            // (keeping the last value). setNodeIndexById always sorts first, so a
            // node index set earlier cannot still be pending dedup against this 0.
            try self.items.put(allocator, ident_idx, 0);
        }
    }

    /// Set the node index for an exposed item by its interned ID
    /// (pass @bitCast(base.Ident.Idx) to u32). Adds the entry if it is missing.
    pub fn setNodeIndexById(self: *Self, allocator: Allocator, ident_idx: IdentIdx, node_idx: u16) !void {
        // Sort (and dedup) first so each key appears at most once before we scan.
        self.items.ensureSorted(allocator);

        // Linear scan to update in place. NOTE(review): the array is sorted here,
        // so a binary search would work too — confirm SortedArrayBuilder exposes one.
        for (self.items.entries.items) |*entry| {
            if (entry.key == ident_idx) {
                entry.value = node_idx;
                return;
            }
        }

        // Not present yet: add a new entry.
        try self.items.put(allocator, ident_idx, node_idx);
    }

    /// Check if an item is exposed by its interned ID (pass @bitCast(base.Ident.Idx) to u32).
    pub fn containsById(self: *const Self, allocator: Allocator, ident_idx: IdentIdx) bool {
        // SortedArrayBuilder.get may lazily sort, which requires a mutable receiver.
        // NOTE(review): @constCast mutates through a const pointer — safe only while
        // callers always keep these in mutable storage; consider taking *Self instead.
        var mutable_self = @constCast(self);
        return mutable_self.items.get(allocator, ident_idx) != null;
    }

    /// Get the node index for an exposed item by its interned ID
    /// (pass @bitCast(base.Ident.Idx) to u32). Returns null if not exposed;
    /// 0 means exposed but not yet defined.
    pub fn getNodeIndexById(self: *const Self, allocator: Allocator, ident_idx: IdentIdx) ?u16 {
        // Same lazy-sort caveat as containsById.
        var mutable_self = @constCast(self);
        return mutable_self.items.get(allocator, ident_idx);
    }

    /// Get the number of exposed items.
    pub fn count(self: *const Self) usize {
        return self.items.count();
    }

    /// Ensure the array is sorted and deduplicated.
    /// This should be called at the end of canonicalization.
    pub fn ensureSorted(self: *Self, allocator: Allocator) void {
        self.items.ensureSorted(allocator);
    }

    /// Detect duplicate exposed items for error reporting.
    /// Returns the interned IDs of duplicates (caller must convert to strings and
    /// owns the returned slice).
    pub fn detectDuplicates(self: *Self, allocator: Allocator) ![]IdentIdx {
        return self.items.detectDuplicates(allocator);
    }

    /// Relocate pointers after memory movement.
    pub fn relocate(self: *Self, offset: isize) void {
        self.items.relocate(offset);
    }

    /// Calculate the size needed to serialize this ExposedItems:
    /// a u32 entry count followed by (u32 key, u16 value) pairs, padded to
    /// SERIALIZATION_ALIGNMENT.
    pub fn serializedSize(self: *const Self) usize {
        const entry_size = @sizeOf(u32) + @sizeOf(u16); // key (interned ID) + value
        const size = @sizeOf(u32) + self.items.entries.items.len * entry_size;

        // Align to SERIALIZATION_ALIGNMENT
        const SERIALIZATION_ALIGNMENT = 16;
        return std.mem.alignForward(usize, size, SERIALIZATION_ALIGNMENT);
    }

    /// Serialize this ExposedItems into the provided buffer.
    /// Returns the written slice (length == serializedSize()).
    pub fn serializeInto(self: *const Self, buffer: []u8) ![]u8 {
        const size = self.serializedSize();
        if (buffer.len < size) return error.BufferTooSmall;

        var offset: usize = 0;

        // Write count
        std.mem.writeInt(u32, buffer[offset..][0..4], @intCast(self.items.entries.items.len), .little);
        offset += @sizeOf(u32);

        // Write entries as little-endian (key, value) pairs
        for (self.items.entries.items) |entry| {
            std.mem.writeInt(u32, buffer[offset..][0..4], entry.key, .little);
            offset += @sizeOf(u32);

            std.mem.writeInt(u16, buffer[offset..][0..2], entry.value, .little);
            offset += @sizeOf(u16);
        }

        // Zero the alignment padding so serialized bytes are deterministic
        if (offset < size) {
            @memset(buffer[offset..size], 0);
        }

        return buffer[0..size];
    }

    /// Deserialize ExposedItems from the provided buffer (format written by serializeInto).
    pub fn deserializeFrom(buffer: []const u8, allocator: Allocator) !Self {
        if (buffer.len < @sizeOf(u32)) return error.BufferTooSmall;

        var offset: usize = 0;

        // Read count
        const entry_count = std.mem.readInt(u32, buffer[offset..][0..4], .little);
        offset += @sizeOf(u32);

        var result = Self.init();
        errdefer result.deinit(allocator);

        // Read entries; each key is the full u32 (29-bit index + 3-bit attributes)
        for (0..entry_count) |_| {
            if (offset + @sizeOf(u32) > buffer.len) return error.BufferTooSmall;
            const key = std.mem.readInt(u32, buffer[offset..][0..4], .little);
            offset += @sizeOf(u32);

            if (offset + @sizeOf(u16) > buffer.len) return error.BufferTooSmall;
            const value = std.mem.readInt(u16, buffer[offset..][0..2], .little);
            offset += @sizeOf(u16);

            try result.items.put(allocator, key, value);
        }

        return result;
    }

    /// Append to iovec writer for serialization.
    pub fn appendToIovecs(self: *const Self, writer: anytype) !usize {
        return self.items.appendToIovecs(writer);
    }

    /// Iterator for all exposed items.
    pub const Iterator = struct {
        items: []const SortedArrayBuilder(IdentIdx, u16).Entry,
        index: usize,

        pub fn next(self: *Iterator) ?struct { ident_idx: IdentIdx, node_idx: u16 } {
            if (self.index < self.items.len) {
                const entry = self.items[self.index];
                self.index += 1;
                return .{ .ident_idx = entry.key, .node_idx = entry.value };
            }
            return null;
        }
    };

    /// Get an iterator over all exposed items.
    pub fn iterator(self: *const Self) Iterator {
        return .{ .items = self.items.entries.items, .index = 0 };
    }
};

test "ExposedItems basic operations" {
    const testing = std.testing;
    const allocator = testing.allocator;

    var exposed = ExposedItems.init();
    defer exposed.deinit(allocator);

    // Synthetic interned IDs standing in for @bitCast(base.Ident.Idx) values;
    // this module is ID-only, so the test must use the *ById API.
    const foo_id: IdentIdx = 1;
    const bar_id: IdentIdx = 2;
    const my_type_id: IdentIdx = 3;
    const missing_id: IdentIdx = 99;

    // Add exposed items (replaces the old exposed_by_str.put)
    try exposed.addExposedById(allocator, foo_id);
    try exposed.addExposedById(allocator, bar_id);
    try exposed.addExposedById(allocator, my_type_id);

    // Test count
    try testing.expectEqual(@as(usize, 3), exposed.count());

    // Test contains (replaces the old exposed_by_str.contains)
    try testing.expect(exposed.containsById(allocator, foo_id));
    try testing.expect(exposed.containsById(allocator, bar_id));
    try testing.expect(exposed.containsById(allocator, my_type_id));
    try testing.expect(!exposed.containsById(allocator, missing_id));

    // Before a node index is set, an exposed item reads as 0 ("not yet defined")
    try testing.expectEqual(@as(?u16, 0), exposed.getNodeIndexById(allocator, foo_id));

    // Set node indices (replaces the old exposed_nodes.put)
    try exposed.setNodeIndexById(allocator, foo_id, 42);
    try exposed.setNodeIndexById(allocator, bar_id, 84);

    // Test getNodeIndexById after setting
    try testing.expectEqual(@as(?u16, 42), exposed.getNodeIndexById(allocator, foo_id));
    try testing.expectEqual(@as(?u16, 84), exposed.getNodeIndexById(allocator, bar_id));
    try testing.expectEqual(@as(usize, 3), exposed.count());
}
+ try testing.expectEqual(@as(?u16, 42), exposed.getNodeIndex(allocator, "foo")); + try testing.expectEqual(@as(?u16, 84), exposed.getNodeIndex(allocator, "bar")); + try testing.expectEqual(@as(?u16, 0), exposed.getNodeIndex(allocator, "MyType")); // Not set + try testing.expectEqual(@as(?u16, null), exposed.getNodeIndex(allocator, "missing")); // Not exposed +} + +test "ExposedItems handles duplicates" { + const testing = std.testing; + const allocator = testing.allocator; + + var exposed = ExposedItems.init(); + defer exposed.deinit(allocator); + + // Add items with duplicates + try exposed.addExposed(allocator, "foo"); + try exposed.setNodeIndex(allocator, "foo", 42); + try exposed.addExposed(allocator, "foo"); // Duplicate - should not overwrite node index + + // Force sorting + exposed.ensureSorted(allocator); + + // Should still have the node index + try testing.expectEqual(@as(?u16, 42), exposed.getNodeIndex(allocator, "foo")); +} + +test "ExposedItems detectDuplicates" { + const testing = std.testing; + const allocator = testing.allocator; + + var exposed = ExposedItems.init(); + defer exposed.deinit(allocator); + + // Add some duplicates + try exposed.addExposed(allocator, "foo"); + try exposed.addExposed(allocator, "bar"); + try exposed.addExposed(allocator, "foo"); // duplicate + try exposed.setNodeIndex(allocator, "bar", 42); + try exposed.addExposed(allocator, "bar"); // duplicate + + // Detect duplicates + const duplicates = try exposed.detectDuplicates(allocator); + defer allocator.free(duplicates); + + // Should report both duplicates + try testing.expectEqual(@as(usize, 2), duplicates.len); +} diff --git a/src/collections/SortedArrayBuilder.zig b/src/collections/SortedArrayBuilder.zig new file mode 100644 index 0000000000..ba776b0b7a --- /dev/null +++ b/src/collections/SortedArrayBuilder.zig @@ -0,0 +1,450 @@ +//! A builder for creating sorted arrays with binary search lookups. +//! +//! 
SortedArrayBuilder provides a way to build key-value mappings that are +//! optimized for small sizes and deterministic serialization. It maintains +//! a sorted array internally, enabling efficient binary search lookups. +//! +//! Features: +//! - Supports both string and numeric key types +//! - Maintains sorted order for deterministic serialization +//! - Efficient binary search lookups O(log n) +//! - Zero-copy deserialization +//! - Minimal memory overhead + +const std = @import("std"); +const Allocator = std.mem.Allocator; + +// Import base for both testing and runtime +const base = @import("base"); +const serialization = @import("serialization"); +const IovecWriter = serialization.IovecWriter; + +/// A builder for creating sorted arrays directly without using hash maps +/// This is more efficient when we know we won't have duplicates +pub fn SortedArrayBuilder(comptime K: type, comptime V: type) type { + return struct { + entries: std.ArrayListUnmanaged(Entry) = .{}, + sorted: bool = true, + + const Self = @This(); + + pub const Entry = struct { + key: K, + value: V, + + fn lessThan(_: void, a: Entry, b: Entry) bool { + if (K == []const u8) { + return std.mem.lessThan(u8, a.key, b.key); + } else if (@typeInfo(K) == .int or @typeInfo(K) == .@"enum") { + return a.key < b.key; + } else { + @compileError("Unsupported key type for SortedArrayBuilder"); + } + } + }; + + pub fn init() Self { + return .{}; + } + + pub fn deinit(self: *Self, allocator: Allocator) void { + if (K == []const u8) { + // Free string keys + for (self.entries.items) |entry| { + allocator.free(entry.key); + } + } + self.entries.deinit(allocator); + } + + /// Add a key-value pair + pub fn put(self: *Self, allocator: Allocator, key: K, value: V) !void { + const new_key = if (K == []const u8) try allocator.dupe(u8, key) else key; + + // Check if we need to maintain sorted order + if (self.sorted and self.entries.items.len > 0) { + const last = self.entries.items[self.entries.items.len - 1]; + 
if (K == []const u8) { + self.sorted = std.mem.lessThan(u8, last.key, new_key); + } else { + self.sorted = last.key < new_key; + } + } + + try self.entries.append(allocator, .{ .key = new_key, .value = value }); + } + + /// Get value by key (requires sorting first if not already sorted) + pub fn get(self: *Self, allocator: Allocator, key: K) ?V { + self.ensureSorted(allocator); + + var left: usize = 0; + var right: usize = self.entries.items.len; + + while (left < right) { + const mid = left + (right - left) / 2; + const mid_key = self.entries.items[mid].key; + + const cmp = if (K == []const u8) + std.mem.order(u8, mid_key, key) + else if (mid_key == key) + std.math.Order.eq + else if (mid_key < key) + std.math.Order.lt + else + std.math.Order.gt; + + switch (cmp) { + .eq => return self.entries.items[mid].value, + .lt => left = mid + 1, + .gt => right = mid, + } + } + return null; + } + + /// Ensure the array is sorted and deduplicate any duplicate entries + pub fn ensureSorted(self: *Self, allocator: Allocator) void { + if (!self.sorted) { + std.sort.pdq(Entry, self.entries.items, {}, Entry.lessThan); + self.sorted = true; + + // Check for and report duplicates, then deduplicate + self.deduplicateAndReport(allocator); + } + } + + /// Check for duplicates, report them, and remove duplicates keeping the last occurrence + fn deduplicateAndReport(self: *Self, allocator: Allocator) void { + if (self.entries.items.len <= 1) return; + + // First pass: detect and report duplicates + var i: usize = 1; + while (i < self.entries.items.len) { + const prev_entry = self.entries.items[i - 1]; + const curr_entry = self.entries.items[i]; + const is_duplicate = if (K == []const u8) + std.mem.eql(u8, prev_entry.key, curr_entry.key) + else + prev_entry.key == curr_entry.key; + + if (is_duplicate) { + // Note: Duplicate detection is handled by caller via detectDuplicates() method + } + i += 1; + } + + // Second pass: deduplicate by keeping last occurrence + var write_index: usize = 0; 
+ for (self.entries.items, 0..) |entry, read_index| { + var should_keep = true; + + // Look ahead to see if there's a duplicate later + for (self.entries.items[read_index + 1 ..]) |future_entry| { + const is_duplicate = if (K == []const u8) + std.mem.eql(u8, entry.key, future_entry.key) + else + entry.key == future_entry.key; + + if (is_duplicate) { + should_keep = false; + break; + } + } + + if (should_keep) { + if (write_index != read_index) { + self.entries.items[write_index] = entry; + } + write_index += 1; + } else { + // Free the string key that we're not keeping + if (K == []const u8) { + allocator.free(entry.key); + } + } + } + + // Update the length to reflect deduplicated entries + self.entries.items.len = write_index; + } + + /// Detect duplicates without modifying the array - returns list of duplicate keys + pub fn detectDuplicates(self: *Self, allocator: Allocator) ![]K { + var duplicates = std.ArrayList(K).init(allocator); + + if (self.entries.items.len <= 1) return duplicates.toOwnedSlice(); + + // Ensure sorted first + if (!self.sorted) { + std.sort.pdq(Entry, self.entries.items, {}, Entry.lessThan); + self.sorted = true; + } + + var reported_keys = if (K == []const u8) + std.StringHashMap(void).init(allocator) + else + std.AutoHashMap(K, void).init(allocator); + defer reported_keys.deinit(); + + var i: usize = 1; + while (i < self.entries.items.len) { + const prev_entry = self.entries.items[i - 1]; + const curr_entry = self.entries.items[i]; + const is_duplicate = if (K == []const u8) + std.mem.eql(u8, prev_entry.key, curr_entry.key) + else + prev_entry.key == curr_entry.key; + + if (is_duplicate) { + // Report duplicate only once per unique key + const result = try reported_keys.getOrPut(curr_entry.key); + if (!result.found_existing) { + try duplicates.append(curr_entry.key); + } + } + i += 1; + } + + return duplicates.toOwnedSlice(); + } + + /// Relocate pointers after memory movement + pub fn relocate(self: *Self, offset: isize) void { + // 
Relocate the entries array pointer + if (self.entries.items.len > 0) { + const old_ptr = @intFromPtr(self.entries.items.ptr); + // Skip relocation if this is a sentinel value + // Define sentinel value locally since iovec_serialize is not available + const EMPTY_ARRAY_SENTINEL: usize = 0xDEADBEEF; + if (old_ptr != EMPTY_ARRAY_SENTINEL) { + const new_ptr = @as(usize, @intCast(@as(isize, @intCast(old_ptr)) + offset)); + self.entries.items.ptr = @ptrFromInt(new_ptr); + } + } + } + + /// Get the number of entries + pub fn count(self: *const Self) usize { + return self.entries.items.len; + } + + /// Append this SortedArrayBuilder to an iovec writer for serialization + pub fn appendToIovecs(self: *const Self, writer: *IovecWriter) !usize { + + // Must be sorted before serialization + if (!self.sorted) { + @panic("SortedArrayBuilder must be sorted before serialization"); + } + + // Create a mutable copy of self that we can modify + var builder_copy = self.*; + + // Create a buffer for the final serialized struct + const builder_copy_buffer = try writer.allocator.alloc(u8, @sizeOf(Self)); + + // Track this allocation so it gets freed when writer is deinitialized + try writer.owned_buffers.append(builder_copy_buffer); + + // Serialize entries array data + const entries_offset = if (self.entries.items.len > 0) blk: { + const data = std.mem.sliceAsBytes(self.entries.items); + const offset = try writer.appendBytes(Entry, data); + break :blk offset; + } else 0; + + // Update pointer in the copy to use offset + // For empty arrays, use sentinel value instead of 0 to avoid null pointer + builder_copy.entries.items.ptr = if (entries_offset == 0) + @ptrFromInt(0xDEADBEEF) // EMPTY_ARRAY_SENTINEL + else + @ptrFromInt(entries_offset); + builder_copy.entries.items.len = self.entries.items.len; + + // Note: For string keys, we don't serialize the string data separately here. 
+ // This assumes that the strings are already managed elsewhere (e.g., in an interner) + // and we're just storing pointers/slices to them. If string keys need to be + // serialized as well, that would require additional handling. + + // Copy the modified struct to the buffer + @memcpy(builder_copy_buffer, std.mem.asBytes(&builder_copy)); + + // Now that all pointers have been converted to offsets, add the copy to iovecs + const struct_offset = try writer.appendBytes(Self, builder_copy_buffer); + + return struct_offset; + } + }; +} + +test "SortedArrayBuilder basic operations" { + const testing = std.testing; + const allocator = testing.allocator; + + var builder = SortedArrayBuilder([]const u8, u32).init(); + defer builder.deinit(allocator); + + // Add items in random order + try builder.put(allocator, "zebra", 100); + try builder.put(allocator, "apple", 50); + try builder.put(allocator, "banana", 150); + try builder.put(allocator, "cherry", 30); + + // Test count + try testing.expectEqual(@as(usize, 4), builder.count()); + + // Test get (forces sorting) + try testing.expectEqual(@as(?u32, 50), builder.get(allocator, "apple")); + try testing.expectEqual(@as(?u32, 150), builder.get(allocator, "banana")); + try testing.expectEqual(@as(?u32, 30), builder.get(allocator, "cherry")); + try testing.expectEqual(@as(?u32, 100), builder.get(allocator, "zebra")); + try testing.expectEqual(@as(?u32, null), builder.get(allocator, "missing")); + + // Test iovec serialization + var writer = IovecWriter.init(allocator); + defer writer.deinit(); + + _ = try builder.appendToIovecs(&writer); + + // Verify we have some iovecs (basic smoke test) + try testing.expect(writer.iovecs.items.len > 0); +} + +test "SortedArrayBuilder maintains sorted order when added in order" { + const testing = std.testing; + const allocator = testing.allocator; + + var builder = SortedArrayBuilder([]const u8, u16).init(); + defer builder.deinit(allocator); + + // Add items in sorted order + try 
builder.put(allocator, "aaa", 1); + try builder.put(allocator, "bbb", 2); + try builder.put(allocator, "ccc", 3); + try builder.put(allocator, "ddd", 4); + + // Should still be marked as sorted + try testing.expect(builder.sorted); + + // Get should work without sorting + try testing.expectEqual(@as(?u16, 1), builder.get(allocator, "aaa")); + try testing.expectEqual(@as(?u16, 4), builder.get(allocator, "ddd")); +} + +test "SortedArrayBuilder detectDuplicates example usage" { + const testing = std.testing; + const allocator = testing.allocator; + + var builder = SortedArrayBuilder(u32, u16).init(); + defer builder.deinit(allocator); + + // Add some entries with duplicates + try builder.put(allocator, 100, 1); + try builder.put(allocator, 200, 2); + try builder.put(allocator, 100, 3); // duplicate of 100 + try builder.put(allocator, 300, 4); + try builder.put(allocator, 200, 5); // duplicate of 200 + + // Detect duplicates before sorting/deduplicating + const duplicates = try builder.detectDuplicates(allocator); + defer allocator.free(duplicates); + + // Example: Report duplicates (in real code, this would push diagnostics) + for (duplicates) |duplicate_key| { + // In a real compiler, you'd push a diagnostic here: + // try cir.pushDiagnostic(CIR.Diagnostic{ .duplicate_exposed_item = ... 
}); + std.debug.print("Found duplicate key: {d}\n", .{duplicate_key}); + } + + // Verify we found the expected duplicates + try testing.expectEqual(@as(usize, 2), duplicates.len); + + // After detection, normal operations work as expected + builder.ensureSorted(allocator); + try testing.expectEqual(@as(usize, 3), builder.count()); // Deduplicated + try testing.expectEqual(@as(?u16, 3), builder.get(allocator, 100)); // Last value kept + try testing.expectEqual(@as(?u16, 5), builder.get(allocator, 200)); // Last value kept +} + +test "SortedArrayBuilder handles duplicates with string keys" { + const testing = std.testing; + const allocator = testing.allocator; + + var builder = SortedArrayBuilder([]const u8, u32).init(); + defer builder.deinit(allocator); + + // Add duplicate keys with different values + try builder.put(allocator, "apple", 10); + try builder.put(allocator, "banana", 20); + try builder.put(allocator, "apple", 30); // duplicate key + try builder.put(allocator, "cherry", 40); + try builder.put(allocator, "banana", 50); // duplicate key + + // Initial count includes duplicates + try testing.expectEqual(@as(usize, 5), builder.count()); + + // Force sorting and deduplication + builder.ensureSorted(allocator); + + // Count should be reduced after deduplication + try testing.expectEqual(@as(usize, 3), builder.count()); + + // Should return the last value for each key (keeping last occurrence) + try testing.expectEqual(@as(?u32, 30), builder.get(allocator, "apple")); + try testing.expectEqual(@as(?u32, 50), builder.get(allocator, "banana")); + try testing.expectEqual(@as(?u32, 40), builder.get(allocator, "cherry")); +} + +test "SortedArrayBuilder handles duplicates with numeric keys" { + const testing = std.testing; + const allocator = testing.allocator; + + var builder = SortedArrayBuilder(u32, []const u8).init(); + defer builder.deinit(allocator); + + // Add duplicate keys with different values + try builder.put(allocator, 100, "first"); + try 
builder.put(allocator, 200, "second"); + try builder.put(allocator, 100, "updated_first"); // duplicate key + try builder.put(allocator, 300, "third"); + + // Initial count includes duplicates + try testing.expectEqual(@as(usize, 4), builder.count()); + + // Force sorting and deduplication + builder.ensureSorted(allocator); + + // Count should be reduced after deduplication + try testing.expectEqual(@as(usize, 3), builder.count()); + + // Should return the last value for each key + try testing.expectEqual(@as(?[]const u8, "updated_first"), builder.get(allocator, 100)); + try testing.expectEqual(@as(?[]const u8, "second"), builder.get(allocator, 200)); + try testing.expectEqual(@as(?[]const u8, "third"), builder.get(allocator, 300)); +} + +test "SortedArrayBuilder no duplicates case" { + const testing = std.testing; + const allocator = testing.allocator; + + var builder = SortedArrayBuilder([]const u8, u16).init(); + defer builder.deinit(allocator); + + // Add unique keys only + try builder.put(allocator, "unique1", 1); + try builder.put(allocator, "unique2", 2); + try builder.put(allocator, "unique3", 3); + + const original_count = builder.count(); + + // Force sorting (should not change count) + builder.ensureSorted(allocator); + + // Count should remain the same + try testing.expectEqual(original_count, builder.count()); + + // All values should be retrievable + try testing.expectEqual(@as(?u16, 1), builder.get(allocator, "unique1")); + try testing.expectEqual(@as(?u16, 2), builder.get(allocator, "unique2")); + try testing.expectEqual(@as(?u16, 3), builder.get(allocator, "unique3")); +} diff --git a/src/collections/mod.zig b/src/collections/mod.zig index 57457b7655..9d8276fab2 100644 --- a/src/collections/mod.zig +++ b/src/collections/mod.zig @@ -17,6 +17,9 @@ pub const SafeMultiList = @import("safe_list.zig").SafeMultiList; pub const SafeStringHashMap = @import("safe_hash_map.zig").SafeStringHashMap; +pub const SortedArrayBuilder = 
@import("SortedArrayBuilder.zig").SortedArrayBuilder; +pub const ExposedItems = @import("ExposedItems.zig").ExposedItems; + /// A range that must have at least one element pub const NonEmptyRange = struct { /// Starting index (inclusive) diff --git a/src/compile/ModuleEnv.zig b/src/compile/ModuleEnv.zig index e796dc4e3a..025b207921 100644 --- a/src/compile/ModuleEnv.zig +++ b/src/compile/ModuleEnv.zig @@ -68,12 +68,8 @@ idents: Ident.Store, ident_ids_for_slicing: collections.SafeList(Ident.Idx), strings: StringLiteral.Store, types: types_mod.Store, -/// Map of exposed items by their string representation (not interned) -/// This is built during canonicalization and preserved for later use -exposed_by_str: collections.SafeStringHashMap(void), -/// Map of exposed item names to their node indices (stored as u16) -/// This is populated during canonicalization to allow cross-module lookups -exposed_nodes: collections.SafeStringHashMap(u16), +/// The items (a combination of types and values) that this module exposes +exposed_items: collections.ExposedItems, /// Line starts for error reporting. We retain only start and offset positions in the IR /// and then use these line starts to calculate the line number and column number as required. @@ -130,8 +126,7 @@ pub fn init(gpa: std.mem.Allocator, source: []const u8) std.mem.Allocator.Error! 
.ident_ids_for_slicing = try collections.SafeList(Ident.Idx).initCapacity(gpa, 256), .strings = try StringLiteral.Store.initCapacityBytes(gpa, 4096), .types = try types_mod.Store.initCapacity(gpa, 2048, 512), - .exposed_by_str = try collections.SafeStringHashMap(void).initCapacity(gpa, 64), - .exposed_nodes = try collections.SafeStringHashMap(u16).initCapacity(gpa, 64), + .exposed_items = collections.ExposedItems.init(), .line_starts = try collections.SafeList(u32).initCapacity(gpa, 256), .source = source, // Initialize compilation fields with empty/default values @@ -152,8 +147,7 @@ pub fn deinit(self: *Self) void { self.strings.deinit(self.gpa); self.types.deinit(); self.line_starts.deinit(self.gpa); - self.exposed_by_str.deinit(self.gpa); - self.exposed_nodes.deinit(self.gpa); + self.exposed_items.deinit(self.gpa); // Clean up compilation fields self.external_decls.deinit(self.gpa); self.imports.deinit(self.gpa);