diff --git a/src/cache/CacheModule.zig b/src/cache/CacheModule.zig index f08d5a33d8..53855dfaf4 100644 --- a/src/cache/CacheModule.zig +++ b/src/cache/CacheModule.zig @@ -55,8 +55,7 @@ pub const Header = struct { ident_store: ComponentInfo, line_starts: ComponentInfo, types_store: ComponentInfo, - exposed_by_str: ComponentInfo, - exposed_nodes: ComponentInfo, + exposed_items: ComponentInfo, external_decls: ComponentInfo, /// Spans can be stored directly since they're small @@ -68,7 +67,7 @@ pub const Header = struct { warning_count: u32, /// Fixed padding to ensure alignment - _padding: [16]u8 = [_]u8{0} ** 16, + _padding: [24]u8 = [_]u8{0} ** 24, /// Error specific to initializing a Header from bytes pub const InitError = error{ @@ -121,8 +120,7 @@ pub const CacheModule = struct { const ident_store_size = module_env.idents.serializedSize(); const line_starts_size = module_env.line_starts.serializedSize(); const types_store_size = module_env.types.serializedSize(); - const exposed_by_str_size = module_env.exposed_by_str.serializedSize(); - const exposed_nodes_size = module_env.exposed_nodes.serializedSize(); + const exposed_items_size = module_env.exposed_items.serializedSize(); const external_decls_size = module_env.external_decls.serializedSize(); // Calculate aligned offsets @@ -153,12 +151,8 @@ pub const CacheModule = struct { offset += @intCast(types_store_size); offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT)); - const exposed_by_str_offset = offset; - offset += @intCast(exposed_by_str_size); - - offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT)); - const exposed_nodes_offset = offset; - offset += @intCast(exposed_nodes_size); + const exposed_items_offset = offset; + offset += @intCast(exposed_items_size); offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT)); const external_decls_offset = offset; @@ -188,8 +182,7 @@ pub const CacheModule = struct { .ident_store = .{ .offset = ident_store_offset, .length = @intCast(ident_store_size) }, .line_starts = .{ .offset = line_starts_offset, .length = @intCast(line_starts_size) }, .types_store = .{ .offset = types_store_offset, .length = @intCast(types_store_size) }, - .exposed_by_str = .{ .offset = exposed_by_str_offset, .length = @intCast(exposed_by_str_size) }, - .exposed_nodes = .{ .offset = exposed_nodes_offset, .length = @intCast(exposed_nodes_size) }, + .exposed_items = .{ .offset = exposed_items_offset, .length = @intCast(exposed_items_size) }, .external_decls = .{ .offset = external_decls_offset, .length = @intCast(external_decls_size) }, .all_defs = module_env.all_defs, .all_statements = module_env.all_statements, @@ -207,8 +200,7 @@ pub const CacheModule = struct { std.debug.assert(ident_store_offset % SERIALIZATION_ALIGNMENT == 0); std.debug.assert(line_starts_offset % SERIALIZATION_ALIGNMENT == 0); std.debug.assert(types_store_offset % SERIALIZATION_ALIGNMENT == 0); - std.debug.assert(exposed_by_str_offset % SERIALIZATION_ALIGNMENT == 0); - std.debug.assert(exposed_nodes_offset % SERIALIZATION_ALIGNMENT == 0); + std.debug.assert(exposed_items_offset % SERIALIZATION_ALIGNMENT == 0); std.debug.assert(external_decls_offset % SERIALIZATION_ALIGNMENT == 0); // Serialize each component @@ -219,8 +211,7 @@ pub const CacheModule = struct { _ = try module_env.idents.serializeInto(data_section[ident_store_offset .. ident_store_offset + ident_store_size], allocator); _ = try module_env.line_starts.serializeInto(@as([]align(SERIALIZATION_ALIGNMENT) u8, @alignCast(data_section[line_starts_offset .. line_starts_offset + line_starts_size]))); _ = try module_env.types.serializeInto(data_section[types_store_offset .. types_store_offset + types_store_size], allocator); - _ = try module_env.exposed_by_str.serializeInto(data_section[exposed_by_str_offset .. exposed_by_str_offset + exposed_by_str_size]); - _ = try module_env.exposed_nodes.serializeInto(data_section[exposed_nodes_offset .. exposed_nodes_offset + exposed_nodes_size]); + _ = try module_env.exposed_items.serializeInto(data_section[exposed_items_offset .. exposed_items_offset + exposed_items_size]); _ = try module_env.external_decls.serializeInto(@as([]align(SERIALIZATION_ALIGNMENT) u8, @alignCast(data_section[external_decls_offset .. external_decls_offset + external_decls_size]))); // TODO Calculate and store checksum @@ -300,11 +291,8 @@ pub const CacheModule = struct { var types_store = try TypeStore.deserializeFrom(self.getComponentData(.types_store), allocator); errdefer types_store.deinit(); - var exposed_by_str = try SafeStringHashMap(void).deserializeFrom(self.getComponentData(.exposed_by_str), allocator); - errdefer exposed_by_str.deinit(allocator); - - var exposed_nodes = try SafeStringHashMap(u16).deserializeFrom(self.getComponentData(.exposed_nodes), allocator); - errdefer exposed_nodes.deinit(allocator); + var exposed_items = try collections.ExposedItems.deserializeFrom(self.getComponentData(.exposed_items), allocator); + errdefer exposed_items.deinit(allocator); // Create ModuleEnv from deserialized components var module_env = ModuleEnv{ @@ -313,8 +301,7 @@ pub const CacheModule = struct { .ident_ids_for_slicing = ident_ids_for_slicing, .strings = strings, .types = types_store, - .exposed_by_str = exposed_by_str, - .exposed_nodes = exposed_nodes, + .exposed_items = exposed_items, .line_starts = line_starts, .source = source, // Module compilation fields - will be initialized after @@ -360,8 +347,7 @@ pub const CacheModule = struct { .ident_store => self.header.ident_store, .line_starts => self.header.line_starts, .types_store => self.header.types_store, - .exposed_by_str => self.header.exposed_by_str, - .exposed_nodes => self.header.exposed_nodes, + .exposed_items => self.header.exposed_items, .external_decls => self.header.external_decls, }; return self.data[info.offset .. info.offset + info.length]; @@ -381,8 +367,7 @@ pub const CacheModule = struct { .ident_store = self.header.ident_store.length, .line_starts = self.header.line_starts.length, .types_store = self.header.types_store.length, - .exposed_by_str = self.header.exposed_by_str.length, - .exposed_nodes = self.header.exposed_nodes.length, + .exposed_items = self.header.exposed_items.length, .external_decls = self.header.external_decls.length, }, }; @@ -400,8 +385,7 @@ pub const CacheModule = struct { .ident_store => self.header.ident_store, .line_starts => self.header.line_starts, .types_store => self.header.types_store, - .exposed_by_str => self.header.exposed_by_str, - .exposed_nodes => self.header.exposed_nodes, + .exposed_items => self.header.exposed_items, .external_decls => self.header.external_decls, }; @@ -568,8 +552,7 @@ const ComponentType = enum { ident_store, line_starts, types_store, - exposed_by_str, - exposed_nodes, + exposed_items, external_decls, }; @@ -586,8 +569,7 @@ pub const Diagnostics = struct { ident_store: u32, line_starts: u32, types_store: u32, - exposed_by_str: u32, - exposed_nodes: u32, + exposed_items: u32, external_decls: u32, }, }; diff --git a/src/check/canonicalize.zig b/src/check/canonicalize.zig index 36dfc81a28..c48cdbec02 100644 --- a/src/check/canonicalize.zig +++ b/src/check/canonicalize.zig @@ -623,17 +623,17 @@ pub fn canonicalizeFile( last_type_anno = null; // Clear after successful use // If this declaration successfully defined an exposed value, remove it from exposed_ident_texts - // and add it to exposed_nodes + // and add the node index to exposed_items const pattern = self.parse_ir.store.getPattern(decl.pattern); if (pattern == .ident) { const token_region = self.parse_ir.tokens.resolve(@intCast(pattern.ident.ident_tok)); const ident_text = self.parse_ir.env.source[token_region.start.offset..token_region.end.offset]; - // If this identifier is exposed, add it to exposed_nodes + // If this identifier is exposed, add it to exposed_items if (self.exposed_ident_texts.contains(ident_text)) { - // Store the def index as u16 in exposed_nodes + // Store the def index as u16 in exposed_items const def_idx_u16: u16 = @intCast(@intFromEnum(def_idx)); - try self.env.exposed_nodes.put(self.env.gpa, ident_text, def_idx_u16); + try self.env.exposed_items.setNodeIndex(self.env.gpa, ident_text, def_idx_u16); } _ = self.exposed_ident_texts.remove(ident_text); @@ -904,8 +904,8 @@ fn createExposedScope( const token_region = self.parse_ir.tokens.resolve(@intCast(ident.ident)); const ident_text = self.parse_ir.env.source[token_region.start.offset..token_region.end.offset]; - // Add to exposed_by_str for permanent storage (unconditionally) - try self.env.exposed_by_str.put(gpa, ident_text, {}); + // Add to exposed_items for permanent storage (unconditionally) + try self.env.exposed_items.addExposed(gpa, ident_text); // Also build exposed_scope with proper identifiers if (self.parse_ir.tokens.resolveIdentifier(ident.ident)) |ident_idx| { @@ -937,8 +937,8 @@ fn createExposedScope( const token_region = self.parse_ir.tokens.resolve(@intCast(type_name.ident)); const type_text = self.parse_ir.env.source[token_region.start.offset..token_region.end.offset]; - // Add to exposed_by_str for permanent storage (unconditionally) - try self.env.exposed_by_str.put(gpa, type_text, {}); + // Add to exposed_items for permanent storage (unconditionally) + try self.env.exposed_items.addExposed(gpa, type_text); // Also build exposed_scope with proper identifiers if (self.parse_ir.tokens.resolveIdentifier(type_name.ident)) |ident_idx| { @@ -970,8 +970,8 @@ fn createExposedScope( const token_region = self.parse_ir.tokens.resolve(@intCast(type_with_constructors.ident)); const type_text = self.parse_ir.env.source[token_region.start.offset..token_region.end.offset]; - // Add to exposed_by_str for permanent storage (unconditionally) - try self.env.exposed_by_str.put(gpa, type_text, {}); + // Add to exposed_items for permanent storage (unconditionally) + try self.env.exposed_items.addExposed(gpa, type_text); // Also build exposed_scope with proper identifiers if (self.parse_ir.tokens.resolveIdentifier(type_with_constructors.ident)) |ident_idx| { @@ -1368,7 +1368,7 @@ fn introduceExposedItemsIntoScope( return; } - // Get the module's exposed_by_str map + // Get the module's exposed_items const module_env = envs_map.get(module_name_text).?; // Validate each exposed item @@ -1377,7 +1377,7 @@ fn introduceExposedItemsIntoScope( const item_name_text = self.env.idents.getText(exposed_item.name); // Check if the item is exposed by the module - if (!module_env.exposed_by_str.contains(item_name_text)) { + if (!module_env.exposed_items.contains(self.env.gpa, item_name_text)) { // Determine if it's a type or value based on capitalization const first_char = item_name_text[0]; @@ -1702,11 +1702,11 @@ pub fn canonicalizeExpr( }; }; - // Look up the target node index in the module's exposed_nodes + // Look up the target node index in the module's exposed_items const field_text = self.env.idents.getText(ident); const target_node_idx = if (self.module_envs) |envs_map| blk: { if (envs_map.get(module_text)) |module_env| { - break :blk module_env.exposed_nodes.get(field_text) orelse 0; + break :blk module_env.exposed_items.getNodeIndex(self.env.gpa, field_text) orelse 0; } else { break :blk 0; } @@ -1761,11 +1761,11 @@ pub fn canonicalizeExpr( }; }; - // Look up the target node index in the module's exposed_nodes + // Look up the target node index in the module's exposed_items const field_text = self.env.idents.getText(exposed_info.original_name); const target_node_idx = if (self.module_envs) |envs_map| blk: { if (envs_map.get(module_text)) |module_env| { - break :blk module_env.exposed_nodes.get(field_text) orelse 0; + break :blk module_env.exposed_items.getNodeIndex(self.env.gpa, field_text) orelse 0; } else { break :blk 0; } @@ -6541,11 +6541,11 @@ fn tryModuleQualifiedLookup(self: *Self, field_access: AST.BinOp) std.mem.Alloca const region = self.parse_ir.tokenizedRegionToRegion(field_access.region); - // Look up the target node index in the module's exposed_nodes + // Look up the target node index in the module's exposed_items const field_text = self.env.idents.getText(field_name); const target_node_idx = if (self.module_envs) |envs_map| blk: { if (envs_map.get(module_text)) |module_env| { - break :blk module_env.exposed_nodes.get(field_text) orelse 0; + break :blk module_env.exposed_items.getNodeIndex(self.env.gpa, field_text) orelse 0; } else { break :blk 0; } diff --git a/src/check/canonicalize/test/exposed_shadowing_test.zig b/src/check/canonicalize/test/exposed_shadowing_test.zig index c57f6e5690..08f6931062 100644 --- a/src/check/canonicalize/test/exposed_shadowing_test.zig +++ b/src/check/canonicalize/test/exposed_shadowing_test.zig @@ -342,7 +342,7 @@ test "complex case with redundant, shadowing, and not implemented" { try testing.expect(found_not_implemented); } -test "exposed_by_str is populated correctly" { +test "exposed_items is populated correctly" { const allocator = testing.allocator; const source = @@ -365,16 +365,16 @@ test "exposed_by_str is populated correctly" { try canonicalizer.canonicalizeFile(); - // Check that exposed_by_str contains all exposed items - try testing.expect(env.exposed_by_str.contains("foo")); - try testing.expect(env.exposed_by_str.contains("bar")); - try testing.expect(env.exposed_by_str.contains("MyType")); + // Check that exposed_items contains all exposed items + try testing.expect(env.exposed_items.contains(env.gpa, "foo")); + try testing.expect(env.exposed_items.contains(env.gpa, "bar")); + try testing.expect(env.exposed_items.contains(env.gpa, "MyType")); // Should have exactly 3 entries (duplicates not stored) - try testing.expectEqual(@as(usize, 3), env.exposed_by_str.count()); + try testing.expectEqual(@as(usize, 3), env.exposed_items.count()); } -test "exposed_by_str persists after canonicalization" { +test "exposed_items persists after canonicalization" { const allocator = testing.allocator; const source = @@ -397,16 +397,16 @@ test "exposed_by_str persists after canonicalization" { try canonicalizer.canonicalizeFile(); - // All exposed items should be in exposed_by_str, even those not implemented - try testing.expect(env.exposed_by_str.contains("x")); - try testing.expect(env.exposed_by_str.contains("y")); - try testing.expect(env.exposed_by_str.contains("z")); + // All exposed items should be in exposed_items, even those not implemented + try testing.expect(env.exposed_items.contains(env.gpa, "x")); + try testing.expect(env.exposed_items.contains(env.gpa, "y")); + try testing.expect(env.exposed_items.contains(env.gpa, "z")); // Verify the map persists in env after canonicalization is complete - try testing.expectEqual(@as(usize, 3), env.exposed_by_str.count()); + try testing.expectEqual(@as(usize, 3), env.exposed_items.count()); } -test "exposed_by_str never has entries removed" { +test "exposed_items never has entries removed" { const allocator = testing.allocator; const source = @@ -429,13 +429,13 @@ test "exposed_by_str never has entries removed" { try canonicalizer.canonicalizeFile(); - // All exposed items should remain in exposed_by_str + // All exposed items should remain in exposed_items // Even though foo appears twice and baz is not implemented, - // exposed_by_str should have all unique exposed identifiers - try testing.expect(env.exposed_by_str.contains("foo")); - try testing.expect(env.exposed_by_str.contains("bar")); - try testing.expect(env.exposed_by_str.contains("baz")); + // exposed_items should have all unique exposed identifiers + try testing.expect(env.exposed_items.contains(env.gpa, "foo")); + try testing.expect(env.exposed_items.contains(env.gpa, "bar")); + try testing.expect(env.exposed_items.contains(env.gpa, "baz")); // Should have exactly 3 unique entries - try testing.expectEqual(@as(usize, 3), env.exposed_by_str.count()); + try testing.expectEqual(@as(usize, 3), env.exposed_items.count()); } diff --git a/src/check/canonicalize/test/import_validation_test.zig b/src/check/canonicalize/test/import_validation_test.zig index 06a106cacc..fcac1ddc45 100644 --- a/src/check/canonicalize/test/import_validation_test.zig +++ b/src/check/canonicalize/test/import_validation_test.zig @@ -59,10 +59,10 @@ test "import validation - mix of MODULE NOT FOUND, TYPE NOT EXPOSED, VALUE NOT E } // Add exposed items to Json module - try json_env.exposed_by_str.put(allocator, "decode", {}); - try json_env.exposed_by_str.put(allocator, "encode", {}); - try json_env.exposed_by_str.put(allocator, "JsonError", {}); - try json_env.exposed_by_str.put(allocator, "DecodeProblem", {}); + try json_env.exposed_items.addExposed(allocator, "decode"); + try json_env.exposed_items.addExposed(allocator, "encode"); + try json_env.exposed_items.addExposed(allocator, "JsonError"); + try json_env.exposed_items.addExposed(allocator, "DecodeProblem"); try module_envs.put("Json", json_env); @@ -75,9 +75,9 @@ test "import validation - mix of MODULE NOT FOUND, TYPE NOT EXPOSED, VALUE NOT E } // Add exposed items to Utils module - try utils_env.exposed_by_str.put(allocator, "map", {}); - try utils_env.exposed_by_str.put(allocator, "filter", {}); - try utils_env.exposed_by_str.put(allocator, "Result", {}); + try utils_env.exposed_items.addExposed(allocator, "map"); + try utils_env.exposed_items.addExposed(allocator, "filter"); + try utils_env.exposed_items.addExposed(allocator, "Result"); try module_envs.put("Utils", utils_env); @@ -502,7 +502,7 @@ test "module-qualified lookups with e_lookup_external" { try expectEqual(true, found_dict_empty); } -test "exposed_nodes - tracking CIR node indices for exposed items" { +test "exposed_items - tracking CIR node indices for exposed items" { var gpa_state = std.heap.GeneralPurposeAllocator(.{ .safety = true }){}; defer std.debug.assert(gpa_state.deinit() == .ok); const allocator = gpa_state.allocator(); @@ -519,16 +519,16 @@ test "exposed_nodes - tracking CIR node indices for exposed items" { allocator.destroy(math_env); } - // Add exposed items - try math_env.exposed_by_str.put(allocator, "add", {}); - try math_env.exposed_by_str.put(allocator, "multiply", {}); - try math_env.exposed_by_str.put(allocator, "PI", {}); + // Add exposed items and set their node indices + try math_env.exposed_items.addExposed(allocator, "add"); + try math_env.exposed_items.addExposed(allocator, "multiply"); + try math_env.exposed_items.addExposed(allocator, "PI"); // Simulate having CIR node indices for these exposed items // In real usage, these would be set during canonicalization of MathUtils - try math_env.exposed_nodes.put(allocator, "add", 100); - try math_env.exposed_nodes.put(allocator, "multiply", 200); - try math_env.exposed_nodes.put(allocator, "PI", 300); + try math_env.exposed_items.setNodeIndex(allocator, "add", 100); + try math_env.exposed_items.setNodeIndex(allocator, "multiply", 200); + try math_env.exposed_items.setNodeIndex(allocator, "PI", 300); try module_envs.put("MathUtils", math_env); @@ -586,15 +586,15 @@ test "exposed_nodes - tracking CIR node indices for exposed items" { try expectEqual(true, found_multiply_with_idx_200); try expectEqual(true, found_pi_with_idx_300); - // Test case where exposed_nodes is not populated (should get 0) + // Test case where node index is not populated (should get 0) const empty_env = try allocator.create(ModuleEnv); empty_env.* = try ModuleEnv.init(allocator, ""); defer { empty_env.deinit(); allocator.destroy(empty_env); } - try empty_env.exposed_by_str.put(allocator, "undefined", {}); - // Don't add to exposed_nodes - should default to 0 + try empty_env.exposed_items.addExposed(allocator, "undefined"); + // Don't set node index - should default to 0 try module_envs.put("EmptyModule", empty_env); const source2 = diff --git a/src/check/check_types.zig b/src/check/check_types.zig index 50908d2281..2ca19a63a9 100644 --- a/src/check/check_types.zig +++ b/src/check/check_types.zig @@ -862,8 +862,8 @@ pub fn checkExpr(self: *Self, expr_idx: ModuleEnv.Expr.Idx) std.mem.Allocator.Er // Look up the method in the origin module's exports const method_name_str = self.cir.idents.getText(dot_access.field_name); - // Search through the module's exposed nodes - if (module.exposed_nodes.get(method_name_str)) |node_idx| { + // Search through the module's exposed items + if (module.exposed_items.getNodeIndex(self.gpa, method_name_str)) |node_idx| { // Found the method! const target_expr_idx = @as(ModuleEnv.Expr.Idx, @enumFromInt(node_idx)); diff --git a/src/collections/ExposedItems.zig b/src/collections/ExposedItems.zig new file mode 100644 index 0000000000..24915589cc --- /dev/null +++ b/src/collections/ExposedItems.zig @@ -0,0 +1,279 @@ +//! A specialized collection for tracking exposed items in modules using string keys. +//! +//! This combines the functionality of exposed_by_str and exposed_nodes into a single +//! SortedArrayBuilder to save memory and simplify the API. It uses string keys +//! (the exposed item names) and u16 values (the node indices). +//! +//! A value of 0 means "exposed but not yet defined", while non-zero values +//! are actual node indices. + +const std = @import("std"); +const Allocator = std.mem.Allocator; +const SortedArrayBuilder = @import("SortedArrayBuilder.zig").SortedArrayBuilder; + +/// A collection that tracks exposed items by their names and associated CIR node indices +pub const ExposedItems = struct { + /// Maps item name -> CIR node index (0 means exposed but not defined) + items: SortedArrayBuilder([]const u8, u16), + + const Self = @This(); + + pub fn init() Self { + return .{ + .items = SortedArrayBuilder([]const u8, u16).init(), + }; + } + + pub fn deinit(self: *Self, allocator: Allocator) void { + self.items.deinit(allocator); + } + + /// Add an exposed item (equivalent to exposed_by_str.put) + /// If the item already exists, this is a no-op (keeping the existing value) + pub fn addExposed(self: *Self, allocator: Allocator, name: []const u8) !void { + // Add with value 0 to indicate "exposed but not yet defined" + // The SortedArrayBuilder will handle duplicates by keeping the last value, + // but we don't want to overwrite an existing node index with 0 + if (self.items.sorted) { + // If already sorted, check if it exists first + if (self.items.get(allocator, name) == null) { + try self.items.put(allocator, name, 0); + } + } else { + // If not sorted yet, just add it - duplicates will be handled later + try self.items.put(allocator, name, 0); + } + } + + /// Set the node index for an exposed item (equivalent to exposed_nodes.put) + pub fn setNodeIndex(self: *Self, allocator: Allocator, name: []const u8, node_idx: u16) !void { + // This will overwrite any existing value (including 0) + try self.items.put(allocator, name, node_idx); + } + + /// Check if an item is exposed (equivalent to exposed_by_str.contains) + pub fn contains(self: *const Self, allocator: Allocator, name: []const u8) bool { + // Cast away const for the lookup since SortedArrayBuilder.get may sort the array + var mutable_self = @constCast(self); + return mutable_self.items.get(allocator, name) != null; + } + + /// Get the node index for an exposed item (equivalent to exposed_nodes.get) + /// Returns null if not exposed, or the node index (which may be 0) + pub fn getNodeIndex(self: *const Self, allocator: Allocator, name: []const u8) ?u16 { + // Cast away const for the lookup since SortedArrayBuilder.get may sort the array + var mutable_self = @constCast(self); + return mutable_self.items.get(allocator, name); + } + + /// Get the number of exposed items + pub fn count(self: *const Self) usize { + return self.items.count(); + } + + /// Ensure the array is sorted and deduplicated + /// This should be called at the end of canonicalization + pub fn ensureSorted(self: *Self, allocator: Allocator) void { + self.items.ensureSorted(allocator); + } + + /// Detect duplicate exposed items for error reporting + pub fn detectDuplicates(self: *Self, allocator: Allocator) ![][]const u8 { + return self.items.detectDuplicates(allocator); + } + + /// Relocate pointers after memory movement + pub fn relocate(self: *Self, offset: isize) void { + self.items.relocate(offset); + } + + /// Calculate the size needed to serialize this ExposedItems + pub fn serializedSize(self: *const Self) usize { + // We need to serialize: + // 1. The count of items (u32) + // 2. For each item: key length (u32) + key bytes + value (u16) + var size: usize = @sizeOf(u32); // count + + for (self.items.entries.items) |entry| { + size += @sizeOf(u32); // key length + size += entry.key.len; // key bytes + size += @sizeOf(u16); // value + } + + // Align to SERIALIZATION_ALIGNMENT + const SERIALIZATION_ALIGNMENT = 16; + return std.mem.alignForward(usize, size, SERIALIZATION_ALIGNMENT); + } + + /// Serialize this ExposedItems into the provided buffer + pub fn serializeInto(self: *const Self, buffer: []u8) ![]u8 { + const size = self.serializedSize(); + if (buffer.len < size) return error.BufferTooSmall; + + var offset: usize = 0; + + // Write count + std.mem.writeInt(u32, buffer[offset..][0..4], @intCast(self.items.entries.items.len), .little); + offset += @sizeOf(u32); + + // Write entries + for (self.items.entries.items) |entry| { + // Write key length + std.mem.writeInt(u32, buffer[offset..][0..4], @intCast(entry.key.len), .little); + offset += @sizeOf(u32); + + // Write key bytes + @memcpy(buffer[offset..offset + entry.key.len], entry.key); + offset += entry.key.len; + + // Write value + std.mem.writeInt(u16, buffer[offset..][0..2], entry.value, .little); + offset += @sizeOf(u16); + } + + // Zero padding + if (offset < size) { + @memset(buffer[offset..size], 0); + } + + return buffer[0..size]; + } + + /// Deserialize ExposedItems from the provided buffer + pub fn deserializeFrom(buffer: []const u8, allocator: Allocator) !Self { + if (buffer.len < @sizeOf(u32)) return error.BufferTooSmall; + + var offset: usize = 0; + + // Read count + const entry_count = std.mem.readInt(u32, buffer[offset..][0..4], .little); + offset += @sizeOf(u32); + + var result = Self.init(); + errdefer result.deinit(allocator); + + // Read entries + for (0..entry_count) |_| { + // Read key length + if (offset + @sizeOf(u32) > buffer.len) return error.BufferTooSmall; + const key_len = std.mem.readInt(u32, buffer[offset..][0..4], .little); + offset += @sizeOf(u32); + + // Read key bytes + if (offset + key_len > buffer.len) return error.BufferTooSmall; + const key = buffer[offset..offset + key_len]; + offset += key_len; + + // Read value + if (offset + @sizeOf(u16) > buffer.len) return error.BufferTooSmall; + const value = std.mem.readInt(u16, buffer[offset..][0..2], .little); + offset += @sizeOf(u16); + + // Add to builder + try result.items.put(allocator, key, value); + } + + return result; + } + + /// Append to iovec writer for serialization + pub fn appendToIovecs(self: *const Self, writer: anytype) !usize { + return self.items.appendToIovecs(writer); + } + + /// Iterator for all exposed items + pub const Iterator = struct { + items: []const SortedArrayBuilder([]const u8, u16).Entry, + index: usize, + + pub fn next(self: *Iterator) ?struct { name: []const u8, node_idx: u16 } { + if (self.index < self.items.len) { + const entry = self.items[self.index]; + self.index += 1; + return .{ .name = entry.key, .node_idx = entry.value }; + } + return null; + } + }; + + /// Get an iterator over all exposed items + pub fn iterator(self: *const Self) Iterator { + return .{ .items = self.items.entries.items, .index = 0 }; + } +}; + +test "ExposedItems basic operations" { + const testing = std.testing; + const allocator = testing.allocator; + + var exposed = ExposedItems.init(); + defer exposed.deinit(allocator); + + // Add exposed items (like exposed_by_str.put) + try exposed.addExposed(allocator, "foo"); + try exposed.addExposed(allocator, "bar"); + try exposed.addExposed(allocator, "MyType"); + + // Test count + try testing.expectEqual(@as(usize, 3), exposed.count()); + + // Test contains (like exposed_by_str.contains) + try testing.expect(exposed.contains(allocator, "foo")); + try testing.expect(exposed.contains(allocator, "bar")); + try testing.expect(exposed.contains(allocator, "MyType")); + try testing.expect(!exposed.contains(allocator, "missing")); + + // Test getNodeIndex before setting (should be 0) + try testing.expectEqual(@as(?u16, 0), exposed.getNodeIndex(allocator, "foo")); + + // Set node indices (like exposed_nodes.put) + try exposed.setNodeIndex(allocator, "foo", 42); + try exposed.setNodeIndex(allocator, "bar", 84); + + // Test getNodeIndex after setting + try testing.expectEqual(@as(?u16, 42), exposed.getNodeIndex(allocator, "foo")); + try testing.expectEqual(@as(?u16, 84), exposed.getNodeIndex(allocator, "bar")); + try testing.expectEqual(@as(?u16, 0), exposed.getNodeIndex(allocator, "MyType")); // Not set + try testing.expectEqual(@as(?u16, null), exposed.getNodeIndex(allocator, "missing")); // Not exposed +} + +test "ExposedItems handles duplicates" { + const testing = std.testing; + const allocator = testing.allocator; + + var exposed = ExposedItems.init(); + defer exposed.deinit(allocator); + + // Add items with duplicates + try exposed.addExposed(allocator, "foo"); + try exposed.setNodeIndex(allocator, "foo", 42); + try exposed.addExposed(allocator, "foo"); // Duplicate - should not overwrite node index + + // Force sorting + exposed.ensureSorted(allocator); + + // Should still have the node index + try testing.expectEqual(@as(?u16, 42), exposed.getNodeIndex(allocator, "foo")); +} + +test "ExposedItems detectDuplicates" { + const testing = std.testing; + const allocator = testing.allocator; + + var exposed = ExposedItems.init(); + defer exposed.deinit(allocator); + + // Add some duplicates + try exposed.addExposed(allocator, "foo"); + try exposed.addExposed(allocator, "bar"); + try exposed.addExposed(allocator, "foo"); // duplicate + try exposed.setNodeIndex(allocator, "bar", 42); + try exposed.addExposed(allocator, "bar"); // duplicate + + // Detect duplicates + const duplicates = try exposed.detectDuplicates(allocator); + defer allocator.free(duplicates); + + // Should report both duplicates + try testing.expectEqual(@as(usize, 2), duplicates.len); +} diff --git a/src/collections/SortedArrayBuilder.zig b/src/collections/SortedArrayBuilder.zig new file mode 100644 index 0000000000..ba776b0b7a --- /dev/null +++ b/src/collections/SortedArrayBuilder.zig @@ -0,0 +1,450 @@ +//! A builder for creating sorted arrays with binary search lookups. +//! +//! SortedArrayBuilder provides a way to build key-value mappings that are +//! optimized for small sizes and deterministic serialization. It maintains +//! a sorted array internally, enabling efficient binary search lookups. +//! +//! Features: +//! - Supports both string and numeric key types +//! - Maintains sorted order for deterministic serialization +//! - Efficient binary search lookups O(log n) +//! - Zero-copy deserialization +//! - Minimal memory overhead + +const std = @import("std"); +const Allocator = std.mem.Allocator; + +// Import base for both testing and runtime +const base = @import("base"); +const serialization = @import("serialization"); +const IovecWriter = serialization.IovecWriter; + +/// A builder for creating sorted arrays directly without using hash maps +/// This is more efficient when we know we won't have duplicates +pub fn SortedArrayBuilder(comptime K: type, comptime V: type) type { + return struct { + entries: std.ArrayListUnmanaged(Entry) = .{}, + sorted: bool = true, + + const Self = @This(); + + pub const Entry = struct { + key: K, + value: V, + + fn lessThan(_: void, a: Entry, b: Entry) bool { + if (K == []const u8) { + return std.mem.lessThan(u8, a.key, b.key); + } else if (@typeInfo(K) == .int or @typeInfo(K) == .@"enum") { + return a.key < b.key; + } else { + @compileError("Unsupported key type for SortedArrayBuilder"); + } + } + }; + + pub fn init() Self { + return .{}; + } + + pub fn deinit(self: *Self, allocator: Allocator) void { + if (K == []const u8) { + // Free string keys + for (self.entries.items) |entry| { + allocator.free(entry.key); + } + } + self.entries.deinit(allocator); + } + + /// Add a key-value pair + pub fn put(self: *Self, allocator: Allocator, key: K, value: V) !void { + const new_key = if (K == []const u8) try allocator.dupe(u8, key) else key; + + // Check if we need to maintain sorted order + if (self.sorted and self.entries.items.len > 0) { + const last = self.entries.items[self.entries.items.len - 1]; + if (K == []const u8) { + self.sorted = std.mem.lessThan(u8, last.key, new_key); + } else { + self.sorted = last.key < new_key; + } + } + + try self.entries.append(allocator, .{ .key = new_key, .value = value }); + } + + /// Get value by key (requires sorting first if not already sorted) + pub fn get(self: *Self, allocator: Allocator, key: K) ?V { + self.ensureSorted(allocator); + + var left: usize = 0; + var right: usize = self.entries.items.len; + + while (left < right) { + const mid = left + (right - left) / 2; + const mid_key = self.entries.items[mid].key; + + const cmp = if (K == []const u8) + std.mem.order(u8, mid_key, key) + else if (mid_key == key) + std.math.Order.eq + else if (mid_key < key) + std.math.Order.lt + else + std.math.Order.gt; + + switch (cmp) { + .eq => return self.entries.items[mid].value, + .lt => left = mid + 1, + .gt => right = mid, + } + } + return null; + } + + /// Ensure the array is sorted and deduplicate any duplicate entries + pub fn ensureSorted(self: *Self, allocator: Allocator) void { + if (!self.sorted) { + std.sort.pdq(Entry, self.entries.items, {}, Entry.lessThan); + self.sorted = true; + + // Check for and report duplicates, then deduplicate + self.deduplicateAndReport(allocator); + } + } + + /// Check for duplicates, report them, and remove duplicates keeping the last occurrence + fn deduplicateAndReport(self: *Self, allocator: Allocator) void { + if (self.entries.items.len <= 1) return; + + // First pass: detect and report duplicates + var i: usize = 1; + while (i < self.entries.items.len) { + const prev_entry = self.entries.items[i - 1]; + const curr_entry = self.entries.items[i]; + const is_duplicate = if (K == []const u8) + std.mem.eql(u8, prev_entry.key, curr_entry.key) + else + prev_entry.key == curr_entry.key; + + if (is_duplicate) { + // Note: Duplicate detection is handled by caller via detectDuplicates() method + } + i += 1; + } + + // Second pass: deduplicate by keeping last occurrence + var write_index: usize = 0; + for (self.entries.items, 0..) |entry, read_index| { + var should_keep = true; + + // Look ahead to see if there's a duplicate later + for (self.entries.items[read_index + 1 ..]) |future_entry| { + const is_duplicate = if (K == []const u8) + std.mem.eql(u8, entry.key, future_entry.key) + else + entry.key == future_entry.key; + + if (is_duplicate) { + should_keep = false; + break; + } + } + + if (should_keep) { + if (write_index != read_index) { + self.entries.items[write_index] = entry; + } + write_index += 1; + } else { + // Free the string key that we're not keeping + if (K == []const u8) { + allocator.free(entry.key); + } + } + } + + // Update the length to reflect deduplicated entries + self.entries.items.len = write_index; + } + + /// Detect duplicates without modifying the array - returns list of duplicate keys + pub fn detectDuplicates(self: *Self, allocator: Allocator) ![]K { + var duplicates = std.ArrayList(K).init(allocator); + + if (self.entries.items.len <= 1) return duplicates.toOwnedSlice(); + + // Ensure sorted first + if (!self.sorted) { + std.sort.pdq(Entry, self.entries.items, {}, Entry.lessThan); + self.sorted = true; + } + + var reported_keys = if (K == []const u8) + std.StringHashMap(void).init(allocator) + else + std.AutoHashMap(K, void).init(allocator); + defer reported_keys.deinit(); + + var i: usize = 1; + while (i < self.entries.items.len) { + const prev_entry = self.entries.items[i - 1]; + const curr_entry = self.entries.items[i]; + const is_duplicate = if (K == []const u8) + std.mem.eql(u8, prev_entry.key, curr_entry.key) + else + prev_entry.key == curr_entry.key; + + if (is_duplicate) { + // Report duplicate only once per unique key + const result = try reported_keys.getOrPut(curr_entry.key); + if (!result.found_existing) { + try duplicates.append(curr_entry.key); + } + } + i += 1; + } + + return duplicates.toOwnedSlice(); + } + + /// Relocate pointers after memory movement + pub fn relocate(self: *Self, offset: isize) void { + // Relocate the entries array pointer + if (self.entries.items.len > 0) { + const old_ptr = @intFromPtr(self.entries.items.ptr); + // Skip relocation if this is a sentinel value + // Define sentinel value locally since iovec_serialize is not available + const EMPTY_ARRAY_SENTINEL: usize = 0xDEADBEEF; + if (old_ptr != EMPTY_ARRAY_SENTINEL) { + const new_ptr = @as(usize, @intCast(@as(isize, @intCast(old_ptr)) + offset)); + self.entries.items.ptr = @ptrFromInt(new_ptr); + } + } + } + + /// Get the number of entries + pub fn count(self: *const Self) usize { + return self.entries.items.len; + } + + /// Append this SortedArrayBuilder to an iovec writer for serialization + pub fn appendToIovecs(self: *const Self, writer: *IovecWriter) !usize { + + // Must be sorted before serialization + if (!self.sorted) { + @panic("SortedArrayBuilder must be sorted before serialization"); + } + + // Create a mutable copy of self that we can modify + var builder_copy = self.*; + + // Create a buffer for the final serialized struct + const builder_copy_buffer = try writer.allocator.alloc(u8, @sizeOf(Self)); + + // Track this allocation so it gets freed when writer is deinitialized + try writer.owned_buffers.append(builder_copy_buffer); + + // Serialize entries array data + const entries_offset = if (self.entries.items.len > 0) blk: { + const data = std.mem.sliceAsBytes(self.entries.items); + const offset = try writer.appendBytes(Entry, data); + break :blk offset; + } else 0; + + // Update pointer in the copy to use offset + // For empty arrays, use sentinel value instead of 0 to avoid null pointer + builder_copy.entries.items.ptr = if (entries_offset == 0) + @ptrFromInt(0xDEADBEEF) // EMPTY_ARRAY_SENTINEL + else + @ptrFromInt(entries_offset); + builder_copy.entries.items.len = self.entries.items.len; + + // Note: For string keys, we don't serialize the string data separately here. + // This assumes that the strings are already managed elsewhere (e.g., in an interner) + // and we're just storing pointers/slices to them. If string keys need to be + // serialized as well, that would require additional handling. + + // Copy the modified struct to the buffer + @memcpy(builder_copy_buffer, std.mem.asBytes(&builder_copy)); + + // Now that all pointers have been converted to offsets, add the copy to iovecs + const struct_offset = try writer.appendBytes(Self, builder_copy_buffer); + + return struct_offset; + } + }; +} + +test "SortedArrayBuilder basic operations" { + const testing = std.testing; + const allocator = testing.allocator; + + var builder = SortedArrayBuilder([]const u8, u32).init(); + defer builder.deinit(allocator); + + // Add items in random order + try builder.put(allocator, "zebra", 100); + try builder.put(allocator, "apple", 50); + try builder.put(allocator, "banana", 150); + try builder.put(allocator, "cherry", 30); + + // Test count + try testing.expectEqual(@as(usize, 4), builder.count()); + + // Test get (forces sorting) + try testing.expectEqual(@as(?u32, 50), builder.get(allocator, "apple")); + try testing.expectEqual(@as(?u32, 150), builder.get(allocator, "banana")); + try testing.expectEqual(@as(?u32, 30), builder.get(allocator, "cherry")); + try testing.expectEqual(@as(?u32, 100), builder.get(allocator, "zebra")); + try testing.expectEqual(@as(?u32, null), builder.get(allocator, "missing")); + + // Test iovec serialization + var writer = IovecWriter.init(allocator); + defer writer.deinit(); + + _ = try builder.appendToIovecs(&writer); + + // Verify we have some iovecs (basic smoke test) + try testing.expect(writer.iovecs.items.len > 0); +} + +test "SortedArrayBuilder maintains sorted order when added in order" { + const testing = std.testing; + const allocator = testing.allocator; + + var builder = SortedArrayBuilder([]const u8, u16).init(); + defer builder.deinit(allocator); + + // Add items in sorted order + try builder.put(allocator, "aaa", 1); + try builder.put(allocator, "bbb", 2); + try builder.put(allocator, "ccc", 3); + try builder.put(allocator, "ddd", 4); + + // Should still be marked as sorted + try testing.expect(builder.sorted); + + // Get should work without sorting + try testing.expectEqual(@as(?u16, 1), builder.get(allocator, "aaa")); + try testing.expectEqual(@as(?u16, 4), builder.get(allocator, "ddd")); +} + +test "SortedArrayBuilder detectDuplicates example usage" { + const testing = std.testing; + const allocator = testing.allocator; + + var builder = SortedArrayBuilder(u32, u16).init(); + defer builder.deinit(allocator); + + // Add some entries with duplicates + try builder.put(allocator, 100, 1); + try builder.put(allocator, 200, 2); + try builder.put(allocator, 100, 3); // duplicate of 100 + try builder.put(allocator, 300, 4); + try builder.put(allocator, 200, 5); // duplicate of 200 + + // Detect duplicates before sorting/deduplicating + const duplicates = try builder.detectDuplicates(allocator); + defer allocator.free(duplicates); + + // Example: Report duplicates (in real code, this would push diagnostics) + for (duplicates) |duplicate_key| { + // In a real compiler, you'd push a diagnostic here: + // try cir.pushDiagnostic(CIR.Diagnostic{ .duplicate_exposed_item = ... }); + std.debug.print("Found duplicate key: {d}\n", .{duplicate_key}); + } + + // Verify we found the expected duplicates + try testing.expectEqual(@as(usize, 2), duplicates.len); + + // After detection, normal operations work as expected + builder.ensureSorted(allocator); + try testing.expectEqual(@as(usize, 3), builder.count()); // Deduplicated + try testing.expectEqual(@as(?u16, 3), builder.get(allocator, 100)); // Last value kept + try testing.expectEqual(@as(?u16, 5), builder.get(allocator, 200)); // Last value kept +} + +test "SortedArrayBuilder handles duplicates with string keys" { + const testing = std.testing; + const allocator = testing.allocator; + + var builder = SortedArrayBuilder([]const u8, u32).init(); + defer builder.deinit(allocator); + + // Add duplicate keys with different values + try builder.put(allocator, "apple", 10); + try builder.put(allocator, "banana", 20); + try builder.put(allocator, "apple", 30); // duplicate key + try builder.put(allocator, "cherry", 40); + try builder.put(allocator, "banana", 50); // duplicate key + + // Initial count includes duplicates + try testing.expectEqual(@as(usize, 5), builder.count()); + + // Force sorting and deduplication + builder.ensureSorted(allocator); + + // Count should be reduced after deduplication + try testing.expectEqual(@as(usize, 3), builder.count()); + + // Should return the last value for each key (keeping last occurrence) + try testing.expectEqual(@as(?u32, 30), builder.get(allocator, "apple")); + try testing.expectEqual(@as(?u32, 50), builder.get(allocator, "banana")); + try testing.expectEqual(@as(?u32, 40), builder.get(allocator, "cherry")); +} + +test "SortedArrayBuilder handles duplicates with numeric keys" { + const testing = std.testing; + const allocator = testing.allocator; + + var builder = SortedArrayBuilder(u32, []const u8).init(); + defer builder.deinit(allocator); + + // Add duplicate keys with different values + try builder.put(allocator, 100, "first"); + try builder.put(allocator, 200, "second"); + try builder.put(allocator, 100, "updated_first"); // duplicate key + try builder.put(allocator, 300, "third"); + + // Initial count includes duplicates + try testing.expectEqual(@as(usize, 4), builder.count()); + + // Force sorting and deduplication + builder.ensureSorted(allocator); + + // Count should be reduced after deduplication + try testing.expectEqual(@as(usize, 3), builder.count()); + + // Should return the last value for each key + try testing.expectEqual(@as(?[]const u8, "updated_first"), builder.get(allocator, 100)); + try testing.expectEqual(@as(?[]const u8, "second"), builder.get(allocator, 200)); + try testing.expectEqual(@as(?[]const u8, "third"), builder.get(allocator, 300)); +} + +test "SortedArrayBuilder no duplicates case" { + const testing = std.testing; + const allocator = testing.allocator; + + var builder = SortedArrayBuilder([]const u8, u16).init(); + defer builder.deinit(allocator); + + // Add unique keys only + try builder.put(allocator, "unique1", 1); + try builder.put(allocator, "unique2", 2); + try builder.put(allocator, "unique3", 3); + + const original_count = builder.count(); + + // Force sorting (should not change count) + builder.ensureSorted(allocator); + + // Count should remain the same + try testing.expectEqual(original_count, builder.count()); + + // All values should be retrievable + try testing.expectEqual(@as(?u16, 1), builder.get(allocator, "unique1")); + try testing.expectEqual(@as(?u16, 2), builder.get(allocator, "unique2")); + try testing.expectEqual(@as(?u16, 3), builder.get(allocator, "unique3")); +} diff --git a/src/collections/mod.zig b/src/collections/mod.zig index 57457b7655..9d8276fab2 100644 --- a/src/collections/mod.zig +++ b/src/collections/mod.zig @@ -17,6 +17,9 @@ pub const SafeMultiList = @import("safe_list.zig").SafeMultiList; pub const SafeStringHashMap = @import("safe_hash_map.zig").SafeStringHashMap; +pub const SortedArrayBuilder = @import("SortedArrayBuilder.zig").SortedArrayBuilder; +pub const ExposedItems = @import("ExposedItems.zig").ExposedItems; + /// A range that must have at least one element pub const NonEmptyRange = struct { /// Starting index (inclusive) diff --git a/src/compile/ModuleEnv.zig b/src/compile/ModuleEnv.zig index e796dc4e3a..025b207921 100644 --- a/src/compile/ModuleEnv.zig +++ b/src/compile/ModuleEnv.zig @@ -68,12 +68,8 @@ idents: Ident.Store, ident_ids_for_slicing: collections.SafeList(Ident.Idx), strings: StringLiteral.Store, types: types_mod.Store, -/// Map of exposed items by their string representation (not interned) -/// This is built during canonicalization and preserved for later use -exposed_by_str: collections.SafeStringHashMap(void), -/// Map of exposed item names to their node indices (stored as u16) -/// This is populated during canonicalization to allow cross-module lookups -exposed_nodes: collections.SafeStringHashMap(u16), +/// The items (a combination of types and values) that this module exposes +exposed_items: collections.ExposedItems, /// Line starts for error reporting. We retain only start and offset positions in the IR /// and then use these line starts to calculate the line number and column number as required. @@ -130,8 +126,7 @@ pub fn init(gpa: std.mem.Allocator, source: []const u8) std.mem.Allocator.Error! .ident_ids_for_slicing = try collections.SafeList(Ident.Idx).initCapacity(gpa, 256), .strings = try StringLiteral.Store.initCapacityBytes(gpa, 4096), .types = try types_mod.Store.initCapacity(gpa, 2048, 512), - .exposed_by_str = try collections.SafeStringHashMap(void).initCapacity(gpa, 64), - .exposed_nodes = try collections.SafeStringHashMap(u16).initCapacity(gpa, 64), + .exposed_items = collections.ExposedItems.init(), .line_starts = try collections.SafeList(u32).initCapacity(gpa, 256), .source = source, // Initialize compilation fields with empty/default values @@ -152,8 +147,7 @@ pub fn deinit(self: *Self) void { self.strings.deinit(self.gpa); self.types.deinit(); self.line_starts.deinit(self.gpa); - self.exposed_by_str.deinit(self.gpa); - self.exposed_nodes.deinit(self.gpa); + self.exposed_items.deinit(self.gpa); // Clean up compilation fields self.external_decls.deinit(self.gpa); self.imports.deinit(self.gpa);