Merge remote-tracking branch 'origin/main' into serialize-idents

This commit is contained in:
Richard Feldman 2025-07-26 22:45:51 -04:00
commit dbc99059d4
No known key found for this signature in database
10 changed files with 948 additions and 131 deletions

View file

@ -23,18 +23,9 @@ attributes: Attributes,
/// Create a new identifier from a string.
pub fn for_text(text: []const u8) Ident {
// Parse identifier attributes from the text
const is_ignored = std.mem.startsWith(u8, text, "_");
const is_effectful = std.mem.endsWith(u8, text, "!");
// TODO: parse reassignable attribute (var keyword handling)
return Ident{
.raw_text = text,
.attributes = Attributes{
.effectful = is_effectful,
.ignored = is_ignored,
.reassignable = false,
},
.attributes = Attributes.fromString(text),
};
}
@ -68,18 +59,9 @@ pub fn from_bytes(bytes: []const u8) Error!Ident {
}
}
// Parse identifier attributes from the bytes
const is_ignored = std.mem.startsWith(u8, bytes, "_");
const is_effectful = std.mem.endsWith(u8, bytes, "!");
// TODO: parse reassignable attribute (var keyword handling)
return Ident{
.raw_text = bytes,
.attributes = Attributes{
.effectful = is_effectful,
.ignored = is_ignored,
.reassignable = false,
},
.attributes = Attributes.fromString(bytes),
};
}
@ -96,6 +78,14 @@ pub const Attributes = packed struct(u3) {
effectful: bool,
ignored: bool,
reassignable: bool,
/// Derive identifier attributes from its raw text: a trailing "!" marks it
/// effectful, a leading "_" marks it ignored. The reassignable attribute
/// cannot be inferred from the text alone, so it is always false here.
pub fn fromString(text: []const u8) Attributes {
    const ends_in_bang = std.mem.endsWith(u8, text, "!");
    const starts_with_underscore = std.mem.startsWith(u8, text, "_");
    return .{
        .effectful = ends_in_bang,
        .ignored = starts_with_underscore,
        .reassignable = false,
    };
}
};
/// An interner for identifier names.
@ -179,13 +169,23 @@ pub const Store = struct {
return self.interner.contains(text);
}
/// Find an identifier by its string, returning its index if it exists.
/// This is different from insert in that it's guaranteed not to modify the store.
pub fn findByString(self: *const Store, text: []const u8) ?Idx {
    // Read-only lookup in the interner; absence means the text was never interned.
    if (self.interner.strings.get(text)) |interner_idx| {
        // The interner only stores the string, so the attribute bits are
        // re-derived from the text itself to rebuild the full Idx.
        return Idx{
            .idx = @as(u29, @intCast(@intFromEnum(interner_idx))),
            .attributes = Attributes.fromString(text),
        };
    }
    return null;
}
/// Freeze the identifier store, preventing any new entries from being added.
pub fn freeze(self: *Store) void {
self.interner.freeze();
}
/// Calculate the size needed to serialize this Ident.Store
pub fn serializedSize(self: *const Store) usize {
var size: usize = 0;

View file

@ -55,8 +55,7 @@ pub const Header = struct {
ident_store: ComponentInfo,
line_starts: ComponentInfo,
types_store: ComponentInfo,
exposed_by_str: ComponentInfo,
exposed_nodes: ComponentInfo,
exposed_items: ComponentInfo,
external_decls: ComponentInfo,
/// Spans can be stored directly since they're small
@ -68,7 +67,7 @@ pub const Header = struct {
warning_count: u32,
/// Fixed padding to ensure alignment
_padding: [16]u8 = [_]u8{0} ** 16,
_padding: [24]u8 = [_]u8{0} ** 24,
/// Error specific to initializing a Header from bytes
pub const InitError = error{
@ -121,8 +120,7 @@ pub const CacheModule = struct {
const ident_store_size = module_env.idents.serializedSize();
const line_starts_size = module_env.line_starts.serializedSize();
const types_store_size = module_env.types.serializedSize();
const exposed_by_str_size = module_env.exposed_by_str.serializedSize();
const exposed_nodes_size = module_env.exposed_nodes.serializedSize();
const exposed_items_size = module_env.exposed_items.serializedSize();
const external_decls_size = module_env.external_decls.serializedSize();
// Calculate aligned offsets
@ -153,12 +151,8 @@ pub const CacheModule = struct {
offset += @intCast(types_store_size);
offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT));
const exposed_by_str_offset = offset;
offset += @intCast(exposed_by_str_size);
offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT));
const exposed_nodes_offset = offset;
offset += @intCast(exposed_nodes_size);
const exposed_items_offset = offset;
offset += @intCast(exposed_items_size);
offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT));
const external_decls_offset = offset;
@ -188,8 +182,7 @@ pub const CacheModule = struct {
.ident_store = .{ .offset = ident_store_offset, .length = @intCast(ident_store_size) },
.line_starts = .{ .offset = line_starts_offset, .length = @intCast(line_starts_size) },
.types_store = .{ .offset = types_store_offset, .length = @intCast(types_store_size) },
.exposed_by_str = .{ .offset = exposed_by_str_offset, .length = @intCast(exposed_by_str_size) },
.exposed_nodes = .{ .offset = exposed_nodes_offset, .length = @intCast(exposed_nodes_size) },
.exposed_items = .{ .offset = exposed_items_offset, .length = @intCast(exposed_items_size) },
.external_decls = .{ .offset = external_decls_offset, .length = @intCast(external_decls_size) },
.all_defs = module_env.all_defs,
.all_statements = module_env.all_statements,
@ -207,8 +200,7 @@ pub const CacheModule = struct {
std.debug.assert(ident_store_offset % SERIALIZATION_ALIGNMENT == 0);
std.debug.assert(line_starts_offset % SERIALIZATION_ALIGNMENT == 0);
std.debug.assert(types_store_offset % SERIALIZATION_ALIGNMENT == 0);
std.debug.assert(exposed_by_str_offset % SERIALIZATION_ALIGNMENT == 0);
std.debug.assert(exposed_nodes_offset % SERIALIZATION_ALIGNMENT == 0);
std.debug.assert(exposed_items_offset % SERIALIZATION_ALIGNMENT == 0);
std.debug.assert(external_decls_offset % SERIALIZATION_ALIGNMENT == 0);
// Serialize each component
@ -219,8 +211,7 @@ pub const CacheModule = struct {
_ = try module_env.idents.serializeInto(data_section[ident_store_offset .. ident_store_offset + ident_store_size], allocator);
_ = try module_env.line_starts.serializeInto(@as([]align(SERIALIZATION_ALIGNMENT) u8, @alignCast(data_section[line_starts_offset .. line_starts_offset + line_starts_size])));
_ = try module_env.types.serializeInto(data_section[types_store_offset .. types_store_offset + types_store_size], allocator);
_ = try module_env.exposed_by_str.serializeInto(data_section[exposed_by_str_offset .. exposed_by_str_offset + exposed_by_str_size]);
_ = try module_env.exposed_nodes.serializeInto(data_section[exposed_nodes_offset .. exposed_nodes_offset + exposed_nodes_size]);
_ = try module_env.exposed_items.serializeInto(data_section[exposed_items_offset .. exposed_items_offset + exposed_items_size]);
_ = try module_env.external_decls.serializeInto(@as([]align(SERIALIZATION_ALIGNMENT) u8, @alignCast(data_section[external_decls_offset .. external_decls_offset + external_decls_size])));
// TODO Calculate and store checksum
@ -300,11 +291,8 @@ pub const CacheModule = struct {
var types_store = try TypeStore.deserializeFrom(self.getComponentData(.types_store), allocator);
errdefer types_store.deinit();
var exposed_by_str = try SafeStringHashMap(void).deserializeFrom(self.getComponentData(.exposed_by_str), allocator);
errdefer exposed_by_str.deinit(allocator);
var exposed_nodes = try SafeStringHashMap(u16).deserializeFrom(self.getComponentData(.exposed_nodes), allocator);
errdefer exposed_nodes.deinit(allocator);
var exposed_items = try collections.ExposedItems.deserializeFrom(self.getComponentData(.exposed_items), allocator);
errdefer exposed_items.deinit(allocator);
// Create ModuleEnv from deserialized components
var module_env = ModuleEnv{
@ -313,8 +301,7 @@ pub const CacheModule = struct {
.ident_ids_for_slicing = ident_ids_for_slicing,
.strings = strings,
.types = types_store,
.exposed_by_str = exposed_by_str,
.exposed_nodes = exposed_nodes,
.exposed_items = exposed_items,
.line_starts = line_starts,
.source = source,
// Module compilation fields - will be initialized after
@ -360,8 +347,7 @@ pub const CacheModule = struct {
.ident_store => self.header.ident_store,
.line_starts => self.header.line_starts,
.types_store => self.header.types_store,
.exposed_by_str => self.header.exposed_by_str,
.exposed_nodes => self.header.exposed_nodes,
.exposed_items => self.header.exposed_items,
.external_decls => self.header.external_decls,
};
return self.data[info.offset .. info.offset + info.length];
@ -381,8 +367,7 @@ pub const CacheModule = struct {
.ident_store = self.header.ident_store.length,
.line_starts = self.header.line_starts.length,
.types_store = self.header.types_store.length,
.exposed_by_str = self.header.exposed_by_str.length,
.exposed_nodes = self.header.exposed_nodes.length,
.exposed_items = self.header.exposed_items.length,
.external_decls = self.header.external_decls.length,
},
};
@ -400,8 +385,7 @@ pub const CacheModule = struct {
.ident_store => self.header.ident_store,
.line_starts => self.header.line_starts,
.types_store => self.header.types_store,
.exposed_by_str => self.header.exposed_by_str,
.exposed_nodes => self.header.exposed_nodes,
.exposed_items => self.header.exposed_items,
.external_decls => self.header.external_decls,
};
@ -568,8 +552,7 @@ const ComponentType = enum {
ident_store,
line_starts,
types_store,
exposed_by_str,
exposed_nodes,
exposed_items,
external_decls,
};
@ -586,8 +569,7 @@ pub const Diagnostics = struct {
ident_store: u32,
line_starts: u32,
types_store: u32,
exposed_by_str: u32,
exposed_nodes: u32,
exposed_items: u32,
external_decls: u32,
},
};

View file

@ -623,17 +623,20 @@ pub fn canonicalizeFile(
last_type_anno = null; // Clear after successful use
// If this declaration successfully defined an exposed value, remove it from exposed_ident_texts
// and add it to exposed_nodes
// and add the node index to exposed_items
const pattern = self.parse_ir.store.getPattern(decl.pattern);
if (pattern == .ident) {
const token_region = self.parse_ir.tokens.resolve(@intCast(pattern.ident.ident_tok));
const ident_text = self.parse_ir.env.source[token_region.start.offset..token_region.end.offset];
// If this identifier is exposed, add it to exposed_nodes
// If this identifier is exposed, add it to exposed_items
if (self.exposed_ident_texts.contains(ident_text)) {
// Store the def index as u16 in exposed_nodes
// Get the interned identifier - it should already exist from parsing
const ident = base.Ident.for_text(ident_text);
const idx = try self.env.idents.insert(self.env.gpa, ident);
// Store the def index as u16 in exposed_items
const def_idx_u16: u16 = @intCast(@intFromEnum(def_idx));
try self.env.exposed_nodes.put(self.env.gpa, ident_text, def_idx_u16);
try self.env.exposed_items.setNodeIndexById(self.env.gpa, @bitCast(idx), def_idx_u16);
}
_ = self.exposed_ident_texts.remove(ident_text);
@ -900,15 +903,15 @@ fn createExposedScope(
const exposed = self.parse_ir.store.getExposedItem(exposed_idx);
switch (exposed) {
.lower_ident => |ident| {
// Get the text of the identifier token to use as key
// Get the text for tracking redundant exposures
const token_region = self.parse_ir.tokens.resolve(@intCast(ident.ident));
const ident_text = self.parse_ir.env.source[token_region.start.offset..token_region.end.offset];
// Add to exposed_by_str for permanent storage (unconditionally)
try self.env.exposed_by_str.put(gpa, ident_text, {});
// Also build exposed_scope with proper identifiers
// Get the interned identifier
if (self.parse_ir.tokens.resolveIdentifier(ident.ident)) |ident_idx| {
// Add to exposed_items for permanent storage (unconditionally)
try self.env.exposed_items.addExposedById(gpa, @bitCast(ident_idx));
// Use a dummy pattern index - we just need to track that it's exposed
const dummy_idx = @as(Pattern.Idx, @enumFromInt(0));
try self.exposed_scope.put(gpa, .ident, ident_idx, dummy_idx);
@ -933,15 +936,15 @@ fn createExposedScope(
}
},
.upper_ident => |type_name| {
// Get the text of the identifier token to use as key
// Get the text for tracking redundant exposures
const token_region = self.parse_ir.tokens.resolve(@intCast(type_name.ident));
const type_text = self.parse_ir.env.source[token_region.start.offset..token_region.end.offset];
// Add to exposed_by_str for permanent storage (unconditionally)
try self.env.exposed_by_str.put(gpa, type_text, {});
// Also build exposed_scope with proper identifiers
// Get the interned identifier
if (self.parse_ir.tokens.resolveIdentifier(type_name.ident)) |ident_idx| {
// Add to exposed_items for permanent storage (unconditionally)
try self.env.exposed_items.addExposedById(gpa, @bitCast(ident_idx));
// Use a dummy statement index - we just need to track that it's exposed
const dummy_idx = @as(Statement.Idx, @enumFromInt(0));
try self.exposed_scope.put(gpa, .type_decl, ident_idx, dummy_idx);
@ -966,15 +969,15 @@ fn createExposedScope(
}
},
.upper_ident_star => |type_with_constructors| {
// Get the text of the identifier token to use as key
// Get the text for tracking redundant exposures
const token_region = self.parse_ir.tokens.resolve(@intCast(type_with_constructors.ident));
const type_text = self.parse_ir.env.source[token_region.start.offset..token_region.end.offset];
// Add to exposed_by_str for permanent storage (unconditionally)
try self.env.exposed_by_str.put(gpa, type_text, {});
// Also build exposed_scope with proper identifiers
// Get the interned identifier
if (self.parse_ir.tokens.resolveIdentifier(type_with_constructors.ident)) |ident_idx| {
// Add to exposed_items for permanent storage (unconditionally)
try self.env.exposed_items.addExposedById(gpa, @bitCast(ident_idx));
// Use a dummy statement index - we just need to track that it's exposed
const dummy_idx = @as(Statement.Idx, @enumFromInt(0));
try self.exposed_scope.put(gpa, .type_decl, ident_idx, dummy_idx);
@ -1368,7 +1371,7 @@ fn introduceExposedItemsIntoScope(
return;
}
// Get the module's exposed_by_str map
// Get the module's exposed_items
const module_env = envs_map.get(module_name_text).?;
// Validate each exposed item
@ -1377,7 +1380,14 @@ fn introduceExposedItemsIntoScope(
const item_name_text = self.env.idents.getText(exposed_item.name);
// Check if the item is exposed by the module
if (!module_env.exposed_by_str.contains(item_name_text)) {
// We need to look up by string because the identifiers are from different modules
// First, try to find this identifier in the target module's ident store
const is_exposed = if (module_env.idents.findByString(item_name_text)) |target_ident|
module_env.exposed_items.containsById(self.env.gpa, @bitCast(target_ident))
else
false;
if (!is_exposed) {
// Determine if it's a type or value based on capitalization
const first_char = item_name_text[0];
@ -1702,11 +1712,16 @@ pub fn canonicalizeExpr(
};
};
// Look up the target node index in the module's exposed_nodes
// Look up the target node index in the module's exposed_items
// Need to convert identifier from current module to target module
const field_text = self.env.idents.getText(ident);
const target_node_idx = if (self.module_envs) |envs_map| blk: {
if (envs_map.get(module_text)) |module_env| {
break :blk module_env.exposed_nodes.get(field_text) orelse 0;
if (module_env.idents.findByString(field_text)) |target_ident| {
break :blk module_env.exposed_items.getNodeIndexById(self.env.gpa, @bitCast(target_ident)) orelse 0;
} else {
break :blk 0;
}
} else {
break :blk 0;
}
@ -1761,11 +1776,16 @@ pub fn canonicalizeExpr(
};
};
// Look up the target node index in the module's exposed_nodes
// Look up the target node index in the module's exposed_items
// Need to convert identifier from current module to target module
const field_text = self.env.idents.getText(exposed_info.original_name);
const target_node_idx = if (self.module_envs) |envs_map| blk: {
if (envs_map.get(module_text)) |module_env| {
break :blk module_env.exposed_nodes.get(field_text) orelse 0;
if (module_env.idents.findByString(field_text)) |target_ident| {
break :blk module_env.exposed_items.getNodeIndexById(self.env.gpa, @bitCast(target_ident)) orelse 0;
} else {
break :blk 0;
}
} else {
break :blk 0;
}
@ -6541,11 +6561,16 @@ fn tryModuleQualifiedLookup(self: *Self, field_access: AST.BinOp) std.mem.Alloca
const region = self.parse_ir.tokenizedRegionToRegion(field_access.region);
// Look up the target node index in the module's exposed_nodes
// Look up the target node index in the module's exposed_items
// Need to convert identifier from current module to target module
const field_text = self.env.idents.getText(field_name);
const target_node_idx = if (self.module_envs) |envs_map| blk: {
if (envs_map.get(module_text)) |module_env| {
break :blk module_env.exposed_nodes.get(field_text) orelse 0;
if (module_env.idents.findByString(field_text)) |target_ident| {
break :blk module_env.exposed_items.getNodeIndexById(self.env.gpa, @bitCast(target_ident)) orelse 0;
} else {
break :blk 0;
}
} else {
break :blk 0;
}

View file

@ -7,6 +7,7 @@
const std = @import("std");
const compile = @import("compile");
const parse = @import("parse");
const base = @import("base");
const canonicalize = @import("../../canonicalize.zig");
@ -342,7 +343,7 @@ test "complex case with redundant, shadowing, and not implemented" {
try testing.expect(found_not_implemented);
}
test "exposed_by_str is populated correctly" {
test "exposed_items is populated correctly" {
const allocator = testing.allocator;
const source =
@ -365,16 +366,23 @@ test "exposed_by_str is populated correctly" {
try canonicalizer.canonicalizeFile();
// Check that exposed_by_str contains all exposed items
try testing.expect(env.exposed_by_str.contains("foo"));
try testing.expect(env.exposed_by_str.contains("bar"));
try testing.expect(env.exposed_by_str.contains("MyType"));
// Check that exposed_items contains the correct number of items
// The exposed items were added during canonicalization
// Should have exactly 3 entries (duplicates not stored)
try testing.expectEqual(@as(usize, 3), env.exposed_by_str.count());
try testing.expectEqual(@as(usize, 3), env.exposed_items.count());
// Check that exposed_items contains all exposed items
const foo_idx = env.idents.findByString("foo").?;
const bar_idx = env.idents.findByString("bar").?;
const mytype_idx = env.idents.findByString("MyType").?;
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(foo_idx)));
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(bar_idx)));
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(mytype_idx)));
}
test "exposed_by_str persists after canonicalization" {
test "exposed_items persists after canonicalization" {
const allocator = testing.allocator;
const source =
@ -397,16 +405,20 @@ test "exposed_by_str persists after canonicalization" {
try canonicalizer.canonicalizeFile();
// All exposed items should be in exposed_by_str, even those not implemented
try testing.expect(env.exposed_by_str.contains("x"));
try testing.expect(env.exposed_by_str.contains("y"));
try testing.expect(env.exposed_by_str.contains("z"));
// All exposed items should be in exposed_items, even those not implemented
const x_idx = env.idents.findByString("x").?;
const y_idx = env.idents.findByString("y").?;
const z_idx = env.idents.findByString("z").?;
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(x_idx)));
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(y_idx)));
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(z_idx)));
// Verify the map persists in env after canonicalization is complete
try testing.expectEqual(@as(usize, 3), env.exposed_by_str.count());
try testing.expectEqual(@as(usize, 3), env.exposed_items.count());
}
test "exposed_by_str never has entries removed" {
test "exposed_items never has entries removed" {
const allocator = testing.allocator;
const source =
@ -429,13 +441,57 @@ test "exposed_by_str never has entries removed" {
try canonicalizer.canonicalizeFile();
// All exposed items should remain in exposed_by_str
// All exposed items should remain in exposed_items
// Even though foo appears twice and baz is not implemented,
// exposed_by_str should have all unique exposed identifiers
try testing.expect(env.exposed_by_str.contains("foo"));
try testing.expect(env.exposed_by_str.contains("bar"));
try testing.expect(env.exposed_by_str.contains("baz"));
// exposed_items should have all unique exposed identifiers
const foo_idx = env.idents.findByString("foo").?;
const bar_idx = env.idents.findByString("bar").?;
const baz_idx = env.idents.findByString("baz").?;
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(foo_idx)));
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(bar_idx)));
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(baz_idx)));
// Should have exactly 3 unique entries
try testing.expectEqual(@as(usize, 3), env.exposed_by_str.count());
try testing.expectEqual(@as(usize, 3), env.exposed_items.count());
}
// Regression test: identifier attributes (e.g. the effectful "!" suffix) must be
// part of the exposed_items key, so `foo` and `foo!` remain distinct entries.
test "exposed_items handles identifiers with different attributes" {
const allocator = testing.allocator;
// Module exposing foo and foo! - these should be treated as different identifiers
// Note: Using foo and foo! to test that attributes are properly included in the key
const source =
\\module [foo, foo!]
\\
\\foo = 42
\\foo! = \x -> x + 1
;
// Parse and canonicalize the module end-to-end so exposed_items is populated
// the same way production code paths populate it.
var env = try ModuleEnv.init(allocator, source);
defer env.deinit();
try env.initCIRFields(allocator, "Test");
var ast = try parse.parse(&env);
defer ast.deinit(allocator);
var canonicalizer = try canonicalize.init(&env, &ast, null);
defer canonicalizer.deinit();
try canonicalizer.canonicalizeFile();
// Both should be in exposed_items as separate entries
const foo_idx = env.idents.findByString("foo").?;
const foo_effectful_idx = env.idents.findByString("foo!").?;
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(foo_idx)));
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(foo_effectful_idx)));
// Should have exactly 2 entries - if we only used u29 without attributes, they might incorrectly merge
try testing.expectEqual(@as(usize, 2), env.exposed_items.count());
// Verify they have different full u32 values (index + attributes)
const foo_u32 = @as(u32, @bitCast(foo_idx));
const foo_effectful_u32 = @as(u32, @bitCast(foo_effectful_idx));
try testing.expect(foo_u32 != foo_effectful_u32);
}

View file

@ -59,10 +59,15 @@ test "import validation - mix of MODULE NOT FOUND, TYPE NOT EXPOSED, VALUE NOT E
}
// Add exposed items to Json module
try json_env.exposed_by_str.put(allocator, "decode", {});
try json_env.exposed_by_str.put(allocator, "encode", {});
try json_env.exposed_by_str.put(allocator, "JsonError", {});
try json_env.exposed_by_str.put(allocator, "DecodeProblem", {});
const Ident = base.Ident;
const decode_idx = try json_env.idents.insert(allocator, Ident.for_text("decode"));
try json_env.exposed_items.addExposedById(allocator, @bitCast(decode_idx));
const encode_idx = try json_env.idents.insert(allocator, Ident.for_text("encode"));
try json_env.exposed_items.addExposedById(allocator, @bitCast(encode_idx));
const json_error_idx = try json_env.idents.insert(allocator, Ident.for_text("JsonError"));
try json_env.exposed_items.addExposedById(allocator, @bitCast(json_error_idx));
const decode_problem_idx = try json_env.idents.insert(allocator, Ident.for_text("DecodeProblem"));
try json_env.exposed_items.addExposedById(allocator, @bitCast(decode_problem_idx));
try module_envs.put("Json", json_env);
@ -75,9 +80,12 @@ test "import validation - mix of MODULE NOT FOUND, TYPE NOT EXPOSED, VALUE NOT E
}
// Add exposed items to Utils module
try utils_env.exposed_by_str.put(allocator, "map", {});
try utils_env.exposed_by_str.put(allocator, "filter", {});
try utils_env.exposed_by_str.put(allocator, "Result", {});
const map_idx = try utils_env.idents.insert(allocator, Ident.for_text("map"));
try utils_env.exposed_items.addExposedById(allocator, @bitCast(map_idx));
const filter_idx = try utils_env.idents.insert(allocator, Ident.for_text("filter"));
try utils_env.exposed_items.addExposedById(allocator, @bitCast(filter_idx));
const result_idx = try utils_env.idents.insert(allocator, Ident.for_text("Result"));
try utils_env.exposed_items.addExposedById(allocator, @bitCast(result_idx));
try module_envs.put("Utils", utils_env);
@ -502,7 +510,7 @@ test "module-qualified lookups with e_lookup_external" {
try expectEqual(true, found_dict_empty);
}
test "exposed_nodes - tracking CIR node indices for exposed items" {
test "exposed_items - tracking CIR node indices for exposed items" {
var gpa_state = std.heap.GeneralPurposeAllocator(.{ .safety = true }){};
defer std.debug.assert(gpa_state.deinit() == .ok);
const allocator = gpa_state.allocator();
@ -519,16 +527,20 @@ test "exposed_nodes - tracking CIR node indices for exposed items" {
allocator.destroy(math_env);
}
// Add exposed items
try math_env.exposed_by_str.put(allocator, "add", {});
try math_env.exposed_by_str.put(allocator, "multiply", {});
try math_env.exposed_by_str.put(allocator, "PI", {});
// Add exposed items and set their node indices
const Ident = base.Ident;
const add_idx = try math_env.idents.insert(allocator, Ident.for_text("add"));
try math_env.exposed_items.addExposedById(allocator, @bitCast(add_idx));
const multiply_idx = try math_env.idents.insert(allocator, Ident.for_text("multiply"));
try math_env.exposed_items.addExposedById(allocator, @bitCast(multiply_idx));
const pi_idx = try math_env.idents.insert(allocator, Ident.for_text("PI"));
try math_env.exposed_items.addExposedById(allocator, @bitCast(pi_idx));
// Simulate having CIR node indices for these exposed items
// In real usage, these would be set during canonicalization of MathUtils
try math_env.exposed_nodes.put(allocator, "add", 100);
try math_env.exposed_nodes.put(allocator, "multiply", 200);
try math_env.exposed_nodes.put(allocator, "PI", 300);
try math_env.exposed_items.setNodeIndexById(allocator, @bitCast(add_idx), 100);
try math_env.exposed_items.setNodeIndexById(allocator, @bitCast(multiply_idx), 200);
try math_env.exposed_items.setNodeIndexById(allocator, @bitCast(pi_idx), 300);
try module_envs.put("MathUtils", math_env);
@ -586,15 +598,16 @@ test "exposed_nodes - tracking CIR node indices for exposed items" {
try expectEqual(true, found_multiply_with_idx_200);
try expectEqual(true, found_pi_with_idx_300);
// Test case where exposed_nodes is not populated (should get 0)
// Test case where node index is not populated (should get 0)
const empty_env = try allocator.create(ModuleEnv);
empty_env.* = try ModuleEnv.init(allocator, "");
defer {
empty_env.deinit();
allocator.destroy(empty_env);
}
try empty_env.exposed_by_str.put(allocator, "undefined", {});
// Don't add to exposed_nodes - should default to 0
const undefined_idx = try empty_env.idents.insert(allocator, Ident.for_text("undefined"));
try empty_env.exposed_items.addExposedById(allocator, @bitCast(undefined_idx));
// Don't set node index - should default to 0
try module_envs.put("EmptyModule", empty_env);
const source2 =

View file

@ -860,10 +860,16 @@ pub fn checkExpr(self: *Self, expr_idx: ModuleEnv.Expr.Idx) std.mem.Allocator.Er
if (origin_module) |module| {
// Look up the method in the origin module's exports
// Need to convert identifier from current module to target module
const method_name_str = self.cir.idents.getText(dot_access.field_name);
// Search through the module's exposed nodes
if (module.exposed_nodes.get(method_name_str)) |node_idx| {
// Search through the module's exposed items
const node_idx_opt = if (module.idents.findByString(method_name_str)) |target_ident|
module.exposed_items.getNodeIndexById(self.gpa, @bitCast(target_ident))
else
null;
if (node_idx_opt) |node_idx| {
// Found the method!
const target_expr_idx = @as(ModuleEnv.Expr.Idx, @enumFromInt(node_idx));

View file

@ -0,0 +1,288 @@
//! A specialized collection for tracking exposed items in modules using interned IDs.
//!
//! This combines the functionality of exposed_by_str and exposed_nodes into a single
//! SortedArrayBuilder to save memory and simplify the API. It uses interned identifier
//! indices (u32 matching Ident.Idx) as keys and u16 values (the node indices).
//!
//! A value of 0 means "exposed but not yet defined", while non-zero values
//! are actual node indices.
//!
//! Note: This module only provides ID-based methods. String-to-ID conversion must be
//! handled by the caller who has access to the Ident.Store.
const std = @import("std");
const Allocator = std.mem.Allocator;
const SortedArrayBuilder = @import("SortedArrayBuilder.zig").SortedArrayBuilder;
// We use u32 which is the bit representation of base.Ident.Idx
// This includes both the 29-bit index AND the 3-bit attributes (effectful, ignored, reassignable)
// This is critical because foo, foo!, and _foo are different identifiers
const IdentIdx = u32;
/// A collection that tracks exposed items by their names and associated CIR node indices
pub const ExposedItems = struct {
/// Maps item name (as interned ID) -> CIR node index (0 means exposed but not defined)
items: SortedArrayBuilder(IdentIdx, u16),
const Self = @This();
/// Create an empty ExposedItems collection.
/// Allocates nothing until the first item is added.
pub fn init() Self {
    const empty_builder = SortedArrayBuilder(IdentIdx, u16).init();
    return .{ .items = empty_builder };
}
/// Release all memory owned by this collection.
/// Must be called with the same allocator that was passed to the mutating methods.
pub fn deinit(self: *Self, allocator: Allocator) void {
self.items.deinit(allocator);
}
/// Add an exposed item by its interned ID (pass @bitCast(base.Ident.Idx) to u32).
/// A fresh entry is recorded with node index 0, meaning "exposed but not yet defined".
pub fn addExposedById(self: *Self, allocator: Allocator, ident_idx: IdentIdx) !void {
    // Once the builder is sorted, re-inserting an existing key would let a
    // duplicate with value 0 clobber an already-recorded node index, so we
    // only insert when the key is genuinely absent.
    const already_present = self.items.sorted and self.items.get(allocator, ident_idx) != null;
    if (already_present) return;
    // Either the builder is still unsorted (duplicates get resolved later)
    // or the key is new; record it as exposed-but-undefined.
    try self.items.put(allocator, ident_idx, 0);
}
/// Set the node index for an exposed item by its interned ID (pass @bitCast(base.Ident.Idx) to u32).
/// If the identifier was never added as exposed, a new entry is created with this index.
pub fn setNodeIndexById(self: *Self, allocator: Allocator, ident_idx: IdentIdx, node_idx: u16) !void {
    // Sort (and dedupe) first so there is at most one live entry per key
    // before we scan for it.
    self.items.ensureSorted(allocator);
    // Linear scan over the entries, updating in place when the key is found.
    for (self.items.entries.items) |*entry| {
        if (entry.key == ident_idx) {
            entry.value = node_idx;
            return;
        }
    }
    // Key not present: append it with the given node index.
    try self.items.put(allocator, ident_idx, node_idx);
}
/// Check if an item is exposed by its interned ID (pass @bitCast(base.Ident.Idx) to u32).
/// NOTE: get() may lazily sort the underlying builder, hence the @constCast;
/// the logical contents are unchanged by the lookup.
pub fn containsById(self: *const Self, allocator: Allocator, ident_idx: IdentIdx) bool {
    return @constCast(self).items.get(allocator, ident_idx) != null;
}
/// Get the node index for an exposed item by its interned ID (pass @bitCast(base.Ident.Idx) to u32).
/// Returns null when the identifier is not exposed; a value of 0 means
/// "exposed but not yet defined".
/// NOTE: get() may lazily sort the underlying builder, hence the @constCast.
pub fn getNodeIndexById(self: *const Self, allocator: Allocator, ident_idx: IdentIdx) ?u16 {
    return @constCast(self).items.get(allocator, ident_idx);
}
/// Get the number of exposed items.
/// NOTE(review): if the builder still holds unsorted duplicate keys, this may
/// count them individually — confirm SortedArrayBuilder.count dedupes first.
pub fn count(self: *const Self) usize {
return self.items.count();
}
/// Ensure the array is sorted and deduplicated.
/// This should be called at the end of canonicalization, so that later
/// read-heavy phases don't pay the lazy-sort cost on first lookup.
pub fn ensureSorted(self: *Self, allocator: Allocator) void {
self.items.ensureSorted(allocator);
}
/// Detect duplicate exposed items for error reporting.
/// Returns the interned IDs of duplicates (caller must convert to strings).
/// NOTE(review): the caller presumably owns the returned slice and must free
/// it with `allocator` — confirm against SortedArrayBuilder.detectDuplicates.
pub fn detectDuplicates(self: *Self, allocator: Allocator) ![]IdentIdx {
return self.items.detectDuplicates(allocator);
}
/// Relocate pointers after memory movement, delegating to the underlying builder.
/// NOTE(review): `offset` is presumably a byte delta applied to internal
/// pointers — confirm against SortedArrayBuilder.relocate.
pub fn relocate(self: *Self, offset: isize) void {
self.items.relocate(offset);
}
/// Calculate the size needed to serialize this ExposedItems.
/// Layout: a u32 entry count followed by one (u32 key, u16 value) record
/// per entry, rounded up to SERIALIZATION_ALIGNMENT.
pub fn serializedSize(self: *const Self) usize {
    // Every entry contributes a fixed number of bytes, so the total is a
    // simple multiply — no need to loop over the entries.
    const entry_count = self.items.entries.items.len;
    const size = @sizeOf(u32) + entry_count * (@sizeOf(u32) + @sizeOf(u16));
    // Align to SERIALIZATION_ALIGNMENT
    const SERIALIZATION_ALIGNMENT = 16;
    return std.mem.alignForward(usize, size, SERIALIZATION_ALIGNMENT);
}
/// Serialize this ExposedItems into `buffer`.
/// Wire format: entry count (u32 LE), then for each entry its interned key
/// (u32 LE) followed by its node index (u16 LE); any remaining bytes of the
/// aligned region are zero-filled for deterministic output.
/// Returns error.BufferTooSmall if `buffer` cannot hold serializedSize() bytes;
/// otherwise returns the slice of `buffer` that was written.
pub fn serializeInto(self: *const Self, buffer: []u8) ![]u8 {
    const total = self.serializedSize();
    if (buffer.len < total) return error.BufferTooSmall;

    var cursor: usize = 0;

    // Entry count first.
    std.mem.writeInt(u32, buffer[cursor..][0..4], @intCast(self.items.entries.items.len), .little);
    cursor += @sizeOf(u32);

    // Then each (key, value) pair in storage order.
    for (self.items.entries.items) |entry| {
        std.mem.writeInt(u32, buffer[cursor..][0..4], entry.key, .little);
        cursor += @sizeOf(u32);
        std.mem.writeInt(u16, buffer[cursor..][0..2], entry.value, .little);
        cursor += @sizeOf(u16);
    }

    // Zero the alignment padding.
    if (cursor < total) {
        @memset(buffer[cursor..total], 0);
    }

    return buffer[0..total];
}
/// Deserialize an ExposedItems from `buffer` (the format written by serializeInto).
/// Returns error.BufferTooSmall if the buffer ends before the declared entries.
/// On success the caller owns the result; on error partial state is freed.
pub fn deserializeFrom(buffer: []const u8, allocator: Allocator) !Self {
    if (buffer.len < @sizeOf(u32)) return error.BufferTooSmall;

    var cursor: usize = 0;
    const entry_count = std.mem.readInt(u32, buffer[cursor..][0..4], .little);
    cursor += @sizeOf(u32);

    var result = Self.init();
    errdefer result.deinit(allocator);

    var remaining: u32 = entry_count;
    while (remaining > 0) : (remaining -= 1) {
        // Key: the full u32 interned ID (index plus attribute bits).
        if (cursor + @sizeOf(u32) > buffer.len) return error.BufferTooSmall;
        const key = std.mem.readInt(u32, buffer[cursor..][0..4], .little);
        cursor += @sizeOf(u32);

        // Value: the associated node index.
        if (cursor + @sizeOf(u16) > buffer.len) return error.BufferTooSmall;
        const value = std.mem.readInt(u16, buffer[cursor..][0..2], .little);
        cursor += @sizeOf(u16);

        try result.items.put(allocator, key, value);
    }

    return result;
}
/// Append to iovec writer for serialization
/// (delegates to the underlying SortedArrayBuilder; returns the offset of the
/// serialized struct within the writer's output)
pub fn appendToIovecs(self: *const Self, writer: anytype) !usize {
    return self.items.appendToIovecs(writer);
}
/// Iterator over all exposed items, yielding (interned ident, node index) pairs.
pub const Iterator = struct {
    items: []const SortedArrayBuilder(IdentIdx, u16).Entry,
    index: usize,

    /// Returns the next pair, or null once all entries have been yielded.
    pub fn next(self: *Iterator) ?struct { ident_idx: IdentIdx, node_idx: u16 } {
        if (self.index >= self.items.len) return null;
        const entry = self.items[self.index];
        self.index += 1;
        return .{ .ident_idx = entry.key, .node_idx = entry.value };
    }
};
/// Get an iterator over all exposed items
/// Iterates the entries in their current order (ascending by key only after
/// ensureSorted has run).
pub fn iterator(self: *const Self) Iterator {
    return .{ .items = self.items.entries.items, .index = 0 };
}
};
test "ExposedItems basic operations" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var store = ExposedItems.init();
    defer store.deinit(gpa);

    // Register some exposed names (mirrors the old exposed_by_str.put usage).
    try store.addExposed(gpa, "foo");
    try store.addExposed(gpa, "bar");
    try store.addExposed(gpa, "MyType");

    try testing.expectEqual(@as(usize, 3), store.count());

    // Membership checks (mirrors exposed_by_str.contains).
    try testing.expect(store.contains(gpa, "foo"));
    try testing.expect(store.contains(gpa, "bar"));
    try testing.expect(store.contains(gpa, "MyType"));
    try testing.expect(!store.contains(gpa, "missing"));

    // Before a node index is assigned, lookups yield the default of 0.
    try testing.expectEqual(@as(?u16, 0), store.getNodeIndex(gpa, "foo"));

    // Assign node indices (mirrors exposed_nodes.put).
    try store.setNodeIndex(gpa, "foo", 42);
    try store.setNodeIndex(gpa, "bar", 84);

    try testing.expectEqual(@as(?u16, 42), store.getNodeIndex(gpa, "foo"));
    try testing.expectEqual(@as(?u16, 84), store.getNodeIndex(gpa, "bar"));
    try testing.expectEqual(@as(?u16, 0), store.getNodeIndex(gpa, "MyType")); // never set
    try testing.expectEqual(@as(?u16, null), store.getNodeIndex(gpa, "missing")); // not exposed
}
test "ExposedItems handles duplicates" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var store = ExposedItems.init();
    defer store.deinit(gpa);

    // Expose "foo", give it a node index, then expose it again.
    try store.addExposed(gpa, "foo");
    try store.setNodeIndex(gpa, "foo", 42);
    try store.addExposed(gpa, "foo"); // duplicate — must not clobber the node index

    // Trigger sorting/deduplication explicitly.
    store.ensureSorted(gpa);

    // The assigned node index survives deduplication.
    try testing.expectEqual(@as(?u16, 42), store.getNodeIndex(gpa, "foo"));
}
test "ExposedItems detectDuplicates" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var store = ExposedItems.init();
    defer store.deinit(gpa);

    // "foo" and "bar" are each exposed twice.
    try store.addExposed(gpa, "foo");
    try store.addExposed(gpa, "bar");
    try store.addExposed(gpa, "foo"); // duplicate
    try store.setNodeIndex(gpa, "bar", 42);
    try store.addExposed(gpa, "bar"); // duplicate

    const duplicates = try store.detectDuplicates(gpa);
    defer gpa.free(duplicates);

    // Both duplicated names are reported, once each.
    try testing.expectEqual(@as(usize, 2), duplicates.len);
}

View file

@ -0,0 +1,450 @@
//! A builder for creating sorted arrays with binary search lookups.
//!
//! SortedArrayBuilder provides a way to build key-value mappings that are
//! optimized for small sizes and deterministic serialization. It maintains
//! a sorted array internally, enabling efficient binary search lookups.
//!
//! Features:
//! - Supports both string and numeric key types
//! - Maintains sorted order for deterministic serialization
//! - Efficient binary search lookups O(log n)
//! - Zero-copy deserialization
//! - Minimal memory overhead
const std = @import("std");
const Allocator = std.mem.Allocator;
// Import base for both testing and runtime
const base = @import("base");
const serialization = @import("serialization");
const IovecWriter = serialization.IovecWriter;
/// A builder for creating sorted arrays directly without using hash maps
/// This is more efficient when we know we won't have duplicates
pub fn SortedArrayBuilder(comptime K: type, comptime V: type) type {
    return struct {
        // Insertion-ordered backing storage; lazily sorted on first lookup.
        entries: std.ArrayListUnmanaged(Entry) = .{},
        // True while entries are known to be in ascending key order.
        sorted: bool = true,

        const Self = @This();

        /// One key/value pair held by the builder.
        pub const Entry = struct {
            key: K,
            value: V,

            // Sort order: lexicographic for string keys, numeric `<` for
            // integer/enum keys; any other key type is a compile error.
            fn lessThan(_: void, a: Entry, b: Entry) bool {
                if (K == []const u8) {
                    return std.mem.lessThan(u8, a.key, b.key);
                } else if (@typeInfo(K) == .int or @typeInfo(K) == .@"enum") {
                    return a.key < b.key;
                } else {
                    @compileError("Unsupported key type for SortedArrayBuilder");
                }
            }
        };

        /// Create an empty builder (no allocation until the first put).
        pub fn init() Self {
            return .{};
        }

        /// Free all owned memory. String keys were duplicated by `put`,
        /// so they are freed here too.
        pub fn deinit(self: *Self, allocator: Allocator) void {
            if (K == []const u8) {
                // Free string keys
                for (self.entries.items) |entry| {
                    allocator.free(entry.key);
                }
            }
            self.entries.deinit(allocator);
        }

        /// Add a key-value pair
        /// String keys are duplicated so the builder owns them; appending a
        /// key that is not strictly greater than the current last key clears
        /// the `sorted` flag, deferring the sort to the next lookup.
        pub fn put(self: *Self, allocator: Allocator, key: K, value: V) !void {
            const new_key = if (K == []const u8) try allocator.dupe(u8, key) else key;
            // Check if we need to maintain sorted order
            if (self.sorted and self.entries.items.len > 0) {
                const last = self.entries.items[self.entries.items.len - 1];
                if (K == []const u8) {
                    self.sorted = std.mem.lessThan(u8, last.key, new_key);
                } else {
                    self.sorted = last.key < new_key;
                }
            }
            try self.entries.append(allocator, .{ .key = new_key, .value = value });
        }

        /// Get value by key (requires sorting first if not already sorted)
        /// O(log n) binary search after the lazy sort/deduplication.
        pub fn get(self: *Self, allocator: Allocator, key: K) ?V {
            self.ensureSorted(allocator);
            var left: usize = 0;
            var right: usize = self.entries.items.len;
            while (left < right) {
                const mid = left + (right - left) / 2;
                const mid_key = self.entries.items[mid].key;
                // Three-way comparison: byte order for strings, `<`/`==` for
                // numeric keys.
                const cmp = if (K == []const u8)
                    std.mem.order(u8, mid_key, key)
                else if (mid_key == key)
                    std.math.Order.eq
                else if (mid_key < key)
                    std.math.Order.lt
                else
                    std.math.Order.gt;
                switch (cmp) {
                    .eq => return self.entries.items[mid].value,
                    .lt => left = mid + 1,
                    .gt => right = mid,
                }
            }
            return null;
        }

        /// Ensure the array is sorted and deduplicate any duplicate entries
        pub fn ensureSorted(self: *Self, allocator: Allocator) void {
            if (!self.sorted) {
                // NOTE(review): pdq is an unstable sort, so entries with equal
                // keys may be reordered before deduplication; "keep the last
                // occurrence" below then refers to post-sort order, not
                // insertion order — confirm this is acceptable.
                std.sort.pdq(Entry, self.entries.items, {}, Entry.lessThan);
                self.sorted = true;
                // Check for and report duplicates, then deduplicate
                self.deduplicateAndReport(allocator);
            }
        }

        /// Check for duplicates, report them, and remove duplicates keeping the last occurrence
        fn deduplicateAndReport(self: *Self, allocator: Allocator) void {
            if (self.entries.items.len <= 1) return;
            // First pass: detect and report duplicates
            // NOTE(review): this pass is currently a no-op (the duplicate
            // branch is empty); reporting is done separately via
            // detectDuplicates(). Consider removing the pass.
            var i: usize = 1;
            while (i < self.entries.items.len) {
                const prev_entry = self.entries.items[i - 1];
                const curr_entry = self.entries.items[i];
                const is_duplicate = if (K == []const u8)
                    std.mem.eql(u8, prev_entry.key, curr_entry.key)
                else
                    prev_entry.key == curr_entry.key;
                if (is_duplicate) {
                    // Note: Duplicate detection is handled by caller via detectDuplicates() method
                }
                i += 1;
            }
            // Second pass: deduplicate by keeping last occurrence
            // NOTE(review): this look-ahead scan is O(n^2); since the array is
            // already sorted, duplicates are adjacent and a single linear pass
            // would suffice.
            var write_index: usize = 0;
            for (self.entries.items, 0..) |entry, read_index| {
                var should_keep = true;
                // Look ahead to see if there's a duplicate later
                for (self.entries.items[read_index + 1 ..]) |future_entry| {
                    const is_duplicate = if (K == []const u8)
                        std.mem.eql(u8, entry.key, future_entry.key)
                    else
                        entry.key == future_entry.key;
                    if (is_duplicate) {
                        should_keep = false;
                        break;
                    }
                }
                if (should_keep) {
                    if (write_index != read_index) {
                        self.entries.items[write_index] = entry;
                    }
                    write_index += 1;
                } else {
                    // Free the string key that we're not keeping
                    if (K == []const u8) {
                        allocator.free(entry.key);
                    }
                }
            }
            // Update the length to reflect deduplicated entries
            self.entries.items.len = write_index;
        }

        /// Detect duplicates without modifying the array - returns list of duplicate keys
        /// Caller owns the returned slice. For string keys the returned slices
        /// alias entries owned by the builder; they are invalidated when the
        /// builder deduplicates (which frees dropped keys) or is deinitialized.
        /// Sorts the entries as a side effect if they were unsorted.
        pub fn detectDuplicates(self: *Self, allocator: Allocator) ![]K {
            var duplicates = std.ArrayList(K).init(allocator);
            if (self.entries.items.len <= 1) return duplicates.toOwnedSlice();
            // Ensure sorted first
            if (!self.sorted) {
                std.sort.pdq(Entry, self.entries.items, {}, Entry.lessThan);
                self.sorted = true;
            }
            // Track keys already reported so each duplicate key is listed once.
            var reported_keys = if (K == []const u8)
                std.StringHashMap(void).init(allocator)
            else
                std.AutoHashMap(K, void).init(allocator);
            defer reported_keys.deinit();
            var i: usize = 1;
            while (i < self.entries.items.len) {
                const prev_entry = self.entries.items[i - 1];
                const curr_entry = self.entries.items[i];
                const is_duplicate = if (K == []const u8)
                    std.mem.eql(u8, prev_entry.key, curr_entry.key)
                else
                    prev_entry.key == curr_entry.key;
                if (is_duplicate) {
                    // Report duplicate only once per unique key
                    const result = try reported_keys.getOrPut(curr_entry.key);
                    if (!result.found_existing) {
                        try duplicates.append(curr_entry.key);
                    }
                }
                i += 1;
            }
            return duplicates.toOwnedSlice();
        }

        /// Relocate pointers after memory movement
        /// Adds `offset` to the entries pointer, unless it holds the
        /// empty-array sentinel written by appendToIovecs.
        pub fn relocate(self: *Self, offset: isize) void {
            // Relocate the entries array pointer
            if (self.entries.items.len > 0) {
                const old_ptr = @intFromPtr(self.entries.items.ptr);
                // Skip relocation if this is a sentinel value
                // Define sentinel value locally since iovec_serialize is not available
                const EMPTY_ARRAY_SENTINEL: usize = 0xDEADBEEF;
                if (old_ptr != EMPTY_ARRAY_SENTINEL) {
                    const new_ptr = @as(usize, @intCast(@as(isize, @intCast(old_ptr)) + offset));
                    self.entries.items.ptr = @ptrFromInt(new_ptr);
                }
            }
        }

        /// Get the number of entries
        /// (includes duplicates until ensureSorted deduplicates them)
        pub fn count(self: *const Self) usize {
            return self.entries.items.len;
        }

        /// Append this SortedArrayBuilder to an iovec writer for serialization
        /// Panics if the builder is not sorted. Writes the entry data, then a
        /// copy of this struct whose entries pointer has been replaced by the
        /// data's offset (or the 0xDEADBEEF sentinel for an empty array).
        /// Returns the offset of the serialized struct.
        pub fn appendToIovecs(self: *const Self, writer: *IovecWriter) !usize {
            // Must be sorted before serialization
            if (!self.sorted) {
                @panic("SortedArrayBuilder must be sorted before serialization");
            }
            // Create a mutable copy of self that we can modify
            var builder_copy = self.*;
            // Create a buffer for the final serialized struct
            const builder_copy_buffer = try writer.allocator.alloc(u8, @sizeOf(Self));
            // Track this allocation so it gets freed when writer is deinitialized
            try writer.owned_buffers.append(builder_copy_buffer);
            // Serialize entries array data
            const entries_offset = if (self.entries.items.len > 0) blk: {
                const data = std.mem.sliceAsBytes(self.entries.items);
                const offset = try writer.appendBytes(Entry, data);
                break :blk offset;
            } else 0;
            // Update pointer in the copy to use offset
            // For empty arrays, use sentinel value instead of 0 to avoid null pointer
            builder_copy.entries.items.ptr = if (entries_offset == 0)
                @ptrFromInt(0xDEADBEEF) // EMPTY_ARRAY_SENTINEL
            else
                @ptrFromInt(entries_offset);
            builder_copy.entries.items.len = self.entries.items.len;
            // Note: For string keys, we don't serialize the string data separately here.
            // This assumes that the strings are already managed elsewhere (e.g., in an interner)
            // and we're just storing pointers/slices to them. If string keys need to be
            // serialized as well, that would require additional handling.
            // Copy the modified struct to the buffer
            @memcpy(builder_copy_buffer, std.mem.asBytes(&builder_copy));
            // Now that all pointers have been converted to offsets, add the copy to iovecs
            const struct_offset = try writer.appendBytes(Self, builder_copy_buffer);
            return struct_offset;
        }
    };
}
test "SortedArrayBuilder basic operations" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var builder = SortedArrayBuilder([]const u8, u32).init();
    defer builder.deinit(gpa);

    // Insert out of order so the first lookup must trigger the lazy sort.
    try builder.put(gpa, "zebra", 100);
    try builder.put(gpa, "apple", 50);
    try builder.put(gpa, "banana", 150);
    try builder.put(gpa, "cherry", 30);

    try testing.expectEqual(@as(usize, 4), builder.count());

    // Lookups: each hit returns its value; a missing key returns null.
    try testing.expectEqual(@as(?u32, 50), builder.get(gpa, "apple"));
    try testing.expectEqual(@as(?u32, 150), builder.get(gpa, "banana"));
    try testing.expectEqual(@as(?u32, 30), builder.get(gpa, "cherry"));
    try testing.expectEqual(@as(?u32, 100), builder.get(gpa, "zebra"));
    try testing.expectEqual(@as(?u32, null), builder.get(gpa, "missing"));

    // Smoke-test iovec serialization: it should emit at least one iovec.
    var writer = IovecWriter.init(gpa);
    defer writer.deinit();
    _ = try builder.appendToIovecs(&writer);
    try testing.expect(writer.iovecs.items.len > 0);
}
test "SortedArrayBuilder maintains sorted order when added in order" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var builder = SortedArrayBuilder([]const u8, u16).init();
    defer builder.deinit(gpa);

    // Keys arrive already in ascending order...
    try builder.put(gpa, "aaa", 1);
    try builder.put(gpa, "bbb", 2);
    try builder.put(gpa, "ccc", 3);
    try builder.put(gpa, "ddd", 4);

    // ...so the builder never flags itself as unsorted.
    try testing.expect(builder.sorted);

    // Lookups succeed without needing a sort pass.
    try testing.expectEqual(@as(?u16, 1), builder.get(gpa, "aaa"));
    try testing.expectEqual(@as(?u16, 4), builder.get(gpa, "ddd"));
}
test "SortedArrayBuilder detectDuplicates example usage" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var builder = SortedArrayBuilder(u32, u16).init();
    defer builder.deinit(gpa);

    // Keys 100 and 200 are each inserted twice; 300 once.
    try builder.put(gpa, 100, 1);
    try builder.put(gpa, 200, 2);
    try builder.put(gpa, 100, 3); // duplicate of 100
    try builder.put(gpa, 300, 4);
    try builder.put(gpa, 200, 5); // duplicate of 200

    // Detection runs before deduplication and lists each duplicate key once.
    const duplicates = try builder.detectDuplicates(gpa);
    defer gpa.free(duplicates);

    // Example: report duplicates. In a real compiler this loop would push a
    // diagnostic instead, e.g.:
    //   try cir.pushDiagnostic(CIR.Diagnostic{ .duplicate_exposed_item = ... });
    for (duplicates) |duplicate_key| {
        std.debug.print("Found duplicate key: {d}\n", .{duplicate_key});
    }
    try testing.expectEqual(@as(usize, 2), duplicates.len);

    // After detection, normal sorting/deduplication still works:
    builder.ensureSorted(gpa);
    try testing.expectEqual(@as(usize, 3), builder.count()); // deduplicated
    try testing.expectEqual(@as(?u16, 3), builder.get(gpa, 100)); // last value kept
    try testing.expectEqual(@as(?u16, 5), builder.get(gpa, 200)); // last value kept
}
test "SortedArrayBuilder handles duplicates with string keys" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var builder = SortedArrayBuilder([]const u8, u32).init();
    defer builder.deinit(gpa);

    // "apple" and "banana" are each inserted twice with different values.
    try builder.put(gpa, "apple", 10);
    try builder.put(gpa, "banana", 20);
    try builder.put(gpa, "apple", 30); // duplicate key
    try builder.put(gpa, "cherry", 40);
    try builder.put(gpa, "banana", 50); // duplicate key

    // Count reflects raw inserts until deduplication runs.
    try testing.expectEqual(@as(usize, 5), builder.count());

    builder.ensureSorted(gpa);

    // Deduplication keeps exactly one entry per key...
    try testing.expectEqual(@as(usize, 3), builder.count());

    // ...and the surviving value is the last one inserted for each key.
    try testing.expectEqual(@as(?u32, 30), builder.get(gpa, "apple"));
    try testing.expectEqual(@as(?u32, 50), builder.get(gpa, "banana"));
    try testing.expectEqual(@as(?u32, 40), builder.get(gpa, "cherry"));
}
test "SortedArrayBuilder handles duplicates with numeric keys" {
    const testing = std.testing;
    const allocator = testing.allocator;
    var builder = SortedArrayBuilder(u32, []const u8).init();
    defer builder.deinit(allocator);
    // Add duplicate keys with different values
    try builder.put(allocator, 100, "first");
    try builder.put(allocator, 200, "second");
    try builder.put(allocator, 100, "updated_first"); // duplicate key
    try builder.put(allocator, 300, "third");
    // Initial count includes duplicates
    try testing.expectEqual(@as(usize, 4), builder.count());
    // Force sorting and deduplication
    builder.ensureSorted(allocator);
    // Count should be reduced after deduplication
    try testing.expectEqual(@as(usize, 3), builder.count());
    // Should return the last value for each key.
    // Compare string CONTENTS, not slice identity: expectEqual on
    // ?[]const u8 compares ptr+len and only passed previously because the
    // compiler deduplicates identical string literals.
    try testing.expectEqualStrings("updated_first", builder.get(allocator, 100) orelse return error.TestExpectedValue);
    try testing.expectEqualStrings("second", builder.get(allocator, 200) orelse return error.TestExpectedValue);
    try testing.expectEqualStrings("third", builder.get(allocator, 300) orelse return error.TestExpectedValue);
}
test "SortedArrayBuilder no duplicates case" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var builder = SortedArrayBuilder([]const u8, u16).init();
    defer builder.deinit(gpa);

    // Three distinct keys, no duplicates anywhere.
    try builder.put(gpa, "unique1", 1);
    try builder.put(gpa, "unique2", 2);
    try builder.put(gpa, "unique3", 3);

    const count_before = builder.count();

    // Sorting a duplicate-free builder must not drop any entries.
    builder.ensureSorted(gpa);
    try testing.expectEqual(count_before, builder.count());

    // Every value remains retrievable after the sort.
    try testing.expectEqual(@as(?u16, 1), builder.get(gpa, "unique1"));
    try testing.expectEqual(@as(?u16, 2), builder.get(gpa, "unique2"));
    try testing.expectEqual(@as(?u16, 3), builder.get(gpa, "unique3"));
}

View file

@ -17,6 +17,9 @@ pub const SafeMultiList = @import("safe_list.zig").SafeMultiList;
pub const SafeStringHashMap = @import("safe_hash_map.zig").SafeStringHashMap;
pub const SortedArrayBuilder = @import("SortedArrayBuilder.zig").SortedArrayBuilder;
pub const ExposedItems = @import("ExposedItems.zig").ExposedItems;
/// A range that must have at least one element
pub const NonEmptyRange = struct {
/// Starting index (inclusive)

View file

@ -68,12 +68,8 @@ idents: Ident.Store,
ident_ids_for_slicing: collections.SafeList(Ident.Idx),
strings: StringLiteral.Store,
types: types_mod.Store,
/// Map of exposed items by their string representation (not interned)
/// This is built during canonicalization and preserved for later use
exposed_by_str: collections.SafeStringHashMap(void),
/// Map of exposed item names to their node indices (stored as u16)
/// This is populated during canonicalization to allow cross-module lookups
exposed_nodes: collections.SafeStringHashMap(u16),
/// The items (a combination of types and values) that this module exposes
exposed_items: collections.ExposedItems,
/// Line starts for error reporting. We retain only start and offset positions in the IR
/// and then use these line starts to calculate the line number and column number as required.
@ -130,8 +126,7 @@ pub fn init(gpa: std.mem.Allocator, source: []const u8) std.mem.Allocator.Error!
.ident_ids_for_slicing = try collections.SafeList(Ident.Idx).initCapacity(gpa, 256),
.strings = try StringLiteral.Store.initCapacityBytes(gpa, 4096),
.types = try types_mod.Store.initCapacity(gpa, 2048, 512),
.exposed_by_str = try collections.SafeStringHashMap(void).initCapacity(gpa, 64),
.exposed_nodes = try collections.SafeStringHashMap(u16).initCapacity(gpa, 64),
.exposed_items = collections.ExposedItems.init(),
.line_starts = try collections.SafeList(u32).initCapacity(gpa, 256),
.source = source,
// Initialize compilation fields with empty/default values
@ -152,8 +147,7 @@ pub fn deinit(self: *Self) void {
self.strings.deinit(self.gpa);
self.types.deinit();
self.line_starts.deinit(self.gpa);
self.exposed_by_str.deinit(self.gpa);
self.exposed_nodes.deinit(self.gpa);
self.exposed_items.deinit(self.gpa);
// Clean up compilation fields
self.external_decls.deinit(self.gpa);
self.imports.deinit(self.gpa);