mirror of
https://github.com/roc-lang/roc.git
synced 2025-12-23 08:48:03 +00:00
Merge remote-tracking branch 'origin/main' into serialize-idents
This commit is contained in:
commit
dbc99059d4
10 changed files with 948 additions and 131 deletions
|
|
@ -23,18 +23,9 @@ attributes: Attributes,
|
|||
|
||||
/// Create a new identifier from a string.
|
||||
pub fn for_text(text: []const u8) Ident {
|
||||
// Parse identifier attributes from the text
|
||||
const is_ignored = std.mem.startsWith(u8, text, "_");
|
||||
const is_effectful = std.mem.endsWith(u8, text, "!");
|
||||
// TODO: parse reassignable attribute (var keyword handling)
|
||||
|
||||
return Ident{
|
||||
.raw_text = text,
|
||||
.attributes = Attributes{
|
||||
.effectful = is_effectful,
|
||||
.ignored = is_ignored,
|
||||
.reassignable = false,
|
||||
},
|
||||
.attributes = Attributes.fromString(text),
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -68,18 +59,9 @@ pub fn from_bytes(bytes: []const u8) Error!Ident {
|
|||
}
|
||||
}
|
||||
|
||||
// Parse identifier attributes from the bytes
|
||||
const is_ignored = std.mem.startsWith(u8, bytes, "_");
|
||||
const is_effectful = std.mem.endsWith(u8, bytes, "!");
|
||||
// TODO: parse reassignable attribute (var keyword handling)
|
||||
|
||||
return Ident{
|
||||
.raw_text = bytes,
|
||||
.attributes = Attributes{
|
||||
.effectful = is_effectful,
|
||||
.ignored = is_ignored,
|
||||
.reassignable = false,
|
||||
},
|
||||
.attributes = Attributes.fromString(bytes),
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -96,6 +78,14 @@ pub const Attributes = packed struct(u3) {
|
|||
effectful: bool,
|
||||
ignored: bool,
|
||||
reassignable: bool,
|
||||
|
||||
pub fn fromString(text: []const u8) Attributes {
|
||||
return .{
|
||||
.effectful = std.mem.endsWith(u8, text, "!"),
|
||||
.ignored = std.mem.startsWith(u8, text, "_"),
|
||||
.reassignable = false,
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
/// An interner for identifier names.
|
||||
|
|
@ -179,13 +169,23 @@ pub const Store = struct {
|
|||
return self.interner.contains(text);
|
||||
}
|
||||
|
||||
/// Find an identifier by its string, returning its index if it exists.
|
||||
/// This is different from insert in that it's guaranteed not to modify the store.
|
||||
pub fn findByString(self: *const Store, text: []const u8) ?Idx {
|
||||
// Look up in the interner without inserting
|
||||
const interner_idx = self.interner.strings.get(text) orelse return null;
|
||||
|
||||
// Create an Idx with inferred attributes from the text
|
||||
return Idx{
|
||||
.attributes = Attributes.fromString(text),
|
||||
.idx = @as(u29, @intCast(@intFromEnum(interner_idx))),
|
||||
};
|
||||
}
|
||||
|
||||
/// Freeze the identifier store, preventing any new entries from being added.
|
||||
pub fn freeze(self: *Store) void {
|
||||
self.interner.freeze();
|
||||
}
|
||||
|
||||
/// Calculate the size needed to serialize this Ident.Store
|
||||
pub fn serializedSize(self: *const Store) usize {
|
||||
var size: usize = 0;
|
||||
|
|
|
|||
50
src/cache/CacheModule.zig
vendored
50
src/cache/CacheModule.zig
vendored
|
|
@ -55,8 +55,7 @@ pub const Header = struct {
|
|||
ident_store: ComponentInfo,
|
||||
line_starts: ComponentInfo,
|
||||
types_store: ComponentInfo,
|
||||
exposed_by_str: ComponentInfo,
|
||||
exposed_nodes: ComponentInfo,
|
||||
exposed_items: ComponentInfo,
|
||||
external_decls: ComponentInfo,
|
||||
|
||||
/// Spans can be stored directly since they're small
|
||||
|
|
@ -68,7 +67,7 @@ pub const Header = struct {
|
|||
warning_count: u32,
|
||||
|
||||
/// Fixed padding to ensure alignment
|
||||
_padding: [16]u8 = [_]u8{0} ** 16,
|
||||
_padding: [24]u8 = [_]u8{0} ** 24,
|
||||
|
||||
/// Error specific to initializing a Header from bytes
|
||||
pub const InitError = error{
|
||||
|
|
@ -121,8 +120,7 @@ pub const CacheModule = struct {
|
|||
const ident_store_size = module_env.idents.serializedSize();
|
||||
const line_starts_size = module_env.line_starts.serializedSize();
|
||||
const types_store_size = module_env.types.serializedSize();
|
||||
const exposed_by_str_size = module_env.exposed_by_str.serializedSize();
|
||||
const exposed_nodes_size = module_env.exposed_nodes.serializedSize();
|
||||
const exposed_items_size = module_env.exposed_items.serializedSize();
|
||||
const external_decls_size = module_env.external_decls.serializedSize();
|
||||
|
||||
// Calculate aligned offsets
|
||||
|
|
@ -153,12 +151,8 @@ pub const CacheModule = struct {
|
|||
offset += @intCast(types_store_size);
|
||||
|
||||
offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT));
|
||||
const exposed_by_str_offset = offset;
|
||||
offset += @intCast(exposed_by_str_size);
|
||||
|
||||
offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT));
|
||||
const exposed_nodes_offset = offset;
|
||||
offset += @intCast(exposed_nodes_size);
|
||||
const exposed_items_offset = offset;
|
||||
offset += @intCast(exposed_items_size);
|
||||
|
||||
offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT));
|
||||
const external_decls_offset = offset;
|
||||
|
|
@ -188,8 +182,7 @@ pub const CacheModule = struct {
|
|||
.ident_store = .{ .offset = ident_store_offset, .length = @intCast(ident_store_size) },
|
||||
.line_starts = .{ .offset = line_starts_offset, .length = @intCast(line_starts_size) },
|
||||
.types_store = .{ .offset = types_store_offset, .length = @intCast(types_store_size) },
|
||||
.exposed_by_str = .{ .offset = exposed_by_str_offset, .length = @intCast(exposed_by_str_size) },
|
||||
.exposed_nodes = .{ .offset = exposed_nodes_offset, .length = @intCast(exposed_nodes_size) },
|
||||
.exposed_items = .{ .offset = exposed_items_offset, .length = @intCast(exposed_items_size) },
|
||||
.external_decls = .{ .offset = external_decls_offset, .length = @intCast(external_decls_size) },
|
||||
.all_defs = module_env.all_defs,
|
||||
.all_statements = module_env.all_statements,
|
||||
|
|
@ -207,8 +200,7 @@ pub const CacheModule = struct {
|
|||
std.debug.assert(ident_store_offset % SERIALIZATION_ALIGNMENT == 0);
|
||||
std.debug.assert(line_starts_offset % SERIALIZATION_ALIGNMENT == 0);
|
||||
std.debug.assert(types_store_offset % SERIALIZATION_ALIGNMENT == 0);
|
||||
std.debug.assert(exposed_by_str_offset % SERIALIZATION_ALIGNMENT == 0);
|
||||
std.debug.assert(exposed_nodes_offset % SERIALIZATION_ALIGNMENT == 0);
|
||||
std.debug.assert(exposed_items_offset % SERIALIZATION_ALIGNMENT == 0);
|
||||
std.debug.assert(external_decls_offset % SERIALIZATION_ALIGNMENT == 0);
|
||||
|
||||
// Serialize each component
|
||||
|
|
@ -219,8 +211,7 @@ pub const CacheModule = struct {
|
|||
_ = try module_env.idents.serializeInto(data_section[ident_store_offset .. ident_store_offset + ident_store_size], allocator);
|
||||
_ = try module_env.line_starts.serializeInto(@as([]align(SERIALIZATION_ALIGNMENT) u8, @alignCast(data_section[line_starts_offset .. line_starts_offset + line_starts_size])));
|
||||
_ = try module_env.types.serializeInto(data_section[types_store_offset .. types_store_offset + types_store_size], allocator);
|
||||
_ = try module_env.exposed_by_str.serializeInto(data_section[exposed_by_str_offset .. exposed_by_str_offset + exposed_by_str_size]);
|
||||
_ = try module_env.exposed_nodes.serializeInto(data_section[exposed_nodes_offset .. exposed_nodes_offset + exposed_nodes_size]);
|
||||
_ = try module_env.exposed_items.serializeInto(data_section[exposed_items_offset .. exposed_items_offset + exposed_items_size]);
|
||||
_ = try module_env.external_decls.serializeInto(@as([]align(SERIALIZATION_ALIGNMENT) u8, @alignCast(data_section[external_decls_offset .. external_decls_offset + external_decls_size])));
|
||||
|
||||
// TODO Calculate and store checksum
|
||||
|
|
@ -300,11 +291,8 @@ pub const CacheModule = struct {
|
|||
var types_store = try TypeStore.deserializeFrom(self.getComponentData(.types_store), allocator);
|
||||
errdefer types_store.deinit();
|
||||
|
||||
var exposed_by_str = try SafeStringHashMap(void).deserializeFrom(self.getComponentData(.exposed_by_str), allocator);
|
||||
errdefer exposed_by_str.deinit(allocator);
|
||||
|
||||
var exposed_nodes = try SafeStringHashMap(u16).deserializeFrom(self.getComponentData(.exposed_nodes), allocator);
|
||||
errdefer exposed_nodes.deinit(allocator);
|
||||
var exposed_items = try collections.ExposedItems.deserializeFrom(self.getComponentData(.exposed_items), allocator);
|
||||
errdefer exposed_items.deinit(allocator);
|
||||
|
||||
// Create ModuleEnv from deserialized components
|
||||
var module_env = ModuleEnv{
|
||||
|
|
@ -313,8 +301,7 @@ pub const CacheModule = struct {
|
|||
.ident_ids_for_slicing = ident_ids_for_slicing,
|
||||
.strings = strings,
|
||||
.types = types_store,
|
||||
.exposed_by_str = exposed_by_str,
|
||||
.exposed_nodes = exposed_nodes,
|
||||
.exposed_items = exposed_items,
|
||||
.line_starts = line_starts,
|
||||
.source = source,
|
||||
// Module compilation fields - will be initialized after
|
||||
|
|
@ -360,8 +347,7 @@ pub const CacheModule = struct {
|
|||
.ident_store => self.header.ident_store,
|
||||
.line_starts => self.header.line_starts,
|
||||
.types_store => self.header.types_store,
|
||||
.exposed_by_str => self.header.exposed_by_str,
|
||||
.exposed_nodes => self.header.exposed_nodes,
|
||||
.exposed_items => self.header.exposed_items,
|
||||
.external_decls => self.header.external_decls,
|
||||
};
|
||||
return self.data[info.offset .. info.offset + info.length];
|
||||
|
|
@ -381,8 +367,7 @@ pub const CacheModule = struct {
|
|||
.ident_store = self.header.ident_store.length,
|
||||
.line_starts = self.header.line_starts.length,
|
||||
.types_store = self.header.types_store.length,
|
||||
.exposed_by_str = self.header.exposed_by_str.length,
|
||||
.exposed_nodes = self.header.exposed_nodes.length,
|
||||
.exposed_items = self.header.exposed_items.length,
|
||||
.external_decls = self.header.external_decls.length,
|
||||
},
|
||||
};
|
||||
|
|
@ -400,8 +385,7 @@ pub const CacheModule = struct {
|
|||
.ident_store => self.header.ident_store,
|
||||
.line_starts => self.header.line_starts,
|
||||
.types_store => self.header.types_store,
|
||||
.exposed_by_str => self.header.exposed_by_str,
|
||||
.exposed_nodes => self.header.exposed_nodes,
|
||||
.exposed_items => self.header.exposed_items,
|
||||
.external_decls => self.header.external_decls,
|
||||
};
|
||||
|
||||
|
|
@ -568,8 +552,7 @@ const ComponentType = enum {
|
|||
ident_store,
|
||||
line_starts,
|
||||
types_store,
|
||||
exposed_by_str,
|
||||
exposed_nodes,
|
||||
exposed_items,
|
||||
external_decls,
|
||||
};
|
||||
|
||||
|
|
@ -586,8 +569,7 @@ pub const Diagnostics = struct {
|
|||
ident_store: u32,
|
||||
line_starts: u32,
|
||||
types_store: u32,
|
||||
exposed_by_str: u32,
|
||||
exposed_nodes: u32,
|
||||
exposed_items: u32,
|
||||
external_decls: u32,
|
||||
},
|
||||
};
|
||||
|
|
|
|||
|
|
@ -623,17 +623,20 @@ pub fn canonicalizeFile(
|
|||
last_type_anno = null; // Clear after successful use
|
||||
|
||||
// If this declaration successfully defined an exposed value, remove it from exposed_ident_texts
|
||||
// and add it to exposed_nodes
|
||||
// and add the node index to exposed_items
|
||||
const pattern = self.parse_ir.store.getPattern(decl.pattern);
|
||||
if (pattern == .ident) {
|
||||
const token_region = self.parse_ir.tokens.resolve(@intCast(pattern.ident.ident_tok));
|
||||
const ident_text = self.parse_ir.env.source[token_region.start.offset..token_region.end.offset];
|
||||
|
||||
// If this identifier is exposed, add it to exposed_nodes
|
||||
// If this identifier is exposed, add it to exposed_items
|
||||
if (self.exposed_ident_texts.contains(ident_text)) {
|
||||
// Store the def index as u16 in exposed_nodes
|
||||
// Get the interned identifier - it should already exist from parsing
|
||||
const ident = base.Ident.for_text(ident_text);
|
||||
const idx = try self.env.idents.insert(self.env.gpa, ident);
|
||||
// Store the def index as u16 in exposed_items
|
||||
const def_idx_u16: u16 = @intCast(@intFromEnum(def_idx));
|
||||
try self.env.exposed_nodes.put(self.env.gpa, ident_text, def_idx_u16);
|
||||
try self.env.exposed_items.setNodeIndexById(self.env.gpa, @bitCast(idx), def_idx_u16);
|
||||
}
|
||||
|
||||
_ = self.exposed_ident_texts.remove(ident_text);
|
||||
|
|
@ -900,15 +903,15 @@ fn createExposedScope(
|
|||
const exposed = self.parse_ir.store.getExposedItem(exposed_idx);
|
||||
switch (exposed) {
|
||||
.lower_ident => |ident| {
|
||||
// Get the text of the identifier token to use as key
|
||||
// Get the text for tracking redundant exposures
|
||||
const token_region = self.parse_ir.tokens.resolve(@intCast(ident.ident));
|
||||
const ident_text = self.parse_ir.env.source[token_region.start.offset..token_region.end.offset];
|
||||
|
||||
// Add to exposed_by_str for permanent storage (unconditionally)
|
||||
try self.env.exposed_by_str.put(gpa, ident_text, {});
|
||||
|
||||
// Also build exposed_scope with proper identifiers
|
||||
// Get the interned identifier
|
||||
if (self.parse_ir.tokens.resolveIdentifier(ident.ident)) |ident_idx| {
|
||||
// Add to exposed_items for permanent storage (unconditionally)
|
||||
try self.env.exposed_items.addExposedById(gpa, @bitCast(ident_idx));
|
||||
|
||||
// Use a dummy pattern index - we just need to track that it's exposed
|
||||
const dummy_idx = @as(Pattern.Idx, @enumFromInt(0));
|
||||
try self.exposed_scope.put(gpa, .ident, ident_idx, dummy_idx);
|
||||
|
|
@ -933,15 +936,15 @@ fn createExposedScope(
|
|||
}
|
||||
},
|
||||
.upper_ident => |type_name| {
|
||||
// Get the text of the identifier token to use as key
|
||||
// Get the text for tracking redundant exposures
|
||||
const token_region = self.parse_ir.tokens.resolve(@intCast(type_name.ident));
|
||||
const type_text = self.parse_ir.env.source[token_region.start.offset..token_region.end.offset];
|
||||
|
||||
// Add to exposed_by_str for permanent storage (unconditionally)
|
||||
try self.env.exposed_by_str.put(gpa, type_text, {});
|
||||
|
||||
// Also build exposed_scope with proper identifiers
|
||||
// Get the interned identifier
|
||||
if (self.parse_ir.tokens.resolveIdentifier(type_name.ident)) |ident_idx| {
|
||||
// Add to exposed_items for permanent storage (unconditionally)
|
||||
try self.env.exposed_items.addExposedById(gpa, @bitCast(ident_idx));
|
||||
|
||||
// Use a dummy statement index - we just need to track that it's exposed
|
||||
const dummy_idx = @as(Statement.Idx, @enumFromInt(0));
|
||||
try self.exposed_scope.put(gpa, .type_decl, ident_idx, dummy_idx);
|
||||
|
|
@ -966,15 +969,15 @@ fn createExposedScope(
|
|||
}
|
||||
},
|
||||
.upper_ident_star => |type_with_constructors| {
|
||||
// Get the text of the identifier token to use as key
|
||||
// Get the text for tracking redundant exposures
|
||||
const token_region = self.parse_ir.tokens.resolve(@intCast(type_with_constructors.ident));
|
||||
const type_text = self.parse_ir.env.source[token_region.start.offset..token_region.end.offset];
|
||||
|
||||
// Add to exposed_by_str for permanent storage (unconditionally)
|
||||
try self.env.exposed_by_str.put(gpa, type_text, {});
|
||||
|
||||
// Also build exposed_scope with proper identifiers
|
||||
// Get the interned identifier
|
||||
if (self.parse_ir.tokens.resolveIdentifier(type_with_constructors.ident)) |ident_idx| {
|
||||
// Add to exposed_items for permanent storage (unconditionally)
|
||||
try self.env.exposed_items.addExposedById(gpa, @bitCast(ident_idx));
|
||||
|
||||
// Use a dummy statement index - we just need to track that it's exposed
|
||||
const dummy_idx = @as(Statement.Idx, @enumFromInt(0));
|
||||
try self.exposed_scope.put(gpa, .type_decl, ident_idx, dummy_idx);
|
||||
|
|
@ -1368,7 +1371,7 @@ fn introduceExposedItemsIntoScope(
|
|||
return;
|
||||
}
|
||||
|
||||
// Get the module's exposed_by_str map
|
||||
// Get the module's exposed_items
|
||||
const module_env = envs_map.get(module_name_text).?;
|
||||
|
||||
// Validate each exposed item
|
||||
|
|
@ -1377,7 +1380,14 @@ fn introduceExposedItemsIntoScope(
|
|||
const item_name_text = self.env.idents.getText(exposed_item.name);
|
||||
|
||||
// Check if the item is exposed by the module
|
||||
if (!module_env.exposed_by_str.contains(item_name_text)) {
|
||||
// We need to look up by string because the identifiers are from different modules
|
||||
// First, try to find this identifier in the target module's ident store
|
||||
const is_exposed = if (module_env.idents.findByString(item_name_text)) |target_ident|
|
||||
module_env.exposed_items.containsById(self.env.gpa, @bitCast(target_ident))
|
||||
else
|
||||
false;
|
||||
|
||||
if (!is_exposed) {
|
||||
// Determine if it's a type or value based on capitalization
|
||||
const first_char = item_name_text[0];
|
||||
|
||||
|
|
@ -1702,11 +1712,16 @@ pub fn canonicalizeExpr(
|
|||
};
|
||||
};
|
||||
|
||||
// Look up the target node index in the module's exposed_nodes
|
||||
// Look up the target node index in the module's exposed_items
|
||||
// Need to convert identifier from current module to target module
|
||||
const field_text = self.env.idents.getText(ident);
|
||||
const target_node_idx = if (self.module_envs) |envs_map| blk: {
|
||||
if (envs_map.get(module_text)) |module_env| {
|
||||
break :blk module_env.exposed_nodes.get(field_text) orelse 0;
|
||||
if (module_env.idents.findByString(field_text)) |target_ident| {
|
||||
break :blk module_env.exposed_items.getNodeIndexById(self.env.gpa, @bitCast(target_ident)) orelse 0;
|
||||
} else {
|
||||
break :blk 0;
|
||||
}
|
||||
} else {
|
||||
break :blk 0;
|
||||
}
|
||||
|
|
@ -1761,11 +1776,16 @@ pub fn canonicalizeExpr(
|
|||
};
|
||||
};
|
||||
|
||||
// Look up the target node index in the module's exposed_nodes
|
||||
// Look up the target node index in the module's exposed_items
|
||||
// Need to convert identifier from current module to target module
|
||||
const field_text = self.env.idents.getText(exposed_info.original_name);
|
||||
const target_node_idx = if (self.module_envs) |envs_map| blk: {
|
||||
if (envs_map.get(module_text)) |module_env| {
|
||||
break :blk module_env.exposed_nodes.get(field_text) orelse 0;
|
||||
if (module_env.idents.findByString(field_text)) |target_ident| {
|
||||
break :blk module_env.exposed_items.getNodeIndexById(self.env.gpa, @bitCast(target_ident)) orelse 0;
|
||||
} else {
|
||||
break :blk 0;
|
||||
}
|
||||
} else {
|
||||
break :blk 0;
|
||||
}
|
||||
|
|
@ -6541,11 +6561,16 @@ fn tryModuleQualifiedLookup(self: *Self, field_access: AST.BinOp) std.mem.Alloca
|
|||
|
||||
const region = self.parse_ir.tokenizedRegionToRegion(field_access.region);
|
||||
|
||||
// Look up the target node index in the module's exposed_nodes
|
||||
// Look up the target node index in the module's exposed_items
|
||||
// Need to convert identifier from current module to target module
|
||||
const field_text = self.env.idents.getText(field_name);
|
||||
const target_node_idx = if (self.module_envs) |envs_map| blk: {
|
||||
if (envs_map.get(module_text)) |module_env| {
|
||||
break :blk module_env.exposed_nodes.get(field_text) orelse 0;
|
||||
if (module_env.idents.findByString(field_text)) |target_ident| {
|
||||
break :blk module_env.exposed_items.getNodeIndexById(self.env.gpa, @bitCast(target_ident)) orelse 0;
|
||||
} else {
|
||||
break :blk 0;
|
||||
}
|
||||
} else {
|
||||
break :blk 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
const std = @import("std");
|
||||
const compile = @import("compile");
|
||||
const parse = @import("parse");
|
||||
const base = @import("base");
|
||||
|
||||
const canonicalize = @import("../../canonicalize.zig");
|
||||
|
||||
|
|
@ -342,7 +343,7 @@ test "complex case with redundant, shadowing, and not implemented" {
|
|||
try testing.expect(found_not_implemented);
|
||||
}
|
||||
|
||||
test "exposed_by_str is populated correctly" {
|
||||
test "exposed_items is populated correctly" {
|
||||
const allocator = testing.allocator;
|
||||
|
||||
const source =
|
||||
|
|
@ -365,16 +366,23 @@ test "exposed_by_str is populated correctly" {
|
|||
|
||||
try canonicalizer.canonicalizeFile();
|
||||
|
||||
// Check that exposed_by_str contains all exposed items
|
||||
try testing.expect(env.exposed_by_str.contains("foo"));
|
||||
try testing.expect(env.exposed_by_str.contains("bar"));
|
||||
try testing.expect(env.exposed_by_str.contains("MyType"));
|
||||
// Check that exposed_items contains the correct number of items
|
||||
// The exposed items were added during canonicalization
|
||||
|
||||
// Should have exactly 3 entries (duplicates not stored)
|
||||
try testing.expectEqual(@as(usize, 3), env.exposed_by_str.count());
|
||||
try testing.expectEqual(@as(usize, 3), env.exposed_items.count());
|
||||
|
||||
// Check that exposed_items contains all exposed items
|
||||
const foo_idx = env.idents.findByString("foo").?;
|
||||
const bar_idx = env.idents.findByString("bar").?;
|
||||
const mytype_idx = env.idents.findByString("MyType").?;
|
||||
|
||||
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(foo_idx)));
|
||||
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(bar_idx)));
|
||||
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(mytype_idx)));
|
||||
}
|
||||
|
||||
test "exposed_by_str persists after canonicalization" {
|
||||
test "exposed_items persists after canonicalization" {
|
||||
const allocator = testing.allocator;
|
||||
|
||||
const source =
|
||||
|
|
@ -397,16 +405,20 @@ test "exposed_by_str persists after canonicalization" {
|
|||
|
||||
try canonicalizer.canonicalizeFile();
|
||||
|
||||
// All exposed items should be in exposed_by_str, even those not implemented
|
||||
try testing.expect(env.exposed_by_str.contains("x"));
|
||||
try testing.expect(env.exposed_by_str.contains("y"));
|
||||
try testing.expect(env.exposed_by_str.contains("z"));
|
||||
// All exposed items should be in exposed_items, even those not implemented
|
||||
const x_idx = env.idents.findByString("x").?;
|
||||
const y_idx = env.idents.findByString("y").?;
|
||||
const z_idx = env.idents.findByString("z").?;
|
||||
|
||||
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(x_idx)));
|
||||
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(y_idx)));
|
||||
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(z_idx)));
|
||||
|
||||
// Verify the map persists in env after canonicalization is complete
|
||||
try testing.expectEqual(@as(usize, 3), env.exposed_by_str.count());
|
||||
try testing.expectEqual(@as(usize, 3), env.exposed_items.count());
|
||||
}
|
||||
|
||||
test "exposed_by_str never has entries removed" {
|
||||
test "exposed_items never has entries removed" {
|
||||
const allocator = testing.allocator;
|
||||
|
||||
const source =
|
||||
|
|
@ -429,13 +441,57 @@ test "exposed_by_str never has entries removed" {
|
|||
|
||||
try canonicalizer.canonicalizeFile();
|
||||
|
||||
// All exposed items should remain in exposed_by_str
|
||||
// All exposed items should remain in exposed_items
|
||||
// Even though foo appears twice and baz is not implemented,
|
||||
// exposed_by_str should have all unique exposed identifiers
|
||||
try testing.expect(env.exposed_by_str.contains("foo"));
|
||||
try testing.expect(env.exposed_by_str.contains("bar"));
|
||||
try testing.expect(env.exposed_by_str.contains("baz"));
|
||||
// exposed_items should have all unique exposed identifiers
|
||||
const foo_idx = env.idents.findByString("foo").?;
|
||||
const bar_idx = env.idents.findByString("bar").?;
|
||||
const baz_idx = env.idents.findByString("baz").?;
|
||||
|
||||
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(foo_idx)));
|
||||
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(bar_idx)));
|
||||
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(baz_idx)));
|
||||
|
||||
// Should have exactly 3 unique entries
|
||||
try testing.expectEqual(@as(usize, 3), env.exposed_by_str.count());
|
||||
try testing.expectEqual(@as(usize, 3), env.exposed_items.count());
|
||||
}
|
||||
|
||||
test "exposed_items handles identifiers with different attributes" {
|
||||
const allocator = testing.allocator;
|
||||
|
||||
// Module exposing foo and foo! - these should be treated as different identifiers
|
||||
// Note: Using foo and foo! to test that attributes are properly included in the key
|
||||
const source =
|
||||
\\module [foo, foo!]
|
||||
\\
|
||||
\\foo = 42
|
||||
\\foo! = \x -> x + 1
|
||||
;
|
||||
|
||||
var env = try ModuleEnv.init(allocator, source);
|
||||
defer env.deinit();
|
||||
try env.initCIRFields(allocator, "Test");
|
||||
|
||||
var ast = try parse.parse(&env);
|
||||
defer ast.deinit(allocator);
|
||||
|
||||
var canonicalizer = try canonicalize.init(&env, &ast, null);
|
||||
defer canonicalizer.deinit();
|
||||
|
||||
try canonicalizer.canonicalizeFile();
|
||||
|
||||
// Both should be in exposed_items as separate entries
|
||||
const foo_idx = env.idents.findByString("foo").?;
|
||||
const foo_effectful_idx = env.idents.findByString("foo!").?;
|
||||
|
||||
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(foo_idx)));
|
||||
try testing.expect(env.exposed_items.containsById(env.gpa, @bitCast(foo_effectful_idx)));
|
||||
|
||||
// Should have exactly 2 entries - if we only used u29 without attributes, they might incorrectly merge
|
||||
try testing.expectEqual(@as(usize, 2), env.exposed_items.count());
|
||||
|
||||
// Verify they have different full u32 values (index + attributes)
|
||||
const foo_u32 = @as(u32, @bitCast(foo_idx));
|
||||
const foo_effectful_u32 = @as(u32, @bitCast(foo_effectful_idx));
|
||||
try testing.expect(foo_u32 != foo_effectful_u32);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -59,10 +59,15 @@ test "import validation - mix of MODULE NOT FOUND, TYPE NOT EXPOSED, VALUE NOT E
|
|||
}
|
||||
|
||||
// Add exposed items to Json module
|
||||
try json_env.exposed_by_str.put(allocator, "decode", {});
|
||||
try json_env.exposed_by_str.put(allocator, "encode", {});
|
||||
try json_env.exposed_by_str.put(allocator, "JsonError", {});
|
||||
try json_env.exposed_by_str.put(allocator, "DecodeProblem", {});
|
||||
const Ident = base.Ident;
|
||||
const decode_idx = try json_env.idents.insert(allocator, Ident.for_text("decode"));
|
||||
try json_env.exposed_items.addExposedById(allocator, @bitCast(decode_idx));
|
||||
const encode_idx = try json_env.idents.insert(allocator, Ident.for_text("encode"));
|
||||
try json_env.exposed_items.addExposedById(allocator, @bitCast(encode_idx));
|
||||
const json_error_idx = try json_env.idents.insert(allocator, Ident.for_text("JsonError"));
|
||||
try json_env.exposed_items.addExposedById(allocator, @bitCast(json_error_idx));
|
||||
const decode_problem_idx = try json_env.idents.insert(allocator, Ident.for_text("DecodeProblem"));
|
||||
try json_env.exposed_items.addExposedById(allocator, @bitCast(decode_problem_idx));
|
||||
|
||||
try module_envs.put("Json", json_env);
|
||||
|
||||
|
|
@ -75,9 +80,12 @@ test "import validation - mix of MODULE NOT FOUND, TYPE NOT EXPOSED, VALUE NOT E
|
|||
}
|
||||
|
||||
// Add exposed items to Utils module
|
||||
try utils_env.exposed_by_str.put(allocator, "map", {});
|
||||
try utils_env.exposed_by_str.put(allocator, "filter", {});
|
||||
try utils_env.exposed_by_str.put(allocator, "Result", {});
|
||||
const map_idx = try utils_env.idents.insert(allocator, Ident.for_text("map"));
|
||||
try utils_env.exposed_items.addExposedById(allocator, @bitCast(map_idx));
|
||||
const filter_idx = try utils_env.idents.insert(allocator, Ident.for_text("filter"));
|
||||
try utils_env.exposed_items.addExposedById(allocator, @bitCast(filter_idx));
|
||||
const result_idx = try utils_env.idents.insert(allocator, Ident.for_text("Result"));
|
||||
try utils_env.exposed_items.addExposedById(allocator, @bitCast(result_idx));
|
||||
|
||||
try module_envs.put("Utils", utils_env);
|
||||
|
||||
|
|
@ -502,7 +510,7 @@ test "module-qualified lookups with e_lookup_external" {
|
|||
try expectEqual(true, found_dict_empty);
|
||||
}
|
||||
|
||||
test "exposed_nodes - tracking CIR node indices for exposed items" {
|
||||
test "exposed_items - tracking CIR node indices for exposed items" {
|
||||
var gpa_state = std.heap.GeneralPurposeAllocator(.{ .safety = true }){};
|
||||
defer std.debug.assert(gpa_state.deinit() == .ok);
|
||||
const allocator = gpa_state.allocator();
|
||||
|
|
@ -519,16 +527,20 @@ test "exposed_nodes - tracking CIR node indices for exposed items" {
|
|||
allocator.destroy(math_env);
|
||||
}
|
||||
|
||||
// Add exposed items
|
||||
try math_env.exposed_by_str.put(allocator, "add", {});
|
||||
try math_env.exposed_by_str.put(allocator, "multiply", {});
|
||||
try math_env.exposed_by_str.put(allocator, "PI", {});
|
||||
// Add exposed items and set their node indices
|
||||
const Ident = base.Ident;
|
||||
const add_idx = try math_env.idents.insert(allocator, Ident.for_text("add"));
|
||||
try math_env.exposed_items.addExposedById(allocator, @bitCast(add_idx));
|
||||
const multiply_idx = try math_env.idents.insert(allocator, Ident.for_text("multiply"));
|
||||
try math_env.exposed_items.addExposedById(allocator, @bitCast(multiply_idx));
|
||||
const pi_idx = try math_env.idents.insert(allocator, Ident.for_text("PI"));
|
||||
try math_env.exposed_items.addExposedById(allocator, @bitCast(pi_idx));
|
||||
|
||||
// Simulate having CIR node indices for these exposed items
|
||||
// In real usage, these would be set during canonicalization of MathUtils
|
||||
try math_env.exposed_nodes.put(allocator, "add", 100);
|
||||
try math_env.exposed_nodes.put(allocator, "multiply", 200);
|
||||
try math_env.exposed_nodes.put(allocator, "PI", 300);
|
||||
try math_env.exposed_items.setNodeIndexById(allocator, @bitCast(add_idx), 100);
|
||||
try math_env.exposed_items.setNodeIndexById(allocator, @bitCast(multiply_idx), 200);
|
||||
try math_env.exposed_items.setNodeIndexById(allocator, @bitCast(pi_idx), 300);
|
||||
|
||||
try module_envs.put("MathUtils", math_env);
|
||||
|
||||
|
|
@ -586,15 +598,16 @@ test "exposed_nodes - tracking CIR node indices for exposed items" {
|
|||
try expectEqual(true, found_multiply_with_idx_200);
|
||||
try expectEqual(true, found_pi_with_idx_300);
|
||||
|
||||
// Test case where exposed_nodes is not populated (should get 0)
|
||||
// Test case where node index is not populated (should get 0)
|
||||
const empty_env = try allocator.create(ModuleEnv);
|
||||
empty_env.* = try ModuleEnv.init(allocator, "");
|
||||
defer {
|
||||
empty_env.deinit();
|
||||
allocator.destroy(empty_env);
|
||||
}
|
||||
try empty_env.exposed_by_str.put(allocator, "undefined", {});
|
||||
// Don't add to exposed_nodes - should default to 0
|
||||
const undefined_idx = try empty_env.idents.insert(allocator, Ident.for_text("undefined"));
|
||||
try empty_env.exposed_items.addExposedById(allocator, @bitCast(undefined_idx));
|
||||
// Don't set node index - should default to 0
|
||||
try module_envs.put("EmptyModule", empty_env);
|
||||
|
||||
const source2 =
|
||||
|
|
|
|||
|
|
@ -860,10 +860,16 @@ pub fn checkExpr(self: *Self, expr_idx: ModuleEnv.Expr.Idx) std.mem.Allocator.Er
|
|||
|
||||
if (origin_module) |module| {
|
||||
// Look up the method in the origin module's exports
|
||||
// Need to convert identifier from current module to target module
|
||||
const method_name_str = self.cir.idents.getText(dot_access.field_name);
|
||||
|
||||
// Search through the module's exposed nodes
|
||||
if (module.exposed_nodes.get(method_name_str)) |node_idx| {
|
||||
// Search through the module's exposed items
|
||||
const node_idx_opt = if (module.idents.findByString(method_name_str)) |target_ident|
|
||||
module.exposed_items.getNodeIndexById(self.gpa, @bitCast(target_ident))
|
||||
else
|
||||
null;
|
||||
|
||||
if (node_idx_opt) |node_idx| {
|
||||
// Found the method!
|
||||
const target_expr_idx = @as(ModuleEnv.Expr.Idx, @enumFromInt(node_idx));
|
||||
|
||||
|
|
|
|||
288
src/collections/ExposedItems.zig
Normal file
288
src/collections/ExposedItems.zig
Normal file
|
|
@ -0,0 +1,288 @@
|
|||
//! A specialized collection for tracking exposed items in modules using interned IDs.
|
||||
//!
|
||||
//! This combines the functionality of exposed_by_str and exposed_nodes into a single
|
||||
//! SortedArrayBuilder to save memory and simplify the API. It uses interned identifier
|
||||
//! indices (u32 matching Ident.Idx) as keys and u16 values (the node indices).
|
||||
//!
|
||||
//! A value of 0 means "exposed but not yet defined", while non-zero values
|
||||
//! are actual node indices.
|
||||
//!
|
||||
//! Note: This module only provides ID-based methods. String-to-ID conversion must be
|
||||
//! handled by the caller who has access to the Ident.Store.
|
||||
|
||||
const std = @import("std");
|
||||
const Allocator = std.mem.Allocator;
|
||||
const SortedArrayBuilder = @import("SortedArrayBuilder.zig").SortedArrayBuilder;
|
||||
|
||||
// We use u32 which is the bit representation of base.Ident.Idx
|
||||
// This includes both the 29-bit index AND the 3-bit attributes (effectful, ignored, reassignable)
|
||||
// This is critical because foo, foo!, and _foo are different identifiers
|
||||
const IdentIdx = u32;
|
||||
|
||||
/// A collection that tracks exposed items by their names and associated CIR node indices
|
||||
pub const ExposedItems = struct {
|
||||
/// Maps item name (as interned ID) -> CIR node index (0 means exposed but not defined)
|
||||
items: SortedArrayBuilder(IdentIdx, u16),
|
||||
|
||||
const Self = @This();
|
||||
|
||||
pub fn init() Self {
|
||||
return .{
|
||||
.items = SortedArrayBuilder(IdentIdx, u16).init(),
|
||||
};
|
||||
}
|
||||
|
||||
pub fn deinit(self: *Self, allocator: Allocator) void {
|
||||
self.items.deinit(allocator);
|
||||
}
|
||||
|
||||
/// Add an exposed item by its interned ID (pass @bitCast(base.Ident.Idx) to u32)
|
||||
pub fn addExposedById(self: *Self, allocator: Allocator, ident_idx: IdentIdx) !void {
|
||||
// Add with value 0 to indicate "exposed but not yet defined"
|
||||
// The SortedArrayBuilder will handle duplicates by keeping the last value,
|
||||
// but we don't want to overwrite an existing node index with 0
|
||||
if (self.items.sorted) {
|
||||
// If already sorted, check if it exists first
|
||||
if (self.items.get(allocator, ident_idx) == null) {
|
||||
try self.items.put(allocator, ident_idx, 0);
|
||||
}
|
||||
} else {
|
||||
// If not sorted yet, just add it - duplicates will be handled later
|
||||
try self.items.put(allocator, ident_idx, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the node index for an exposed item by its interned ID (pass @bitCast(base.Ident.Idx) to u32)
|
||||
pub fn setNodeIndexById(self: *Self, allocator: Allocator, ident_idx: IdentIdx, node_idx: u16) !void {
|
||||
// First ensure the array is sorted so we can search
|
||||
self.items.ensureSorted(allocator);
|
||||
|
||||
// Find the existing entry and update its value
|
||||
const entries = self.items.entries.items;
|
||||
for (entries) |*entry| {
|
||||
if (entry.key == ident_idx) {
|
||||
entry.value = node_idx;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// If not found, add a new entry
|
||||
try self.items.put(allocator, ident_idx, node_idx);
|
||||
}
|
||||
|
||||
/// Check if an item is exposed by its interned ID (pass @bitCast(base.Ident.Idx) to u32)
|
||||
pub fn containsById(self: *const Self, allocator: Allocator, ident_idx: IdentIdx) bool {
|
||||
var mutable_self = @constCast(self);
|
||||
return mutable_self.items.get(allocator, ident_idx) != null;
|
||||
}
|
||||
|
||||
/// Get the node index for an exposed item by its interned ID (pass @bitCast(base.Ident.Idx) to u32)
|
||||
pub fn getNodeIndexById(self: *const Self, allocator: Allocator, ident_idx: IdentIdx) ?u16 {
|
||||
var mutable_self = @constCast(self);
|
||||
return mutable_self.items.get(allocator, ident_idx);
|
||||
}
|
||||
|
||||
/// Get the number of exposed items
|
||||
pub fn count(self: *const Self) usize {
|
||||
return self.items.count();
|
||||
}
|
||||
|
||||
/// Ensure the array is sorted and deduplicated
|
||||
/// This should be called at the end of canonicalization
|
||||
pub fn ensureSorted(self: *Self, allocator: Allocator) void {
|
||||
self.items.ensureSorted(allocator);
|
||||
}
|
||||
|
||||
/// Detect duplicate exposed items for error reporting
|
||||
/// Returns the interned IDs of duplicates (caller must convert to strings)
|
||||
pub fn detectDuplicates(self: *Self, allocator: Allocator) ![]IdentIdx {
|
||||
return self.items.detectDuplicates(allocator);
|
||||
}
|
||||
|
||||
/// Relocate pointers after memory movement
|
||||
pub fn relocate(self: *Self, offset: isize) void {
|
||||
self.items.relocate(offset);
|
||||
}
|
||||
|
||||
/// Calculate the size needed to serialize this ExposedItems
|
||||
pub fn serializedSize(self: *const Self) usize {
|
||||
// We need to serialize:
|
||||
// 1. The count of items (u32)
|
||||
// 2. For each item: key (u32) + value (u16)
|
||||
var size: usize = @sizeOf(u32); // count
|
||||
|
||||
for (self.items.entries.items) |_| {
|
||||
size += @sizeOf(u32); // key (interned ID)
|
||||
size += @sizeOf(u16); // value
|
||||
}
|
||||
|
||||
// Align to SERIALIZATION_ALIGNMENT
|
||||
const SERIALIZATION_ALIGNMENT = 16;
|
||||
return std.mem.alignForward(usize, size, SERIALIZATION_ALIGNMENT);
|
||||
}
|
||||
|
||||
/// Serialize this ExposedItems into the provided buffer
|
||||
pub fn serializeInto(self: *const Self, buffer: []u8) ![]u8 {
|
||||
const size = self.serializedSize();
|
||||
if (buffer.len < size) return error.BufferTooSmall;
|
||||
|
||||
var offset: usize = 0;
|
||||
|
||||
// Write count
|
||||
std.mem.writeInt(u32, buffer[offset..][0..4], @intCast(self.items.entries.items.len), .little);
|
||||
offset += @sizeOf(u32);
|
||||
|
||||
// Write entries
|
||||
for (self.items.entries.items) |entry| {
|
||||
// Write key (interned ID)
|
||||
std.mem.writeInt(u32, buffer[offset..][0..4], entry.key, .little);
|
||||
offset += @sizeOf(u32);
|
||||
|
||||
// Write value
|
||||
std.mem.writeInt(u16, buffer[offset..][0..2], entry.value, .little);
|
||||
offset += @sizeOf(u16);
|
||||
}
|
||||
|
||||
// Zero padding
|
||||
if (offset < size) {
|
||||
@memset(buffer[offset..size], 0);
|
||||
}
|
||||
|
||||
return buffer[0..size];
|
||||
}
|
||||
|
||||
/// Deserialize ExposedItems from the provided buffer
|
||||
pub fn deserializeFrom(buffer: []const u8, allocator: Allocator) !Self {
|
||||
if (buffer.len < @sizeOf(u32)) return error.BufferTooSmall;
|
||||
|
||||
var offset: usize = 0;
|
||||
|
||||
// Read count
|
||||
const entry_count = std.mem.readInt(u32, buffer[offset..][0..4], .little);
|
||||
offset += @sizeOf(u32);
|
||||
|
||||
var result = Self.init();
|
||||
errdefer result.deinit(allocator);
|
||||
|
||||
// Read entries
|
||||
for (0..entry_count) |_| {
|
||||
// Read key (interned ID)
|
||||
if (offset + @sizeOf(u32) > buffer.len) return error.BufferTooSmall;
|
||||
const key_u32 = std.mem.readInt(u32, buffer[offset..][0..4], .little);
|
||||
// Use the full u32 (includes both index and attributes)
|
||||
const key = key_u32;
|
||||
offset += @sizeOf(u32);
|
||||
|
||||
// Read value
|
||||
if (offset + @sizeOf(u16) > buffer.len) return error.BufferTooSmall;
|
||||
const value = std.mem.readInt(u16, buffer[offset..][0..2], .little);
|
||||
offset += @sizeOf(u16);
|
||||
|
||||
// Add to builder
|
||||
try result.items.put(allocator, key, value);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Append to iovec writer for serialization
|
||||
pub fn appendToIovecs(self: *const Self, writer: anytype) !usize {
|
||||
return self.items.appendToIovecs(writer);
|
||||
}
|
||||
|
||||
/// Iterator for all exposed items
|
||||
pub const Iterator = struct {
|
||||
items: []const SortedArrayBuilder(IdentIdx, u16).Entry,
|
||||
index: usize,
|
||||
|
||||
pub fn next(self: *Iterator) ?struct { ident_idx: IdentIdx, node_idx: u16 } {
|
||||
if (self.index < self.items.len) {
|
||||
const entry = self.items[self.index];
|
||||
self.index += 1;
|
||||
return .{ .ident_idx = entry.key, .node_idx = entry.value };
|
||||
}
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
/// Get an iterator over all exposed items
|
||||
pub fn iterator(self: *const Self) Iterator {
|
||||
return .{ .items = self.items.entries.items, .index = 0 };
|
||||
}
|
||||
};
|
||||
|
||||
test "ExposedItems basic operations" {
|
||||
const testing = std.testing;
|
||||
const allocator = testing.allocator;
|
||||
|
||||
var exposed = ExposedItems.init();
|
||||
defer exposed.deinit(allocator);
|
||||
|
||||
// Add exposed items (like exposed_by_str.put)
|
||||
try exposed.addExposed(allocator, "foo");
|
||||
try exposed.addExposed(allocator, "bar");
|
||||
try exposed.addExposed(allocator, "MyType");
|
||||
|
||||
// Test count
|
||||
try testing.expectEqual(@as(usize, 3), exposed.count());
|
||||
|
||||
// Test contains (like exposed_by_str.contains)
|
||||
try testing.expect(exposed.contains(allocator, "foo"));
|
||||
try testing.expect(exposed.contains(allocator, "bar"));
|
||||
try testing.expect(exposed.contains(allocator, "MyType"));
|
||||
try testing.expect(!exposed.contains(allocator, "missing"));
|
||||
|
||||
// Test getNodeIndex before setting (should be 0)
|
||||
try testing.expectEqual(@as(?u16, 0), exposed.getNodeIndex(allocator, "foo"));
|
||||
|
||||
// Set node indices (like exposed_nodes.put)
|
||||
try exposed.setNodeIndex(allocator, "foo", 42);
|
||||
try exposed.setNodeIndex(allocator, "bar", 84);
|
||||
|
||||
// Test getNodeIndex after setting
|
||||
try testing.expectEqual(@as(?u16, 42), exposed.getNodeIndex(allocator, "foo"));
|
||||
try testing.expectEqual(@as(?u16, 84), exposed.getNodeIndex(allocator, "bar"));
|
||||
try testing.expectEqual(@as(?u16, 0), exposed.getNodeIndex(allocator, "MyType")); // Not set
|
||||
try testing.expectEqual(@as(?u16, null), exposed.getNodeIndex(allocator, "missing")); // Not exposed
|
||||
}
|
||||
|
||||
test "ExposedItems handles duplicates" {
|
||||
const testing = std.testing;
|
||||
const allocator = testing.allocator;
|
||||
|
||||
var exposed = ExposedItems.init();
|
||||
defer exposed.deinit(allocator);
|
||||
|
||||
// Add items with duplicates
|
||||
try exposed.addExposed(allocator, "foo");
|
||||
try exposed.setNodeIndex(allocator, "foo", 42);
|
||||
try exposed.addExposed(allocator, "foo"); // Duplicate - should not overwrite node index
|
||||
|
||||
// Force sorting
|
||||
exposed.ensureSorted(allocator);
|
||||
|
||||
// Should still have the node index
|
||||
try testing.expectEqual(@as(?u16, 42), exposed.getNodeIndex(allocator, "foo"));
|
||||
}
|
||||
|
||||
test "ExposedItems detectDuplicates" {
|
||||
const testing = std.testing;
|
||||
const allocator = testing.allocator;
|
||||
|
||||
var exposed = ExposedItems.init();
|
||||
defer exposed.deinit(allocator);
|
||||
|
||||
// Add some duplicates
|
||||
try exposed.addExposed(allocator, "foo");
|
||||
try exposed.addExposed(allocator, "bar");
|
||||
try exposed.addExposed(allocator, "foo"); // duplicate
|
||||
try exposed.setNodeIndex(allocator, "bar", 42);
|
||||
try exposed.addExposed(allocator, "bar"); // duplicate
|
||||
|
||||
// Detect duplicates
|
||||
const duplicates = try exposed.detectDuplicates(allocator);
|
||||
defer allocator.free(duplicates);
|
||||
|
||||
// Should report both duplicates
|
||||
try testing.expectEqual(@as(usize, 2), duplicates.len);
|
||||
}
|
||||
450
src/collections/SortedArrayBuilder.zig
Normal file
450
src/collections/SortedArrayBuilder.zig
Normal file
|
|
@ -0,0 +1,450 @@
|
|||
//! A builder for creating sorted arrays with binary search lookups.
|
||||
//!
|
||||
//! SortedArrayBuilder provides a way to build key-value mappings that are
|
||||
//! optimized for small sizes and deterministic serialization. It maintains
|
||||
//! a sorted array internally, enabling efficient binary search lookups.
|
||||
//!
|
||||
//! Features:
|
||||
//! - Supports both string and numeric key types
|
||||
//! - Maintains sorted order for deterministic serialization
|
||||
//! - Efficient binary search lookups O(log n)
|
||||
//! - Zero-copy deserialization
|
||||
//! - Minimal memory overhead
|
||||
|
||||
const std = @import("std");
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
// Import base for both testing and runtime
|
||||
const base = @import("base");
|
||||
const serialization = @import("serialization");
|
||||
const IovecWriter = serialization.IovecWriter;
|
||||
|
||||
/// A builder for creating sorted arrays directly without using hash maps
|
||||
/// This is more efficient when we know we won't have duplicates
|
||||
pub fn SortedArrayBuilder(comptime K: type, comptime V: type) type {
|
||||
return struct {
|
||||
entries: std.ArrayListUnmanaged(Entry) = .{},
|
||||
sorted: bool = true,
|
||||
|
||||
const Self = @This();
|
||||
|
||||
pub const Entry = struct {
|
||||
key: K,
|
||||
value: V,
|
||||
|
||||
fn lessThan(_: void, a: Entry, b: Entry) bool {
|
||||
if (K == []const u8) {
|
||||
return std.mem.lessThan(u8, a.key, b.key);
|
||||
} else if (@typeInfo(K) == .int or @typeInfo(K) == .@"enum") {
|
||||
return a.key < b.key;
|
||||
} else {
|
||||
@compileError("Unsupported key type for SortedArrayBuilder");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
pub fn init() Self {
|
||||
return .{};
|
||||
}
|
||||
|
||||
pub fn deinit(self: *Self, allocator: Allocator) void {
|
||||
if (K == []const u8) {
|
||||
// Free string keys
|
||||
for (self.entries.items) |entry| {
|
||||
allocator.free(entry.key);
|
||||
}
|
||||
}
|
||||
self.entries.deinit(allocator);
|
||||
}
|
||||
|
||||
/// Add a key-value pair
|
||||
pub fn put(self: *Self, allocator: Allocator, key: K, value: V) !void {
|
||||
const new_key = if (K == []const u8) try allocator.dupe(u8, key) else key;
|
||||
|
||||
// Check if we need to maintain sorted order
|
||||
if (self.sorted and self.entries.items.len > 0) {
|
||||
const last = self.entries.items[self.entries.items.len - 1];
|
||||
if (K == []const u8) {
|
||||
self.sorted = std.mem.lessThan(u8, last.key, new_key);
|
||||
} else {
|
||||
self.sorted = last.key < new_key;
|
||||
}
|
||||
}
|
||||
|
||||
try self.entries.append(allocator, .{ .key = new_key, .value = value });
|
||||
}
|
||||
|
||||
/// Get value by key (requires sorting first if not already sorted)
|
||||
pub fn get(self: *Self, allocator: Allocator, key: K) ?V {
|
||||
self.ensureSorted(allocator);
|
||||
|
||||
var left: usize = 0;
|
||||
var right: usize = self.entries.items.len;
|
||||
|
||||
while (left < right) {
|
||||
const mid = left + (right - left) / 2;
|
||||
const mid_key = self.entries.items[mid].key;
|
||||
|
||||
const cmp = if (K == []const u8)
|
||||
std.mem.order(u8, mid_key, key)
|
||||
else if (mid_key == key)
|
||||
std.math.Order.eq
|
||||
else if (mid_key < key)
|
||||
std.math.Order.lt
|
||||
else
|
||||
std.math.Order.gt;
|
||||
|
||||
switch (cmp) {
|
||||
.eq => return self.entries.items[mid].value,
|
||||
.lt => left = mid + 1,
|
||||
.gt => right = mid,
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/// Ensure the array is sorted and deduplicate any duplicate entries
|
||||
pub fn ensureSorted(self: *Self, allocator: Allocator) void {
|
||||
if (!self.sorted) {
|
||||
std.sort.pdq(Entry, self.entries.items, {}, Entry.lessThan);
|
||||
self.sorted = true;
|
||||
|
||||
// Check for and report duplicates, then deduplicate
|
||||
self.deduplicateAndReport(allocator);
|
||||
}
|
||||
}
|
||||
|
||||
/// Check for duplicates, report them, and remove duplicates keeping the last occurrence
|
||||
fn deduplicateAndReport(self: *Self, allocator: Allocator) void {
|
||||
if (self.entries.items.len <= 1) return;
|
||||
|
||||
// First pass: detect and report duplicates
|
||||
var i: usize = 1;
|
||||
while (i < self.entries.items.len) {
|
||||
const prev_entry = self.entries.items[i - 1];
|
||||
const curr_entry = self.entries.items[i];
|
||||
const is_duplicate = if (K == []const u8)
|
||||
std.mem.eql(u8, prev_entry.key, curr_entry.key)
|
||||
else
|
||||
prev_entry.key == curr_entry.key;
|
||||
|
||||
if (is_duplicate) {
|
||||
// Note: Duplicate detection is handled by caller via detectDuplicates() method
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
|
||||
// Second pass: deduplicate by keeping last occurrence
|
||||
var write_index: usize = 0;
|
||||
for (self.entries.items, 0..) |entry, read_index| {
|
||||
var should_keep = true;
|
||||
|
||||
// Look ahead to see if there's a duplicate later
|
||||
for (self.entries.items[read_index + 1 ..]) |future_entry| {
|
||||
const is_duplicate = if (K == []const u8)
|
||||
std.mem.eql(u8, entry.key, future_entry.key)
|
||||
else
|
||||
entry.key == future_entry.key;
|
||||
|
||||
if (is_duplicate) {
|
||||
should_keep = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (should_keep) {
|
||||
if (write_index != read_index) {
|
||||
self.entries.items[write_index] = entry;
|
||||
}
|
||||
write_index += 1;
|
||||
} else {
|
||||
// Free the string key that we're not keeping
|
||||
if (K == []const u8) {
|
||||
allocator.free(entry.key);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Update the length to reflect deduplicated entries
|
||||
self.entries.items.len = write_index;
|
||||
}
|
||||
|
||||
/// Detect duplicates without modifying the array - returns list of duplicate keys
|
||||
pub fn detectDuplicates(self: *Self, allocator: Allocator) ![]K {
|
||||
var duplicates = std.ArrayList(K).init(allocator);
|
||||
|
||||
if (self.entries.items.len <= 1) return duplicates.toOwnedSlice();
|
||||
|
||||
// Ensure sorted first
|
||||
if (!self.sorted) {
|
||||
std.sort.pdq(Entry, self.entries.items, {}, Entry.lessThan);
|
||||
self.sorted = true;
|
||||
}
|
||||
|
||||
var reported_keys = if (K == []const u8)
|
||||
std.StringHashMap(void).init(allocator)
|
||||
else
|
||||
std.AutoHashMap(K, void).init(allocator);
|
||||
defer reported_keys.deinit();
|
||||
|
||||
var i: usize = 1;
|
||||
while (i < self.entries.items.len) {
|
||||
const prev_entry = self.entries.items[i - 1];
|
||||
const curr_entry = self.entries.items[i];
|
||||
const is_duplicate = if (K == []const u8)
|
||||
std.mem.eql(u8, prev_entry.key, curr_entry.key)
|
||||
else
|
||||
prev_entry.key == curr_entry.key;
|
||||
|
||||
if (is_duplicate) {
|
||||
// Report duplicate only once per unique key
|
||||
const result = try reported_keys.getOrPut(curr_entry.key);
|
||||
if (!result.found_existing) {
|
||||
try duplicates.append(curr_entry.key);
|
||||
}
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
|
||||
return duplicates.toOwnedSlice();
|
||||
}
|
||||
|
||||
/// Relocate pointers after memory movement
|
||||
pub fn relocate(self: *Self, offset: isize) void {
|
||||
// Relocate the entries array pointer
|
||||
if (self.entries.items.len > 0) {
|
||||
const old_ptr = @intFromPtr(self.entries.items.ptr);
|
||||
// Skip relocation if this is a sentinel value
|
||||
// Define sentinel value locally since iovec_serialize is not available
|
||||
const EMPTY_ARRAY_SENTINEL: usize = 0xDEADBEEF;
|
||||
if (old_ptr != EMPTY_ARRAY_SENTINEL) {
|
||||
const new_ptr = @as(usize, @intCast(@as(isize, @intCast(old_ptr)) + offset));
|
||||
self.entries.items.ptr = @ptrFromInt(new_ptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the number of entries
|
||||
pub fn count(self: *const Self) usize {
|
||||
return self.entries.items.len;
|
||||
}
|
||||
|
||||
/// Append this SortedArrayBuilder to an iovec writer for serialization
|
||||
pub fn appendToIovecs(self: *const Self, writer: *IovecWriter) !usize {
|
||||
|
||||
// Must be sorted before serialization
|
||||
if (!self.sorted) {
|
||||
@panic("SortedArrayBuilder must be sorted before serialization");
|
||||
}
|
||||
|
||||
// Create a mutable copy of self that we can modify
|
||||
var builder_copy = self.*;
|
||||
|
||||
// Create a buffer for the final serialized struct
|
||||
const builder_copy_buffer = try writer.allocator.alloc(u8, @sizeOf(Self));
|
||||
|
||||
// Track this allocation so it gets freed when writer is deinitialized
|
||||
try writer.owned_buffers.append(builder_copy_buffer);
|
||||
|
||||
// Serialize entries array data
|
||||
const entries_offset = if (self.entries.items.len > 0) blk: {
|
||||
const data = std.mem.sliceAsBytes(self.entries.items);
|
||||
const offset = try writer.appendBytes(Entry, data);
|
||||
break :blk offset;
|
||||
} else 0;
|
||||
|
||||
// Update pointer in the copy to use offset
|
||||
// For empty arrays, use sentinel value instead of 0 to avoid null pointer
|
||||
builder_copy.entries.items.ptr = if (entries_offset == 0)
|
||||
@ptrFromInt(0xDEADBEEF) // EMPTY_ARRAY_SENTINEL
|
||||
else
|
||||
@ptrFromInt(entries_offset);
|
||||
builder_copy.entries.items.len = self.entries.items.len;
|
||||
|
||||
// Note: For string keys, we don't serialize the string data separately here.
|
||||
// This assumes that the strings are already managed elsewhere (e.g., in an interner)
|
||||
// and we're just storing pointers/slices to them. If string keys need to be
|
||||
// serialized as well, that would require additional handling.
|
||||
|
||||
// Copy the modified struct to the buffer
|
||||
@memcpy(builder_copy_buffer, std.mem.asBytes(&builder_copy));
|
||||
|
||||
// Now that all pointers have been converted to offsets, add the copy to iovecs
|
||||
const struct_offset = try writer.appendBytes(Self, builder_copy_buffer);
|
||||
|
||||
return struct_offset;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
test "SortedArrayBuilder basic operations" {
|
||||
const testing = std.testing;
|
||||
const allocator = testing.allocator;
|
||||
|
||||
var builder = SortedArrayBuilder([]const u8, u32).init();
|
||||
defer builder.deinit(allocator);
|
||||
|
||||
// Add items in random order
|
||||
try builder.put(allocator, "zebra", 100);
|
||||
try builder.put(allocator, "apple", 50);
|
||||
try builder.put(allocator, "banana", 150);
|
||||
try builder.put(allocator, "cherry", 30);
|
||||
|
||||
// Test count
|
||||
try testing.expectEqual(@as(usize, 4), builder.count());
|
||||
|
||||
// Test get (forces sorting)
|
||||
try testing.expectEqual(@as(?u32, 50), builder.get(allocator, "apple"));
|
||||
try testing.expectEqual(@as(?u32, 150), builder.get(allocator, "banana"));
|
||||
try testing.expectEqual(@as(?u32, 30), builder.get(allocator, "cherry"));
|
||||
try testing.expectEqual(@as(?u32, 100), builder.get(allocator, "zebra"));
|
||||
try testing.expectEqual(@as(?u32, null), builder.get(allocator, "missing"));
|
||||
|
||||
// Test iovec serialization
|
||||
var writer = IovecWriter.init(allocator);
|
||||
defer writer.deinit();
|
||||
|
||||
_ = try builder.appendToIovecs(&writer);
|
||||
|
||||
// Verify we have some iovecs (basic smoke test)
|
||||
try testing.expect(writer.iovecs.items.len > 0);
|
||||
}
|
||||
|
||||
test "SortedArrayBuilder maintains sorted order when added in order" {
|
||||
const testing = std.testing;
|
||||
const allocator = testing.allocator;
|
||||
|
||||
var builder = SortedArrayBuilder([]const u8, u16).init();
|
||||
defer builder.deinit(allocator);
|
||||
|
||||
// Add items in sorted order
|
||||
try builder.put(allocator, "aaa", 1);
|
||||
try builder.put(allocator, "bbb", 2);
|
||||
try builder.put(allocator, "ccc", 3);
|
||||
try builder.put(allocator, "ddd", 4);
|
||||
|
||||
// Should still be marked as sorted
|
||||
try testing.expect(builder.sorted);
|
||||
|
||||
// Get should work without sorting
|
||||
try testing.expectEqual(@as(?u16, 1), builder.get(allocator, "aaa"));
|
||||
try testing.expectEqual(@as(?u16, 4), builder.get(allocator, "ddd"));
|
||||
}
|
||||
|
||||
test "SortedArrayBuilder detectDuplicates example usage" {
|
||||
const testing = std.testing;
|
||||
const allocator = testing.allocator;
|
||||
|
||||
var builder = SortedArrayBuilder(u32, u16).init();
|
||||
defer builder.deinit(allocator);
|
||||
|
||||
// Add some entries with duplicates
|
||||
try builder.put(allocator, 100, 1);
|
||||
try builder.put(allocator, 200, 2);
|
||||
try builder.put(allocator, 100, 3); // duplicate of 100
|
||||
try builder.put(allocator, 300, 4);
|
||||
try builder.put(allocator, 200, 5); // duplicate of 200
|
||||
|
||||
// Detect duplicates before sorting/deduplicating
|
||||
const duplicates = try builder.detectDuplicates(allocator);
|
||||
defer allocator.free(duplicates);
|
||||
|
||||
// Example: Report duplicates (in real code, this would push diagnostics)
|
||||
for (duplicates) |duplicate_key| {
|
||||
// In a real compiler, you'd push a diagnostic here:
|
||||
// try cir.pushDiagnostic(CIR.Diagnostic{ .duplicate_exposed_item = ... });
|
||||
std.debug.print("Found duplicate key: {d}\n", .{duplicate_key});
|
||||
}
|
||||
|
||||
// Verify we found the expected duplicates
|
||||
try testing.expectEqual(@as(usize, 2), duplicates.len);
|
||||
|
||||
// After detection, normal operations work as expected
|
||||
builder.ensureSorted(allocator);
|
||||
try testing.expectEqual(@as(usize, 3), builder.count()); // Deduplicated
|
||||
try testing.expectEqual(@as(?u16, 3), builder.get(allocator, 100)); // Last value kept
|
||||
try testing.expectEqual(@as(?u16, 5), builder.get(allocator, 200)); // Last value kept
|
||||
}
|
||||
|
||||
test "SortedArrayBuilder handles duplicates with string keys" {
|
||||
const testing = std.testing;
|
||||
const allocator = testing.allocator;
|
||||
|
||||
var builder = SortedArrayBuilder([]const u8, u32).init();
|
||||
defer builder.deinit(allocator);
|
||||
|
||||
// Add duplicate keys with different values
|
||||
try builder.put(allocator, "apple", 10);
|
||||
try builder.put(allocator, "banana", 20);
|
||||
try builder.put(allocator, "apple", 30); // duplicate key
|
||||
try builder.put(allocator, "cherry", 40);
|
||||
try builder.put(allocator, "banana", 50); // duplicate key
|
||||
|
||||
// Initial count includes duplicates
|
||||
try testing.expectEqual(@as(usize, 5), builder.count());
|
||||
|
||||
// Force sorting and deduplication
|
||||
builder.ensureSorted(allocator);
|
||||
|
||||
// Count should be reduced after deduplication
|
||||
try testing.expectEqual(@as(usize, 3), builder.count());
|
||||
|
||||
// Should return the last value for each key (keeping last occurrence)
|
||||
try testing.expectEqual(@as(?u32, 30), builder.get(allocator, "apple"));
|
||||
try testing.expectEqual(@as(?u32, 50), builder.get(allocator, "banana"));
|
||||
try testing.expectEqual(@as(?u32, 40), builder.get(allocator, "cherry"));
|
||||
}
|
||||
|
||||
test "SortedArrayBuilder handles duplicates with numeric keys" {
|
||||
const testing = std.testing;
|
||||
const allocator = testing.allocator;
|
||||
|
||||
var builder = SortedArrayBuilder(u32, []const u8).init();
|
||||
defer builder.deinit(allocator);
|
||||
|
||||
// Add duplicate keys with different values
|
||||
try builder.put(allocator, 100, "first");
|
||||
try builder.put(allocator, 200, "second");
|
||||
try builder.put(allocator, 100, "updated_first"); // duplicate key
|
||||
try builder.put(allocator, 300, "third");
|
||||
|
||||
// Initial count includes duplicates
|
||||
try testing.expectEqual(@as(usize, 4), builder.count());
|
||||
|
||||
// Force sorting and deduplication
|
||||
builder.ensureSorted(allocator);
|
||||
|
||||
// Count should be reduced after deduplication
|
||||
try testing.expectEqual(@as(usize, 3), builder.count());
|
||||
|
||||
// Should return the last value for each key
|
||||
try testing.expectEqual(@as(?[]const u8, "updated_first"), builder.get(allocator, 100));
|
||||
try testing.expectEqual(@as(?[]const u8, "second"), builder.get(allocator, 200));
|
||||
try testing.expectEqual(@as(?[]const u8, "third"), builder.get(allocator, 300));
|
||||
}
|
||||
|
||||
test "SortedArrayBuilder no duplicates case" {
|
||||
const testing = std.testing;
|
||||
const allocator = testing.allocator;
|
||||
|
||||
var builder = SortedArrayBuilder([]const u8, u16).init();
|
||||
defer builder.deinit(allocator);
|
||||
|
||||
// Add unique keys only
|
||||
try builder.put(allocator, "unique1", 1);
|
||||
try builder.put(allocator, "unique2", 2);
|
||||
try builder.put(allocator, "unique3", 3);
|
||||
|
||||
const original_count = builder.count();
|
||||
|
||||
// Force sorting (should not change count)
|
||||
builder.ensureSorted(allocator);
|
||||
|
||||
// Count should remain the same
|
||||
try testing.expectEqual(original_count, builder.count());
|
||||
|
||||
// All values should be retrievable
|
||||
try testing.expectEqual(@as(?u16, 1), builder.get(allocator, "unique1"));
|
||||
try testing.expectEqual(@as(?u16, 2), builder.get(allocator, "unique2"));
|
||||
try testing.expectEqual(@as(?u16, 3), builder.get(allocator, "unique3"));
|
||||
}
|
||||
|
|
@ -17,6 +17,9 @@ pub const SafeMultiList = @import("safe_list.zig").SafeMultiList;
|
|||
|
||||
pub const SafeStringHashMap = @import("safe_hash_map.zig").SafeStringHashMap;
|
||||
|
||||
pub const SortedArrayBuilder = @import("SortedArrayBuilder.zig").SortedArrayBuilder;
|
||||
pub const ExposedItems = @import("ExposedItems.zig").ExposedItems;
|
||||
|
||||
/// A range that must have at least one element
|
||||
pub const NonEmptyRange = struct {
|
||||
/// Starting index (inclusive)
|
||||
|
|
|
|||
|
|
@ -68,12 +68,8 @@ idents: Ident.Store,
|
|||
ident_ids_for_slicing: collections.SafeList(Ident.Idx),
|
||||
strings: StringLiteral.Store,
|
||||
types: types_mod.Store,
|
||||
/// Map of exposed items by their string representation (not interned)
|
||||
/// This is built during canonicalization and preserved for later use
|
||||
exposed_by_str: collections.SafeStringHashMap(void),
|
||||
/// Map of exposed item names to their node indices (stored as u16)
|
||||
/// This is populated during canonicalization to allow cross-module lookups
|
||||
exposed_nodes: collections.SafeStringHashMap(u16),
|
||||
/// The items (a combination of types and values) that this module exposes
|
||||
exposed_items: collections.ExposedItems,
|
||||
|
||||
/// Line starts for error reporting. We retain only start and offset positions in the IR
|
||||
/// and then use these line starts to calculate the line number and column number as required.
|
||||
|
|
@ -130,8 +126,7 @@ pub fn init(gpa: std.mem.Allocator, source: []const u8) std.mem.Allocator.Error!
|
|||
.ident_ids_for_slicing = try collections.SafeList(Ident.Idx).initCapacity(gpa, 256),
|
||||
.strings = try StringLiteral.Store.initCapacityBytes(gpa, 4096),
|
||||
.types = try types_mod.Store.initCapacity(gpa, 2048, 512),
|
||||
.exposed_by_str = try collections.SafeStringHashMap(void).initCapacity(gpa, 64),
|
||||
.exposed_nodes = try collections.SafeStringHashMap(u16).initCapacity(gpa, 64),
|
||||
.exposed_items = collections.ExposedItems.init(),
|
||||
.line_starts = try collections.SafeList(u32).initCapacity(gpa, 256),
|
||||
.source = source,
|
||||
// Initialize compilation fields with empty/default values
|
||||
|
|
@ -152,8 +147,7 @@ pub fn deinit(self: *Self) void {
|
|||
self.strings.deinit(self.gpa);
|
||||
self.types.deinit();
|
||||
self.line_starts.deinit(self.gpa);
|
||||
self.exposed_by_str.deinit(self.gpa);
|
||||
self.exposed_nodes.deinit(self.gpa);
|
||||
self.exposed_items.deinit(self.gpa);
|
||||
// Clean up compilation fields
|
||||
self.external_decls.deinit(self.gpa);
|
||||
self.imports.deinit(self.gpa);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue