This commit is contained in:
Richard Feldman 2025-10-11 10:38:26 -04:00
parent 74c21485e4
commit ab814cca87
No known key found for this signature in database
16 changed files with 273 additions and 102 deletions

View file

@ -91,12 +91,12 @@ pub fn freezeInterners(self: *CommonEnv) void {
}
/// Serialized representation of CommonEnv
/// Following SafeList.Serialized pattern: NO pointers, NO slices
pub const Serialized = struct {
idents: Ident.Store.Serialized,
strings: StringLiteral.Store.Serialized,
exposed_items: ExposedItems.Serialized,
line_starts: SafeList(u32).Serialized,
source: []const u8, // Serialized as zeros, provided during deserialization
/// Serialize a CommonEnv into this Serialized struct, appending data to the writer
pub fn serialize(
@ -105,8 +105,6 @@ pub const Serialized = struct {
allocator: std.mem.Allocator,
writer: *CompactWriter,
) !void {
self.source = ""; // Empty slice
// Serialize each component using its Serialized struct
try self.idents.serialize(&env.idents, allocator, writer);
try self.strings.serialize(&env.strings, allocator, writer);

View file

@ -144,6 +144,7 @@ pub fn main() !void {
try stdout.print("Builtin indices generated:\n", .{});
try stdout.print(" Bool type: {}\n", .{bool_type_idx});
try stdout.print(" Result type: {}\n", .{result_type_idx});
try stdout.print("Native (64-bit) @sizeOf(ModuleEnv.Serialized) = {}\n", .{@sizeOf(ModuleEnv.Serialized)});
}
const ModuleDep = struct {
@ -280,6 +281,14 @@ fn serializeModuleEnv(
const file = try std.fs.cwd().createFile(output_path, .{ .read = true });
defer file.close();
// Log the actual values BEFORE serialization
const stdout = std.io.getStdOut().writer();
try stdout.print("Serializing {s}: all_statements.span.start={}, .len={}\n", .{
output_path,
env.all_statements.span.start,
env.all_statements.span.len,
});
// Serialize using CompactWriter
var writer = collections.CompactWriter.init();
defer writer.deinit(arena_alloc);

View file

@ -600,8 +600,7 @@ pub const Import = struct {
}
pub const Serialized = struct {
// Placeholder to match Store size - not serialized
map: std.AutoHashMapUnmanaged(base.StringLiteral.Idx, Import.Idx) = .{},
// NO HashMap! Following SafeList.Serialized pattern: NO pointers, NO slices
imports: collections.SafeList(base.StringLiteral.Idx).Serialized,
/// Serialize a Store into this Serialized struct, appending data to the writer

View file

@ -1403,8 +1403,8 @@ pub fn relocate(self: *Self, offset: isize) void {
}
/// Serialized representation of ModuleEnv
/// Following SafeList.Serialized pattern: NO pointers, NO slices, NO Allocators
pub const Serialized = struct {
gpa: std.mem.Allocator, // Serialized as zeros, provided during deserialization
common: CommonEnv.Serialized,
types: TypeStore.Serialized,
module_kind: ModuleKind, // Must match field order in Self
@ -1414,7 +1414,6 @@ pub const Serialized = struct {
builtin_statements: CIR.Statement.Span,
external_decls: CIR.ExternalDecl.SafeList.Serialized,
imports: CIR.Import.Store.Serialized,
module_name: []const u8, // Serialized as zeros, provided during deserialization
diagnostics: CIR.Diagnostic.Span,
store: NodeStore.Serialized,
@ -1425,9 +1424,6 @@ pub const Serialized = struct {
allocator: std.mem.Allocator,
writer: *CompactWriter,
) !void {
// Set fields that will be provided during deserialization to zeros
self.gpa = undefined; // Will be set to zeros below
try self.common.serialize(&env.common, allocator, writer);
try self.types.serialize(&env.types, allocator, writer);
@ -1445,10 +1441,6 @@ pub const Serialized = struct {
// Serialize NodeStore
try self.store.serialize(&env.store, allocator, writer);
// Set gpa to all zeros; the space needs to be here,
// but the value will be set separately during deserialization.
@memset(@as([*]u8, @ptrCast(&self.gpa))[0..@sizeOf(@TypeOf(self.gpa))], 0);
}
/// Deserialize a ModuleEnv from the buffer, updating the ModuleEnv in place
@ -1465,10 +1457,15 @@ pub const Serialized = struct {
// Overwrite ourself with the deserialized version, and return our pointer after casting it to Self.
const env = @as(*Self, @ptrFromInt(@intFromPtr(self)));
// Deserialize store separately (returns a pointer that must be freed after copying)
const deserialized_store_ptr = try self.store.deserialize(offset, gpa);
const deserialized_store = deserialized_store_ptr.*;
gpa.destroy(deserialized_store_ptr);
env.* = Self{
.gpa = gpa,
.common = self.common.deserialize(offset, source).*,
.types = self.types.deserialize(offset).*,
.types = self.types.deserialize(offset, gpa).*,
.module_kind = self.module_kind,
.all_defs = self.all_defs,
.all_statements = self.all_statements,
@ -1478,7 +1475,7 @@ pub const Serialized = struct {
.imports = self.imports.deserialize(offset, gpa).*,
.module_name = module_name,
.diagnostics = self.diagnostics,
.store = self.store.deserialize(offset, gpa).*,
.store = deserialized_store,
};
return env;

View file

@ -3274,30 +3274,12 @@ pub fn relocate(self: *NodeStore, offset: isize) void {
}
/// Serialized representation of NodeStore
/// Following SafeList.Serialized pattern: NO pointers, NO slices, NO ArrayLists, NO HashMaps, NO Allocators
pub const Serialized = struct {
nodes: Node.List.Serialized,
regions: Region.List.Serialized,
extra_data: collections.SafeList(u32).Serialized,
// Scratch arrays - not serialized, just placeholders to match NodeStore size
// TODO move these out of NodeStore so that we don't need to serialize and
// deserialize a bunch of zeros for these; it's a waste of space.
scratch_statements: std.ArrayListUnmanaged(CIR.Statement.Idx) = .{},
scratch_exprs: std.ArrayListUnmanaged(CIR.Expr.Idx) = .{},
scratch_record_fields: std.ArrayListUnmanaged(CIR.RecordField.Idx) = .{},
scratch_match_branches: std.ArrayListUnmanaged(CIR.Expr.Match.Branch.Idx) = .{},
scratch_match_branch_patterns: std.ArrayListUnmanaged(CIR.Expr.Match.BranchPattern.Idx) = .{},
scratch_if_branches: std.ArrayListUnmanaged(CIR.Expr.IfBranch.Idx) = .{},
scratch_where_clauses: std.ArrayListUnmanaged(CIR.WhereClause.Idx) = .{},
scratch_patterns: std.ArrayListUnmanaged(CIR.Pattern.Idx) = .{},
scratch_pattern_record_fields: std.ArrayListUnmanaged(CIR.PatternRecordField.Idx) = .{},
scratch_record_destructs: std.ArrayListUnmanaged(CIR.Pattern.RecordDestruct.Idx) = .{},
scratch_type_annos: std.ArrayListUnmanaged(CIR.TypeAnno.Idx) = .{},
scratch_anno_record_fields: std.ArrayListUnmanaged(CIR.TypeAnno.RecordField.Idx) = .{},
scratch_exposed_items: std.ArrayListUnmanaged(CIR.ExposedItem.Idx) = .{},
scratch_defs: std.ArrayListUnmanaged(CIR.Def.Idx) = .{},
scratch_diagnostics: std.ArrayListUnmanaged(CIR.Diagnostic.Idx) = .{},
scratch_captures: std.ArrayListUnmanaged(CIR.Expr.Capture.Idx) = .{},
gpa: std.mem.Allocator = undefined,
// NO scratch arrays or gpa - those contain pointers and vary in size across platforms
/// Serialize a NodeStore into this Serialized struct, appending data to the writer
pub fn serialize(
@ -3315,18 +3297,32 @@ pub const Serialized = struct {
}
/// Deserialize this Serialized struct into a NodeStore
pub fn deserialize(self: *Serialized, offset: i64, gpa: std.mem.Allocator) *NodeStore {
// NodeStore.Serialized should be at least as big as NodeStore
std.debug.assert(@sizeOf(Serialized) >= @sizeOf(NodeStore));
///
/// IMPORTANT: On 32-bit platforms (like WASM32), NodeStore (~170 bytes) is larger than
/// NodeStore.Serialized (72 bytes) because of scratch arrays and allocator fields.
/// We CANNOT overwrite the Serialized struct in-place, as this would corrupt adjacent
/// memory (the serialized array data that follows ModuleEnv.Serialized in the buffer).
///
/// Instead, we must allocate a separate NodeStore and return a pointer to it.
pub fn deserialize(self: *Serialized, offset: i64, gpa: std.mem.Allocator) !*NodeStore {
// Allocate a new NodeStore (cannot reuse Serialized location due to size mismatch on 32-bit)
const store = try gpa.create(NodeStore);
errdefer gpa.destroy(store);
// Overwrite ourself with the deserialized version, and return our pointer after casting it to Self.
const store = @as(*NodeStore, @ptrFromInt(@intFromPtr(self)));
// CRITICAL: On 32-bit platforms, deserializing nodes in-place corrupts the adjacent
// regions and extra_data fields. We must deserialize in REVERSE order (last to first)
// so that each deserialization doesn't corrupt fields that haven't been deserialized yet.
// Deserialize in reverse order: extra_data, regions, then nodes
const deserialized_extra_data = self.extra_data.deserialize(offset).*;
const deserialized_regions = self.regions.deserialize(offset).*;
const deserialized_nodes = self.nodes.deserialize(offset).*;
store.* = NodeStore{
.gpa = gpa,
.nodes = self.nodes.deserialize(offset).*,
.regions = self.regions.deserialize(offset).*,
.extra_data = self.extra_data.deserialize(offset).*,
.nodes = deserialized_nodes,
.regions = deserialized_regions,
.extra_data = deserialized_extra_data,
// Initialize scratch arrays as proper Scratch instances
.scratch_statements = base.Scratch(CIR.Statement.Idx){ .items = .{} },
.scratch_exprs = base.Scratch(CIR.Expr.Idx){ .items = .{} },

View file

@ -65,11 +65,17 @@ fn loadCompiledModule(gpa: std.mem.Allocator, bin_data: []const u8, module_name:
// Deserialize
const base_ptr = @intFromPtr(buffer.ptr);
// Deserialize store separately (returns a pointer that must be freed after copying)
const deserialized_store_ptr = try serialized_ptr.store.deserialize(@as(i64, @intCast(base_ptr)), gpa);
const deserialized_store = deserialized_store_ptr.*;
gpa.destroy(deserialized_store_ptr);
env.* = ModuleEnv{
.gpa = gpa,
.common = serialized_ptr.common.deserialize(@as(i64, @intCast(base_ptr)), source).*,
.types = serialized_ptr.types.deserialize(@as(i64, @intCast(base_ptr))).*,
.module_kind = serialized_ptr.module_kind,
.types = serialized_ptr.types.deserialize(@as(i64, @intCast(base_ptr)), gpa).*, // Pass gpa to types deserialize
.module_kind = serialized_ptr.module_kind.toModuleKind(),
.all_defs = serialized_ptr.all_defs,
.all_statements = serialized_ptr.all_statements,
.exports = serialized_ptr.exports,
@ -78,7 +84,7 @@ fn loadCompiledModule(gpa: std.mem.Allocator, bin_data: []const u8, module_name:
.imports = serialized_ptr.imports.deserialize(@as(i64, @intCast(base_ptr)), gpa).*,
.module_name = module_name,
.diagnostics = serialized_ptr.diagnostics,
.store = serialized_ptr.store.deserialize(@as(i64, @intCast(base_ptr)), gpa).*,
.store = deserialized_store,
};
return LoadedModule{
@ -159,11 +165,12 @@ pub fn initWithImport(source: []const u8, other_module_name: []const u8, other_m
try module_env.initCIRFields(gpa, "test");
// Inject builtin type declarations (Bool and Result) following eval.zig pattern
// Use .err content to match the old builtin injection system behavior
const bool_stmt = bool_module.env.store.getStatement(builtin_indices.bool_type);
const actual_bool_idx = try module_env.store.addStatement(bool_stmt, base.Region.zero());
const actual_bool_idx = try module_env.addStatementAndTypeVar(bool_stmt, .err, base.Region.zero());
const result_stmt = result_module.env.store.getStatement(builtin_indices.result_type);
const actual_result_idx = try module_env.store.addStatement(result_stmt, base.Region.zero());
const actual_result_idx = try module_env.addStatementAndTypeVar(result_stmt, .err, base.Region.zero());
// Update builtin_statements span
const start_idx = @intFromEnum(actual_bool_idx);
@ -266,10 +273,12 @@ pub fn init(source: []const u8) !TestEnv {
// Get the Bool type declaration from the loaded module using the build-time index
const bool_stmt = bool_module.env.store.getStatement(builtin_indices.bool_type);
const actual_bool_idx = try module_env.store.addStatement(bool_stmt, base.Region.zero());
_ = try module_env.types.fresh(); // Keep types array in sync with nodes/regions
// Get the Result type declaration from the loaded module using the build-time index
const result_stmt = result_module.env.store.getStatement(builtin_indices.result_type);
const actual_result_idx = try module_env.store.addStatement(result_stmt, base.Region.zero());
_ = try module_env.types.fresh(); // Keep types array in sync with nodes/regions
// Update builtin_statements span to include injected Bool and Result
// Use the ACTUAL indices where they landed (not hardcoded!)

View file

@ -64,11 +64,17 @@ fn loadCompiledModule(gpa: std.mem.Allocator, bin_data: []const u8, module_name:
// Deserialize
const base_ptr = @intFromPtr(buffer.ptr);
// Deserialize store separately (returns a pointer that must be freed after copying)
const deserialized_store_ptr = try serialized_ptr.store.deserialize(@as(i64, @intCast(base_ptr)), gpa);
const deserialized_store = deserialized_store_ptr.*;
gpa.destroy(deserialized_store_ptr);
env.* = ModuleEnv{
.gpa = gpa,
.common = serialized_ptr.common.deserialize(@as(i64, @intCast(base_ptr)), source).*,
.types = serialized_ptr.types.deserialize(@as(i64, @intCast(base_ptr))).*,
.module_kind = serialized_ptr.module_kind,
.types = serialized_ptr.types.deserialize(@as(i64, @intCast(base_ptr)), gpa).*, // Pass gpa to types deserialize
.module_kind = serialized_ptr.module_kind.toModuleKind(),
.all_defs = serialized_ptr.all_defs,
.all_statements = serialized_ptr.all_statements,
.exports = serialized_ptr.exports,
@ -77,7 +83,7 @@ fn loadCompiledModule(gpa: std.mem.Allocator, bin_data: []const u8, module_name:
.imports = serialized_ptr.imports.deserialize(@as(i64, @intCast(base_ptr)), gpa).*,
.module_name = module_name,
.diagnostics = serialized_ptr.diagnostics,
.store = serialized_ptr.store.deserialize(@as(i64, @intCast(base_ptr)), gpa).*,
.store = deserialized_store,
};
return LoadedModule{

View file

@ -218,11 +218,17 @@ fn loadCompiledModule(gpa: std.mem.Allocator, bin_data: []const u8, module_name:
const env = try gpa.create(ModuleEnv);
errdefer gpa.destroy(env);
const base_ptr = @intFromPtr(buffer.ptr);
// Deserialize store separately (returns a pointer that must be freed after copying)
const deserialized_store_ptr = try serialized_ptr.store.deserialize(@as(i64, @intCast(base_ptr)), gpa);
const deserialized_store = deserialized_store_ptr.*;
gpa.destroy(deserialized_store_ptr);
env.* = ModuleEnv{
.gpa = gpa,
.common = serialized_ptr.common.deserialize(@as(i64, @intCast(base_ptr)), source).*,
.types = serialized_ptr.types.deserialize(@as(i64, @intCast(base_ptr))).*,
.module_kind = serialized_ptr.module_kind,
.types = serialized_ptr.types.deserialize(@as(i64, @intCast(base_ptr)), gpa).*, // Pass gpa to types deserialize
.module_kind = serialized_ptr.module_kind.toModuleKind(),
.all_defs = serialized_ptr.all_defs,
.all_statements = serialized_ptr.all_statements,
.exports = serialized_ptr.exports,
@ -231,7 +237,7 @@ fn loadCompiledModule(gpa: std.mem.Allocator, bin_data: []const u8, module_name:
.imports = serialized_ptr.imports.deserialize(@as(i64, @intCast(base_ptr)), gpa).*,
.module_name = module_name,
.diagnostics = serialized_ptr.diagnostics,
.store = serialized_ptr.store.deserialize(@as(i64, @intCast(base_ptr)), gpa).*,
.store = deserialized_store,
};
return LoadedModule{ .env = env, .buffer = buffer, .gpa = gpa };
}

View file

@ -81,10 +81,16 @@ test "ModuleEnv.Serialized roundtrip" {
// Now manually construct the ModuleEnv using the deserialized CommonEnv
const env = @as(*ModuleEnv, @ptrCast(@alignCast(deserialized_ptr)));
// Deserialize store separately (returns a pointer that must be freed after copying)
const deserialized_store_ptr = try deserialized_ptr.store.deserialize(@as(i64, @intCast(@intFromPtr(buffer.ptr))), deser_alloc);
const deserialized_store = deserialized_store_ptr.*;
deser_alloc.destroy(deserialized_store_ptr);
env.* = ModuleEnv{
.gpa = gpa,
.common = deserialized_ptr.common.deserialize(@as(i64, @intCast(@intFromPtr(buffer.ptr))), source).*,
.types = deserialized_ptr.types.deserialize(@as(i64, @intCast(@intFromPtr(buffer.ptr)))).*,
.types = deserialized_ptr.types.deserialize(@as(i64, @intCast(@intFromPtr(buffer.ptr))), deser_alloc).*, // Pass gpa to types deserialize
.module_kind = deserialized_ptr.module_kind,
.all_defs = deserialized_ptr.all_defs,
.all_statements = deserialized_ptr.all_statements,
@ -94,7 +100,7 @@ test "ModuleEnv.Serialized roundtrip" {
.imports = deserialized_ptr.imports.deserialize(@as(i64, @intCast(@intFromPtr(buffer.ptr))), deser_alloc).*,
.module_name = "TestModule",
.diagnostics = deserialized_ptr.diagnostics,
.store = deserialized_ptr.store.deserialize(@as(i64, @intCast(@intFromPtr(buffer.ptr))), deser_alloc).*,
.store = deserialized_store,
};
// Verify the data was preserved

View file

@ -69,11 +69,17 @@ pub fn loadCompiledModule(gpa: std.mem.Allocator, bin_data: []const u8, module_n
// Deserialize
const base_ptr = @intFromPtr(buffer.ptr);
// Deserialize store separately (returns a pointer that must be freed after copying)
const deserialized_store_ptr = try serialized_ptr.store.deserialize(@as(i64, @intCast(base_ptr)), gpa);
const deserialized_store = deserialized_store_ptr.*;
gpa.destroy(deserialized_store_ptr);
env.* = ModuleEnv{
.gpa = gpa,
.common = serialized_ptr.common.deserialize(@as(i64, @intCast(base_ptr)), source).*,
.types = serialized_ptr.types.deserialize(@as(i64, @intCast(base_ptr))).*,
.module_kind = serialized_ptr.module_kind,
.types = serialized_ptr.types.deserialize(@as(i64, @intCast(base_ptr)), gpa).*, // Pass gpa to types deserialize
.module_kind = serialized_ptr.module_kind.toModuleKind(),
.all_defs = serialized_ptr.all_defs,
.all_statements = serialized_ptr.all_statements,
.exports = serialized_ptr.exports,
@ -82,7 +88,7 @@ pub fn loadCompiledModule(gpa: std.mem.Allocator, bin_data: []const u8, module_n
.imports = serialized_ptr.imports.deserialize(@as(i64, @intCast(base_ptr)), gpa).*,
.module_name = module_name,
.diagnostics = serialized_ptr.diagnostics,
.store = serialized_ptr.store.deserialize(@as(i64, @intCast(base_ptr)), gpa).*,
.store = deserialized_store,
};
return LoadedModule{
@ -376,11 +382,12 @@ pub fn parseAndCanonicalizeExpr(allocator: std.mem.Allocator, source: []const u8
try module_env.initCIRFields(allocator, "test");
// Inject builtin type declarations (Bool and Result) following TestEnv.zig pattern
// Use .err content to match the old builtin injection system behavior
const bool_stmt = bool_module.env.store.getStatement(builtin_indices.bool_type);
const actual_bool_idx = try module_env.store.addStatement(bool_stmt, base.Region.zero());
const actual_bool_idx = try module_env.addStatementAndTypeVar(bool_stmt, .err, base.Region.zero());
const result_stmt = result_module.env.store.getStatement(builtin_indices.result_type);
const actual_result_idx = try module_env.store.addStatement(result_stmt, base.Region.zero());
const actual_result_idx = try module_env.addStatementAndTypeVar(result_stmt, .err, base.Region.zero());
// Update builtin_statements span
const start_idx = @intFromEnum(actual_bool_idx);

View file

@ -806,34 +806,44 @@ fn compileSource(source: []const u8) !CompilerStageData {
// Set up the source in WASM filesystem
WasmFilesystem.setSource(allocator, source);
logDebug("compileSource: Starting compilation (source len={})\n", .{source.len});
// Initialize the ModuleEnv
var module_env = try allocator.create(ModuleEnv);
module_env.* = try ModuleEnv.init(allocator, source);
try module_env.common.calcLineStarts(module_env.gpa);
logDebug("compileSource: ModuleEnv initialized\n", .{});
var result = CompilerStageData.init(allocator, module_env);
// Stage 1: Parse (includes tokenization)
logDebug("compileSource: Starting parse stage\n", .{});
var parse_ast = try parse.parse(&module_env.common, module_env.gpa);
result.parse_ast = parse_ast;
logDebug("compileSource: Parse complete\n", .{});
// Generate and store HTML before canonicalization corrupts the AST/tokens
logDebug("compileSource: Starting HTML generation\n", .{});
var local_arena = std.heap.ArenaAllocator.init(allocator);
defer local_arena.deinit();
const temp_alloc = local_arena.allocator();
// Generate Tokens HTML
logDebug("compileSource: Generating tokens HTML\n", .{});
var tokens_html_buffer = std.ArrayList(u8).init(temp_alloc);
const tokens_writer = tokens_html_buffer.writer().any();
AST.tokensToHtml(&parse_ast, &module_env.common, tokens_writer) catch |err| {
logDebug("compileSource: tokensToHtml failed: {}\n", .{err});
};
logDebug("compileSource: Tokens HTML generated, duping to main allocator\n", .{});
result.tokens_html = allocator.dupe(u8, tokens_html_buffer.items) catch |err| {
logDebug("compileSource: failed to dupe tokens_html: {}\n", .{err});
return err;
};
logDebug("compileSource: Tokens HTML complete\n", .{});
// Generate AST HTML
logDebug("compileSource: Generating AST HTML\n", .{});
var ast_html_buffer = std.ArrayList(u8).init(temp_alloc);
const ast_writer = ast_html_buffer.writer().any();
{
@ -842,27 +852,34 @@ fn compileSource(source: []const u8) !CompilerStageData {
var tree = SExprTree.init(temp_alloc);
defer tree.deinit();
logDebug("compileSource: Call pushToSExprTree\n", .{});
try file.pushToSExprTree(module_env.gpa, &module_env.common, &parse_ast, &tree);
logDebug("compileSource: Call toHtml\n", .{});
try tree.toHtml(ast_writer);
}
logDebug("compileSource: AST HTML generated\n", .{});
result.ast_html = allocator.dupe(u8, ast_html_buffer.items) catch |err| {
logDebug("compileSource: failed to dupe ast_html: {}\n", .{err});
return err;
};
logDebug("compileSource: AST HTML complete\n", .{});
// Generate formatted code
logDebug("compileSource: Generating formatted code\n", .{});
var formatted_code_buffer = std.ArrayList(u8).init(temp_alloc);
fmt.formatAst(parse_ast, formatted_code_buffer.writer().any()) catch |err| {
logDebug("compileSource: formatAst failed: {}\n", .{err});
return err;
};
logDebug("compileSource: Formatted code generated\n", .{});
result.formatted_code = allocator.dupe(u8, formatted_code_buffer.items) catch |err| {
logDebug("compileSource: failed to dupe formatted_code: {}\n", .{err});
return err;
};
logDebug("compileSource: Formatted code complete\n", .{});
// Collect tokenize diagnostics with additional error handling
for (parse_ast.tokenize_diagnostics.items) |diagnostic| {
@ -907,31 +924,67 @@ fn compileSource(source: []const u8) !CompilerStageData {
const buffer = try gpa.alignedAlloc(u8, CompactWriter.SERIALIZATION_ALIGNMENT, bin_data.len);
@memcpy(buffer, bin_data);
logDebug("loadCompiledModule: bin_data.len={}, @sizeOf(ModuleEnv.Serialized)={}\n", .{ bin_data.len, @sizeOf(ModuleEnv.Serialized) });
const serialized_ptr = @as(*ModuleEnv.Serialized, @ptrCast(@alignCast(buffer.ptr)));
// Log the raw all_statements value to see what we're reading
logDebug("loadCompiledModule: raw all_statements.span.start={}, .len={}\n", .{
serialized_ptr.all_statements.span.start,
serialized_ptr.all_statements.span.len,
});
const module_env_ptr = try gpa.create(ModuleEnv);
errdefer gpa.destroy(module_env_ptr);
const base_ptr = @intFromPtr(buffer.ptr);
logDebug("loadCompiledModule: About to deserialize common\n", .{});
const deserialized_common = serialized_ptr.common.deserialize(@as(i64, @intCast(base_ptr)), module_source).*;
logDebug("loadCompiledModule: common deserialized successfully\n", .{});
logDebug("loadCompiledModule: About to deserialize types\n", .{});
const deserialized_types = serialized_ptr.types.deserialize(@as(i64, @intCast(base_ptr)), gpa).*;
logDebug("loadCompiledModule: types deserialized successfully\n", .{});
logDebug("loadCompiledModule: About to deserialize external_decls\n", .{});
const deserialized_external_decls = serialized_ptr.external_decls.deserialize(@as(i64, @intCast(base_ptr))).*;
logDebug("loadCompiledModule: external_decls deserialized successfully\n", .{});
logDebug("loadCompiledModule: About to deserialize imports\n", .{});
const deserialized_imports = serialized_ptr.imports.deserialize(@as(i64, @intCast(base_ptr)), gpa).*;
logDebug("loadCompiledModule: imports deserialized successfully\n", .{});
logDebug("loadCompiledModule: About to deserialize store\n", .{});
const deserialized_store_ptr = try serialized_ptr.store.deserialize(@as(i64, @intCast(base_ptr)), gpa);
const deserialized_store = deserialized_store_ptr.*;
gpa.destroy(deserialized_store_ptr); // Free the pointer after copying the value
logDebug("loadCompiledModule: store deserialized successfully\n", .{});
logDebug("loadCompiledModule: All deserialized, constructing ModuleEnv\n", .{});
module_env_ptr.* = ModuleEnv{
.gpa = gpa,
.common = serialized_ptr.common.deserialize(@as(i64, @intCast(base_ptr)), module_source).*,
.types = serialized_ptr.types.deserialize(@as(i64, @intCast(base_ptr))).*,
.common = deserialized_common,
.types = deserialized_types,
.module_kind = serialized_ptr.module_kind,
.all_defs = serialized_ptr.all_defs,
.all_statements = serialized_ptr.all_statements,
.exports = serialized_ptr.exports,
.builtin_statements = serialized_ptr.builtin_statements,
.external_decls = serialized_ptr.external_decls.deserialize(@as(i64, @intCast(base_ptr))).*,
.imports = serialized_ptr.imports.deserialize(@as(i64, @intCast(base_ptr)), gpa).*,
.external_decls = deserialized_external_decls,
.imports = deserialized_imports,
.module_name = module_name_param,
.diagnostics = serialized_ptr.diagnostics,
.store = serialized_ptr.store.deserialize(@as(i64, @intCast(base_ptr)), gpa).*,
.store = deserialized_store,
};
logDebug("loadCompiledModule: ModuleEnv constructed successfully\n", .{});
logDebug("loadCompiledModule: Returning LoadedModule\n", .{});
return .{ .env = module_env_ptr, .buffer = buffer, .gpa = gpa };
}
};
logDebug("compileSource: Loading builtin indices\n", .{});
const builtin_indices = blk: {
const aligned_buffer = try allocator.alignedAlloc(u8, @alignOf(can.CIR.BuiltinIndices), compiled_builtins.builtin_indices_bin.len);
defer allocator.free(aligned_buffer);
@ -939,21 +992,74 @@ fn compileSource(source: []const u8) !CompilerStageData {
const indices_ptr = @as(*const can.CIR.BuiltinIndices, @ptrCast(aligned_buffer.ptr));
break :blk indices_ptr.*;
};
logDebug("compileSource: Builtin indices loaded, bool_type={}\n", .{@intFromEnum(builtin_indices.bool_type)});
logDebug("compileSource: Loading Bool module\n", .{});
const bool_source = "Bool := [True, False].{}\n";
var bool_module = try LoadedModule.loadCompiledModule(allocator, compiled_builtins.bool_bin, "Bool", bool_source);
defer bool_module.deinit();
logDebug("compileSource: Bool module loaded\n", .{});
logDebug("compileSource: Loading Result module\n", .{});
const result_source = "Result(ok, err) := [Ok(ok), Err(err)].{}\n";
var result_module = try LoadedModule.loadCompiledModule(allocator, compiled_builtins.result_bin, "Result", result_source);
defer result_module.deinit();
logDebug("compileSource: Result module loaded\n", .{});
// Inject Bool and Result type declarations into the current module
const bool_stmt = bool_module.env.store.getStatement(builtin_indices.bool_type);
const actual_bool_idx = try env.store.addStatement(bool_stmt, base.Region.zero());
// Use .err content to match the old builtin injection system behavior
logDebug("compileSource: Loading builtin modules\n", .{});
const result_stmt = result_module.env.store.getStatement(builtin_indices.result_type);
const actual_result_idx = try env.store.addStatement(result_stmt, base.Region.zero());
logDebug("compileSource: About to slice Bool statements\n", .{});
logDebug("compileSource: Bool extra_data.items.items.len={}, all_statements.span={{start={}, len={}}}\n", .{
bool_module.env.store.extra_data.items.items.len,
bool_module.env.all_statements.span.start,
bool_module.env.all_statements.span.len,
});
const bool_stmts = bool_module.env.store.sliceStatements(bool_module.env.all_statements);
logDebug("compileSource: Sliced Bool statements successfully, count={}\n", .{bool_stmts.len});
logDebug("compileSource: Bool all_statements span: start={}, len={}\n", .{
bool_module.env.all_statements.span.start,
bool_module.env.all_statements.span.len,
});
// Get Bool statement from the sliced statements (bool_stmts[0] is the Bool type declaration)
logDebug("compileSource: About to get Bool statement from sliced statements\n", .{});
logDebug("compileSource: bool_stmts[0] = {}, nodes.len() = {}\n", .{
@intFromEnum(bool_stmts[0]),
bool_module.env.store.nodes.len(),
});
// Check that the node index is within bounds of the nodes list before accessing it
const node_idx_to_access = @intFromEnum(bool_stmts[0]);
logDebug("compileSource: Attempting to access node at index {}\n", .{node_idx_to_access});
if (node_idx_to_access >= bool_module.env.store.nodes.len()) {
logDebug("compileSource: ERROR - node index {} is out of bounds (nodes.len={})\n", .{
node_idx_to_access,
bool_module.env.store.nodes.len(),
});
return error.NodeIndexOutOfBounds;
}
const bool_stmt = bool_module.env.store.getStatement(bool_stmts[0]);
logDebug("compileSource: Got Bool statement successfully\n", .{});
logDebug("compileSource: About to add Bool statement and type var to main env\n", .{});
const actual_bool_idx = try env.addStatementAndTypeVar(bool_stmt, .err, base.Region.zero());
logDebug("compileSource: Bool statement added successfully, idx={}\n", .{@intFromEnum(actual_bool_idx)});
// Get Result statements
const result_stmts = result_module.env.store.sliceStatements(result_module.env.all_statements);
logDebug("compileSource: About to get Result statement from sliced statements\n", .{});
const result_stmt = result_module.env.store.getStatement(result_stmts[0]);
logDebug("compileSource: Got Result statement successfully\n", .{});
logDebug("compileSource: About to add Result statement and type var to main env\n", .{});
const actual_result_idx = try env.addStatementAndTypeVar(result_stmt, .err, base.Region.zero());
logDebug("compileSource: Result statement added successfully, idx={}\n", .{@intFromEnum(actual_result_idx)});
logDebug("compileSource: Builtin injection complete\n", .{});
// Store bool_stmt in result for later use (e.g., in test runner)
result.bool_stmt = actual_bool_idx;
@ -974,6 +1080,7 @@ fn compileSource(source: []const u8) !CompilerStageData {
.result_stmt = actual_result_idx,
};
logDebug("compileSource: Starting canonicalization\n", .{});
var czer = try Can.init(env, &result.parse_ast.?, null);
defer czer.deinit();
@ -992,6 +1099,7 @@ fn compileSource(source: []const u8) !CompilerStageData {
return err;
}
};
logDebug("compileSource: Canonicalization complete\n", .{});
// Copy the modified AST back into the main result to ensure state consistency
result.parse_ast = parse_ast;
@ -1011,6 +1119,7 @@ fn compileSource(source: []const u8) !CompilerStageData {
// Stage 3: Type checking (always run if we have CIR, even with canonicalization errors)
// The type checker works with malformed canonical nodes to provide partial type information
logDebug("compileSource: Starting type checking\n", .{});
{
const type_can_ir = result.module_env;
const empty_modules: []const *ModuleEnv = &.{};
@ -1028,6 +1137,7 @@ fn compileSource(source: []const u8) !CompilerStageData {
return check_err;
}
};
logDebug("compileSource: Type checking complete\n", .{});
// Collect type checking problems and convert them to reports using ReportBuilder
var report_builder = problem.ReportBuilder.init(
@ -1053,6 +1163,7 @@ fn compileSource(source: []const u8) !CompilerStageData {
}
}
logDebug("compileSource: Compilation complete\n", .{});
return result;
}

View file

@ -74,11 +74,17 @@ fn loadCompiledModule(gpa: std.mem.Allocator, bin_data: []const u8, module_name:
// Deserialize
const base_ptr = @intFromPtr(buffer.ptr);
// Deserialize store separately (returns a pointer that must be freed after copying)
const deserialized_store_ptr = try serialized_ptr.store.deserialize(@as(i64, @intCast(base_ptr)), gpa);
const deserialized_store = deserialized_store_ptr.*;
gpa.destroy(deserialized_store_ptr);
env.* = ModuleEnv{
.gpa = gpa,
.common = serialized_ptr.common.deserialize(@as(i64, @intCast(base_ptr)), source).*,
.types = serialized_ptr.types.deserialize(@as(i64, @intCast(base_ptr))).*,
.module_kind = serialized_ptr.module_kind,
.types = serialized_ptr.types.deserialize(@as(i64, @intCast(base_ptr)), gpa).*, // Pass gpa to types deserialize
.module_kind = serialized_ptr.module_kind.toModuleKind(),
.all_defs = serialized_ptr.all_defs,
.all_statements = serialized_ptr.all_statements,
.exports = serialized_ptr.exports,
@ -87,7 +93,7 @@ fn loadCompiledModule(gpa: std.mem.Allocator, bin_data: []const u8, module_name:
.imports = serialized_ptr.imports.deserialize(@as(i64, @intCast(base_ptr)), gpa).*,
.module_name = module_name,
.diagnostics = serialized_ptr.diagnostics,
.store = serialized_ptr.store.deserialize(@as(i64, @intCast(base_ptr)), gpa).*,
.store = deserialized_store,
};
return LoadedModule{
@ -462,13 +468,15 @@ fn evaluatePureExpression(self: *Repl, expr_source: []const u8, def_ident: ?[]co
var result_module = try loadCompiledModule(self.allocator, compiled_builtins.result_bin, "Result", result_source);
defer result_module.deinit();
// Get the Bool type declaration from the loaded module using the build-time index
// Get the Bool type declaration from the loaded module
// Use .err content to match the old builtin injection system behavior
const bool_stmt = bool_module.env.store.getStatement(builtin_indices.bool_type);
const actual_bool_idx = try cir.store.addStatement(bool_stmt, base.Region.zero());
const actual_bool_idx = try cir.addStatementAndTypeVar(bool_stmt, .err, base.Region.zero());
// Get the Result type declaration from the loaded module using the build-time index
// Get the Result type declaration from the loaded module
// Use .err content to match the old builtin injection system behavior
const result_stmt = result_module.env.store.getStatement(builtin_indices.result_type);
const actual_result_idx = try cir.store.addStatement(result_stmt, base.Region.zero());
const actual_result_idx = try cir.addStatementAndTypeVar(result_stmt, .err, base.Region.zero());
const common_idents: Check.CommonIdents = .{
.module_name = try cir.insertIdent(base.Ident.for_text(module_name)),
@ -868,13 +876,15 @@ test "Repl - minimal interpreter integration" {
var result_module = try loadCompiledModule(gpa, compiled_builtins.result_bin, "Result", result_source);
defer result_module.deinit();
// Get the Bool type declaration from the loaded module using the build-time index
// Get the Bool type declaration from the loaded module
// Use .err content to match the old builtin injection system behavior
const bool_stmt = bool_module.env.store.getStatement(builtin_indices.bool_type);
const actual_bool_idx = try cir.store.addStatement(bool_stmt, base.Region.zero());
const actual_bool_idx = try cir.addStatementAndTypeVar(bool_stmt, .err, base.Region.zero());
// Get the Result type declaration from the loaded module using the build-time index
// Get the Result type declaration from the loaded module
// Use .err content to match the old builtin injection system behavior
const result_stmt = result_module.env.store.getStatement(builtin_indices.result_type);
const actual_result_idx = try cir.store.addStatement(result_stmt, base.Region.zero());
const actual_result_idx = try cir.addStatementAndTypeVar(result_stmt, .err, base.Region.zero());
const module_common_idents: Check.CommonIdents = .{
.module_name = try module_env.insertIdent(base.Ident.for_text("test")),

View file

@ -63,10 +63,16 @@ fn loadCompiledModule(gpa: std.mem.Allocator, bin_data: []const u8, module_name:
// Deserialize
const base_ptr = @intFromPtr(buffer.ptr);
// Deserialize store separately (returns a pointer that must be freed after copying)
const deserialized_store_ptr = try serialized_ptr.store.deserialize(@as(i64, @intCast(base_ptr)), gpa);
const deserialized_store = deserialized_store_ptr.*;
gpa.destroy(deserialized_store_ptr);
env.* = ModuleEnv{
.gpa = gpa,
.common = serialized_ptr.common.deserialize(@as(i64, @intCast(base_ptr)), source).*,
.types = serialized_ptr.types.deserialize(@as(i64, @intCast(base_ptr))).*,
.types = serialized_ptr.types.deserialize(@as(i64, @intCast(base_ptr)), gpa).*, // Pass gpa to types deserialize
.module_kind = serialized_ptr.module_kind,
.all_defs = serialized_ptr.all_defs,
.all_statements = serialized_ptr.all_statements,
@ -76,7 +82,7 @@ fn loadCompiledModule(gpa: std.mem.Allocator, bin_data: []const u8, module_name:
.imports = serialized_ptr.imports.deserialize(@as(i64, @intCast(base_ptr)), gpa).*,
.module_name = module_name,
.diagnostics = serialized_ptr.diagnostics,
.store = serialized_ptr.store.deserialize(@as(i64, @intCast(base_ptr)), gpa).*,
.store = deserialized_store,
};
return LoadedModule{
@ -464,13 +470,15 @@ pub const Repl = struct {
// The indices in builtin_indices refer to positions within Bool.bin/Result.bin
// When we inject them here, they get NEW indices in the current module
// Get the Bool type declaration from the loaded module using the build-time index
// Get the Bool type declaration from the loaded module
// Use .err content to match the old builtin injection system behavior
const bool_stmt = self.bool_module.env.store.getStatement(self.builtin_indices.bool_type);
const actual_bool_idx = try cir.store.addStatement(bool_stmt, base.Region.zero());
const actual_bool_idx = try cir.addStatementAndTypeVar(bool_stmt, .err, base.Region.zero());
// Get the Result type declaration from the loaded module using the build-time index
// Get the Result type declaration from the loaded module
// Use .err content to match the old builtin injection system behavior
const result_stmt = self.result_module.env.store.getStatement(self.builtin_indices.result_type);
const actual_result_idx = try cir.store.addStatement(result_stmt, base.Region.zero());
const actual_result_idx = try cir.addStatementAndTypeVar(result_stmt, .err, base.Region.zero());
// Update builtin_statements span to include injected Bool and Result
// Use the ACTUAL indices where they landed (not hardcoded!)

View file

@ -68,11 +68,17 @@ fn loadCompiledModule(gpa: std.mem.Allocator, bin_data: []const u8, module_name:
// Deserialize
const base_ptr = @intFromPtr(buffer.ptr);
// Deserialize store separately (returns a pointer that must be freed after copying)
const deserialized_store_ptr = try serialized_ptr.store.deserialize(@as(i64, @intCast(base_ptr)), gpa);
const deserialized_store = deserialized_store_ptr.*;
gpa.destroy(deserialized_store_ptr);
env.* = ModuleEnv{
.gpa = gpa,
.common = serialized_ptr.common.deserialize(@as(i64, @intCast(base_ptr)), source).*,
.types = serialized_ptr.types.deserialize(@as(i64, @intCast(base_ptr))).*,
.module_kind = serialized_ptr.module_kind,
.types = serialized_ptr.types.deserialize(@as(i64, @intCast(base_ptr)), gpa).*, // Pass gpa to types deserialize
.module_kind = serialized_ptr.module_kind.toModuleKind(),
.all_defs = serialized_ptr.all_defs,
.all_statements = serialized_ptr.all_statements,
.exports = serialized_ptr.exports,
@ -81,7 +87,7 @@ fn loadCompiledModule(gpa: std.mem.Allocator, bin_data: []const u8, module_name:
.imports = serialized_ptr.imports.deserialize(@as(i64, @intCast(base_ptr)), gpa).*,
.module_name = module_name,
.diagnostics = serialized_ptr.diagnostics,
.store = serialized_ptr.store.deserialize(@as(i64, @intCast(base_ptr)), gpa).*,
.store = deserialized_store,
};
return LoadedModule{
@ -281,11 +287,12 @@ test "Repl - minimal interpreter integration" {
try cir.initCIRFields(gpa, "test");
// Inject builtin type declarations (Bool and Result) following TestEnv.zig pattern
// Use .err content to match the old builtin injection system behavior
const bool_stmt = bool_module.env.store.getStatement(builtin_indices.bool_type);
const actual_bool_idx = try module_env.store.addStatement(bool_stmt, base.Region.zero());
const actual_bool_idx = try module_env.addStatementAndTypeVar(bool_stmt, .err, base.Region.zero());
const result_stmt = result_module.env.store.getStatement(builtin_indices.result_type);
const actual_result_idx = try module_env.store.addStatement(result_stmt, base.Region.zero());
const actual_result_idx = try module_env.addStatementAndTypeVar(result_stmt, .err, base.Region.zero());
// Update builtin_statements span
const start_idx = @intFromEnum(actual_bool_idx);

View file

@ -677,11 +677,16 @@ fn loadCompiledModule(gpa: std.mem.Allocator, bin_data: []const u8, module_name:
// Deserialize
const base_ptr = @intFromPtr(buffer.ptr);
// Deserialize store separately (returns a pointer that must be freed after copying)
const deserialized_store_ptr = try serialized_ptr.store.deserialize(@as(i64, @intCast(base_ptr)), gpa);
const deserialized_store = deserialized_store_ptr.*;
gpa.destroy(deserialized_store_ptr);
env.* = ModuleEnv{
.gpa = gpa,
.common = serialized_ptr.common.deserialize(@as(i64, @intCast(base_ptr)), source).*,
.types = serialized_ptr.types.deserialize(@as(i64, @intCast(base_ptr))).*,
.module_kind = serialized_ptr.module_kind,
.types = serialized_ptr.types.deserialize(@as(i64, @intCast(base_ptr)), gpa).*, // Pass gpa to types deserialize
.module_kind = serialized_ptr.module_kind.toModuleKind(),
.all_defs = serialized_ptr.all_defs,
.all_statements = serialized_ptr.all_statements,
.exports = serialized_ptr.exports,
@ -690,7 +695,7 @@ fn loadCompiledModule(gpa: std.mem.Allocator, bin_data: []const u8, module_name:
.imports = serialized_ptr.imports.deserialize(@as(i64, @intCast(base_ptr)), gpa).*,
.module_name = module_name,
.diagnostics = serialized_ptr.diagnostics,
.store = serialized_ptr.store.deserialize(@as(i64, @intCast(base_ptr)), gpa).*,
.store = deserialized_store,
};
return LoadedModule{

View file

@ -727,8 +727,8 @@ pub const Store = struct {
// serialization //
/// Serialized representation of types store
/// Following SafeList.Serialized pattern: NO pointers, NO slices, NO Allocators
pub const Serialized = struct {
gpa: Allocator,
slots: SlotStore.Serialized,
descs: DescStore.Serialized,
vars: VarSafeList.Serialized,
@ -750,13 +750,10 @@ pub const Store = struct {
try self.record_fields.serialize(&store.record_fields, allocator, writer);
try self.tags.serialize(&store.tags, allocator, writer);
try self.static_dispatch_constraints.serialize(&store.static_dispatch_constraints, allocator, writer);
// Store the allocator
self.gpa = allocator;
}
/// Deserialize this Serialized struct into a Store
pub fn deserialize(self: *Serialized, offset: i64) *Store {
pub fn deserialize(self: *Serialized, offset: i64, gpa: Allocator) *Store {
// types.Store.Serialized must be at least as big as types.Store, because the
// Store is written in place over this Serialized's own memory (see the cast below)
std.debug.assert(@sizeOf(Serialized) >= @sizeOf(Store));
@ -764,7 +761,7 @@ pub const Store = struct {
const store = @as(*Store, @ptrFromInt(@intFromPtr(self)));
store.* = Store{
.gpa = self.gpa,
.gpa = gpa,
.slots = self.slots.deserialize(offset).*,
.descs = self.descs.deserialize(offset).*,
.vars = self.vars.deserialize(offset).*,