Remove source from ModuleEnv (#7839)

remove source from ModuleEnv
This commit is contained in:
Luke Boswell 2025-06-18 21:46:33 +10:00 committed by GitHub
parent 3c8a46773f
commit 690fb42d30
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 33 additions and 32 deletions

View file

@@ -27,20 +27,11 @@ types_store: type_mod.Store,
/// and then use these line starts to calculate the line number and column number as required.
/// this is a more compact representation at the expense of extra computation only when generating error diagnostics.
line_starts: std.ArrayList(u32),
/// The original source bytes. We use these to generate error diagnostics.
/// TODO think about how we will manage this using the cache. Should we only
/// read these when we need them to report an error? instead of keeping all of this in memory.
/// This implementation here is simple, but let's us progress with working snapshot tests
/// and we can validate the error messages and region information there.
source: std.ArrayList(u8),
/// Initialize the module environment.
pub fn init(gpa: std.mem.Allocator, source_bytes: []const u8) Self {
pub fn init(gpa: std.mem.Allocator) Self {
// TODO: maybe wire in smarter default based on the initial input text size.
var source = std.ArrayList(u8).init(gpa);
source.appendSlice(source_bytes) catch |err| exitOnOom(err);
return Self{
.gpa = gpa,
.idents = Ident.Store.initCapacity(gpa, 1024),
@@ -48,7 +39,6 @@ pub fn init(gpa: std.mem.Allocator, source_bytes: []const u8) Self {
.strings = StringLiteral.Store.initCapacityBytes(gpa, 4096),
.types_store = type_mod.Store.initCapacity(gpa, 2048, 512),
.line_starts = std.ArrayList(u32).init(gpa),
.source = source,
};
}
@@ -59,7 +49,6 @@ pub fn deinit(self: *Self) void {
self.strings.deinit(self.gpa);
self.types_store.deinit();
self.line_starts.deinit();
self.source.deinit();
}
/// Calculate and store line starts from the source text

View file

@@ -28,6 +28,8 @@ const CIR = @This();
env: *base.ModuleEnv,
store: NodeStore,
ingested_files: IngestedFile.List,
/// Temporary source text used during SExpr generation for region info calculation
temp_source_for_sexpr: ?[]const u8 = null,
imports: ModuleImport.Store,
top_level_defs: Def.Span,
@@ -1479,7 +1481,10 @@ pub const ExhaustiveMark = TypeVar;
/// and write it to the given writer.
///
/// If a single expression is provided we only print that expression
pub fn toSExprStr(ir: *CIR, writer: std.io.AnyWriter, maybe_expr_idx: ?Expr.Idx) !void {
pub fn toSExprStr(ir: *CIR, writer: std.io.AnyWriter, maybe_expr_idx: ?Expr.Idx, source: []const u8) !void {
// Set temporary source for region info calculation during SExpr generation
ir.temp_source_for_sexpr = source;
defer ir.temp_source_for_sexpr = null;
const gpa = ir.env.gpa;
if (maybe_expr_idx) |expr_idx| {
@@ -1527,17 +1532,24 @@ test "NodeStore - init and deinit" {
/// This is a standalone utility function that takes the source text as a parameter
/// to avoid storing it in the cacheable IR structure.
pub fn calcRegionInfo(self: *const CIR, region: Region) base.RegionInfo {
const empty = base.RegionInfo{
.start_line_idx = 0,
.start_col_idx = 0,
.end_line_idx = 0,
.end_col_idx = 0,
.line_text = "",
};
// In the Can IR, regions store byte offsets directly, not token indices.
// We can use these offsets directly to calculate the diagnostic position.
const info = base.RegionInfo.position(self.env.source.items, self.env.line_starts.items, region.start.offset, region.end.offset) catch {
const source = self.temp_source_for_sexpr orelse {
// No source available, return empty region info
return empty;
};
const info = base.RegionInfo.position(source, self.env.line_starts.items, region.start.offset, region.end.offset) catch {
// Return a zero position if we can't calculate it
return .{
.start_line_idx = 0,
.start_col_idx = 0,
.end_line_idx = 0,
.end_col_idx = 0,
.line_text = "",
};
return empty;
};
return info;

View file

@@ -1785,7 +1785,7 @@ const TestEnv = struct {
/// slight more verbose setup for each test
fn init(gpa: std.mem.Allocator) Self {
const module_env = gpa.create(base.ModuleEnv) catch |e| exitOnOutOfMemory(e);
module_env.* = base.ModuleEnv.init(gpa, &[_]u8{});
module_env.* = base.ModuleEnv.init(gpa);
return .{
.module_env = module_env,
.types_store = &module_env.types_store,

View file

@@ -1560,7 +1560,7 @@ pub const Tokenizer = struct {
fn testTokenization(gpa: std.mem.Allocator, input: []const u8, expected: []const Token.Tag) !void {
var messages: [10]Diagnostic = undefined;
var env = base.ModuleEnv.init(gpa, input);
var env = base.ModuleEnv.init(gpa);
defer env.deinit();
var tokenizer = Tokenizer.init(&env, input, &messages);
@@ -1578,7 +1578,7 @@ fn testTokenization(gpa: std.mem.Allocator, input: []const u8, expected: []const
/// Assert the invariants of the tokenizer are held.
pub fn checkTokenizerInvariants(gpa: std.mem.Allocator, input: []const u8, debug: bool) void {
var env = base.ModuleEnv.init(gpa, input);
var env = base.ModuleEnv.init(gpa);
defer env.deinit();
// Initial tokenization.

View file

@@ -480,7 +480,7 @@ fn parseDependenciesFromPackageRoot(
return .{ .failed_to_read_root_file = err };
defer gpa.free(contents);
var env = base.ModuleEnv.init(gpa, contents);
var env = base.ModuleEnv.init(gpa);
defer env.deinit();
var parse_ast = parse.parse(&env, contents);

View file

@@ -112,7 +112,7 @@ fn loadOrCompileCanIr(
// this is temporary so we can generate error reporting and diagnostics/region info.
// We should probably be reading the file on demand or something else. Leaving this
// comment here so we discuss the plan and make the necessary changes.
var module_env = base.ModuleEnv.init(gpa, contents);
var module_env = base.ModuleEnv.init(gpa);
var parse_ir = parse.parse(&module_env, contents);
parse_ir.store.emptyScratch();

View file

@@ -51,7 +51,7 @@ pub fn checkSource(
source: []const u8,
) ![]const CIR.Diagnostic {
// Initialize the ModuleEnv
var module_env = ModuleEnv.init(gpa, source);
var module_env = ModuleEnv.init(gpa);
defer module_env.deinit();
// Parse the source code
@@ -88,7 +88,7 @@ pub fn processSource(
source: []const u8,
) !ProcessResult {
// Initialize the ModuleEnv
var module_env = ModuleEnv.init(gpa, source);
var module_env = ModuleEnv.init(gpa);
defer module_env.deinit();
// Parse the source code

View file

@@ -146,7 +146,7 @@ pub fn formatFilePath(gpa: std.mem.Allocator, base_dir: std.fs.Dir, path: []cons
};
defer gpa.free(contents);
var module_env = base.ModuleEnv.init(gpa, contents);
var module_env = base.ModuleEnv.init(gpa);
defer module_env.deinit();
var parse_ast = parse.parse(&module_env, contents);
@@ -1942,7 +1942,7 @@ pub fn moduleFmtsStable(gpa: std.mem.Allocator, input: []const u8, debug: bool)
}
fn parseAndFmt(gpa: std.mem.Allocator, input: []const u8, debug: bool) ![]const u8 {
var module_env = base.ModuleEnv.init(gpa, input);
var module_env = base.ModuleEnv.init(gpa);
defer module_env.deinit();
var parse_ast = parse.parse(&module_env, input);

View file

@@ -437,7 +437,7 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor
}
};
var module_env = base.ModuleEnv.init(gpa, content.source);
var module_env = base.ModuleEnv.init(gpa);
defer module_env.deinit();
// Parse the source code
@@ -671,7 +671,7 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor
var canonicalized = std.ArrayList(u8).init(gpa);
defer canonicalized.deinit();
try can_ir.toSExprStr(canonicalized.writer().any(), maybe_expr_idx);
try can_ir.toSExprStr(canonicalized.writer().any(), maybe_expr_idx, content.source);
try writer.writeAll(Section.CANONICALIZE);
try writer.writeAll("\n");