Merge pull request #8614 from roc-lang/fix-more-can-stuff

Fix more can stuff
This commit is contained in:
Richard Feldman 2025-12-10 14:02:20 -05:00 committed by GitHub
commit b60aa9a30f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 108 additions and 65 deletions

View file

@ -2449,9 +2449,9 @@ fn canonicalizeStmtDecl(self: *Self, decl: AST.Statement.Decl, mb_last_anno: ?Ty
const pattern_region = self.parse_ir.tokenizedRegionToRegion(ast_pattern.to_tokenized_region());
mb_validated_anno = try self.createAnnotationFromTypeAnno(anno_info.anno_idx, anno_info.where, pattern_region);
}
} else {
// TODO: Diagnostic
}
// Note: If resolveIdentifier returns null, the identifier token is malformed.
// The parser already handles this; we just don't match it with the annotation.
}
}
@ -2860,34 +2860,6 @@ fn checkExposedButNotImplemented(self: *Self) std.mem.Allocator.Error!void {
}
}
/// Handles an import by looking up (or inserting) its module in
/// `self.env.modules`, then recording every entry of the import's `exposing`
/// list against that module through `addExposedIdentForModule`.
///
/// Nothing is introduced into scope here, and an import whose module was
/// already present is accepted without reporting anything.
fn bringIngestedFileIntoScope(
    self: *Self,
    import: *const parse.AST.Stmt.Import,
) void {
    const module_entry = self.env.modules.getOrInsert(import.name, import.package_shorthand);

    if (module_entry.was_present) {
        // Deliberately a no-op for now; the duplicate-import report is still disabled:
        // _ = self.env.problems.append(Problem.Canonicalize.make(.DuplicateImport{
        //     .duplicate_import_region = import.name_region,
        // }));
    }

    // scope.introduce(self: *Scope, comptime item_kind: Level.ItemKind, ident: Ident.Idx)
    for (import.exposing.items.items) |exposed_item| {
        // Values and types carry their identifier directly; a custom tag
        // union is registered under its `name`.
        const ident_to_expose = switch (exposed_item) {
            .Value => |value_ident| value_ident,
            .Type => |type_ident| type_ident,
            .CustomTagUnion => |tag_union| tag_union.name,
        };
        self.env.addExposedIdentForModule(ident_to_expose, module_entry.module_idx);
        // TODO: Implement scope introduction for exposed identifiers
    }
}
/// Process a module import with common logic shared by explicit imports and auto-imports.
/// This handles everything after module name and alias resolution.
/// Process import with an alias (normal import like `import json.Json` or `import json.Json as J`)
@ -4116,8 +4088,8 @@ pub fn canonicalizeExpr(
// Check if this is a used underscore variable
try self.checkUsedUnderscoreVariable(ident, region);
// We found the ident in scope, lookup to reference the pattern
// TODO(RANK)
// We found the ident in scope, create a lookup to reference the pattern
// Note: Rank tracking for let-polymorphism is handled by the type checker (Check.zig)
const expr_idx = try self.env.addExpr(CIR.Expr{ .e_lookup_local = .{
.pattern_idx = found_pattern_idx,
} }, region);
@ -4386,7 +4358,7 @@ pub fn canonicalizeExpr(
} else if (std.mem.eql(u8, suffix, "dec")) {
break :blk .dec;
} else {
// TODO: Create a new error type
// Invalid numeric suffix - the suffix doesn't match any known type
const expr_idx = try self.env.pushMalformed(Expr.Idx, Diagnostic{ .invalid_num_literal = .{ .region = region } });
return CanonicalizedExpr{ .idx = expr_idx, .free_vars = DataSpan.empty() };
}
@ -6559,7 +6531,7 @@ pub fn canonicalizePattern(
} else if (std.mem.eql(u8, suffix, "dec")) {
break :blk .dec;
} else {
// TODO: Create a new error type
// Invalid numeric suffix - the suffix doesn't match any known type
return try self.env.pushMalformed(Pattern.Idx, Diagnostic{ .invalid_num_literal = .{ .region = region } });
}
};
@ -8503,8 +8475,8 @@ fn canonicalizeTypeHeader(self: *Self, header_idx: AST.TypeHeader.Idx, type_kind
// Check if this is a builtin type
// Allow builtin type names to be redeclared in the Builtin module
// (e.g., Str := ... within Builtin.roc)
// TODO: Can we compare idents or something here? The byte slice comparison is inefficient
if (TypeAnno.Builtin.fromBytes(self.env.getIdentText(name_ident))) |_| {
// Use identifier index comparison instead of string comparison for efficiency
if (TypeAnno.Builtin.isBuiltinTypeIdent(name_ident, self.env.idents)) {
const is_builtin_module = std.mem.eql(u8, self.env.module_name, "Builtin");
if (!is_builtin_module) {
return try self.env.pushMalformed(CIR.TypeHeader.Idx, Diagnostic{ .ident_already_in_scope = .{
@ -9488,9 +9460,7 @@ pub fn canonicalizeBlockDecl(self: *Self, d: AST.Statement.Decl, mb_last_anno: ?
else => {},
}
// check against last anno
// Get the last annotation, if it exists
// Check if this declaration matches the last type annotation
var mb_validated_anno: ?Annotation.Idx = null;
if (mb_last_anno) |anno_info| {
if (ast_pattern == .ident) {
@ -9501,9 +9471,9 @@ pub fn canonicalizeBlockDecl(self: *Self, d: AST.Statement.Decl, mb_last_anno: ?
const pattern_region = self.parse_ir.tokenizedRegionToRegion(ast_pattern.to_tokenized_region());
mb_validated_anno = try self.createAnnotationFromTypeAnno(anno_info.anno_idx, anno_info.where, pattern_region);
}
} else {
// TODO: Diagnostic
}
// Note: If resolveIdentifier returns null, the identifier token is malformed.
// The parser already handles this; we just don't match it with the annotation.
}
}

View file

@ -41,6 +41,10 @@ pub fn replaceAnnoOnlyWithHosted(env: *ModuleEnv) !std.ArrayList(CIR.Def.Idx) {
if (pattern == .assign) {
const full_ident = pattern.assign.ident;
// Get the region from the original def for better error messages
const def_node_idx: @TypeOf(env.store.nodes).Idx = @enumFromInt(@intFromEnum(def_idx));
const def_region = env.store.getRegionAt(def_node_idx);
// Extract the unqualified name (e.g., "line!" from "Stdout.line!")
// The pattern might contain a qualified name, but we need the unqualified one
const full_name = env.getIdent(full_ident);
@ -72,13 +76,14 @@ pub fn replaceAnnoOnlyWithHosted(env: *ModuleEnv) !std.ArrayList(CIR.Def.Idx) {
} else 0;
// Create dummy parameter patterns for the lambda (one for each argument)
// Use the def's region for better error diagnostics
const patterns_start = env.store.scratchTop("patterns");
var arg_i: usize = 0;
while (arg_i < num_args) : (arg_i += 1) {
const arg_name = try std.fmt.allocPrint(gpa, "_arg{}", .{arg_i});
defer gpa.free(arg_name);
const arg_ident = env.common.findIdent(arg_name) orelse try env.common.insertIdent(gpa, base.Ident.for_text(arg_name));
const arg_pattern_idx = try env.addPattern(.{ .assign = .{ .ident = arg_ident } }, base.Region.zero());
const arg_pattern_idx = try env.addPattern(.{ .assign = .{ .ident = arg_ident } }, def_region);
try env.store.scratch.?.patterns.append(arg_pattern_idx);
}
const args_span = CIR.Pattern.Span{ .span = .{ .start = @intCast(patterns_start), .len = @intCast(num_args) } };
@ -87,9 +92,9 @@ pub fn replaceAnnoOnlyWithHosted(env: *ModuleEnv) !std.ArrayList(CIR.Def.Idx) {
const error_msg_lit = try env.insertString("Hosted functions cannot be called in the interpreter");
const diagnostic_idx = try env.addDiagnostic(.{ .not_implemented = .{
.feature = error_msg_lit,
.region = base.Region.zero(),
.region = def_region,
} });
const body_idx = try env.addExpr(.{ .e_runtime_error = .{ .diagnostic = diagnostic_idx } }, base.Region.zero());
const body_idx = try env.addExpr(.{ .e_runtime_error = .{ .diagnostic = diagnostic_idx } }, def_region);
// Ensure types array has entries for all new expressions
const body_int = @intFromEnum(body_idx);
@ -105,7 +110,7 @@ pub fn replaceAnnoOnlyWithHosted(env: *ModuleEnv) !std.ArrayList(CIR.Def.Idx) {
.args = args_span,
.body = body_idx,
},
}, base.Region.zero());
}, def_region);
// Ensure types array has an entry for this new expression
const expr_int = @intFromEnum(expr_idx);
@ -116,7 +121,7 @@ pub fn replaceAnnoOnlyWithHosted(env: *ModuleEnv) !std.ArrayList(CIR.Def.Idx) {
// Now replace the e_anno_only expression with the e_hosted_lambda
// We need to modify the def's expr field in extra_data (NOT data_2!)
// The expr is stored in extra_data[extra_start + 1]
const def_node_idx = @as(@TypeOf(env.store.nodes).Idx, @enumFromInt(@intFromEnum(def_idx)));
// (reuse def_node_idx from above)
const def_node = env.store.nodes.get(def_node_idx);
const extra_start = def_node.data_1;

View file

@ -124,6 +124,22 @@ pub const CommonIdents = extern struct {
list: Ident.Idx,
box: Ident.Idx,
// Unqualified builtin type names (for checking if a type name shadows a builtin)
num: Ident.Idx,
u8: Ident.Idx,
u16: Ident.Idx,
u32: Ident.Idx,
u64: Ident.Idx,
u128: Ident.Idx,
i8: Ident.Idx,
i16: Ident.Idx,
i32: Ident.Idx,
i64: Ident.Idx,
i128: Ident.Idx,
f32: Ident.Idx,
f64: Ident.Idx,
dec: Ident.Idx,
// Fully-qualified type identifiers for type checking and layout generation
builtin_try: Ident.Idx,
builtin_numeral: Ident.Idx,
@ -198,6 +214,21 @@ pub const CommonIdents = extern struct {
.str = try common.insertIdent(gpa, Ident.for_text("Str")),
.list = try common.insertIdent(gpa, Ident.for_text("List")),
.box = try common.insertIdent(gpa, Ident.for_text("Box")),
// Unqualified builtin type names
.num = try common.insertIdent(gpa, Ident.for_text("Num")),
.u8 = try common.insertIdent(gpa, Ident.for_text("U8")),
.u16 = try common.insertIdent(gpa, Ident.for_text("U16")),
.u32 = try common.insertIdent(gpa, Ident.for_text("U32")),
.u64 = try common.insertIdent(gpa, Ident.for_text("U64")),
.u128 = try common.insertIdent(gpa, Ident.for_text("U128")),
.i8 = try common.insertIdent(gpa, Ident.for_text("I8")),
.i16 = try common.insertIdent(gpa, Ident.for_text("I16")),
.i32 = try common.insertIdent(gpa, Ident.for_text("I32")),
.i64 = try common.insertIdent(gpa, Ident.for_text("I64")),
.i128 = try common.insertIdent(gpa, Ident.for_text("I128")),
.f32 = try common.insertIdent(gpa, Ident.for_text("F32")),
.f64 = try common.insertIdent(gpa, Ident.for_text("F64")),
.dec = try common.insertIdent(gpa, Ident.for_text("Dec")),
.builtin_try = try common.insertIdent(gpa, Ident.for_text("Try")),
.builtin_numeral = try common.insertIdent(gpa, Ident.for_text("Num.Numeral")),
.builtin_str = try common.insertIdent(gpa, Ident.for_text("Builtin.Str")),
@ -272,6 +303,21 @@ pub const CommonIdents = extern struct {
.str = common.findIdent("Str") orelse unreachable,
.list = common.findIdent("List") orelse unreachable,
.box = common.findIdent("Box") orelse unreachable,
// Unqualified builtin type names
.num = common.findIdent("Num") orelse unreachable,
.u8 = common.findIdent("U8") orelse unreachable,
.u16 = common.findIdent("U16") orelse unreachable,
.u32 = common.findIdent("U32") orelse unreachable,
.u64 = common.findIdent("U64") orelse unreachable,
.u128 = common.findIdent("U128") orelse unreachable,
.i8 = common.findIdent("I8") orelse unreachable,
.i16 = common.findIdent("I16") orelse unreachable,
.i32 = common.findIdent("I32") orelse unreachable,
.i64 = common.findIdent("I64") orelse unreachable,
.i128 = common.findIdent("I128") orelse unreachable,
.f32 = common.findIdent("F32") orelse unreachable,
.f64 = common.findIdent("F64") orelse unreachable,
.dec = common.findIdent("Dec") orelse unreachable,
.builtin_try = common.findIdent("Try") orelse unreachable,
.builtin_numeral = common.findIdent("Num.Numeral") orelse unreachable,
.builtin_str = common.findIdent("Builtin.Str") orelse unreachable,
@ -457,17 +503,23 @@ pub fn initModuleEnvFields(self: *Self, module_name: []const u8) !void {
return self.initCIRFields(module_name);
}
/// Initialize the module environment.
/// Initialize the module environment with capacity heuristics based on source size.
pub fn init(gpa: std.mem.Allocator, source: []const u8) std.mem.Allocator.Error!Self {
// TODO: maybe wire in smarter default based on the initial input text size.
var common = try CommonEnv.init(gpa, source);
const idents = try CommonIdents.insert(gpa, &common);
// Use source-based heuristics for initial capacities
// Typical Roc code generates ~1 node per 20 bytes, ~1 type per 50 bytes
// Use generous minimums to avoid too many reallocations for small files
const source_len = source.len;
const node_capacity = @max(1024, @min(100_000, source_len / 20));
const type_capacity = @max(2048, @min(50_000, source_len / 50));
const var_capacity = @max(512, @min(10_000, source_len / 100));
return Self{
.gpa = gpa,
.common = common,
.types = try TypeStore.initCapacity(gpa, 2048, 512),
.types = try TypeStore.initCapacity(gpa, type_capacity, var_capacity),
.module_kind = .deprecated_module, // Placeholder - set to actual kind during header canonicalization
.all_defs = .{ .span = .{ .start = 0, .len = 0 } },
.all_statements = .{ .span = .{ .start = 0, .len = 0 } },
@ -479,7 +531,7 @@ pub fn init(gpa: std.mem.Allocator, source: []const u8) std.mem.Allocator.Error!
.module_name = undefined, // Will be set later during canonicalization
.module_name_idx = Ident.Idx.NONE, // Will be set later during canonicalization
.diagnostics = CIR.Diagnostic.Span{ .span = base.DataSpan{ .start = 0, .len = 0 } },
.store = try NodeStore.initCapacity(gpa, 10_000), // Default node store capacity
.store = try NodeStore.initCapacity(gpa, node_capacity),
.evaluation_order = null, // Will be set after canonicalization completes
.idents = idents,
.deferred_numeric_literals = try DeferredNumericLiteral.SafeList.initCapacity(gpa, 32),

View file

@ -384,8 +384,6 @@ pub const TypeAnno = union(enum) {
list,
box,
num,
frac,
int,
u8,
u16,
u32,
@ -406,8 +404,6 @@ pub const TypeAnno = union(enum) {
.list => return "List",
.box => return "Box",
.num => return "Num",
.frac => return "Frac",
.int => return "Int",
.u8 => return "U8",
.u16 => return "U16",
.u32 => return "U32",
@ -429,8 +425,6 @@ pub const TypeAnno = union(enum) {
if (std.mem.eql(u8, bytes, "List")) return .list;
if (std.mem.eql(u8, bytes, "Box")) return .box;
if (std.mem.eql(u8, bytes, "Num")) return .num;
if (std.mem.eql(u8, bytes, "Frac")) return .frac;
if (std.mem.eql(u8, bytes, "Int")) return .int;
if (std.mem.eql(u8, bytes, "U8")) return .u8;
if (std.mem.eql(u8, bytes, "U16")) return .u16;
if (std.mem.eql(u8, bytes, "U32")) return .u32;
@ -446,5 +440,27 @@ pub const TypeAnno = union(enum) {
if (std.mem.eql(u8, bytes, "Dec")) return .dec;
return null;
}
/// Report whether `ident` equals one of the builtin type name identifiers
/// held by `idents`: `list`, `box`, `str`, `num`, the integer names
/// `u8`..`u128` and `i8`..`i128`, `f32`, `f64`, and `dec`.
///
/// Only identifier indices are compared; no identifier text is read.
/// `idents` may be any value that provides those fields.
pub fn isBuiltinTypeIdent(ident: base.Ident.Idx, idents: anytype) bool {
    const builtin_idents = [_]base.Ident.Idx{
        idents.list,
        idents.box,
        idents.str,
        idents.num,
        idents.u8,
        idents.u16,
        idents.u32,
        idents.u64,
        idents.u128,
        idents.i8,
        idents.i16,
        idents.i32,
        idents.i64,
        idents.i128,
        idents.f32,
        idents.f64,
        idents.dec,
    };

    for (builtin_idents) |candidate| {
        if (ident == candidate) return true;
    }
    return false;
}
};
};

View file

@ -2075,10 +2075,9 @@ fn generateBuiltinTypeInstance(
const box_content = try self.mkBoxContent(anno_args[0]);
return try self.freshFromContent(box_content, env, anno_region);
},
// Polymorphic number types (Num, Int, Frac) are no longer supported
// They have been replaced with concrete nominal types (U8, I32, F64, Dec, etc.)
.num, .int, .frac => {
// Return error - these should not be used anymore
// Polymorphic Num type is a module, not a type itself
.num => {
// Return error - Num is a module containing numeric types, not a type
return try self.freshFromContent(.err, env, anno_region);
},
}

View file

@ -127,7 +127,8 @@ test "ModuleEnv.Serialized roundtrip" {
// Plus 2 synthetic identifiers for ? operator desugaring: #ok, #err
// Plus 2 numeric method identifiers: abs, abs_diff
// Plus 1 inspect method identifier: to_inspect
try testing.expectEqual(@as(u32, 65), original.common.idents.interner.entry_count);
// Plus 14 unqualified builtin type names: Num, U8, U16, U32, U64, U128, I8, I16, I32, I64, I128, F32, F64, Dec
try testing.expectEqual(@as(u32, 79), original.common.idents.interner.entry_count);
try testing.expectEqualStrings("hello", original.getIdent(hello_idx));
try testing.expectEqualStrings("world", original.getIdent(world_idx));
@ -136,9 +137,9 @@ test "ModuleEnv.Serialized roundtrip" {
try testing.expectEqual(@as(usize, 2), original.imports.imports.len()); // Should have 2 unique imports
// First verify that the CommonEnv data was preserved after deserialization
// Should have same 65 identifiers as original: hello, world, TestModule + 18 well-known identifiers + 19 type identifiers + 3 field/tag identifiers + 7 more identifiers + 2 Try tag identifiers + 1 method identifier + 2 Bool tag identifiers + 6 from_utf8 identifiers + 2 synthetic identifiers for ? operator desugaring + 2 numeric method identifiers (abs, abs_diff) + 1 inspect method identifier (to_inspect) from ModuleEnv.init()
// Should have same 79 identifiers as original: hello, world, TestModule + 18 well-known identifiers + 19 type identifiers + 3 field/tag identifiers + 7 more identifiers + 2 Try tag identifiers + 1 method identifier + 2 Bool tag identifiers + 6 from_utf8 identifiers + 2 synthetic identifiers for ? operator desugaring + 2 numeric method identifiers (abs, abs_diff) + 1 inspect method identifier (to_inspect) + 14 unqualified builtin type names from ModuleEnv.init()
// (Note: "Try" is now shared with well-known identifiers, reducing total by 1)
try testing.expectEqual(@as(u32, 65), env.common.idents.interner.entry_count);
try testing.expectEqual(@as(u32, 79), env.common.idents.interner.entry_count);
try testing.expectEqual(@as(usize, 1), env.common.exposed_items.count());
try testing.expectEqual(@as(?u16, 42), env.common.exposed_items.getNodeIndexById(gpa, @as(u32, @bitCast(hello_idx))));

View file

@ -31,7 +31,7 @@ const expected_safelist_u8_size = 24;
const expected_safelist_u32_size = 24;
const expected_safemultilist_teststruct_size = 24;
const expected_safemultilist_node_size = 24;
const expected_moduleenv_size = 1040; // Platform-independent size
const expected_moduleenv_size = 1096; // Platform-independent size
const expected_nodestore_size = 96; // Platform-independent size
// Compile-time assertions - build will fail if sizes don't match expected values