Use list_get_unsafe to get List.get working

This commit is contained in:
Richard Feldman 2025-11-07 11:40:39 -05:00
parent 004219d4ee
commit d437dc9527
No known key found for this signature in database
8 changed files with 362 additions and 60 deletions

View file

@ -155,6 +155,11 @@ fn replaceStrIsEmptyWithLowLevel(env: *ModuleEnv) !std.ArrayList(CIR.Def.Idx) {
if (env.common.findIdent("Builtin.List.is_empty")) |list_is_empty_ident| {
try low_level_map.put(list_is_empty_ident, .list_is_empty);
}
// list_get_unsafe is a private top-level function (not in Builtin.List)
// Module-level functions use simple names, not qualified names
if (env.common.findIdent("list_get_unsafe")) |list_get_unsafe_ident| {
try low_level_map.put(list_get_unsafe_ident, .list_get_unsafe);
}
if (env.common.findIdent("Builtin.Set.is_empty")) |set_is_empty_ident| {
try low_level_map.put(set_is_empty_ident, .set_is_empty);
}

View file

@ -1,34 +1,9 @@
Builtin := [].{
Str := [ProvidedByCompiler].{
is_empty : Str -> Bool
contains : Str, Str -> Bool
contains = |_str, _other| True
}
List := [ProvidedByCompiler].{
len : List(a) -> U64
is_empty : List(a) -> Bool
first : List(a) -> Try(a, [ListWasEmpty])
first = |_| Err(ListWasEmpty)
map : List(a), (a -> b) -> List(b)
map = |_, _| []
keep_if : List(a), (a -> Bool) -> List(a)
keep_if = |_, _| []
concat : List(a), List(a) -> List(a)
concat = |_, _| []
}
Bool := [True, False].{
not : Bool -> Bool
not = |bool| match bool {
Bool.True => Bool.False
Bool.False => Bool.True
True => False
False => True
}
is_eq : Bool, Bool -> Bool
@ -75,6 +50,33 @@ Builtin := [].{
#}
}
Str := [ProvidedByCompiler].{
is_empty : Str -> Bool
contains : Str, Str -> Bool
contains = |_str, _other| True
}
List := [ProvidedByCompiler].{
    len : List(_elem) -> U64
    is_empty : List(_elem) -> Bool

    # Returns the element at index 0 by delegating to `List.get`.
    # The success type must be the list's own element type (`elem`).
    first : List(elem) -> Try(elem, [ListWasEmpty])
    first = |list| List.get(list, 0)

    # Returns Err(ListWasEmpty) for an empty list; otherwise reads the element at
    # `index` through the unchecked `list_get_unsafe` primitive.
    # NOTE(review): only the empty-list case is guarded here. An `index` at or
    # beyond the length of a non-empty list still reaches `list_get_unsafe`,
    # which performs no bounds checking - confirm callers or add a length check.
    get : List(elem), U64 -> Try(elem, [ListWasEmpty])
    get = |list, index| if List.is_empty(list) Err(ListWasEmpty) else Ok(list_get_unsafe(list, index))

    # Placeholder implementation: always yields an empty list.
    map : List(a), (a -> b) -> List(b)
    map = |_, _| []

    # Placeholder implementation: always yields an empty list.
    keep_if : List(a), (a -> Bool) -> List(a)
    keep_if = |_, _| []

    # Placeholder implementation: always yields an empty list.
    concat : List(a), List(a) -> List(a)
    concat = |_, _| []
}
Dict := [EmptyDict].{}
Set(elem) := [].{
@ -333,3 +335,5 @@ Builtin := [].{
}
}
}
list_get_unsafe : List(elem), U64 -> elem

View file

@ -385,6 +385,18 @@ pub fn listIsEmpty(list: RocList) callconv(.c) bool {
return list.isEmpty();
}
/// Compute the address of the element stored at `index`, with no bounds check.
///
/// UNSAFE: the caller must guarantee `index < list.len()`; nothing here verifies it.
/// `element_width` is the size of one element in bytes and is used to scale
/// `index` into a byte offset from the start of the list's backing bytes.
///
/// Returns `null` when the list has no backing bytes (`list.bytes` is null);
/// otherwise returns the backing pointer advanced by `index * element_width`.
/// Intended for internal use by low-level operations only.
pub fn listGetUnsafe(list: RocList, index: u64, element_width: usize) callconv(.c) ?[*]u8 {
    // No backing allocation means there is nothing to point into.
    const base = list.bytes orelse return null;

    const element_index: usize = @intCast(index);
    return base + element_index * element_width;
}
/// Decrement reference count and deallocate when no longer shared.
pub fn listDecref(
list: RocList,

View file

@ -618,6 +618,10 @@ fn processAssociatedItemsSecondPass(
const type_text = self.env.getIdent(type_ident);
const qualified_idx = try self.env.insertQualifiedIdent(parent_text, type_text);
// Enter a new scope for the nested associated block
try self.scopeEnter(self.env.gpa, false);
defer self.scopeExit(self.env.gpa) catch unreachable;
try self.processAssociatedItemsSecondPass(qualified_idx, assoc.statements);
}
},
@ -689,6 +693,25 @@ fn processAssociatedItemsSecondPass(
// Register this associated item by its qualified name
const def_idx_u16: u16 = @intCast(@intFromEnum(def_idx));
try self.env.setExposedNodeIndexById(qualified_idx, def_idx_u16);
// Also make the unqualified name and short qualified name available in the current scope
// (This allows `get`, `List.get`, and `Builtin.List.get` to all work)
const def_cir = self.env.store.getDef(def_idx);
const pattern_idx = def_cir.pattern;
const current_scope = &self.scopes.items[self.scopes.items.len - 1];
// Add unqualified name (e.g., "get")
try current_scope.idents.put(self.env.gpa, name_ident, pattern_idx);
// Also add short qualified name (e.g., "List.get")
// Extract the last component of parent_name (e.g., "List" from "Builtin.List")
const parent_full_text = self.env.getIdent(parent_name);
const short_parent_text = if (std.mem.lastIndexOf(u8, parent_full_text, ".")) |last_dot|
parent_full_text[last_dot + 1..]
else
parent_full_text;
const short_qualified_idx = try self.env.insertQualifiedIdent(short_parent_text, decl_text);
try current_scope.idents.put(self.env.gpa, short_qualified_idx, pattern_idx);
} else {}
}
}
@ -711,6 +734,25 @@ fn processAssociatedItemsSecondPass(
const def_idx_u16: u16 = @intCast(@intFromEnum(def_idx));
try self.env.setExposedNodeIndexById(qualified_idx, def_idx_u16);
// Also make the unqualified name and short qualified name available in the current scope
// (This allows `is_empty`, `List.is_empty`, and `Builtin.List.is_empty` to all work)
const def_cir = self.env.store.getDef(def_idx);
const pattern_idx = def_cir.pattern;
const current_scope = &self.scopes.items[self.scopes.items.len - 1];
// Add unqualified name (e.g., "is_empty")
try current_scope.idents.put(self.env.gpa, name_ident, pattern_idx);
// Also add short qualified name (e.g., "List.is_empty")
// Extract the last component of parent_name (e.g., "List" from "Builtin.List")
const parent_full_text = self.env.getIdent(parent_name);
const short_parent_text = if (std.mem.lastIndexOf(u8, parent_full_text, ".")) |last_dot|
parent_full_text[last_dot + 1..]
else
parent_full_text;
const short_qualified_idx = try self.env.insertQualifiedIdent(short_parent_text, name_text);
try current_scope.idents.put(self.env.gpa, short_qualified_idx, pattern_idx);
try self.env.store.addScratchDef(def_idx);
},
}
@ -823,6 +865,8 @@ fn processAssociatedItemsFirstPass(
},
else => {
// Skip other statement types in first pass
// Note: .type_anno is skipped here because anno-only patterns are created
// in the second pass, not the first pass
},
}
}
@ -1148,6 +1192,20 @@ pub fn canonicalizeFile(
}
break;
}
// If we didn't find any next statement, create an anno-only def
// (This handles the case where the type annotation is the last statement in the file)
if (next_i >= ast_stmt_idxs.len) {
const def_idx = try self.createAnnoOnlyDef(name_ident, type_anno_idx, where_clauses, region);
try self.env.store.addScratchDef(def_idx);
// If this identifier should be exposed, register it
const ident_text = self.env.getIdent(name_ident);
if (self.exposed_ident_texts.contains(ident_text)) {
const def_idx_u16: u16 = @intCast(@intFromEnum(def_idx));
try self.env.setExposedNodeIndexById(name_ident, def_idx_u16);
}
}
},
.malformed => |malformed| {
// We won't touch this since it's already a parse error.
@ -1248,11 +1306,48 @@ pub fn canonicalizeFile(
}
}
},
else => {},
else => {
// Note: .type_anno is not handled here because anno-only patterns
// are created during processAssociatedItemsSecondPass, so they need
// to be re-introduced AFTER that call completes
},
}
}
try self.processAssociatedItemsSecondPass(type_ident, assoc.statements);
// After processing, re-introduce anno-only defs into the associated block scope
// (They were just created by processAssociatedItemsSecondPass and need to be available
// for use within the associated block)
for (self.parse_ir.store.statementSlice(assoc.statements)) |anno_stmt_idx| {
const anno_stmt = self.parse_ir.store.getStatement(anno_stmt_idx);
switch (anno_stmt) {
.type_anno => |type_anno| {
if (self.parse_ir.tokens.resolveIdentifier(type_anno.name)) |anno_ident| {
// Build qualified name
const parent_text = self.env.getIdent(type_ident);
const anno_text = self.env.getIdent(anno_ident);
const qualified_ident_idx = try self.env.insertQualifiedIdent(parent_text, anno_text);
// Look up the qualified pattern that was just created
switch (self.scopeLookup(.ident, qualified_ident_idx)) {
.found => |pattern_idx| {
const current_scope = &self.scopes.items[self.scopes.items.len - 1];
// Add both unqualified and qualified names to the current scope
// This allows both `len` and `List.len` to work inside the associated block
try current_scope.idents.put(self.env.gpa, anno_ident, pattern_idx);
try current_scope.idents.put(self.env.gpa, qualified_ident_idx, pattern_idx);
},
.not_found => {
// This can happen if the type_anno was followed by a matching decl
// (in which case it's not an anno-only def)
},
}
}
},
else => {},
}
}
}
},
else => {
@ -2755,21 +2850,52 @@ pub fn canonicalizeExpr(
break :blk null;
} orelse {
// Not a module alias and not an auto-imported module
// This is a qualified identifier with an invalid qualifier
// Check if the qualifier is a type - if so, try to lookup associated items
if (self.scopeLookupTypeBinding(module_alias)) |_| {
// This is a type with a potential associated item
// Build the fully qualified name and try to look it up
const type_text = self.env.getIdent(module_alias);
const field_text = self.env.getIdent(ident);
const type_qualified_idx = try self.env.insertQualifiedIdent(type_text, field_text);
// Check if the qualifier is in scope as a type/value
// If so, provide a more helpful error message
const diagnostic = if (self.scopeLookupTypeBinding(module_alias) != null)
Diagnostic{ .nested_value_not_found = .{
.parent_name = module_alias,
.nested_name = ident,
.region = region,
} }
else
Diagnostic{ .qualified_ident_does_not_exist = .{
.ident = qualified_ident,
.region = region,
} };
// Try to look up the associated item in the current scope
switch (self.scopeLookup(.ident, type_qualified_idx)) {
.found => |found_pattern_idx| {
// Found the associated item! Mark it as used.
try self.used_patterns.put(self.env.gpa, found_pattern_idx, {});
// Return a local lookup expression
const expr_idx = try self.env.addExpr(CIR.Expr{ .e_lookup_local = .{
.pattern_idx = found_pattern_idx,
} }, region);
const free_vars_start = self.scratch_free_vars.top();
try self.scratch_free_vars.append(found_pattern_idx);
return CanonicalizedExpr{
.idx = expr_idx,
.free_vars = DataSpan.init(free_vars_start, 1)
};
},
.not_found => {
// Associated item not found - generate error
const diagnostic = Diagnostic{ .nested_value_not_found = .{
.parent_name = module_alias,
.nested_name = ident,
.region = region,
} };
return CanonicalizedExpr{
.idx = try self.env.pushMalformed(Expr.Idx, diagnostic),
.free_vars = null,
};
},
}
}
// Not a type either - generate appropriate error
const diagnostic = Diagnostic{ .qualified_ident_does_not_exist = .{
.ident = qualified_ident,
.region = region,
} };
return CanonicalizedExpr{
.idx = try self.env.pushMalformed(Expr.Idx, diagnostic),

View file

@ -390,6 +390,7 @@ pub const Expr = union(enum) {
// List operations
list_len,
list_is_empty,
list_get_unsafe, // Internal only - private top-level function
// Set operations
set_is_empty,

View file

@ -579,7 +579,19 @@ fn freshFromContent(self: *Self, content: Content, rank: types_mod.Rank, new_reg
/// Create a fresh instance of the Bool type for use at `rank`.
///
/// Instantiates `self.bool_var` (the Bool type variable set up by
/// `copyBuiltinTypes`) and attributes the new variable to `new_region`.
/// Returns the instantiated variable; allocation failures propagate as
/// `Allocator.Error`.
fn freshBool(self: *Self, rank: Rank, new_region: Region) Allocator.Error!Var {
    // Use the copied Bool type from the type store (set by copyBuiltinTypes)
    return try self.instantiateVar(self.bool_var, rank, .{ .explicit = new_region });
}
// fresh vars //
@ -596,16 +608,26 @@ fn updateVar(self: *Self, target_var: Var, content: types_mod.Content, rank: typ
/// other modules directly. The Bool and Result types are used in language constructs like
/// `if` conditions and need to be available in every module's type store.
fn copyBuiltinTypes(self: *Self) !void {
const bool_stmt_idx = self.common_idents.bool_stmt;
if (self.common_idents.builtin_module) |builtin_env| {
// Copy Bool type from Builtin module using the direct reference
const bool_stmt_idx = self.common_idents.bool_stmt;
const bool_type_var = ModuleEnv.varFrom(bool_stmt_idx);
self.bool_var = try self.copyVar(bool_type_var, builtin_env, Region.zero());
} else {
// If Builtin module reference is null, use the statement from the current module
// This happens when compiling the Builtin module itself
self.bool_var = ModuleEnv.varFrom(bool_stmt_idx);
// If Builtin module reference is null, we're compiling the Builtin module itself
// Search for the Bool type declaration in all_statements
const all_stmts = self.cir.store.sliceStatements(self.cir.all_statements);
for (all_stmts) |stmt_idx| {
const stmt = self.cir.store.getStatement(stmt_idx);
if (stmt == .s_nominal_decl) {
const header = self.cir.store.getTypeHeader(stmt.s_nominal_decl.header);
const ident_text = self.cir.getIdent(header.name);
if (std.mem.eql(u8, ident_text, "Builtin.Bool")) {
self.bool_var = ModuleEnv.varFrom(stmt_idx);
break;
}
}
}
}
// Result type is accessed via external references, no need to copy it here
@ -618,9 +640,6 @@ pub fn checkFile(self: *Self) std.mem.Allocator.Error!void {
try ensureTypeStoreIsFilled(self);
// Copy builtin types (Bool, Result) into this module's type store
try self.copyBuiltinTypes();
// First, iterate over the builtin statements, generating types for each type declaration
const builtin_stmts_slice = self.cir.store.sliceStatements(self.cir.builtin_statements);
for (builtin_stmts_slice) |builtin_stmt_idx| {
@ -638,6 +657,11 @@ pub fn checkFile(self: *Self) std.mem.Allocator.Error!void {
try self.generateStmtTypeDeclType(stmt_idx);
}
// Copy builtin types (Bool, Result) into this module's type store
// This must happen AFTER type declarations are generated so that when compiling
// Builtin itself, the Bool and Try types have already been created
try self.copyBuiltinTypes();
// First pass: assign placeholder type vars
const defs_slice = self.cir.store.sliceDefs(self.cir.all_defs);
for (defs_slice) |def_idx| {
@ -767,8 +791,30 @@ fn checkDef(self: *Self, def_idx: CIR.Def.Idx) std.mem.Allocator.Error!void {
// Unify the fresh pattern var with the placeholder
_ = try self.unify(fresh_ptrn_var, placeholder_ptrn_var, rank);
// Debug: check if this is is_empty
const pattern = self.cir.store.getPattern(def.pattern);
if (pattern == .assign) {
const ident_text = self.cir.getIdent(pattern.assign.ident);
std.debug.print("\nDEBUG: Checking def for ident: {s}\n", .{ident_text});
if (std.mem.eql(u8, ident_text, "is_empty")) {
const before_generalize = self.types.resolveVar(placeholder_ptrn_var).desc;
std.debug.print("\nDEBUG: Before generalizing is_empty\n", .{});
std.debug.print(" placeholder_ptrn_var={} rank={} content_tag={s}\n", .{ @intFromEnum(placeholder_ptrn_var), before_generalize.rank, @tagName(before_generalize.content) });
}
}
// Now that we are exiting the scope, we must generalize then pop this rank
try self.generalizer.generalize(&self.var_pool, rank);
// Debug: check after generalization
if (pattern == .assign) {
const ident_text = self.cir.getIdent(pattern.assign.ident);
if (std.mem.eql(u8, ident_text, "is_empty")) {
const after_generalize = self.types.resolveVar(placeholder_ptrn_var).desc;
std.debug.print("\nDEBUG: After generalizing is_empty\n", .{});
std.debug.print(" placeholder_ptrn_var={} rank={} content_tag={s}\n", .{ @intFromEnum(placeholder_ptrn_var), after_generalize.rank, @tagName(after_generalize.content) });
}
}
}
// create types for type decls //
@ -875,16 +921,28 @@ fn generateStmtTypeDeclType(
.num_args = @intCast(header_args.len),
} });
const nominal_content = try self.types.mkNominal(
.{ .ident_idx = header.name },
backing_var,
header_vars,
self.common_idents.module_name,
);
try self.updateVar(
decl_var,
try self.types.mkNominal(
.{ .ident_idx = header.name },
backing_var,
header_vars,
self.common_idents.module_name,
),
nominal_content,
Rank.generalized,
);
// Debug: print ALL nominal type declarations
const ident_text = self.cir.getIdent(header.name);
std.debug.print("\nDEBUG: Generated nominal type: {s}\n", .{ident_text});
std.debug.print(" decl_var={} decl_idx={}\n", .{ @intFromEnum(decl_var), @intFromEnum(decl_idx) });
std.debug.print(" backing_var={}\n", .{ @intFromEnum(backing_var) });
const resolved = self.types.resolveVar(decl_var);
std.debug.print(" decl_var resolves to: rank={} content_tag={s}\n", .{ resolved.desc.rank, @tagName(resolved.desc.content) });
if (resolved.desc.content == .structure) {
std.debug.print(" structure tag: {s}\n", .{@tagName(resolved.desc.content.structure)});
}
},
.s_runtime_error => {
try self.updateVar(decl_var, .err, Rank.generalized);
@ -2450,6 +2508,17 @@ fn checkExpr(self: *Self, expr_idx: CIR.Expr.Idx, rank: types_mod.Rank, expected
const pat_var = ModuleEnv.varFrom(lookup.pattern_idx);
const resolved_pat = self.types.resolveVar(pat_var).desc;
// Debug: check if this is is_empty
const pattern = self.cir.store.getPattern(lookup.pattern_idx);
if (pattern == .assign) {
const ident_text = self.cir.getIdent(pattern.assign.ident);
if (std.mem.eql(u8, ident_text, "is_empty")) {
std.debug.print("\nDEBUG: Looking up is_empty\n", .{});
std.debug.print(" pat_var={} rank={} content_tag={s}\n", .{ @intFromEnum(pat_var), resolved_pat.rank, @tagName(resolved_pat.content) });
std.debug.print(" Will instantiate: {}\n", .{resolved_pat.rank == Rank.generalized and resolved_pat.content != .rigid});
}
}
// We never instantiate rigid variables
if (resolved_pat.rank == Rank.generalized and resolved_pat.content != .rigid) {
const instantiated = try self.instantiateVar(pat_var, rank, .use_last_var);
@ -2979,14 +3048,14 @@ fn checkExpr(self: *Self, expr_idx: CIR.Expr.Idx, rank: types_mod.Rank, expected
// For annotation-only expressions, the type comes from the annotation.
// This case should only occur when the expression has an annotation (which is
// enforced during canonicalization), so the expected type should be set.
// The type will be unified with the expected type in the code below.
switch (expected) {
.no_expectation => {
// This shouldn't happen since we always create e_anno_only with an annotation
try self.updateVar(expr_var, .err, rank);
},
.expected => {
// The expr_var will be unified with the annotation var below
.expected => |expected_type| {
// Redirect expr_var to the annotation var so that lookups get the correct type
_ = try self.types.setVarRedirect(expr_var, expected_type.var_);
},
}
},
@ -3244,6 +3313,23 @@ fn checkIfElseExpr(
var does_fx = try self.checkExpr(first_branch.cond, rank, .no_expectation);
const first_cond_var: Var = ModuleEnv.varFrom(first_branch.cond);
const bool_var = try self.freshBool(rank, expr_region);
// Debug: print types before unification
const resolved_cond = self.types.resolveVar(first_cond_var);
const resolved_bool = self.types.resolveVar(bool_var);
std.debug.print("\nDEBUG: If condition analysis\n", .{});
std.debug.print(" Condition var={} rank={} content_tag={s}\n", .{@intFromEnum(first_cond_var), resolved_cond.desc.rank, @tagName(resolved_cond.desc.content)});
std.debug.print(" Expected var={} rank={} content_tag={s}\n", .{@intFromEnum(bool_var), resolved_bool.desc.rank, @tagName(resolved_bool.desc.content)});
// Debug: if both are structure (nominal), print their details
if (resolved_cond.desc.content == .structure and resolved_bool.desc.content == .structure) {
std.debug.print(" Both are nominal types\n", .{});
const cond_structure = resolved_cond.desc.content.structure;
const bool_structure = resolved_bool.desc.content.structure;
std.debug.print(" Condition structure tag: {s}\n", .{@tagName(cond_structure)});
std.debug.print(" Expected structure tag: {s}\n", .{@tagName(bool_structure)});
}
const first_cond_result = try self.unify(bool_var, first_cond_var, rank);
self.setDetailIfTypeMismatch(first_cond_result, .incompatible_if_cond);

View file

@ -2151,6 +2151,57 @@ pub const Interpreter = struct {
return try self.makeSimpleBoolValue(result);
},
.list_get_unsafe => {
// Internal operation: Get element at index without bounds checking
// Args: List(a), U64 (index)
// Returns: a (the element)
// Exactly two arguments are required; anything else is reported as a type mismatch.
if (args.len != 2) return error.TypeMismatch;
const list_arg = args[0];
const index_arg = args[1];
// The list argument must carry a pointer to its RocList header.
if (list_arg.ptr == null) return error.TypeMismatch;
// Only list layouts (`.list` or `.list_of_zst`) are accepted for the first argument
if (list_arg.layout.tag != .list and list_arg.layout.tag != .list_of_zst) {
return error.TypeMismatch;
}
const roc_list: *const builtins.list.RocList = @ptrCast(@alignCast(list_arg.ptr.?));
const index = index_arg.asI128(); // U64 stored as i128
// Get element layout
// NOTE(review): `.list_of_zst` passes the tag check above, yet `data.list` is read
// unconditionally here - confirm this field is valid for `.list_of_zst` layouts.
const elem_layout_idx = list_arg.layout.data.list;
const elem_layout = self.runtime_layout_store.getLayout(elem_layout_idx);
const elem_size = self.runtime_layout_store.layoutSize(elem_layout);
if (elem_size == 0) {
// ZST element - return zero-sized value
// No element memory is read in this case, so the result carries a null pointer.
return StackValue{
.layout = elem_layout,
.ptr = null,
.is_initialized = true,
};
}
// Get pointer to element (no bounds checking!)
// NOTE(review): `@intCast(index)` narrows the i128 to the u64 index parameter;
// presumably the value is never negative because the Roc type is U64 - confirm.
const elem_ptr = builtins.list.listGetUnsafe(roc_list.*, @intCast(index), elem_size);
// listGetUnsafe yields null when the list has no backing bytes; treat that as a crash.
if (elem_ptr == null) {
self.triggerCrash("list_get_unsafe: null pointer returned", false, roc_ops);
return error.Crash;
}
// Create StackValue pointing to the element
const elem_value = StackValue{
.layout = elem_layout,
.ptr = @ptrCast(elem_ptr.?),
.is_initialized = true,
};
// Copy to new location and increment refcount
return try self.pushCopy(elem_value, roc_ops);
},
.set_is_empty => {
// TODO: implement Set.is_empty
self.triggerCrash("Set.is_empty not yet implemented", false, roc_ops);

View file

@ -210,3 +210,20 @@ test "e_anno_only - value only crashes when accessed (False branch)" {
try testing.expectEqual(@as(u32, 2), summary.evaluated);
try testing.expectEqual(@as(u32, 0), summary.crashed);
}
test "List.first on nonempty list" {
    // A single top-level declaration that calls List.first on a three-element list.
    const src =
        \\import Builtin exposing [List, Try]
        \\
        \\result = List.first([1, 2, 3])
    ;

    var eval_module = try parseCheckAndEvalModule(src);
    defer cleanupEvalModule(&eval_module);

    const outcome = try eval_module.evaluator.evalAll();

    // Exactly one declaration is evaluated and none of them crash
    // (i.e. List.first succeeds on a non-empty list).
    const expected_evaluated: u32 = 1;
    const expected_crashed: u32 = 0;
    try testing.expectEqual(expected_evaluated, outcome.evaluated);
    try testing.expectEqual(expected_crashed, outcome.crashed);
}