//! Exposes the readCacheInto and writeToCache functions for
//! serializing IR to and from disk. The caller is responsible for:
//! - Determining the base directory where the cache files should go.
//! - Determining what hash should be used as the cache key.
//! - Providing either the data to write to disk, or a buffer to read into.

const std = @import("std");
const builtin = @import("builtin");
const base = @import("base.zig");
const canonicalize = @import("check/canonicalize.zig");
const assert = std.debug.assert;
const Filesystem = @import("coordinate/Filesystem.zig");
const Package = base.Package;
const Allocator = std.mem.Allocator;
const hash_encoder = std.base64.url_safe_no_pad.Encoder;

const file_ext = ".rcir";

/// The header that gets written to disk right before the cached data.
/// Having this header makes it possible to read the entire cached file
/// into a buffer in one syscall, because the header provides all the
/// information necessary to process the rest of the data
/// (e.g. rehydrating pointers).
pub const CacheHeader = struct {
    total_cached_bytes: u32,

    /// Error specific to initializing a CacheHeader from bytes.
    /// Returned when the buffer is too small to contain a complete header,
    /// or too small to contain the complete data that the header specifies.
    pub const InitError = error{
        PartialRead,
    };

    /// Verify that the given buffer begins with a valid CacheHeader,
    /// and also that it has a valid number of bytes in it. Returns
    /// a pointer to the CacheHeader within the buffer.
    pub fn initFromBytes(buf: []align(@alignOf(CacheHeader)) u8) InitError!*CacheHeader {
        // The buffer might not contain a complete header.
        // (This also covers the empty-buffer case.)
        if (buf.len < @sizeOf(CacheHeader)) {
            return InitError.PartialRead;
        }

        const header = @as(*CacheHeader, @ptrCast(buf.ptr));
        const data_start = @sizeOf(CacheHeader);
        const data_end = data_start + header.total_cached_bytes;

        // The buffer might not contain complete data after the header.
        if (buf.len < data_end) {
            return InitError.PartialRead;
        }

        return header;
    }
};
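
// A small illustrative test (an addition, not from the original suite) making
// explicit what the doc comment above promises: initFromBytes is zero-copy,
// returning a pointer into the caller's buffer rather than a copy.
test "CacheHeader.initFromBytes returns a pointer into the buffer" {
    var buffer: [64]u8 align(@alignOf(CacheHeader)) = .{0} ** 64;
    const header = try CacheHeader.initFromBytes(&buffer);
    try std.testing.expectEqual(@intFromPtr(&buffer), @intFromPtr(header));
}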

/// Reads the cached canonical IR for the given file hash into the given buffer.
///
/// If this succeeds, then it's the caller's responsibility to:
/// - Verify that there are bytes left over in the buffer. (If the buffer is now
///   full, the read may have been partial, and the caller needs to call this
///   again with a bigger buffer; see the sketch after this function.)
/// - Cast the bytes to a CacheHeader.
/// - Truncate the buffer's length based on the total_cached_bytes field of the CacheHeader.
///
/// Returns the number of bytes read, or an error if file operations fail.
pub fn readCacheInto(
    dest: []align(@alignOf(CacheHeader)) u8,
    abs_cache_dir: []const u8,
    hash: []const u8,
    fs: Filesystem,
    allocator: Allocator,
) (Filesystem.ReadError || Allocator.Error)!usize {
    const path_result = try createCachePath(allocator, abs_cache_dir, hash);
    defer allocator.free(path_result.path);
    return try fs.readFileInto(path_result.path, dest);
}
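
// A minimal sketch (a hypothetical helper, not part of this module's API) of
// the caller-side protocol described above: grow a heap buffer until the read
// is known to be complete, then validate and truncate it via CacheHeader.
// The function name and the 4096-byte starting capacity are assumptions made
// for illustration.
fn readCacheAlloc(
    allocator: Allocator,
    abs_cache_dir: []const u8,
    hash: []const u8,
    fs: Filesystem,
) ![]align(@alignOf(CacheHeader)) u8 {
    var capacity: usize = 4096; // arbitrary starting size; doubles on each retry
    while (true) {
        const buf = try allocator.alignedAlloc(u8, @alignOf(CacheHeader), capacity);
        errdefer allocator.free(buf);

        const bytes_read = try readCacheInto(buf, abs_cache_dir, hash, fs, allocator);
        if (bytes_read == buf.len) {
            // The buffer is completely full, so the read may have been
            // partial; retry with a bigger buffer.
            allocator.free(buf);
            capacity *= 2;
            continue;
        }

        // Validate the header, then shrink the buffer to exactly
        // header + cached data.
        const header = try CacheHeader.initFromBytes(buf[0..bytes_read]);
        const total = @sizeOf(CacheHeader) + header.total_cached_bytes;
        return try allocator.realloc(buf, total);
    }
}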

/// Writes the given content to a cache file for the specified hash.
/// Creates any missing intermediate directories as necessary.
pub fn writeToCache(
    cache_dir_path: []const u8,
    hash: []const u8,
    header: *const CacheHeader, // Must be immediately followed in memory by the cached data it describes (see the sketch after this function)
    fs: Filesystem,
    allocator: Allocator,
) (Filesystem.WriteError || Filesystem.MakePathError || Allocator.Error)!void {
    const cache_path = try createCachePath(allocator, cache_dir_path, hash);
    defer allocator.free(cache_path.path);

    // Create enclosing directories as needed.
    const hash_start = cache_dir_path.len + 1; // +1 for the path separator
    const hash_sep_pos = hash_start + cache_path.half_encoded_len;
    try fs.makePath(cache_path.path[0..hash_sep_pos]);

    // Write both the header and the cache data that immediately follows it in memory.
    const total_bytes = @sizeOf(CacheHeader) + header.total_cached_bytes;
    const header_and_content = @as([*]const u8, @ptrCast(header))[0..total_bytes];
    try fs.writeFile(cache_path.path, header_and_content);
}
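
// A minimal sketch (a hypothetical helper, not part of this module's API) of
// satisfying writeToCache's layout contract: the CacheHeader and the cached
// bytes must be contiguous in memory, so allocate one buffer, write the
// header at the front, and copy the data right after it.
fn writeDataToCache(
    allocator: Allocator,
    cache_dir_path: []const u8,
    hash: []const u8,
    data: []const u8,
    fs: Filesystem,
) !void {
    const total = @sizeOf(CacheHeader) + data.len;
    const buf = try allocator.alignedAlloc(u8, @alignOf(CacheHeader), total);
    defer allocator.free(buf);

    const header = @as(*CacheHeader, @ptrCast(buf.ptr));
    header.* = .{ .total_cached_bytes = @intCast(data.len) };
    @memcpy(buf[@sizeOf(CacheHeader)..], data);

    try writeToCache(cache_dir_path, hash, header, fs, allocator);
}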

/// TODO: implement
pub fn getPackageRootAbsDir(url_data: Package.Url, gpa: Allocator, fs: Filesystem) []const u8 {
    _ = url_data;
    _ = gpa;
    _ = fs;
    @panic("not implemented");
}

/// TODO: implement
pub fn getCanIrForHashAndRocVersion(file_hash: []const u8, roc_version: []const u8, fs: Filesystem, allocator: Allocator) ?canonicalize.CIR {
    _ = file_hash;
    _ = roc_version;
    _ = fs;
    _ = allocator;
    return null;
}

/// Allocates and returns the full path to the cache file for the given hash,
/// along with the encoded length of half the hash.
///
/// The path format is: abs_cache_dir + "/" + first_half_of_hash + "/" + second_half_of_hash + file_ext
///
/// All other path-related values can be derived from the returned values.
///
/// Returns a struct containing:
/// - The full path as a null-terminated string
/// - The encoded length of half the hash (the length of the directory name portion)
fn createCachePath(allocator: Allocator, abs_cache_dir: []const u8, hash: []const u8) Allocator.Error!struct { path: [:0]u8, half_encoded_len: usize } {
    // Calculate required space: abs_cache_dir + "/" + hash_path + file_ext + null terminator
    // We need hash_encoder.calcSize(hash.len) + 1 bytes for the hash path (+1 for the separator)
    const required_bytes = abs_cache_dir.len + 1 + hash_encoder.calcSize(hash.len) + 1 + file_ext.len + 1;
    const path_buf = try allocator.allocSentinel(u8, required_bytes - 1, 0);
    errdefer allocator.free(path_buf);

    // abs_cache_dir + "/" + first_half_of_hash + "/" + second_half_of_hash + file_ext
    @memcpy(path_buf[0..abs_cache_dir.len], abs_cache_dir);
    path_buf[abs_cache_dir.len] = std.fs.path.sep;
    const hash_start = abs_cache_dir.len + 1; // +1 for the path separator

    // Encode the hash with its bytes split in half across a directory name
    // and a file name.
    const half_hash_len = hash.len / 2;
    const half_encoded_len = hash_encoder.calcSize(half_hash_len);

    // Encode the first half of the hash
    _ = hash_encoder.encode(path_buf[hash_start .. hash_start + half_encoded_len], hash[0..half_hash_len]);

    // Add the path separator
    path_buf[hash_start + half_encoded_len] = std.fs.path.sep;

    // Encode the second half of the hash
    _ = hash_encoder.encode(path_buf[hash_start + half_encoded_len + 1 ..], hash[half_hash_len..hash.len]);

    const hash_path_len = (half_encoded_len * 2) + 1;
    const ext_start = hash_start + hash_path_len;
    const ext_end = ext_start + file_ext.len;
    @memcpy(path_buf[ext_start..ext_end], file_ext);

    return .{ .path = path_buf, .half_encoded_len = half_encoded_len };
}
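
// An illustrative test (an addition, not from the original suite) pinning down
// the path layout documented above, assuming a 16-byte hash that splits evenly
// in half. The expected path is built with the same encoder rather than a
// hardcoded string.
test "createCachePath splits the encoded hash across a directory and a file name" {
    const allocator = std.testing.allocator;
    const hash = "0123456789abcdef";
    const result = try createCachePath(allocator, "/tmp/roc-cache", hash);
    defer allocator.free(result.path);

    var enc_buf: [32]u8 = undefined;
    const first_half = hash_encoder.encode(enc_buf[0..16], hash[0..8]);
    const second_half = hash_encoder.encode(enc_buf[16..32], hash[8..16]);
    const expected = try std.fmt.allocPrint(allocator, "{s}{c}{s}{c}{s}{s}", .{
        "/tmp/roc-cache", std.fs.path.sep, first_half, std.fs.path.sep, second_half, file_ext,
    });
    defer allocator.free(expected);

    try std.testing.expectEqualStrings(expected, result.path);
    try std.testing.expectEqual(first_half.len, result.half_encoded_len);
}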
test "CacheHeader.initFromBytes - valid data" {
const test_data = "This is test data for our cache!";
const test_data_len = test_data.len;
var buffer: [1024]u8 align(@alignOf(CacheHeader)) = .{0} ** 1024;
var header = @as(*CacheHeader, @ptrCast(&buffer[0]));
header.total_cached_bytes = test_data_len;
const data_start = @sizeOf(CacheHeader);
@memcpy(buffer[data_start .. data_start + test_data_len], test_data);
const parsed_header = try CacheHeader.initFromBytes(&buffer);
try std.testing.expectEqual(header.total_cached_bytes, parsed_header.total_cached_bytes);
}
test "CacheHeader.initFromBytes - buffer too small" {
// Create a buffer smaller than CacheHeader size
var small_buffer: [4]u8 align(@alignOf(CacheHeader)) = undefined;
// Test that it returns PartialRead error
const result = CacheHeader.initFromBytes(&small_buffer);
try std.testing.expectError(CacheHeader.InitError.PartialRead, result);
}
test "CacheHeader.initFromBytes - insufficient data bytes" {
var buffer: [128]u8 align(@alignOf(CacheHeader)) = .{0} ** 128;
var header = @as(*CacheHeader, @ptrCast(&buffer[0]));
// Set header to request more data than is available in the buffer
const available_data_space = buffer.len - @sizeOf(CacheHeader);
header.total_cached_bytes = available_data_space + 1;
const result = CacheHeader.initFromBytes(&buffer);
try std.testing.expectError(CacheHeader.InitError.PartialRead, result);
}
test "readCacheInto - file too big" {
var mock_fs = Filesystem.testing();
const err = error.FileTooBig;
mock_fs.readFileInto = struct {
fn readFileInto(path: []const u8, buf: []u8) Filesystem.ReadError!usize {
_ = path;
_ = buf;
return err;
}
}.readFileInto;
var read_buffer: [1024]u8 align(@alignOf(CacheHeader)) = undefined;
const result = readCacheInto(&read_buffer, "/fake/cache/dir", "not-a-hash", mock_fs, std.testing.allocator);
try std.testing.expectError(err, result);
}
test "readCacheInto after writeToCache" {
var tmp_dir = std.testing.tmpDir(.{});
defer tmp_dir.cleanup();
// Get absolute path of tmp_dir to use as cache directory
var abs_path_buf: [std.fs.max_path_bytes]u8 = undefined;
const abs_cache_dir = try tmp_dir.dir.realpath(".", &abs_path_buf);
const fs = Filesystem.default();
const hash = "0123456789abcdef";
const test_data = "Test data for caching!";
const test_data_len = test_data.len;
// Create buffer with header and data
const buffer_size = @sizeOf(CacheHeader) + test_data_len;
var write_buffer: []align(@alignOf(CacheHeader)) u8 = try std.testing.allocator.alignedAlloc(u8, @alignOf(CacheHeader), buffer_size);
defer std.testing.allocator.free(write_buffer);
var header = @as(*CacheHeader, @ptrCast(write_buffer.ptr));
header.total_cached_bytes = test_data_len;
const data_start = @sizeOf(CacheHeader);
@memcpy(write_buffer[data_start .. data_start + test_data_len], test_data);
// Write to cache
try writeToCache(abs_cache_dir, hash, header, fs, std.testing.allocator);
// Read it back
var read_buffer: [1024]u8 align(@alignOf(CacheHeader)) = undefined;
const bytes_read = try readCacheInto(&read_buffer, abs_cache_dir, hash, fs, std.testing.allocator);
// Verify header was read correctly
try std.testing.expect(bytes_read >= @sizeOf(CacheHeader));
const parsed_header = try CacheHeader.initFromBytes(read_buffer[0..bytes_read]);
try std.testing.expectEqual(header.total_cached_bytes, parsed_header.total_cached_bytes);
// Verify data was read correctly
const expected_total_bytes = @sizeOf(CacheHeader) + parsed_header.total_cached_bytes;
try std.testing.expectEqual(expected_total_bytes, bytes_read);
const data_bytes = read_buffer[@sizeOf(CacheHeader)..expected_total_bytes];
try std.testing.expectEqualStrings(test_data, data_bytes);
}

// TODO: expand this test gradually to cover more of our Can IR, until
// we can round-trip a whole type-checked module through the cache
test "NodeStore cache round-trip" {
    const NodeStore = @import("check/canonicalize/NodeStore.zig");
    const Node = @import("check/canonicalize/Node.zig");

    var tmp_dir = std.testing.tmpDir(.{});
    defer tmp_dir.cleanup();
    var abs_path_buf: [std.fs.max_path_bytes]u8 = undefined;
    const abs_cache_dir = try tmp_dir.dir.realpath(".", &abs_path_buf);
    const fs = Filesystem.default();
    const allocator = std.testing.allocator;
    const test_hash = "0123456789abcdef";

    // Build a small store containing one node and some extra data.
    var store = NodeStore.initCapacity(allocator, 10);
    defer store.deinit();
    const expr_node = Node{
        .data_1 = 42,
        .data_2 = 100,
        .data_3 = 200,
        .region = .{ .start = .{ .offset = 0 }, .end = .{ .offset = 10 } },
        .tag = .expr_string,
    };
    const expr_idx = store.nodes.append(store.gpa, expr_node);
    try store.extra_data.append(store.gpa, 1234);
    try store.extra_data.append(store.gpa, 5678);

    // Serialize the store into an aligned buffer.
    const store_size = store.serializedSize();
    const store_buffer = try allocator.alignedAlloc(u8, @alignOf(Node), store_size);
    defer allocator.free(store_buffer);
    const serialized = try store.serializeInto(store_buffer);
    try std.testing.expectEqual(store_size, serialized.len);

    // Lay out the CacheHeader followed by the serialized store,
    // padding the header up to Node's alignment.
    const header_size = @sizeOf(CacheHeader);
    const aligned_header_size = std.mem.alignForward(usize, header_size, @alignOf(Node));
    const total_size = aligned_header_size + store_size;
    const write_buffer = try allocator.alignedAlloc(u8, @alignOf(Node), total_size);
    defer allocator.free(write_buffer);
    const header = @as(*CacheHeader, @ptrCast(write_buffer.ptr));
    header.* = .{
        .total_cached_bytes = @intCast(store_size),
    };
    @memcpy(write_buffer[aligned_header_size..total_size], serialized);

    try writeToCache(abs_cache_dir, test_hash, header, fs, allocator);

    var read_buffer: [4096]u8 align(@alignOf(Node)) = undefined;
    const bytes_read = try readCacheInto(&read_buffer, abs_cache_dir, test_hash, fs, allocator);
    const parsed_header = try CacheHeader.initFromBytes(read_buffer[0..bytes_read]);
    try std.testing.expectEqual(header.total_cached_bytes, parsed_header.total_cached_bytes);

    // Deserialize from the data section and verify the round trip.
    const data_start = std.mem.alignForward(usize, @sizeOf(CacheHeader), @alignOf(Node));
    const data_end = data_start + parsed_header.total_cached_bytes;
    var restored_store = try NodeStore.deserializeFrom(@as([]align(@alignOf(Node)) const u8, @alignCast(read_buffer[data_start..data_end])), allocator);
    defer restored_store.deinit();

    try std.testing.expectEqual(store.nodes.len(), restored_store.nodes.len());
    try std.testing.expectEqual(store.extra_data.items.len, restored_store.extra_data.items.len);
    const restored_node = restored_store.nodes.get(expr_idx);
    try std.testing.expectEqual(expr_node.data_1, restored_node.data_1);
    try std.testing.expectEqual(expr_node.data_2, restored_node.data_2);
    try std.testing.expectEqual(expr_node.data_3, restored_node.data_3);
    try std.testing.expectEqual(expr_node.tag, restored_node.tag);
    try std.testing.expectEqual(@as(u32, 1234), restored_store.extra_data.items[0]);
    try std.testing.expectEqual(@as(u32, 5678), restored_store.extra_data.items[1]);
}