//! Exposes the readCacheInto and writeToCache functions for
//! serializing IR to and from disk. The caller is responsible for:
//! - Determining the base directory where the cache files should go.
//! - Determining what hash should be used as the cache key.
//! - Providing either the data to write to disk, or a buffer to read into.
const std = @import("std");
const builtin = @import("builtin");
const base = @import("base.zig");
const canonicalize = @import("check/canonicalize.zig");
const assert = std.debug.assert;
const Filesystem = @import("coordinate/Filesystem.zig");
const Package = base.Package;
const Allocator = std.mem.Allocator;

const hash_encoder = std.base64.url_safe_no_pad.Encoder;
const file_ext = ".rcir";

/// The header that gets written to disk right before the cached data.
/// Having this header makes it possible to read the entire cached file
/// into a buffer in one syscall, because the header provides all the
/// information necessary to process the remainder of the information
/// (e.g. rehydrating pointers).
pub const CacheHeader = struct {
    total_cached_bytes: u32,

    /// Error specific to initializing a CacheHeader from bytes.
    /// Returned when the buffer is too small to contain a complete header
    /// or the complete data that the header specifies.
    pub const InitError = error{
        PartialRead,
    };

    /// Verify that the given buffer begins with a valid CacheHeader,
    /// and also that it has a valid number of bytes in it. Returns
    /// a pointer to the CacheHeader within the buffer.
    pub fn initFromBytes(buf: []align(@alignOf(CacheHeader)) u8) InitError!*CacheHeader {
        // The buffer might not contain a complete header.
        // (This also covers the empty-buffer case.)
        if (buf.len < @sizeOf(CacheHeader)) {
            return InitError.PartialRead;
        }

        const header = @as(*CacheHeader, @ptrCast(buf.ptr));
        const data_start = @sizeOf(CacheHeader);
        const data_end = data_start + header.total_cached_bytes;

        // The buffer might not contain complete data after the header.
        if (buf.len < data_end) {
            return InitError.PartialRead;
        }

        return header;
    }
};

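// On disk, a cache file is laid out as the header immediately followed by the
// serialized data it describes:
//
//     [ CacheHeader | total_cached_bytes bytes of cached data ]
//
// A minimal sketch of slicing the payload out of a buffer that was filled from
// disk (hypothetical caller code; `raw` stands for an aligned buffer already
// populated by a single read syscall):
//
//     const header = try CacheHeader.initFromBytes(raw);
//     const payload = raw[@sizeOf(CacheHeader)..][0..header.total_cached_bytes];
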
/// Reads the canonical IR for a given file hash and Roc version into the given buffer.
///
/// If this succeeds, then it's the caller's responsibility to:
/// - Verify that there are bytes left over in the buffer. (If the buffer is now full,
///   then this was a partial read and the caller needs to call this again with a bigger buffer.)
/// - Cast the bytes to a CacheHeader.
/// - Truncate the buffer's length based on the total_cached_bytes field of the CacheHeader.
///
/// Returns the number of bytes read, or an error if file operations fail.
pub fn readCacheInto(
    dest: []align(@alignOf(CacheHeader)) u8,
    abs_cache_dir: []const u8,
    hash: []const u8,
    fs: Filesystem,
    allocator: Allocator,
) (Filesystem.ReadError || Allocator.Error)!usize {
    const cache_path = try createCachePath(allocator, abs_cache_dir, hash);
    defer allocator.free(cache_path.path);
    return try fs.readFileInto(cache_path.path, dest);
}

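// A minimal usage sketch of the read flow described above (hypothetical caller
// code; `cache_dir`, `hash`, `fs`, and `gpa` are stand-ins, not defined here):
//
//     var buf: [4096]u8 align(@alignOf(CacheHeader)) = undefined;
//     const bytes_read = try readCacheInto(&buf, cache_dir, hash, fs, gpa);
//     if (bytes_read == buf.len) {
//         // Possibly a partial read; retry with a larger buffer.
//     }
//     const header = try CacheHeader.initFromBytes(buf[0..bytes_read]);
//     const total = @sizeOf(CacheHeader) + header.total_cached_bytes;
//     const cached = buf[0..total];
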
/// Writes the given content to a cache file for the specified hash.
/// Creates any missing intermediate directories as necessary.
pub fn writeToCache(
    cache_dir_path: []const u8,
    hash: []const u8,
    header: *const CacheHeader, // Must be immediately followed in memory by the cached data it describes
    fs: Filesystem,
    allocator: Allocator,
) (Filesystem.WriteError || Filesystem.MakePathError || Allocator.Error)!void {
    const cache_path = try createCachePath(allocator, cache_dir_path, hash);
    defer allocator.free(cache_path.path);

    // Create enclosing directories as needed.
    const hash_start = cache_dir_path.len + 1; // +1 for path separator
    const hash_sep_pos = hash_start + cache_path.half_encoded_len;
    try fs.makePath(cache_path.path[0..hash_sep_pos]);

    // Write both the header and the cache data that immediately follows it in memory.
    const total_bytes = @sizeOf(CacheHeader) + header.total_cached_bytes;
    const header_and_content = @as([*]const u8, @ptrCast(header))[0..total_bytes];
    try fs.writeFile(cache_path.path, header_and_content);
}

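// A minimal sketch of preparing the input for writeToCache (hypothetical caller
// code; `gpa`, `fs`, `cache_dir`, `hash`, and `ir_bytes` are stand-ins):
//
//     const total = @sizeOf(CacheHeader) + ir_bytes.len;
//     const buf = try gpa.alignedAlloc(u8, @alignOf(CacheHeader), total);
//     defer gpa.free(buf);
//     const header = @as(*CacheHeader, @ptrCast(buf.ptr));
//     header.* = .{ .total_cached_bytes = @intCast(ir_bytes.len) };
//     @memcpy(buf[@sizeOf(CacheHeader)..], ir_bytes);
//     try writeToCache(cache_dir, hash, header, fs, gpa);
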
/// TODO: implement
pub fn getPackageRootAbsDir(url_data: Package.Url, gpa: Allocator, fs: Filesystem) []const u8 {
    _ = url_data;
    _ = gpa;
    _ = fs;

    @panic("not implemented");
}

/// TODO: implement
pub fn getCanIrForHashAndRocVersion(file_hash: []const u8, roc_version: []const u8, fs: Filesystem, allocator: Allocator) ?canonicalize.CIR {
    _ = file_hash;
    _ = roc_version;
    _ = fs;
    _ = allocator;
    return null;
}

/// Allocates and returns the full path to the cache file for the given hash.
/// Also returns the encoded length of the first half of the hash.
///
/// The path format is: abs_cache_dir + "/" + first_half_of_hash + "/" + second_half_of_hash + file_ext
///
/// All other path-related values can be derived from the returned values.
///
/// Returns a struct containing:
/// - The full path as a null-terminated string
/// - The encoded length of the first half of the hash
fn createCachePath(allocator: Allocator, abs_cache_dir: []const u8, hash: []const u8) Allocator.Error!struct { path: [:0]u8, half_encoded_len: usize } {
    // The hash bytes get split in half, and each half is encoded separately with
    // a path separator in between. Size each half separately: for some hash
    // lengths, the two halves together encode to more bytes than encoding the
    // whole hash at once would, so hash_encoder.calcSize(hash.len) can undercount.
    const half_hash_len = hash.len / 2;
    const half_encoded_len = hash_encoder.calcSize(half_hash_len);
    const second_encoded_len = hash_encoder.calcSize(hash.len - half_hash_len);

    // Calculate required space:
    // abs_cache_dir + "/" + first_half + "/" + second_half + file_ext + null terminator
    const required_bytes = abs_cache_dir.len + 1 + half_encoded_len + 1 + second_encoded_len + file_ext.len + 1;

    const path_buf = try allocator.allocSentinel(u8, required_bytes - 1, 0);
    errdefer allocator.free(path_buf);

    // abs_cache_dir + "/" + first_half_of_hash + "/" + second_half_of_hash + file_ext
    @memcpy(path_buf[0..abs_cache_dir.len], abs_cache_dir);
    path_buf[abs_cache_dir.len] = std.fs.path.sep;
    const hash_start = abs_cache_dir.len + 1; // +1 for the path separator

    // Encode the first half of the hash.
    _ = hash_encoder.encode(path_buf[hash_start .. hash_start + half_encoded_len], hash[0..half_hash_len]);

    // Add the path separator between the two encoded halves.
    path_buf[hash_start + half_encoded_len] = std.fs.path.sep;

    // Encode the second half of the hash.
    const second_start = hash_start + half_encoded_len + 1;
    _ = hash_encoder.encode(path_buf[second_start .. second_start + second_encoded_len], hash[half_hash_len..]);

    // Append the file extension.
    const ext_start = second_start + second_encoded_len;
    const ext_end = ext_start + file_ext.len;
    @memcpy(path_buf[ext_start..ext_end], file_ext);

    return .{ .path = path_buf, .half_encoded_len = half_encoded_len };
}

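// A minimal test sketch exercising the path format documented on createCachePath.
// The cache dir and hash below are arbitrary stand-ins for illustration, not
// real values.
test "createCachePath splits the encoded hash across a path separator" {
    const allocator = std.testing.allocator;
    const result = try createCachePath(allocator, "/tmp/roc-cache", "0123456789abcdef");
    defer allocator.free(result.path);

    // Layout: dir ++ sep ++ encoded(first half) ++ sep ++ encoded(second half) ++ file_ext
    try std.testing.expect(std.mem.startsWith(u8, result.path, "/tmp/roc-cache"));
    try std.testing.expectEqual(@as(u8, std.fs.path.sep), result.path["/tmp/roc-cache".len]);
    try std.testing.expectEqual(@as(u8, std.fs.path.sep), result.path["/tmp/roc-cache".len + 1 + result.half_encoded_len]);
    try std.testing.expect(std.mem.endsWith(u8, result.path, file_ext));
}
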
test "CacheHeader.initFromBytes - valid data" {
|
|
const test_data = "This is test data for our cache!";
|
|
const test_data_len = test_data.len;
|
|
|
|
var buffer: [1024]u8 align(@alignOf(CacheHeader)) = .{0} ** 1024;
|
|
|
|
var header = @as(*CacheHeader, @ptrCast(&buffer[0]));
|
|
header.total_cached_bytes = test_data_len;
|
|
|
|
const data_start = @sizeOf(CacheHeader);
|
|
@memcpy(buffer[data_start .. data_start + test_data_len], test_data);
|
|
|
|
const parsed_header = try CacheHeader.initFromBytes(&buffer);
|
|
try std.testing.expectEqual(header.total_cached_bytes, parsed_header.total_cached_bytes);
|
|
}
|
|
|
|
test "CacheHeader.initFromBytes - buffer too small" {
|
|
// Create a buffer smaller than CacheHeader size
|
|
var small_buffer: [4]u8 align(@alignOf(CacheHeader)) = undefined;
|
|
|
|
// Test that it returns PartialRead error
|
|
const result = CacheHeader.initFromBytes(&small_buffer);
|
|
try std.testing.expectError(CacheHeader.InitError.PartialRead, result);
|
|
}
|
|
|
|
test "CacheHeader.initFromBytes - insufficient data bytes" {
|
|
var buffer: [128]u8 align(@alignOf(CacheHeader)) = .{0} ** 128;
|
|
|
|
var header = @as(*CacheHeader, @ptrCast(&buffer[0]));
|
|
|
|
// Set header to request more data than is available in the buffer
|
|
const available_data_space = buffer.len - @sizeOf(CacheHeader);
|
|
header.total_cached_bytes = available_data_space + 1;
|
|
|
|
const result = CacheHeader.initFromBytes(&buffer);
|
|
try std.testing.expectError(CacheHeader.InitError.PartialRead, result);
|
|
}
|
|
|
|
test "readCacheInto - file too big" {
|
|
var mock_fs = Filesystem.testing();
|
|
const err = error.FileTooBig;
|
|
|
|
mock_fs.readFileInto = struct {
|
|
fn readFileInto(path: []const u8, buf: []u8) Filesystem.ReadError!usize {
|
|
_ = path;
|
|
_ = buf;
|
|
return err;
|
|
}
|
|
}.readFileInto;
|
|
|
|
var read_buffer: [1024]u8 align(@alignOf(CacheHeader)) = undefined;
|
|
const result = readCacheInto(&read_buffer, "/fake/cache/dir", "not-a-hash", mock_fs, std.testing.allocator);
|
|
|
|
try std.testing.expectError(err, result);
|
|
}
|
|
|
|
test "readCacheInto after writeToCache" {
|
|
var tmp_dir = std.testing.tmpDir(.{});
|
|
defer tmp_dir.cleanup();
|
|
|
|
// Get absolute path of tmp_dir to use as cache directory
|
|
var abs_path_buf: [std.fs.max_path_bytes]u8 = undefined;
|
|
const abs_cache_dir = try tmp_dir.dir.realpath(".", &abs_path_buf);
|
|
|
|
const fs = Filesystem.default();
|
|
const hash = "0123456789abcdef";
|
|
const test_data = "Test data for caching!";
|
|
const test_data_len = test_data.len;
|
|
|
|
// Create buffer with header and data
|
|
const buffer_size = @sizeOf(CacheHeader) + test_data_len;
|
|
var write_buffer: []align(@alignOf(CacheHeader)) u8 = try std.testing.allocator.alignedAlloc(u8, @alignOf(CacheHeader), buffer_size);
|
|
defer std.testing.allocator.free(write_buffer);
|
|
var header = @as(*CacheHeader, @ptrCast(write_buffer.ptr));
|
|
header.total_cached_bytes = test_data_len;
|
|
const data_start = @sizeOf(CacheHeader);
|
|
@memcpy(write_buffer[data_start .. data_start + test_data_len], test_data);
|
|
|
|
// Write to cache
|
|
try writeToCache(abs_cache_dir, hash, header, fs, std.testing.allocator);
|
|
|
|
// Read it back
|
|
var read_buffer: [1024]u8 align(@alignOf(CacheHeader)) = undefined;
|
|
const bytes_read = try readCacheInto(&read_buffer, abs_cache_dir, hash, fs, std.testing.allocator);
|
|
|
|
// Verify header was read correctly
|
|
try std.testing.expect(bytes_read >= @sizeOf(CacheHeader));
|
|
const parsed_header = try CacheHeader.initFromBytes(read_buffer[0..bytes_read]);
|
|
try std.testing.expectEqual(header.total_cached_bytes, parsed_header.total_cached_bytes);
|
|
|
|
// Verify data was read correctly
|
|
const expected_total_bytes = @sizeOf(CacheHeader) + parsed_header.total_cached_bytes;
|
|
try std.testing.expectEqual(expected_total_bytes, bytes_read);
|
|
|
|
const data_bytes = read_buffer[@sizeOf(CacheHeader)..expected_total_bytes];
|
|
try std.testing.expectEqualStrings(test_data, data_bytes);
|
|
}
|
|
|
|
// TODO expand this test gradually to more of our Can IR until
// we can round-trip a whole type-checked module from cache
test "NodeStore cache round-trip" {
    const NodeStore = @import("check/canonicalize/NodeStore.zig");
    const Node = @import("check/canonicalize/Node.zig");

    var tmp_dir = std.testing.tmpDir(.{});
    defer tmp_dir.cleanup();

    var abs_path_buf: [std.fs.max_path_bytes]u8 = undefined;
    const abs_cache_dir = try tmp_dir.dir.realpath(".", &abs_path_buf);

    const fs = Filesystem.default();
    const allocator = std.testing.allocator;
    const test_hash = "0123456789abcdef";

    var store = NodeStore.initCapacity(allocator, 10);
    defer store.deinit();

    const expr_node = Node{
        .data_1 = 42,
        .data_2 = 100,
        .data_3 = 200,
        .region = .{ .start = .{ .offset = 0 }, .end = .{ .offset = 10 } },
        .tag = .expr_string,
    };
    const expr_idx = store.nodes.append(store.gpa, expr_node);

    try store.extra_data.append(store.gpa, 1234);
    try store.extra_data.append(store.gpa, 5678);

    const store_size = store.serializedSize();
    const store_buffer = try allocator.alignedAlloc(u8, @alignOf(Node), store_size);
    defer allocator.free(store_buffer);
    const serialized = try store.serializeInto(store_buffer);
    try std.testing.expectEqual(store_size, serialized.len);

    const header_size = @sizeOf(CacheHeader);
    const aligned_header_size = std.mem.alignForward(usize, header_size, @alignOf(Node));
    const total_size = aligned_header_size + store_size;
    const write_buffer = try allocator.alignedAlloc(u8, @alignOf(Node), total_size);
    defer allocator.free(write_buffer);

    const header = @as(*CacheHeader, @ptrCast(write_buffer.ptr));
    header.* = .{
        .total_cached_bytes = @intCast(store_size),
    };

    @memcpy(write_buffer[aligned_header_size..total_size], serialized);

    try writeToCache(abs_cache_dir, test_hash, header, fs, allocator);

    var read_buffer: [4096]u8 align(@alignOf(Node)) = undefined;
    const bytes_read = try readCacheInto(&read_buffer, abs_cache_dir, test_hash, fs, allocator);

    const parsed_header = try CacheHeader.initFromBytes(read_buffer[0..bytes_read]);
    try std.testing.expectEqual(header.total_cached_bytes, parsed_header.total_cached_bytes);

    const data_start = std.mem.alignForward(usize, @sizeOf(CacheHeader), @alignOf(Node));
    const data_end = data_start + parsed_header.total_cached_bytes;

    var restored_store = try NodeStore.deserializeFrom(@as([]align(@alignOf(Node)) const u8, @alignCast(read_buffer[data_start..data_end])), allocator);
    defer restored_store.deinit();

    try std.testing.expectEqual(store.nodes.len(), restored_store.nodes.len());
    try std.testing.expectEqual(store.extra_data.items.len, restored_store.extra_data.items.len);

    const restored_node = restored_store.nodes.get(expr_idx);
    try std.testing.expectEqual(expr_node.data_1, restored_node.data_1);
    try std.testing.expectEqual(expr_node.data_2, restored_node.data_2);
    try std.testing.expectEqual(expr_node.data_3, restored_node.data_3);
    try std.testing.expectEqual(expr_node.tag, restored_node.tag);

    try std.testing.expectEqual(@as(u32, 1234), restored_store.extra_data.items[0]);
    try std.testing.expectEqual(@as(u32, 5678), restored_store.extra_data.items[1]);
}