Merge branch 'main' of github.com:roc-lang/roc into windows-tests

Anton-4 2022-10-29 16:58:21 +02:00
commit 499177736c
No known key found for this signature in database
GPG key ID: A13F4A6E21141925
416 changed files with 20321 additions and 10706 deletions


@ -15,7 +15,7 @@ lazy_static = "1.4.0"
[build-dependencies]
# dunce can be removed once ziglang/zig#5109 is fixed
dunce = "1.0.2"
dunce = "1.0.3"
[target.'cfg(target_os = "macos")'.build-dependencies]
tempfile = "3.2.0"


@ -1,6 +1,5 @@
const std = @import("std");
const utils = @import("utils.zig");
const RocResult = utils.RocResult;
const UpdateMode = utils.UpdateMode;
const mem = std.mem;
const math = std.math;
@ -93,20 +92,6 @@ pub const RocList = extern struct {
return (ptr - 1)[0] == utils.REFCOUNT_ONE;
}
pub fn allocate(
alignment: u32,
length: usize,
element_size: usize,
) RocList {
const data_bytes = length * element_size;
return RocList{
.bytes = utils.allocateWithRefcount(data_bytes, alignment),
.length = length,
.capacity = length,
};
}
pub fn makeUniqueExtra(self: RocList, alignment: u32, element_width: usize, update_mode: UpdateMode) RocList {
if (update_mode == .InPlace) {
return self;
@ -140,6 +125,24 @@ pub const RocList = extern struct {
return new_list;
}
pub fn allocate(
alignment: u32,
length: usize,
element_width: usize,
) RocList {
if (length == 0) {
return empty();
}
const capacity = utils.calculateCapacity(0, length, element_width);
const data_bytes = capacity * element_width;
return RocList{
.bytes = utils.allocateWithRefcount(data_bytes, alignment),
.length = length,
.capacity = capacity,
};
}
pub fn reallocate(
self: RocList,
alignment: u32,
@ -151,13 +154,14 @@ pub const RocList = extern struct {
if (self.capacity >= new_length) {
return RocList{ .bytes = self.bytes, .length = new_length, .capacity = self.capacity };
} else {
const new_source = utils.unsafeReallocate(source_ptr, alignment, self.len(), new_length, element_width);
return RocList{ .bytes = new_source, .length = new_length, .capacity = new_length };
const new_capacity = utils.calculateCapacity(self.capacity, new_length, element_width);
const new_source = utils.unsafeReallocate(source_ptr, alignment, self.len(), new_capacity, element_width);
return RocList{ .bytes = new_source, .length = new_length, .capacity = new_capacity };
}
}
return self.reallocateFresh(alignment, new_length, element_width);
}
return self.reallocateFresh(alignment, new_length, element_width);
return RocList.allocate(alignment, new_length, element_width);
}
/// reallocate by explicitly making a new allocation and copying elements over
@ -170,24 +174,16 @@ pub const RocList = extern struct {
const old_length = self.length;
const delta_length = new_length - old_length;
const data_bytes = new_length * element_width;
const first_slot = utils.allocateWithRefcount(data_bytes, alignment);
const result = RocList.allocate(alignment, new_length, element_width);
// transfer the memory
if (self.bytes) |source_ptr| {
const dest_ptr = first_slot;
const dest_ptr = result.bytes orelse unreachable;
@memcpy(dest_ptr, source_ptr, old_length * element_width);
@memset(dest_ptr + old_length * element_width, 0, delta_length * element_width);
}
const result = RocList{
.bytes = first_slot,
.length = new_length,
.capacity = new_length,
};
utils.decref(self.bytes, old_length * element_width, alignment);
return result;
@ -517,17 +513,25 @@ pub fn listSublist(
len: usize,
dec: Dec,
) callconv(.C) RocList {
if (len == 0) {
const size = list.len();
if (len == 0 or start >= size) {
if (list.isUnique()) {
// Decrement the reference counts of all elements.
if (list.bytes) |source_ptr| {
var i: usize = 0;
while (i < size) : (i += 1) {
const element = source_ptr + i * element_width;
dec(element);
}
var output = list;
output.length = 0;
return output;
}
}
return RocList.empty();
}
if (list.bytes) |source_ptr| {
const size = list.len();
if (start >= size) {
return RocList.empty();
}
const keep_len = std.math.min(len, size - start);
const drop_start_len = start;
const drop_end_len = size - (start + keep_len);
@ -546,10 +550,17 @@ pub fn listSublist(
dec(element);
}
if (start == 0 and list.isUnique()) {
if (list.isUnique()) {
var output = list;
output.length = keep_len;
return output;
if (start == 0) {
return output;
} else {
// We want memmove due to aliasing. Zig does not expose it directly.
// Instead use copy which can write to aliases as long as the dest is before the source.
mem.copy(u8, source_ptr[0 .. keep_len * element_width], source_ptr[start * element_width .. (start + keep_len) * element_width]);
return output;
}
} else {
const output = RocList.allocate(alignment, keep_len, element_width);
const target_ptr = output.bytes orelse unreachable;
@ -595,9 +606,10 @@ pub fn listDropAt(
if (list.isUnique()) {
var i = drop_index;
while (i < size) : (i += 1) {
while (i < size - 1) : (i += 1) {
const copy_target = source_ptr + i * element_width;
const copy_source = copy_target + element_width;
@memcpy(copy_target, copy_source, element_width);
}
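The loop bound change above (`i < size` becomes `i < size - 1`) keeps the read at `i + 1` in bounds while shifting the tail left by one element. A minimal standalone sketch of that shift-left removal on a plain byte buffer (hypothetical helper, not part of the builtins):

const std = @import("std");

// Remove the element at `drop_index` by shifting every later element left by one.
// Assumes a non-empty buffer and drop_index < buf.len.
// The loop stops at buf.len - 1 so the read at i + 1 never goes past the end.
fn dropAtInPlace(buf: []u8, drop_index: usize) usize {
    var i = drop_index;
    while (i < buf.len - 1) : (i += 1) {
        buf[i] = buf[i + 1];
    }
    return buf.len - 1; // the caller's new length
}

test "dropAtInPlace shifts the tail left" {
    var buf = [_]u8{ 1, 2, 3, 4 };
    const new_len = dropAtInPlace(buf[0..], 1);
    try std.testing.expectEqual(@as(usize, 3), new_len);
    try std.testing.expectEqualSlices(u8, &[_]u8{ 1, 3, 4 }, buf[0..new_len]);
}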
@ -733,37 +745,54 @@ pub fn listConcat(list_a: RocList, list_b: RocList, alignment: u32, element_widt
} else if (list_a.isUnique()) {
const total_length: usize = list_a.len() + list_b.len();
if (list_a.bytes) |source| {
const new_source = if (list_a.capacity >= total_length)
source
else
utils.unsafeReallocate(
source,
alignment,
list_a.len(),
total_length,
element_width,
);
const resized_list_a = list_a.reallocate(alignment, total_length, element_width);
if (list_b.bytes) |source_b| {
@memcpy(new_source + list_a.len() * element_width, source_b, list_b.len() * element_width);
}
// These must exist, otherwise, the lists would have been empty.
const source_a = resized_list_a.bytes orelse unreachable;
const source_b = list_b.bytes orelse unreachable;
@memcpy(source_a + list_a.len() * element_width, source_b, list_b.len() * element_width);
return RocList{ .bytes = new_source, .length = total_length, .capacity = total_length };
}
// decrement list b.
utils.decref(source_b, list_b.len(), alignment);
return resized_list_a;
} else if (list_b.isUnique()) {
const total_length: usize = list_a.len() + list_b.len();
const resized_list_b = list_b.reallocate(alignment, total_length, element_width);
// These must exist, otherwise, the lists would have been empty.
const source_a = list_a.bytes orelse unreachable;
const source_b = resized_list_b.bytes orelse unreachable;
// This is a bit special, we need to first copy the elements of list_b to the end,
// then copy the elements of list_a to the beginning.
// This first call must use mem.copy because the slices might overlap.
const byte_count_a = list_a.len() * element_width;
const byte_count_b = list_b.len() * element_width;
mem.copy(u8, source_b[byte_count_a .. byte_count_a + byte_count_b], source_b[0..byte_count_b]);
@memcpy(source_b, source_a, byte_count_a);
// decrement list a.
utils.decref(source_a, list_a.len(), alignment);
return resized_list_b;
}
const total_length: usize = list_a.len() + list_b.len();
const output = RocList.allocate(alignment, total_length, element_width);
if (output.bytes) |target| {
if (list_a.bytes) |source| {
@memcpy(target, source, list_a.len() * element_width);
}
if (list_b.bytes) |source| {
@memcpy(target + list_a.len() * element_width, source, list_b.len() * element_width);
}
}
// These must exist, otherwise, the lists would have been empty.
const target = output.bytes orelse unreachable;
const source_a = list_a.bytes orelse unreachable;
const source_b = list_b.bytes orelse unreachable;
@memcpy(target, source_a, list_a.len() * element_width);
@memcpy(target + list_a.len() * element_width, source_b, list_b.len() * element_width);
// decrement list a and b.
utils.decref(source_a, list_a.len(), alignment);
utils.decref(source_b, list_b.len(), alignment);
return output;
}
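For reference, a standalone sketch of the in-place append that the unique-`list_a` branch above boils down to once `reallocate` has guaranteed enough capacity. Plain slices stand in for refcounted RocLists here, so this is an illustration rather than the builtin itself:

const std = @import("std");

// Copy `b` directly after the first `a_len` bytes of `dest`.
// `dest` must already have room for a_len + b.len bytes,
// which is what reallocate guarantees in the real builtin.
fn appendInto(dest: []u8, a_len: usize, b: []const u8) usize {
    std.mem.copy(u8, dest[a_len .. a_len + b.len], b);
    return a_len + b.len;
}

test "append copies b after a's elements" {
    var buf = [_]u8{ 1, 2, 0, 0, 0 };
    const new_len = appendInto(buf[0..], 2, &[_]u8{ 3, 4 });
    try std.testing.expectEqual(@as(usize, 4), new_len);
    try std.testing.expectEqualSlices(u8, &[_]u8{ 1, 2, 3, 4 }, buf[0..new_len]);
}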


@ -144,6 +144,8 @@ comptime {
exportStrFn(str.strTrimLeft, "trim_left");
exportStrFn(str.strTrimRight, "trim_right");
exportStrFn(str.strCloneTo, "clone_to");
exportStrFn(str.withCapacity, "with_capacity");
exportStrFn(str.strGraphemes, "graphemes");
inline for (INTEGERS) |T| {
str.exportFromInt(T, ROC_BUILTINS ++ "." ++ STR ++ ".from_int.");


@ -1,5 +1,6 @@
const utils = @import("utils.zig");
const RocList = @import("list.zig").RocList;
const grapheme = @import("helpers/grapheme.zig");
const UpdateMode = utils.UpdateMode;
const std = @import("std");
const mem = std.mem;
@ -50,7 +51,7 @@ pub const RocStr = extern struct {
// This clones the pointed-to bytes if they won't fit in a
// small string, and returns a (pointer, len) tuple which points to them.
pub fn init(bytes_ptr: [*]const u8, length: usize) RocStr {
var result = RocStr.allocate(length, length);
var result = RocStr.allocate(length);
@memcpy(result.asU8ptr(), bytes_ptr, length);
return result;
@ -70,11 +71,14 @@ pub const RocStr = extern struct {
};
}
// allocate space for a (big or small) RocStr, but put nothing in it yet
pub fn allocate(length: usize, capacity: usize) RocStr {
const result_is_big = capacity >= SMALL_STRING_SIZE;
// allocate space for a (big or small) RocStr, but put nothing in it yet.
// May have a larger capacity than the length.
pub fn allocate(length: usize) RocStr {
const element_width = 1;
const result_is_big = length >= SMALL_STRING_SIZE;
if (result_is_big) {
const capacity = utils.calculateCapacity(0, length, element_width);
return RocStr.allocateBig(length, capacity);
} else {
var string = RocStr.empty();
@ -91,25 +95,6 @@ pub const RocStr = extern struct {
}
}
// This takes ownership of the pointed-to bytes if they won't fit in a
// small string, and returns a (pointer, len) tuple which points to them.
pub fn withCapacity(length: usize) RocStr {
const roc_str_size = @sizeOf(RocStr);
if (length < roc_str_size) {
return RocStr.empty();
} else {
var new_bytes = utils.alloc(length, RocStr.alignment) catch unreachable;
var new_bytes_ptr: [*]u8 = @ptrCast([*]u8, &new_bytes);
return RocStr{
.str_bytes = new_bytes_ptr,
.str_len = length,
};
}
}
pub fn eq(self: RocStr, other: RocStr) bool {
// If they are byte-for-byte equal, they're definitely equal!
if (self.str_bytes == other.str_bytes and self.str_len == other.str_len and self.str_capacity == other.str_capacity) {
@ -169,38 +154,43 @@ pub const RocStr = extern struct {
pub fn reallocate(
self: RocStr,
new_length: usize,
new_capacity: usize,
) RocStr {
const element_width = 1;
const old_capacity = self.getCapacity();
if (self.str_bytes) |source_ptr| {
if (self.isUnique() and !self.isSmallStr()) {
const new_source = utils.unsafeReallocate(
source_ptr,
RocStr.alignment,
old_capacity,
new_capacity,
element_width,
);
return RocStr{ .str_bytes = new_source, .str_len = new_length, .str_capacity = new_capacity };
}
if (self.isSmallStr() or !self.isUnique()) {
return self.reallocateFresh(new_length);
}
return self.reallocateFresh(new_length, new_capacity);
if (self.str_bytes) |source_ptr| {
if (old_capacity > new_length) {
var output = self;
output.setLen(new_length);
return output;
}
const new_capacity = utils.calculateCapacity(old_capacity, new_length, element_width);
const new_source = utils.unsafeReallocate(
source_ptr,
RocStr.alignment,
old_capacity,
new_capacity,
element_width,
);
return RocStr{ .str_bytes = new_source, .str_len = new_length, .str_capacity = new_capacity };
}
return self.reallocateFresh(new_length);
}
/// reallocate by explicitly making a new allocation and copying elements over
pub fn reallocateFresh(
fn reallocateFresh(
self: RocStr,
new_length: usize,
new_capacity: usize,
) RocStr {
const old_length = self.len();
const delta_length = new_length - old_length;
const result = RocStr.allocate(new_length, new_capacity);
const result = RocStr.allocate(new_length);
// transfer the memory
@ -238,6 +228,14 @@ pub const RocStr = extern struct {
}
}
pub fn setLen(self: *RocStr, length: usize) void {
if (self.isSmallStr()) {
self.asU8ptr()[@sizeOf(RocStr) - 1] = @intCast(u8, length) | 0b1000_0000;
} else {
self.str_len = length;
}
}
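For context, a small standalone sketch of the length encoding that the small-string branch of setLen uses: the final byte of the struct stores the length with the high bit set to tag the string as small. The 24-byte size below is an assumption for 64-bit targets; the real code derives it from @sizeOf(RocStr).

const std = @import("std");

const SMALL_STRING_SIZE = @sizeOf(usize) * 3; // assumed: RocStr is three machine words

// Encode a small-string length into the last byte; the high bit marks "small".
fn encodeSmallLen(len: usize) u8 {
    return @intCast(u8, len) | 0b1000_0000;
}

// Decode it again by masking the tag bit off.
fn decodeSmallLen(last_byte: u8) usize {
    return @as(usize, last_byte & 0b0111_1111);
}

test "small-string length round-trips through the tag byte" {
    var len: usize = 0;
    while (len < SMALL_STRING_SIZE) : (len += 1) {
        try std.testing.expectEqual(len, decodeSmallLen(encodeSmallLen(len)));
    }
}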
pub fn getCapacity(self: RocStr) usize {
if (self.isSmallStr()) {
return SMALL_STR_MAX_LENGTH;
@ -800,6 +798,12 @@ fn strSplitHelp(array: [*]RocStr, string: RocStr, delimiter: RocStr) void {
while (delimiter_index < delimiter_len) {
var delimiterChar = delimiter_bytes_ptrs[delimiter_index];
if (str_index + delimiter_index >= str_len) {
matches_delimiter = false;
break;
}
var strChar = str_bytes[str_index + delimiter_index];
if (delimiterChar != strChar) {
@ -1209,7 +1213,6 @@ test "countSegments: string equals delimiter" {
}
// Str.countGraphemeClusters
const grapheme = @import("helpers/grapheme.zig");
pub fn countGraphemeClusters(string: RocStr) callconv(.C) usize {
if (string.isEmpty()) {
return 0;
@ -1245,59 +1248,76 @@ pub fn countGraphemeClusters(string: RocStr) callconv(.C) usize {
return count;
}
test "countGraphemeClusters: empty string" {
const count = countGraphemeClusters(RocStr.empty());
try expectEqual(count, 0);
// Str.graphemes
pub fn strGraphemes(roc_str: RocStr) callconv(.C) RocList {
var break_state: ?grapheme.BoundClass = null;
var opt_last_codepoint: ?u21 = null;
var index: usize = 0;
var last_codepoint_len: u8 = 0;
var result = RocList.allocate(@alignOf(RocStr), countGraphemeClusters(roc_str), @sizeOf(RocStr));
const graphemes = result.elements(RocStr) orelse return result;
var slice = roc_str.asSlice();
var iter = (unicode.Utf8View.init(slice) catch unreachable).iterator();
while (iter.nextCodepoint()) |cur_codepoint| {
const cur_codepoint_len = unicode.utf8CodepointSequenceLength(cur_codepoint) catch unreachable;
if (opt_last_codepoint) |last_codepoint| {
var did_break = grapheme.isGraphemeBreak(last_codepoint, cur_codepoint, &break_state);
if (did_break) {
graphemes[index] = RocStr.fromSlice(slice[0..last_codepoint_len]);
slice = slice[last_codepoint_len..];
index += 1;
break_state = null;
last_codepoint_len = 0;
}
}
last_codepoint_len += cur_codepoint_len;
opt_last_codepoint = cur_codepoint;
}
// Append last grapheme
graphemes[index] = RocStr.fromSlice(slice);
return result;
}
test "countGraphemeClusters: ascii characters" {
const bytes_arr = "abcd";
const bytes_len = bytes_arr.len;
const str = RocStr.init(bytes_arr, bytes_len);
defer str.deinit();
// these test both countGraphemeClusters() and strGraphemes()
fn graphemesTest(input: []const u8, expected: []const []const u8) !void {
const rocstr = RocStr.fromSlice(input);
defer rocstr.deinit();
const count = countGraphemeClusters(rocstr);
try expectEqual(expected.len, count);
const count = countGraphemeClusters(str);
try expectEqual(count, 4);
const graphemes = strGraphemes(rocstr);
defer graphemes.deinit(u8);
if (input.len == 0) return; // empty string
const elems = graphemes.elements(RocStr) orelse unreachable;
for (expected) |g, i| {
try std.testing.expectEqualStrings(g, elems[i].asSlice());
}
}
test "countGraphemeClusters: utf8 characters" {
const bytes_arr = "ãxā";
const bytes_len = bytes_arr.len;
const str = RocStr.init(bytes_arr, bytes_len);
defer str.deinit();
const count = countGraphemeClusters(str);
try expectEqual(count, 3);
test "graphemes: empty string" {
try graphemesTest("", &.{});
}
test "countGraphemeClusters: emojis" {
const bytes_arr = "🤔🤔🤔";
const bytes_len = bytes_arr.len;
const str = RocStr.init(bytes_arr, bytes_len);
defer str.deinit();
const count = countGraphemeClusters(str);
try expectEqual(count, 3);
test "graphemes: ascii characters" {
try graphemesTest("abcd", &.{ "a", "b", "c", "d" });
}
test "countGraphemeClusters: emojis and ut8 characters" {
const bytes_arr = "🤔å🤔¥🤔ç";
const bytes_len = bytes_arr.len;
const str = RocStr.init(bytes_arr, bytes_len);
defer str.deinit();
const count = countGraphemeClusters(str);
try expectEqual(count, 6);
test "graphemes: utf8 characters" {
try graphemesTest("ãxā", &.{ "ã", "x", "ā" });
}
test "countGraphemeClusters: emojis, ut8, and ascii characters" {
const bytes_arr = "6🤔å🤔e¥🤔çpp";
const bytes_len = bytes_arr.len;
const str = RocStr.init(bytes_arr, bytes_len);
defer str.deinit();
test "graphemes: emojis" {
try graphemesTest("🤔🤔🤔", &.{ "🤔", "🤔", "🤔" });
}
const count = countGraphemeClusters(str);
try expectEqual(count, 10);
test "graphemes: emojis and ut8 characters" {
try graphemesTest("🤔å🤔¥🤔ç", &.{ "🤔", "å", "🤔", "¥", "🤔", "ç" });
}
test "graphemes: emojis, ut8, and ascii characters" {
try graphemesTest("6🤔å🤔e¥🤔çpp", &.{ "6", "🤔", "å", "🤔", "e", "¥", "🤔", "ç", "p", "p" });
}
pub fn countUtf8Bytes(string: RocStr) callconv(.C) usize {
@ -1381,7 +1401,7 @@ pub fn repeat(string: RocStr, count: usize) callconv(.C) RocStr {
const bytes_len = string.len();
const bytes_ptr = string.asU8ptr();
var ret_string = RocStr.allocate(count * bytes_len, count * bytes_len);
var ret_string = RocStr.allocate(count * bytes_len);
var ret_string_ptr = ret_string.asU8ptr();
var i: usize = 0;
@ -1522,7 +1542,7 @@ fn strConcat(arg1: RocStr, arg2: RocStr) RocStr {
} else {
const combined_length = arg1.len() + arg2.len();
const result = arg1.reallocate(combined_length, combined_length);
const result = arg1.reallocate(combined_length);
@memcpy(result.asU8ptr() + arg1.len(), arg2.asU8ptr(), arg2.len());
@ -1594,7 +1614,7 @@ fn strJoinWith(list: RocListStr, separator: RocStr) RocStr {
// include size of the separator
total_size += separator.len() * (len - 1);
var result = RocStr.allocate(total_size, total_size);
var result = RocStr.allocate(total_size);
var result_ptr = result.asU8ptr();
var offset: usize = 0;
@ -2506,14 +2526,14 @@ test "capacity: big string" {
var data = RocStr.init(data_bytes, data_bytes.len);
defer data.deinit();
try expectEqual(data.getCapacity(), data_bytes.len);
try expect(data.getCapacity() >= data_bytes.len);
}
pub fn appendScalar(string: RocStr, scalar_u32: u32) callconv(.C) RocStr {
const scalar = @intCast(u21, scalar_u32);
const width = std.unicode.utf8CodepointSequenceLength(scalar) catch unreachable;
var output = string.reallocate(string.len() + width, string.len() + width);
var output = string.reallocate(string.len() + width);
var slice = output.asSliceWithCapacity();
_ = std.unicode.utf8Encode(scalar, slice[string.len() .. string.len() + width]) catch unreachable;
@ -2581,15 +2601,23 @@ test "appendScalar: big 😀" {
try expect(actual.eq(expected));
}
pub fn reserve(string: RocStr, capacity: usize) callconv(.C) RocStr {
if (capacity > string.getCapacity()) {
// expand allocation but keep string length the same
return string.reallocate(string.len(), capacity);
} else {
pub fn reserve(string: RocStr, spare: usize) callconv(.C) RocStr {
const old_length = string.len();
if (string.getCapacity() >= old_length + spare) {
return string;
} else {
var output = string.reallocate(old_length + spare);
output.setLen(old_length);
return output;
}
}
pub fn withCapacity(capacity: usize) callconv(.C) RocStr {
var str = RocStr.allocate(capacity);
str.setLen(0);
return str;
}
pub fn getScalarUnsafe(string: RocStr, index: usize) callconv(.C) extern struct { bytesParsed: usize, scalar: u32 } {
const slice = string.asSlice();
const bytesParsed = @intCast(usize, std.unicode.utf8ByteSequenceLength(slice[index]) catch unreachable);


@ -213,6 +213,53 @@ inline fn decref_ptr_to_refcount(
}
}
// We follow roughly the [fbvector](https://github.com/facebook/folly/blob/main/folly/docs/FBVector.md) when it comes to growing a RocList.
// Here is [their growth strategy](https://github.com/facebook/folly/blob/3e0525988fd444201b19b76b390a5927c15cb697/folly/FBVector.h#L1128) for push_back:
//
// (1) initial size
// Instead of growing to size 1 from empty, fbvector allocates at least
// 64 bytes. You may still use reserve to reserve a lesser amount of
// memory.
// (2) 1.5x
// For medium-sized vectors, the growth strategy is 1.5x. See the docs
// for details.
// This does not apply to very small or very large fbvectors. This is a
// heuristic.
//
// In our case, we expose allocate and reallocate, which will use a smart growth strategy.
// We also expose allocateExact and reallocateExact for cases where a specific number of elements is requested.
// calculateCapacity should only be called in cases where the list will be growing.
// requested_length should always be greater than old_capacity.
pub inline fn calculateCapacity(
old_capacity: usize,
requested_length: usize,
element_width: usize,
) usize {
// TODO: there are two adjustments that would likely lead to better results for Roc.
// 1. Deal with the fact we allocate an extra u64 for refcount.
// This may lead to allocating page size + 8 bytes.
// That could mean allocating an entire page for 8 bytes of data which isn't great.
// 2. Deal with the fact that we can request more than 1 element at a time.
// fbvector assumes just appending 1 element at a time when using this algorithm.
// As such, they will generally grow in a way that should better match certain memory multiples.
// This is also the normal case for roc, but we could also grow by a much larger amount.
// We may want to round to multiples of 2 or something similar.
var new_capacity: usize = 0;
if (element_width == 0) {
return requested_length;
} else if (old_capacity == 0) {
new_capacity = 64 / element_width;
} else if (old_capacity < 4096 / element_width) {
new_capacity = old_capacity * 2;
} else if (old_capacity > 4096 * 32 / element_width) {
new_capacity = old_capacity * 2;
} else {
new_capacity = (old_capacity * 3 + 1) / 2;
}
return @maximum(new_capacity, requested_length);
}
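To make the growth rule concrete, here is a standalone copy of the same branching with a few worked values for 8-byte elements (a sketch for illustration only; the builtins call calculateCapacity above directly):

const std = @import("std");

// Same branching as calculateCapacity above, reproduced standalone for illustration.
fn growCapacity(old_capacity: usize, requested_length: usize, element_width: usize) usize {
    var new_capacity: usize = 0;
    if (element_width == 0) {
        return requested_length;
    } else if (old_capacity == 0) {
        new_capacity = 64 / element_width; // first allocation: at least 64 bytes
    } else if (old_capacity < 4096 / element_width) {
        new_capacity = old_capacity * 2; // under one 4096-byte page: double
    } else if (old_capacity > 4096 * 32 / element_width) {
        new_capacity = old_capacity * 2; // very large: double, matching the code above
    } else {
        new_capacity = (old_capacity * 3 + 1) / 2; // medium: grow by roughly 1.5x
    }
    return @maximum(new_capacity, requested_length);
}

test "growth for 8-byte elements" {
    // first allocation: 64 bytes => 8 elements
    try std.testing.expectEqual(@as(usize, 8), growCapacity(0, 1, 8));
    // small list: double from 8 to 16
    try std.testing.expectEqual(@as(usize, 16), growCapacity(8, 9, 8));
    // medium list (one page or more): 512 grows to 768, i.e. 1.5x
    try std.testing.expectEqual(@as(usize, 768), growCapacity(512, 513, 8));
}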
pub fn allocateWithRefcountC(
data_bytes: usize,
element_alignment: u32,
@ -267,25 +314,6 @@ pub fn unsafeReallocate(
return new_source;
}
pub const RocResult = extern struct {
bytes: ?[*]u8,
pub fn isOk(self: RocResult) bool {
// assumptions
//
// - the tag is the first field
// - the tag is usize bytes wide
// - Ok has tag_id 1, because Err < Ok
const usizes: [*]usize = @ptrCast([*]usize, @alignCast(@alignOf(usize), self.bytes));
return usizes[0] == 1;
}
pub fn isErr(self: RocResult) bool {
return !self.isOk();
}
};
pub const Ordering = enum(u8) {
EQ = 0,
GT = 1,


@ -1,8 +1,40 @@
interface Bool
exposes [Bool, true, false, and, or, not, isEq, isNotEq]
exposes [Bool, Eq, true, false, and, or, not, isEq, isNotEq]
imports []
Bool := [True, False]
## Defines a type that can be compared for total equality.
##
## Total equality means that all values of the type can be compared to each
## other, and two values `a`, `b` are identical if and only if `isEq a b` is
## `Bool.true`.
##
## Not all types support total equality. For example, [F32] and [F64] can
## be a `NaN` ([Not a Number](https://en.wikipedia.org/wiki/NaN)), and the
## [IEEE-754](https://en.wikipedia.org/wiki/IEEE_754) floating point standard
## specifies that two `NaN`s are not equal.
Eq has
## Returns `Bool.true` if the input values are equal. This is
## equivalent to the logic
## [XNOR](https://en.wikipedia.org/wiki/Logical_equality) gate. The infix
## operator `==` can be used as shorthand for `Bool.isEq`.
##
## **Note** that when `isEq` is determined by the Roc compiler, values are
## compared using structural equality. The rules for this are as follows:
##
## 1. Tags are equal if their name and also contents are equal.
## 2. Records are equal if their fields are equal.
## 3. The collections [Str], [List], [Dict], and [Set] are equal iff they
## are the same length and their elements are equal.
## 4. [Num] values are equal if their numbers are equal. However, if both
## inputs are *NaN* then `isEq` returns `Bool.false`. Refer to `Num.isNaN`
## for more detail.
## 5. Functions cannot be compared for structural equality, therefore Roc
## cannot derive `isEq` for types that contain functions.
isEq : a, a -> Bool | a has Eq
Bool := [True, False] has [Eq { isEq: boolIsEq }]
boolIsEq = \@Bool b1, @Bool b2 -> structuralEq b1 b2
## The boolean true value.
true : Bool
@ -12,80 +44,74 @@ true = @Bool True
false : Bool
false = @Bool False
## Returns `Bool.true` when given `Bool.true` and `Bool.true`, and `Bool.false` when either argument is `Bool.false`.
## Returns `Bool.true` when both inputs are `Bool.true`. This is equivalent to
## the logic [AND](https://en.wikipedia.org/wiki/Logical_conjunction)
## gate. The infix operator `&&` can also be used as shorthand for
## `Bool.and`.
##
## `a && b` is shorthand for `Bool.and a b`
## expect (Bool.and Bool.true Bool.true) == Bool.true
## expect (Bool.true && Bool.true) == Bool.true
## expect (Bool.false && Bool.true) == Bool.false
## expect (Bool.true && Bool.false) == Bool.false
## expect (Bool.false && Bool.false) == Bool.false
##
## >>> Bool.true && Bool.true
##
## >>> Bool.true && Bool.false
##
## >>> Bool.false && Bool.true
##
## >>> Bool.false && Bool.false
##
## ## Performance Notes
##
## In some languages, `&&` and `||` are special-cased in the compiler to skip
## evaluating the expression after the operator under certain circumstances.
## For example, in some languages, `enablePets && likesDogs user` would compile
## to the equivalent of:
## **Performance Note:** in Roc, `&&` and `||` work the same way as any
## other function. However, in some languages `&&` and `||` are special-cased.
## In those languages the compiler will skip evaluating the expression after the
## first operator under certain circumstances. For example, an expression like
## `enablePets && likesDogs user` would compile to:
##
## if enablePets then
## likesDogs user
## else
## Bool.false
##
## In Roc, however, `&&` and `||` are not special. They work the same way as
## other functions. Conditionals like `if` and `when` have a performance cost,
## and sometimes calling a function like `likesDogs user` can be faster across
## the board than doing an `if` to decide whether to skip calling it.
##
## (Naturally, if you expect the `if` to improve performance, you can always add
## one explicitly!)
## Roc does not do this because conditionals like `if` and `when` have a
## performance cost. Calling a function can sometimes be faster across the board
## than doing an `if` to decide whether to skip calling it.
and : Bool, Bool -> Bool
## Returns `Bool.true` when given `Bool.true` for either argument, and `Bool.false` only when given `Bool.false` and `Bool.false`.
## Returns `Bool.true` when either input is a `Bool.true`. This is equivalent to
## the logic [OR](https://en.wikipedia.org/wiki/Logical_disjunction) gate.
## The infix operator `||` can also be used as shorthand for `Bool.or`.
##
## `a || b` is shorthand for `Bool.or a b`.
## expect (Bool.or Bool.false Bool.true) == Bool.true
## expect (Bool.true || Bool.true) == Bool.true
## expect (Bool.false || Bool.true) == Bool.true
## expect (Bool.true || Bool.false) == Bool.true
## expect (Bool.false || Bool.false) == Bool.false
##
## >>> Bool.true || Bool.true
##
## >>> Bool.true || Bool.false
##
## >>> Bool.false || Bool.true
##
## >>> Bool.false || Bool.false
##
## ## Performance Notes
##
## In some languages, `&&` and `||` are special-cased in the compiler to skip
## evaluating the expression after the operator under certain circumstances.
## In Roc, this is not the case. See the performance notes for [Bool.and] for details.
## **Performance Note:** in Roc, `&&` and `||` work the same way as any
## other function. However, in some languages `&&` and `||` are special-cased.
## Refer to the note in `Bool.and` for more detail.
or : Bool, Bool -> Bool
# xor : Bool, Bool -> Bool # currently unimplemented
## Returns `Bool.false` when given `Bool.true`, and vice versa.
## Returns `Bool.false` when given `Bool.true`, and vice versa. This is
## equivalent to the logic [NOT](https://en.wikipedia.org/wiki/Negation)
## gate. The operator `!` can also be used as shorthand for `Bool.not`.
##
## expect (Bool.not Bool.false) == Bool.true
## expect (!Bool.false) == Bool.true
not : Bool -> Bool
## Returns `Bool.true` if the two values are *structurally equal*, and `Bool.false` otherwise.
## This will call the function `Bool.isEq` on the inputs, and then `Bool.not`
## on the result. This is equivalent to the logic
## [XOR](https://en.wikipedia.org/wiki/Exclusive_or) gate. The infix operator
## `!=` can also be used as shorthand for `Bool.isNotEq`.
##
## `a == b` is shorthand for `Bool.isEq a b`
## **Note** that `isNotEq` does not accept arguments whose types contain
## functions.
##
## Structural equality works as follows:
##
## 1. Tags are equal if they have the same tag name, and also their contents (if any) are equal.
## 2. Records are equal if all their fields are equal.
## 3. Collections ([Str], [List], [Dict], and [Set]) are equal if they are the same length, and also all their corresponding elements are equal.
## 4. [Num](Num#Num) values are equal if their numbers are equal, with one exception: if both arguments to `isEq` are *NaN*, then `isEq` returns `Bool.false`. See `Num.isNaN` for more about *NaN*.
##
## Note that `isEq` takes `'val` instead of `val`, which means `isEq` does not
## accept arguments whose types contain functions.
isEq : a, a -> Bool
## expect (Bool.isNotEq Bool.false Bool.true) == Bool.true
## expect (Bool.false != Bool.false) == Bool.false
## expect "Apples" != "Oranges"
isNotEq : a, a -> Bool | a has Eq
isNotEq = \a, b -> structuralNotEq a b
## Calls [isEq] on the given values, then calls [not] on the result.
##
## `a != b` is shorthand for `Bool.isNotEq a b`
##
## Note that `isNotEq` takes `'val` instead of `val`, which means `isNotEq` does not
## accept arguments whose types contain functions.
isNotEq : a, a -> Bool
# INTERNAL COMPILER USE ONLY: used to lower calls to `isEq` to structural
# equality via the `Eq` low-level for derived types.
structuralEq : a, a -> Bool
# INTERNAL COMPILER USE ONLY: used to lower calls to `isNotEq` to structural
# inequality via the `NotEq` low-level for derived types.
structuralNotEq : a, a -> Bool


@ -2,7 +2,17 @@ interface Box
exposes [box, unbox]
imports []
## Allocate a value on the heap. Boxing is an expensive process as it copies
## the value from the stack to the heap. This may provide a performance
## optimization for advanced use cases with large values. A platform may require
## that some values are boxed.
##
## expect Box.unbox (Box.box "Stack Faster") == "Stack Faster"
box : a -> Box a
## Returns the value inside a box.
##
## expect Box.unbox (Box.box "Stack Faster") == "Stack Faster"
unbox : Box a -> a
# # we'd need reset/reuse for box for this to be efficient


@ -27,9 +27,27 @@ interface Decode
decodeWith,
fromBytesPartial,
fromBytes,
mapResult,
]
imports [
List,
Result.{ Result },
Num.{
U8,
U16,
U32,
U64,
U128,
I8,
I16,
I32,
I64,
I128,
F32,
F64,
Dec,
},
Bool.{ Bool },
]
DecodeError : [TooShort]
@ -79,3 +97,6 @@ fromBytes = \bytes, fmt ->
Err TooShort -> Err TooShort
else
Err (Leftover rest)
mapResult : DecodeResult a, (a -> b) -> DecodeResult b
mapResult = \{ result, rest }, mapper -> { result: Result.map result mapper, rest }


@ -9,6 +9,7 @@ interface Dict
insert,
len,
remove,
update,
contains,
keys,
values,
@ -17,20 +18,28 @@ interface Dict
removeAll,
]
imports [
Bool.{ Bool },
Bool.{ Bool, Eq },
Result.{ Result },
List,
Str,
Num.{ Nat, U64, U8 },
Hash.{ Hasher },
]
## A [dictionary](https://en.wikipedia.org/wiki/Associative_array) that lets you can associate keys with values.
## A [dictionary](https://en.wikipedia.org/wiki/Associative_array) that lets you
## associate keys with values.
##
## ### Inserting
##
## The most basic way to use a dictionary is to start with an empty one and then:
## 1. Call [Dict.insert] passing a key and a value, to associate that key with that value in the dictionary.
## 2. Later, call [Dict.get] passing the same key as before, and it will return the value you stored.
## The most basic way to use a dictionary is to start with an empty one and
## then:
## 1. Call [Dict.insert] passing a key and a value, to associate that key with
## that value in the dictionary.
## 2. Later, call [Dict.get] passing the same key as before, and it will return
## the value you stored.
##
## Here's an example of a dictionary which uses a city's name as the key, and its population as the associated value.
## Here's an example of a dictionary which uses a city's name as the key, and
## its population as the associated value.
##
## populationByCity =
## Dict.empty
@ -42,11 +51,12 @@ interface Dict
##
## ### Accessing keys or values
##
## We can use [Dict.keys] and [Dict.values] functions to get only the keys or only the values.
## We can use [Dict.keys] and [Dict.values] functions to get only the keys or
## only the values.
##
## You may notice that these lists have the same order as the original insertion order. This will be true if
## all you ever do is [insert] and [get] operations on the dictionary, but [remove] operations can change this order.
## Let's see how that looks.
## You may notice that these lists have the same order as the original insertion
## order. This will be true if all you ever do is [Dict.insert] and [Dict.get] operations
## on the dictionary, but [Dict.remove] operations can change this order.
##
## ### Removing
##
@ -58,28 +68,44 @@ interface Dict
## ==
## ["London", "Amsterdam", "Shanghai", "Delhi"]
##
## Notice that the order changed! Philadelphia has been not only removed from the list, but Amsterdam - the last
## entry we inserted - has been moved into the spot where Philadelphia was previously. This is exactly what
## [Dict.remove] does: it removes an element and moves the most recent insertion into the vacated spot.
## Notice that the order has changed. Philadelphia was not only removed from the
## list, but Amsterdam - the last entry we inserted - has been moved into the
## spot where Philadelphia was previously. This is exactly what [Dict.remove]
## does. It removes an element and moves the most recent insertion into the
## vacated spot.
##
## This move is done as a performance optimization, and it lets [remove] have
## [constant time complexity](https://en.wikipedia.org/wiki/Time_complexity#Constant_time). ##
## This move is done as a performance optimization, and it lets [Dict.remove]
## have [constant time complexity](https://en.wikipedia.org/wiki/Time_complexity#Constant_time).
##
## ### Equality
##
## When comparing two dictionaries for equality, they are `==` only if their both their contents and their
## orderings match. This preserves the property that if `dict1 == dict2`, you should be able to rely on
## `fn dict1 == fn dict2` also being `Bool.true`, even if `fn` relies on the dictionary's ordering.
Dict k v := List [Pair k v]
## Two dictionaries are equal when their contents and orderings match. This
## means that when `dict1 == dict2`, the expression `fn dict1 == fn dict2` will
## also evaluate to `Bool.true`. The function `fn` can count on the ordering of
## values in the dictionary to also match.
Dict k v := List [Pair k v] has [Eq]
## An empty dictionary.
## Return an empty dictionary.
empty : Dict k v
empty = @Dict []
## Return a dictionary with space allocated for a number of entries. This
## may provide a performance optimisation if you know how many entries will be
## inserted.
withCapacity : Nat -> Dict k v
withCapacity = \n -> @Dict (List.withCapacity n)
get : Dict k v, k -> Result v [KeyNotFound]*
## Get the value for a given key. If there is a value for the specified key it
## will return [Ok value], otherwise return [Err KeyNotFound].
##
## dictionary =
## Dict.empty
## |> Dict.insert 1 "Apple"
## |> Dict.insert 2 "Orange"
##
## expect Dict.get dictionary 1 == Ok "Apple"
## expect Dict.get dictionary 2000 == Err KeyNotFound
get : Dict k v, k -> Result v [KeyNotFound]* | k has Eq
get = \@Dict list, needle ->
when List.findFirst list (\Pair key _ -> key == needle) is
Ok (Pair _ v) ->
@ -88,11 +114,28 @@ get = \@Dict list, needle ->
Err NotFound ->
Err KeyNotFound
## Iterate through the keys and values in the dictionary and call the provided
## function with signature `state, k, v -> state` for each value, with an
## initial `state` value provided for the first call.
##
## expect
## Dict.empty
## |> Dict.insert "Apples" 12
## |> Dict.insert "Orange" 24
## |> Dict.walk 0 (\count, _, qty -> count + qty)
## |> Bool.isEq 36
walk : Dict k v, state, (state, k, v -> state) -> state
walk = \@Dict list, initialState, transform ->
List.walk list initialState (\state, Pair k v -> transform state k v)
insert : Dict k v, k, v -> Dict k v
## Insert a value into the dictionary at a specified key.
##
## expect
## Dict.empty
## |> Dict.insert "Apples" 12
## |> Dict.get "Apples"
## |> Bool.isEq (Ok 12)
insert : Dict k v, k, v -> Dict k v | k has Eq
insert = \@Dict list, k, v ->
when List.findFirstIndex list (\Pair key _ -> key == k) is
Err NotFound ->
@ -103,11 +146,28 @@ insert = \@Dict list, k, v ->
|> List.set index (Pair k v)
|> @Dict
## Returns the number of values in the dictionary.
##
## expect
## Dict.empty
## |> Dict.insert "One" "A Song"
## |> Dict.insert "Two" "Candy Canes"
## |> Dict.insert "Three" "Boughs of Holly"
## |> Dict.len
## |> Bool.isEq 3
len : Dict k v -> Nat
len = \@Dict list ->
List.len list
remove : Dict k v, k -> Dict k v
## Remove a value from the dictionary for a specified key.
##
## expect
## Dict.empty
## |> Dict.insert "Some" "Value"
## |> Dict.remove "Some"
## |> Dict.len
## |> Bool.isEq 0
remove : Dict k v, k -> Dict k v | k has Eq
remove = \@Dict list, key ->
when List.findFirstIndex list (\Pair k _ -> k == key) is
Err NotFound ->
@ -121,7 +181,50 @@ remove = \@Dict list, key ->
|> List.dropLast
|> @Dict
contains : Dict k v, k -> Bool
## Insert or remove a value for a specified key. This function enables a
## performance optimisation for the use case of providing a default when a value
## is missing. This is more efficient than doing both a `Dict.get` and then a
## `Dict.insert` call, and supports being piped.
##
## alterValue : [Present Bool, Missing] -> [Present Bool, Missing]
## alterValue = \possibleValue ->
## when possibleValue is
## Missing -> Present Bool.false
## Present value -> if value then Missing else Present Bool.true
##
## expect Dict.update Dict.empty "a" alterValue == Dict.single "a" Bool.false
## expect Dict.update (Dict.single "a" Bool.false) "a" alterValue == Dict.single "a" Bool.true
## expect Dict.update (Dict.single "a" Bool.true) "a" alterValue == Dict.empty
update : Dict k v, k, ([Present v, Missing] -> [Present v, Missing]) -> Dict k v | k has Eq
update = \dict, key, alter ->
possibleValue =
get dict key
|> Result.map Present
|> Result.withDefault Missing
when alter possibleValue is
Present value -> insert dict key value
Missing -> remove dict key
# Internal for testing only
alterValue : [Present Bool, Missing] -> [Present Bool, Missing]
alterValue = \possibleValue ->
when possibleValue is
Missing -> Present Bool.false
Present value -> if value then Missing else Present Bool.true
expect update empty "a" alterValue == single "a" Bool.false
expect update (single "a" Bool.false) "a" alterValue == single "a" Bool.true
expect update (single "a" Bool.true) "a" alterValue == empty
## Check if the dictionary has a value for a specified key.
##
## expect
## Dict.empty
## |> Dict.insert 1234 "5678"
## |> Dict.contains 1234
## |> Bool.isEq Bool.true
contains : Dict k v, k -> Bool | k has Eq
contains = \@Dict list, needle ->
step = \_, Pair key _val ->
if key == needle then
@ -133,33 +236,110 @@ contains = \@Dict list, needle ->
Continue _ -> Bool.false
Break _ -> Bool.true
## Returns a dictionary containing the key and value provided as input.
##
## expect
## Dict.single "A" "B"
## |> Bool.isEq (Dict.insert Dict.empty "A" "B")
single : k, v -> Dict k v
single = \key, value ->
@Dict [Pair key value]
## Returns a [List] of the dictionary's keys.
## Returns the keys of a dictionary as a [List].
##
## expect
## Dict.single 1 "One"
## |> Dict.insert 2 "Two"
## |> Dict.insert 3 "Three"
## |> Dict.insert 4 "Four"
## |> Dict.keys
## |> Bool.isEq [1,2,3,4]
keys : Dict k v -> List k
keys = \@Dict list ->
List.map list (\Pair k _ -> k)
## Returns a [List] of the Dict's values
## Returns the values of a dictionary as a [List].
##
## expect
## Dict.single 1 "One"
## |> Dict.insert 2 "Two"
## |> Dict.insert 3 "Three"
## |> Dict.insert 4 "Four"
## |> Dict.values
## |> Bool.isEq ["One","Two","Three","Four"]
values : Dict k v -> List v
values = \@Dict list ->
List.map list (\Pair _ v -> v)
# union : Dict k v, Dict k v -> Dict k v
insertAll : Dict k v, Dict k v -> Dict k v
## Combine two dictionaries by keeping the [union](https://en.wikipedia.org/wiki/Union_(set_theory))
## of all the key-value pairs. This means that all the key-value pairs in
## both dictionaries will be combined. Note that where there are pairs
## with the same key, the value contained in the first input will be
## retained, and the value in the second input will be removed.
##
## first =
## Dict.single 1 "Keep Me"
## |> Dict.insert 2 "And Me"
##
## second =
## Dict.single 1 "Not Me"
## |> Dict.insert 3 "Me Too"
## |> Dict.insert 4 "And Also Me"
##
## expected =
## Dict.single 1 "Keep Me"
## |> Dict.insert 2 "And Me"
## |> Dict.insert 3 "Me Too"
## |> Dict.insert 4 "And Also Me"
##
## expect
## Dict.insertAll first second == expected
insertAll : Dict k v, Dict k v -> Dict k v | k has Eq
insertAll = \xs, @Dict ys ->
List.walk ys xs (\state, Pair k v -> Dict.insertIfVacant state k v)
# intersection : Dict k v, Dict k v -> Dict k v
keepShared : Dict k v, Dict k v -> Dict k v
## Combine two dictionaries by keeping the [intersection](https://en.wikipedia.org/wiki/Intersection_(set_theory))
## of all the key-value pairs. This means that we keep only those pairs
## that are in both dictionaries. Note that where there are pairs with
## the same key, the value contained in the first input will be retained,
## and the value in the second input will be removed.
##
## first =
## Dict.single 1 "Keep Me"
## |> Dict.insert 2 "And Me"
##
## second =
## Dict.single 1 "Keep Me"
## |> Dict.insert 2 "And Me"
## |> Dict.insert 3 "But Not Me"
## |> Dict.insert 4 "Or Me"
##
## expect Dict.keepShared first second == first
keepShared : Dict k v, Dict k v -> Dict k v | k has Eq
keepShared = \@Dict xs, ys ->
List.keepIf xs (\Pair k _ -> Dict.contains ys k)
|> @Dict
# difference : Dict k v, Dict k v -> Dict k v
removeAll : Dict k v, Dict k v -> Dict k v
## Remove the key-value pairs in the first input that are also in the second
## using the [set difference](https://en.wikipedia.org/wiki/Complement_(set_theory)#Relative_complement)
## of the values. This means that we will be left with only those pairs that
## are in the first dictionary and whose keys are not in the second.
##
## first =
## Dict.single 1 "Keep Me"
## |> Dict.insert 2 "And Me"
## |> Dict.insert 3 "Remove Me"
##
## second =
## Dict.single 3 "Remove Me"
## |> Dict.insert 4 "I do nothing..."
##
## expected =
## Dict.single 1 "Keep Me"
## |> Dict.insert 2 "And Me"
##
## expect Dict.removeAll first second == expected
removeAll : Dict k v, Dict k v -> Dict k v | k has Eq
removeAll = \xs, @Dict ys ->
List.walk ys xs (\state, Pair k _ -> Dict.remove state k)
@ -172,9 +352,402 @@ insertFresh = \@Dict list, k, v ->
|> List.append (Pair k v)
|> @Dict
insertIfVacant : Dict k v, k, v -> Dict k v
insertIfVacant : Dict k v, k, v -> Dict k v | k has Eq
insertIfVacant = \dict, key, value ->
if Dict.contains dict key then
dict
else
Dict.insert dict key value
# We have decided not to expose the standard roc hashing algorithm.
# This is to avoid external dependence and the need for versioning.
# The current implementation is a form of [Wyhash final3](https://github.com/wangyi-fudan/wyhash/blob/a5995b98ebfa7bd38bfadc0919326d2e7aabb805/wyhash.h).
# It is 64bit and little endian specific currently.
# TODO: wyhash is slow for large keys, use something like cityhash if the keys are too long.
# TODO: Add a builtin to distinguish big endian systems and change loading orders.
# TODO: Switch out Wymum on systems with slow 128bit multiplication.
LowLevelHasher := { originalSeed : U64, state : U64 } has [
Hasher {
addBytes,
addU8,
addU16,
addU32,
addU64,
addU128,
complete,
},
]
# unsafe primitive that does not perform a bounds check
# TODO hide behind an InternalList.roc module
listGetUnsafe : List a, Nat -> a
createLowLevelHasher : { seed ?U64 } -> LowLevelHasher
createLowLevelHasher = \{ seed ? 0x526F_6352_616E_643F } ->
@LowLevelHasher { originalSeed: seed, state: seed }
combineState : LowLevelHasher, { a : U64, b : U64, seed : U64, length : U64 } -> LowLevelHasher
combineState = \@LowLevelHasher { originalSeed, state }, { a, b, seed, length } ->
tmp = wymix (Num.bitwiseXor wyp1 a) (Num.bitwiseXor seed b)
hash = wymix (Num.bitwiseXor wyp1 length) tmp
@LowLevelHasher { originalSeed, state: wymix state hash }
complete = \@LowLevelHasher { state } -> state
# These implementations hash each value individually with the seed and then mix
# the resulting hash with the state. There are other options that may be faster
# like using the output of the last hash as the seed to the current hash.
# I am simply not sure of the tradeoffs here. Theoretically this method is more sound.
# Either way, the performance will be similar and we can change this later.
addU8 = \@LowLevelHasher { originalSeed, state }, u8 ->
seed = Num.bitwiseXor originalSeed wyp0
p0 = Num.toU64 u8
a =
Num.shiftLeftBy p0 16
|> Num.bitwiseOr (Num.shiftLeftBy p0 8)
|> Num.bitwiseOr p0
b = 0
combineState (@LowLevelHasher { originalSeed, state }) { a, b, seed, length: 1 }
addU16 = \@LowLevelHasher { originalSeed, state }, u16 ->
seed = Num.bitwiseXor originalSeed wyp0
p0 = Num.bitwiseAnd u16 0xFF |> Num.toU64
p1 = Num.shiftRightZfBy u16 8 |> Num.toU64
a =
Num.shiftLeftBy p0 16
|> Num.bitwiseOr (Num.shiftLeftBy p1 8)
|> Num.bitwiseOr p1
b = 0
combineState (@LowLevelHasher { originalSeed, state }) { a, b, seed, length: 2 }
addU32 = \@LowLevelHasher { originalSeed, state }, u32 ->
seed = Num.bitwiseXor originalSeed wyp0
p0 = Num.toU64 u32
a = Num.shiftLeftBy p0 32 |> Num.bitwiseOr p0
combineState (@LowLevelHasher { originalSeed, state }) { a, b: a, seed, length: 4 }
addU64 = \@LowLevelHasher { originalSeed, state }, u64 ->
seed = Num.bitwiseXor originalSeed wyp0
p0 = Num.bitwiseAnd 0xFFFF_FFFF u64
p1 = Num.shiftRightZfBy u64 32
a = Num.shiftLeftBy p0 32 |> Num.bitwiseOr p1
b = Num.shiftLeftBy p1 32 |> Num.bitwiseOr p0
combineState (@LowLevelHasher { originalSeed, state }) { a, b, seed, length: 8 }
addU128 = \@LowLevelHasher { originalSeed, state }, u128 ->
seed = Num.bitwiseXor originalSeed wyp0
lower = u128 |> Num.toU64
upper = Num.shiftRightZfBy u128 64 |> Num.toU64
p0 = Num.bitwiseAnd 0xFFFF_FFFF lower
p1 = Num.shiftRightZfBy lower 32 |> Num.bitwiseAnd 0xFFFF_FFFF
p2 = Num.bitwiseAnd 0xFFFF_FFFF upper
p3 = Num.shiftRightZfBy upper 32 |> Num.bitwiseAnd 0xFFFF_FFFF
a = Num.shiftLeftBy p0 32 |> Num.bitwiseOr p2
b = Num.shiftLeftBy p3 32 |> Num.bitwiseOr p1
combineState (@LowLevelHasher { originalSeed, state }) { a, b, seed, length: 16 }
addBytes : LowLevelHasher, List U8 -> LowLevelHasher
addBytes = \@LowLevelHasher { originalSeed, state }, list ->
length = List.len list
seed = Num.bitwiseXor originalSeed wyp0
abs =
if length <= 16 then
if length >= 4 then
x = Num.shiftRightZfBy length 3 |> Num.shiftLeftBy 2
a = Num.bitwiseOr (wyr4 list 0 |> Num.shiftLeftBy 32) (wyr4 list x)
b =
(wyr4 list (Num.subWrap length 4) |> Num.shiftLeftBy 32)
|> Num.bitwiseOr (wyr4 list (Num.subWrap length 4 |> Num.subWrap x))
{ a, b, seed }
else if length > 0 then
{ a: wyr3 list 0 length, b: 0, seed }
else
{ a: 0, b: 0, seed }
else if length <= 48 then
hashBytesHelper16 seed list 0 length
else
hashBytesHelper48 seed seed seed list 0 length
combineState (@LowLevelHasher { originalSeed, state }) { a: abs.a, b: abs.b, seed: abs.seed, length: Num.toU64 length }
hashBytesHelper48 : U64, U64, U64, List U8, Nat, Nat -> { a : U64, b : U64, seed : U64 }
hashBytesHelper48 = \seed, see1, see2, list, index, remaining ->
newSeed = wymix (Num.bitwiseXor (wyr8 list index) wyp1) (Num.bitwiseXor (wyr8 list (Num.addWrap index 8)) seed)
newSee1 = wymix (Num.bitwiseXor (wyr8 list (Num.addWrap index 16)) wyp2) (Num.bitwiseXor (wyr8 list (Num.addWrap index 24)) see1)
newSee2 = wymix (Num.bitwiseXor (wyr8 list (Num.addWrap index 32)) wyp3) (Num.bitwiseXor (wyr8 list (Num.addWrap index 40)) see2)
newRemaining = Num.subWrap remaining 48
newIndex = Num.addWrap index 48
if newRemaining > 48 then
hashBytesHelper48 newSeed newSee1 newSee2 list newIndex newRemaining
else if newRemaining > 16 then
finalSeed = Num.bitwiseXor newSee2 (Num.bitwiseXor newSee1 newSeed)
hashBytesHelper16 finalSeed list newIndex newRemaining
else
finalSeed = Num.bitwiseXor newSee2 (Num.bitwiseXor newSee1 newSeed)
{ a: wyr8 list (Num.subWrap newRemaining 16 |> Num.addWrap newIndex), b: wyr8 list (Num.subWrap newRemaining 8 |> Num.addWrap newIndex), seed: finalSeed }
hashBytesHelper16 : U64, List U8, Nat, Nat -> { a : U64, b : U64, seed : U64 }
hashBytesHelper16 = \seed, list, index, remaining ->
newSeed = wymix (Num.bitwiseXor (wyr8 list index) wyp1) (Num.bitwiseXor (wyr8 list (Num.addWrap index 8)) seed)
newRemaining = Num.subWrap remaining 16
newIndex = Num.addWrap index 16
if newRemaining <= 16 then
{ a: wyr8 list (Num.subWrap newRemaining 16 |> Num.addWrap newIndex), b: wyr8 list (Num.subWrap newRemaining 8 |> Num.addWrap newIndex), seed: newSeed }
else
hashBytesHelper16 newSeed list newIndex newRemaining
wyp0 : U64
wyp0 = 0xa0761d6478bd642f
wyp1 : U64
wyp1 = 0xe7037ed1a0b428db
wyp2 : U64
wyp2 = 0x8ebc6af09c88c6e3
wyp3 : U64
wyp3 = 0x589965cc75374cc3
wymix : U64, U64 -> U64
wymix = \a, b ->
{ lower, upper } = wymum a b
Num.bitwiseXor lower upper
wymum : U64, U64 -> { lower : U64, upper : U64 }
wymum = \a, b ->
r = Num.toU128 a * Num.toU128 b
lower = Num.toU64 r
upper = Num.shiftRightZfBy r 64 |> Num.toU64
# This is the more robust form.
# { lower: Num.bitwiseXor a lower, upper: Num.bitwiseXor b upper }
{ lower, upper }
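For reference, the same multiply-and-fold written out in Zig as a standalone sketch (the Roc definitions above are what the builtins actually use):

const std = @import("std");

// wymum: widen to 128 bits, multiply, and split the product into its two 64-bit halves.
fn wymum(a: u64, b: u64) struct { lower: u64, upper: u64 } {
    const r = @as(u128, a) * @as(u128, b);
    return .{
        .lower = @truncate(u64, r),
        .upper = @truncate(u64, r >> 64),
    };
}

// wymix: fold the two halves together with xor.
fn wymix(a: u64, b: u64) u64 {
    const m = wymum(a, b);
    return m.lower ^ m.upper;
}

test "wymix folds the 128-bit product" {
    // 2 * 3 = 6 fits entirely in the lower half.
    try std.testing.expectEqual(@as(u64, 6), wymix(2, 3));
    // (1 << 63) * 4 = 1 << 65, so only the upper half is set (to 2).
    try std.testing.expectEqual(@as(u64, 2), wymix(@as(u64, 1) << 63, 4));
}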
# Get the next 8 bytes as a U64
wyr8 : List U8, Nat -> U64
wyr8 = \list, index ->
# With seamless slices and Num.fromBytes, this should be possible to make faster and nicer.
# It would also deal with the fact that on big endian systems we want to invert the order here.
# Without seamless slices, we would need fromBytes to take an index.
p1 = listGetUnsafe list index |> Num.toU64
p2 = listGetUnsafe list (Num.addWrap index 1) |> Num.toU64
p3 = listGetUnsafe list (Num.addWrap index 2) |> Num.toU64
p4 = listGetUnsafe list (Num.addWrap index 3) |> Num.toU64
p5 = listGetUnsafe list (Num.addWrap index 4) |> Num.toU64
p6 = listGetUnsafe list (Num.addWrap index 5) |> Num.toU64
p7 = listGetUnsafe list (Num.addWrap index 6) |> Num.toU64
p8 = listGetUnsafe list (Num.addWrap index 7) |> Num.toU64
a = Num.bitwiseOr p1 (Num.shiftLeftBy p2 8)
b = Num.bitwiseOr (Num.shiftLeftBy p3 16) (Num.shiftLeftBy p4 24)
c = Num.bitwiseOr (Num.shiftLeftBy p5 32) (Num.shiftLeftBy p6 40)
d = Num.bitwiseOr (Num.shiftLeftBy p7 48) (Num.shiftLeftBy p8 56)
Num.bitwiseOr (Num.bitwiseOr a b) (Num.bitwiseOr c d)
# Get the next 4 bytes as a U64 with some shifting.
wyr4 : List U8, Nat -> U64
wyr4 = \list, index ->
p1 = listGetUnsafe list index |> Num.toU64
p2 = listGetUnsafe list (Num.addWrap index 1) |> Num.toU64
p3 = listGetUnsafe list (Num.addWrap index 2) |> Num.toU64
p4 = listGetUnsafe list (Num.addWrap index 3) |> Num.toU64
a = Num.bitwiseOr p1 (Num.shiftLeftBy p2 8)
b = Num.bitwiseOr (Num.shiftLeftBy p3 16) (Num.shiftLeftBy p4 24)
Num.bitwiseOr a b
# Get the next K bytes with some shifting.
# K must be 3 or less.
wyr3 : List U8, Nat, Nat -> U64
wyr3 = \list, index, k ->
# ((uint64_t)p[0])<<16)|(((uint64_t)p[k>>1])<<8)|p[k-1]
p1 = listGetUnsafe list index |> Num.toU64
p2 = listGetUnsafe list (Num.shiftRightZfBy k 1 |> Num.addWrap index) |> Num.toU64
p3 = listGetUnsafe list (Num.subWrap k 1 |> Num.addWrap index) |> Num.toU64
a = Num.bitwiseOr (Num.shiftLeftBy p1 16) (Num.shiftLeftBy p2 8)
Num.bitwiseOr a p3
# TODO: would be great to have table driven expects for this.
# Would also be great to have some sort of property based hasher
# where we can compare `addU*` functions to the `addBytes` function.
expect
hash =
createLowLevelHasher {}
|> addBytes []
|> complete
hash == 0x1C3F_F8BF_07F9_B0B3
expect
hash =
createLowLevelHasher {}
|> addBytes [0x42]
|> complete
hash == 0x8F9F_0A1E_E06F_0D52
expect
hash =
createLowLevelHasher {}
|> addU8 0x42
|> complete
hash == 0x8F9F_0A1E_E06F_0D52
expect
hash =
createLowLevelHasher {}
|> addBytes [0xFF, 0xFF]
|> complete
hash == 0x86CC_8B71_563F_F084
expect
hash =
createLowLevelHasher {}
|> addU16 0xFFFF
|> complete
hash == 0x86CC_8B71_563F_F084
expect
hash =
createLowLevelHasher {}
|> addBytes [0x36, 0xA7]
|> complete
hash == 0xD1A5_0F24_2536_84F8
expect
hash =
createLowLevelHasher {}
|> addU16 0xA736
|> complete
hash == 0xD1A5_0F24_2536_84F8
expect
hash =
createLowLevelHasher {}
|> addBytes [0x00, 0x00, 0x00, 0x00]
|> complete
hash == 0x3762_ACB1_7604_B541
expect
hash =
createLowLevelHasher {}
|> addU32 0x0000_0000
|> complete
hash == 0x3762_ACB1_7604_B541
expect
hash =
createLowLevelHasher {}
|> addBytes [0xA9, 0x2F, 0xEE, 0x21]
|> complete
hash == 0x20F3_3FD7_D32E_C7A9
expect
hash =
createLowLevelHasher {}
|> addU32 0x21EE_2FA9
|> complete
hash == 0x20F3_3FD7_D32E_C7A9
expect
hash =
createLowLevelHasher {}
|> addBytes [0x5D, 0x66, 0xB1, 0x8F, 0x68, 0x44, 0xC7, 0x03, 0xE1, 0xDD, 0x23, 0x34, 0xBB, 0x9A, 0x42, 0xA7]
|> complete
hash == 0xA16F_DDAA_C167_74C7
expect
hash =
createLowLevelHasher {}
|> addU128 0xA742_9ABB_3423_DDE1_03C7_4468_8FB1_665D
|> complete
hash == 0xA16F_DDAA_C167_74C7
expect
hash =
createLowLevelHasher {}
|> Hash.hashStrBytes "abcdefghijklmnopqrstuvwxyz"
|> complete
hash == 0xBEE0_A8FD_E990_D285
expect
hash =
createLowLevelHasher {}
|> Hash.hashStrBytes "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
|> complete
hash == 0xB3C5_8528_9D82_A6EF
expect
hash =
createLowLevelHasher {}
|> Hash.hashStrBytes "1234567890123456789012345678901234567890123456789012345678901234567890"
|> complete
hash == 0xDB6B_7997_7A55_BA03
expect
hash =
createLowLevelHasher {}
|> addBytes (List.repeat 0x77 100)
|> complete
hash == 0x171F_EEE2_B764_8E5E
# Note, had to specify u8 in the lists below to avoid ability type resolution error.
# Apparently it won't pick the default integer.
expect
hash =
createLowLevelHasher {}
|> Hash.hashUnordered [8u8, 82u8, 3u8, 8u8, 24u8] List.walk
|> complete
hash == 0x999F_B530_3529_F17D
expect
hash1 =
createLowLevelHasher {}
|> Hash.hashUnordered ([0u8, 1u8, 2u8, 3u8, 4u8]) List.walk
|> complete
hash2 =
createLowLevelHasher {}
|> Hash.hashUnordered [4u8, 3u8, 2u8, 1u8, 0u8] List.walk
|> complete
hash1 == hash2
expect
hash1 =
createLowLevelHasher {}
|> Hash.hashUnordered [0u8, 1u8, 2u8, 3u8, 4u8] List.walk
|> complete
hash2 =
createLowLevelHasher {}
|> Hash.hashUnordered [4u8, 3u8, 2u8, 1u8, 0u8, 0u8] List.walk
|> complete
hash1 != hash2


@ -27,7 +27,24 @@ interface Encode
append,
toBytes,
]
imports []
imports [
Num.{
U8,
U16,
U32,
U64,
U128,
I8,
I16,
I32,
I64,
I128,
F32,
F64,
Dec,
},
Bool.{ Bool },
]
Encoder fmt := List U8, fmt -> List U8 | fmt has EncoderFormatting


@ -0,0 +1,113 @@
interface Hash
exposes [
Hash,
Hasher,
hash,
addBytes,
addU8,
addU16,
addU32,
addU64,
addU128,
hashI8,
hashI16,
hashI32,
hashI64,
hashI128,
complete,
hashStrBytes,
hashList,
hashUnordered,
] imports [
List,
Str,
Num.{ U8, U16, U32, U64, U128, I8, I16, I32, I64, I128 },
]
## A value that can be hashed.
Hash has
## Hashes a value into a [Hasher].
## Note that [hash] does not produce a hash value itself; the hasher must be
## [complete]d in order to extract the hash value.
hash : hasher, a -> hasher | a has Hash, hasher has Hasher
## Describes a hashing algorithm that is fed bytes and produces an integer hash.
##
## The [Hasher] ability describes general-purpose hashers. It only allows
## emission of 64-bit unsigned integer hashes. It is not suitable for
## cryptographically-secure hashing.
Hasher has
## Adds a list of bytes to the hasher.
addBytes : a, List U8 -> a | a has Hasher
## Adds a single U8 to the hasher.
addU8 : a, U8 -> a | a has Hasher
## Adds a single U16 to the hasher.
addU16 : a, U16 -> a | a has Hasher
## Adds a single U32 to the hasher.
addU32 : a, U32 -> a | a has Hasher
## Adds a single U64 to the hasher.
addU64 : a, U64 -> a | a has Hasher
## Adds a single U128 to the hasher.
addU128 : a, U128 -> a | a has Hasher
## Completes the hasher, extracting a hash value from its
## accumulated hash state.
complete : a -> U64 | a has Hasher
## Adds a string into a [Hasher] by hashing its UTF-8 bytes.
hashStrBytes = \hasher, s ->
addBytes hasher (Str.toUtf8 s)
## Adds a list of [Hash]able elements to a [Hasher] by hashing each element.
hashList = \hasher, lst ->
List.walk lst hasher \accumHasher, elem ->
hash accumHasher elem
## Adds a single I8 to a hasher.
hashI8 : a, I8 -> a | a has Hasher
hashI8 = \hasher, n -> addU8 hasher (Num.toU8 n)
## Adds a single I16 to a hasher.
hashI16 : a, I16 -> a | a has Hasher
hashI16 = \hasher, n -> addU16 hasher (Num.toU16 n)
## Adds a single I32 to a hasher.
hashI32 : a, I32 -> a | a has Hasher
hashI32 = \hasher, n -> addU32 hasher (Num.toU32 n)
## Adds a single I64 to a hasher.
hashI64 : a, I64 -> a | a has Hasher
hashI64 = \hasher, n -> addU64 hasher (Num.toU64 n)
## Adds a single I128 to a hasher.
hashI128 : a, I128 -> a | a has Hasher
hashI128 = \hasher, n -> addU128 hasher (Num.toU128 n)
## Adds a container of [Hash]able elements to a [Hasher] by hashing each element.
## The container is iterated using the walk method passed in.
## The order of the elements does not affect the final hash.
hashUnordered = \hasher, container, walk ->
walk
container
0
(\accum, elem ->
x =
# Note, we intentionally copy the hasher in every iteration.
# Having the same base state is required for unordered hashing.
hasher
|> hash elem
|> complete
nextAccum = Num.addWrap accum x
if nextAccum < accum then
# we don't want to lose a bit of entropy on overflow, so add it back in.
Num.addWrap nextAccum 1
else
nextAccum
)
|> \accum -> addU64 hasher accum
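# Illustrative sketch (not part of the original diff): the helpers above compose with the
# pipe operator. `hashPerson` and its `{ name : Str, scores : List U8 }` argument are made
# up for illustration; any hasher with the Hasher ability works.
hashPerson = \hasher, person -> hasher |> hashStrBytes person.name |> hashList person.scores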

View file

@ -18,6 +18,23 @@ interface Json
DecoderFormatting,
DecodeResult,
},
Num.{
U8,
U16,
U32,
U64,
U128,
I8,
I16,
I32,
I64,
I128,
F32,
F64,
Dec,
},
Bool.{ Bool, Eq },
Result,
]
Json := {} has [
@ -187,9 +204,8 @@ takeWhile = \list, predicate ->
helper { taken: [], rest: list }
asciiByte = \b -> Num.toU8 b
digits = List.range (asciiByte '0') (asciiByte '9' + 1)
digits : List U8
digits = List.range '0' ('9' + 1)
takeDigits = \bytes ->
takeWhile bytes \n -> List.contains digits n
@ -198,10 +214,10 @@ takeFloat = \bytes ->
{ taken: intPart, rest } = takeDigits bytes
when List.get rest 0 is
Ok 46 -> # 46 = .
Ok '.' ->
{ taken: floatPart, rest: afterAll } = takeDigits (List.split rest 1).others
builtFloat =
List.concat (List.append intPart (asciiByte '.')) floatPart
List.concat (List.append intPart '.') floatPart
{ taken: builtFloat, rest: afterAll }
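# Illustrative note (not part of the original diff): single-quoted character literals are
# just numbers, which is why '.' can replace (asciiByte '.') above.
expect '.' == 46u8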
@ -305,14 +321,14 @@ decodeBool = Decode.custom \bytes, @Json {} ->
# Note: this could be more performant by traversing both branches char-by-char.
# Doing that would also make `rest` more correct in the erroring case.
if
maybeFalse == [asciiByte 'f', asciiByte 'a', asciiByte 'l', asciiByte 's', asciiByte 'e']
maybeFalse == ['f', 'a', 'l', 's', 'e']
then
{ result: Ok Bool.false, rest: afterFalse }
else
{ before: maybeTrue, others: afterTrue } = List.split bytes 4
if
maybeTrue == [asciiByte 't', asciiByte 'r', asciiByte 'u', asciiByte 'e']
maybeTrue == ['t', 'r', 'u', 'e']
then
{ result: Ok Bool.true, rest: afterTrue }
else
@ -323,10 +339,10 @@ jsonString = \bytes ->
{ before, others: afterStartingQuote } = List.split bytes 1
if
before == [asciiByte '"']
before == ['"']
then
# TODO: handle escape sequences
{ taken: strSequence, rest } = takeWhile afterStartingQuote \n -> n != asciiByte '"'
{ taken: strSequence, rest } = takeWhile afterStartingQuote \n -> n != '"'
when Str.fromUtf8 strSequence is
Ok s ->
@ -351,7 +367,7 @@ decodeList = \decodeElem -> Decode.custom \bytes, @Json {} ->
{ before: afterElem, others } = List.split rest 1
if
afterElem == [asciiByte ',']
afterElem == [',']
then
decodeElems others (List.append accum val)
else
@ -362,7 +378,7 @@ decodeList = \decodeElem -> Decode.custom \bytes, @Json {} ->
{ before, others: afterStartingBrace } = List.split bytes 1
if
before == [asciiByte '[']
before == ['[']
then
# TODO: empty lists
when decodeElems afterStartingBrace [] is
@ -371,7 +387,7 @@ decodeList = \decodeElem -> Decode.custom \bytes, @Json {} ->
{ before: maybeEndingBrace, others: afterEndingBrace } = List.split rest 1
if
maybeEndingBrace == [asciiByte ']']
maybeEndingBrace == [']']
then
{ result: Ok vals, rest: afterEndingBrace }
else
@ -393,10 +409,10 @@ parseExactChar = \bytes, char ->
Err _ -> { result: Err TooShort, rest: bytes }
openBrace : List U8 -> DecodeResult {}
openBrace = \bytes -> parseExactChar bytes (asciiByte '{')
openBrace = \bytes -> parseExactChar bytes '{'
closingBrace : List U8 -> DecodeResult {}
closingBrace = \bytes -> parseExactChar bytes (asciiByte '}')
closingBrace = \bytes -> parseExactChar bytes '}'
recordKey : List U8 -> DecodeResult Str
recordKey = \bytes -> jsonString bytes
@ -405,10 +421,10 @@ anything : List U8 -> DecodeResult {}
anything = \bytes -> { result: Err TooShort, rest: bytes }
colon : List U8 -> DecodeResult {}
colon = \bytes -> parseExactChar bytes (asciiByte ':')
colon = \bytes -> parseExactChar bytes ':'
comma : List U8 -> DecodeResult {}
comma = \bytes -> parseExactChar bytes (asciiByte ',')
comma = \bytes -> parseExactChar bytes ','
tryDecode : DecodeResult a, ({ val : a, rest : List U8 } -> DecodeResult b) -> DecodeResult b
tryDecode = \{ result, rest }, mapper ->

View file

@ -29,6 +29,8 @@ interface List
map3,
product,
walkUntil,
walkFrom,
walkFromUntil,
range,
sortWith,
drop,
@ -39,6 +41,7 @@ interface List
max,
map4,
mapTry,
walkTry,
dropFirst,
joinMap,
any,
@ -60,9 +63,13 @@ interface List
sortAsc,
sortDesc,
reserve,
walkBackwardsUntil,
countIf,
]
imports [
Bool.{ Bool },
Bool.{ Bool, Eq },
Result.{ Result },
Num.{ Nat, Num, Int },
]
## Types
@ -85,9 +92,8 @@ interface List
##
## ## Performance Details
##
## Under the hood, a list is a record containing a `len : Nat` field as well
## as a pointer to a reference count and a flat array of bytes. Unique lists
## store a capacity #Nat instead of a reference count.
## Under the hood, a list is a record containing a `len : Nat` field, a `capacity : Nat`
## field, and a pointer to a reference count and a flat array of bytes.
##
## ## Shared Lists
##
@ -109,9 +115,8 @@ interface List
## begins with a refcount of 1, because so far only `ratings` is referencing it.
##
## The second line alters this refcount. `{ foo: ratings` references
## the `ratings` list, which will result in its refcount getting incremented
## from 0 to 1. Similarly, `bar: ratings }` also references the `ratings` list,
## which will result in its refcount getting incremented from 1 to 2.
## the `ratings` list, and so does `bar: ratings }`. This will result in its
## refcount getting incremented from 1 to 3.
##
## Let's turn this example into a function.
##
@ -129,11 +134,11 @@ interface List
##
## Since `ratings` represented a way to reference the list, and that way is no
## longer accessible, the list's refcount gets decremented when `ratings` goes
## out of scope. It will decrease from 2 back down to 1.
## out of scope. It will decrease from 3 back down to 2.
##
## Putting these together, when we call `getRatings 5`, what we get back is
## a record with two fields, `foo`, and `bar`, each of which refers to the same
## list, and that list has a refcount of 1.
## list, and that list has a refcount of 2.
##
## Let's change the last line to be `(getRatings 5).bar` instead of `getRatings 5`:
##
@ -351,7 +356,7 @@ join = \lists ->
List.walk lists (List.withCapacity totalLength) (\state, list -> List.concat state list)
contains : List a, a -> Bool
contains : List a, a -> Bool | a has Eq
contains = \list, needle ->
List.any list (\x -> x == needle)
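# Illustrative sketch (not part of the original diff): with the new Eq constraint,
# contains still works as before for any element type that has Eq.
expect List.contains [1, 2, 3] 2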
@ -433,6 +438,29 @@ walkUntil = \list, initial, step ->
Continue new -> new
Break new -> new
## Same as [List.walkUntil], but does it from the end of the list instead.
walkBackwardsUntil : List elem, state, (state, elem -> [Continue state, Break state]) -> state
walkBackwardsUntil = \list, initial, func ->
when List.iterateBackwards list initial func is
Continue new -> new
Break new -> new
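# Illustrative sketch (not part of the original diff): walking from the end, we stop at
# the first element smaller than 3, so only 4 and 3 are summed.
expect List.walkBackwardsUntil [1, 2, 3, 4] 0 (\sum, n -> if n < 3 then Break sum else Continue (sum + n)) == 7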
## Walks from the given starting index to the end of the list.
walkFrom : List elem, Nat, state, (state, elem -> state) -> state
walkFrom = \list, index, state, func ->
walkHelp : _, _ -> [Continue _, Break []]
walkHelp = \currentState, element -> Continue (func currentState element)
when List.iterHelp list state walkHelp index (List.len list) is
Continue new -> new
## A combination of [List.walkFrom] and [List.walkUntil].
walkFromUntil : List elem, Nat, state, (state, elem -> [Continue state, Break state]) -> state
walkFromUntil = \list, index, state, func ->
when List.iterHelp list state func index (List.len list) is
Continue new -> new
Break new -> new
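# Illustrative sketch (not part of the original diff): walkFrom starts at index 2, so only
# 3 and 4 reach the accumulator; walkFromUntil additionally stops on Break.
expect List.walkFrom [1, 2, 3, 4] 2 0 Num.add == 7
expect List.walkFromUntil [1, 2, 3, 4] 1 0 (\sum, n -> if n >= 4 then Break sum else Continue (sum + n)) == 5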
sum : List (Num a) -> Num a
sum = \list ->
List.walk list 0 Num.add
@ -519,6 +547,18 @@ dropIf : List a, (a -> Bool) -> List a
dropIf = \list, predicate ->
List.keepIf list (\e -> Bool.not (predicate e))
## Run the given function on each element of a list, and return the
## number of elements for which the function returned `Bool.true`.
countIf : List a, (a -> Bool) -> Nat
countIf = \list, predicate ->
walkState = \state, elem ->
if predicate elem then
state + 1
else
state
List.walk list 0 walkState
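# Illustrative sketch (not part of the original diff): three of the five elements satisfy
# the predicate here.
expect List.countIf [1, 2, 3, 4, 5] (\n -> n > 2) == 3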
## This works like [List.map], except only the transformed values that are
## wrapped in `Ok` are kept. Any that are wrapped in `Err` are dropped.
##
@ -881,7 +921,7 @@ intersperse = \list, sep ->
## is considered to "start with" an empty list.
##
## If the first list is empty, this only returns `Bool.true` if the second list is empty.
startsWith : List elem, List elem -> Bool
startsWith : List elem, List elem -> Bool | elem has Eq
startsWith = \list, prefix ->
# TODO once we have seamless slices, verify that this wouldn't
# have better performance with a function like List.compareSublists
@ -893,7 +933,7 @@ startsWith = \list, prefix ->
## is considered to "end with" an empty list.
##
## If the first list is empty, this only returns `Bool.true` if the second list is empty.
endsWith : List elem, List elem -> Bool
endsWith : List elem, List elem -> Bool | elem has Eq
endsWith = \list, suffix ->
# TODO once we have seamless slices, verify that this wouldn't
# have better performance with a function like List.compareSublists
@ -922,7 +962,7 @@ split = \elements, userSplitIndex ->
## remaining elements after that occurrence. If the delimiter is not found, returns `Err`.
##
## List.splitFirst [Foo, Z, Bar, Z, Baz] Z == Ok { before: [Foo], after: [Bar, Baz] }
splitFirst : List elem, elem -> Result { before : List elem, after : List elem } [NotFound]*
splitFirst : List elem, elem -> Result { before : List elem, after : List elem } [NotFound]* | elem has Eq
splitFirst = \list, delimiter ->
when List.findFirstIndex list (\elem -> elem == delimiter) is
Ok index ->
@ -937,7 +977,7 @@ splitFirst = \list, delimiter ->
## remaining elements after that occurrence. If the delimiter is not found, returns `Err`.
##
## List.splitLast [Foo, Z, Bar, Z, Baz] Z == Ok { before: [Foo, Bar], after: [Baz] }
splitLast : List elem, elem -> Result { before : List elem, after : List elem } [NotFound]*
splitLast : List elem, elem -> Result { before : List elem, after : List elem } [NotFound]* | elem has Eq
splitLast = \list, delimiter ->
when List.findLastIndex list (\elem -> elem == delimiter) is
Ok index ->
@ -957,9 +997,8 @@ mapTry = \list, toResult ->
Result.map (toResult elem) \ok ->
List.append state ok
## This is the same as `iterate` but with Result instead of [Continue, Break].
## This is the same as `iterate` but with [Result] instead of `[Continue, Break]`.
## Using `Result` saves a conditional in `mapTry`.
## It might be useful to expose this in userspace?
walkTry : List elem, state, (state, elem -> Result state err) -> Result state err
walkTry = \list, init, func ->
walkTryHelp list init func 0 (List.len list)
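# Illustrative sketch (not part of the original diff): walkTry threads the Ok state through
# each step and would return the first Err unchanged.
expect List.walkTry [1, 2, 3] 0 (\sum, n -> if n > 5 then Err TooBig else Ok (sum + n)) == Ok 6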

View file

@ -145,6 +145,7 @@ interface Num
]
imports [
Bool.{ Bool },
Result.{ Result },
]
## Represents a number that could be either an [Int] or a [Frac].
@ -574,7 +575,6 @@ isGte : Num a, Num a -> Bool
## Returns `Bool.true` if the number is `0`, and `Bool.false` otherwise.
isZero : Num a -> Bool
isZero = \x -> x == 0
## A number is even if dividing it by 2 gives a remainder of 0.
##
@ -793,7 +793,7 @@ div : Frac a, Frac a -> Frac a
divChecked : Frac a, Frac a -> Result (Frac a) [DivByZero]*
divChecked = \a, b ->
if b == 0 then
if Num.isZero b then
Err DivByZero
else
Ok (Num.div a b)
@ -802,7 +802,7 @@ divCeil : Int a, Int a -> Int a
divCeilChecked : Int a, Int a -> Result (Int a) [DivByZero]*
divCeilChecked = \a, b ->
if b == 0 then
if Num.isZero b then
Err DivByZero
else
Ok (Num.divCeil a b)
@ -827,7 +827,7 @@ divTrunc : Int a, Int a -> Int a
divTruncChecked : Int a, Int a -> Result (Int a) [DivByZero]*
divTruncChecked = \a, b ->
if b == 0 then
if Num.isZero b then
Err DivByZero
else
Ok (Num.divTrunc a b)
@ -847,7 +847,7 @@ rem : Int a, Int a -> Int a
remChecked : Int a, Int a -> Result (Int a) [DivByZero]*
remChecked = \a, b ->
if b == 0 then
if Num.isZero b then
Err DivByZero
else
Ok (Num.rem a b)
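# Illustrative sketch (not part of the original diff): the checked variants still report
# division by zero the same way after the switch to Num.isZero.
expect Num.divTruncChecked 5 0 == Err DivByZero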
@ -868,7 +868,7 @@ bitwiseOr : Int a, Int a -> Int a
## >>> 0b0000_0101 |> shiftLeftBy 2 == 0b0001_0100
##
## In some languages `shiftLeftBy` is implemented as a binary operator `<<`.
shiftLeftBy : Int a, Int a -> Int a
shiftLeftBy : Int a, U8 -> Int a
## Bitwise arithmetic shift of a number by another
##
@ -881,7 +881,7 @@ shiftLeftBy : Int a, Int a -> Int a
## >>> 0b1001_0000 |> shiftRightBy 2 == 0b1110_0100
##
## In some languages `shiftRightBy` is implemented as a binary operator `>>>`.
shiftRightBy : Int a, Int a -> Int a
shiftRightBy : Int a, U8 -> Int a
## Bitwise logical right shift of a number by another
##
@ -895,7 +895,7 @@ shiftRightBy : Int a, Int a -> Int a
## >>> 0b1001_0000 |> shiftRightZfBy 2 == 0b0010_0100
##
## In some languages `shiftRightBy` is implemented as a binary operator `>>`.
shiftRightZfBy : Int a, Int a -> Int a
shiftRightZfBy : Int a, U8 -> Int a
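# Illustrative sketch (not part of the original diff): with the new signatures the shift
# amount is a U8, while the shifted value keeps its own integer type.
expect Num.shiftLeftBy 0b0000_0101 2u8 == 0b0001_0100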
## Round off the given fraction to the nearest integer.
round : Frac * -> Int *
@ -1223,19 +1223,19 @@ toU64 : Int * -> U64
toU128 : Int * -> U128
## Convert an [Int] to a [Nat]. If the given number doesn't fit in [Nat], it will be truncated.
## Since #Nat has a different maximum number depending on the system you're building
## Since [Nat] has a different maximum number depending on the system you're building
## for, this may give a different answer on different systems.
##
## For example, on a 32-bit system, #Num.maxNat will return the same answer as
## [Num.maxU32]. This means that calling `Num.toNat 9_000_000_000` on a 32-bit
## system will return [Num.maxU32] instead of 9 billion, because 9 billion is
## higher than [Num.maxU32] and will not fit in a [Nat] on a 32-bit system.
## For example, on a 32-bit system, `Num.maxNat` will return the same answer as
## `Num.maxU32`. This means that calling `Num.toNat 9_000_000_000` on a 32-bit
## system will return `Num.maxU32` instead of 9 billion, because 9 billion is
## higher than `Num.maxU32` and will not fit in a [Nat] on a 32-bit system.
##
## However, calling `Num.toNat 9_000_000_000` on a 64-bit system will return
## the #Nat value of 9_000_000_000. This is because on a 64-bit system, [Nat] can
## hold up to [Num.maxU64], and 9_000_000_000 is lower than [Num.maxU64].
## the [Nat] value of 9_000_000_000. This is because on a 64-bit system, [Nat] can
## hold up to `Num.maxU64`, and 9_000_000_000 is lower than `Num.maxU64`.
##
## To convert a [Frac] to a [Nat], first call either #Num.round, #Num.ceil, or [Num.floor]
## To convert a [Frac] to a [Nat], first call either `Num.round`, `Num.ceil`, or `Num.floor`
## on it, then call this on the resulting [Int].
toNat : Int * -> Nat
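# Illustrative sketch (not part of the original diff): a value this small fits in Nat on
# every target, so the conversion is lossless.
expect Num.toNat 123i64 == 123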

View file

@ -14,9 +14,9 @@ interface Set
intersection,
difference,
]
imports [List, Bool.{ Bool }, Dict.{ Dict }]
imports [List, Bool.{ Bool, Eq }, Dict.{ Dict }, Num.{ Nat }]
Set k := Dict.Dict k {}
Set k := Dict.Dict k {} has [Eq]
fromDict : Dict k {} -> Set k
fromDict = \dict -> @Set dict
@ -35,7 +35,7 @@ single = \key ->
## Make sure never to insert a *NaN* into a [Set]! Because *NaN* is defined to be
## unequal to *NaN*, adding a *NaN* results in an entry that can never be
## retrieved or removed from the [Set].
insert : Set k, k -> Set k
insert : Set k, k -> Set k | k has Eq
insert = \@Set dict, key ->
dict
|> Dict.insert key {}
@ -75,11 +75,11 @@ expect
actual == 3
## Drops the given element from the set.
remove : Set k, k -> Set k
remove : Set k, k -> Set k | k has Eq
remove = \@Set dict, key ->
@Set (Dict.remove dict key)
contains : Set k, k -> Bool
contains : Set k, k -> Bool | k has Eq
contains = \set, key ->
set
|> Set.toDict
@ -89,21 +89,21 @@ toList : Set k -> List k
toList = \@Set dict ->
Dict.keys dict
fromList : List k -> Set k
fromList : List k -> Set k | k has Eq
fromList = \list ->
initial = @Set (Dict.withCapacity (List.len list))
List.walk list initial \set, key -> Set.insert set key
union : Set k, Set k -> Set k
union : Set k, Set k -> Set k | k has Eq
union = \@Set dict1, @Set dict2 ->
@Set (Dict.insertAll dict1 dict2)
intersection : Set k, Set k -> Set k
intersection : Set k, Set k -> Set k | k has Eq
intersection = \@Set dict1, @Set dict2 ->
@Set (Dict.keepShared dict1 dict2)
difference : Set k, Set k -> Set k
difference : Set k, Set k -> Set k | k has Eq
difference = \@Set dict1, @Set dict2 ->
@Set (Dict.removeAll dict1 dict2)
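# Illustrative sketch (not part of the original diff): the operations above now require the
# key type to have the Eq ability, which numbers like U8 do.
expect Set.single 1u8 |> Set.insert 2u8 |> Set.contains 2u8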

View file

@ -43,8 +43,16 @@ interface Str
appendScalar,
walkScalars,
walkScalarsUntil,
withCapacity,
withPrefix,
graphemes,
]
imports [
Bool.{ Bool, Eq },
Result.{ Result },
List,
Num.{ Nat, Num, U8, U16, U32, U64, U128, I8, I16, I32, I64, I128, F32, F64, Dec },
]
imports [Bool.{ Bool }, Result.{ Result }, List]
## # Types
##
@ -67,7 +75,7 @@ interface Str
## programming, and "extended grapheme cluster" is a mouthful, in Roc we use the
## term "grapheme" as a shorthand for the more precise "extended grapheme cluster."
##
## You can get the number of graphemes in a string by calling [Str.countGraphemes] on it:
## You can get the number of graphemes in a string by calling `Str.countGraphemes` on it:
##
## Str.countGraphemes "Roc!"
## Str.countGraphemes "折り紙"
@ -133,46 +141,56 @@ Utf8Problem : { byteIndex : Nat, problem : Utf8ByteProblem }
## Returns `Bool.true` if the string is empty, and `Bool.false` otherwise.
##
## >>> Str.isEmpty "hi!"
##
## >>> Str.isEmpty ""
## expect Str.isEmpty "hi!" == Bool.false
## expect Str.isEmpty "" == Bool.true
isEmpty : Str -> Bool
## Concatenate two [Str] values together.
##
## expect Str.concat "Hello" "World" == "HelloWorld"
concat : Str, Str -> Str
## Combine a list of strings into a single string, with a separator
## string in between each.
## Returns an empty [Str] with the specified capacity reserved.
withCapacity : Nat -> Str
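# Illustrative sketch (not part of the original diff): pre-reserving capacity for the
# 11 bytes of "Hello World" lets the concatenations below avoid reallocation.
expect (Str.withCapacity 11 |> Str.concat "Hello " |> Str.concat "World") == "Hello World"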
## Combine a [List] of [Str] into a single [Str], with a separator
## [Str] in between each.
##
## >>> Str.joinWith ["one", "two", "three"] ", "
## expect Str.joinWith ["one", "two", "three"] ", " == "one, two, three"
## expect Str.joinWith ["1", "2", "3", "4"] "." == "1.2.3.4"
joinWith : List Str, Str -> Str
## Split a string around a separator.
## Split a [Str] around a separator. Passing `""` for the separator is not
## useful; it returns the original string wrapped in a list. To split a string
## into its individual [graphemes](https://stackoverflow.com/a/27331885/4200103), use `Str.graphemes`
##
## >>> Str.split "1,2,3" ","
##
## Passing `""` for the separator is not useful; it returns the original string
## wrapped in a list.
##
## >>> Str.split "1,2,3" ""
##
## To split a string into its individual graphemes, use `Str.graphemes`
## expect Str.split "1,2,3" "," == ["1","2","3"]
## expect Str.split "1,2,3" "" == ["1,2,3"]
split : Str, Str -> List Str
## Repeat a given [Str] value [Nat] times.
##
## expect Str.repeat ">" 3 == ">>>"
repeat : Str, Nat -> Str
## Count the number of [extended grapheme clusters](http://www.unicode.org/glossary/#extended_grapheme_cluster)
## in the string.
##
## Str.countGraphemes "Roc!" # 4
## Str.countGraphemes "七巧板" # 3
## Str.countGraphemes "üïä" # 1
## expect Str.countGraphemes "Roc!" == 4
## expect Str.countGraphemes "七巧板" == 9
## expect Str.countGraphemes "üïä" == 4
countGraphemes : Str -> Nat
## Split a string into its constituent grapheme clusters
graphemes : Str -> List Str
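# Illustrative sketch (not part of the original diff): graphemes splits a string into its
# extended grapheme clusters.
expect Str.graphemes "Roc!" == ["R", "o", "c", "!"]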
## If the string begins with a [Unicode code point](http://www.unicode.org/glossary/#code_point)
## equal to the given [U32], return `Bool.true`. Otherwise return `Bool.false`.
##
## If the given [Str] is empty, or if the given [U32] is not a valid
## code point, this will return `Bool.false`.
##
## **Performance Note:** This runs slightly faster than [Str.startsWith], so
## **Performance Note:** This runs slightly faster than `Str.startsWith`, so
## if you want to check whether a string begins with something that's representable
## in a single code point, you can use (for example) `Str.startsWithScalar '鹏'`
## instead of `Str.startsWith "鹏"`. ('鹏' evaluates to the [U32] value `40527`.)
@ -183,25 +201,25 @@ countGraphemes : Str -> Nat
startsWithScalar : Str, U32 -> Bool
## Return a [List] of the [unicode scalar values](https://unicode.org/glossary/#unicode_scalar_value)
## in the given string.
## in the given string. Strings contain only scalar values, not [surrogate code points](https://unicode.org/glossary/#surrogate_code_point),
## so this is equivalent to returning a list of the string's [code points](https://unicode.org/glossary/#code_point).
##
## (Strings contain only scalar values, not [surrogate code points](https://unicode.org/glossary/#surrogate_code_point),
## so this is equivalent to returning a list of the string's [code points](https://unicode.org/glossary/#code_point).)
## expect Str.toScalars "I ♥ Roc" == [73, 32, 9829, 32, 82, 111, 99]
toScalars : Str -> List U32
## Return a [List] of the string's [U8] UTF-8 [code units](https://unicode.org/glossary/#code_unit).
## (To split the string into a [List] of smaller [Str] values instead of [U8] values,
## see [Str.split].)
## To split the string into a [List] of smaller [Str] values instead of [U8] values,
## see `Str.split`.
##
## >>> Str.toUtf8 "👩‍👩‍👦‍👦"
##
## >>> Str.toUtf8 "Roc"
##
## >>> Str.toUtf8 "鹏"
##
## >>> Str.toUtf8 "🐦"
## expect Str.toUtf8 "鹏" == [233, 185, 143]
## expect Str.toUtf8 "🐦" == [240, 159, 144, 166]
toUtf8 : Str -> List U8
## Encode a [List] of [U8] UTF-8 [code units](https://unicode.org/glossary/#code_unit)
## into a [Str]
##
## expect Str.fromUtf8 [233, 185, 143] == Ok "鹏"
## expect Str.fromUtf8 [0xb0] == Err (BadUtf8 InvalidStartByte 0)
fromUtf8 : List U8 -> Result Str [BadUtf8 Utf8ByteProblem Nat]*
fromUtf8 = \bytes ->
result = fromUtf8RangeLowlevel bytes 0 (List.len bytes)
@ -211,6 +229,10 @@ fromUtf8 = \bytes ->
else
Err (BadUtf8 result.dProblemCode result.aByteIndex)
## Encode part of a [List] of [U8] UTF-8 [code units](https://unicode.org/glossary/#code_unit)
## into a [Str]
##
## expect Str.fromUtf8Range [72, 105, 80, 103] { start : 0, count : 2 } == Ok "Hi"
fromUtf8Range : List U8, { start : Nat, count : Nat } -> Result Str [BadUtf8 Utf8ByteProblem Nat, OutOfBounds]*
fromUtf8Range = \bytes, config ->
if config.start + config.count <= List.len bytes then
@ -232,64 +254,210 @@ FromUtf8Result : {
fromUtf8RangeLowlevel : List U8, Nat, Nat -> FromUtf8Result
## Check if the given [Str] starts with a value.
##
## expect Str.startsWith "ABC" "A" == Bool.true
## expect Str.startsWith "ABC" "X" == Bool.false
startsWith : Str, Str -> Bool
## Check if the given [Str] ends with a value.
##
## expect Str.endsWith "ABC" "C" == Bool.true
## expect Str.endsWith "ABC" "X" == Bool.false
endsWith : Str, Str -> Bool
## Return the string with any blank spaces removed from both the beginning
## Return the [Str] with all whitespace removed from both the beginning
## as well as the end.
##
## expect Str.trim " Hello \n\n" == "Hello"
trim : Str -> Str
## Return the [Str] with all whitespace removed from the beginning.
##
## expect Str.trimLeft " Hello \n\n" == "Hello \n\n"
trimLeft : Str -> Str
## Return the [Str] with all whitespace removed from the end.
##
## expect Str.trimRight " Hello \n\n" == " Hello"
trimRight : Str -> Str
## Encode a [Str] to a [Dec]. A [Dec] value is a 128-bit decimal
## [fixed-point number](https://en.wikipedia.org/wiki/Fixed-point_arithmetic).
##
## expect Str.toDec "10" == Ok 10dec
## expect Str.toDec "-0.25" == Ok -0.25dec
## expect Str.toDec "not a number" == Err InvalidNumStr
toDec : Str -> Result Dec [InvalidNumStr]*
toDec = \string -> strToNumHelp string
## Encode a [Str] to an [F64]. An [F64] value is a 64-bit
## [floating-point number](https://en.wikipedia.org/wiki/IEEE_754) and can be
## specified with a `f64` suffix.
##
## expect Str.toF64 "0.10" == Ok 0.10f64
## expect Str.toF64 "not a number" == Err InvalidNumStr
toF64 : Str -> Result F64 [InvalidNumStr]*
toF64 = \string -> strToNumHelp string
## Encode a [Str] to an [F32]. An [F32] value is a 32-bit
## [floating-point number](https://en.wikipedia.org/wiki/IEEE_754) and can be
## specified with a `f32` suffix.
##
## expect Str.toF32 "0.10" == Ok 0.10f32
## expect Str.toF32 "not a number" == Err InvalidNumStr
toF32 : Str -> Result F32 [InvalidNumStr]*
toF32 = \string -> strToNumHelp string
## Convert a [Str] to a [Nat]. If the given number doesn't fit in [Nat], it will be [truncated](https://www.ualberta.ca/computing-science/media-library/teaching-resources/java/truncation-rounding.html).
## [Nat] has a different maximum number depending on the system you're building
## for, so this may give a different answer on different systems.
##
## For example, on a 32-bit system, `Num.maxNat` will return the same answer as
## `Num.maxU32`. This means that calling `Str.toNat "9_000_000_000"` on a 32-bit
## system will return `Num.maxU32` instead of 9 billion, because 9 billion is
## larger than `Num.maxU32` and will not fit in a [Nat] on a 32-bit system.
##
## Calling `Str.toNat "9_000_000_000"` on a 64-bit system will return
## the [Nat] value of 9_000_000_000. This is because on a 64-bit system, [Nat] can
## hold up to `Num.maxU64`, and 9_000_000_000 is smaller than `Num.maxU64`.
##
## expect Str.toNat "9_000_000_000" == Ok 9000000000
## expect Str.toNat "not a number" == Err InvalidNumStr
toNat : Str -> Result Nat [InvalidNumStr]*
toNat = \string -> strToNumHelp string
## Encode a [Str] to an unsigned [U128] integer. A [U128] value can hold numbers
## from `0` to `340_282_366_920_938_463_463_374_607_431_768_211_455` (over
## 340 undecillion). It can be specified with a u128 suffix.
##
## expect Str.toU128 "1500" == Ok 1500u128
## expect Str.toU128 "0.1" == Err InvalidNumStr
## expect Str.toU128 "-1" == Err InvalidNumStr
## expect Str.toU128 "not a number" == Err InvalidNumStr
toU128 : Str -> Result U128 [InvalidNumStr]*
toU128 = \string -> strToNumHelp string
## Encode a [Str] to a signed [I128] integer. An [I128] value can hold numbers
## from `-170_141_183_460_469_231_731_687_303_715_884_105_728` to
## `170_141_183_460_469_231_731_687_303_715_884_105_727`. It can be specified
## with an i128 suffix.
##
## expect Str.toI128 "1500" == Ok 1500i128
## expect Str.toI128 "-1" == Ok -1i128
## expect Str.toI128 "0.1" == Err InvalidNumStr
## expect Str.toI128 "not a number" == Err InvalidNumStr
toI128 : Str -> Result I128 [InvalidNumStr]*
toI128 = \string -> strToNumHelp string
## Encode a [Str] to an unsigned [U64] integer. A [U64] value can hold numbers
## from `0` to `18_446_744_073_709_551_615` (over 18 quintillion). It
## can be specified with a u64 suffix.
##
## expect Str.toU64 "1500" == Ok 1500u64
## expect Str.toU64 "0.1" == Err InvalidNumStr
## expect Str.toU64 "-1" == Err InvalidNumStr
## expect Str.toU64 "not a number" == Err InvalidNumStr
toU64 : Str -> Result U64 [InvalidNumStr]*
toU64 = \string -> strToNumHelp string
## Encode a [Str] to a signed [I64] integer. An [I64] value can hold numbers
## from `-9_223_372_036_854_775_808` to `9_223_372_036_854_775_807`. It can be
## specified with an i64 suffix.
##
## expect Str.toI64 "1500" == Ok 1500i64
## expect Str.toI64 "-1" == Ok -1i64
## expect Str.toI64 "0.1" == Err InvalidNumStr
## expect Str.toI64 "not a number" == Err InvalidNumStr
toI64 : Str -> Result I64 [InvalidNumStr]*
toI64 = \string -> strToNumHelp string
## Encode a [Str] to an unsigned [U32] integer. A [U32] value can hold numbers
## from `0` to `4_294_967_295` (over 4 billion). It can be specified with
## a u32 suffix.
##
## expect Str.toU32 "1500" == Ok 1500u32
## expect Str.toU32 "0.1" == Err InvalidNumStr
## expect Str.toU32 "-1" == Err InvalidNumStr
## expect Str.toU32 "not a number" == Err InvalidNumStr
toU32 : Str -> Result U32 [InvalidNumStr]*
toU32 = \string -> strToNumHelp string
## Encode a [Str] to a signed [I32] integer. An [I32] value can hold numbers
## from `-2_147_483_648` to `2_147_483_647`. It can be
## specified with an i32 suffix.
##
## expect Str.toI32 "1500" == Ok 1500i32
## expect Str.toI32 "-1" == Ok -1i32
## expect Str.toI32 "0.1" == Err InvalidNumStr
## expect Str.toI32 "not a number" == Err InvalidNumStr
toI32 : Str -> Result I32 [InvalidNumStr]*
toI32 = \string -> strToNumHelp string
## Encode a [Str] to an unsigned [U16] integer. A [U16] value can hold numbers
## from `0` to `65_535`. It can be specified with a u16 suffix.
##
## expect Str.toU16 "1500" == Ok 1500u16
## expect Str.toU16 "0.1" == Err InvalidNumStr
## expect Str.toU16 "-1" == Err InvalidNumStr
## expect Str.toU16 "not a number" == Err InvalidNumStr
toU16 : Str -> Result U16 [InvalidNumStr]*
toU16 = \string -> strToNumHelp string
## Encode a [Str] to a signed [I16] integer. An [I16] value can hold numbers
## from `-32_768` to `32_767`. It can be
## specified with an i16 suffix.
##
## expect Str.toI16 "1500" == Ok 1500i16
## expect Str.toI16 "-1" == Ok -1i16
## expect Str.toI16 "0.1" == Err InvalidNumStr
## expect Str.toI16 "not a number" == Err InvalidNumStr
toI16 : Str -> Result I16 [InvalidNumStr]*
toI16 = \string -> strToNumHelp string
## Encode a [Str] to an unsigned [U8] integer. A [U8] value can hold numbers
## from `0` to `255`. It can be specified with a u8 suffix.
##
## expect Str.toU8 "250" == Ok 250u8
## expect Str.toU8 "-0.1" == Err InvalidNumStr
## expect Str.toU8 "not a number" == Err InvalidNumStr
## expect Str.toU8 "1500" == Err InvalidNumStr
toU8 : Str -> Result U8 [InvalidNumStr]*
toU8 = \string -> strToNumHelp string
## Encode a [Str] to a signed [I8] integer. An [I8] value can hold numbers
## from `-128` to `127`. It can be
## specified with an i8 suffix.
##
## expect Str.toI8 "-15" == Ok -15i8
## expect Str.toI8 "150.00" == Err InvalidNumStr
## expect Str.toI8 "not a number" == Err InvalidNumStr
toI8 : Str -> Result I8 [InvalidNumStr]*
toI8 = \string -> strToNumHelp string
## Gets the byte at the given index, without performing a bounds check
## Get the byte at the given index, without performing a bounds check.
getUnsafe : Str, Nat -> U8
## gives the number of string bytes
## Gives the number of bytes in a [Str] value.
##
## expect Str.countUtf8Bytes "Hello World" == 11
countUtf8Bytes : Str -> Nat
## string slice that does not do bounds checking or utf-8 verification
substringUnsafe : Str, Nat, Nat -> Str
## Returns the string with each occurrence of a substring replaced with a replacement.
## If the substring is not found, returns `Err NotFound`.
## Returns the given [Str] with each occurrence of a substring replaced.
## Returns [Err NotFound] if the substring is not found.
##
## Str.replaceEach "foo/bar/baz" "/" "_" == Ok "foo_bar_baz"
## expect Str.replaceEach "foo/bar/baz" "/" "_" == Ok "foo_bar_baz"
## expect Str.replaceEach "not here" "/" "_" == Err NotFound
replaceEach : Str, Str, Str -> Result Str [NotFound]*
replaceEach = \haystack, needle, flower ->
when splitFirst haystack needle is
Ok { before, after } ->
# We found at least one needle, so start the buffer off with
# `before` followed by the first replacement flower.
Str.reserve "" (Str.countUtf8Bytes haystack)
Str.withCapacity (Str.countUtf8Bytes haystack)
|> Str.concat before
|> Str.concat flower
|> replaceEachHelp after needle flower
@ -310,10 +478,11 @@ replaceEachHelp = \buf, haystack, needle, flower ->
expect Str.replaceEach "abXdeXghi" "X" "_" == Ok "ab_de_ghi"
## Returns the string with the first occurrence of a substring replaced with a replacement.
## If the substring is not found, returns `Err NotFound`.
## Returns the given [Str] with the first occurrence of a substring replaced.
## Returns [Err NotFound] if the substring is not found.
##
## Str.replaceFirst "foo/bar/baz" "/" "_" == Ok "foo_bar/baz"
## expect Str.replaceFirst "foo/bar/baz" "/" "_" == Ok "foo_bar/baz"
## expect Str.replaceFirst "no slashes here" "/" "_" == Err NotFound
replaceFirst : Str, Str, Str -> Result Str [NotFound]*
replaceFirst = \haystack, needle, flower ->
when splitFirst haystack needle is
@ -324,10 +493,11 @@ replaceFirst = \haystack, needle, flower ->
expect Str.replaceFirst "abXdeXghi" "X" "_" == Ok "ab_deXghi"
## Returns the string with the last occurrence of a substring replaced with a replacement.
## If the substring is not found, returns `Err NotFound`.
## Returns the given [Str] with the last occurrence of a substring replaced.
## Returns [Err NotFound] if the substring is not found.
##
## Str.replaceLast "foo/bar/baz" "/" "_" == Ok "foo/bar_baz"
## expect Str.replaceLast "foo/bar/baz" "/" "_" == Ok "foo/bar_baz"
## expect Str.replaceLast "no slashes here" "/" "_" == Err NotFound
replaceLast : Str, Str, Str -> Result Str [NotFound]*
replaceLast = \haystack, needle, flower ->
when splitLast haystack needle is
@ -338,10 +508,12 @@ replaceLast = \haystack, needle, flower ->
expect Str.replaceLast "abXdeXghi" "X" "_" == Ok "abXde_ghi"
## Returns the string before the first occurrence of a delimiter, as well as the
## rest of the string after that occurrence. If the delimiter is not found, returns `Err`.
## Returns the given [Str] before the first occurrence of a [delimiter](https://www.computerhope.com/jargon/d/delimite.htm), as well
## as the rest of the string after that occurrence.
## Returns [Err NotFound] if the delimiter is not found.
##
## Str.splitFirst "foo/bar/baz" "/" == Ok { before: "foo", after: "bar/baz" }
## expect Str.splitFirst "foo/bar/baz" "/" == Ok { before: "foo", after: "bar/baz" }
## expect Str.splitFirst "no slashes here" "/" == Err NotFound
splitFirst : Str, Str -> Result { before : Str, after : Str } [NotFound]*
splitFirst = \haystack, needle ->
when firstMatch haystack needle is
@ -359,6 +531,9 @@ splitFirst = \haystack, needle ->
# splitFirst when needle isn't in haystack
expect splitFirst "foo" "z" == Err NotFound
# splitFirst when needle isn't in haystack, and haystack is empty
expect splitFirst "" "z" == Err NotFound
# splitFirst when haystack ends with needle repeated
expect splitFirst "foo" "o" == Ok { before: "f", after: "o" }
@ -386,10 +561,12 @@ firstMatchHelp = \haystack, needle, index, lastPossible ->
else
None
## Returns the string before the last occurrence of a delimiter, as well as the
## rest of the string after that occurrence. If the delimiter is not found, returns `Err`.
## Returns the given [Str] before the last occurrence of a delimiter, as well as
## the rest of the string after that occurrence.
## Returns [Err NotFound] if the delimiter is not found.
##
## Str.splitLast "foo/bar/baz" "/" == Ok { before: "foo/bar", after: "baz" }
## expect Str.splitLast "foo/bar/baz" "/" == Ok { before: "foo/bar", after: "baz" }
## expect Str.splitLast "no slashes here" "/" == Err NotFound
splitLast : Str, Str -> Result { before : Str, after : Str } [NotFound]*
splitLast = \haystack, needle ->
when lastMatch haystack needle is
@ -444,20 +621,44 @@ matchesAt = \haystack, haystackIndex, needle ->
needleLength = Str.countUtf8Bytes needle
endIndex = min (haystackIndex + needleLength) haystackLength
matchesAtHelp haystack haystackIndex needle 0 endIndex
matchesAtHelp {
haystack,
haystackIndex,
needle,
needleIndex: 0,
needleLength,
endIndex,
}
matchesAtHelp : Str, Nat, Str, Nat, Nat -> Bool
matchesAtHelp = \haystack, haystackIndex, needle, needleIndex, endIndex ->
if haystackIndex < endIndex then
if Str.getUnsafe haystack haystackIndex == Str.getUnsafe needle needleIndex then
matchesAtHelp haystack (haystackIndex + 1) needle (needleIndex + 1) endIndex
else
Bool.false
matchesAtHelp = \state ->
{ haystack, haystackIndex, needle, needleIndex, needleLength, endIndex } = state
isAtEndOfHaystack = haystackIndex >= endIndex
if isAtEndOfHaystack then
didWalkEntireNeedle = needleIndex == needleLength
didWalkEntireNeedle
else
Bool.true
doesThisMatch =
Str.getUnsafe haystack haystackIndex
==
Str.getUnsafe needle needleIndex
doesRestMatch =
matchesAtHelp
{ state &
haystackIndex: haystackIndex + 1,
needleIndex: needleIndex + 1,
}
## Walks over the string's UTF-8 bytes, calling a function which updates a state using each
## UTF-8 `U8` byte as well as the index of that byte within the string.
doesThisMatch && doesRestMatch
## Walks over the `UTF-8` bytes of the given [Str] and calls a function to update
## state for each byte. The index for that byte in the string is provided
## to the update function.
##
## f : List U8, U8, Nat -> List U8
## f = \state, byte, _ -> List.append state byte
## expect Str.walkUtf8WithIndex "ABC" [] f == [65, 66, 67]
walkUtf8WithIndex : Str, state, (state, U8, Nat -> state) -> state
walkUtf8WithIndex = \string, state, step ->
walkUtf8WithIndexHelp string state step 0 (Str.countUtf8Bytes string)
@ -472,12 +673,17 @@ walkUtf8WithIndexHelp = \string, state, step, index, length ->
else
state
## Make sure at least some number of bytes fit in this string without reallocating
## Enlarge the given [Str] so that at least the given number of additional bytes fit without reallocating.
reserve : Str, Nat -> Str
## is UB when the scalar is invalid
appendScalarUnsafe : Str, U32 -> Str
## Append a [U32] scalar to the given [Str]. If the given scalar is not a valid
## unicode value, it will return [Err InvalidScalar].
##
## expect Str.appendScalar "H" 105 == Ok "Hi"
## expect Str.appendScalar "😢" 0xabcdef == Err InvalidScalar
appendScalar : Str, U32 -> Result Str [InvalidScalar]*
appendScalar = \string, scalar ->
if isValidScalar scalar then
@ -491,6 +697,12 @@ isValidScalar = \scalar ->
getScalarUnsafe : Str, Nat -> { scalar : U32, bytesParsed : Nat }
## Walks over the unicode [U32] values for the given [Str] and calls a function
## to update state for each.
##
## f : List U32, U32 -> List U32
## f = \state, scalar -> List.append state scalar
## expect Str.walkScalars "ABC" [] f == [65, 66, 67]
walkScalars : Str, state, (state, U32 -> state) -> state
walkScalars = \string, init, step ->
walkScalarsHelp string init step 0 (Str.countUtf8Bytes string)
@ -505,6 +717,18 @@ walkScalarsHelp = \string, state, step, index, length ->
else
state
## Walks over the unicode [U32] values for the given [Str] and calls a function
## to update state for each, stopping early if the function returns `Break`.
##
## f : List U32, U32 -> [Break (List U32), Continue (List U32)]
## f = \state, scalar ->
## check = 66
## if scalar == check then
## Break [check]
## else
## Continue (List.append state scalar)
## expect Str.walkScalarsUntil "ABC" [] f == [66]
## expect Str.walkScalarsUntil "AxC" [] f == [65, 120, 67]
walkScalarsUntil : Str, state, (state, U32 -> [Break state, Continue state]) -> state
walkScalarsUntil = \string, init, step ->
walkScalarsUntilHelp string init step 0 (Str.countUtf8Bytes string)
@ -534,3 +758,9 @@ strToNumHelp = \string ->
Ok result.aresult
else
Err InvalidNumStr
## Adds a prefix to the given [Str].
##
## expect Str.withPrefix "Awesome" "Roc" == "RocAwesome"
withPrefix : Str, Str -> Str
withPrefix = \str, prefix -> Str.concat prefix str

View file

@ -361,6 +361,8 @@ pub const STR_RESERVE: &str = "roc_builtins.str.reserve";
pub const STR_APPEND_SCALAR: &str = "roc_builtins.str.append_scalar";
pub const STR_GET_SCALAR_UNSAFE: &str = "roc_builtins.str.get_scalar_unsafe";
pub const STR_CLONE_TO: &str = "roc_builtins.str.clone_to";
pub const STR_WITH_CAPACITY: &str = "roc_builtins.str.with_capacity";
pub const STR_GRAPHEMES: &str = "roc_builtins.str.graphemes";
pub const LIST_MAP: &str = "roc_builtins.list.map";
pub const LIST_MAP2: &str = "roc_builtins.list.map2";

View file

@ -13,6 +13,7 @@ pub fn module_source(module_id: ModuleId) -> &'static str {
ModuleId::BOOL => BOOL,
ModuleId::ENCODE => ENCODE,
ModuleId::DECODE => DECODE,
ModuleId::HASH => HASH,
ModuleId::JSON => JSON,
_ => panic!(
"ModuleId {:?} is not part of the standard library",
@ -31,4 +32,5 @@ const BOX: &str = include_str!("../roc/Box.roc");
const BOOL: &str = include_str!("../roc/Bool.roc");
const ENCODE: &str = include_str!("../roc/Encode.roc");
const DECODE: &str = include_str!("../roc/Decode.roc");
const HASH: &str = include_str!("../roc/Hash.roc");
const JSON: &str = include_str!("../roc/Json.roc");