Merge branch 'main' into str-withprefix

This commit is contained in:
Prajwal S N 2022-10-09 15:53:16 +05:30
commit aef15ac1e8
No known key found for this signature in database
GPG key ID: D0FECEE245BC2695
57 changed files with 1388 additions and 631 deletions

View file

@ -15,7 +15,7 @@ lazy_static = "1.4.0"
[build-dependencies]
# dunce can be removed once ziglang/zig#5109 is fixed
dunce = "1.0.2"
dunce = "1.0.3"
[target.'cfg(target_os = "macos")'.build-dependencies]
tempfile = "3.2.0"

View file

@ -93,20 +93,6 @@ pub const RocList = extern struct {
return (ptr - 1)[0] == utils.REFCOUNT_ONE;
}
/// Build a RocList whose length and capacity are both `length`.
/// The backing buffer of `length * element_size` bytes is obtained from
/// `utils.allocateWithRefcount` with the given `alignment`.
pub fn allocate(
    alignment: u32,
    length: usize,
    element_size: usize,
) RocList {
    const bytes = utils.allocateWithRefcount(length * element_size, alignment);

    return RocList{ .bytes = bytes, .length = length, .capacity = length };
}
pub fn makeUniqueExtra(self: RocList, alignment: u32, element_width: usize, update_mode: UpdateMode) RocList {
if (update_mode == .InPlace) {
return self;
@ -140,11 +126,117 @@ pub const RocList = extern struct {
return new_list;
}
// We roughly follow [fbvector](https://github.com/facebook/folly/blob/main/folly/docs/FBVector.md) when it comes to growing a RocList.
// Here is [their growth strategy](https://github.com/facebook/folly/blob/3e0525988fd444201b19b76b390a5927c15cb697/folly/FBVector.h#L1128) for push_back:
//
// (1) initial size
// Instead of growing to size 1 from empty, fbvector allocates at least
// 64 bytes. You may still use reserve to reserve a lesser amount of
// memory.
// (2) 1.5x
// For medium-sized vectors, the growth strategy is 1.5x. See the docs
// for details.
// This does not apply to very small or very large fbvectors. This is a
// heuristic.
//
// In our case, we expose allocate and reallocate, which use a smart growth strategy.
// We also expose allocateExact and reallocateExact for cases where a specific number of elements is requested.
// calculateCapacity should only be called in cases where the list will be growing.
// requested_length should always be greater than old_capacity.
/// Pick the capacity (in elements) to grow to, given the current capacity,
/// the number of elements the caller needs, and the byte width of one element.
/// The result is never smaller than `requested_length`.
inline fn calculateCapacity(
    old_capacity: usize,
    requested_length: usize,
    element_width: usize,
) usize {
    // Zero-width elements: hand back exactly what was asked for. This guard
    // also keeps the divisions below from dividing by zero.
    if (element_width == 0) {
        return requested_length;
    }

    // TODO: there are two adjustments that would likely lead to better results for Roc.
    // 1. Deal with the fact we allocate an extra u64 for refcount.
    //    This may lead to allocating page size + 8 bytes.
    //    That could mean allocating an entire page for 8 bytes of data which isn't great.
    // 2. Deal with the fact that we can request more than 1 element at a time.
    //    fbvector assumes just appending 1 element at a time when using this algorithm.
    //    As such, they will generally grow in a way that should better match certain memory multiples.
    //    This is also the normal case for roc, but we could also grow by a much larger amount.
    //    We may want to round to multiples of 2 or something similar.

    // Element-count thresholds corresponding to 4096 bytes and 4096 * 32 bytes.
    const small_limit = 4096 / element_width;
    const large_limit = 4096 * 32 / element_width;

    const grown: usize = if (old_capacity == 0)
        // First allocation: room for 64 bytes worth of elements.
        64 / element_width
    else if (old_capacity < small_limit or old_capacity > large_limit)
        // Below the small threshold or above the large one: double.
        old_capacity * 2
    else
        // In between: grow by 1.5x, rounding up.
        (old_capacity * 3 + 1) / 2;

    return @maximum(grown, requested_length);
}
/// Create a list of `length` elements, sizing the buffer with
/// `calculateCapacity` so the capacity may exceed `length`.
/// A `length` of 0 returns `empty()` without allocating.
pub fn allocate(
    alignment: u32,
    length: usize,
    element_width: usize,
) RocList {
    if (length == 0) {
        return empty();
    }

    // An old capacity of 0 selects calculateCapacity's first-allocation branch.
    const initial_capacity = calculateCapacity(0, length, element_width);
    const bytes = utils.allocateWithRefcount(initial_capacity * element_width, alignment);

    return RocList{ .bytes = bytes, .length = length, .capacity = initial_capacity };
}
/// Create a list of `length` elements whose capacity is exactly `length`
/// (no growth heuristic is applied).
/// A `length` of 0 returns `empty()` without allocating.
pub fn allocateExact(
    alignment: u32,
    length: usize,
    element_width: usize,
) RocList {
    if (length == 0) {
        return empty();
    }

    const bytes = utils.allocateWithRefcount(length * element_width, alignment);

    return RocList{ .bytes = bytes, .length = length, .capacity = length };
}
/// Return a list of `new_length` elements, letting capacity grow via
/// `calculateCapacity` when the current capacity is too small.
///
/// - No backing bytes: falls through to `RocList.allocate`.
/// - `isUnique()` is true and capacity suffices: same bytes, only the length changes.
/// - `isUnique()` is true and capacity is too small: grows via `utils.unsafeReallocate`.
/// - Otherwise: builds the result via `reallocateFresh`.
pub fn reallocate(
    self: RocList,
    alignment: u32,
    new_length: usize,
    element_width: usize,
) RocList {
    const source_ptr = self.bytes orelse
        return RocList.allocate(alignment, new_length, element_width);

    const fits = self.capacity >= new_length;

    if (!self.isUnique()) {
        // TODO: Investigate the performance of this.
        // Maybe we should just always reallocate to the new_length instead of expanding capacity?
        const shared_capacity = if (fits)
            self.capacity
        else
            calculateCapacity(self.capacity, new_length, element_width);
        return self.reallocateFresh(alignment, new_length, shared_capacity, element_width);
    }

    if (fits) {
        // Enough room already: keep the same bytes and capacity.
        return RocList{ .bytes = self.bytes, .length = new_length, .capacity = self.capacity };
    }

    const grown_capacity = calculateCapacity(self.capacity, new_length, element_width);
    const grown_bytes = utils.unsafeReallocate(source_ptr, alignment, self.len(), grown_capacity, element_width);
    return RocList{ .bytes = grown_bytes, .length = new_length, .capacity = grown_capacity };
}
pub fn reallocateExact(
self: RocList,
alignment: u32,
new_length: usize,
element_width: usize,
) RocList {
if (self.bytes) |source_ptr| {
if (self.isUnique()) {
@ -155,9 +247,9 @@ pub const RocList = extern struct {
return RocList{ .bytes = new_source, .length = new_length, .capacity = new_length };
}
}
return self.reallocateFresh(alignment, new_length, new_length, element_width);
}
return self.reallocateFresh(alignment, new_length, element_width);
return RocList.allocateExact(alignment, new_length, element_width);
}
/// reallocate by explicitly making a new allocation and copying elements over
@ -165,16 +257,16 @@ pub const RocList = extern struct {
self: RocList,
alignment: u32,
new_length: usize,
new_capacity: usize,
element_width: usize,
) RocList {
const old_length = self.length;
const delta_length = new_length - old_length;
const data_bytes = new_length * element_width;
const data_bytes = new_capacity * element_width;
const first_slot = utils.allocateWithRefcount(data_bytes, alignment);
// transfer the memory
if (self.bytes) |source_ptr| {
const dest_ptr = first_slot;
@ -185,7 +277,7 @@ pub const RocList = extern struct {
const result = RocList{
.bytes = first_slot,
.length = new_length,
.capacity = new_length,
.capacity = new_capacity,
};
utils.decref(self.bytes, old_length * element_width, alignment);
@ -412,7 +504,7 @@ pub fn listWithCapacity(
alignment: u32,
element_width: usize,
) callconv(.C) RocList {
var output = RocList.allocate(alignment, capacity, element_width);
var output = RocList.allocateExact(alignment, capacity, element_width);
output.length = 0;
return output;
}

View file

@ -144,6 +144,7 @@ comptime {
exportStrFn(str.strTrimLeft, "trim_left");
exportStrFn(str.strTrimRight, "trim_right");
exportStrFn(str.strCloneTo, "clone_to");
exportStrFn(str.withCapacity, "with_capacity");
inline for (INTEGERS) |T| {
str.exportFromInt(T, ROC_BUILTINS ++ "." ++ STR ++ ".from_int.");

View file

@ -2596,6 +2596,10 @@ pub fn reserve(string: RocStr, capacity: usize) callconv(.C) RocStr {
}
}
/// Exported builtin: forwards to `RocStr.allocate(0, capacity)`.
/// NOTE(review): presumably the first argument is the initial length, giving an
/// empty string with `capacity` reserved; confirm against `RocStr.allocate`.
pub fn withCapacity(capacity: usize) callconv(.C) RocStr {
    const initial_length = 0;
    return RocStr.allocate(initial_length, capacity);
}
pub fn getScalarUnsafe(string: RocStr, index: usize) callconv(.C) extern struct { bytesParsed: usize, scalar: u32 } {
const slice = string.asSlice();
const bytesParsed = @intCast(usize, std.unicode.utf8ByteSequenceLength(slice[index]) catch unreachable);

View file

@ -61,6 +61,7 @@ interface List
sortAsc,
sortDesc,
reserve,
walkBackwardsUntil,
]
imports [
Bool.{ Bool },
@ -88,9 +89,8 @@ interface List
##
## ## Performance Details
##
## Under the hood, a list is a record containing a `len : Nat` field as well
## as a pointer to a reference count and a flat array of bytes. Unique lists
## store a capacity #Nat instead of a reference count.
## Under the hood, a list is a record containing a `len : Nat` field, a `capacity : Nat`
## field, and a pointer to a reference count and a flat array of bytes.
##
## ## Shared Lists
##
@ -112,9 +112,8 @@ interface List
## begins with a refcount of 1, because so far only `ratings` is referencing it.
##
## The second line alters this refcount. `{ foo: ratings` references
## the `ratings` list, which will result in its refcount getting incremented
## from 0 to 1. Similarly, `bar: ratings }` also references the `ratings` list,
## which will result in its refcount getting incremented from 1 to 2.
## the `ratings` list, and so does `bar: ratings }`. This will result in its
## refcount getting incremented from 1 to 3.
##
## Let's turn this example into a function.
##
@ -132,11 +131,11 @@ interface List
##
## Since `ratings` represented a way to reference the list, and that way is no
## longer accessible, the list's refcount gets decremented when `ratings` goes
## out of scope. It will decrease from 2 back down to 1.
## out of scope. It will decrease from 3 back down to 2.
##
## Putting these together, when we call `getRatings 5`, what we get back is
## a record with two fields, `foo`, and `bar`, each of which refers to the same
## list, and that list has a refcount of 1.
## list, and that list has a refcount of 2.
##
## Let's change the last line to be `(getRatings 5).bar` instead of `getRatings 5`:
##
@ -436,6 +435,13 @@ walkUntil = \list, initial, step ->
Continue new -> new
Break new -> new
## Same as [List.walkUntil], but does it from the end of the list instead.
walkBackwardsUntil : List elem, state, (state, elem -> [Continue state, Break state]) -> state
walkBackwardsUntil = \list, initial, step ->
    when List.iterateBackwards list initial step is
        Continue finalState -> finalState
        Break finalState -> finalState
## Folds the list with [Num.add], starting from 0.
sum : List (Num a) -> Num a
sum = \numbers ->
    List.walk numbers 0 Num.add

View file

@ -43,6 +43,7 @@ interface Str
appendScalar,
walkScalars,
walkScalarsUntil,
withCapacity,
withPrefix,
]
imports [
@ -145,6 +146,9 @@ Utf8Problem : { byteIndex : Nat, problem : Utf8ByteProblem }
isEmpty : Str -> Bool
concat : Str, Str -> Str
## Returns a string of the specified capacity without any content
withCapacity : Nat -> Str
## Combine a list of strings into a single string, with a separator
## string in between each.
##

View file

@ -361,6 +361,7 @@ pub const STR_RESERVE: &str = "roc_builtins.str.reserve";
pub const STR_APPEND_SCALAR: &str = "roc_builtins.str.append_scalar";
pub const STR_GET_SCALAR_UNSAFE: &str = "roc_builtins.str.get_scalar_unsafe";
pub const STR_CLONE_TO: &str = "roc_builtins.str.clone_to";
pub const STR_WITH_CAPACITY: &str = "roc_builtins.str.with_capacity";
pub const LIST_MAP: &str = "roc_builtins.list.map";
pub const LIST_MAP2: &str = "roc_builtins.list.map2";