mirror of
https://github.com/roc-lang/roc.git
synced 2025-07-24 06:55:15 +00:00
Merge pull request #3365 from rtfeldman/to-scalars
Add Str.toScalars builtin
This commit is contained in:
commit
75b4b3a206
17 changed files with 460 additions and 20 deletions
|
@ -3,6 +3,7 @@ const utils = @import("utils.zig");
|
||||||
const RocResult = utils.RocResult;
|
const RocResult = utils.RocResult;
|
||||||
const UpdateMode = utils.UpdateMode;
|
const UpdateMode = utils.UpdateMode;
|
||||||
const mem = std.mem;
|
const mem = std.mem;
|
||||||
|
const math = std.math;
|
||||||
|
|
||||||
const EqFn = fn (?[*]u8, ?[*]u8) callconv(.C) bool;
|
const EqFn = fn (?[*]u8, ?[*]u8) callconv(.C) bool;
|
||||||
const CompareFn = fn (?[*]u8, ?[*]u8, ?[*]u8) callconv(.C) u8;
|
const CompareFn = fn (?[*]u8, ?[*]u8, ?[*]u8) callconv(.C) u8;
|
||||||
|
@ -30,6 +31,57 @@ pub const RocList = extern struct {
|
||||||
return RocList{ .bytes = null, .length = 0, .capacity = 0 };
|
return RocList{ .bytes = null, .length = 0, .capacity = 0 };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reports whether two lists have the same length and the same leading bytes.
///
/// The comparison walks `len()` single bytes of each backing buffer; the
/// element width is not known to this function.
/// NOTE(review): for element types wider than one byte this appears to inspect
/// only the first `len()` bytes rather than every element — confirm that
/// callers comparing e.g. `u32` lists are fine with that.
pub fn eql(self: RocList, other: RocList) bool {
    const count = self.len();
    if (count != other.len()) return false;

    // Equal lengths and one side empty means both sides are empty.
    if (self.isEmpty()) return true;

    // The code assumes a non-empty list never has null bytes.
    const lhs = self.bytes orelse unreachable;
    const rhs = other.bytes orelse unreachable;

    var i: usize = 0;
    while (i < count) : (i += 1) {
        if (lhs[i] != rhs[i]) return false;
    }

    return true;
}
|
||||||
|
|
||||||
|
/// Builds a newly allocated list holding a copy of `slice`.
///
/// An empty slice yields `RocList.empty()` without allocating. Otherwise the
/// list is allocated for `slice.len` elements of `T` and the slice contents
/// are copied in as raw bytes.
pub fn fromSlice(comptime T: type, slice: []const T) RocList {
    if (slice.len == 0) {
        return RocList.empty();
    }

    const list = allocate(@alignOf(T), slice.len, @sizeOf(T));

    // The empty case already returned above, so there is always data to copy
    // and no second length check is needed.
    const dest = list.bytes orelse unreachable;
    const src = @ptrCast([*]const u8, slice.ptr);
    const num_bytes = slice.len * @sizeOf(T);

    @memcpy(dest, src, num_bytes);

    return list;
}
|
||||||
|
|
||||||
|
/// Hands this list's backing bytes to `utils.decref`.
///
/// The byte pointer, the list length and the alignment of `T` are forwarded
/// unchanged.
pub fn deinit(self: RocList, comptime T: type) void {
    const alignment = @alignOf(T);
    const length = self.len();

    utils.decref(self.bytes, length, alignment);
}
|
||||||
|
|
||||||
|
/// Views the list's backing bytes as an optional many-item pointer to `T`.
///
/// The pointer is first cast to the alignment of `T`, then to `?[*]T`.
pub fn elements(self: RocList, comptime T: type) ?[*]T {
    const aligned = @alignCast(@alignOf(T), self.bytes);

    return @ptrCast(?[*]T, aligned);
}
|
||||||
|
|
||||||
pub fn isUnique(self: RocList) bool {
|
pub fn isUnique(self: RocList) bool {
|
||||||
// the empty list is unique (in the sense that copying it will not leak memory)
|
// the empty list is unique (in the sense that copying it will not leak memory)
|
||||||
if (self.isEmpty()) {
|
if (self.isEmpty()) {
|
||||||
|
|
|
@ -152,6 +152,7 @@ comptime {
|
||||||
const str = @import("str.zig");
|
const str = @import("str.zig");
|
||||||
comptime {
|
comptime {
|
||||||
exportStrFn(str.init, "init");
|
exportStrFn(str.init, "init");
|
||||||
|
exportStrFn(str.strToScalarsC, "to_scalars");
|
||||||
exportStrFn(str.strSplitInPlaceC, "str_split_in_place");
|
exportStrFn(str.strSplitInPlaceC, "str_split_in_place");
|
||||||
exportStrFn(str.countSegments, "count_segments");
|
exportStrFn(str.countSegments, "count_segments");
|
||||||
exportStrFn(str.countGraphemeClusters, "count_grapheme_clusters");
|
exportStrFn(str.countGraphemeClusters, "count_grapheme_clusters");
|
||||||
|
|
|
@ -56,6 +56,10 @@ pub const RocStr = extern struct {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Creates a `RocStr` from `slice` by passing its pointer and length to
/// `RocStr.init`.
pub fn fromSlice(slice: []const u8) RocStr {
    const bytes = slice.ptr;
    const length = slice.len;

    return RocStr.init(bytes, length);
}
|
||||||
|
|
||||||
pub fn initBig(_: InPlace, number_of_chars: usize) RocStr {
|
pub fn initBig(_: InPlace, number_of_chars: usize) RocStr {
|
||||||
const first_element = utils.allocateWithRefcount(number_of_chars, @sizeOf(usize));
|
const first_element = utils.allocateWithRefcount(number_of_chars, @sizeOf(usize));
|
||||||
|
|
||||||
|
@ -227,6 +231,17 @@ pub const RocStr = extern struct {
|
||||||
return self.str_capacity ^ MASK;
|
return self.str_capacity ^ MASK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reads the byte at `index` with no bounds checking whatsoever.
///
/// The only check performed is the small-string check, which decides where
/// the byte is read from. Keeping `index` in range is up to the caller.
pub fn getUnchecked(self: RocStr, index: usize) u8 {
    // Small strings are indexed through their array form.
    if (self.isSmallStr()) return self.asArray()[index];

    // Otherwise go through the byte pointer, which the code assumes is non-null.
    const big_bytes = self.str_bytes orelse unreachable;
    return big_bytes[index];
}
|
||||||
|
|
||||||
pub fn isEmpty(self: RocStr) bool {
|
pub fn isEmpty(self: RocStr) bool {
|
||||||
return self.len() == 0;
|
return self.len() == 0;
|
||||||
}
|
}
|
||||||
|
@ -239,7 +254,7 @@ pub const RocStr = extern struct {
|
||||||
const length = self.len();
|
const length = self.len();
|
||||||
const longest_small_str = @sizeOf(RocStr) - 1;
|
const longest_small_str = @sizeOf(RocStr) - 1;
|
||||||
|
|
||||||
// NOTE: We want to compare length here, *NOT* check for is_small_str!
|
// NOTE: We want to compare length here, *NOT* check for isSmallStr!
|
||||||
// This is because we explicitly want the empty string to be handled in
|
// This is because we explicitly want the empty string to be handled in
|
||||||
// this branch, even though the empty string is not a small string.
|
// this branch, even though the empty string is not a small string.
|
||||||
//
|
//
|
||||||
|
@ -452,6 +467,230 @@ pub fn strNumberOfBytes(string: RocStr) callconv(.C) usize {
|
||||||
return string.len();
|
return string.len();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Str.toScalars
|
||||||
|
/// C-calling-convention entry point for `Str.toScalars`.
///
/// Performs an always-inline call to `strToScalars` and returns its result.
pub fn strToScalarsC(str: RocStr) callconv(.C) RocList {
    const scalars = @call(.{ .modifier = always_inline }, strToScalars, .{str});

    return scalars;
}
|
||||||
|
|
||||||
|
/// Decodes the UTF-8 bytes of `string` into a list of `u32` scalar values.
///
/// An empty string returns `RocList.empty()`. Otherwise the result is
/// allocated up front with one `u32` slot per byte of the string (or per unit
/// of `string.capacity()` when it is not a small string), which may be more
/// than needed but avoids a second allocation.
///
/// No validation is performed: the lead byte alone selects the sequence
/// length, any lead byte that is not a 1-, 2- or 3-byte lead is treated as a
/// 4-byte lead, and continuation bytes are read unchecked.
///
/// How UTF-8 bytes work:
/// https://docs.teradata.com/r/Teradata-Database-International-Character-Set-Support/June-2017/Client-Character-Set-Options/UTF8-Client-Character-Set-Support/UTF8-Multibyte-Sequences
fn strToScalars(string: RocStr) callconv(.C) RocList {
    const byte_count = string.len();

    if (byte_count == 0) return RocList.empty();

    // For preallocation, pretend every byte is its own code point.
    const slots = if (string.isSmallStr()) byte_count else string.capacity();
    var answer = RocList.allocate(@alignOf(u32), slots, @sizeOf(u32));

    // The early return above already established the string is non-empty.
    const scalars = answer.elements(u32) orelse unreachable;
    var read_at: usize = 0;
    var write_at: usize = 0;

    while (read_at < byte_count) : (write_at += 1) {
        const lead = string.getUnchecked(read_at);

        // Classify the lead byte: how many bytes the sequence spans, and
        // which of the lead byte's low bits belong to the code point.
        var seq_len: usize = undefined;
        var scalar: u32 = undefined;

        if (lead <= 127) {
            // 0xxxxxxx — ASCII, copied over directly.
            seq_len = 1;
            scalar = lead;
        } else if (lead >> 5 == 0b0000_0110) {
            // 110xxxxx — two-byte sequence.
            //
            // Example:
            //   utf-8:   1100 1111 1011 0001
            //   code pt: 0000 0011 1111 0001 (decimal: 1009)
            seq_len = 2;
            scalar = lead & 0b0001_1111;
        } else if (lead >> 4 == 0b0000_1110) {
            // 1110xxxx — three-byte sequence.
            seq_len = 3;
            scalar = lead & 0b0000_1111;
        } else {
            // Everything else is taken to be 11110xxx — four-byte sequence.
            seq_len = 4;
            scalar = lead & 0b0000_0111;
        }

        // Each continuation byte drops its high-order 10 marker and
        // contributes its six low bits.
        var offset: usize = 1;
        while (offset < seq_len) : (offset += 1) {
            const continuation = string.getUnchecked(read_at + offset);
            scalar = (scalar << 6) | (continuation & 0b0011_1111);
        }

        scalars[write_at] = scalar;
        read_at += seq_len;
    }

    // Only the slots actually written count towards the list's length.
    answer.length = write_at;

    return answer;
}
|
||||||
|
|
||||||
|
// The empty string should produce the empty list.
test "strToScalars: empty string" {
    const input = RocStr.fromSlice("");
    defer input.deinit();

    const want = RocList.empty();
    const got = strToScalars(input);
    defer got.deinit(u32);

    try expect(got.eql(want));
}
|
||||||
|
|
||||||
|
// A single ASCII byte maps to its own value.
test "strToScalars: One ASCII char" {
    const input = RocStr.fromSlice("R");
    defer input.deinit();

    const want_scalars = [_]u32{82};
    const want = RocList.fromSlice(u32, &want_scalars);
    defer want.deinit(u32);

    const got = strToScalars(input);
    defer got.deinit(u32);

    try expect(got.eql(want));
}
|
||||||
|
|
||||||
|
// Several ASCII bytes map one-to-one onto scalars.
test "strToScalars: Multiple ASCII chars" {
    const input = RocStr.fromSlice("Roc!");
    defer input.deinit();

    const want_scalars = [_]u32{ 82, 111, 99, 33 };
    const want = RocList.fromSlice(u32, &want_scalars);
    defer want.deinit(u32);

    const got = strToScalars(input);
    defer got.deinit(u32);

    try expect(got.eql(want));
}
|
||||||
|
|
||||||
|
// One two-byte sequence decodes to a single scalar.
test "strToScalars: One 2-byte UTF-8 character" {
    const input = RocStr.fromSlice("é");
    defer input.deinit();

    const want_scalars = [_]u32{233};
    const want = RocList.fromSlice(u32, &want_scalars);
    defer want.deinit(u32);

    const got = strToScalars(input);
    defer got.deinit(u32);

    try expect(got.eql(want));
}
|
||||||
|
|
||||||
|
// A mix of ASCII and two-byte sequences.
test "strToScalars: Multiple 2-byte UTF-8 characters" {
    const input = RocStr.fromSlice("Cäfés");
    defer input.deinit();

    const want_scalars = [_]u32{ 67, 228, 102, 233, 115 };
    const want = RocList.fromSlice(u32, &want_scalars);
    defer want.deinit(u32);

    const got = strToScalars(input);
    defer got.deinit(u32);

    try expect(got.eql(want));
}
|
||||||
|
|
||||||
|
// One three-byte sequence decodes to a single scalar.
test "strToScalars: One 3-byte UTF-8 character" {
    const input = RocStr.fromSlice("鹏");
    defer input.deinit();

    const want_scalars = [_]u32{40527};
    const want = RocList.fromSlice(u32, &want_scalars);
    defer want.deinit(u32);

    const got = strToScalars(input);
    defer got.deinit(u32);

    try expect(got.eql(want));
}
|
||||||
|
|
||||||
|
// Consecutive three-byte sequences each decode to one scalar.
test "strToScalars: Multiple 3-byte UTF-8 characters" {
    const input = RocStr.fromSlice("鹏很有趣");
    defer input.deinit();

    const want_scalars = [_]u32{ 40527, 24456, 26377, 36259 };
    const want = RocList.fromSlice(u32, &want_scalars);
    defer want.deinit(u32);

    const got = strToScalars(input);
    defer got.deinit(u32);

    try expect(got.eql(want));
}
|
||||||
|
|
||||||
|
// One four-byte sequence decodes to a single scalar.
test "strToScalars: One 4-byte UTF-8 character" {
    // from https://design215.com/toolbox/utf8-4byte-characters.php
    const input = RocStr.fromSlice("𒀀");
    defer input.deinit();

    const want_scalars = [_]u32{73728};
    const want = RocList.fromSlice(u32, &want_scalars);
    defer want.deinit(u32);

    const got = strToScalars(input);
    defer got.deinit(u32);

    try expect(got.eql(want));
}
|
||||||
|
|
||||||
|
// Consecutive four-byte sequences each decode to one scalar.
test "strToScalars: Multiple 4-byte UTF-8 characters" {
    // from https://design215.com/toolbox/utf8-4byte-characters.php
    const input = RocStr.fromSlice("𒀀𒀁");
    defer input.deinit();

    const want_scalars = [_]u32{ 73728, 73729 };
    const want = RocList.fromSlice(u32, &want_scalars);
    defer want.deinit(u32);

    const got = strToScalars(input);
    defer got.deinit(u32);

    try expect(got.eql(want));
}
|
||||||
|
|
||||||
// Str.fromInt
|
// Str.fromInt
|
||||||
pub fn exportFromInt(comptime T: type, comptime name: []const u8) void {
|
pub fn exportFromInt(comptime T: type, comptime name: []const u8) void {
|
||||||
comptime var f = struct {
|
comptime var f = struct {
|
||||||
|
|
|
@ -183,7 +183,7 @@ pub fn decref(
|
||||||
|
|
||||||
var bytes = bytes_or_null orelse return;
|
var bytes = bytes_or_null orelse return;
|
||||||
|
|
||||||
const isizes: [*]isize = @ptrCast([*]isize, @alignCast(@sizeOf(isize), bytes));
|
const isizes: [*]isize = @ptrCast([*]isize, @alignCast(@alignOf(isize), bytes));
|
||||||
|
|
||||||
decref_ptr_to_refcount(isizes - 1, alignment);
|
decref_ptr_to_refcount(isizes - 1, alignment);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
interface Str
|
interface Str
|
||||||
exposes
|
exposes
|
||||||
[
|
[
|
||||||
concat,
|
|
||||||
Utf8Problem,
|
Utf8Problem,
|
||||||
Utf8ByteProblem,
|
Utf8ByteProblem,
|
||||||
|
concat,
|
||||||
isEmpty,
|
isEmpty,
|
||||||
joinWith,
|
joinWith,
|
||||||
split,
|
split,
|
||||||
|
@ -32,6 +32,7 @@ interface Str
|
||||||
toI16,
|
toI16,
|
||||||
toU8,
|
toU8,
|
||||||
toI8,
|
toI8,
|
||||||
|
toScalars,
|
||||||
]
|
]
|
||||||
imports [Bool.{ Bool }, Result.{ Result }]
|
imports [Bool.{ Bool }, Result.{ Result }]
|
||||||
|
|
||||||
|
@ -172,6 +173,13 @@ countGraphemes : Str -> Nat
|
||||||
## single [U32]. You'd need to use `Str.startsWithCodePt "🕊"` instead.
|
## single [U32]. You'd need to use `Str.startsWithCodePt "🕊"` instead.
|
||||||
startsWithCodePt : Str, U32 -> Bool
|
startsWithCodePt : Str, U32 -> Bool
|
||||||
|
|
||||||
|
## Return a [List] of the [Unicode scalar values](https://unicode.org/glossary/#unicode_scalar_value)
|
||||||
|
## in the given string.
|
||||||
|
##
|
||||||
|
## (Strings contain only scalar values, not [surrogate code points](https://unicode.org/glossary/#surrogate_code_point),
|
||||||
|
## so this is equivalent to returning a list of the string's [code points](https://unicode.org/glossary/#code_point).)
|
||||||
|
toScalars : Str -> List U32
|
||||||
|
|
||||||
## Return a [List] of the string's [U8] UTF-8 [code units](https://unicode.org/glossary/#code_unit).
|
## Return a [List] of the string's [U8] UTF-8 [code units](https://unicode.org/glossary/#code_unit).
|
||||||
## (To split the string into a [List] of smaller [Str] values instead of [U8] values,
|
## (To split the string into a [List] of smaller [Str] values instead of [U8] values,
|
||||||
## see [Str.split].)
|
## see [Str.split].)
|
||||||
|
|
|
@ -311,6 +311,7 @@ pub const STR_COUNT_SEGMENTS: &str = "roc_builtins.str.count_segments";
|
||||||
pub const STR_CONCAT: &str = "roc_builtins.str.concat";
|
pub const STR_CONCAT: &str = "roc_builtins.str.concat";
|
||||||
pub const STR_JOIN_WITH: &str = "roc_builtins.str.joinWith";
|
pub const STR_JOIN_WITH: &str = "roc_builtins.str.joinWith";
|
||||||
pub const STR_STR_SPLIT_IN_PLACE: &str = "roc_builtins.str.str_split_in_place";
|
pub const STR_STR_SPLIT_IN_PLACE: &str = "roc_builtins.str.str_split_in_place";
|
||||||
|
pub const STR_TO_SCALARS: &str = "roc_builtins.str.to_scalars";
|
||||||
pub const STR_COUNT_GRAPEHEME_CLUSTERS: &str = "roc_builtins.str.count_grapheme_clusters";
|
pub const STR_COUNT_GRAPEHEME_CLUSTERS: &str = "roc_builtins.str.count_grapheme_clusters";
|
||||||
pub const STR_STARTS_WITH: &str = "roc_builtins.str.starts_with";
|
pub const STR_STARTS_WITH: &str = "roc_builtins.str.starts_with";
|
||||||
pub const STR_STARTS_WITH_CODE_PT: &str = "roc_builtins.str.starts_with_code_point";
|
pub const STR_STARTS_WITH_CODE_PT: &str = "roc_builtins.str.starts_with_code_point";
|
||||||
|
|
|
@ -873,6 +873,13 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
|
||||||
Box::new(str_type()),
|
Box::new(str_type()),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Str.toScalars : Str -> List U32
|
||||||
|
add_top_level_function_type!(
|
||||||
|
Symbol::STR_TO_SCALARS,
|
||||||
|
vec![str_type()],
|
||||||
|
Box::new(list_type(u32_type())),
|
||||||
|
);
|
||||||
|
|
||||||
// isEmpty : Str -> Bool
|
// isEmpty : Str -> Bool
|
||||||
add_top_level_function_type!(
|
add_top_level_function_type!(
|
||||||
Symbol::STR_IS_EMPTY,
|
Symbol::STR_IS_EMPTY,
|
||||||
|
|
|
@ -73,6 +73,7 @@ pub fn builtin_defs_map(symbol: Symbol, var_store: &mut VarStore) -> Option<Def>
|
||||||
BOOL_NOT => bool_not,
|
BOOL_NOT => bool_not,
|
||||||
STR_CONCAT => str_concat,
|
STR_CONCAT => str_concat,
|
||||||
STR_JOIN_WITH => str_join_with,
|
STR_JOIN_WITH => str_join_with,
|
||||||
|
STR_TO_SCALARS => str_to_scalars,
|
||||||
STR_SPLIT => str_split,
|
STR_SPLIT => str_split,
|
||||||
STR_IS_EMPTY => str_is_empty,
|
STR_IS_EMPTY => str_is_empty,
|
||||||
STR_STARTS_WITH => str_starts_with,
|
STR_STARTS_WITH => str_starts_with,
|
||||||
|
@ -1672,6 +1673,26 @@ fn str_concat(symbol: Symbol, var_store: &mut VarStore) -> Def {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Str.toScalars : Str -> List U32
|
||||||
|
fn str_to_scalars(symbol: Symbol, var_store: &mut VarStore) -> Def {
|
||||||
|
let str_var = var_store.fresh();
|
||||||
|
let list_u32_var = var_store.fresh();
|
||||||
|
|
||||||
|
let body = RunLowLevel {
|
||||||
|
op: LowLevel::StrToScalars,
|
||||||
|
args: vec![(str_var, Var(Symbol::ARG_1))],
|
||||||
|
ret_var: list_u32_var,
|
||||||
|
};
|
||||||
|
|
||||||
|
defn(
|
||||||
|
symbol,
|
||||||
|
vec![(str_var, Symbol::ARG_1)],
|
||||||
|
var_store,
|
||||||
|
body,
|
||||||
|
list_u32_var,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
/// Str.joinWith : List Str, Str -> Str
|
/// Str.joinWith : List Str, Str -> Str
|
||||||
fn str_join_with(symbol: Symbol, var_store: &mut VarStore) -> Def {
|
fn str_join_with(symbol: Symbol, var_store: &mut VarStore) -> Def {
|
||||||
let list_str_var = var_store.fresh();
|
let list_str_var = var_store.fresh();
|
||||||
|
|
|
@ -5310,6 +5310,14 @@ fn run_low_level<'a, 'ctx, 'env>(
|
||||||
|
|
||||||
call_str_bitcode_fn(env, &[list.into(), string], bitcode::STR_JOIN_WITH)
|
call_str_bitcode_fn(env, &[list.into(), string], bitcode::STR_JOIN_WITH)
|
||||||
}
|
}
|
||||||
|
StrToScalars => {
|
||||||
|
// Str.toScalars : Str -> List U32
|
||||||
|
debug_assert_eq!(args.len(), 1);
|
||||||
|
|
||||||
|
let string = load_symbol(scope, &args[0]);
|
||||||
|
|
||||||
|
call_list_bitcode_fn(env, &[string], bitcode::STR_TO_SCALARS)
|
||||||
|
}
|
||||||
StrStartsWith => {
|
StrStartsWith => {
|
||||||
// Str.startsWith : Str, Str -> Bool
|
// Str.startsWith : Str, Str -> Bool
|
||||||
debug_assert_eq!(args.len(), 2);
|
debug_assert_eq!(args.len(), 2);
|
||||||
|
|
|
@ -217,6 +217,7 @@ impl<'a> LowLevelCall<'a> {
|
||||||
match self.lowlevel {
|
match self.lowlevel {
|
||||||
// Str
|
// Str
|
||||||
StrConcat => self.load_args_and_call_zig(backend, bitcode::STR_CONCAT),
|
StrConcat => self.load_args_and_call_zig(backend, bitcode::STR_CONCAT),
|
||||||
|
StrToScalars => self.load_args_and_call_zig(backend, bitcode::STR_TO_SCALARS),
|
||||||
StrJoinWith => self.load_args_and_call_zig(backend, bitcode::STR_JOIN_WITH),
|
StrJoinWith => self.load_args_and_call_zig(backend, bitcode::STR_JOIN_WITH),
|
||||||
StrIsEmpty => match backend.storage.get(&self.arguments[0]) {
|
StrIsEmpty => match backend.storage.get(&self.arguments[0]) {
|
||||||
StoredValue::StackMemory { location, .. } => {
|
StoredValue::StackMemory { location, .. } => {
|
||||||
|
|
|
@ -23,6 +23,7 @@ pub enum LowLevel {
|
||||||
StrTrimLeft,
|
StrTrimLeft,
|
||||||
StrTrimRight,
|
StrTrimRight,
|
||||||
StrToNum,
|
StrToNum,
|
||||||
|
StrToScalars,
|
||||||
ListLen,
|
ListLen,
|
||||||
ListWithCapacity,
|
ListWithCapacity,
|
||||||
ListGetUnsafe,
|
ListGetUnsafe,
|
||||||
|
@ -184,6 +185,7 @@ impl LowLevelWrapperType {
|
||||||
|
|
||||||
match symbol {
|
match symbol {
|
||||||
Symbol::STR_CONCAT => CanBeReplacedBy(StrConcat),
|
Symbol::STR_CONCAT => CanBeReplacedBy(StrConcat),
|
||||||
|
Symbol::STR_TO_SCALARS => CanBeReplacedBy(StrToScalars),
|
||||||
Symbol::STR_JOIN_WITH => CanBeReplacedBy(StrJoinWith),
|
Symbol::STR_JOIN_WITH => CanBeReplacedBy(StrJoinWith),
|
||||||
Symbol::STR_IS_EMPTY => CanBeReplacedBy(StrIsEmpty),
|
Symbol::STR_IS_EMPTY => CanBeReplacedBy(StrIsEmpty),
|
||||||
Symbol::STR_STARTS_WITH => CanBeReplacedBy(StrStartsWith),
|
Symbol::STR_STARTS_WITH => CanBeReplacedBy(StrStartsWith),
|
||||||
|
|
|
@ -1189,6 +1189,7 @@ define_builtins! {
|
||||||
31 STR_TO_I16: "toI16"
|
31 STR_TO_I16: "toI16"
|
||||||
32 STR_TO_U8: "toU8"
|
32 STR_TO_U8: "toU8"
|
||||||
33 STR_TO_I8: "toI8"
|
33 STR_TO_I8: "toI8"
|
||||||
|
34 STR_TO_SCALARS: "toScalars"
|
||||||
}
|
}
|
||||||
5 LIST: "List" => {
|
5 LIST: "List" => {
|
||||||
0 LIST_LIST: "List" imported // the List.List type alias
|
0 LIST_LIST: "List" imported // the List.List type alias
|
||||||
|
|
|
@ -890,7 +890,9 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] {
|
||||||
// - arguments that we may want to update destructively must be Owned
|
// - arguments that we may want to update destructively must be Owned
|
||||||
// - other refcounted arguments are Borrowed
|
// - other refcounted arguments are Borrowed
|
||||||
match op {
|
match op {
|
||||||
ListLen | StrIsEmpty | StrCountGraphemes => arena.alloc_slice_copy(&[borrowed]),
|
ListLen | StrIsEmpty | StrToScalars | StrCountGraphemes => {
|
||||||
|
arena.alloc_slice_copy(&[borrowed])
|
||||||
|
}
|
||||||
ListWithCapacity => arena.alloc_slice_copy(&[irrelevant]),
|
ListWithCapacity => arena.alloc_slice_copy(&[irrelevant]),
|
||||||
ListReplaceUnsafe => arena.alloc_slice_copy(&[owned, irrelevant, irrelevant]),
|
ListReplaceUnsafe => arena.alloc_slice_copy(&[owned, irrelevant, irrelevant]),
|
||||||
ListGetUnsafe => arena.alloc_slice_copy(&[borrowed, irrelevant]),
|
ListGetUnsafe => arena.alloc_slice_copy(&[borrowed, irrelevant]),
|
||||||
|
|
|
@ -1603,3 +1603,100 @@ fn issue_2811() {
|
||||||
RocStr
|
RocStr
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// `Str.toScalars` on strings made only of one-byte (ASCII) characters.
#[test]
#[cfg(any(feature = "gen-llvm"))]
fn to_scalar_1_byte() {
    // A single ASCII character.
    assert_evals_to!(
        indoc!(
            r#"
            Str.toScalars "R"
            "#
        ),
        RocList::from_slice(&[82u32]),
        RocList<u32>
    );

    // Several ASCII characters.
    assert_evals_to!(
        indoc!(
            r#"
            Str.toScalars "Roc!"
            "#
        ),
        RocList::from_slice(&[82u32, 111, 99, 33]),
        RocList<u32>
    );
}
|
||||||
|
|
||||||
|
/// `Str.toScalars` on strings containing two-byte UTF-8 sequences.
#[test]
#[cfg(any(feature = "gen-llvm"))]
fn to_scalar_2_byte() {
    // A single two-byte character.
    assert_evals_to!(
        indoc!(
            r#"
            Str.toScalars "é"
            "#
        ),
        RocList::from_slice(&[233u32]),
        RocList<u32>
    );

    // Two-byte characters mixed with ASCII.
    assert_evals_to!(
        indoc!(
            r#"
            Str.toScalars "Cäfés"
            "#
        ),
        RocList::from_slice(&[67u32, 228, 102, 233, 115]),
        RocList<u32>
    );
}
|
||||||
|
|
||||||
|
/// `Str.toScalars` on strings containing three-byte UTF-8 sequences.
#[test]
#[cfg(any(feature = "gen-llvm"))]
fn to_scalar_3_byte() {
    // A single three-byte character.
    assert_evals_to!(
        indoc!(
            r#"
            Str.toScalars "鹏"
            "#
        ),
        RocList::from_slice(&[40527u32]),
        RocList<u32>
    );

    // Several three-byte characters in a row.
    assert_evals_to!(
        indoc!(
            r#"
            Str.toScalars "鹏很有趣"
            "#
        ),
        RocList::from_slice(&[40527u32, 24456, 26377, 36259]),
        RocList<u32>
    );
}
|
||||||
|
|
||||||
|
/// `Str.toScalars` on strings containing four-byte UTF-8 sequences.
#[test]
#[cfg(any(feature = "gen-llvm"))]
fn to_scalar_4_byte() {
    // from https://design215.com/toolbox/utf8-4byte-characters.php

    // A single four-byte character.
    assert_evals_to!(
        indoc!(
            r#"
            Str.toScalars "𒀀"
            "#
        ),
        RocList::from_slice(&[73728u32]),
        RocList<u32>
    );

    // Two four-byte characters in a row.
    assert_evals_to!(
        indoc!(
            r#"
            Str.toScalars "𒀀𒀁"
            "#
        ),
        RocList::from_slice(&[73728u32, 73729u32]),
        RocList<u32>
    );
}
|
||||||
|
|
|
@ -39,17 +39,17 @@ procedure Num.22 (#Attr.2, #Attr.3):
|
||||||
|
|
||||||
procedure Str.27 (#Attr.2):
|
procedure Str.27 (#Attr.2):
|
||||||
let #Attr.3 : {I64, U8} = lowlevel StrToNum #Attr.2;
|
let #Attr.3 : {I64, U8} = lowlevel StrToNum #Attr.2;
|
||||||
let Str.70 : U8 = StructAtIndex 1 #Attr.3;
|
let Str.72 : U8 = StructAtIndex 1 #Attr.3;
|
||||||
let Str.71 : U8 = 0i64;
|
let Str.73 : U8 = 0i64;
|
||||||
let Str.67 : Int1 = lowlevel NumGt Str.70 Str.71;
|
let Str.69 : Int1 = lowlevel NumGt Str.72 Str.73;
|
||||||
if Str.67 then
|
if Str.69 then
|
||||||
let Str.69 : Int1 = false;
|
let Str.71 : Int1 = false;
|
||||||
let Str.68 : [C Int1, C I64] = Err Str.69;
|
let Str.70 : [C Int1, C I64] = Err Str.71;
|
||||||
ret Str.68;
|
ret Str.70;
|
||||||
else
|
else
|
||||||
let Str.66 : I64 = StructAtIndex 0 #Attr.3;
|
let Str.68 : I64 = StructAtIndex 0 #Attr.3;
|
||||||
let Str.65 : [C Int1, C I64] = Ok Str.66;
|
let Str.67 : [C Int1, C I64] = Ok Str.68;
|
||||||
ret Str.65;
|
ret Str.67;
|
||||||
|
|
||||||
procedure Test.0 ():
|
procedure Test.0 ():
|
||||||
let Test.4 : Int1 = true;
|
let Test.4 : Int1 = true;
|
||||||
|
|
|
@ -27,12 +27,12 @@ procedure Num.22 (#Attr.2, #Attr.3):
|
||||||
ret Num.273;
|
ret Num.273;
|
||||||
|
|
||||||
procedure Str.16 (#Attr.2, #Attr.3):
|
procedure Str.16 (#Attr.2, #Attr.3):
|
||||||
let Str.65 : Str = lowlevel StrRepeat #Attr.2 #Attr.3;
|
let Str.67 : Str = lowlevel StrRepeat #Attr.2 #Attr.3;
|
||||||
ret Str.65;
|
ret Str.67;
|
||||||
|
|
||||||
procedure Str.3 (#Attr.2, #Attr.3):
|
procedure Str.3 (#Attr.2, #Attr.3):
|
||||||
let Str.66 : Str = lowlevel StrConcat #Attr.2 #Attr.3;
|
let Str.68 : Str = lowlevel StrConcat #Attr.2 #Attr.3;
|
||||||
ret Str.66;
|
ret Str.68;
|
||||||
|
|
||||||
procedure Test.1 ():
|
procedure Test.1 ():
|
||||||
let Test.21 : Str = "lllllllllllllllllllllooooooooooong";
|
let Test.21 : Str = "lllllllllllllllllllllooooooooooong";
|
||||||
|
|
|
@ -29,8 +29,8 @@ procedure Num.22 (#Attr.2, #Attr.3):
|
||||||
ret Num.273;
|
ret Num.273;
|
||||||
|
|
||||||
procedure Str.3 (#Attr.2, #Attr.3):
|
procedure Str.3 (#Attr.2, #Attr.3):
|
||||||
let Str.66 : Str = lowlevel StrConcat #Attr.2 #Attr.3;
|
let Str.68 : Str = lowlevel StrConcat #Attr.2 #Attr.3;
|
||||||
ret Str.66;
|
ret Str.68;
|
||||||
|
|
||||||
procedure Test.1 ():
|
procedure Test.1 ():
|
||||||
let Test.21 : Str = "lllllllllllllllllllllooooooooooong";
|
let Test.21 : Str = "lllllllllllllllllllllooooooooooong";
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue