mirror of
https://github.com/roc-lang/roc.git
synced 2025-08-03 19:58:18 +00:00
builtin(str): implement Str.graphemes
Signed-off-by: Prajwal S N <prajwalnadig21@gmail.com>
This commit is contained in:
parent
b6a282b0ce
commit
df7e4eea7e
10 changed files with 67 additions and 3 deletions
|
@ -145,6 +145,7 @@ comptime {
|
|||
exportStrFn(str.strTrimRight, "trim_right");
|
||||
exportStrFn(str.strCloneTo, "clone_to");
|
||||
exportStrFn(str.withCapacity, "with_capacity");
|
||||
exportStrFn(str.strGraphemes, "str_graphemes");
|
||||
|
||||
inline for (INTEGERS) |T| {
|
||||
str.exportFromInt(T, ROC_BUILTINS ++ "." ++ STR ++ ".from_int.");
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
const utils = @import("utils.zig");
|
||||
const RocList = @import("list.zig").RocList;
|
||||
const grapheme = @import("helpers/grapheme.zig");
|
||||
const UpdateMode = utils.UpdateMode;
|
||||
const std = @import("std");
|
||||
const mem = std.mem;
|
||||
|
@ -1212,7 +1213,6 @@ test "countSegments: string equals delimiter" {
|
|||
}
|
||||
|
||||
// Str.countGraphemeClusters
|
||||
const grapheme = @import("helpers/grapheme.zig");
|
||||
pub fn countGraphemeClusters(string: RocStr) callconv(.C) usize {
|
||||
if (string.isEmpty()) {
|
||||
return 0;
|
||||
|
@ -1303,6 +1303,43 @@ test "countGraphemeClusters: emojis, ut8, and ascii characters" {
|
|||
try expectEqual(count, 10);
|
||||
}
|
||||
|
||||
// Str.graphemes
|
||||
/// Splits `string` into its extended grapheme clusters and returns them as a
/// `RocList` whose elements are `RocStr` values, in source order.
///
/// The list is sized up front with `countGraphemeClusters(string)`, so the
/// number of clusters written here must match that count. An input with zero
/// clusters (e.g. the empty string) returns the freshly allocated list without
/// writing any element.
///
/// The input is assumed to be valid UTF-8: `Utf8View.init` failing is treated
/// as `unreachable`.
pub fn strGraphemes(string: RocStr) callconv(.C) RocList {
    const grapheme_count = countGraphemeClusters(string);
    const list = RocList.allocate(@alignOf(RocStr), grapheme_count, @sizeOf(RocStr));

    // Nothing to write; touching `graphemes[0]` on a zero-length list would be
    // an out-of-bounds store.
    if (grapheme_count == 0) {
        return list;
    }

    const graphemes = @ptrCast([*]RocStr, @alignCast(@alignOf(RocStr), list.bytes));

    const bytes_ptr = string.asU8ptr();
    const bytes = bytes_ptr[0..string.len()];
    var iter = (unicode.Utf8View.init(bytes) catch unreachable).iterator();
    var grapheme_break_state: ?grapheme.BoundClass = null;
    var opt_last_codepoint: ?u21 = null;

    // Index of the next list slot to fill.
    var list_index: usize = 0;
    // Byte offset at which the cluster currently being accumulated begins.
    var start_index: usize = 0;
    // Byte offset of `cur_codepoint` within `bytes` (i.e. the end of everything
    // consumed before it).
    var str_index: usize = 0;

    while (iter.nextCodepoint()) |cur_codepoint| {
        if (opt_last_codepoint) |last_codepoint| {
            const did_break = grapheme.isGraphemeBreak(last_codepoint, cur_codepoint, &grapheme_break_state);
            if (did_break) {
                // The boundary lies *before* `cur_codepoint`, so the finished
                // cluster spans [start_index, str_index). Its length must not
                // depend on the width of `cur_codepoint`, which belongs to the
                // next cluster.
                graphemes[list_index] = RocStr.init(bytes_ptr + start_index, str_index - start_index);
                list_index += 1;
                start_index = str_index;
                grapheme_break_state = null;
            }
        }
        str_index += unicode.utf8CodepointSequenceLength(cur_codepoint) catch unreachable;
        opt_last_codepoint = cur_codepoint;
    }

    // Append last grapheme: everything from the last boundary to the end.
    graphemes[list_index] = RocStr.init(bytes_ptr + start_index, bytes.len - start_index);

    return list;
}
|
||||
|
||||
/// Returns the size of `string` in bytes, exactly as reported by
/// `RocStr.len()`.
pub fn countUtf8Bytes(string: RocStr) callconv(.C) usize {
    const byte_count = string.len();
    return byte_count;
}
|
||||
|
|
|
@ -45,6 +45,7 @@ interface Str
|
|||
walkScalarsUntil,
|
||||
withCapacity,
|
||||
withPrefix,
|
||||
graphemes,
|
||||
]
|
||||
imports [
|
||||
Bool.{ Bool, Eq },
|
||||
|
@ -180,6 +181,9 @@ repeat : Str, Nat -> Str
|
|||
## expect Str.countGraphemes "üïä" == 4
|
||||
countGraphemes : Str -> Nat
|
||||
|
||||
## Split a string into its constituent grapheme clusters, returned in order as a list of strings.
|
||||
graphemes : Str -> List Str
|
||||
|
||||
## If the string begins with a [Unicode code point](http://www.unicode.org/glossary/#code_point)
|
||||
## equal to the given [U32], return `Bool.true`. Otherwise return `Bool.false`.
|
||||
##
|
||||
|
|
|
@ -362,6 +362,7 @@ pub const STR_APPEND_SCALAR: &str = "roc_builtins.str.append_scalar";
|
|||
pub const STR_GET_SCALAR_UNSAFE: &str = "roc_builtins.str.get_scalar_unsafe";
|
||||
pub const STR_CLONE_TO: &str = "roc_builtins.str.clone_to";
|
||||
pub const STR_WITH_CAPACITY: &str = "roc_builtins.str.with_capacity";
|
||||
pub const STR_GRAPHEMES: &str = "roc_builtins.str.str_graphemes";
|
||||
|
||||
pub const LIST_MAP: &str = "roc_builtins.list.map";
|
||||
pub const LIST_MAP2: &str = "roc_builtins.list.map2";
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue