mirror of
https://github.com/roc-lang/roc.git
synced 2025-07-24 15:03:46 +00:00
Add Str.toScalars builtin
This commit is contained in:
parent
b1fe76adbf
commit
acb7cf99e1
11 changed files with 73 additions and 3 deletions
|
@ -155,6 +155,7 @@ comptime {
|
|||
const str = @import("str.zig");
|
||||
comptime {
|
||||
exportStrFn(str.init, "init");
|
||||
exportStrFn(str.strToScalarsC, "to_scalars");
|
||||
exportStrFn(str.strSplitInPlaceC, "str_split_in_place");
|
||||
exportStrFn(str.countSegments, "count_segments");
|
||||
exportStrFn(str.countGraphemeClusters, "count_grapheme_clusters");
|
||||
|
|
|
@ -470,7 +470,7 @@ pub fn strNumberOfBytes(string: RocStr) callconv(.C) usize {
|
|||
|
||||
// Str.toScalars
|
||||
pub fn strToScalarsC(str: RocStr) callconv(.C) RocList {
|
||||
return @call(.{ .modifier = always_inline }, strToScalars, .{ RocStr, str });
|
||||
return @call(.{ .modifier = always_inline }, strToScalars, .{ str });
|
||||
}
|
||||
|
||||
fn strToScalars(string: RocStr) callconv(.C) RocList {
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
interface Str
|
||||
exposes
|
||||
[
|
||||
concat,
|
||||
Utf8Problem,
|
||||
Utf8ByteProblem,
|
||||
concat,
|
||||
isEmpty,
|
||||
joinWith,
|
||||
split,
|
||||
|
@ -32,6 +32,7 @@ interface Str
|
|||
toI16,
|
||||
toU8,
|
||||
toI8,
|
||||
toScalars,
|
||||
]
|
||||
imports [Bool.{ Bool }, Result.{ Result }]
|
||||
|
||||
|
@ -172,6 +173,31 @@ countGraphemes : Str -> Nat
|
|||
## single [U32]. You'd need to use `Str.startsWithCodePt "🕊"` instead.
|
||||
startsWithCodePt : Str, U32 -> Bool
|
||||
|
||||
toScalars : Str -> List U32
|
||||
|
||||
# walkScalars : Str, state, (state, U32, Str -> state) -> state
|
||||
# walkScalars = \inputStr, init, update ->
|
||||
# # TODO rewrite this in Zig to speed it up a ton!
|
||||
# answer =
|
||||
# List.walk
|
||||
# (toUtf8 inputStr)
|
||||
# { index: 0, answer: init }
|
||||
# \{ index, state }, byte ->
|
||||
# { codePt, codePtStr } =
|
||||
# if byte <= 127 then
|
||||
# # This can never fail. Also, this list means one allocation per step! 😱
|
||||
# str = Str.fromUtf8 [byte] |> Result.withDefault ""
|
||||
|
||||
# { codePt: Num.toU32 byte, codePtStr: str }
|
||||
# else
|
||||
# # TODO handle multibyte UTF-8 string by looking ahead in the list as needed
|
||||
# # https://docs.teradata.com/r/Teradata-Database-International-Character-Set-Support/June-2017/Client-Character-Set-Options/UTF8-Client-Character-Set-Support/UTF8-Multibyte-Sequences
|
||||
|
||||
# { index: index + 1, state: update state codePt codePtStr }
|
||||
|
||||
# answer.state
|
||||
|
||||
|
||||
## Return a [List] of the string's [U8] UTF-8 [code units](https://unicode.org/glossary/#code_unit).
|
||||
## (To split the string into a [List] of smaller [Str] values instead of [U8] values,
|
||||
## see [Str.split].)
|
||||
|
|
|
@ -311,6 +311,7 @@ pub const STR_COUNT_SEGMENTS: &str = "roc_builtins.str.count_segments";
|
|||
pub const STR_CONCAT: &str = "roc_builtins.str.concat";
|
||||
pub const STR_JOIN_WITH: &str = "roc_builtins.str.joinWith";
|
||||
pub const STR_STR_SPLIT_IN_PLACE: &str = "roc_builtins.str.str_split_in_place";
|
||||
pub const STR_TO_SCALARS: &str = "roc_builtins.str.to_scalars";
|
||||
pub const STR_COUNT_GRAPEHEME_CLUSTERS: &str = "roc_builtins.str.count_grapheme_clusters";
|
||||
pub const STR_STARTS_WITH: &str = "roc_builtins.str.starts_with";
|
||||
pub const STR_STARTS_WITH_CODE_PT: &str = "roc_builtins.str.starts_with_code_point";
|
||||
|
|
|
@ -873,6 +873,13 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
|
|||
Box::new(str_type()),
|
||||
);
|
||||
|
||||
// Str.toScalars : Str -> List U32
|
||||
add_top_level_function_type!(
|
||||
Symbol::STR_TO_SCALARS,
|
||||
vec![str_type()],
|
||||
Box::new(list_type(u32_type())),
|
||||
);
|
||||
|
||||
// isEmpty : Str -> Bool
|
||||
add_top_level_function_type!(
|
||||
Symbol::STR_IS_EMPTY,
|
||||
|
|
|
@ -73,6 +73,7 @@ pub fn builtin_defs_map(symbol: Symbol, var_store: &mut VarStore) -> Option<Def>
|
|||
BOOL_NOT => bool_not,
|
||||
STR_CONCAT => str_concat,
|
||||
STR_JOIN_WITH => str_join_with,
|
||||
STR_TO_SCALARS => str_to_scalars,
|
||||
STR_SPLIT => str_split,
|
||||
STR_IS_EMPTY => str_is_empty,
|
||||
STR_STARTS_WITH => str_starts_with,
|
||||
|
@ -1677,6 +1678,26 @@ fn str_concat(symbol: Symbol, var_store: &mut VarStore) -> Def {
|
|||
)
|
||||
}
|
||||
|
||||
/// Str.toScalars : Str -> List U32
|
||||
fn str_to_scalars(symbol: Symbol, var_store: &mut VarStore) -> Def {
|
||||
let str_var = var_store.fresh();
|
||||
let list_u32_var = var_store.fresh();
|
||||
|
||||
let body = RunLowLevel {
|
||||
op: LowLevel::StrToScalars,
|
||||
args: vec![(str_var, Var(Symbol::ARG_1))],
|
||||
ret_var: str_var,
|
||||
};
|
||||
|
||||
defn(
|
||||
symbol,
|
||||
vec![(str_var, Symbol::ARG_1)],
|
||||
var_store,
|
||||
body,
|
||||
list_u32_var,
|
||||
)
|
||||
}
|
||||
|
||||
/// Str.joinWith : List Str, Str -> Str
|
||||
fn str_join_with(symbol: Symbol, var_store: &mut VarStore) -> Def {
|
||||
let list_str_var = var_store.fresh();
|
||||
|
|
|
@ -5415,6 +5415,14 @@ fn run_low_level<'a, 'ctx, 'env>(
|
|||
|
||||
call_str_bitcode_fn(env, &[list.into(), string], bitcode::STR_JOIN_WITH)
|
||||
}
|
||||
StrToScalars => {
|
||||
// Str.toScalars : Str -> List U32
|
||||
debug_assert_eq!(args.len(), 1);
|
||||
|
||||
let string = load_symbol(scope, &args[0]);
|
||||
|
||||
call_str_bitcode_fn(env, &[string], bitcode::STR_TO_SCALARS)
|
||||
}
|
||||
StrStartsWith => {
|
||||
// Str.startsWith : Str, Str -> Bool
|
||||
debug_assert_eq!(args.len(), 2);
|
||||
|
|
|
@ -217,6 +217,7 @@ impl<'a> LowLevelCall<'a> {
|
|||
match self.lowlevel {
|
||||
// Str
|
||||
StrConcat => self.load_args_and_call_zig(backend, bitcode::STR_CONCAT),
|
||||
StrToScalars => self.load_args_and_call_zig(backend, bitcode::STR_TO_SCALARS),
|
||||
StrJoinWith => self.load_args_and_call_zig(backend, bitcode::STR_JOIN_WITH),
|
||||
StrIsEmpty => match backend.storage.get(&self.arguments[0]) {
|
||||
StoredValue::StackMemory { location, .. } => {
|
||||
|
|
|
@ -23,6 +23,7 @@ pub enum LowLevel {
|
|||
StrTrimLeft,
|
||||
StrTrimRight,
|
||||
StrToNum,
|
||||
StrToScalars,
|
||||
ListLen,
|
||||
ListWithCapacity,
|
||||
ListGetUnsafe,
|
||||
|
@ -193,6 +194,7 @@ impl LowLevelWrapperType {
|
|||
|
||||
match symbol {
|
||||
Symbol::STR_CONCAT => CanBeReplacedBy(StrConcat),
|
||||
Symbol::STR_TO_SCALARS => CanBeReplacedBy(StrToScalars),
|
||||
Symbol::STR_JOIN_WITH => CanBeReplacedBy(StrJoinWith),
|
||||
Symbol::STR_IS_EMPTY => CanBeReplacedBy(StrIsEmpty),
|
||||
Symbol::STR_STARTS_WITH => CanBeReplacedBy(StrStartsWith),
|
||||
|
|
|
@ -1189,6 +1189,7 @@ define_builtins! {
|
|||
31 STR_TO_I16: "toI16"
|
||||
32 STR_TO_U8: "toU8"
|
||||
33 STR_TO_I8: "toI8"
|
||||
34 STR_TO_SCALARS: "toScalars"
|
||||
}
|
||||
5 LIST: "List" => {
|
||||
0 LIST_LIST: "List" imported // the List.List type alias
|
||||
|
|
|
@ -896,7 +896,9 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] {
|
|||
// - arguments that we may want to update destructively must be Owned
|
||||
// - other refcounted arguments are Borrowed
|
||||
match op {
|
||||
ListLen | StrIsEmpty | StrCountGraphemes => arena.alloc_slice_copy(&[borrowed]),
|
||||
ListLen | StrIsEmpty | StrToScalars | StrCountGraphemes => {
|
||||
arena.alloc_slice_copy(&[borrowed])
|
||||
}
|
||||
ListWithCapacity => arena.alloc_slice_copy(&[irrelevant]),
|
||||
ListReplaceUnsafe => arena.alloc_slice_copy(&[owned, irrelevant, irrelevant]),
|
||||
ListGetUnsafe => arena.alloc_slice_copy(&[borrowed, irrelevant]),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue