Str.walkScalars

This commit is contained in:
Folkert 2022-07-04 18:01:19 +02:00
parent 1de9270ecf
commit 6d7c329698
No known key found for this signature in database
GPG key ID: 1F17F6FFD112B97C
15 changed files with 109 additions and 2 deletions

View file

@ -159,6 +159,7 @@ comptime {
exportStrFn(str.substringUnsafe, "substring_unsafe");
exportStrFn(str.getUnsafe, "get_unsafe");
exportStrFn(str.reserve, "reserve");
exportStrFn(str.getScalarUnsafe, "get_scalar_unsafe");
exportStrFn(str.appendScalar, "append_scalar");
exportStrFn(str.strToUtf8C, "to_utf8");
exportStrFn(str.fromUtf8C, "from_utf8");

View file

@ -2469,3 +2469,23 @@ pub fn reserve(string: RocStr, capacity: usize) callconv(.C) RocStr {
return string;
}
}
// Decodes the UTF-8 encoded scalar that begins at byte offset `index` of `string`.
//
// Returns the decoded scalar together with the number of bytes its encoding
// occupies, so a caller can advance to the start of the next scalar.
//
// "Unsafe": `index` is not bounds-checked here and both decoding steps use
// `catch unreachable`, so the caller must pass an offset that is the first
// byte of a well-formed UTF-8 sequence.
pub fn getScalarUnsafe(string: RocStr, index: usize) callconv(.C) extern struct { bytesParsed: usize, scalar: u32 } {
    const bytes = string.asSlice();

    // The leading byte alone determines how long the encoded sequence is.
    const width = @intCast(usize, std.unicode.utf8ByteSequenceLength(bytes[index]) catch unreachable);
    const end = index + width;
    const code_point = std.unicode.utf8Decode(bytes[index..end]) catch unreachable;

    return .{ .bytesParsed = width, .scalar = @intCast(u32, code_point) };
}
test "getScalarUnsafe" {
    // Simplest case: a one-byte ASCII string decoded from offset 0.
    const input = "A";
    var roc_str = RocStr.init(input, input.len);

    const decoded = getScalarUnsafe(roc_str, 0);
    const want = try std.unicode.utf8Decode("A");

    // The scalar must match the standard library's decoding, and exactly one byte is consumed.
    try expectEqual(decoded.scalar, @intCast(u32, want));
    try expectEqual(decoded.bytesParsed, 1);
}

View file

@ -39,6 +39,8 @@ interface Str
walkUtf8WithIndex,
reserve,
appendScalar,
walkScalars,
walkScalarsUntil,
]
imports [Bool.{ Bool }, Result.{ Result }]
@ -360,3 +362,36 @@ appendScalar = \string, scalar ->
## Reports whether a number is an acceptable scalar value: anything up to
## 0xD7FF, or anything from 0xE000 through 0x10FFFF. The gap between those
## two ranges (0xD800 through 0xDFFF) and everything above 0x10FFFF is rejected.
isValidScalar : U32 -> Bool
isValidScalar = \scalar ->
    belowGap = scalar <= 0xD7FF
    aboveGap = scalar >= 0xE000 && scalar <= 0x10FFFF

    belowGap || aboveGap
## Given a string and a byte index into it, yields the scalar found there
## together with the number of bytes that were parsed to produce it.
## Only the type is declared here; no Roc body follows.
getScalarUnsafe : Str, Nat -> { scalar : U32, bytesParsed : Nat }
## Folds over every scalar in the string, from the first byte to the last,
## threading a state value through `step` and returning the final state.
walkScalars : Str, state, (state, U32 -> state) -> state
walkScalars = \string, init, step ->
    byteCount = Str.countUtf8Bytes string

    walkScalarsHelp string init step 0 byteCount
## Recursive worker for `walkScalars`. `index` is the byte offset of the next
## scalar to read and `length` is the total byte count of the string.
walkScalarsHelp : Str, state, (state, U32 -> state), Nat, Nat -> state
walkScalarsHelp = \string, state, step, index, length ->
    if index >= length then
        # Every byte has been consumed; the accumulated state is the answer.
        state
    else
        parsed = getScalarUnsafe string index

        # Advance by however many bytes this scalar occupied.
        walkScalarsHelp string (step state parsed.scalar) step (index + parsed.bytesParsed) length
## Like `walkScalars`, except that `step` returns `Continue` or `Break`
## wrapped around the next state rather than the bare state.
walkScalarsUntil : Str, state, (state, U32 -> [Break state, Continue state]) -> state
walkScalarsUntil = \string, init, step ->
    byteCount = Str.countUtf8Bytes string

    walkScalarsUntilHelp string init step 0 byteCount
## Recursive worker for `walkScalarsUntil`. `index` is the byte offset of the
## next scalar to read and `length` is the total byte count of the string.
##
## After each scalar, `step` decides what happens next:
## * `Continue newState` - keep walking from the following scalar.
## * `Break newState` - stop immediately and return `newState`.
walkScalarsUntilHelp : Str, state, (state, U32 -> [Break state, Continue state]), Nat, Nat -> state
walkScalarsUntilHelp = \string, state, step, index, length ->
    if index < length then
        { scalar, bytesParsed } = getScalarUnsafe string index

        when step state scalar is
            Continue newState ->
                # Recurse into this same helper (not `walkScalarsHelp`) so that
                # `step` can still break on any later scalar.
                walkScalarsUntilHelp string newState step (index + bytesParsed) length

            Break newState ->
                newState
    else
        state

View file

@ -336,6 +336,7 @@ pub const STR_TRIM_RIGHT: &str = "roc_builtins.str.trim_right";
pub const STR_GET_UNSAFE: &str = "roc_builtins.str.get_unsafe";
pub const STR_RESERVE: &str = "roc_builtins.str.reserve";
pub const STR_APPEND_SCALAR: &str = "roc_builtins.str.append_scalar";
pub const STR_GET_SCALAR_UNSAFE: &str = "roc_builtins.str.get_scalar_unsafe";
pub const DICT_HASH: &str = "roc_builtins.dict.hash";
pub const DICT_HASH_STR: &str = "roc_builtins.dict.hash_str";

View file

@ -85,6 +85,7 @@ pub fn builtin_defs_map(symbol: Symbol, var_store: &mut VarStore) -> Option<Def>
STR_SUBSTRING_UNSAFE => str_substring_unsafe,
STR_RESERVE => str_reserve,
STR_APPEND_SCALAR_UNSAFE => str_append_scalar_unsafe,
STR_GET_SCALAR_UNSAFE => str_get_scalar_unsafe,
STR_FROM_UTF8 => str_from_utf8,
STR_FROM_UTF8_RANGE => str_from_utf8_range,
STR_TO_UTF8 => str_to_utf8,
@ -1749,6 +1750,11 @@ fn str_append_scalar_unsafe(symbol: Symbol, var_store: &mut VarStore) -> Def {
lowlevel_2(symbol, LowLevel::StrAppendScalar, var_store)
}
/// Builds the definition for the `Str.getScalarUnsafe` builtin by delegating to
/// `lowlevel_2` with the `LowLevel::StrGetScalarUnsafe` operation.
///
/// Str.getScalarUnsafe : Str, Nat -> { scalar : U32, bytesParsed : Nat }
fn str_get_scalar_unsafe(symbol: Symbol, var_store: &mut VarStore) -> Def {
    let op = LowLevel::StrGetScalarUnsafe;

    lowlevel_2(symbol, op, var_store)
}
/// Str.fromUtf8 : List U8 -> Result Str [BadUtf8 { byteIndex : Nat, problem : Utf8Problem }]*
fn str_from_utf8(symbol: Symbol, var_store: &mut VarStore) -> Def {
let bytes_var = var_store.fresh();

View file

@ -5371,6 +5371,14 @@ fn run_low_level<'a, 'ctx, 'env>(
let string = load_symbol(scope, &args[0]);
call_bitcode_fn(env, &[string], bitcode::STR_COUNT_GRAPEHEME_CLUSTERS)
}
StrGetScalarUnsafe => {
// Str.getScalarUnsafe : Str, Nat -> { bytesParsed : Nat, scalar : U32 }
debug_assert_eq!(args.len(), 2);
let string = load_symbol(scope, &args[0]);
let index = load_symbol(scope, &args[1]);
call_bitcode_fn(env, &[string, index], bitcode::STR_GET_SCALAR_UNSAFE)
}
StrCountUtf8Bytes => {
// Str.countUtf8Bytes : Str -> Nat
debug_assert_eq!(args.len(), 1);

View file

@ -293,6 +293,9 @@ impl<'a> LowLevelCall<'a> {
StrRepeat => self.load_args_and_call_zig(backend, bitcode::STR_REPEAT),
StrAppendScalar => self.load_args_and_call_zig(backend, bitcode::STR_APPEND_SCALAR),
StrTrim => self.load_args_and_call_zig(backend, bitcode::STR_TRIM),
StrGetScalarUnsafe => {
self.load_args_and_call_zig(backend, bitcode::STR_GET_SCALAR_UNSAFE)
}
StrSubstringUnsafe => {
self.load_args_and_call_zig(backend, bitcode::STR_SUBSTRING_UNSAFE)
}

View file

@ -176,6 +176,7 @@ wasm_result_primitive!(u16, i32_store16, Align::Bytes2);
wasm_result_primitive!(i16, i32_store16, Align::Bytes2);
wasm_result_primitive!(u32, i32_store, Align::Bytes4);
wasm_result_primitive!(i32, i32_store, Align::Bytes4);
wasm_result_primitive!(char, i32_store, Align::Bytes4);
wasm_result_primitive!(u64, i64_store, Align::Bytes8);
wasm_result_primitive!(i64, i64_store, Align::Bytes8);
wasm_result_primitive!(usize, i32_store, Align::Bytes4);

View file

@ -23,7 +23,7 @@ macro_rules! wasm32_sized_primitive {
}
wasm32_sized_primitive!(
u8, i8, u16, i16, u32, i32, u64, i64, u128, i128, f32, f64, bool, RocDec, RocOrder,
u8, i8, u16, i16, u32, i32, char, u64, i64, u128, i128, f32, f64, bool, RocDec, RocOrder,
);
impl Wasm32Sized for () {

View file

@ -29,6 +29,7 @@ pub enum LowLevel {
StrSubstringUnsafe,
StrReserve,
StrAppendScalar,
StrGetScalarUnsafe,
ListLen,
ListWithCapacity,
ListGetUnsafe,

View file

@ -1199,6 +1199,9 @@ define_builtins! {
41 STR_RESERVE: "reserve"
42 STR_APPEND_SCALAR_UNSAFE: "appendScalarUnsafe"
43 STR_APPEND_SCALAR: "appendScalar"
44 STR_GET_SCALAR_UNSAFE: "getScalarUnsafe"
45 STR_WALK_SCALARS: "walkScalars"
46 STR_WALK_SCALARS_UNTIL: "walkScalarsUntil"
}
5 LIST: "List" => {
0 LIST_LIST: "List" imported // the List.List type alias

View file

@ -902,6 +902,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] {
StrSubstringUnsafe => arena.alloc_slice_copy(&[owned, irrelevant, irrelevant]),
StrReserve => arena.alloc_slice_copy(&[owned, irrelevant]),
StrAppendScalar => arena.alloc_slice_copy(&[owned, irrelevant]),
StrGetScalarUnsafe => arena.alloc_slice_copy(&[borrowed, irrelevant]),
StrTrim => arena.alloc_slice_copy(&[owned]),
StrTrimLeft => arena.alloc_slice_copy(&[owned]),
StrTrimRight => arena.alloc_slice_copy(&[owned]),

View file

@ -1758,3 +1758,17 @@ fn str_append_scalar() {
RocStr
);
}
// Evaluates `Str.walkScalars` over "abcd", using `List.append` as the step
// function and an empty list as the initial state, and expects a list holding
// 'a', 'b', 'c', 'd' in that order. Only compiled with the "gen-llvm" feature.
#[test]
#[cfg(any(feature = "gen-llvm"))]
fn str_walk_scalars() {
assert_evals_to!(
indoc!(
r#"
Str.walkScalars "abcd" [] List.append
"#
),
// Expected value: one `char` per scalar visited, in walk order.
RocList::from_slice(&['a', 'b', 'c', 'd']),
RocList<char>
);
}

View file

@ -40,7 +40,7 @@ macro_rules! from_wasm_memory_primitive {
}
from_wasm_memory_primitive!(
u8, i8, u16, i16, u32, i32, u64, i64, u128, i128, f32, f64, bool, RocDec, RocOrder,
u8, i8, u16, i16, u32, i32, char, u64, i64, u128, i128, f32, f64, bool, RocDec, RocOrder,
);
impl FromWasmerMemory for () {

View file

@ -1314,3 +1314,16 @@ fn str_to_dec() {
RocDec
);
}
// Evaluates `Str.walkScalars` over "abcd", using `List.append` as the step
// function and an empty list as the initial state, and expects a list holding
// 'a', 'b', 'c', 'd' in that order.
#[test]
fn str_walk_scalars() {
assert_evals_to!(
indoc!(
r#"
Str.walkScalars "abcd" [] List.append
"#
),
// Expected value: one `char` per scalar visited, in walk order.
RocList::from_slice(&['a', 'b', 'c', 'd']),
RocList<char>
);
}