mirror of
https://github.com/roc-lang/roc.git
synced 2025-08-03 03:42:17 +00:00
Str.walkScalars
This commit is contained in:
parent
1de9270ecf
commit
6d7c329698
15 changed files with 109 additions and 2 deletions
|
@ -159,6 +159,7 @@ comptime {
|
||||||
exportStrFn(str.substringUnsafe, "substring_unsafe");
|
exportStrFn(str.substringUnsafe, "substring_unsafe");
|
||||||
exportStrFn(str.getUnsafe, "get_unsafe");
|
exportStrFn(str.getUnsafe, "get_unsafe");
|
||||||
exportStrFn(str.reserve, "reserve");
|
exportStrFn(str.reserve, "reserve");
|
||||||
|
exportStrFn(str.getScalarUnsafe, "get_scalar_unsafe");
|
||||||
exportStrFn(str.appendScalar, "append_scalar");
|
exportStrFn(str.appendScalar, "append_scalar");
|
||||||
exportStrFn(str.strToUtf8C, "to_utf8");
|
exportStrFn(str.strToUtf8C, "to_utf8");
|
||||||
exportStrFn(str.fromUtf8C, "from_utf8");
|
exportStrFn(str.fromUtf8C, "from_utf8");
|
||||||
|
|
|
@ -2469,3 +2469,23 @@ pub fn reserve(string: RocStr, capacity: usize) callconv(.C) RocStr {
|
||||||
return string;
|
return string;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Decodes the UTF-8 sequence that begins at byte `index` of `string`.
///
/// Returns the decoded scalar together with the number of bytes the sequence
/// occupies. Nothing is validated here: if the byte at `index` is not a valid
/// lead byte, or the bytes that follow do not decode, one of the
/// `catch unreachable` branches is hit.
pub fn getScalarUnsafe(string: RocStr, index: usize) callconv(.C) extern struct { bytesParsed: usize, scalar: u32 } {
    const bytes = string.asSlice();

    // The lead byte alone determines how many bytes the encoded sequence spans.
    const lead_byte = bytes[index];
    const sequence_len = std.unicode.utf8ByteSequenceLength(lead_byte) catch unreachable;
    const width = @intCast(usize, sequence_len);

    const end = index + width;
    const code_point = std.unicode.utf8Decode(bytes[index..end]) catch unreachable;

    return .{ .bytesParsed = width, .scalar = @intCast(u32, code_point) };
}
|
||||||
|
|
||||||
|
test "getScalarUnsafe" {
    // A one-letter ASCII string: the only scalar starts at byte 0 and is one byte wide.
    const input = "A";
    var roc_str = RocStr.init(input, input.len);

    const decoded = getScalarUnsafe(roc_str, 0);
    const want = try std.unicode.utf8Decode("A");

    try expectEqual(decoded.scalar, @intCast(u32, want));
    try expectEqual(decoded.bytesParsed, 1);
}
|
||||||
|
|
|
@ -39,6 +39,8 @@ interface Str
|
||||||
walkUtf8WithIndex,
|
walkUtf8WithIndex,
|
||||||
reserve,
|
reserve,
|
||||||
appendScalar,
|
appendScalar,
|
||||||
|
walkScalars,
|
||||||
|
walkScalarsUntil,
|
||||||
]
|
]
|
||||||
imports [Bool.{ Bool }, Result.{ Result }]
|
imports [Bool.{ Bool }, Result.{ Result }]
|
||||||
|
|
||||||
|
@ -360,3 +362,36 @@ appendScalar = \string, scalar ->
|
||||||
isValidScalar : U32 -> Bool
|
isValidScalar : U32 -> Bool
|
||||||
isValidScalar = \scalar ->
|
isValidScalar = \scalar ->
|
||||||
scalar <= 0xD7FF || (scalar >= 0xE000 && scalar <= 0x10FFFF)
|
scalar <= 0xD7FF || (scalar >= 0xE000 && scalar <= 0x10FFFF)
|
||||||
|
|
||||||
|
getScalarUnsafe : Str, Nat -> { scalar : U32, bytesParsed : Nat }
|
||||||
|
|
||||||
|
## Walks over the Unicode scalar values of `string` from the first byte to the
## last, feeding each scalar and the running state to `step`, and returns the
## final state. `init` is the state passed to the first call of `step`.
walkScalars : Str, state, (state, U32 -> state) -> state
walkScalars = \string, init, step ->
    length = Str.countUtf8Bytes string

    walkScalarsHelp string init step 0 length
|
||||||
|
|
||||||
|
## Recursive worker for `walkScalars`. `index` is the byte offset of the next
## scalar to decode and `length` is the byte length of `string`; the walk ends
## once `index` reaches `length`.
walkScalarsHelp : Str, state, (state, U32 -> state), Nat, Nat -> state
walkScalarsHelp = \string, state, step, index, length ->
    if index >= length then
        state
    else
        { scalar, bytesParsed } = getScalarUnsafe string index
        # Advance by the width of the scalar just decoded, not by one byte.
        nextIndex = index + bytesParsed

        walkScalarsHelp string (step state scalar) step nextIndex length
|
||||||
|
|
||||||
|
## Like a scalar walk with an early exit: per the annotation, `step` answers
## `Continue` with the next state to keep going, or `Break` with the state to
## return immediately. `init` is the state passed to the first call of `step`.
walkScalarsUntil : Str, state, (state, U32 -> [Break state, Continue state]) -> state
walkScalarsUntil = \string, init, step ->
    length = Str.countUtf8Bytes string

    walkScalarsUntilHelp string init step 0 length
|
||||||
|
|
||||||
|
## Recursive worker for `walkScalarsUntil`. `index` is the byte offset of the
## next scalar to decode and `length` is the byte length of `string`.
##
## Returns the state carried by the first `Break` that `step` produces, or the
## last state reached once `index` has advanced to `length`.
walkScalarsUntilHelp : Str, state, (state, U32 -> [Break state, Continue state]), Nat, Nat -> state
walkScalarsUntilHelp = \string, state, step, index, length ->
    if index < length then
        { scalar, bytesParsed } = getScalarUnsafe string index

        when step state scalar is
            Continue newState ->
                # Recurse into this helper, not `walkScalarsHelp`: `step` returns a
                # Break/Continue tag here, so its result must keep being matched.
                walkScalarsUntilHelp string newState step (index + bytesParsed) length

            # The tag must be `Break` to agree with the type annotation above.
            Break newState ->
                newState
    else
        state
|
||||||
|
|
|
@ -336,6 +336,7 @@ pub const STR_TRIM_RIGHT: &str = "roc_builtins.str.trim_right";
|
||||||
pub const STR_GET_UNSAFE: &str = "roc_builtins.str.get_unsafe";
|
pub const STR_GET_UNSAFE: &str = "roc_builtins.str.get_unsafe";
|
||||||
pub const STR_RESERVE: &str = "roc_builtins.str.reserve";
|
pub const STR_RESERVE: &str = "roc_builtins.str.reserve";
|
||||||
pub const STR_APPEND_SCALAR: &str = "roc_builtins.str.append_scalar";
|
pub const STR_APPEND_SCALAR: &str = "roc_builtins.str.append_scalar";
|
||||||
|
pub const STR_GET_SCALAR_UNSAFE: &str = "roc_builtins.str.get_scalar_unsafe";
|
||||||
|
|
||||||
pub const DICT_HASH: &str = "roc_builtins.dict.hash";
|
pub const DICT_HASH: &str = "roc_builtins.dict.hash";
|
||||||
pub const DICT_HASH_STR: &str = "roc_builtins.dict.hash_str";
|
pub const DICT_HASH_STR: &str = "roc_builtins.dict.hash_str";
|
||||||
|
|
|
@ -85,6 +85,7 @@ pub fn builtin_defs_map(symbol: Symbol, var_store: &mut VarStore) -> Option<Def>
|
||||||
STR_SUBSTRING_UNSAFE => str_substring_unsafe,
|
STR_SUBSTRING_UNSAFE => str_substring_unsafe,
|
||||||
STR_RESERVE => str_reserve,
|
STR_RESERVE => str_reserve,
|
||||||
STR_APPEND_SCALAR_UNSAFE => str_append_scalar_unsafe,
|
STR_APPEND_SCALAR_UNSAFE => str_append_scalar_unsafe,
|
||||||
|
STR_GET_SCALAR_UNSAFE => str_get_scalar_unsafe,
|
||||||
STR_FROM_UTF8 => str_from_utf8,
|
STR_FROM_UTF8 => str_from_utf8,
|
||||||
STR_FROM_UTF8_RANGE => str_from_utf8_range,
|
STR_FROM_UTF8_RANGE => str_from_utf8_range,
|
||||||
STR_TO_UTF8 => str_to_utf8,
|
STR_TO_UTF8 => str_to_utf8,
|
||||||
|
@ -1749,6 +1750,11 @@ fn str_append_scalar_unsafe(symbol: Symbol, var_store: &mut VarStore) -> Def {
|
||||||
lowlevel_2(symbol, LowLevel::StrAppendScalar, var_store)
|
lowlevel_2(symbol, LowLevel::StrAppendScalar, var_store)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Str.getScalarUnsafe : Str, Nat -> { scalar : U32, bytesParsed : Nat }
///
/// Builds the definition for `symbol` by passing it to `lowlevel_2` together
/// with `LowLevel::StrGetScalarUnsafe` and the caller's `var_store`.
|
||||||
|
fn str_get_scalar_unsafe(symbol: Symbol, var_store: &mut VarStore) -> Def {
|
||||||
|
lowlevel_2(symbol, LowLevel::StrGetScalarUnsafe, var_store)
|
||||||
|
}
|
||||||
|
|
||||||
/// Str.fromUtf8 : List U8 -> Result Str [BadUtf8 { byteIndex : Nat, problem : Utf8Problem } }]*
|
/// Str.fromUtf8 : List U8 -> Result Str [BadUtf8 { byteIndex : Nat, problem : Utf8Problem } }]*
|
||||||
fn str_from_utf8(symbol: Symbol, var_store: &mut VarStore) -> Def {
|
fn str_from_utf8(symbol: Symbol, var_store: &mut VarStore) -> Def {
|
||||||
let bytes_var = var_store.fresh();
|
let bytes_var = var_store.fresh();
|
||||||
|
|
|
@ -5371,6 +5371,14 @@ fn run_low_level<'a, 'ctx, 'env>(
|
||||||
let string = load_symbol(scope, &args[0]);
|
let string = load_symbol(scope, &args[0]);
|
||||||
call_bitcode_fn(env, &[string], bitcode::STR_COUNT_GRAPEHEME_CLUSTERS)
|
call_bitcode_fn(env, &[string], bitcode::STR_COUNT_GRAPEHEME_CLUSTERS)
|
||||||
}
|
}
|
||||||
|
StrGetScalarUnsafe => {
|
||||||
|
// Str.getScalarUnsafe : Str, Nat -> { bytesParsed : Nat, scalar : U32 }
|
||||||
|
debug_assert_eq!(args.len(), 2);
|
||||||
|
|
||||||
|
let string = load_symbol(scope, &args[0]);
|
||||||
|
let index = load_symbol(scope, &args[1]);
|
||||||
|
call_bitcode_fn(env, &[string, index], bitcode::STR_GET_SCALAR_UNSAFE)
|
||||||
|
}
|
||||||
StrCountUtf8Bytes => {
|
StrCountUtf8Bytes => {
|
||||||
// Str.countGraphemes : Str -> Nat
|
// Str.countGraphemes : Str -> Nat
|
||||||
debug_assert_eq!(args.len(), 1);
|
debug_assert_eq!(args.len(), 1);
|
||||||
|
|
|
@ -293,6 +293,9 @@ impl<'a> LowLevelCall<'a> {
|
||||||
StrRepeat => self.load_args_and_call_zig(backend, bitcode::STR_REPEAT),
|
StrRepeat => self.load_args_and_call_zig(backend, bitcode::STR_REPEAT),
|
||||||
StrAppendScalar => self.load_args_and_call_zig(backend, bitcode::STR_APPEND_SCALAR),
|
StrAppendScalar => self.load_args_and_call_zig(backend, bitcode::STR_APPEND_SCALAR),
|
||||||
StrTrim => self.load_args_and_call_zig(backend, bitcode::STR_TRIM),
|
StrTrim => self.load_args_and_call_zig(backend, bitcode::STR_TRIM),
|
||||||
|
StrGetScalarUnsafe => {
|
||||||
|
self.load_args_and_call_zig(backend, bitcode::STR_GET_SCALAR_UNSAFE)
|
||||||
|
}
|
||||||
StrSubstringUnsafe => {
|
StrSubstringUnsafe => {
|
||||||
self.load_args_and_call_zig(backend, bitcode::STR_SUBSTRING_UNSAFE)
|
self.load_args_and_call_zig(backend, bitcode::STR_SUBSTRING_UNSAFE)
|
||||||
}
|
}
|
||||||
|
|
|
@ -176,6 +176,7 @@ wasm_result_primitive!(u16, i32_store16, Align::Bytes2);
|
||||||
wasm_result_primitive!(i16, i32_store16, Align::Bytes2);
|
wasm_result_primitive!(i16, i32_store16, Align::Bytes2);
|
||||||
wasm_result_primitive!(u32, i32_store, Align::Bytes4);
|
wasm_result_primitive!(u32, i32_store, Align::Bytes4);
|
||||||
wasm_result_primitive!(i32, i32_store, Align::Bytes4);
|
wasm_result_primitive!(i32, i32_store, Align::Bytes4);
|
||||||
|
wasm_result_primitive!(char, i32_store, Align::Bytes4);
|
||||||
wasm_result_primitive!(u64, i64_store, Align::Bytes8);
|
wasm_result_primitive!(u64, i64_store, Align::Bytes8);
|
||||||
wasm_result_primitive!(i64, i64_store, Align::Bytes8);
|
wasm_result_primitive!(i64, i64_store, Align::Bytes8);
|
||||||
wasm_result_primitive!(usize, i32_store, Align::Bytes4);
|
wasm_result_primitive!(usize, i32_store, Align::Bytes4);
|
||||||
|
|
|
@ -23,7 +23,7 @@ macro_rules! wasm32_sized_primitive {
|
||||||
}
|
}
|
||||||
|
|
||||||
wasm32_sized_primitive!(
|
wasm32_sized_primitive!(
|
||||||
u8, i8, u16, i16, u32, i32, u64, i64, u128, i128, f32, f64, bool, RocDec, RocOrder,
|
u8, i8, u16, i16, u32, i32, char, u64, i64, u128, i128, f32, f64, bool, RocDec, RocOrder,
|
||||||
);
|
);
|
||||||
|
|
||||||
impl Wasm32Sized for () {
|
impl Wasm32Sized for () {
|
||||||
|
|
|
@ -29,6 +29,7 @@ pub enum LowLevel {
|
||||||
StrSubstringUnsafe,
|
StrSubstringUnsafe,
|
||||||
StrReserve,
|
StrReserve,
|
||||||
StrAppendScalar,
|
StrAppendScalar,
|
||||||
|
StrGetScalarUnsafe,
|
||||||
ListLen,
|
ListLen,
|
||||||
ListWithCapacity,
|
ListWithCapacity,
|
||||||
ListGetUnsafe,
|
ListGetUnsafe,
|
||||||
|
|
|
@ -1199,6 +1199,9 @@ define_builtins! {
|
||||||
41 STR_RESERVE: "reserve"
|
41 STR_RESERVE: "reserve"
|
||||||
42 STR_APPEND_SCALAR_UNSAFE: "appendScalarUnsafe"
|
42 STR_APPEND_SCALAR_UNSAFE: "appendScalarUnsafe"
|
||||||
43 STR_APPEND_SCALAR: "appendScalar"
|
43 STR_APPEND_SCALAR: "appendScalar"
|
||||||
|
44 STR_GET_SCALAR_UNSAFE: "getScalarUnsafe"
|
||||||
|
45 STR_WALK_SCALARS: "walkScalars"
|
||||||
|
46 STR_WALK_SCALARS_UNTIL: "walkScalarsUntil"
|
||||||
}
|
}
|
||||||
5 LIST: "List" => {
|
5 LIST: "List" => {
|
||||||
0 LIST_LIST: "List" imported // the List.List type alias
|
0 LIST_LIST: "List" imported // the List.List type alias
|
||||||
|
|
|
@ -902,6 +902,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] {
|
||||||
StrSubstringUnsafe => arena.alloc_slice_copy(&[owned, irrelevant, irrelevant]),
|
StrSubstringUnsafe => arena.alloc_slice_copy(&[owned, irrelevant, irrelevant]),
|
||||||
StrReserve => arena.alloc_slice_copy(&[owned, irrelevant]),
|
StrReserve => arena.alloc_slice_copy(&[owned, irrelevant]),
|
||||||
StrAppendScalar => arena.alloc_slice_copy(&[owned, irrelevant]),
|
StrAppendScalar => arena.alloc_slice_copy(&[owned, irrelevant]),
|
||||||
|
StrGetScalarUnsafe => arena.alloc_slice_copy(&[borrowed, irrelevant]),
|
||||||
StrTrim => arena.alloc_slice_copy(&[owned]),
|
StrTrim => arena.alloc_slice_copy(&[owned]),
|
||||||
StrTrimLeft => arena.alloc_slice_copy(&[owned]),
|
StrTrimLeft => arena.alloc_slice_copy(&[owned]),
|
||||||
StrTrimRight => arena.alloc_slice_copy(&[owned]),
|
StrTrimRight => arena.alloc_slice_copy(&[owned]),
|
||||||
|
|
|
@ -1758,3 +1758,17 @@ fn str_append_scalar() {
|
||||||
RocStr
|
RocStr
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Evaluates `Str.walkScalars "abcd" [] List.append` and expects the four scalars
// back in order, read on the host side as a `RocList<char>`.
// Only compiled when the "gen-llvm" feature is enabled.
#[test]
|
||||||
|
#[cfg(any(feature = "gen-llvm"))]
|
||||||
|
fn str_walk_scalars() {
|
||||||
|
assert_evals_to!(
|
||||||
|
indoc!(
|
||||||
|
r#"
|
||||||
|
Str.walkScalars "abcd" [] List.append
|
||||||
|
"#
|
||||||
|
),
|
||||||
|
RocList::from_slice(&['a', 'b', 'c', 'd']),
|
||||||
|
RocList<char>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
|
@ -40,7 +40,7 @@ macro_rules! from_wasm_memory_primitive {
|
||||||
}
|
}
|
||||||
|
|
||||||
from_wasm_memory_primitive!(
|
from_wasm_memory_primitive!(
|
||||||
u8, i8, u16, i16, u32, i32, u64, i64, u128, i128, f32, f64, bool, RocDec, RocOrder,
|
u8, i8, u16, i16, u32, i32, char, u64, i64, u128, i128, f32, f64, bool, RocDec, RocOrder,
|
||||||
);
|
);
|
||||||
|
|
||||||
impl FromWasmerMemory for () {
|
impl FromWasmerMemory for () {
|
||||||
|
|
|
@ -1314,3 +1314,16 @@ fn str_to_dec() {
|
||||||
RocDec
|
RocDec
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Evaluates `Str.walkScalars "abcd" [] List.append` and expects the four scalars
// back in order, read on the host side as a `RocList<char>`.
#[test]
|
||||||
|
fn str_walk_scalars() {
|
||||||
|
assert_evals_to!(
|
||||||
|
indoc!(
|
||||||
|
r#"
|
||||||
|
Str.walkScalars "abcd" [] List.append
|
||||||
|
"#
|
||||||
|
),
|
||||||
|
RocList::from_slice(&['a', 'b', 'c', 'd']),
|
||||||
|
RocList<char>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue