wasm: fix Str.fromUtf8

This commit is contained in:
Brian Carroll 2022-07-04 10:56:28 +01:00
parent 02ec30425c
commit a342f02824
No known key found for this signature in database
GPG key ID: 9CF4E3BF9C4722C7
4 changed files with 238 additions and 239 deletions

View file

@ -1803,7 +1803,7 @@ fn str_from_utf8(symbol: Symbol, var_store: &mut VarStore) -> Def {
// Ok arg_2.str
// else
// # problem
// Err (BadUtf8 { byteIndex: arg_2.byteIndex, problem : arg_2.problem })
// Err (BadUtf8 arg_2.problem arg_2.byteIndex)
let def = crate::def::Def {
loc_pattern: no_region(Pattern::Identifier(Symbol::ARG_2)),

View file

@ -270,6 +270,6 @@ pub const DEBUG_SETTINGS: WasmDebugSettings = WasmDebugSettings {
let_stmt_ir: false && cfg!(debug_assertions),
instructions: false && cfg!(debug_assertions),
storage_map: false && cfg!(debug_assertions),
keep_test_binary: false && cfg!(debug_assertions),
keep_test_binary: true && cfg!(debug_assertions),
skip_dead_code_elim: false && cfg!(debug_assertions),
};

View file

@ -260,7 +260,36 @@ impl<'a> LowLevelCall<'a> {
}
StrFromInt => self.num_to_str(backend),
StrFromFloat => self.num_to_str(backend),
StrFromUtf8 => self.load_args_and_call_zig(backend, bitcode::STR_FROM_UTF8),
StrFromUtf8 => {
/*
Low-level op returns a struct with all the data for both Ok and Err.
Roc AST wrapper converts this to a tag union, with app-dependent tag IDs.

Zig signature:
fromUtf8C(arg: RocList, update_mode: UpdateMode, output: *FromUtf8Result) callconv(.C) void

Wasm-level arguments (4 values in total):
arg: RocList i64, i32
update_mode: UpdateMode i32
output: *FromUtf8Result i32
*/
// The result struct must live in stack memory; resolve it to a (local, offset) pair.
// Any other kind of storage is treated as a compiler bug.
let (ret_ptr, ret_offset) = match &self.ret_storage {
StoredValue::StackMemory { location, .. } => {
location.local_and_offset(backend.storage.stack_frame_pointer)
}
_ => internal_error!("Low-level op StrFromUtf8 should return a struct"),
};
// Return pointer is the last arg rather than the first, so we can't use the usual helper.
// Argument 1: the list of bytes.
backend
.storage
.load_symbol_zig(&mut backend.code_builder, self.arguments[0]);
// Argument 2: the update mode, always immutable here.
backend.code_builder.i32_const(UPDATE_MODE_IMMUTABLE);
// Argument 3: address of the output struct, i.e. ret_ptr plus ret_offset.
backend.code_builder.get_local(ret_ptr);
if ret_offset != 0 {
// Only emit the addition when there is a non-zero offset to apply.
backend.code_builder.i32_const(ret_offset as i32);
backend.code_builder.i32_add();
}
// 4 matches the Wasm-level argument count listed above.
// NOTE(review): the `false` presumably means "no return value", since the result is
// written through the output pointer — confirm against call_host_fn_after_loading_args.
backend.call_host_fn_after_loading_args(bitcode::STR_FROM_UTF8, 4, false);
}
StrTrimLeft => self.load_args_and_call_zig(backend, bitcode::STR_TRIM_LEFT),
StrTrimRight => self.load_args_and_call_zig(backend, bitcode::STR_TRIM_RIGHT),
StrFromUtf8Range => self.load_args_and_call_zig(backend, bitcode::STR_FROM_UTF8_RANGE),

View file

@ -486,224 +486,224 @@ fn str_starts_with_false_small_str() {
assert_evals_to!(r#"Str.startsWith "1234" "23""#, false, bool);
}
// #[test]
// fn str_from_utf8_pass_single_ascii() {
// assert_evals_to!(
// indoc!(
// r#"
// when Str.fromUtf8 [97] is
// Ok val -> val
// Err _ -> ""
// "#
// ),
// roc_std::RocStr::from("a"),
// roc_std::RocStr
// );
// }
/// `Str.fromUtf8` on the single ASCII byte 97 must take the `Ok` branch and yield "a".
#[test]
fn str_from_utf8_pass_single_ascii() {
    assert_evals_to!(
        indoc!(
            // Roc is indentation-sensitive: the `when` branches must be indented.
            r#"
                when Str.fromUtf8 [97] is
                    Ok val -> val
                    Err _ -> ""
            "#
        ),
        roc_std::RocStr::from("a"),
        roc_std::RocStr
    );
}
// #[test]
// fn str_from_utf8_pass_many_ascii() {
// assert_evals_to!(
// indoc!(
// r#"
// when Str.fromUtf8 [97, 98, 99, 0x7E] is
// Ok val -> val
// Err _ -> ""
// "#
// ),
// roc_std::RocStr::from("abc~"),
// roc_std::RocStr
// );
// }
/// `Str.fromUtf8` on several ASCII bytes (`a`, `b`, `c`, `~`) must yield "abc~".
#[test]
fn str_from_utf8_pass_many_ascii() {
    assert_evals_to!(
        indoc!(
            // Roc is indentation-sensitive: the `when` branches must be indented.
            r#"
                when Str.fromUtf8 [97, 98, 99, 0x7E] is
                    Ok val -> val
                    Err _ -> ""
            "#
        ),
        roc_std::RocStr::from("abc~"),
        roc_std::RocStr
    );
}
// #[test]
// fn str_from_utf8_pass_single_unicode() {
// assert_evals_to!(
// indoc!(
// r#"
// when Str.fromUtf8 [0xE2, 0x88, 0x86] is
// Ok val -> val
// Err _ -> ""
// "#
// ),
// roc_std::RocStr::from("∆"),
// roc_std::RocStr
// );
// }
/// `Str.fromUtf8` on one three-byte sequence must decode to a single non-ASCII character.
///
/// The bytes 0xE2 0x88 0x86 are the UTF-8 encoding of U+2206 ("∆"), so the `Ok` branch
/// must return exactly that string (an empty string would only match the `Err` fallback).
#[test]
fn str_from_utf8_pass_single_unicode() {
    assert_evals_to!(
        indoc!(
            // Roc is indentation-sensitive: the `when` branches must be indented.
            r#"
                when Str.fromUtf8 [0xE2, 0x88, 0x86] is
                    Ok val -> val
                    Err _ -> ""
            "#
        ),
        roc_std::RocStr::from("∆"),
        roc_std::RocStr
    );
}
// #[test]
// fn str_from_utf8_pass_many_unicode() {
// assert_evals_to!(
// indoc!(
// r#"
// when Str.fromUtf8 [0xE2, 0x88, 0x86, 0xC5, 0x93, 0xC2, 0xAC] is
// Ok val -> val
// Err _ -> ""
// "#
// ),
// roc_std::RocStr::from("∆œ¬"),
// roc_std::RocStr
// );
// }
/// `Str.fromUtf8` on a mix of three-byte and two-byte sequences must yield "∆œ¬".
#[test]
fn str_from_utf8_pass_many_unicode() {
    assert_evals_to!(
        indoc!(
            // Roc is indentation-sensitive: the `when` branches must be indented.
            r#"
                when Str.fromUtf8 [0xE2, 0x88, 0x86, 0xC5, 0x93, 0xC2, 0xAC] is
                    Ok val -> val
                    Err _ -> ""
            "#
        ),
        roc_std::RocStr::from("∆œ¬"),
        roc_std::RocStr
    );
}
// #[test]
// fn str_from_utf8_pass_single_grapheme() {
// assert_evals_to!(
// indoc!(
// r#"
// when Str.fromUtf8 [0xF0, 0x9F, 0x92, 0x96] is
// Ok val -> val
// Err _ -> ""
// "#
// ),
// roc_std::RocStr::from("💖"),
// roc_std::RocStr
// );
// }
/// `Str.fromUtf8` on one four-byte sequence must yield the single emoji "💖".
#[test]
fn str_from_utf8_pass_single_grapheme() {
    assert_evals_to!(
        indoc!(
            // Roc is indentation-sensitive: the `when` branches must be indented.
            r#"
                when Str.fromUtf8 [0xF0, 0x9F, 0x92, 0x96] is
                    Ok val -> val
                    Err _ -> ""
            "#
        ),
        roc_std::RocStr::from("💖"),
        roc_std::RocStr
    );
}
// #[test]
// fn str_from_utf8_pass_many_grapheme() {
// assert_evals_to!(
// indoc!(
// r#"
// when Str.fromUtf8 [0xF0, 0x9F, 0x92, 0x96, 0xF0, 0x9F, 0xA4, 0xA0, 0xF0, 0x9F, 0x9A, 0x80] is
// Ok val -> val
// Err _ -> ""
// "#
// ),
// roc_std::RocStr::from("💖🤠🚀"),
// roc_std::RocStr
// );
// }
/// `Str.fromUtf8` on three consecutive four-byte sequences must yield "💖🤠🚀".
#[test]
fn str_from_utf8_pass_many_grapheme() {
    assert_evals_to!(
        indoc!(
            // Roc is indentation-sensitive: the `when` branches must be indented.
            r#"
                when Str.fromUtf8 [0xF0, 0x9F, 0x92, 0x96, 0xF0, 0x9F, 0xA4, 0xA0, 0xF0, 0x9F, 0x9A, 0x80] is
                    Ok val -> val
                    Err _ -> ""
            "#
        ),
        roc_std::RocStr::from("💖🤠🚀"),
        roc_std::RocStr
    );
}
// #[test]
// fn str_from_utf8_pass_all() {
// assert_evals_to!(
// indoc!(
// r#"
// when Str.fromUtf8 [0xF0, 0x9F, 0x92, 0x96, 98, 0xE2, 0x88, 0x86] is
// Ok val -> val
// Err _ -> ""
// "#
// ),
// roc_std::RocStr::from("💖b∆"),
// roc_std::RocStr
// );
// }
/// `Str.fromUtf8` on mixed-width input (four-byte, one-byte, three-byte) must yield "💖b∆".
#[test]
fn str_from_utf8_pass_all() {
    assert_evals_to!(
        indoc!(
            // Roc is indentation-sensitive: the `when` branches must be indented.
            r#"
                when Str.fromUtf8 [0xF0, 0x9F, 0x92, 0x96, 98, 0xE2, 0x88, 0x86] is
                    Ok val -> val
                    Err _ -> ""
            "#
        ),
        roc_std::RocStr::from("💖b∆"),
        roc_std::RocStr
    );
}
// #[test]
// fn str_from_utf8_fail_invalid_start_byte() {
// assert_evals_to!(
// indoc!(
// r#"
// when Str.fromUtf8 [97, 98, 0x80, 99] is
// Err (BadUtf8 InvalidStartByte byteIndex) ->
// if byteIndex == 2 then
// "a"
// else
// "b"
// _ -> ""
// "#
// ),
// roc_std::RocStr::from("a"),
// roc_std::RocStr
// );
// }
/// A stray continuation byte (0x80) at index 2 must be reported as
/// `Err (BadUtf8 InvalidStartByte byteIndex)`.
///
/// The Roc snippet returns "a" only when the reported `byteIndex` is 2; "b" means the
/// index was wrong and "" means a different result was produced altogether.
#[test]
fn str_from_utf8_fail_invalid_start_byte() {
    assert_evals_to!(
        indoc!(
            // Roc is indentation-sensitive: branches and their bodies must be indented.
            r#"
                when Str.fromUtf8 [97, 98, 0x80, 99] is
                    Err (BadUtf8 InvalidStartByte byteIndex) ->
                        if byteIndex == 2 then
                            "a"
                        else
                            "b"
                    _ -> ""
            "#
        ),
        roc_std::RocStr::from("a"),
        roc_std::RocStr
    );
}
// #[test]
// fn str_from_utf8_fail_unexpected_end_of_sequence() {
// assert_evals_to!(
// indoc!(
// r#"
// when Str.fromUtf8 [97, 98, 99, 0xC2] is
// Err (BadUtf8 UnexpectedEndOfSequence byteIndex) ->
// if byteIndex == 3 then
// "a"
// else
// "b"
// _ -> ""
// "#
// ),
// roc_std::RocStr::from("a"),
// roc_std::RocStr
// );
// }
/// Input that ends right after a lead byte (0xC2 at index 3) must be reported as
/// `Err (BadUtf8 UnexpectedEndOfSequence byteIndex)`.
///
/// The Roc snippet returns "a" only when the reported `byteIndex` is 3; "b" means the
/// index was wrong and "" means a different result was produced altogether.
#[test]
fn str_from_utf8_fail_unexpected_end_of_sequence() {
    assert_evals_to!(
        indoc!(
            // Roc is indentation-sensitive: branches and their bodies must be indented.
            r#"
                when Str.fromUtf8 [97, 98, 99, 0xC2] is
                    Err (BadUtf8 UnexpectedEndOfSequence byteIndex) ->
                        if byteIndex == 3 then
                            "a"
                        else
                            "b"
                    _ -> ""
            "#
        ),
        roc_std::RocStr::from("a"),
        roc_std::RocStr
    );
}
// #[test]
// fn str_from_utf8_fail_expected_continuation() {
// assert_evals_to!(
// indoc!(
// r#"
// when Str.fromUtf8 [97, 98, 99, 0xC2, 0x00] is
// Err (BadUtf8 ExpectedContinuation byteIndex) ->
// if byteIndex == 3 then
// "a"
// else
// "b"
// _ -> ""
// "#
// ),
// roc_std::RocStr::from("a"),
// roc_std::RocStr
// );
// }
/// A lead byte (0xC2 at index 3) followed by a non-continuation byte (0x00) must be
/// reported as `Err (BadUtf8 ExpectedContinuation byteIndex)`.
///
/// The Roc snippet returns "a" only when the reported `byteIndex` is 3; "b" means the
/// index was wrong and "" means a different result was produced altogether.
#[test]
fn str_from_utf8_fail_expected_continuation() {
    assert_evals_to!(
        indoc!(
            // Roc is indentation-sensitive: branches and their bodies must be indented.
            r#"
                when Str.fromUtf8 [97, 98, 99, 0xC2, 0x00] is
                    Err (BadUtf8 ExpectedContinuation byteIndex) ->
                        if byteIndex == 3 then
                            "a"
                        else
                            "b"
                    _ -> ""
            "#
        ),
        roc_std::RocStr::from("a"),
        roc_std::RocStr
    );
}
// #[test]
// fn str_from_utf8_fail_overlong_encoding() {
// assert_evals_to!(
// indoc!(
// r#"
// when Str.fromUtf8 [97, 0xF0, 0x80, 0x80, 0x80] is
// Err (BadUtf8 OverlongEncoding byteIndex) ->
// if byteIndex == 1 then
// "a"
// else
// "b"
// _ -> ""
// "#
// ),
// roc_std::RocStr::from("a"),
// roc_std::RocStr
// );
// }
/// The four-byte sequence 0xF0 0x80 0x80 0x80 starting at index 1 must be reported as
/// `Err (BadUtf8 OverlongEncoding byteIndex)`.
///
/// The Roc snippet returns "a" only when the reported `byteIndex` is 1; "b" means the
/// index was wrong and "" means a different result was produced altogether.
#[test]
fn str_from_utf8_fail_overlong_encoding() {
    assert_evals_to!(
        indoc!(
            // Roc is indentation-sensitive: branches and their bodies must be indented.
            r#"
                when Str.fromUtf8 [97, 0xF0, 0x80, 0x80, 0x80] is
                    Err (BadUtf8 OverlongEncoding byteIndex) ->
                        if byteIndex == 1 then
                            "a"
                        else
                            "b"
                    _ -> ""
            "#
        ),
        roc_std::RocStr::from("a"),
        roc_std::RocStr
    );
}
// #[test]
// fn str_from_utf8_fail_codepoint_too_large() {
// assert_evals_to!(
// indoc!(
// r#"
// when Str.fromUtf8 [97, 0xF4, 0x90, 0x80, 0x80] is
// Err (BadUtf8 CodepointTooLarge byteIndex) ->
// if byteIndex == 1 then
// "a"
// else
// "b"
// _ -> ""
// "#
// ),
// roc_std::RocStr::from("a"),
// roc_std::RocStr
// );
// }
/// The four-byte sequence 0xF4 0x90 0x80 0x80 starting at index 1 must be reported as
/// `Err (BadUtf8 CodepointTooLarge byteIndex)`.
///
/// The Roc snippet returns "a" only when the reported `byteIndex` is 1; "b" means the
/// index was wrong and "" means a different result was produced altogether.
#[test]
fn str_from_utf8_fail_codepoint_too_large() {
    assert_evals_to!(
        indoc!(
            // Roc is indentation-sensitive: branches and their bodies must be indented.
            r#"
                when Str.fromUtf8 [97, 0xF4, 0x90, 0x80, 0x80] is
                    Err (BadUtf8 CodepointTooLarge byteIndex) ->
                        if byteIndex == 1 then
                            "a"
                        else
                            "b"
                    _ -> ""
            "#
        ),
        roc_std::RocStr::from("a"),
        roc_std::RocStr
    );
}
// #[test]
// fn str_from_utf8_fail_surrogate_half() {
// assert_evals_to!(
// indoc!(
// r#"
// when Str.fromUtf8 [97, 98, 0xED, 0xA0, 0x80] is
// Err (BadUtf8 EncodesSurrogateHalf byteIndex) ->
// if byteIndex == 2 then
// "a"
// else
// "b"
// _ -> ""
// "#
// ),
// roc_std::RocStr::from("a"),
// roc_std::RocStr
// );
// }
/// The three-byte sequence 0xED 0xA0 0x80 starting at index 2 must be reported as
/// `Err (BadUtf8 EncodesSurrogateHalf byteIndex)`.
///
/// The Roc snippet returns "a" only when the reported `byteIndex` is 2; "b" means the
/// index was wrong and "" means a different result was produced altogether.
#[test]
fn str_from_utf8_fail_surrogate_half() {
    assert_evals_to!(
        indoc!(
            // Roc is indentation-sensitive: branches and their bodies must be indented.
            r#"
                when Str.fromUtf8 [97, 98, 0xED, 0xA0, 0x80] is
                    Err (BadUtf8 EncodesSurrogateHalf byteIndex) ->
                        if byteIndex == 2 then
                            "a"
                        else
                            "b"
                    _ -> ""
            "#
        ),
        roc_std::RocStr::from("a"),
        roc_std::RocStr
    );
}
#[test]
fn str_equality() {
@ -717,36 +717,6 @@ fn str_equality() {
assert_evals_to!(r#""a" == "b""#, false, bool);
}
// #[test]
// fn nested_recursive_literal() {
// assert_evals_to!(
// indoc!(
// r#"
// Expr : [Add Expr Expr, Val I64, Var I64]
// expr : Expr
// expr = Add (Add (Val 3) (Val 1)) (Add (Val 1) (Var 1))
// printExpr : Expr -> Str
// printExpr = \e ->
// when e is
// Add a b ->
// "Add ("
// |> Str.concat (printExpr a)
// |> Str.concat ") ("
// |> Str.concat (printExpr b)
// |> Str.concat ")"
// Val v -> "Val " |> Str.concat (Num.toStr v)
// Var v -> "Var " |> Str.concat (Num.toStr v)
// printExpr expr
// "#
// ),
// RocStr::from("Add (Add (Val 3) (Val 1)) (Add (Val 1) (Var 1))"),
// RocStr
// );
// }
#[test]
fn str_join_comma_small() {
assert_evals_to!(