From 3537f90e6dc33d05f8c0cf7eb39928fd9a40e59b Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Wed, 23 Feb 2022 23:10:51 +0000 Subject: [PATCH] repl: get Str working properly in CLI and Wasm REPLs --- Cargo.lock | 1 + repl_cli/Cargo.toml | 3 ++- repl_cli/src/lib.rs | 4 ++- repl_eval/src/eval.rs | 58 ++++++++++-------------------------------- repl_test/src/tests.rs | 3 --- repl_test/src/wasm.rs | 1 + repl_wasm/src/repl.rs | 20 ++++++++++++--- 7 files changed, 36 insertions(+), 54 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bbaa9d4ce2..11e1f2c839 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3713,6 +3713,7 @@ dependencies = [ "roc_mono", "roc_parse", "roc_repl_eval", + "roc_std", "roc_target", "roc_types", "rustyline", diff --git a/repl_cli/Cargo.toml b/repl_cli/Cargo.toml index 1ae373c4d5..77383b8df7 100644 --- a/repl_cli/Cargo.toml +++ b/repl_cli/Cargo.toml @@ -24,15 +24,16 @@ target-lexicon = "0.12.2" # TODO: make llvm optional roc_build = {path = "../compiler/build", features = ["llvm"]} +roc_builtins = {path = "../compiler/builtins"} roc_collections = {path = "../compiler/collections"} roc_gen_llvm = {path = "../compiler/gen_llvm"} roc_load = {path = "../compiler/load"} roc_mono = {path = "../compiler/mono"} roc_parse = {path = "../compiler/parse"} roc_repl_eval = {path = "../repl_eval"} +roc_std = {path = "../roc_std"} roc_target = {path = "../compiler/roc_target"} roc_types = {path = "../compiler/types"} -roc_builtins = {path = "../compiler/builtins"} [lib] name = "roc_repl_cli" diff --git a/repl_cli/src/lib.rs b/repl_cli/src/lib.rs index 08ccbfbdd1..76070a6e69 100644 --- a/repl_cli/src/lib.rs +++ b/repl_cli/src/lib.rs @@ -20,6 +20,7 @@ use roc_parse::parser::{EExpr, ELambda, SyntaxError}; use roc_repl_eval::eval::jit_to_ast; use roc_repl_eval::gen::{compile_to_mono, format_answer, ReplOutput}; use roc_repl_eval::{ReplApp, ReplAppMemory}; +use roc_std::RocStr; use roc_target::TargetInfo; use roc_types::pretty_print::{content_to_string, name_all_type_vars}; @@ -183,7 +184,8 @@ impl ReplAppMemory for CliMemory { deref_number!(deref_f64, f64); fn deref_str(&self, addr: usize) -> &str { - unsafe { *(addr as *const &'static str) } + let reference: &RocStr = unsafe { std::mem::transmute(addr) }; + reference.as_str() } } diff --git a/repl_eval/src/eval.rs b/repl_eval/src/eval.rs index 483d7b6be6..3ef0d2199b 100644 --- a/repl_eval/src/eval.rs +++ b/repl_eval/src/eval.rs @@ -318,10 +318,16 @@ fn jit_to_ast_help<'a, A: ReplApp<'a>>( Ok(result) } - Layout::Builtin(Builtin::Str) => Ok(app - .call_function(main_fn_name, |_, string: &'static str| { - str_to_ast(env.arena, env.arena.alloc(string)) - })), + Layout::Builtin(Builtin::Str) => { + let size = layout.stack_size(env.target_info) as usize; + Ok( + app.call_function_dynamic_size(main_fn_name, size, |mem: &A::Memory, addr| { + let string = mem.deref_str(addr); + let arena_str = env.arena.alloc_str(string); + Expr::Str(StrLiteral::PlainLine(arena_str)) + }), + ) + } Layout::Builtin(Builtin::List(elem_layout)) => Ok(app.call_function( main_fn_name, |mem: &A::Memory, (addr, len): (usize, usize)| { @@ -512,9 +518,9 @@ fn addr_to_ast<'a, M: ReplAppMemory>( list_to_ast(env, mem, elem_addr, len, elem_layout, content) } (_, Layout::Builtin(Builtin::Str)) => { - let arena_str = mem.deref_str(addr); - - str_to_ast(env.arena, arena_str) + let string = mem.deref_str(addr); + let arena_str = env.arena.alloc_str(string); + Expr::Str(StrLiteral::PlainLine(arena_str)) } (_, Layout::Struct{field_layouts, ..}) => match content { Content::Structure(FlatType::Record(fields, _)) => { @@ -1214,41 +1220,3 @@ fn num_to_ast<'a>(env: &Env<'a, '_>, num_expr: Expr<'a>, content: &Content) -> E fn number_literal_to_ast(arena: &Bump, num: T) -> Expr<'_> { Expr::Num(arena.alloc(format!("{}", num))) } - -#[cfg(target_endian = "little")] -/// NOTE: As of this writing, we don't have big-endian small strings implemented yet! -fn str_to_ast<'a>(arena: &'a Bump, string: &'a str) -> Expr<'a> { - const STR_SIZE: usize = 2 * std::mem::size_of::(); - - let bytes: [u8; STR_SIZE] = unsafe { std::mem::transmute(string) }; - let is_small = (bytes[STR_SIZE - 1] & 0b1000_0000) != 0; - - if is_small { - let len = (bytes[STR_SIZE - 1] & 0b0111_1111) as usize; - let mut string = bumpalo::collections::String::with_capacity_in(len, arena); - - for byte in bytes.iter().take(len) { - string.push(*byte as char); - } - - str_slice_to_ast(arena, arena.alloc(string)) - } else { - // Roc string literals are stored inside the constant section of the program - // That means this memory is gone when the jit function is done - // (as opposed to heap memory, which we can leak and then still use after) - // therefore we must make an owned copy of the string here - let string = bumpalo::collections::String::from_str_in(string, arena).into_bump_str(); - str_slice_to_ast(arena, string) - } -} - -fn str_slice_to_ast<'a>(_arena: &'a Bump, string: &'a str) -> Expr<'a> { - if string.contains('\n') { - todo!( - "this string contains newlines, so render it as a multiline string: {:?}", - Expr::Str(StrLiteral::PlainLine(string)) - ); - } else { - Expr::Str(StrLiteral::PlainLine(string)) - } -} diff --git a/repl_test/src/tests.rs b/repl_test/src/tests.rs index d6e88e44c3..6e1e6c31e8 100644 --- a/repl_test/src/tests.rs +++ b/repl_test/src/tests.rs @@ -221,19 +221,16 @@ fn literal_empty_str() { expect_success("\"\"", "\"\" : Str"); } -#[cfg(not(feature = "wasm"))] #[test] fn literal_ascii_str() { expect_success("\"Hello, World!\"", "\"Hello, World!\" : Str"); } -#[cfg(not(feature = "wasm"))] #[test] fn literal_utf8_str() { expect_success("\"👩‍👩‍👦‍👦\"", "\"👩‍👩‍👦‍👦\" : Str"); } -#[cfg(not(feature = "wasm"))] #[test] fn str_concat() { expect_success( diff --git a/repl_test/src/wasm.rs b/repl_test/src/wasm.rs index a6f0a86a99..18859f4397 100644 --- a/repl_test/src/wasm.rs +++ b/repl_test/src/wasm.rs @@ -254,6 +254,7 @@ pub fn expect_success(input: &'static str, expected: &str) { assert_eq!(output, expected); } +#[allow(dead_code)] pub fn expect_failure(input: &'static str, expected: &str) { let (ok, output) = run(input); assert_eq!(ok, false); diff --git a/repl_wasm/src/repl.rs b/repl_wasm/src/repl.rs index 5c3686b664..c4c39feba9 100644 --- a/repl_wasm/src/repl.rs +++ b/repl_wasm/src/repl.rs @@ -64,10 +64,22 @@ impl<'a> ReplAppMemory for WasmMemory<'a> { deref_number!(deref_f64, f64); fn deref_str(&self, addr: usize) -> &str { - let elems_addr = self.deref_usize(addr); - let len = self.deref_usize(addr + size_of::()); - let bytes = &self.copied_bytes[elems_addr..][..len]; - std::str::from_utf8(bytes).unwrap() + // We can't use RocStr, we need our own small/big string logic. + // The first field is *not* a pointer. We can calculate a pointer for it, but only for big strings. + // If changing this code, remember it also runs in wasm32, not just the app. + let last_byte = self.copied_bytes[addr + 7] as i8; + let is_small = last_byte < 0; + + let str_bytes = if is_small { + let len = (last_byte & 0x7f) as usize; + &self.copied_bytes[addr..][..len] + } else { + let chars_index = self.deref_usize(addr); + let len = self.deref_usize(addr + 4); + &self.copied_bytes[chars_index..][..len] + }; + + unsafe { std::str::from_utf8_unchecked(str_bytes) } } }