repl: get Str working properly in CLI and Wasm REPLs

This commit is contained in:
Brian Carroll 2022-02-23 23:10:51 +00:00
parent 9544b3ba6c
commit 3537f90e6d
7 changed files with 36 additions and 54 deletions

1
Cargo.lock generated
View file

@ -3713,6 +3713,7 @@ dependencies = [
"roc_mono",
"roc_parse",
"roc_repl_eval",
"roc_std",
"roc_target",
"roc_types",
"rustyline",

View file

@ -24,15 +24,16 @@ target-lexicon = "0.12.2"
# TODO: make llvm optional
roc_build = {path = "../compiler/build", features = ["llvm"]}
roc_builtins = {path = "../compiler/builtins"}
roc_collections = {path = "../compiler/collections"}
roc_gen_llvm = {path = "../compiler/gen_llvm"}
roc_load = {path = "../compiler/load"}
roc_mono = {path = "../compiler/mono"}
roc_parse = {path = "../compiler/parse"}
roc_repl_eval = {path = "../repl_eval"}
roc_std = {path = "../roc_std"}
roc_target = {path = "../compiler/roc_target"}
roc_types = {path = "../compiler/types"}
roc_builtins = {path = "../compiler/builtins"}
[lib]
name = "roc_repl_cli"

View file

@ -20,6 +20,7 @@ use roc_parse::parser::{EExpr, ELambda, SyntaxError};
use roc_repl_eval::eval::jit_to_ast;
use roc_repl_eval::gen::{compile_to_mono, format_answer, ReplOutput};
use roc_repl_eval::{ReplApp, ReplAppMemory};
use roc_std::RocStr;
use roc_target::TargetInfo;
use roc_types::pretty_print::{content_to_string, name_all_type_vars};
@ -183,7 +184,8 @@ impl ReplAppMemory for CliMemory {
deref_number!(deref_f64, f64);
/// Reads the string stored at `addr` and returns it as a `&str`.
///
/// NOTE(review): this hunk appears to show both sides of the change — the hunk header
/// (7 lines -> 8 lines) suggests the first body line is the removed version and the
/// following two lines are its replacement. Confirm against the actual file.
fn deref_str(&self, addr: usize) -> &str {
// Earlier version: treats `addr` as pointing at a `&'static str` and copies that reference out.
unsafe { *(addr as *const &'static str) }
// Newer version: reinterprets `addr` itself as a `&RocStr` and borrows its contents.
// NOTE(review): soundness relies on `addr` being the address of a live, aligned `RocStr`;
// nothing in this block checks that — verify against the callers.
let reference: &RocStr = unsafe { std::mem::transmute(addr) };
reference.as_str()
}
}

View file

@ -318,10 +318,16 @@ fn jit_to_ast_help<'a, A: ReplApp<'a>>(
Ok(result)
}
Layout::Builtin(Builtin::Str) => Ok(app
.call_function(main_fn_name, |_, string: &'static str| {
str_to_ast(env.arena, env.arena.alloc(string))
})),
Layout::Builtin(Builtin::Str) => {
let size = layout.stack_size(env.target_info) as usize;
Ok(
app.call_function_dynamic_size(main_fn_name, size, |mem: &A::Memory, addr| {
let string = mem.deref_str(addr);
let arena_str = env.arena.alloc_str(string);
Expr::Str(StrLiteral::PlainLine(arena_str))
}),
)
}
Layout::Builtin(Builtin::List(elem_layout)) => Ok(app.call_function(
main_fn_name,
|mem: &A::Memory, (addr, len): (usize, usize)| {
@ -512,9 +518,9 @@ fn addr_to_ast<'a, M: ReplAppMemory>(
list_to_ast(env, mem, elem_addr, len, elem_layout, content)
}
(_, Layout::Builtin(Builtin::Str)) => {
let arena_str = mem.deref_str(addr);
str_to_ast(env.arena, arena_str)
let string = mem.deref_str(addr);
let arena_str = env.arena.alloc_str(string);
Expr::Str(StrLiteral::PlainLine(arena_str))
}
(_, Layout::Struct{field_layouts, ..}) => match content {
Content::Structure(FlatType::Record(fields, _)) => {
@ -1214,41 +1220,3 @@ fn num_to_ast<'a>(env: &Env<'a, '_>, num_expr: Expr<'a>, content: &Content) -> E
/// Renders `num` with its `Display` impl and wraps the resulting text in an `Expr::Num`.
///
/// The text is copied into `arena` with `alloc_str`. Moving an owned `String` into the bump
/// arena instead would leak the `String`'s heap buffer, because a `Bump` never runs the
/// destructors of the values it holds.
fn number_literal_to_ast<T: std::fmt::Display>(arena: &Bump, num: T) -> Expr<'_> {
    // The temporary `String` is dropped (and its buffer freed) once the copy is made.
    let text = num.to_string();
    Expr::Num(arena.alloc_str(&text))
}
#[cfg(target_endian = "little")]
/// Builds a string-literal expression from a two-word string value that is handed over typed
/// as a `&str`.
///
/// The value's raw bytes are inspected first: if the high bit of the last byte is set, the
/// string is "small" and its text is stored inline in those bytes, with the length in the low
/// seven bits of the last byte. Otherwise the value is used as an ordinary `&str`.
///
/// NOTE: As of this writing, we don't have big-endian small strings implemented yet!
fn str_to_ast<'a>(arena: &'a Bump, string: &'a str) -> Expr<'a> {
    const STR_SIZE: usize = 2 * std::mem::size_of::<usize>();

    // SAFETY: `transmute` only compiles when both types have the same size, and every bit
    // pattern is a valid `[u8; STR_SIZE]`.
    let bytes: [u8; STR_SIZE] = unsafe { std::mem::transmute(string) };
    let tag = bytes[STR_SIZE - 1];
    let is_small = (tag & 0b1000_0000) != 0;

    if is_small {
        // Clamp so a corrupt length byte cannot index past the inline buffer.
        let len = ((tag & 0b0111_1111) as usize).min(STR_SIZE);

        // Decode the inline bytes as UTF-8. Pushing each byte as its own `char` would turn
        // every multi-byte code point into several Latin-1 characters.
        let decoded = std::string::String::from_utf8_lossy(&bytes[..len]);

        str_slice_to_ast(arena, arena.alloc_str(&decoded))
    } else {
        // Roc string literals are stored inside the constant section of the program
        // That means this memory is gone when the jit function is done
        // (as opposed to heap memory, which we can leak and then still use after)
        // therefore we must make an owned copy of the string here
        let string = bumpalo::collections::String::from_str_in(string, arena).into_bump_str();

        str_slice_to_ast(arena, string)
    }
}
/// Wraps `string` in a single-line string-literal expression.
///
/// Multiline rendering is not implemented yet: a string containing `'\n'` hits `todo!`.
/// The arena parameter is currently unused.
fn str_slice_to_ast<'a>(_arena: &'a Bump, string: &'a str) -> Expr<'a> {
    let has_newline = string.contains('\n');

    // Common case first: no newline, so a plain single-line literal is enough.
    if !has_newline {
        return Expr::Str(StrLiteral::PlainLine(string));
    }

    todo!(
        "this string contains newlines, so render it as a multiline string: {:?}",
        Expr::Str(StrLiteral::PlainLine(string))
    )
}

View file

@ -221,19 +221,16 @@ fn literal_empty_str() {
expect_success("\"\"", "\"\" : Str");
}
#[cfg(not(feature = "wasm"))]
#[test]
/// An ASCII string literal is echoed back unchanged, annotated with type `Str`.
fn literal_ascii_str() {
    let input = "\"Hello, World!\"";
    let expected = "\"Hello, World!\" : Str";

    expect_success(input, expected);
}
#[cfg(not(feature = "wasm"))]
#[test]
/// A string literal made of multi-byte UTF-8 code points is echoed back unchanged,
/// annotated with type `Str`.
fn literal_utf8_str() {
    let input = "\"👩‍👩‍👦‍👦\"";
    let expected = "\"👩‍👩‍👦‍👦\" : Str";

    expect_success(input, expected);
}
#[cfg(not(feature = "wasm"))]
#[test]
fn str_concat() {
expect_success(

View file

@ -254,6 +254,7 @@ pub fn expect_success(input: &'static str, expected: &str) {
assert_eq!(output, expected);
}
#[allow(dead_code)]
pub fn expect_failure(input: &'static str, expected: &str) {
let (ok, output) = run(input);
assert_eq!(ok, false);

View file

@ -64,10 +64,22 @@ impl<'a> ReplAppMemory for WasmMemory<'a> {
deref_number!(deref_f64, f64);
/// Reads the string stored at `addr` inside `copied_bytes` and returns it as a `&str`.
///
/// NOTE(review): this hunk appears to show both sides of the change — the hunk header
/// (10 lines -> 22 lines) suggests the first four body lines are the removed version and
/// everything from the "We can't use RocStr" comment onwards is its replacement. Confirm
/// against the actual file.
fn deref_str(&self, addr: usize) -> &str {
// Earlier version: always reads an index at `addr` and a length one host `usize` further on,
// then validates the bytes as UTF-8 (panicking via `unwrap` if they are not).
let elems_addr = self.deref_usize(addr);
let len = self.deref_usize(addr + size_of::<usize>());
let bytes = &self.copied_bytes[elems_addr..][..len];
std::str::from_utf8(bytes).unwrap()
// We can't use RocStr, we need our own small/big string logic.
// The first field is *not* a pointer. We can calculate a pointer for it, but only for big strings.
// If changing this code, remember it also runs in wasm32, not just the app.
// The byte at offset 7 is read as signed, so "negative" means its top bit is set.
// NOTE(review): the fixed offsets 7 and 4 presumably reflect an 8-byte string value made of
// two 4-byte fields in the wasm32 app — confirm against the string layout.
let last_byte = self.copied_bytes[addr + 7] as i8;
let is_small = last_byte < 0;
let str_bytes = if is_small {
// Small string: the low seven bits of the last byte are the length, and the text
// starts at `addr` itself.
let len = (last_byte & 0x7f) as usize;
&self.copied_bytes[addr..][..len]
} else {
// Big string: the first field is an index into `copied_bytes`, the field at offset 4
// is the length.
let chars_index = self.deref_usize(addr);
let len = self.deref_usize(addr + 4);
&self.copied_bytes[chars_index..][..len]
};
// No UTF-8 validation is performed here.
// NOTE(review): this relies on the app only ever producing valid UTF-8 — confirm.
unsafe { std::str::from_utf8_unchecked(str_bytes) }
}
}