store big strings on the heap; enable a lot more string tests

This commit is contained in:
Folkert 2023-02-11 18:20:21 +01:00
parent 1e1da8a61b
commit 4afeb1d67a
No known key found for this signature in database
GPG key ID: 1F17F6FFD112B97C
3 changed files with 173 additions and 74 deletions

View file

@ -2,7 +2,7 @@ use crate::{
single_register_floats, single_register_int_builtins, single_register_integers, Backend, Env,
Relocation,
};
use bumpalo::collections::Vec;
use bumpalo::collections::{CollectIn, Vec};
use roc_builtins::bitcode::{self, FloatWidth, IntWidth};
use roc_collections::all::MutMap;
use roc_error_macros::internal_error;
@ -1832,7 +1832,7 @@ impl<
&mut self,
sym: &Symbol,
element_in_layout: &InLayout<'a>,
elements: &'a [ListLiteralElement<'a>],
elements: &[ListLiteralElement<'a>],
) {
let element_layout = self.layout_interner.get(*element_in_layout);
let element_width = self.layout_interner.stack_size(*element_in_layout) as u64;
@ -2092,33 +2092,45 @@ impl<
let val = *x as f32;
ASM::mov_freg32_imm32(&mut self.buf, &mut self.relocs, reg, val);
}
(Literal::Str(x), Layout::Builtin(Builtin::Str)) if x.len() < 24 => {
// Load small string.
self.storage_manager.with_tmp_general_reg(
&mut self.buf,
|storage_manager, buf, reg| {
let base_offset = storage_manager.claim_stack_area(sym, 24);
let mut bytes = [0; 24];
bytes[..x.len()].copy_from_slice(x.as_bytes());
bytes[23] = (x.len() as u8) | 0b1000_0000;
(Literal::Str(x), Layout::Builtin(Builtin::Str)) => {
if x.len() < 24 {
// Load small string.
self.storage_manager.with_tmp_general_reg(
&mut self.buf,
|storage_manager, buf, reg| {
let base_offset = storage_manager.claim_stack_area(sym, 24);
let mut bytes = [0; 24];
bytes[..x.len()].copy_from_slice(x.as_bytes());
bytes[23] = (x.len() as u8) | 0b1000_0000;
let mut num_bytes = [0; 8];
num_bytes.copy_from_slice(&bytes[..8]);
let num = i64::from_ne_bytes(num_bytes);
ASM::mov_reg64_imm64(buf, reg, num);
ASM::mov_base32_reg64(buf, base_offset, reg);
let mut num_bytes = [0; 8];
num_bytes.copy_from_slice(&bytes[..8]);
let num = i64::from_ne_bytes(num_bytes);
ASM::mov_reg64_imm64(buf, reg, num);
ASM::mov_base32_reg64(buf, base_offset, reg);
num_bytes.copy_from_slice(&bytes[8..16]);
let num = i64::from_ne_bytes(num_bytes);
ASM::mov_reg64_imm64(buf, reg, num);
ASM::mov_base32_reg64(buf, base_offset + 8, reg);
num_bytes.copy_from_slice(&bytes[8..16]);
let num = i64::from_ne_bytes(num_bytes);
ASM::mov_reg64_imm64(buf, reg, num);
ASM::mov_base32_reg64(buf, base_offset + 8, reg);
num_bytes.copy_from_slice(&bytes[16..]);
let num = i64::from_ne_bytes(num_bytes);
ASM::mov_reg64_imm64(buf, reg, num);
ASM::mov_base32_reg64(buf, base_offset + 16, reg);
},
);
num_bytes.copy_from_slice(&bytes[16..]);
let num = i64::from_ne_bytes(num_bytes);
ASM::mov_reg64_imm64(buf, reg, num);
ASM::mov_base32_reg64(buf, base_offset + 16, reg);
},
);
} else {
// load large string (pretend it's a `List U8`). We should move this data into
// the binary eventually because our RC algorithm won't free this value
let elements: Vec<_> = x
.as_bytes()
.iter()
.map(|b| ListLiteralElement::Literal(Literal::Byte(*b)))
.collect_in(self.storage_manager.env.arena);
self.create_array(sym, &Layout::U8, elements.into_bump_slice())
}
}
x => todo!("loading literal, {:?}", x),
}

View file

@ -833,6 +833,94 @@ trait Backend<'a> {
arg_layouts,
ret_layout,
),
// LowLevel::StrToUtf8 => self.build_fn_call(
// sym,
// bitcode::STR_TO_UTF8.to_string(),
// args,
// arg_layouts,
// ret_layout,
// ),
LowLevel::StrRepeat => self.build_fn_call(
sym,
bitcode::STR_REPEAT.to_string(),
args,
arg_layouts,
ret_layout,
),
LowLevel::StrTrim => self.build_fn_call(
sym,
bitcode::STR_TRIM.to_string(),
args,
arg_layouts,
ret_layout,
),
LowLevel::StrTrimLeft => self.build_fn_call(
sym,
bitcode::STR_TRIM_LEFT.to_string(),
args,
arg_layouts,
ret_layout,
),
LowLevel::StrTrimRight => self.build_fn_call(
sym,
bitcode::STR_TRIM_RIGHT.to_string(),
args,
arg_layouts,
ret_layout,
),
LowLevel::StrReserve => self.build_fn_call(
sym,
bitcode::STR_RESERVE.to_string(),
args,
arg_layouts,
ret_layout,
),
LowLevel::StrWithCapacity => self.build_fn_call(
sym,
bitcode::STR_WITH_CAPACITY.to_string(),
args,
arg_layouts,
ret_layout,
),
LowLevel::StrToScalars => self.build_fn_call(
sym,
bitcode::STR_TO_SCALARS.to_string(),
args,
arg_layouts,
ret_layout,
),
LowLevel::StrGetUnsafe => self.build_fn_call(
sym,
bitcode::STR_GET_UNSAFE.to_string(),
args,
arg_layouts,
ret_layout,
),
LowLevel::StrGetScalarUnsafe => self.build_fn_call(
sym,
bitcode::STR_GET_SCALAR_UNSAFE.to_string(),
args,
arg_layouts,
ret_layout,
),
// LowLevel::StrToNum => {
// let number_layout = match self.interner().get(*ret_layout) {
// Layout::Struct { field_layouts, .. } => field_layouts[0], // TODO: why is it sometimes a struct?
// _ => unreachable!(),
// };
//
// // match on the return layout to figure out which zig builtin we need
// let intrinsic = match self.interner().get(number_layout) {
// Layout::Builtin(Builtin::Int(int_width)) => &bitcode::STR_TO_INT[int_width],
// Layout::Builtin(Builtin::Float(float_width)) => {
// &bitcode::STR_TO_FLOAT[float_width]
// }
// Layout::Builtin(Builtin::Decimal) => bitcode::DEC_FROM_STR,
// _ => unreachable!(),
// };
//
// self.build_fn_call(sym, intrinsic.to_string(), args, arg_layouts, ret_layout)
// }
LowLevel::PtrCast => {
debug_assert_eq!(
1,

View file

@ -16,7 +16,7 @@ use indoc::indoc;
use roc_std::{RocList, RocResult, RocStr};
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_split_empty_delimiter() {
assert_evals_to!(
indoc!(
@ -46,7 +46,7 @@ fn str_split_empty_delimiter() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_split_bigger_delimiter_small_str() {
assert_evals_to!(
indoc!(
@ -110,7 +110,7 @@ fn str_split_small_str_bigger_delimiter() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_split_big_str_small_delimiter() {
assert_evals_to!(
indoc!(
@ -154,7 +154,7 @@ fn str_split_small_str_small_delimiter() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_split_bigger_delimiter_big_strs() {
assert_evals_to!(
indoc!(
@ -198,7 +198,7 @@ fn str_split_minimal_example() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_split_small_str_big_delimiter() {
assert_evals_to!(
indoc!(
@ -227,7 +227,7 @@ fn str_split_small_str_big_delimiter() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_split_small_str_20_char_delimiter() {
assert_evals_to!(
indoc!(
@ -243,7 +243,7 @@ fn str_split_small_str_20_char_delimiter() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_concat_big_to_big() {
assert_evals_to!(
indoc!(
@ -402,7 +402,7 @@ fn small_str_concat_empty_second_arg() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn small_str_concat_small_to_big() {
assert_evals_to!(
r#"Str.concat "abc" " this is longer than 15 chars""#,
@ -530,7 +530,7 @@ fn str_count_graphemes_three_js() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_count_graphemes_big_str() {
assert_evals_to!(
r#"Str.countGraphemes "6🤔å🤔e¥🤔çppkd🙃1jdal🦯asdfa∆ltråø˚waia8918.,🏅jjc""#,
@ -540,7 +540,7 @@ fn str_count_graphemes_big_str() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_starts_with_same_big_str() {
assert_evals_to!(
r#"Str.startsWith "123456789123456789" "123456789123456789""#,
@ -550,7 +550,7 @@ fn str_starts_with_same_big_str() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_starts_with_different_big_str() {
assert_evals_to!(
r#"Str.startsWith "12345678912345678910" "123456789123456789""#,
@ -560,18 +560,18 @@ fn str_starts_with_different_big_str() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_starts_with_same_small_str() {
assert_evals_to!(r#"Str.startsWith "1234" "1234""#, true, bool);
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_starts_with_different_small_str() {
assert_evals_to!(r#"Str.startsWith "1234" "12""#, true, bool);
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_starts_with_false_small_str() {
assert_evals_to!(r#"Str.startsWith "1234" "23""#, false, bool);
}
@ -875,7 +875,7 @@ fn str_join_comma_small() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_join_comma_big() {
assert_evals_to!(
r#"Str.joinWith ["10000000", "2000000", "30000000"] ", " "#,
@ -1031,7 +1031,7 @@ fn str_from_utf8_range_count_too_high_for_start() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_repeat_small_stays_small() {
assert_evals_to!(
indoc!(r#"Str.repeat "Roc" 3"#),
@ -1041,7 +1041,7 @@ fn str_repeat_small_stays_small() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_repeat_small_becomes_big() {
assert_evals_to!(
indoc!(r#"Str.repeat "less than 23 characters" 2"#),
@ -1051,7 +1051,7 @@ fn str_repeat_small_becomes_big() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_repeat_big() {
assert_evals_to!(
indoc!(r#"Str.repeat "more than 23 characters now" 2"#),
@ -1061,27 +1061,26 @@ fn str_repeat_big() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_repeat_empty_string() {
let a = indoc!(r#"Str.repeat "" 3"#);
let b = RocStr::from("");
assert_evals_to!(a, b, RocStr);
assert_evals_to!(a, RocStr::from(""), RocStr);
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_repeat_zero_times() {
assert_evals_to!(indoc!(r#"Str.repeat "Roc" 0"#), RocStr::from(""), RocStr);
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_trim_empty_string() {
assert_evals_to!(indoc!(r#"Str.trim """#), RocStr::from(""), RocStr);
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_trim_null_byte() {
assert_evals_to!(
indoc!(r#"Str.trim (Str.reserve "\u(0000)" 40)"#),
@ -1091,13 +1090,13 @@ fn str_trim_null_byte() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_trim_small_blank_string() {
assert_evals_to!(indoc!(r#"Str.trim " ""#), RocStr::from(""), RocStr);
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_trim_small_to_small() {
assert_evals_to!(
indoc!(r#"Str.trim " hello world ""#),
@ -1107,7 +1106,7 @@ fn str_trim_small_to_small() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_trim_large_to_large_unique() {
assert_evals_to!(
indoc!(r#"Str.trim (Str.concat " " "hello world from a large string ")"#),
@ -1117,7 +1116,7 @@ fn str_trim_large_to_large_unique() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_trim_large_to_small_unique() {
assert_evals_to!(
indoc!(r#"Str.trim (Str.concat " " "hello world ")"#),
@ -1184,13 +1183,13 @@ fn str_trim_small_to_small_shared() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_trim_left_small_blank_string() {
assert_evals_to!(indoc!(r#"Str.trimLeft " ""#), RocStr::from(""), RocStr);
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_trim_left_small_to_small() {
assert_evals_to!(
indoc!(r#"Str.trimLeft " hello world ""#),
@ -1200,7 +1199,7 @@ fn str_trim_left_small_to_small() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_trim_left_large_to_large_unique() {
assert_evals_to!(
indoc!(r#"Str.trimLeft (Str.concat " " "hello world from a large string ")"#),
@ -1210,7 +1209,7 @@ fn str_trim_left_large_to_large_unique() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_trim_left_large_to_small_unique() {
assert_evals_to!(
indoc!(r#"Str.trimLeft (Str.concat " " "hello world ")"#),
@ -1277,13 +1276,13 @@ fn str_trim_left_small_to_small_shared() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_trim_right_small_blank_string() {
assert_evals_to!(indoc!(r#"Str.trimRight " ""#), RocStr::from(""), RocStr);
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_trim_right_small_to_small() {
assert_evals_to!(
indoc!(r#"Str.trimRight " hello world ""#),
@ -1293,7 +1292,7 @@ fn str_trim_right_small_to_small() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_trim_right_large_to_large_unique() {
assert_evals_to!(
indoc!(r#"Str.trimRight (Str.concat " hello world from a large string" " ")"#),
@ -1303,7 +1302,7 @@ fn str_trim_right_large_to_large_unique() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_trim_right_large_to_small_unique() {
assert_evals_to!(
indoc!(r#"Str.trimRight (Str.concat " hello world" " ")"#),
@ -1585,7 +1584,7 @@ fn str_to_dec() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn issue_2811() {
assert_evals_to!(
indoc!(
@ -1601,7 +1600,7 @@ fn issue_2811() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn to_scalar_1_byte() {
assert_evals_to!(
indoc!(
@ -1625,7 +1624,7 @@ fn to_scalar_1_byte() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn to_scalar_2_byte() {
assert_evals_to!(
indoc!(
@ -1649,7 +1648,7 @@ fn to_scalar_2_byte() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn to_scalar_3_byte() {
assert_evals_to!(
indoc!(
@ -1673,7 +1672,7 @@ fn to_scalar_3_byte() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn to_scalar_4_byte() {
// from https://design215.com/toolbox/utf8-4byte-characters.php
assert_evals_to!(
@ -1812,7 +1811,7 @@ fn str_split_last_not_found() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_split_overlapping_substring_1() {
assert_evals_to!(
r#"Str.split "aaa" "aa""#,
@ -1822,7 +1821,7 @@ fn str_split_overlapping_substring_1() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_split_overlapping_substring_2() {
assert_evals_to!(
r#"Str.split "aaaa" "aa""#,
@ -1951,7 +1950,7 @@ fn when_on_strings() {
}
#[test]
#[cfg(any(feature = "gen-llvm", feature = "gen-wasm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-wasm", feature = "gen-dev"))]
fn with_capacity() {
assert_evals_to!(
indoc!(
@ -1965,7 +1964,7 @@ fn with_capacity() {
}
#[test]
#[cfg(any(feature = "gen-llvm", feature = "gen-wasm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-wasm", feature = "gen-dev"))]
fn with_capacity_concat() {
assert_evals_to!(
indoc!(
@ -1979,7 +1978,7 @@ fn with_capacity_concat() {
}
#[test]
#[cfg(any(feature = "gen-llvm", feature = "gen-wasm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-wasm", feature = "gen-dev"))]
fn str_with_prefix() {
assert_evals_to!(
indoc!(
@ -2003,7 +2002,7 @@ fn str_with_prefix() {
}
#[test]
#[cfg(any(feature = "gen-llvm", feature = "gen-wasm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-wasm", feature = "gen-dev"))]
fn destructure_pattern_assigned_from_thunk_opaque() {
assert_evals_to!(
indoc!(
@ -2025,7 +2024,7 @@ fn destructure_pattern_assigned_from_thunk_opaque() {
}
#[test]
#[cfg(any(feature = "gen-llvm", feature = "gen-wasm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-wasm", feature = "gen-dev"))]
fn destructure_pattern_assigned_from_thunk_tag() {
assert_evals_to!(
indoc!(