diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 72cb2cbd22..a7e7ac0ffe 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -7,6 +7,7 @@ name: Benchmarks env: RUST_BACKTRACE: 1 + ROC_NUM_WORKERS: 1 jobs: prep-dependency-container: @@ -42,4 +43,4 @@ jobs: run: cd ci/bench-runner && cargo build --release && cd ../.. - name: run benchmarks with regression check - run: echo "TODO re-enable benchmarks once race condition is fixed"#./ci/bench-runner/target/release/bench-runner --check-executables-changed + run: ./ci/bench-runner/target/release/bench-runner --check-executables-changed diff --git a/.gitignore b/.gitignore index 239f4d05dc..6ad9f80e0a 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ zig-cache .direnv *.rs.bk *.o +*.tmp # llvm human-readable output *.ll diff --git a/Cargo.lock b/Cargo.lock index f0ad233d5e..b83d944766 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -153,10 +153,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" [[package]] -name = "ash" -version = "0.32.1" +name = "arrayvec" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06063a002a77d2734631db74e8f4ce7148b77fe522e6bca46f2ae7774fd48112" +checksum = "be4dc07131ffa69b8072d35f5007352af944213cde02545e2103680baed38fcd" + +[[package]] +name = "ash" +version = "0.33.3+1.2.191" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc4f1d82f164f838ae413296d1131aa6fa79b917d25bebaa7033d25620c09219" dependencies = [ "libloading 0.7.0", ] @@ -379,9 +385,6 @@ name = "cc" version = "1.0.70" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d26a6ce4b6a484fa3edb70f7efa6fc430fd2b87285fe8b84304fd0936faa0dc0" -dependencies = [ - "jobserver", -] [[package]] name = "cesu8" @@ -1207,15 +1210,6 @@ version = "1.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650" -[[package]] -name = "drm-fourcc" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0aafbcdb8afc29c1a7ee5fbe53b5d62f4565b35a042a662ca9fecd0b54dae6f4" -dependencies = [ - "serde", -] - [[package]] name = "dtoa" version = "0.4.8" @@ -1287,16 +1281,6 @@ dependencies = [ "serde", ] -[[package]] -name = "external-memory" -version = "0.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4dfe8d292b014422776a8c516862d2bff8a81b223a4461dfdc45f3862dc9d39" -dependencies = [ - "bitflags", - "drm-fourcc", -] - [[package]] name = "fake-simd" version = "0.1.2" @@ -1315,6 +1299,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d" +[[package]] +name = "fixedbitset" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "398ea4fabe40b9b0d885340a2a991a44c8a645624075ad966d21f88688e2b69e" + [[package]] name = "flate2" version = "1.0.21" @@ -1530,168 +1520,6 @@ dependencies = [ "wasi 0.10.2+wasi-snapshot-preview1", ] -[[package]] -name = "gfx-auxil" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1694991b11d642680e82075a75c7c2bd75556b805efa7660b705689f05b1ab1c" -dependencies = [ - "fxhash", - "gfx-hal", - "spirv_cross", -] - -[[package]] -name = "gfx-backend-dx11" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f9e453baf3aaef2b0c354ce0b3d63d76402e406a59b64b7182d123cfa6635ae" -dependencies = [ - "arrayvec", - "bitflags", - "gfx-auxil", - "gfx-hal", - "gfx-renderdoc", - "libloading 0.7.0", - "log", - "parking_lot", - "range-alloc", - "raw-window-handle", - "smallvec", - "spirv_cross", - "thunderdome", - "winapi 0.3.9", - 
"wio", -] - -[[package]] -name = "gfx-backend-dx12" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21506399f64a3c4d389182a89a30073856ae33eb712315456b4fd8f39ee7682a" -dependencies = [ - "arrayvec", - "bit-set", - "bitflags", - "d3d12", - "gfx-auxil", - "gfx-hal", - "gfx-renderdoc", - "log", - "parking_lot", - "range-alloc", - "raw-window-handle", - "smallvec", - "spirv_cross", - "thunderdome", - "winapi 0.3.9", -] - -[[package]] -name = "gfx-backend-empty" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c8f813c47791918aa00dc9c9ddf961d23fa8c2a5d869e6cb8ea84f944820f4" -dependencies = [ - "gfx-hal", - "log", - "raw-window-handle", -] - -[[package]] -name = "gfx-backend-gl" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bae057fc3a0ab23ecf97ae51d4017d27d5ddf0aab16ee6dcb58981af88c3152" -dependencies = [ - "arrayvec", - "bitflags", - "fxhash", - "gfx-hal", - "glow", - "js-sys", - "khronos-egl", - "libloading 0.7.0", - "log", - "naga", - "parking_lot", - "raw-window-handle", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "gfx-backend-metal" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0de85808e2a98994c6af925253f8a9593bc57180ef1ea137deab6d35cc949517" -dependencies = [ - "arrayvec", - "bitflags", - "block", - "cocoa-foundation", - "copyless", - "core-graphics-types", - "foreign-types", - "fxhash", - "gfx-hal", - "log", - "metal", - "naga", - "objc", - "parking_lot", - "profiling", - "range-alloc", - "raw-window-handle", - "storage-map", -] - -[[package]] -name = "gfx-backend-vulkan" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9861ec855acbbc65c0e4f966d761224886e811dc2c6d413a4776e9293d0e5c0" -dependencies = [ - "arrayvec", - "ash", - "byteorder", - "core-graphics-types", - "gfx-hal", - "gfx-renderdoc", - 
"inplace_it", - "log", - "naga", - "objc", - "parking_lot", - "raw-window-handle", - "smallvec", - "winapi 0.3.9", -] - -[[package]] -name = "gfx-hal" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fbb575ea793dd0507b3082f4f2cde62dc9f3cebd98f5cd49ba2a4da97a976fd" -dependencies = [ - "bitflags", - "external-memory", - "naga", - "raw-window-handle", - "thiserror", -] - -[[package]] -name = "gfx-renderdoc" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8027995e247e2426d3a00d13f5191dd56c314bff02dc4b54cbf727f1ba9c40a" -dependencies = [ - "libloading 0.7.0", - "log", - "renderdoc-sys", -] - [[package]] name = "ghost" version = "0.1.2" @@ -1728,9 +1556,9 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" [[package]] name = "glow" -version = "0.9.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b80b98efaa8a34fce11d60dd2ce2760d5d83c373cbcc73bb87c2a3a84a54108" +checksum = "4f04649123493bc2483cbef4daddb45d40bbdae5adb221a63a23efdb0cc99520" dependencies = [ "js-sys", "slotmap", @@ -1779,9 +1607,9 @@ dependencies = [ [[package]] name = "gpu-alloc" -version = "0.4.7" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc1b6ca374e81862526786d9cb42357ce03706ed1b8761730caafd02ab91f3a" +checksum = "ab8524eac5fc9d05625c891adf78fcf64dc0ee9f8d0882874b9f220f42b442bf" dependencies = [ "bitflags", "gpu-alloc-types", @@ -1798,9 +1626,9 @@ dependencies = [ [[package]] name = "gpu-descriptor" -version = "0.1.1" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8a70f1e87a3840ed6a3e99e02c2b861e4dbdf26f0d07e38f42ea5aff46cfce2" +checksum = "d7a237f0419ab10d17006d55c62ac4f689a6bf52c75d3f38b8361d249e8d4b0b" dependencies = [ "bitflags", "gpu-descriptor-types", @@ -2113,15 +1941,6 @@ version = "0.3.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" -[[package]] -name = "jobserver" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa" -dependencies = [ - "libc", -] - [[package]] name = "js-sys" version = "0.3.54" @@ -2475,13 +2294,14 @@ dependencies = [ "sha2", "smallvec", "thiserror", + "typed-arena", ] [[package]] name = "naga" -version = "0.5.0" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef670817eef03d356d5a509ea275e7dd3a78ea9e24261ea3cb2dfed1abb08f64" +checksum = "8c5859e55c51da10b98e7a73068e0a0c5da7bbcae4fc38f86043d0c6d1b917cf" dependencies = [ "bit-set", "bitflags", @@ -2489,9 +2309,8 @@ dependencies = [ "fxhash", "log", "num-traits", - "petgraph", - "rose_tree", - "spirv_headers", + "petgraph 0.6.0", + "spirv", "thiserror", ] @@ -2968,7 +2787,7 @@ dependencies = [ "cfg-if 1.0.0", "instant", "libc", - "petgraph", + "petgraph 0.5.1", "redox_syscall", "smallvec", "thread-id", @@ -3036,7 +2855,17 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "467d164a6de56270bd7c4d070df81d07beace25012d5103ced4e9ff08d6afdb7" dependencies = [ - "fixedbitset", + "fixedbitset 0.2.0", + "indexmap", +] + +[[package]] +name = "petgraph" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a13a2fa9d0b63e5f22328828741e523766fff0ee9e779316902290dff3f824f" +dependencies = [ + "fixedbitset 0.4.0", "indexmap", ] @@ -3723,6 +3552,28 @@ dependencies = [ "libc", ] +[[package]] +name = "roc_ast" +version = "0.1.0" +dependencies = [ + "arraystring", + "bumpalo", + "indoc 1.0.3", + "libc", + "page_size", + "pretty_assertions 0.6.1", + "roc_can", + "roc_collections", + "roc_module", + "roc_parse", + "roc_problem", + "roc_region", + "roc_types", + 
"roc_unify", + "snafu", + "ven_graph", +] + [[package]] name = "roc_build" version = "0.1.0" @@ -3847,6 +3698,19 @@ dependencies = [ "wasmer-wasi", ] +[[package]] +name = "roc_code_markup" +version = "0.1.0" +dependencies = [ + "bumpalo", + "palette", + "roc_ast", + "roc_module", + "roc_utils", + "serde", + "snafu", +] + [[package]] name = "roc_collections" version = "0.1.0" @@ -3924,8 +3788,10 @@ dependencies = [ "quickcheck 1.0.3", "quickcheck_macros 1.0.0", "rand 0.8.4", + "roc_ast", "roc_builtins", "roc_can", + "roc_code_markup", "roc_collections", "roc_fmt", "roc_load", @@ -4293,6 +4159,13 @@ dependencies = [ "minimp3", ] +[[package]] +name = "roc_utils" +version = "0.1.0" +dependencies = [ + "snafu", +] + [[package]] name = "ropey" version = "1.3.1" @@ -4302,15 +4175,6 @@ dependencies = [ "smallvec", ] -[[package]] -name = "rose_tree" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "284de9dae38774e2813aaabd7e947b4a6fe9b8c58c2309f754a487cdd50de1c2" -dependencies = [ - "petgraph", -] - [[package]] name = "rustc-demangle" version = "0.1.21" @@ -4604,9 +4468,12 @@ dependencies = [ [[package]] name = "slotmap" -version = "0.4.3" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bf34684c5767b87de9119790e92e9a1d60056be2ceeaf16a8e6ef13082aeab1" +checksum = "e1e08e261d0e8f5c43123b7adf3e4ca1690d655377ac93a03b2c9d3e98de1342" +dependencies = [ + "version_check", +] [[package]] name = "smallvec" @@ -4683,21 +4550,10 @@ dependencies = [ ] [[package]] -name = "spirv_cross" -version = "0.23.1" +name = "spirv" +version = "0.2.0+1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60647fadbf83c4a72f0d7ea67a7ca3a81835cf442b8deae5c134c3e0055b2e14" -dependencies = [ - "cc", - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "spirv_headers" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1f5b132530b1ac069df335577e3581765995cba5a13995cdbbdbc8fb057c532c" +checksum = "246bfa38fe3db3f1dfc8ca5a2cdeb7348c78be2112740cc0ec8ef18b6d94f830" dependencies = [ "bitflags", "num-traits", @@ -5197,7 +5053,7 @@ dependencies = [ name = "ven_pretty" version = "0.9.1-alpha.0" dependencies = [ - "arrayvec", + "arrayvec 0.5.2", "criterion", "difference", "tempfile", @@ -5223,7 +5079,7 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6cbce692ab4ca2f1f3047fcf732430249c0e971bfdd2b234cf2c47ad93af5983" dependencies = [ - "arrayvec", + "arrayvec 0.5.2", "utf8parse", "vte_generate_state_changes", ] @@ -5640,9 +5496,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.50" +version = "0.3.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a905d57e488fec8861446d3393670fb50d27a262344013181c2cdf9fff5481be" +checksum = "e828417b379f3df7111d3a2a9e5753706cae29c41f7c4029ee9fd77f3e09e582" dependencies = [ "js-sys", "wasm-bindgen", @@ -5650,14 +5506,13 @@ dependencies = [ [[package]] name = "wgpu" -version = "0.9.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd247f8b26fd3d42ef2f320d378025cd6e84d782ef749fab45cc3b981fbe3275" +checksum = "3d92a4fe73b1e7d7ef99938dacd49258cbf1ad87cdb5bf6efa20c27447442b45" dependencies = [ - "arrayvec", + "arrayvec 0.7.1", "js-sys", "log", - "naga", "parking_lot", "raw-window-handle", "smallvec", @@ -5665,29 +5520,21 @@ dependencies = [ "wasm-bindgen-futures", "web-sys", "wgpu-core", + "wgpu-hal", "wgpu-types", ] [[package]] name = "wgpu-core" -version = "0.9.2" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "958a8a5e418492723ab4e7933bf6dbdf06f5dc87274ba2ae0e4f9c891aac579c" +checksum = "5f1b4d918c970526cbc83b72ccb72dbefd38aec45f07b2310de4ffcd7f4bd8c5" dependencies = [ - "arrayvec", + "arrayvec 0.7.1", "bitflags", "cfg_aliases", "copyless", "fxhash", - 
"gfx-backend-dx11", - "gfx-backend-dx12", - "gfx-backend-empty", - "gfx-backend-gl", - "gfx-backend-metal", - "gfx-backend-vulkan", - "gfx-hal", - "gpu-alloc", - "gpu-descriptor", "log", "naga", "parking_lot", @@ -5695,23 +5542,58 @@ dependencies = [ "raw-window-handle", "smallvec", "thiserror", + "wgpu-hal", "wgpu-types", ] [[package]] -name = "wgpu-types" -version = "0.9.0" +name = "wgpu-hal" +version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f5c9678cd533558e28b416d66947b099742df1939307478db54f867137f1b60" +checksum = "27cd894b17bff1958ee93da1cc991fd64bf99667746d4bd2a7403855f4d37fe2" +dependencies = [ + "arrayvec 0.7.1", + "ash", + "bit-set", + "bitflags", + "block", + "core-graphics-types", + "d3d12", + "foreign-types", + "fxhash", + "glow", + "gpu-alloc", + "gpu-descriptor", + "inplace_it", + "khronos-egl", + "libloading 0.7.0", + "log", + "metal", + "naga", + "objc", + "parking_lot", + "range-alloc", + "raw-window-handle", + "renderdoc-sys", + "thiserror", + "wgpu-types", + "winapi 0.3.9", +] + +[[package]] +name = "wgpu-types" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25feb2fbf24ab3219a9f10890ceb8e1ef02b13314ed89d64a9ae99dcad883e18" dependencies = [ "bitflags", ] [[package]] name = "wgpu_glyph" -version = "0.13.0" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fee8c96eda18195a7ad9989737183e0a357f14b15e98838c76abbcf56a5f970" +checksum = "cbf11aebbcf20806535bee127367bcb393c83d77c60c4f7917184d839716cf41" dependencies = [ "bytemuck", "glyph_brush", @@ -5804,15 +5686,6 @@ dependencies = [ "x11-dl", ] -[[package]] -name = "wio" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d129932f4644ac2396cb456385cbf9e63b5b30c6e8dc4820bdca4eb082037a5" -dependencies = [ - "winapi 0.3.9", -] - [[package]] name = "ws2_32-sys" version = "0.2.1" diff --git a/Cargo.toml 
b/Cargo.toml index 7363ba8600..a2fe43c612 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,9 +29,12 @@ members = [ "vendor/pathfinding", "vendor/pretty", "editor", + "ast", "cli", "cli/cli_utils", + "code_markup", "roc_std", + "utils", "docs", "linker", ] diff --git a/Earthfile b/Earthfile index 032d17bdf0..815b328641 100644 --- a/Earthfile +++ b/Earthfile @@ -47,7 +47,7 @@ install-zig-llvm-valgrind-clippy-rustfmt: copy-dirs: FROM +install-zig-llvm-valgrind-clippy-rustfmt - COPY --dir cli compiler docs editor roc_std vendor examples linker Cargo.toml Cargo.lock ./ + COPY --dir cli compiler docs editor ast code_markup utils roc_std vendor examples linker Cargo.toml Cargo.lock ./ test-zig: FROM +install-zig-llvm-valgrind-clippy-rustfmt @@ -67,7 +67,7 @@ check-rustfmt: check-typos: RUN cargo install typos-cli --version 1.0.11 # version set to prevent confusion if the version is updated automatically - COPY --dir .github ci cli compiler docs editor examples linker nightly_benches packages roc_std www *.md LEGAL_DETAILS shell.nix ./ + COPY --dir .github ci cli compiler docs editor examples ast code_markup utils linker nightly_benches packages roc_std www *.md LEGAL_DETAILS shell.nix ./ RUN typos test-rust: @@ -101,7 +101,7 @@ test-all: BUILD +test-rust BUILD +verify-no-git-changes -# compile everything needed for benchmarks and output a self-contained folder +# compile everything needed for benchmarks and output a self-contained dir from which benchmarks can be run. prep-bench-folder: FROM +copy-dirs ARG BENCH_SUFFIX=branch diff --git a/ast/Cargo.toml b/ast/Cargo.toml new file mode 100644 index 0000000000..39bf52a471 --- /dev/null +++ b/ast/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "roc_ast" +version = "0.1.0" +authors = ["The Roc Contributors"] +license = "UPL-1.0" +edition = "2018" +description = "AST as used by the editor and (soon) docs. In contrast to the compiler, these types do not keep track of a location in a file." 
+ +[dependencies] +roc_can = { path = "../compiler/can" } +roc_collections = { path = "../compiler/collections" } +roc_region = { path = "../compiler/region" } +roc_module = { path = "../compiler/module" } +roc_parse = { path = "../compiler/parse" } +roc_problem = { path = "../compiler/problem" } +roc_types = { path = "../compiler/types" } +roc_unify = { path = "../compiler/unify"} +arraystring = "0.3.0" +bumpalo = { version = "3.6.1", features = ["collections"] } +libc = "0.2" +page_size = "0.4" +snafu = { version = "0.6", features = ["backtraces"] } +ven_graph = { path = "../vendor/pathfinding" } +indoc = "1.0" + +[dev-dependencies] +pretty_assertions = "0.6" diff --git a/ast/src/ast_error.rs b/ast/src/ast_error.rs new file mode 100644 index 0000000000..6fb8546212 --- /dev/null +++ b/ast/src/ast_error.rs @@ -0,0 +1,38 @@ +use snafu::{Backtrace, Snafu}; + +use crate::lang::core::ast::ASTNodeId; + +#[derive(Debug, Snafu)] +#[snafu(visibility(pub))] +pub enum ASTError { + #[snafu(display( + "ASTNodeIdWithoutExprId: The expr_id_opt in ASTNode({:?}) was `None` but I was expecting `Some(ExprId)` .", + ast_node_id + ))] + ASTNodeIdWithoutExprId { + ast_node_id: ASTNodeId, + backtrace: Backtrace, + }, + #[snafu(display( + "UnexpectedASTNode: required a {} at this position, node was a {}.", + required_node_type, + encountered_node_type + ))] + UnexpectedASTNode { + required_node_type: String, + encountered_node_type: String, + backtrace: Backtrace, + }, + #[snafu(display( + "UnexpectedPattern2Variant: required a {} at this position, Pattern2 was a {}.", + required_pattern2, + encountered_pattern2, + ))] + UnexpectedPattern2Variant { + required_pattern2: String, + encountered_pattern2: String, + backtrace: Backtrace, + }, +} + +pub type ASTResult = std::result::Result; diff --git a/ast/src/canonicalization/canonicalize.rs b/ast/src/canonicalization/canonicalize.rs new file mode 100644 index 0000000000..211e22cf97 --- /dev/null +++ b/ast/src/canonicalization/canonicalize.rs
@@ -0,0 +1,305 @@ +use roc_collections::all::MutMap; +use roc_problem::can::Problem; +use roc_region::all::{Located, Region}; +use roc_types::subs::Variable; + +use crate::{ + lang::{ + core::{ + def::def::References, + expr::{ + expr2::{Expr2, ExprId, WhenBranch}, + expr_to_expr2::to_expr2, + output::Output, + record_field::RecordField, + }, + pattern::to_pattern2, + }, + env::Env, + scope::Scope, + }, + mem_pool::{pool_str::PoolStr, pool_vec::PoolVec, shallow_clone::ShallowClone}, +}; + +pub(crate) enum CanonicalizeRecordProblem { + #[allow(dead_code)] + InvalidOptionalValue { + field_name: PoolStr, + field_region: Region, + record_region: Region, + }, +} + +enum FieldVar { + VarAndExprId(Variable, ExprId), + OnlyVar(Variable), +} + +pub(crate) fn canonicalize_fields<'a>( + env: &mut Env<'a>, + scope: &mut Scope, + fields: &'a [Located>>], +) -> Result<(PoolVec, Output), CanonicalizeRecordProblem> { + let mut can_fields: MutMap<&'a str, FieldVar> = MutMap::default(); + let mut output = Output::default(); + + for loc_field in fields.iter() { + match canonicalize_field(env, scope, &loc_field.value) { + Ok(can_field) => { + match can_field { + CanonicalField::LabelAndValue { + label, + value_expr, + value_output, + var, + } => { + let expr_id = env.pool.add(value_expr); + + let replaced = + can_fields.insert(label, FieldVar::VarAndExprId(var, expr_id)); + + if let Some(_old) = replaced { + // env.problems.push(Problem::DuplicateRecordFieldValue { + // field_name: label, + // field_region: loc_field.region, + // record_region: region, + // replaced_region: old.region, + // }); + todo!() + } + + output.references.union_mut(value_output.references); + } + CanonicalField::InvalidLabelOnly { label, var } => { + let replaced = can_fields.insert(label, FieldVar::OnlyVar(var)); + + if let Some(_old) = replaced { + todo!() + } + } + } + } + + Err(CanonicalizeFieldProblem::InvalidOptionalValue { + field_name: _, + field_region: _, + }) => { + // 
env.problem(Problem::InvalidOptionalValue { + // field_name: field_name.clone(), + // field_region, + // record_region: region, + // }); + // return Err(CanonicalizeRecordProblem::InvalidOptionalValue { + // field_name, + // field_region, + // record_region: region, + // }); + todo!() + } + } + } + + let pool_vec = PoolVec::with_capacity(can_fields.len() as u32, env.pool); + + for (node_id, (string, field_var)) in pool_vec.iter_node_ids().zip(can_fields.into_iter()) { + let name = PoolStr::new(string, env.pool); + + match field_var { + FieldVar::VarAndExprId(var, expr_id) => { + env.pool[node_id] = RecordField::LabeledValue(name, var, expr_id); + } + FieldVar::OnlyVar(var) => { + env.pool[node_id] = RecordField::InvalidLabelOnly(name, var); + } // TODO RecordField::LabelOnly + } + } + + Ok((pool_vec, output)) +} + +#[allow(dead_code)] +enum CanonicalizeFieldProblem { + InvalidOptionalValue { + field_name: PoolStr, + field_region: Region, + }, +} +enum CanonicalField<'a> { + LabelAndValue { + label: &'a str, + value_expr: Expr2, + value_output: Output, + var: Variable, + }, + InvalidLabelOnly { + label: &'a str, + var: Variable, + }, // TODO make ValidLabelOnly +} +fn canonicalize_field<'a>( + env: &mut Env<'a>, + scope: &mut Scope, + field: &'a roc_parse::ast::AssignedField<'a, roc_parse::ast::Expr<'a>>, +) -> Result, CanonicalizeFieldProblem> { + use roc_parse::ast::AssignedField::*; + + match field { + // Both a label and a value, e.g. 
`{ name: "blah" }` + RequiredValue(label, _, loc_expr) => { + let field_var = env.var_store.fresh(); + let (loc_can_expr, output) = to_expr2(env, scope, &loc_expr.value, loc_expr.region); + + Ok(CanonicalField::LabelAndValue { + label: label.value, + value_expr: loc_can_expr, + value_output: output, + var: field_var, + }) + } + + OptionalValue(label, _, loc_expr) => Err(CanonicalizeFieldProblem::InvalidOptionalValue { + field_name: PoolStr::new(label.value, env.pool), + field_region: Region::span_across(&label.region, &loc_expr.region), + }), + + // A label with no value, e.g. `{ name }` (this is sugar for { name: name }) + LabelOnly(label) => { + let field_var = env.var_store.fresh(); + // TODO return ValidLabel if label points to in scope variable + Ok(CanonicalField::InvalidLabelOnly { + label: label.value, + var: field_var, + }) + } + + SpaceBefore(sub_field, _) | SpaceAfter(sub_field, _) => { + canonicalize_field(env, scope, sub_field) + } + + Malformed(_string) => { + panic!("TODO canonicalize malformed record field"); + } + } +} + +#[inline(always)] +pub(crate) fn canonicalize_when_branch<'a>( + env: &mut Env<'a>, + scope: &mut Scope, + branch: &'a roc_parse::ast::WhenBranch<'a>, + output: &mut Output, +) -> (WhenBranch, References) { + let patterns = PoolVec::with_capacity(branch.patterns.len() as u32, env.pool); + + let original_scope = scope; + let mut scope = original_scope.shallow_clone(); + + // TODO report symbols not bound in all patterns + for (node_id, loc_pattern) in patterns.iter_node_ids().zip(branch.patterns.iter()) { + let (new_output, can_pattern) = to_pattern2( + env, + &mut scope, + roc_parse::pattern::PatternType::WhenBranch, + &loc_pattern.value, + loc_pattern.region, + ); + + output.union(new_output); + + env.set_region(node_id, loc_pattern.region); + env.pool[node_id] = can_pattern; + } + + let (value, mut branch_output) = + to_expr2(env, &mut scope, &branch.value.value, branch.value.region); + let value_id = env.pool.add(value); + 
env.set_region(value_id, branch.value.region); + + let guard = match &branch.guard { + None => None, + Some(loc_expr) => { + let (can_guard, guard_branch_output) = + to_expr2(env, &mut scope, &loc_expr.value, loc_expr.region); + + let expr_id = env.pool.add(can_guard); + env.set_region(expr_id, loc_expr.region); + + branch_output.union(guard_branch_output); + Some(expr_id) + } + }; + + // Now that we've collected all the references for this branch, check to see if + // any of the new idents it defined were unused. If any were, report it. + for (symbol, region) in scope.symbols() { + let symbol = symbol; + + if !output.references.has_lookup(symbol) + && !branch_output.references.has_lookup(symbol) + && !original_scope.contains_symbol(symbol) + { + env.problem(Problem::UnusedDef(symbol, region)); + } + } + + let references = branch_output.references.clone(); + output.union(branch_output); + + ( + WhenBranch { + patterns, + body: value_id, + guard, + }, + references, + ) +} + +pub(crate) fn canonicalize_lookup( + env: &mut Env<'_>, + scope: &mut Scope, + module_name: &str, + ident: &str, + region: Region, +) -> (Expr2, Output) { + use Expr2::*; + + let mut output = Output::default(); + let can_expr = if module_name.is_empty() { + // Since module_name was empty, this is an unqualified var. + // Look it up in scope! + match scope.lookup(&(*ident).into(), region) { + Ok(symbol) => { + output.references.lookups.insert(symbol); + + Var(symbol) + } + Err(problem) => { + env.problem(Problem::RuntimeError(problem)); + + RuntimeError() + } + } + } else { + // Since module_name was nonempty, this is a qualified var. + // Look it up in the env! + match env.qualified_lookup(module_name, ident, region) { + Ok(symbol) => { + output.references.lookups.insert(symbol); + + Var(symbol) + } + Err(problem) => { + // Either the module wasn't imported, or + // it was imported but it doesn't expose this ident. 
+ env.problem(Problem::RuntimeError(problem)); + + RuntimeError() + } + } + }; + + // If it's valid, this ident should be in scope already. + + (can_expr, output) +} diff --git a/ast/src/canonicalization/mod.rs b/ast/src/canonicalization/mod.rs new file mode 100644 index 0000000000..fdbc754fc0 --- /dev/null +++ b/ast/src/canonicalization/mod.rs @@ -0,0 +1,2 @@ +pub mod canonicalize; +pub mod module; diff --git a/editor/src/lang/module.rs b/ast/src/canonicalization/module.rs similarity index 95% rename from editor/src/lang/module.rs rename to ast/src/canonicalization/module.rs index c0494d3193..c8286f8c2d 100644 --- a/editor/src/lang/module.rs +++ b/ast/src/canonicalization/module.rs @@ -2,14 +2,6 @@ #![allow(dead_code)] #![allow(unused_imports)] #![allow(unused_variables)] -use crate::lang::ast::{Expr2, FunctionDef, ValueDef}; -use crate::lang::def::{canonicalize_defs, sort_can_defs, Declaration, Def}; -use crate::lang::expr::Env; -use crate::lang::expr::Output; -use crate::lang::pattern::Pattern2; -use crate::lang::pool::{NodeId, Pool, PoolStr, PoolVec, ShallowClone}; -use crate::lang::scope::Scope; -use crate::lang::types::Alias; use bumpalo::Bump; use roc_can::operator::desugar_def; use roc_collections::all::{default_hasher, ImMap, ImSet, MutMap, MutSet, SendMap}; @@ -22,6 +14,21 @@ use roc_problem::can::{Problem, RuntimeError}; use roc_region::all::{Located, Region}; use roc_types::subs::{VarStore, Variable}; +use crate::lang::core::def::def::canonicalize_defs; +use crate::lang::core::def::def::Def; +use crate::lang::core::def::def::{sort_can_defs, Declaration}; +use crate::lang::core::expr::expr2::Expr2; +use crate::lang::core::expr::output::Output; +use crate::lang::core::pattern::Pattern2; +use crate::lang::core::types::Alias; +use crate::lang::core::val_def::ValueDef; +use crate::lang::env::Env; +use crate::lang::scope::Scope; +use crate::mem_pool::pool::NodeId; +use crate::mem_pool::pool::Pool; +use crate::mem_pool::pool_vec::PoolVec; +use 
crate::mem_pool::shallow_clone::ShallowClone; + pub struct ModuleOutput { pub aliases: MutMap>, pub rigid_variables: MutMap, diff --git a/editor/src/lang/constrain.rs b/ast/src/constrain.rs similarity index 85% rename from editor/src/lang/constrain.rs rename to ast/src/constrain.rs index 77d4edb0cd..1454f1d936 100644 --- a/editor/src/lang/constrain.rs +++ b/ast/src/constrain.rs @@ -1,13 +1,5 @@ use bumpalo::{collections::Vec as BumpVec, Bump}; -use crate::lang::{ - ast::{ClosureExtra, Expr2, ExprId, RecordField, ValueDef, WhenBranch}, - expr::Env, - pattern::{DestructType, Pattern2, PatternId, PatternState2, RecordDestruct}, - pool::{Pool, PoolStr, PoolVec, ShallowClone}, - types::{Type2, TypeId}, -}; - use roc_can::expected::{Expected, PExpected}; use roc_collections::all::{BumpMap, BumpMapDefault, Index, SendMap}; use roc_module::{ @@ -21,6 +13,22 @@ use roc_types::{ types::{Category, Reason}, }; +use crate::{ + lang::{ + core::{ + expr::{ + expr2::{ClosureExtra, Expr2, ExprId, WhenBranch}, + record_field::RecordField, + }, + pattern::{DestructType, Pattern2, PatternId, PatternState2, RecordDestruct}, + types::{Type2, TypeId}, + val_def::ValueDef, + }, + env::Env, + }, + mem_pool::{pool::Pool, pool_str::PoolStr, pool_vec::PoolVec, shallow_clone::ShallowClone}, +}; + #[derive(Debug)] pub enum Constraint<'a> { Eq(Type2, Expected, Category, Region), @@ -1744,3 +1752,397 @@ fn num_num(pool: &mut Pool, type_id: TypeId) -> Type2 { pool.add(alias_content), ) } + +#[cfg(test)] +pub mod test_constrain { + use bumpalo::Bump; + use roc_can::expected::Expected; + use roc_collections::all::MutMap; + use roc_module::{ + ident::Lowercase, + symbol::{IdentIds, Interns, ModuleIds, Symbol}, + }; + use roc_parse::parser::SyntaxError; + use roc_region::all::Region; + use roc_types::{ + pretty_print::content_to_string, + solved_types::Solved, + subs::{Subs, VarStore, Variable}, + }; + + use super::Constraint; + use crate::{ + constrain::constrain_expr, + lang::{ + core::{ + 
expr::{expr2::Expr2, expr_to_expr2::loc_expr_to_expr2, output::Output}, + types::Type2, + }, + env::Env, + scope::Scope, + }, + mem_pool::pool::Pool, + solve_type, + }; + use indoc::indoc; + + fn run_solve<'a>( + arena: &'a Bump, + mempool: &mut Pool, + aliases: MutMap, + rigid_variables: MutMap, + constraint: Constraint, + var_store: VarStore, + ) -> (Solved, solve_type::Env, Vec) { + let env = solve_type::Env { + vars_by_symbol: MutMap::default(), + aliases, + }; + + let mut subs = Subs::new(var_store); + + for (var, name) in rigid_variables { + subs.rigid_var(var, name); + } + + // Now that the module is parsed, canonicalized, and constrained, + // we need to type check it. + let mut problems = Vec::new(); + + // Run the solver to populate Subs. + let (solved_subs, solved_env) = + solve_type::run(arena, mempool, &env, &mut problems, subs, &constraint); + + (solved_subs, solved_env, problems) + } + + fn infer_eq(actual: &str, expected_str: &str) { + let mut env_pool = Pool::with_capacity(1024); + let env_arena = Bump::new(); + let code_arena = Bump::new(); + + let mut var_store = VarStore::default(); + let var = var_store.fresh(); + let dep_idents = IdentIds::exposed_builtins(8); + let exposed_ident_ids = IdentIds::default(); + let mut module_ids = ModuleIds::default(); + let mod_id = module_ids.get_or_insert(&"ModId123".into()); + + let mut env = Env::new( + mod_id, + &env_arena, + &mut env_pool, + &mut var_store, + dep_idents, + &module_ids, + exposed_ident_ids, + ); + + let mut scope = Scope::new(env.home, env.pool, env.var_store); + + let region = Region::zero(); + + let expr2_result = str_to_expr2(&code_arena, actual, &mut env, &mut scope, region); + + match expr2_result { + Ok((expr, _)) => { + let constraint = constrain_expr( + &code_arena, + &mut env, + &expr, + Expected::NoExpectation(Type2::Variable(var)), + Region::zero(), + ); + + let Env { + pool, + var_store: ref_var_store, + mut dep_idents, + .. 
+ } = env; + + // extract the var_store out of the env again + let mut var_store = VarStore::default(); + std::mem::swap(ref_var_store, &mut var_store); + + let (mut solved, _, _) = run_solve( + &code_arena, + pool, + Default::default(), + Default::default(), + constraint, + var_store, + ); + + let subs = solved.inner_mut(); + + let content = subs.get_content_without_compacting(var); + + // Connect the ModuleId to its IdentIds + dep_idents.insert(mod_id, env.ident_ids); + + let interns = Interns { + module_ids: env.module_ids.clone(), + all_ident_ids: dep_idents, + }; + + let actual_str = content_to_string(content, subs, mod_id, &interns); + + assert_eq!(actual_str, expected_str); + } + Err(e) => panic!("syntax error {:?}", e), + } + } + + pub fn str_to_expr2<'a>( + arena: &'a Bump, + input: &'a str, + env: &mut Env<'a>, + scope: &mut Scope, + region: Region, + ) -> Result<(Expr2, Output), SyntaxError<'a>> { + match roc_parse::test_helpers::parse_loc_with(arena, input.trim()) { + Ok(loc_expr) => Ok(loc_expr_to_expr2(arena, loc_expr, env, scope, region)), + Err(fail) => Err(fail), + } + } + + #[test] + fn constrain_str() { + infer_eq( + indoc!( + r#" + "type inference!"
+ "# + ), + "Str", + ) + } + + // This will be more useful once we actually map + // strings less than 15 chars to SmallStr + #[test] + fn constrain_small_str() { + infer_eq( + indoc!( + r#" + "a" + "# + ), + "Str", + ) + } + + #[test] + fn constrain_empty_record() { + infer_eq( + indoc!( + r#" + {} + "# + ), + "{}", + ) + } + + #[test] + fn constrain_small_int() { + infer_eq( + indoc!( + r#" + 12 + "# + ), + "Num *", + ) + } + + #[test] + fn constrain_float() { + infer_eq( + indoc!( + r#" + 3.14 + "# + ), + "Float *", + ) + } + + #[test] + fn constrain_record() { + infer_eq( + indoc!( + r#" + { x : 1, y : "hi" } + "# + ), + "{ x : Num *, y : Str }", + ) + } + + #[test] + fn constrain_empty_list() { + infer_eq( + indoc!( + r#" + [] + "# + ), + "List *", + ) + } + + #[test] + fn constrain_list() { + infer_eq( + indoc!( + r#" + [ 1, 2 ] + "# + ), + "List (Num *)", + ) + } + + #[test] + fn constrain_list_of_records() { + infer_eq( + indoc!( + r#" + [ { x: 1 }, { x: 3 } ] + "# + ), + "List { x : Num * }", + ) + } + + #[test] + fn constrain_global_tag() { + infer_eq( + indoc!( + r#" + Foo + "# + ), + "[ Foo ]*", + ) + } + + #[test] + fn constrain_private_tag() { + infer_eq( + indoc!( + r#" + @Foo + "# + ), + "[ @Foo ]*", + ) + } + + #[test] + fn constrain_call_and_accessor() { + infer_eq( + indoc!( + r#" + .foo { foo: "bar" } + "# + ), + "Str", + ) + } + + #[test] + fn constrain_access() { + infer_eq( + indoc!( + r#" + { foo: "bar" }.foo + "# + ), + "Str", + ) + } + + #[test] + fn constrain_if() { + infer_eq( + indoc!( + r#" + if True then Green else Red + "# + ), + "[ Green, Red ]*", + ) + } + + #[test] + fn constrain_when() { + infer_eq( + indoc!( + r#" + when if True then Green else Red is + Green -> Blue + Red -> Purple + "# + ), + "[ Blue, Purple ]*", + ) + } + + #[test] + fn constrain_let_value() { + infer_eq( + indoc!( + r#" + person = { name: "roc" } + + person + "# + ), + "{ name : Str }", + ) + } + + #[test] + fn constrain_update() { + infer_eq( + indoc!( + 
r#" + person = { name: "roc" } + + { person & name: "bird" } + "# + ), + "{ name : Str }", + ) + } + + #[ignore = "TODO: implement builtins in the editor"] + #[test] + fn constrain_run_low_level() { + infer_eq( + indoc!( + r#" + List.map [ { name: "roc" }, { name: "bird" } ] .name + "# + ), + "List Str", + ) + } + + #[test] + fn constrain_closure() { + infer_eq( + indoc!( + r#" + x = 1 + + \{} -> x + "# + ), + "{}* -> Num *", + ) + } +} diff --git a/ast/src/lang/core/ast.rs b/ast/src/lang/core/ast.rs new file mode 100644 index 0000000000..586b29f19c --- /dev/null +++ b/ast/src/lang/core/ast.rs @@ -0,0 +1,45 @@ +use crate::{ + ast_error::{ASTNodeIdWithoutExprId, ASTResult}, + mem_pool::pool::Pool, +}; + +use super::{ + def::def2::{def2_to_string, DefId}, + expr::{expr2::ExprId, expr2_to_string::expr2_to_string}, + header::AppHeader, +}; + +#[derive(Debug)] +pub struct AST { + pub header: AppHeader, + pub def_ids: Vec, +} + +#[derive(Debug, PartialEq, Copy, Clone)] +pub enum ASTNodeId { + ADefId(DefId), + AExprId(ExprId), +} + +impl ASTNodeId { + pub fn to_expr_id(&self) -> ASTResult { + match self { + ASTNodeId::AExprId(expr_id) => Ok(*expr_id), + _ => ASTNodeIdWithoutExprId { ast_node_id: *self }.fail()?, + } + } + + pub fn to_def_id(&self) -> ASTResult { + match self { + ASTNodeId::ADefId(def_id) => Ok(*def_id), + _ => ASTNodeIdWithoutExprId { ast_node_id: *self }.fail()?, + } + } +} + +pub fn ast_node_to_string(node_id: ASTNodeId, pool: &Pool) -> String { + match node_id { + ASTNodeId::ADefId(def_id) => def2_to_string(def_id, pool), + ASTNodeId::AExprId(expr_id) => expr2_to_string(expr_id, pool), + } +} diff --git a/ast/src/lang/core/declaration.rs b/ast/src/lang/core/declaration.rs new file mode 100644 index 0000000000..e45668b484 --- /dev/null +++ b/ast/src/lang/core/declaration.rs @@ -0,0 +1,70 @@ +use roc_types::subs::VarStore; + +use crate::{ + lang::core::{def::def::Def, expr::expr2::Expr2}, + mem_pool::{pool::Pool, pool_vec::PoolVec}, +}; + +use 
super::def::def::Declaration; + +pub(crate) fn decl_to_let( + pool: &mut Pool, + var_store: &mut VarStore, + decl: Declaration, + ret: Expr2, +) -> Expr2 { + match decl { + Declaration::Declare(def) => match def { + Def::AnnotationOnly { .. } => todo!(), + Def::Value(value_def) => { + let def_id = pool.add(value_def); + + let body_id = pool.add(ret); + + Expr2::LetValue { + def_id, + body_id, + body_var: var_store.fresh(), + } + } + Def::Function(function_def) => { + let def_id = pool.add(function_def); + let body_id = pool.add(ret); + + Expr2::LetFunction { + def_id, + body_id, + body_var: var_store.fresh(), + } + } + }, + Declaration::DeclareRec(defs) => { + let mut function_defs = vec![]; + + for def in defs { + match def { + Def::AnnotationOnly { .. } => todo!(), + Def::Function(function_def) => function_defs.push(function_def), + Def::Value(_) => unreachable!(), + } + } + + let body_id = pool.add(ret); + + Expr2::LetRec { + defs: PoolVec::new(function_defs.into_iter(), pool), + body_var: var_store.fresh(), + body_id, + } + } + Declaration::InvalidCycle(_entries, _) => { + // TODO: replace with something from Expr2 + // Expr::RuntimeError(RuntimeError::CircularDef(entries)) + todo!() + } + Declaration::Builtin(_) => { + // Builtins should only be added to top-level decls, not to let-exprs! 
+ unreachable!() + } + } +} diff --git a/editor/src/lang/def.rs b/ast/src/lang/core/def/def.rs similarity index 98% rename from editor/src/lang/def.rs rename to ast/src/lang/core/def/def.rs index c179145a7c..c8654b3d73 100644 --- a/editor/src/lang/def.rs +++ b/ast/src/lang/core/def/def.rs @@ -12,15 +12,6 @@ // }; // use crate::pattern::{bindings_from_patterns, canonicalize_pattern, Pattern}; // use crate::procedure::References; -use crate::lang::ast::{Expr2, FunctionDef, Rigids, ValueDef}; -use crate::lang::expr::Output; -use crate::lang::expr::{to_expr2, to_expr_id, Env}; -use crate::lang::pattern::{ - symbols_and_variables_from_pattern, symbols_from_pattern, to_pattern_id, Pattern2, PatternId, -}; -use crate::lang::pool::{NodeId, Pool, PoolStr, PoolVec, ShallowClone}; -use crate::lang::scope::Scope; -use crate::lang::types::{to_annotation2, Alias, Annotation2, Signature, Type2, TypeId}; use roc_collections::all::{default_hasher, ImMap, MutMap, MutSet, SendMap}; use roc_module::ident::Lowercase; use roc_module::symbol::Symbol; @@ -33,6 +24,22 @@ use std::collections::HashMap; use std::fmt::Debug; use ven_graph::{strongly_connected_components, topological_sort_into_groups}; +use crate::{ + lang::{ + core::{ + expr::{expr2::Expr2, expr_to_expr2::to_expr2, output::Output}, + fun_def::FunctionDef, + pattern::{self, symbols_from_pattern, to_pattern_id, Pattern2, PatternId}, + types::{to_annotation2, Alias, Annotation2, Signature, Type2, TypeId}, + val_def::ValueDef, + }, + env::Env, + rigids::Rigids, + scope::Scope, + }, + mem_pool::{pool::Pool, pool_vec::PoolVec, shallow_clone::ShallowClone}, +}; + #[derive(Debug)] pub enum Def { AnnotationOnly { rigids: Rigids, annotation: TypeId }, @@ -127,7 +134,7 @@ fn to_pending_def<'a>( match def { Annotation(loc_pattern, loc_ann) => { // This takes care of checking for shadowing and adding idents to scope. 
- let (output, loc_can_pattern) = crate::lang::pattern::to_pattern_id( + let (output, loc_can_pattern) = pattern::to_pattern_id( env, scope, pattern_type, @@ -142,7 +149,7 @@ fn to_pending_def<'a>( } Body(loc_pattern, loc_expr) => { // This takes care of checking for shadowing and adding idents to scope. - let (output, loc_can_pattern) = crate::lang::pattern::to_pattern_id( + let (output, loc_can_pattern) = pattern::to_pattern_id( env, scope, pattern_type, diff --git a/ast/src/lang/core/def/def2.rs b/ast/src/lang/core/def/def2.rs new file mode 100644 index 0000000000..023a39760d --- /dev/null +++ b/ast/src/lang/core/def/def2.rs @@ -0,0 +1,43 @@ +use crate::{ + lang::core::{ + expr::{expr2::Expr2, expr2_to_string::expr2_to_string}, + pattern::Pattern2, + }, + mem_pool::pool::{NodeId, Pool}, +}; + +// A top level definition, not inside a function. For example: `main = "Hello, world!"` +#[derive(Debug)] +pub enum Def2 { + // ValueDef example: `main = "Hello, world!"`. identifier -> `main`, expr -> "Hello, world!" 
+ ValueDef { + identifier_id: NodeId, + expr_id: NodeId, + }, + Blank, +} + +pub type DefId = NodeId; + +pub fn def2_to_string(node_id: DefId, pool: &Pool) -> String { + let mut full_string = String::new(); + let def2 = pool.get(node_id); + + match def2 { + Def2::ValueDef { + identifier_id, + expr_id, + } => { + full_string.push_str(&format!( + "Def2::ValueDef(identifier_id: >>{:?}), expr_id: >>{:?})", + pool.get(*identifier_id), + expr2_to_string(*expr_id, pool) + )); + } + Def2::Blank => { + full_string.push_str("Def2::Blank"); + } + } + + full_string +} diff --git a/ast/src/lang/core/def/def_to_def2.rs b/ast/src/lang/core/def/def_to_def2.rs new file mode 100644 index 0000000000..05cf2747a6 --- /dev/null +++ b/ast/src/lang/core/def/def_to_def2.rs @@ -0,0 +1,97 @@ +use bumpalo::collections::Vec as BumpVec; +use bumpalo::Bump; +use roc_parse::{parser::SyntaxError, pattern::PatternType}; +use roc_region::all::Region; + +use crate::lang::{ + core::{expr::expr_to_expr2::loc_expr_to_expr2, pattern::to_pattern2}, + env::Env, + scope::Scope, +}; + +use super::def2::Def2; + +pub fn defs_to_defs2<'a>( + arena: &'a Bump, + env: &mut Env<'a>, + scope: &mut Scope, + parsed_defs: &'a BumpVec>>, + region: Region, +) -> Vec { + parsed_defs + .iter() + .map(|loc| to_def2_from_def(arena, env, scope, &loc.value, region)) + .collect() +} + +pub fn to_def2_from_def<'a>( + arena: &'a Bump, + env: &mut Env<'a>, + scope: &mut Scope, + parsed_def: &'a roc_parse::ast::Def<'a>, + region: Region, +) -> Def2 { + use roc_parse::ast::Def::*; + + match parsed_def { + SpaceBefore(inner_def, _) => to_def2_from_def(arena, env, scope, inner_def, region), + SpaceAfter(inner_def, _) => to_def2_from_def(arena, env, scope, inner_def, region), + Body(&loc_pattern, &loc_expr) => { + // TODO loc_pattern use identifier + let expr2 = loc_expr_to_expr2(arena, loc_expr, env, scope, region).0; + let expr_id = env.pool.add(expr2); + + use roc_parse::ast::Pattern::*; + + match loc_pattern.value { + Identifier(_) 
=> { + let (_, pattern2) = to_pattern2( + env, + scope, + PatternType::TopLevelDef, + &loc_pattern.value, + region, + ); + let pattern_id = env.pool.add(pattern2); + + // TODO support with annotation + Def2::ValueDef { + identifier_id: pattern_id, + expr_id, + } + } + other => { + unimplemented!( + "I don't yet know how to convert the pattern {:?} into an expr2", + other + ) + } + } + } + other => { + unimplemented!( + "I don't know how to make an expr2 from this def yet: {:?}", + other + ) + } + } +} + +pub fn str_to_def2<'a>( + arena: &'a Bump, + input: &'a str, + env: &mut Env<'a>, + scope: &mut Scope, + region: Region, +) -> Result, SyntaxError<'a>> { + match roc_parse::test_helpers::parse_defs_with(arena, input.trim()) { + Ok(vec_loc_def) => Ok(defs_to_defs2( + arena, + env, + scope, + arena.alloc(vec_loc_def), + region, + )), + Err(fail) => Err(fail), + } +} diff --git a/ast/src/lang/core/def/mod.rs b/ast/src/lang/core/def/mod.rs new file mode 100644 index 0000000000..7ab541b811 --- /dev/null +++ b/ast/src/lang/core/def/mod.rs @@ -0,0 +1,3 @@ +pub mod def; +pub mod def2; +pub mod def_to_def2; diff --git a/ast/src/lang/core/expr/expr2.rs b/ast/src/lang/core/expr/expr2.rs new file mode 100644 index 0000000000..e06d169ece --- /dev/null +++ b/ast/src/lang/core/expr/expr2.rs @@ -0,0 +1,233 @@ +use arraystring::{typenum::U30, ArrayString}; +use roc_types::subs::Variable; + +use crate::{ + lang::core::{fun_def::FunctionDef, pattern::Pattern2, val_def::ValueDef}, + mem_pool::{pool::NodeId, pool_str::PoolStr, pool_vec::PoolVec}, +}; +use roc_can::expr::Recursive; +use roc_module::low_level::LowLevel; +use roc_module::operator::CalledVia; +use roc_module::symbol::Symbol; + +use super::record_field::RecordField; + +pub type ArrString = ArrayString; + +// TODO make the inner types private? +pub type ExprId = NodeId; + +/// An Expr that fits in 32B. +/// It has a 1B discriminant and variants which hold payloads of at most 31B. 
+#[derive(Debug)] +pub enum Expr2 { + /// An integer literal without a dot, stored as one of the IntVal widths (8 to 64 bits, signed or unsigned) + SmallInt { + number: IntVal, // 16B + var: Variable, // 4B + style: IntStyle, // 1B + text: PoolStr, // 8B + }, + // TODO(rvcas): rename this eventually + /// A large (over 64-bit) negative number literal without a dot. + /// This variant can't use IntVal because if IntVal stored 128-bit + /// integers, it would be 32B on its own because of alignment. + I128 { + number: i128, // 16B + var: Variable, // 4B + style: IntStyle, // 1B + text: PoolStr, // 8B + }, + // TODO(rvcas): rename this eventually + /// A large (over 64-bit) nonnegative number literal without a dot + /// This variant can't use IntVal because if IntVal stored 128-bit + /// integers, it would be 32B on its own because of alignment. + U128 { + number: u128, // 16B + var: Variable, // 4B + style: IntStyle, // 1B + text: PoolStr, // 8B + }, + /// A floating-point literal (with a dot) + Float { + number: FloatVal, // 16B + var: Variable, // 4B + text: PoolStr, // 8B + }, + /// string literals of length up to 30B + SmallStr(ArrString), // 31B + /// string literals of length 31B or more + Str(PoolStr), // 8B + // Lookups + Var(Symbol), // 8B + InvalidLookup(PoolStr), // 8B + + List { + elem_var: Variable, // 4B + elems: PoolVec, // 8B + }, + If { + cond_var: Variable, // 4B + expr_var: Variable, // 4B + branches: PoolVec<(ExprId, ExprId)>, // 8B + final_else: ExprId, // 4B + }, + When { + cond_var: Variable, // 4B + expr_var: Variable, // 4B + branches: PoolVec, // 8B + cond: ExprId, // 4B + }, + LetRec { + defs: PoolVec, // 8B + body_var: Variable, // 4B, as on the other Variable fields — TODO confirm + body_id: ExprId, // 4B + }, + LetFunction { + def_id: NodeId, // 4B + body_var: Variable, // 4B, as on the other Variable fields — TODO confirm + body_id: ExprId, // 4B + }, + LetValue { + def_id: NodeId, // 4B + body_id: ExprId, // 4B + body_var: Variable, // 4B + }, + Call { + args: PoolVec<(Variable, ExprId)>, // 8B + expr: ExprId, // 4B + expr_var: Variable, // 4B + fn_var: Variable, // 4B + closure_var: Variable,
// 4B + called_via: CalledVia, // 2B + }, + RunLowLevel { + op: LowLevel, // 1B + args: PoolVec<(Variable, ExprId)>, // 8B + ret_var: Variable, // 4B + }, + Closure { + args: PoolVec<(Variable, NodeId)>, // 8B + name: Symbol, // 8B + body: ExprId, // 4B + function_type: Variable, // 4B + recursive: Recursive, // 1B + extra: NodeId, // 4B + }, + // Product Types + Record { + record_var: Variable, // 4B + fields: PoolVec, // 8B + }, + /// Empty record constant + EmptyRecord, + /// Look up exactly one field on a record, e.g. (expr).foo. + Access { + field: PoolStr, // 8B, as on the other PoolStr fields — TODO confirm + expr: ExprId, // 4B + record_var: Variable, // 4B + ext_var: Variable, // 4B + field_var: Variable, // 4B + }, + + /// field accessor as a function, e.g. (.foo) expr + Accessor { + function_var: Variable, // 4B + closure_var: Variable, // 4B + field: PoolStr, // 8B, as on the other PoolStr fields — TODO confirm + record_var: Variable, // 4B + ext_var: Variable, // 4B + field_var: Variable, // 4B + }, + Update { + symbol: Symbol, // 8B + updates: PoolVec, // 8B + record_var: Variable, // 4B + ext_var: Variable, // 4B + }, + + // Sum Types + GlobalTag { + name: PoolStr, // 8B, as on the other PoolStr fields — TODO confirm + variant_var: Variable, // 4B + ext_var: Variable, // 4B + arguments: PoolVec<(Variable, ExprId)>, // 8B + }, + PrivateTag { + name: Symbol, // 8B + variant_var: Variable, // 4B + ext_var: Variable, // 4B + arguments: PoolVec<(Variable, ExprId)>, // 8B + }, + Blank, // Rendered as empty box in editor + + // Compiles, but will crash if reached + RuntimeError(/* TODO make a version of RuntimeError that fits in 15B */), +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum Problem { + RanOutOfNodeIds, +} + +pub type Res = Result; + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum IntStyle { + Decimal, + Octal, + Hex, + Binary, +} + +impl IntStyle { + pub fn from_base(base: roc_parse::ast::Base) -> Self { + use roc_parse::ast::Base; + match base { + Base::Decimal => Self::Decimal, + Base::Octal => Self::Octal, + Base::Hex => Self::Hex, +
Base::Binary => Self::Binary, + } + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum IntVal { + I64(i64), + U64(u64), + I32(i32), + U32(u32), + I16(i16), + U16(u16), + I8(i8), + U8(u8), +} + +#[test] +fn size_of_intval() { + assert_eq!(std::mem::size_of::(), 16); +} + +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum FloatVal { + F64(f64), + F32(f32), +} + +#[derive(Debug)] +pub struct WhenBranch { + pub patterns: PoolVec, // 8B, as on ClosureExtra::captured_symbols — TODO confirm + pub body: ExprId, // 4B, as ExprId is annotated in Expr2 — TODO confirm + pub guard: Option, // 4B +} + +/// This is overflow data from a Closure variant, which needs to store +/// more than 32B of total data +#[derive(Debug)] +pub struct ClosureExtra { + pub return_type: Variable, // 4B + pub captured_symbols: PoolVec<(Symbol, Variable)>, // 8B + pub closure_type: Variable, // 4B + pub closure_ext_var: Variable, // 4B +} diff --git a/ast/src/lang/core/expr/expr2_to_string.rs b/ast/src/lang/core/expr/expr2_to_string.rs new file mode 100644 index 0000000000..cbd2967766 --- /dev/null +++ b/ast/src/lang/core/expr/expr2_to_string.rs @@ -0,0 +1,139 @@ +use crate::{ + lang::core::{expr::record_field::RecordField, val_def::value_def_to_string}, + mem_pool::pool::Pool, +}; + +use super::expr2::{Expr2, ExprId}; +use roc_types::subs::Variable; + +pub fn expr2_to_string(node_id: ExprId, pool: &Pool) -> String { + let mut full_string = String::new(); + let expr2 = pool.get(node_id); + + expr2_to_string_helper(expr2, 0, pool, &mut full_string); + + full_string +} + +fn get_spacing(indent_level: usize) -> String { + std::iter::repeat(" ") + .take(indent_level) + .collect::>() + .join("") +} + +fn expr2_to_string_helper( + expr2: &Expr2, + indent_level: usize, + pool: &Pool, + out_string: &mut String, +) { + out_string.push_str(&get_spacing(indent_level)); + + match expr2 { + Expr2::SmallStr(arr_string) => out_string.push_str(&format!( + "{}{}{}", + "SmallStr(\"", + arr_string.as_str(), + "\")", + )), + Expr2::Str(pool_str) => { + out_string.push_str(&format!("{}{}{}", "Str(\"",
pool_str.as_str(pool), "\")",)) + } + Expr2::Blank => out_string.push_str("Blank"), + Expr2::EmptyRecord => out_string.push_str("EmptyRecord"), + Expr2::Record { record_var, fields } => { + out_string.push_str("Record:\n"); + out_string.push_str(&var_to_string(record_var, indent_level + 1)); + + out_string.push_str(&format!("{}fields: [\n", get_spacing(indent_level + 1))); + + let mut first_child = true; + + for field in fields.iter(pool) { + if !first_child { + out_string.push_str(", ") + } else { + first_child = false; + } + + match field { + RecordField::InvalidLabelOnly(pool_str, var) => { + out_string.push_str(&format!( + "{}({}, Var({:?})", + get_spacing(indent_level + 2), + pool_str.as_str(pool), + var, + )); + } + RecordField::LabelOnly(pool_str, var, symbol) => { + out_string.push_str(&format!( + "{}({}, Var({:?}), Symbol({:?})", + get_spacing(indent_level + 2), + pool_str.as_str(pool), + var, + symbol + )); + } + RecordField::LabeledValue(pool_str, var, val_node_id) => { + out_string.push_str(&format!( + "{}({}, Var({:?}), Expr2(\n", + get_spacing(indent_level + 2), + pool_str.as_str(pool), + var, + )); + + let val_expr2 = pool.get(*val_node_id); + expr2_to_string_helper(val_expr2, indent_level + 3, pool, out_string); + out_string.push_str(&format!("{})\n", get_spacing(indent_level + 2))); + } + } + } + + out_string.push_str(&format!("{}]\n", get_spacing(indent_level + 1))); + } + Expr2::List { elem_var, elems } => { + out_string.push_str("List:\n"); + out_string.push_str(&var_to_string(elem_var, indent_level + 1)); + out_string.push_str(&format!("{}elems: [\n", get_spacing(indent_level + 1))); + + let mut first_elt = true; + + for elem_expr2_id in elems.iter(pool) { + if !first_elt { + out_string.push_str(", ") + } else { + first_elt = false; + } + + let elem_expr2 = pool.get(*elem_expr2_id); + + expr2_to_string_helper(elem_expr2, indent_level + 2, pool, out_string) + } + + out_string.push_str(&format!("{}]\n", get_spacing(indent_level + 1))); + } + 
Expr2::InvalidLookup(pool_str) => { + out_string.push_str(&format!("InvalidLookup({})", pool_str.as_str(pool))); + } + Expr2::SmallInt { text, .. } => { + out_string.push_str(&format!("SmallInt({})", text.as_str(pool))); + } + Expr2::LetValue { + def_id, body_id, .. + } => { + out_string.push_str(&format!( + "LetValue(def_id: >>{:?}), body_id: >>{:?})", + value_def_to_string(pool.get(*def_id), pool), + pool.get(*body_id) + )); + } + other => todo!("Implement for {:?}", other), + } + + out_string.push('\n'); +} + +fn var_to_string(some_var: &Variable, indent_level: usize) -> String { + format!("{}Var({:?})\n", get_spacing(indent_level + 1), some_var) +} diff --git a/ast/src/lang/core/expr/expr_to_expr2.rs b/ast/src/lang/core/expr/expr_to_expr2.rs new file mode 100644 index 0000000000..febad6b13a --- /dev/null +++ b/ast/src/lang/core/expr/expr_to_expr2.rs @@ -0,0 +1,710 @@ +use bumpalo::Bump; +use roc_can::expr::Recursive; +use roc_can::num::{finish_parsing_base, finish_parsing_float, finish_parsing_int}; +use roc_can::operator::desugar_expr; +use roc_collections::all::MutSet; +use roc_module::symbol::Symbol; +use roc_parse::{ast::Expr, pattern::PatternType}; +use roc_problem::can::{Problem, RuntimeError}; +use roc_region::all::{Located, Region}; + +use super::{expr2::Expr2, output::Output}; +use crate::canonicalization::canonicalize::{ + canonicalize_fields, canonicalize_lookup, canonicalize_when_branch, CanonicalizeRecordProblem, +}; +use crate::lang::core::declaration::decl_to_let; +use crate::lang::core::def::def::{canonicalize_defs, sort_can_defs}; +use crate::lang::core::expr::expr2::ClosureExtra; +use crate::lang::core::pattern::to_pattern2; +use crate::lang::core::str::flatten_str_literal; +use crate::mem_pool::shallow_clone::ShallowClone; +use crate::{ + lang::{ + core::expr::expr2::{ExprId, FloatVal, IntStyle, IntVal}, + env::Env, + scope::Scope, + }, + mem_pool::{pool_str::PoolStr, pool_vec::PoolVec}, +}; + +pub fn loc_expr_to_expr2<'a>( + arena: &'a Bump, 
+ loc_expr: Located>, + env: &mut Env<'a>, + scope: &mut Scope, + region: Region, +) -> (Expr2, Output) { + let desugared_loc_expr = desugar_expr(arena, arena.alloc(loc_expr)); + + to_expr2(env, scope, arena.alloc(desugared_loc_expr.value), region) +} + +const ZERO: Region = Region::zero(); + +pub fn to_expr2<'a>( + env: &mut Env<'a>, + scope: &mut Scope, + parse_expr: &'a roc_parse::ast::Expr<'a>, + region: Region, +) -> (Expr2, self::Output) { + use roc_parse::ast::Expr::*; + + match parse_expr { + Float(string) => { + match finish_parsing_float(string) { + Ok(float) => { + let expr = Expr2::Float { + number: FloatVal::F64(float), + var: env.var_store.fresh(), + text: PoolStr::new(string, &mut env.pool), + }; + + (expr, Output::default()) + } + Err((raw, error)) => { + // emit runtime error + let runtime_error = RuntimeError::InvalidFloat(error, ZERO, raw.into()); + + env.problem(Problem::RuntimeError(runtime_error)); + // + // Expr::RuntimeError(runtime_error) + todo!() + } + } + } + Num(string) => { + match finish_parsing_int(string) { + Ok(int) => { + let expr = Expr2::SmallInt { + number: IntVal::I64(int), + var: env.var_store.fresh(), + // TODO non-hardcode + style: IntStyle::Decimal, + text: PoolStr::new(string, &mut env.pool), + }; + + (expr, Output::default()) + } + Err((raw, error)) => { + // emit runtime error + let runtime_error = RuntimeError::InvalidInt( + error, + roc_parse::ast::Base::Decimal, + ZERO, + raw.into(), + ); + + env.problem(Problem::RuntimeError(runtime_error)); + // + // Expr::RuntimeError(runtime_error) + todo!() + } + } + } + NonBase10Int { + string, + base, + is_negative, + } => { + match finish_parsing_base(string, *base, *is_negative) { + Ok(int) => { + let expr = Expr2::SmallInt { + number: IntVal::I64(int), + var: env.var_store.fresh(), + // TODO non-hardcode + style: IntStyle::from_base(*base), + text: PoolStr::new(string, &mut env.pool), + }; + + (expr, Output::default()) + } + Err((raw, error)) => { + // emit runtime error + 
let runtime_error = RuntimeError::InvalidInt(error, *base, ZERO, raw.into()); + + env.problem(Problem::RuntimeError(runtime_error)); + // + // Expr::RuntimeError(runtime_error) + todo!() + } + } + } + + Str(literal) => flatten_str_literal(env, scope, literal), + + List { items, .. } => { + let mut output = Output::default(); + let output_ref = &mut output; + + let elems: PoolVec = PoolVec::with_capacity(items.len() as u32, env.pool); + + for (node_id, item) in elems.iter_node_ids().zip(items.iter()) { + let (expr, sub_output) = to_expr2(env, scope, &item.value, item.region); + + output_ref.union(sub_output); + + let expr_id = env.pool.add(expr); + env.pool[node_id] = expr_id; + } + + let expr = Expr2::List { + elem_var: env.var_store.fresh(), + elems, + }; + + (expr, output) + } + + GlobalTag(tag) => { + // a global tag without any arguments + ( + Expr2::GlobalTag { + name: PoolStr::new(tag, env.pool), + variant_var: env.var_store.fresh(), + ext_var: env.var_store.fresh(), + arguments: PoolVec::empty(env.pool), + }, + Output::default(), + ) + } + PrivateTag(name) => { + // a private tag without any arguments + let ident_id = env.ident_ids.get_or_insert(&(*name).into()); + let name = Symbol::new(env.home, ident_id); + ( + Expr2::PrivateTag { + name, + variant_var: env.var_store.fresh(), + ext_var: env.var_store.fresh(), + arguments: PoolVec::empty(env.pool), + }, + Output::default(), + ) + } + + RecordUpdate { + fields, + update: loc_update, + final_comments: _, + } => { + let (can_update, update_out) = + to_expr2(env, scope, &loc_update.value, loc_update.region); + + if let Expr2::Var(symbol) = &can_update { + match canonicalize_fields(env, scope, fields) { + Ok((can_fields, mut output)) => { + output.references.union_mut(update_out.references); + + let answer = Expr2::Update { + record_var: env.var_store.fresh(), + ext_var: env.var_store.fresh(), + symbol: *symbol, + updates: can_fields, + }; + + (answer, output) + } + 
Err(CanonicalizeRecordProblem::InvalidOptionalValue { + field_name: _, + field_region: _, + record_region: _, + }) => { + // let runtime_error = roc_problem::can::RuntimeError::InvalidOptionalValue { + // field_name, + // field_region, + // record_region, + // }; + // + // env.problem(Problem::RuntimeError(runtime_error)); + + todo!() + } + } + } else { + // only (optionally qualified) variables can be updated, not arbitrary expressions + + // let error = roc_problem::can::RuntimeError::InvalidRecordUpdate { + // region: can_update.region, + // }; + // + // let answer = Expr::RuntimeError(error.clone()); + // + // env.problems.push(Problem::RuntimeError(error)); + // + // (answer, Output::default()) + todo!() + } + } + + Record { + fields, + final_comments: _, + } => { + if fields.is_empty() { + (Expr2::EmptyRecord, Output::default()) + } else { + match canonicalize_fields(env, scope, fields) { + Ok((can_fields, output)) => ( + Expr2::Record { + record_var: env.var_store.fresh(), + fields: can_fields, + }, + output, + ), + Err(CanonicalizeRecordProblem::InvalidOptionalValue { + field_name: _, + field_region: _, + record_region: _, + }) => { + // let runtime_error = RuntimeError::InvalidOptionalValue { + // field_name, + // field_region, + // record_region, + // }; + // + // env.problem(runtime_error); + // ( + // Expr::RuntimeError( + // ), + // Output::default(), + // + // ) + todo!() + } + } + } + } + + Access(record_expr, field) => { + // TODO + let region = ZERO; + let (record_expr_id, output) = to_expr_id(env, scope, record_expr, region); + + ( + Expr2::Access { + record_var: env.var_store.fresh(), + field_var: env.var_store.fresh(), + ext_var: env.var_store.fresh(), + expr: record_expr_id, + field: PoolStr::new(field, env.pool), + }, + output, + ) + } + + AccessorFunction(field) => ( + Expr2::Accessor { + function_var: env.var_store.fresh(), + record_var: env.var_store.fresh(), + ext_var: env.var_store.fresh(), + closure_var: env.var_store.fresh(), + 
field_var: env.var_store.fresh(), + field: PoolStr::new(field, env.pool), + }, + Output::default(), + ), + + If(branches, final_else) => { + let mut new_branches = Vec::with_capacity(branches.len()); + let mut output = Output::default(); + + for (condition, then_branch) in branches.iter() { + let (cond, cond_output) = to_expr2(env, scope, &condition.value, condition.region); + + let (then_expr, then_output) = + to_expr2(env, scope, &then_branch.value, then_branch.region); + + output.references.union_mut(cond_output.references); + output.references.union_mut(then_output.references); + + new_branches.push((env.pool.add(cond), env.pool.add(then_expr))); + } + + let (else_expr, else_output) = + to_expr2(env, scope, &final_else.value, final_else.region); + + output.references.union_mut(else_output.references); + + let expr = Expr2::If { + cond_var: env.var_store.fresh(), + expr_var: env.var_store.fresh(), + branches: PoolVec::new(new_branches.into_iter(), env.pool), + final_else: env.pool.add(else_expr), + }; + + (expr, output) + } + + When(loc_cond, branches) => { + // Infer the condition expression's type. + let cond_var = env.var_store.fresh(); + let (can_cond, mut output) = to_expr2(env, scope, &loc_cond.value, loc_cond.region); + + // the condition can never be a tail-call + output.tail_call = None; + + let can_branches = PoolVec::with_capacity(branches.len() as u32, env.pool); + + for (node_id, branch) in can_branches.iter_node_ids().zip(branches.iter()) { + let (can_when_branch, branch_references) = + canonicalize_when_branch(env, scope, *branch, &mut output); + + output.references.union_mut(branch_references); + + env.pool[node_id] = can_when_branch; + } + + // A "when" with no branches is a runtime error, but it will mess things up + // if code gen mistakenly thinks this is a tail call just because its condition + // happened to be one. (The condition gave us our initial output value.) 
+ if branches.is_empty() { + output.tail_call = None; + } + + // Incorporate all three expressions into a combined Output value. + let expr = Expr2::When { + expr_var: env.var_store.fresh(), + cond_var, + cond: env.pool.add(can_cond), + branches: can_branches, + }; + + (expr, output) + } + + Closure(loc_arg_patterns, loc_body_expr) => { + // The globally unique symbol that will refer to this closure once it gets converted + // into a top-level procedure for code gen. + // + // In the Foo module, this will look something like Foo.$1 or Foo.$2. + let symbol = env + .closure_name_symbol + .unwrap_or_else(|| env.gen_unique_symbol()); + env.closure_name_symbol = None; + + // The body expression gets a new scope for canonicalization. + // Shadow `scope` to make sure we don't accidentally use the original one for the + // rest of this block, but keep the original around for later diffing. + let original_scope = scope; + let mut scope = original_scope.shallow_clone(); + let can_args = PoolVec::with_capacity(loc_arg_patterns.len() as u32, env.pool); + let mut output = Output::default(); + + let mut bound_by_argument_patterns = MutSet::default(); + + for (node_id, loc_pattern) in can_args.iter_node_ids().zip(loc_arg_patterns.iter()) { + let (new_output, can_arg) = to_pattern2( + env, + &mut scope, + roc_parse::pattern::PatternType::FunctionArg, + &loc_pattern.value, + loc_pattern.region, + ); + + bound_by_argument_patterns + .extend(new_output.references.bound_symbols.iter().copied()); + + output.union(new_output); + + let pattern_id = env.add(can_arg, loc_pattern.region); + env.pool[node_id] = (env.var_store.fresh(), pattern_id); + } + + let (body_expr, new_output) = + to_expr2(env, &mut scope, &loc_body_expr.value, loc_body_expr.region); + + let mut captured_symbols: MutSet = + new_output.references.lookups.iter().copied().collect(); + + // filter out the closure's name itself + captured_symbols.remove(&symbol); + + // symbols bound either in this pattern or deeper down 
are not captured! + captured_symbols.retain(|s| !new_output.references.bound_symbols.contains(s)); + captured_symbols.retain(|s| !bound_by_argument_patterns.contains(s)); + + // filter out top-level symbols + // those will be globally available, and don't need to be captured + captured_symbols.retain(|s| !env.top_level_symbols.contains(s)); + + // filter out imported symbols + // those will be globally available, and don't need to be captured + captured_symbols.retain(|s| s.module_id() == env.home); + + // TODO any Closure that has an empty `captured_symbols` list could be excluded! + + output.union(new_output); + + // filter out aliases + captured_symbols.retain(|s| !output.references.referenced_aliases.contains(s)); + + // filter out functions that don't close over anything + captured_symbols.retain(|s| !output.non_closures.contains(s)); + + // Now that we've collected all the references, check to see if any of the args we defined + // went unreferenced. If any did, report them as unused arguments. + for (sub_symbol, region) in scope.symbols() { + if !original_scope.contains_symbol(sub_symbol) { + if !output.references.has_lookup(sub_symbol) { + // The body never referenced this argument we declared. It's an unused argument! + env.problem(Problem::UnusedArgument(symbol, sub_symbol, region)); + } + + // We shouldn't ultimately count arguments as referenced locals. Otherwise, + // we end up with weird conclusions like the expression (\x -> x + 1) + // references the (nonexistent) local variable x! + output.references.lookups.remove(&sub_symbol); + } + } + + env.register_closure(symbol, output.references.clone()); + + let mut captured_symbols: Vec<_> = captured_symbols + .into_iter() + .map(|s| (s, env.var_store.fresh())) + .collect(); + + // sort symbols, so we know the order in which they're stored in the closure record + captured_symbols.sort(); + + // store that this function doesn't capture anything.
It will be promoted to a + // top-level function, and does not need to be captured by other surrounding functions. + if captured_symbols.is_empty() { + output.non_closures.insert(symbol); + } + + let captured_symbols = PoolVec::new(captured_symbols.into_iter(), env.pool); + + let extra = ClosureExtra { + return_type: env.var_store.fresh(), // 4B + captured_symbols, // 8B + closure_type: env.var_store.fresh(), // 4B + closure_ext_var: env.var_store.fresh(), // 4B + }; + + ( + Expr2::Closure { + function_type: env.var_store.fresh(), + name: symbol, + recursive: Recursive::NotRecursive, + args: can_args, + body: env.add(body_expr, loc_body_expr.region), + extra: env.pool.add(extra), + }, + output, + ) + } + + Apply(loc_fn, loc_args, application_style) => { + // The expression that evaluates to the function being called, e.g. `foo` in + // (foo) bar baz + let fn_region = loc_fn.region; + + // Canonicalize the function expression and its arguments + let (fn_expr, mut output) = to_expr2(env, scope, &loc_fn.value, fn_region); + + // The function's return type + let args = PoolVec::with_capacity(loc_args.len() as u32, env.pool); + + for (node_id, loc_arg) in args.iter_node_ids().zip(loc_args.iter()) { + let (arg_expr_id, arg_out) = to_expr_id(env, scope, &loc_arg.value, loc_arg.region); + + env.pool[node_id] = (env.var_store.fresh(), arg_expr_id); + + output.references.union_mut(arg_out.references); + } + + // Default: We're not tail-calling a symbol (by name), we're tail-calling a function value. + output.tail_call = None; + + let expr = match fn_expr { + Expr2::Var(ref symbol) => { + output.references.calls.insert(*symbol); + + // we're tail-calling a symbol by name, check if it's the tail-callable symbol + output.tail_call = match &env.tailcallable_symbol { + Some(tc_sym) if *tc_sym == *symbol => Some(*symbol), + Some(_) | None => None, + }; + + // IDEA: Expr2::CallByName? 
+ let fn_expr_id = env.add(fn_expr, fn_region); + Expr2::Call { + args, + expr: fn_expr_id, + expr_var: env.var_store.fresh(), + fn_var: env.var_store.fresh(), + closure_var: env.var_store.fresh(), + called_via: *application_style, + } + } + Expr2::RuntimeError() => { + // We can't call a runtime error; bail out by propagating it! + return (fn_expr, output); + } + Expr2::GlobalTag { + variant_var, + ext_var, + name, + .. + } => Expr2::GlobalTag { + variant_var, + ext_var, + name, + arguments: args, + }, + Expr2::PrivateTag { + variant_var, + ext_var, + name, + .. + } => Expr2::PrivateTag { + variant_var, + ext_var, + name, + arguments: args, + }, + _ => { + // This could be something like ((if True then fn1 else fn2) arg1 arg2). + let fn_expr_id = env.add(fn_expr, fn_region); + Expr2::Call { + args, + expr: fn_expr_id, + expr_var: env.var_store.fresh(), + fn_var: env.var_store.fresh(), + closure_var: env.var_store.fresh(), + called_via: *application_style, + } + } + }; + + (expr, output) + } + + Defs(loc_defs, loc_ret) => { + let (unsorted, mut scope, defs_output, symbols_introduced) = canonicalize_defs( + env, + Output::default(), + scope, + loc_defs, + PatternType::DefExpr, + ); + + // The def as a whole is a tail call iff its return expression is a tail call. + // Use its output as a starting point because its tail_call already has the right answer! + let (ret_expr, mut output) = to_expr2(env, &mut scope, &loc_ret.value, loc_ret.region); + + output + .introduced_variables + .union(&defs_output.introduced_variables); + + output.references.union_mut(defs_output.references); + + // Now that we've collected all the references, check to see if any of the new idents + // we defined went unused by the return expression. If any were unused, report it. 
+ for (symbol, region) in symbols_introduced { + if !output.references.has_lookup(symbol) { + env.problem(Problem::UnusedDef(symbol, region)); + } + } + + let (can_defs, output) = sort_can_defs(env, unsorted, output); + + match can_defs { + Ok(decls) => { + let mut expr = ret_expr; + + for declaration in decls.into_iter().rev() { + expr = decl_to_let(env.pool, env.var_store, declaration, expr); + } + + (expr, output) + } + Err(_err) => { + // TODO: fix this to be something from Expr2 + // (RuntimeError(err), output) + todo!() + } + } + } + + PrecedenceConflict { .. } => { + // use roc_problem::can::RuntimeError::*; + // + // let problem = PrecedenceProblem::BothNonAssociative( + // *whole_region, + // binop1.clone(), + // binop2.clone(), + // ); + // + // env.problem(Problem::PrecedenceProblem(problem.clone())); + // + // ( + // RuntimeError(InvalidPrecedence(problem, region)), + // Output::default(), + // ) + todo!() + } + MalformedClosure => { + // use roc_problem::can::RuntimeError::*; + // (RuntimeError(MalformedClosure(region)), Output::default()) + todo!() + } + MalformedIdent(_name, _problem) => { + // use roc_problem::can::RuntimeError::*; + // + // let problem = MalformedIdentifier((*name).into(), region); + // env.problem(Problem::RuntimeError(problem.clone())); + // + // (RuntimeError(problem), Output::default()) + todo!() + } + Var { module_name, ident } => canonicalize_lookup(env, scope, module_name, ident, region), + + // Below this point, we shouldn't see any of these nodes anymore because + // operator desugaring should have removed them!
+ bad_expr @ ParensAround(_) => { + panic!( + "A ParensAround did not get removed during operator desugaring somehow: {:#?}", + bad_expr + ); + } + bad_expr @ SpaceBefore(_, _) => { + panic!( + "A SpaceBefore did not get removed during operator desugaring somehow: {:#?}", + bad_expr + ); + } + bad_expr @ SpaceAfter(_, _) => { + panic!( + "A SpaceAfter did not get removed during operator desugaring somehow: {:#?}", + bad_expr + ); + } + bad_expr @ BinOps { .. } => { + panic!( + "A binary operator chain did not get desugared somehow: {:#?}", + bad_expr + ); + } + bad_expr @ UnaryOp(_, _) => { + panic!( + "A unary operator did not get desugared somehow: {:#?}", + bad_expr + ); + } + + rest => todo!("not yet implemented {:?}", rest), + } +} + +pub fn to_expr_id<'a>( + env: &mut Env<'a>, + scope: &mut Scope, + parse_expr: &'a roc_parse::ast::Expr<'a>, + region: Region, +) -> (ExprId, Output) { + let (expr, output) = to_expr2(env, scope, parse_expr, region); + + (env.add(expr, region), output) +} diff --git a/ast/src/lang/core/expr/introduced_vars.rs b/ast/src/lang/core/expr/introduced_vars.rs new file mode 100644 index 0000000000..0abb087815 --- /dev/null +++ b/ast/src/lang/core/expr/introduced_vars.rs @@ -0,0 +1,51 @@ +use roc_collections::all::MutMap; +use roc_module::ident::Lowercase; +use roc_module::symbol::Symbol; +use roc_types::subs::Variable; + +#[derive(Clone, Debug, PartialEq, Default)] +pub struct IntroducedVariables { + // Rigids must be unique within a type annotation. + // E.g. in `identity : a -> a`, there should only be one + // variable (a rigid one, with name "a"). + // Hence `rigids : Map` + // + // But then between annotations, the same name can occur multiple times, + // but a variable can only have one name. Therefore + // `ftv : Map`.
+ pub wildcards: Vec, + pub var_by_name: MutMap, + pub name_by_var: MutMap, + pub host_exposed_aliases: MutMap, +} + +impl IntroducedVariables { + pub fn insert_named(&mut self, name: Lowercase, var: Variable) { + self.var_by_name.insert(name.clone(), var); + self.name_by_var.insert(var, name); + } + + pub fn insert_wildcard(&mut self, var: Variable) { + self.wildcards.push(var); + } + + pub fn insert_host_exposed_alias(&mut self, symbol: Symbol, var: Variable) { + self.host_exposed_aliases.insert(symbol, var); + } + + pub fn union(&mut self, other: &Self) { + self.wildcards.extend(other.wildcards.iter().cloned()); + self.var_by_name.extend(other.var_by_name.clone()); + self.name_by_var.extend(other.name_by_var.clone()); + self.host_exposed_aliases + .extend(other.host_exposed_aliases.clone()); + } + + pub fn var_by_name(&self, name: &Lowercase) -> Option<&Variable> { + self.var_by_name.get(name) + } + + pub fn name_by_var(&self, var: Variable) -> Option<&Lowercase> { + self.name_by_var.get(&var) + } +} diff --git a/ast/src/lang/core/expr/mod.rs b/ast/src/lang/core/expr/mod.rs new file mode 100644 index 0000000000..32d768c4c9 --- /dev/null +++ b/ast/src/lang/core/expr/mod.rs @@ -0,0 +1,6 @@ +pub mod expr2; +pub mod expr2_to_string; +pub(crate) mod expr_to_expr2; +mod introduced_vars; +pub(crate) mod output; +pub mod record_field; diff --git a/ast/src/lang/core/expr/output.rs b/ast/src/lang/core/expr/output.rs new file mode 100644 index 0000000000..4287e3f72f --- /dev/null +++ b/ast/src/lang/core/expr/output.rs @@ -0,0 +1,30 @@ +use crate::{ + lang::core::{def::def::References, types::Alias}, + mem_pool::pool::NodeId, +}; +use roc_collections::all::{MutMap, MutSet}; +use roc_module::symbol::Symbol; + +use super::introduced_vars::IntroducedVariables; + +#[derive(Clone, Default, Debug, PartialEq)] +pub struct Output { + pub references: References, + pub tail_call: Option, + pub introduced_variables: IntroducedVariables, + pub aliases: MutMap>, + pub non_closures: 
MutSet, +} + +impl Output { + pub fn union(&mut self, other: Self) { + self.references.union_mut(other.references); + + if let (None, Some(later)) = (self.tail_call, other.tail_call) { + self.tail_call = Some(later); + } + + self.aliases.extend(other.aliases); + self.non_closures.extend(other.non_closures); + } +} diff --git a/ast/src/lang/core/expr/record_field.rs b/ast/src/lang/core/expr/record_field.rs new file mode 100644 index 0000000000..aaf464799f --- /dev/null +++ b/ast/src/lang/core/expr/record_field.rs @@ -0,0 +1,49 @@ +use roc_types::subs::Variable; + +use crate::mem_pool::pool_str::PoolStr; +use roc_module::symbol::Symbol; + +use super::expr2::ExprId; + +#[derive(Debug)] +pub enum RecordField { + InvalidLabelOnly(PoolStr, Variable), + LabelOnly(PoolStr, Variable, Symbol), + LabeledValue(PoolStr, Variable, ExprId), +} + +use RecordField::*; + +impl RecordField { + pub fn get_record_field_var(&self) -> &Variable { + match self { + InvalidLabelOnly(_, var) => var, + LabelOnly(_, var, _) => var, + LabeledValue(_, var, _) => var, + } + } + + pub fn get_record_field_pool_str(&self) -> &PoolStr { + match self { + InvalidLabelOnly(pool_str, _) => pool_str, + LabelOnly(pool_str, _, _) => pool_str, + LabeledValue(pool_str, _, _) => pool_str, + } + } + + pub fn get_record_field_pool_str_mut(&mut self) -> &mut PoolStr { + match self { + InvalidLabelOnly(pool_str, _) => pool_str, + LabelOnly(pool_str, _, _) => pool_str, + LabeledValue(pool_str, _, _) => pool_str, + } + } + + pub fn get_record_field_val_node_id(&self) -> Option { + match self { + InvalidLabelOnly(_, _) => None, + LabelOnly(_, _, _) => None, + LabeledValue(_, _, field_val_id) => Some(*field_val_id), + } + } +} diff --git a/ast/src/lang/core/fun_def.rs b/ast/src/lang/core/fun_def.rs new file mode 100644 index 0000000000..588d07d996 --- /dev/null +++ b/ast/src/lang/core/fun_def.rs @@ -0,0 +1,61 @@ +use crate::{ + lang::rigids::Rigids, + mem_pool::{pool::NodeId, pool_vec::PoolVec, 
shallow_clone::ShallowClone}, +}; +use roc_module::symbol::Symbol; +use roc_types::subs::Variable; + +use super::{ + expr::expr2::ExprId, + pattern::PatternId, + types::{Type2, TypeId}, +}; + +#[derive(Debug)] +pub enum FunctionDef { + WithAnnotation { + name: Symbol, // 8B + arguments: PoolVec<(PatternId, Type2)>, // 8B + rigids: NodeId, // 4B + return_type: TypeId, // 4B + body: ExprId, // 4B + }, + NoAnnotation { + name: Symbol, // 8B + arguments: PoolVec<(PatternId, Variable)>, // 8B + return_var: Variable, // 4B + body: ExprId, // 4B + }, +} + +impl ShallowClone for FunctionDef { + fn shallow_clone(&self) -> Self { + match self { + Self::WithAnnotation { + name, + arguments, + rigids, + return_type, + body, + } => Self::WithAnnotation { + name: *name, + arguments: arguments.shallow_clone(), + rigids: *rigids, + return_type: *return_type, + body: *body, + }, + + Self::NoAnnotation { + name, + arguments, + return_var, + body, + } => Self::NoAnnotation { + name: *name, + arguments: arguments.shallow_clone(), + return_var: *return_var, + body: *body, + }, + } + } +} diff --git a/ast/src/lang/core/header.rs b/ast/src/lang/core/header.rs new file mode 100644 index 0000000000..4b10dad537 --- /dev/null +++ b/ast/src/lang/core/header.rs @@ -0,0 +1,10 @@ +use super::expr::expr2::ExprId; + +#[derive(Debug)] +pub struct AppHeader { + pub app_name: String, + pub packages_base: String, + pub imports: Vec, + pub provides: Vec, + pub ast_node_id: ExprId, // TODO probably want to create and use HeaderId +} diff --git a/ast/src/lang/core/mod.rs b/ast/src/lang/core/mod.rs new file mode 100644 index 0000000000..74300dab4f --- /dev/null +++ b/ast/src/lang/core/mod.rs @@ -0,0 +1,10 @@ +pub mod ast; +mod declaration; +pub mod def; +pub mod expr; +mod fun_def; +pub mod header; +pub mod pattern; +pub mod str; +pub mod types; +pub mod val_def; diff --git a/editor/src/lang/pattern.rs b/ast/src/lang/core/pattern.rs similarity index 97% rename from editor/src/lang/pattern.rs rename to 
ast/src/lang/core/pattern.rs index aae7797e8a..19a956bcd8 100644 --- a/editor/src/lang/pattern.rs +++ b/ast/src/lang/core/pattern.rs @@ -1,11 +1,7 @@ #![allow(clippy::all)] #![allow(dead_code)] #![allow(unused_imports)] -use crate::editor::ed_error::{EdResult, UnexpectedPattern2Variant}; -use crate::lang::ast::{ExprId, FloatVal, IntVal}; -use crate::lang::expr::{to_expr_id, Env, Output}; -use crate::lang::pool::{NodeId, Pool, PoolStr, PoolVec, ShallowClone}; -use crate::lang::scope::Scope; + use bumpalo::collections::Vec as BumpVec; use roc_can::expr::unescape_char; use roc_can::num::{finish_parsing_base, finish_parsing_float, finish_parsing_int}; @@ -17,7 +13,18 @@ use roc_problem::can::{MalformedPatternProblem, Problem, RuntimeError}; use roc_region::all::Region; use roc_types::subs::Variable; -use super::constrain::Constraint; +use crate::ast_error::{ASTResult, UnexpectedPattern2Variant}; +use crate::constrain::Constraint; +use crate::lang::core::expr::expr_to_expr2::to_expr_id; +use crate::lang::env::Env; +use crate::lang::scope::Scope; +use crate::mem_pool::pool::{NodeId, Pool}; +use crate::mem_pool::pool_str::PoolStr; +use crate::mem_pool::pool_vec::PoolVec; +use crate::mem_pool::shallow_clone::ShallowClone; + +use super::expr::expr2::{ExprId, FloatVal, IntVal}; +use super::expr::output::Output; use super::types::Type2; pub type PatternId = NodeId; @@ -483,7 +490,7 @@ pub fn symbols_from_pattern(pool: &Pool, initial: &Pattern2) -> Vec { symbols } -pub fn get_identifier_string(pattern: &Pattern2, interns: &Interns) -> EdResult { +pub fn get_identifier_string(pattern: &Pattern2, interns: &Interns) -> ASTResult { match pattern { Pattern2::Identifier(symbol) => Ok(symbol.ident_str(interns).to_string()), other => UnexpectedPattern2Variant { @@ -569,7 +576,7 @@ fn underscore_in_def<'a>(env: &mut Env<'a>, region: Region) -> Pattern2 { Pattern2::UnsupportedPattern(region) } -fn flatten_str_literal(pool: &mut Pool, literal: &StrLiteral<'_>) -> Pattern2 { +pub(crate) 
fn flatten_str_literal(pool: &mut Pool, literal: &StrLiteral<'_>) -> Pattern2 { use roc_parse::ast::StrLiteral::*; match literal { @@ -579,7 +586,7 @@ fn flatten_str_literal(pool: &mut Pool, literal: &StrLiteral<'_>) -> Pattern2 { } } -fn flatten_str_lines(pool: &mut Pool, lines: &[&[StrSegment<'_>]]) -> Pattern2 { +pub(crate) fn flatten_str_lines(pool: &mut Pool, lines: &[&[StrSegment<'_>]]) -> Pattern2 { use StrSegment::*; let mut buf = String::new(); diff --git a/ast/src/lang/core/str.rs b/ast/src/lang/core/str.rs new file mode 100644 index 0000000000..53b0a999cf --- /dev/null +++ b/ast/src/lang/core/str.rs @@ -0,0 +1,228 @@ +use roc_module::{operator::CalledVia, symbol::Symbol}; +use roc_parse::ast::StrLiteral; + +use crate::{ + ast_error::{ASTResult, UnexpectedASTNode}, + lang::{core::expr::expr_to_expr2::to_expr2, env::Env, scope::Scope}, + mem_pool::{pool::Pool, pool_str::PoolStr, pool_vec::PoolVec}, +}; + +use super::expr::{ + expr2::{Expr2, ExprId}, + output::Output, +}; + +pub(crate) fn flatten_str_literal<'a>( + env: &mut Env<'a>, + scope: &mut Scope, + literal: &StrLiteral<'a>, +) -> (Expr2, Output) { + use roc_parse::ast::StrLiteral::*; + + match literal { + PlainLine(str_slice) => { + // TODO use smallstr + let expr = Expr2::Str(PoolStr::new(str_slice, &mut env.pool)); + + (expr, Output::default()) + } + Line(segments) => flatten_str_lines(env, scope, &[segments]), + Block(lines) => flatten_str_lines(env, scope, lines), + } +} + +enum StrSegment { + Interpolation(Expr2), + Plaintext(PoolStr), +} + +fn flatten_str_lines<'a>( + env: &mut Env<'a>, + scope: &mut Scope, + lines: &[&[roc_parse::ast::StrSegment<'a>]], +) -> (Expr2, Output) { + use roc_parse::ast::StrSegment::*; + + let mut buf = String::new(); + let mut segments = Vec::new(); + let mut output = Output::default(); + + for line in lines { + for segment in line.iter() { + match segment { + Plaintext(string) => { + buf.push_str(string); + } + Unicode(loc_hex_digits) => match 
u32::from_str_radix(loc_hex_digits.value, 16) { + Ok(code_pt) => match std::char::from_u32(code_pt) { + Some(ch) => { + buf.push(ch); + } + None => { + // env.problem(Problem::InvalidUnicodeCodePt(loc_hex_digits.region)); + // + // return ( + // Expr::RuntimeError(RuntimeError::InvalidUnicodeCodePt( + // loc_hex_digits.region, + // )), + // output, + // ); + todo!() + } + }, + Err(_) => { + // env.problem(Problem::InvalidHexadecimal(loc_hex_digits.region)); + // + // return ( + // Expr::RuntimeError(RuntimeError::InvalidHexadecimal( + // loc_hex_digits.region, + // )), + // output, + // ); + todo!() + } + }, + Interpolated(loc_expr) => { + if roc_can::expr::is_valid_interpolation(loc_expr.value) { + // Interpolations desugar to Str.concat calls + output.references.calls.insert(Symbol::STR_CONCAT); + + if !buf.is_empty() { + segments.push(StrSegment::Plaintext(PoolStr::new(&buf, &mut env.pool))); + + buf = String::new(); + } + + let (loc_expr, new_output) = + to_expr2(env, scope, loc_expr.value, loc_expr.region); + + output.union(new_output); + + segments.push(StrSegment::Interpolation(loc_expr)); + } else { + // env.problem(Problem::InvalidInterpolation(loc_expr.region)); + // + // return ( + // Expr::RuntimeError(RuntimeError::InvalidInterpolation(loc_expr.region)), + // output, + // ); + todo!() + } + } + EscapedChar(escaped) => buf.push(roc_can::expr::unescape_char(escaped)), + } + } + } + + if !buf.is_empty() { + segments.push(StrSegment::Plaintext(PoolStr::new(&buf, &mut env.pool))); + } + + (desugar_str_segments(env, segments), output) +} + +/// Resolve string interpolations by desugaring a sequence of StrSegments +/// into nested calls to Str.concat +fn desugar_str_segments(env: &mut Env, segments: Vec) -> Expr2 { + use StrSegment::*; + + let pool = &mut env.pool; + let var_store = &mut env.var_store; + + let mut iter = segments.into_iter().rev(); + let mut expr = match iter.next() { + Some(Plaintext(pool_str)) => Expr2::Str(pool_str), + 
Some(Interpolation(expr_id)) => expr_id, + None => { + // No segments? Empty string! + + let pool_str = PoolStr::new("", pool); + Expr2::Str(pool_str) + } + }; + + for seg in iter { + let new_expr = match seg { + Plaintext(string) => Expr2::Str(string), + Interpolation(expr_id) => expr_id, + }; + + let concat_expr_id = pool.add(Expr2::Var(Symbol::STR_CONCAT)); + + let args = vec![ + (var_store.fresh(), pool.add(new_expr)), + (var_store.fresh(), pool.add(expr)), + ]; + let args = PoolVec::new(args.into_iter(), pool); + + let new_call = Expr2::Call { + args, + expr: concat_expr_id, + expr_var: var_store.fresh(), + fn_var: var_store.fresh(), + closure_var: var_store.fresh(), + called_via: CalledVia::Space, + }; + + expr = new_call + } + + expr +} + +pub fn update_str_expr( + node_id: ExprId, + new_char: char, + insert_index: usize, + pool: &mut Pool, +) -> ASTResult<()> { + let str_expr = pool.get_mut(node_id); + + enum Either { + MyString(String), + MyPoolStr(PoolStr), + Done, + } + + let insert_either = match str_expr { + Expr2::SmallStr(arr_string) => { + let insert_res = arr_string.try_insert(insert_index as u8, new_char); + + match insert_res { + Ok(_) => Either::Done, + _ => { + let mut new_string = arr_string.as_str().to_string(); + new_string.insert(insert_index, new_char); + + Either::MyString(new_string) + } + } + } + Expr2::Str(old_pool_str) => Either::MyPoolStr(*old_pool_str), + other => UnexpectedASTNode { + required_node_type: "SmallStr or Str", + encountered_node_type: format!("{:?}", other), + } + .fail()?, + }; + + match insert_either { + Either::MyString(new_string) => { + let new_pool_str = PoolStr::new(&new_string, pool); + + pool.set(node_id, Expr2::Str(new_pool_str)) + } + Either::MyPoolStr(old_pool_str) => { + let mut new_string = old_pool_str.as_str(pool).to_owned(); + + new_string.insert(insert_index, new_char); + + let new_pool_str = PoolStr::new(&new_string, pool); + + pool.set(node_id, Expr2::Str(new_pool_str)) + } + Either::Done => (), + } 
+ + Ok(()) +} diff --git a/editor/src/lang/types.rs b/ast/src/lang/core/types.rs similarity index 99% rename from editor/src/lang/types.rs rename to ast/src/lang/core/types.rs index c935901acd..107a6e7946 100644 --- a/editor/src/lang/types.rs +++ b/ast/src/lang/core/types.rs @@ -1,9 +1,6 @@ #![allow(clippy::all)] #![allow(dead_code)] #![allow(unused_imports)] -use crate::lang::expr::Env; -use crate::lang::pool::{NodeId, Pool, PoolStr, PoolVec, ShallowClone}; -use crate::lang::scope::Scope; // use roc_can::expr::Output; use roc_collections::all::{MutMap, MutSet}; use roc_module::ident::{Ident, TagName}; @@ -12,6 +9,13 @@ use roc_region::all::{Located, Region}; use roc_types::types::{Problem, RecordField}; use roc_types::{subs::Variable, types::ErrorType}; +use crate::lang::env::Env; +use crate::lang::scope::Scope; +use crate::mem_pool::pool::{NodeId, Pool}; +use crate::mem_pool::pool_str::PoolStr; +use crate::mem_pool::pool_vec::PoolVec; +use crate::mem_pool::shallow_clone::ShallowClone; + pub type TypeId = NodeId; #[derive(Debug)] diff --git a/ast/src/lang/core/val_def.rs b/ast/src/lang/core/val_def.rs new file mode 100644 index 0000000000..b30bfb3ace --- /dev/null +++ b/ast/src/lang/core/val_def.rs @@ -0,0 +1,101 @@ +use crate::{ + lang::{core::expr::expr2_to_string::expr2_to_string, rigids::Rigids}, + mem_pool::{ + pool::{NodeId, Pool}, + shallow_clone::ShallowClone, + }, +}; +use roc_types::subs::Variable; + +use super::{ + expr::expr2::ExprId, + pattern::{Pattern2, PatternId}, + types::TypeId, +}; + +#[derive(Debug)] +pub enum ValueDef { + WithAnnotation { + pattern_id: PatternId, // 4B + expr_id: ExprId, // 4B + type_id: TypeId, + rigids: Rigids, + expr_var: Variable, // 4B + }, + NoAnnotation { + pattern_id: PatternId, // 4B + expr_id: ExprId, // 4B + expr_var: Variable, // 4B + }, +} + +impl ShallowClone for ValueDef { + fn shallow_clone(&self) -> Self { + match self { + Self::WithAnnotation { + pattern_id, + expr_id, + type_id, + rigids, + expr_var, + } => 
Self::WithAnnotation { + pattern_id: *pattern_id, + expr_id: *expr_id, + type_id: *type_id, + rigids: rigids.shallow_clone(), + expr_var: *expr_var, + }, + Self::NoAnnotation { + pattern_id, + expr_id, + expr_var, + } => Self::NoAnnotation { + pattern_id: *pattern_id, + expr_id: *expr_id, + expr_var: *expr_var, + }, + } + } +} + +impl ValueDef { + pub fn get_expr_id(&self) -> ExprId { + match self { + ValueDef::WithAnnotation { expr_id, .. } => *expr_id, + ValueDef::NoAnnotation { expr_id, .. } => *expr_id, + } + } + + pub fn get_pattern_id(&self) -> NodeId { + match self { + ValueDef::WithAnnotation { pattern_id, .. } => *pattern_id, + ValueDef::NoAnnotation { pattern_id, .. } => *pattern_id, + } + } +} + +pub fn value_def_to_string(val_def: &ValueDef, pool: &Pool) -> String { + match val_def { + ValueDef::WithAnnotation { + pattern_id, + expr_id, + type_id, + rigids, + expr_var, + } => { + format!("WithAnnotation {{ pattern_id: {:?}, expr_id: {:?}, type_id: {:?}, rigids: {:?}, expr_var: {:?}}}", pool.get(*pattern_id), expr2_to_string(*expr_id, pool), pool.get(*type_id), rigids, expr_var) + } + ValueDef::NoAnnotation { + pattern_id, + expr_id, + expr_var, + } => { + format!( + "NoAnnotation {{ pattern_id: {:?}, expr_id: {:?}, expr_var: {:?}}}", + pool.get(*pattern_id), + expr2_to_string(*expr_id, pool), + expr_var + ) + } + } +} diff --git a/ast/src/lang/env.rs b/ast/src/lang/env.rs new file mode 100644 index 0000000000..87a048151e --- /dev/null +++ b/ast/src/lang/env.rs @@ -0,0 +1,168 @@ +use bumpalo::{collections::Vec as BumpVec, Bump}; +use roc_collections::all::{MutMap, MutSet}; +use roc_module::ident::{Ident, ModuleName}; +use roc_module::symbol::{IdentIds, ModuleId, ModuleIds, Symbol}; +use roc_problem::can::{Problem, RuntimeError}; +use roc_region::all::{Located, Region}; +use roc_types::subs::VarStore; + +use crate::mem_pool::pool::{NodeId, Pool}; + +use super::core::def::def::References; + +#[derive(Debug)] +pub struct Env<'a> { + pub home: ModuleId, + 
pub var_store: &'a mut VarStore, + pub pool: &'a mut Pool, + pub arena: &'a Bump, + + pub problems: BumpVec<'a, Problem>, + + pub dep_idents: MutMap, + pub module_ids: &'a ModuleIds, + pub ident_ids: IdentIds, + pub exposed_ident_ids: IdentIds, + + pub closures: MutMap, + /// Symbols which were referenced by qualified lookups. + pub qualified_lookups: MutSet, + + pub top_level_symbols: MutSet, + + pub closure_name_symbol: Option, + pub tailcallable_symbol: Option, +} + +impl<'a> Env<'a> { + pub fn new( + home: ModuleId, + arena: &'a Bump, + pool: &'a mut Pool, + var_store: &'a mut VarStore, + dep_idents: MutMap, + module_ids: &'a ModuleIds, + exposed_ident_ids: IdentIds, + ) -> Env<'a> { + Env { + home, + arena, + pool, + problems: BumpVec::new_in(arena), + var_store, + dep_idents, + module_ids, + ident_ids: exposed_ident_ids.clone(), // we start with these, but will add more later + exposed_ident_ids, + closures: MutMap::default(), + qualified_lookups: MutSet::default(), + tailcallable_symbol: None, + closure_name_symbol: None, + top_level_symbols: MutSet::default(), + } + } + + pub fn add(&mut self, item: T, region: Region) -> NodeId { + let id = self.pool.add(item); + self.set_region(id, region); + + id + } + + pub fn problem(&mut self, problem: Problem) { + self.problems.push(problem); + } + + pub fn set_region(&mut self, _node_id: NodeId, _region: Region) { + dbg!("Don't Forget to set the region eventually"); + } + + pub fn register_closure(&mut self, symbol: Symbol, references: References) { + self.closures.insert(symbol, references); + } + + /// Generates a unique, new symbol like "$1" or "$5", + /// using the home module as the module_id. + /// + /// This is used, for example, during canonicalization of an Expr::Closure + /// to generate a unique symbol to refer to that closure. 
+ pub fn gen_unique_symbol(&mut self) -> Symbol { + let ident_id = self.ident_ids.gen_unique(); + + Symbol::new(self.home, ident_id) + } + + /// Returns Err if the symbol resolved, but it was not exposed by the given module + pub fn qualified_lookup( + &mut self, + module_name: &str, + ident: &str, + region: Region, + ) -> Result { + debug_assert!( + !module_name.is_empty(), + "Called env.qualified_lookup with an unqualified ident: {:?}", + ident + ); + + let module_name: ModuleName = module_name.into(); + + match self.module_ids.get_id(&module_name) { + Some(&module_id) => { + let ident: Ident = ident.into(); + + // You can do qualified lookups on your own module, e.g. + // if I'm in the Foo module, I can do a `Foo.bar` lookup. + if module_id == self.home { + match self.ident_ids.get_id(&ident) { + Some(ident_id) => { + let symbol = Symbol::new(module_id, *ident_id); + + self.qualified_lookups.insert(symbol); + + Ok(symbol) + } + None => Err(RuntimeError::LookupNotInScope( + Located { + value: ident, + region, + }, + self.ident_ids + .idents() + .map(|(_, string)| string.as_ref().into()) + .collect(), + )), + } + } else { + match self + .dep_idents + .get(&module_id) + .and_then(|exposed_ids| exposed_ids.get_id(&ident)) + { + Some(ident_id) => { + let symbol = Symbol::new(module_id, *ident_id); + + self.qualified_lookups.insert(symbol); + + Ok(symbol) + } + None => Err(RuntimeError::ValueNotExposed { + module_name, + ident, + region, + }), + } + } + } + None => Err(RuntimeError::ModuleNotImported { + module_name, + imported_modules: self + .module_ids + .available_modules() + .map(|string| string.as_ref().into()) + .collect(), + region, + }), + } + } +} diff --git a/ast/src/lang/mod.rs b/ast/src/lang/mod.rs new file mode 100644 index 0000000000..fa21ea740d --- /dev/null +++ b/ast/src/lang/mod.rs @@ -0,0 +1,4 @@ +pub mod core; +pub mod env; +mod rigids; +pub mod scope; diff --git a/ast/src/lang/rigids.rs b/ast/src/lang/rigids.rs new file mode 100644 index 
0000000000..a10dce0016 --- /dev/null +++ b/ast/src/lang/rigids.rs @@ -0,0 +1,81 @@ +use std::{ + collections::{HashMap, HashSet}, + hash::BuildHasherDefault, +}; + +use crate::mem_pool::{ + pool::Pool, pool_str::PoolStr, pool_vec::PoolVec, shallow_clone::ShallowClone, +}; +use roc_collections::all::WyHash; +use roc_types::subs::Variable; + +#[derive(Debug)] +pub struct Rigids { + pub names: PoolVec<(Option, Variable)>, // 8B + padding: [u8; 1], +} + +#[allow(clippy::needless_collect)] +impl Rigids { + pub fn new( + named: HashMap<&str, Variable, BuildHasherDefault>, + unnamed: HashSet>, + pool: &mut Pool, + ) -> Self { + let names = PoolVec::with_capacity((named.len() + unnamed.len()) as u32, pool); + + let mut temp_names = Vec::new(); + + temp_names.extend(named.iter().map(|(name, var)| (Some(*name), *var))); + + temp_names.extend(unnamed.iter().map(|var| (None, *var))); + + for (node_id, (opt_name, variable)) in names.iter_node_ids().zip(temp_names) { + let poolstr = opt_name.map(|name| PoolStr::new(name, pool)); + + pool[node_id] = (poolstr, variable); + } + + Self { + names, + padding: Default::default(), + } + } + + pub fn named(&self, pool: &mut Pool) -> PoolVec<(PoolStr, Variable)> { + let named = self + .names + .iter(pool) + .filter_map(|(opt_pool_str, var)| { + opt_pool_str.as_ref().map(|pool_str| (*pool_str, *var)) + }) + .collect::>(); + + PoolVec::new(named.into_iter(), pool) + } + + pub fn unnamed(&self, pool: &mut Pool) -> PoolVec { + let unnamed = self + .names + .iter(pool) + .filter_map(|(opt_pool_str, var)| { + if opt_pool_str.is_none() { + Some(*var) + } else { + None + } + }) + .collect::>(); + + PoolVec::new(unnamed.into_iter(), pool) + } +} + +impl ShallowClone for Rigids { + fn shallow_clone(&self) -> Self { + Self { + names: self.names.shallow_clone(), + padding: self.padding, + } + } +} diff --git a/editor/src/lang/scope.rs b/ast/src/lang/scope.rs similarity index 97% rename from editor/src/lang/scope.rs rename to ast/src/lang/scope.rs 
index e17f3e09a9..9f3a11c60c 100644 --- a/editor/src/lang/scope.rs +++ b/ast/src/lang/scope.rs @@ -1,8 +1,11 @@ #![allow(clippy::all)] #![allow(dead_code)] #![allow(unused_imports)] -use crate::lang::pool::{Pool, PoolStr, PoolVec, ShallowClone}; -use crate::lang::types::{Alias, Type2, TypeId}; + +use crate::mem_pool::pool::Pool; +use crate::mem_pool::pool_str::PoolStr; +use crate::mem_pool::pool_vec::PoolVec; +use crate::mem_pool::shallow_clone::ShallowClone; use roc_collections::all::{MutMap, MutSet}; use roc_module::ident::{Ident, Lowercase}; use roc_module::symbol::{IdentIds, ModuleId, Symbol}; @@ -14,6 +17,8 @@ use roc_types::{ subs::{VarId, VarStore, Variable}, }; +use super::core::types::{Alias, Type2, TypeId}; + fn solved_type_to_type_id( pool: &mut Pool, solved_type: &SolvedType, diff --git a/ast/src/lib.rs b/ast/src/lib.rs new file mode 100644 index 0000000000..b3d987f99f --- /dev/null +++ b/ast/src/lib.rs @@ -0,0 +1,7 @@ +pub mod ast_error; +mod canonicalization; +pub mod constrain; +pub mod lang; +pub mod mem_pool; +pub mod parse; +pub mod solve_type; diff --git a/ast/src/mem_pool/mod.rs b/ast/src/mem_pool/mod.rs new file mode 100644 index 0000000000..b6c8c83b8f --- /dev/null +++ b/ast/src/mem_pool/mod.rs @@ -0,0 +1,4 @@ +pub mod pool; +pub mod pool_str; +pub mod pool_vec; +pub mod shallow_clone; diff --git a/ast/src/mem_pool/pool.rs b/ast/src/mem_pool/pool.rs new file mode 100644 index 0000000000..ab4ae5548d --- /dev/null +++ b/ast/src/mem_pool/pool.rs @@ -0,0 +1,228 @@ +/// A memory pool of 32-byte nodes. The node value 0 is reserved for the pool's +/// use, and valid nodes may never have that value. +/// +/// Internally, the pool is divided into pages of 4096 bytes. It stores nodes +/// into one page at a time, and when it runs out, it uses mmap to reserve an +/// anonymous memory page in which to store nodes. 
+/// +/// Since nodes are 32 bytes, one page can store 128 nodes; you can access a +/// particular node by its NodeId, which is an opaque wrapper around a pointer. +/// +/// Pages also use the node value 0 (all 0 bits) to mark nodes as unoccupied. +/// This is important for performance. +use libc::{c_void, MAP_ANONYMOUS, MAP_PRIVATE, PROT_READ, PROT_WRITE}; +use std::any::type_name; +use std::marker::PhantomData; +use std::mem::size_of; +use std::ptr::null; + +pub const NODE_BYTES: usize = 32; + +// Each page has 128 slots. Each slot holds one 32B node +// This means each page is 4096B, which is the size of a memory page +// on typical systems where the compiler will be run. +// +// Nice things about this system include: +// * Allocating a new page is as simple as asking the OS for a memory page. +// * Since each node is 32B, each node's memory address will be a multiple of 16. +// * Thanks to the free lists and our consistent chunk sizes, we should +// end up with very little fragmentation. +// * Finding a slot for a given node should be very fast: see if the relevant +// free list has any openings; if not, try the next size up. +// +// Less nice things include: +// * This system makes it very hard to ever give a page back to the OS. +// We could try doing the Mesh Allocator strategy: whenever we allocate +// something, assign it to a random slot in the page, and then periodically +// try to merge two pages into one (by locking and remapping them in the OS) +// and then returning the redundant physical page back to the OS. This should +// work in theory, but is pretty complicated, and we'd need to schedule it. +// Keep in mind that we can't use the Mesh Allocator itself because it returns +// usize pointers, which would be too big for us to have 16B nodes. +// On the plus side, we could be okay with higher memory usage early on, +// and then later use the Mesh strategy to reduce long-running memory usage. +// +// With this system, we can allocate up to 4B nodes. 
If we wanted to keep +// a generational index in there, like https://crates.io/crates/sharded-slab +// does, we could use some of the 32 bits for that. For example, if we wanted +// to have a 5-bit generational index (supporting up to 32 generations), then +// we would have 27 bits remaining, meaning we could only support at most +// 134M nodes. Since the editor has a separate Pool for each module, is that +// enough for any single module we'll encounter in practice? Probably, and +// especially if we allocate super large collection literals on the heap instead +// of in the pool. +// +// Another possible design is to try to catch reuse bugs using an "ASan" like +// approach: in development builds, whenever we "free" a particular slot, we +// can add it to a dev-build-only "freed nodes" list and don't hand it back +// out (so, we leak the memory.) Then we can (again, in development builds only) +// check to see if we're about to store something in zeroed-out memory; if so, check +// to see if it was + +#[derive(Debug, Eq)] +pub struct NodeId { + pub(super) index: u32, + pub(super) _phantom: PhantomData, +} + +impl Clone for NodeId { + fn clone(&self) -> Self { + NodeId { + index: self.index, + _phantom: PhantomData::default(), + } + } +} + +impl PartialEq for NodeId { + fn eq(&self, other: &Self) -> bool { + self.index == other.index + } +} + +impl Copy for NodeId {} + +#[derive(Debug)] +pub struct Pool { + pub(super) nodes: *mut [u8; NODE_BYTES], + num_nodes: u32, + capacity: u32, + // free_1node_slots: Vec>, +} + +impl Pool { + pub fn with_capacity(nodes: u32) -> Self { + // round up number of nodes requested to nearest page size in bytes + let bytes_per_page = page_size::get(); + let node_bytes = NODE_BYTES * nodes as usize; + let leftover = node_bytes % bytes_per_page; + let bytes_to_mmap = if leftover == 0 { + node_bytes + } else { + node_bytes + bytes_per_page - leftover + }; + + let nodes = unsafe { + // mmap anonymous memory pages - that is, contiguous 
virtual memory + // addresses from the OS which will be lazily translated into + // physical memory one 4096-byte page at a time, once we actually + // try to read or write in that page's address range. + libc::mmap( + null::() as *mut c_void, + bytes_to_mmap, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + 0, + 0, + ) + } as *mut [u8; NODE_BYTES]; + + // This is our actual capacity, in nodes. + // It might be higher than the requested capacity due to rounding up + // to nearest page size. + let capacity = (bytes_to_mmap / NODE_BYTES) as u32; + + Pool { + nodes, + num_nodes: 0, + capacity, + } + } + + pub fn add(&mut self, node: T) -> NodeId { + // It's only safe to store this if T fits in S. + debug_assert!( + size_of::() <= NODE_BYTES, + "{} has a size of {}, but it needs to be at most {}", + type_name::(), + size_of::(), + NODE_BYTES + ); + + let node_id = self.reserve(1); + let node_ptr = unsafe { self.nodes.offset(node_id.index as isize) } as *mut T; + + unsafe { *node_ptr = node }; + + node_id + } + + /// Reserves the given number of contiguous node slots, and returns + /// the NodeId of the first one. We only allow reserving 2^32 in a row. + pub(super) fn reserve(&mut self, nodes: u32) -> NodeId { + // TODO once we have a free list, look in there for an open slot first! + let index = self.num_nodes; + + if index < self.capacity { + self.num_nodes = index + nodes; + + NodeId { + index, + _phantom: PhantomData::default(), + } + } else { + todo!("pool ran out of capacity. TODO reallocate the nodes pointer to map to a bigger space. 
Can use mremap on Linux, but must memcpy lots of bytes on macOS and Windows."); + } + } + + pub fn get<'a, 'b, T>(&'a self, node_id: NodeId) -> &'b T { + unsafe { + let node_ptr = self.nodes.offset(node_id.index as isize) as *const T; + + &*node_ptr + } + } + + pub fn get_mut(&mut self, node_id: NodeId) -> &mut T { + unsafe { + let node_ptr = self.nodes.offset(node_id.index as isize) as *mut T; + + &mut *node_ptr + } + } + + pub fn set(&mut self, node_id: NodeId, element: T) { + unsafe { + let node_ptr = self.nodes.offset(node_id.index as isize) as *mut T; + + *node_ptr = element; + } + } + + // A node is available iff its bytes are all zeroes + #[allow(dead_code)] + fn is_available(&self, node_id: NodeId) -> bool { + debug_assert_eq!(size_of::(), NODE_BYTES); + + unsafe { + let node_ptr = self.nodes.offset(node_id.index as isize) as *const [u8; NODE_BYTES]; + + *node_ptr == [0; NODE_BYTES] + } + } +} + +impl std::ops::Index> for Pool { + type Output = T; + + fn index(&self, node_id: NodeId) -> &Self::Output { + self.get(node_id) + } +} + +impl std::ops::IndexMut> for Pool { + fn index_mut(&mut self, node_id: NodeId) -> &mut Self::Output { + self.get_mut(node_id) + } +} + +impl Drop for Pool { + fn drop(&mut self) { + unsafe { + libc::munmap( + self.nodes as *mut c_void, + NODE_BYTES * self.capacity as usize, + ); + } + } +} diff --git a/ast/src/mem_pool/pool_str.rs b/ast/src/mem_pool/pool_str.rs new file mode 100644 index 0000000000..435d4586bb --- /dev/null +++ b/ast/src/mem_pool/pool_str.rs @@ -0,0 +1,86 @@ +use super::pool::{NodeId, Pool, NODE_BYTES}; +use super::shallow_clone::ShallowClone; +use libc::c_void; +use std::marker::PhantomData; +use std::mem::size_of; + +/// A string containing at most 2^32 pool-allocated bytes. 
+#[derive(Debug, Copy, Clone)] +pub struct PoolStr { + first_node_id: NodeId<()>, + len: u32, +} + +#[test] +fn pool_str_size() { + assert_eq!(size_of::(), 8); +} + +impl PoolStr { + pub fn new(string: &str, pool: &mut Pool) -> Self { + debug_assert!(string.len() <= u32::MAX as usize); + + let chars_per_node = NODE_BYTES / size_of::(); + + let number_of_nodes = f64::ceil(string.len() as f64 / chars_per_node as f64) as u32; + + if number_of_nodes > 0 { + let first_node_id = pool.reserve(number_of_nodes); + let index = first_node_id.index as isize; + let next_node_ptr = unsafe { pool.nodes.offset(index) } as *mut c_void; + + unsafe { + libc::memcpy( + next_node_ptr, + string.as_ptr() as *const c_void, + string.len(), + ); + } + + PoolStr { + first_node_id, + len: string.len() as u32, + } + } else { + PoolStr { + first_node_id: NodeId { + index: 0, + _phantom: PhantomData::default(), + }, + len: 0, + } + } + } + + pub fn as_str(&self, pool: &Pool) -> &str { + unsafe { + let node_ptr = pool.nodes.offset(self.first_node_id.index as isize) as *const u8; + + let node_slice: &[u8] = std::slice::from_raw_parts(node_ptr, self.len as usize); + + std::str::from_utf8_unchecked(&node_slice[0..self.len as usize]) + } + } + + #[allow(clippy::len_without_is_empty)] + pub fn len(&self, pool: &Pool) -> usize { + let contents = self.as_str(pool); + + contents.len() + } + + pub fn is_empty(&self, pool: &Pool) -> bool { + self.len(pool) == 0 + } +} + +impl ShallowClone for PoolStr { + fn shallow_clone(&self) -> Self { + // Question: should this fully clone, or is a shallow copy + // (and the aliasing it entails) OK? 
+ Self { + first_node_id: self.first_node_id, + len: self.len, + } + } +} diff --git a/ast/src/mem_pool/pool_vec.rs b/ast/src/mem_pool/pool_vec.rs new file mode 100644 index 0000000000..65c9e89b1b --- /dev/null +++ b/ast/src/mem_pool/pool_vec.rs @@ -0,0 +1,323 @@ +use super::pool::{NodeId, Pool, NODE_BYTES}; +use super::shallow_clone::ShallowClone; +use libc::c_void; +use std::any::type_name; +use std::cmp::Ordering; +use std::marker::PhantomData; +use std::mem::size_of; + +/// An array of at most 2^32 pool-allocated nodes. +#[derive(Debug)] +pub struct PoolVec { + first_node_id: NodeId, + len: u32, +} + +#[test] +fn pool_vec_size() { + assert_eq!(size_of::>(), 8); +} + +impl<'a, T: 'a + Sized> PoolVec { + pub fn empty(pool: &mut Pool) -> Self { + Self::new(std::iter::empty(), pool) + } + + pub fn with_capacity(len: u32, pool: &mut Pool) -> Self { + debug_assert!( + size_of::() <= NODE_BYTES, + "{} has a size of {}", + type_name::(), + size_of::() + ); + + if len == 0 { + Self::empty(pool) + } else { + let first_node_id = pool.reserve(len); + + PoolVec { first_node_id, len } + } + } + + pub fn len(&self) -> usize { + self.len as usize + } + + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + pub fn new>(nodes: I, pool: &mut Pool) -> Self { + debug_assert!(nodes.len() <= u32::MAX as usize); + debug_assert!(size_of::() <= NODE_BYTES); + + let len = nodes.len() as u32; + + if len > 0 { + let first_node_id = pool.reserve(len); + let index = first_node_id.index as isize; + let mut next_node_ptr = unsafe { pool.nodes.offset(index) } as *mut T; + + for (indx_inc, node) in nodes.enumerate() { + unsafe { + *next_node_ptr = node; + + next_node_ptr = pool.nodes.offset(index + (indx_inc as isize) + 1) as *mut T; + } + } + + PoolVec { first_node_id, len } + } else { + PoolVec { + first_node_id: NodeId { + index: 0, + _phantom: PhantomData::default(), + }, + len: 0, + } + } + } + + pub fn iter(&self, pool: &'a Pool) -> impl ExactSizeIterator { + self.pool_list_iter(pool) 
+ } + + pub fn iter_mut(&self, pool: &'a mut Pool) -> impl ExactSizeIterator { + self.pool_list_iter_mut(pool) + } + + pub fn iter_node_ids(&self) -> impl ExactSizeIterator> { + self.pool_list_iter_node_ids() + } + + /// Private version of into_iter which exposes the implementation detail + /// of PoolVecIter. We don't want that struct to be public, but we + /// actually do want to have this separate function for code reuse + /// in the iterator's next() method. + #[inline(always)] + fn pool_list_iter(&self, pool: &'a Pool) -> PoolVecIter<'a, T> { + PoolVecIter { + pool, + current_node_id: self.first_node_id, + len_remaining: self.len, + } + } + + #[inline(always)] + fn pool_list_iter_mut(&self, pool: &'a Pool) -> PoolVecIterMut<'a, T> { + PoolVecIterMut { + pool, + current_node_id: self.first_node_id, + len_remaining: self.len, + } + } + + #[inline(always)] + fn pool_list_iter_node_ids(&self) -> PoolVecIterNodeIds { + PoolVecIterNodeIds { + current_node_id: self.first_node_id, + len_remaining: self.len, + } + } + + pub fn free(self, pool: &'a mut Pool) { + // zero out the memory + unsafe { + let index = self.first_node_id.index as isize; + let node_ptr = pool.nodes.offset(index) as *mut c_void; + let bytes = self.len as usize * NODE_BYTES; + + libc::memset(node_ptr, 0, bytes); + } + + // TODO insert it into the pool's free list + } +} + +impl ShallowClone for PoolVec { + fn shallow_clone(&self) -> Self { + // Question: should this fully clone, or is a shallow copy + // (and the aliasing it entails) OK? 
+ Self { + first_node_id: self.first_node_id, + len: self.len, + } + } +} + +struct PoolVecIter<'a, T> { + pool: &'a Pool, + current_node_id: NodeId, + len_remaining: u32, +} + +impl<'a, T> ExactSizeIterator for PoolVecIter<'a, T> +where + T: 'a, +{ + fn len(&self) -> usize { + self.len_remaining as usize + } +} + +impl<'a, T> Iterator for PoolVecIter<'a, T> +where + T: 'a, +{ + type Item = &'a T; + + fn next(&mut self) -> Option { + let len_remaining = self.len_remaining; + + match len_remaining.cmp(&1) { + Ordering::Greater => { + // Get the current node + let index = self.current_node_id.index; + let node_ptr = unsafe { self.pool.nodes.offset(index as isize) } as *const T; + + // Advance the node pointer to the next node in the current page + self.current_node_id = NodeId { + index: index + 1, + _phantom: PhantomData::default(), + }; + self.len_remaining = len_remaining - 1; + + Some(unsafe { &*node_ptr }) + } + Ordering::Equal => { + self.len_remaining = 0; + + // Don't advance the node pointer's node, because that might + // advance past the end of the page! 
+ + let index = self.current_node_id.index; + let node_ptr = unsafe { self.pool.nodes.offset(index as isize) } as *const T; + + Some(unsafe { &*node_ptr }) + } + Ordering::Less => { + // len_remaining was 0 + None + } + } + } +} + +struct PoolVecIterMut<'a, T> { + pool: &'a Pool, + current_node_id: NodeId, + len_remaining: u32, +} + +impl<'a, T> ExactSizeIterator for PoolVecIterMut<'a, T> +where + T: 'a, +{ + fn len(&self) -> usize { + self.len_remaining as usize + } +} + +impl<'a, T> Iterator for PoolVecIterMut<'a, T> +where + T: 'a, +{ + type Item = &'a mut T; + + fn next(&mut self) -> Option { + let len_remaining = self.len_remaining; + + match len_remaining.cmp(&1) { + Ordering::Greater => { + // Get the current node + let index = self.current_node_id.index; + let node_ptr = unsafe { self.pool.nodes.offset(index as isize) } as *mut T; + + // Advance the node pointer to the next node in the current page + self.current_node_id = NodeId { + index: index + 1, + _phantom: PhantomData::default(), + }; + self.len_remaining = len_remaining - 1; + + Some(unsafe { &mut *node_ptr }) + } + Ordering::Equal => { + self.len_remaining = 0; + + // Don't advance the node pointer's node, because that might + // advance past the end of the page! 
+ + let index = self.current_node_id.index; + let node_ptr = unsafe { self.pool.nodes.offset(index as isize) } as *mut T; + + Some(unsafe { &mut *node_ptr }) + } + Ordering::Less => { + // len_remaining was 0 + None + } + } + } +} + +struct PoolVecIterNodeIds { + current_node_id: NodeId, + len_remaining: u32, +} + +impl ExactSizeIterator for PoolVecIterNodeIds { + fn len(&self) -> usize { + self.len_remaining as usize + } +} + +impl Iterator for PoolVecIterNodeIds { + type Item = NodeId; + + fn next(&mut self) -> Option { + let len_remaining = self.len_remaining; + + match len_remaining.cmp(&1) { + Ordering::Greater => { + // Get the current node + let current = self.current_node_id; + let index = current.index; + + // Advance the node pointer to the next node in the current page + self.current_node_id = NodeId { + index: index + 1, + _phantom: PhantomData::default(), + }; + self.len_remaining = len_remaining - 1; + + Some(current) + } + Ordering::Equal => { + self.len_remaining = 0; + + // Don't advance the node pointer's node, because that might + // advance past the end of the page! 
+ + Some(self.current_node_id) + } + Ordering::Less => { + // len_remaining was 0 + None + } + } + } +} + +#[test] +fn pool_vec_iter_test() { + let expected_vec: Vec = vec![2, 4, 8, 16]; + + let mut test_pool = Pool::with_capacity(1024); + let pool_vec = PoolVec::new(expected_vec.clone().into_iter(), &mut test_pool); + + let current_vec: Vec = pool_vec.iter(&test_pool).copied().collect(); + + assert_eq!(current_vec, expected_vec); +} diff --git a/ast/src/mem_pool/shallow_clone.rs b/ast/src/mem_pool/shallow_clone.rs new file mode 100644 index 0000000000..f444b1f897 --- /dev/null +++ b/ast/src/mem_pool/shallow_clone.rs @@ -0,0 +1,32 @@ +use roc_can::expected::Expected; +use roc_can::expected::PExpected; + +/// Clones the outer node, but does not clone any nodeids +pub trait ShallowClone { + fn shallow_clone(&self) -> Self; +} + +impl ShallowClone for Expected { + fn shallow_clone(&self) -> Self { + use Expected::*; + + match self { + NoExpectation(t) => NoExpectation(t.shallow_clone()), + ForReason(reason, t, region) => ForReason(reason.clone(), t.shallow_clone(), *region), + FromAnnotation(loc_pat, n, source, t) => { + FromAnnotation(loc_pat.clone(), *n, *source, t.shallow_clone()) + } + } + } +} + +impl ShallowClone for PExpected { + fn shallow_clone(&self) -> Self { + use PExpected::*; + + match self { + NoExpectation(t) => NoExpectation(t.shallow_clone()), + ForReason(reason, t, region) => ForReason(reason.clone(), t.shallow_clone(), *region), + } + } +} diff --git a/ast/src/parse/mod.rs b/ast/src/parse/mod.rs new file mode 100644 index 0000000000..31bf78fd5e --- /dev/null +++ b/ast/src/parse/mod.rs @@ -0,0 +1,2 @@ +pub mod parse_ast; +pub mod parse_header; diff --git a/ast/src/parse/parse_ast.rs b/ast/src/parse/parse_ast.rs new file mode 100644 index 0000000000..3ae3cca0d7 --- /dev/null +++ b/ast/src/parse/parse_ast.rs @@ -0,0 +1,48 @@ +use bumpalo::Bump; +use roc_parse::parser::SyntaxError; +use roc_region::all::Region; + +use crate::lang::{ + core::{ + 
ast::AST, + def::{def2::DefId, def_to_def2::str_to_def2}, + expr::expr2::Expr2, + }, + env::Env, + scope::Scope, +}; + +use super::parse_header; + +pub fn parse_from_string<'a>( + code_str: &'a str, + env: &mut Env<'a>, + ast_arena: &'a Bump, +) -> Result> { + let blank_line_indx = code_str + .find("\n\n") + .expect("I was expecting a double newline to split header and rest of code."); + + let header_str = &code_str[0..blank_line_indx]; + let tail_str = &code_str[blank_line_indx..]; + + let mut scope = Scope::new(env.home, env.pool, env.var_store); + let region = Region::new(0, 0, 0, 0); + + let mut def_ids = Vec::::new(); + + let def2_vec = str_to_def2(ast_arena, tail_str, env, &mut scope, region)?; + + for def2 in def2_vec { + let def_id = env.pool.add(def2); + + def_ids.push(def_id); + } + + let ast_node_id = env.pool.add(Expr2::Blank); + + Ok(AST { + header: parse_header::parse_from_string(header_str, ast_node_id), + def_ids, + }) +} diff --git a/ast/src/parse/parse_header.rs b/ast/src/parse/parse_header.rs new file mode 100644 index 0000000000..e387fea026 --- /dev/null +++ b/ast/src/parse/parse_header.rs @@ -0,0 +1,12 @@ +use crate::lang::core::{expr::expr2::ExprId, header::AppHeader}; + +// TODO don't use mock struct and actually parse string +pub fn parse_from_string(_header_str: &str, ast_node_id: ExprId) -> AppHeader { + AppHeader { + app_name: "\"untitled-app\"".to_owned(), + packages_base: "\"platform\"".to_owned(), + imports: vec![], + provides: vec!["main".to_owned()], + ast_node_id, + } +} diff --git a/editor/src/lang/roc_file.rs b/ast/src/roc_file.rs similarity index 100% rename from editor/src/lang/roc_file.rs rename to ast/src/roc_file.rs diff --git a/editor/src/lang/solve.rs b/ast/src/solve_type.rs similarity index 99% rename from editor/src/lang/solve.rs rename to ast/src/solve_type.rs index 68b29c7635..2b39c606b3 100644 --- a/editor/src/lang/solve.rs +++ b/ast/src/solve_type.rs @@ -1,8 +1,5 @@ #![allow(clippy::all)] #![allow(dead_code)] -use 
crate::lang::constrain::Constraint::{self, *}; -use crate::lang::pool::{Pool, PoolVec, ShallowClone}; -use crate::lang::types::Type2; use bumpalo::Bump; use roc_can::expected::{Expected, PExpected}; use roc_collections::all::{BumpMap, BumpMapDefault, MutMap}; @@ -20,6 +17,12 @@ use roc_types::types::{ use roc_unify::unify::unify; use roc_unify::unify::Unified::*; +use crate::constrain::Constraint; +use crate::lang::core::types::Type2; +use crate::mem_pool::pool::Pool; +use crate::mem_pool::pool_vec::PoolVec; +use crate::mem_pool::shallow_clone::ShallowClone; + // Type checking system adapted from Elm by Evan Czaplicki, BSD-3-Clause Licensed // https://github.com/elm/compiler // Thank you, Evan! @@ -197,6 +200,8 @@ fn solve<'a>( subs: &mut Subs, constraint: &Constraint, ) -> State { + use crate::solve_type::Constraint::*; + match constraint { True => state, // SaveTheEnvironment => { diff --git a/cli/benches/time_bench.rs b/cli/benches/time_bench.rs index 56395acc75..f4c8f6e8be 100644 --- a/cli/benches/time_bench.rs +++ b/cli/benches/time_bench.rs @@ -25,12 +25,12 @@ fn bench_group_wall_time(c: &mut Criterion) { group.sample_size(nr_of_runs); let bench_funcs: Vec>) -> ()> = vec![ - bench_nqueens, // queens 11 - bench_cfold, // e = mkExpr 17 1 - bench_deriv, // nest deriv 8 f - bench_rbtree_ck, // ms = makeMap 5 80000 - bench_rbtree_delete, // m = makeMap 100000 - bench_quicksort, // list size 10000 + bench_nqueens, // queens 11 + bench_cfold, // e = mkExpr 17 1 + bench_deriv, // nest deriv 8 f + bench_rbtree_ck, // ms = makeMap 5 80000 + // bench_rbtree_delete, // m = makeMap 100000 + bench_quicksort, // list size 10000 ]; for bench_func in bench_funcs.iter() { diff --git a/cli/cli_utils/src/bench_utils.rs b/cli/cli_utils/src/bench_utils.rs index 146f500380..b01ec495fd 100644 --- a/cli/cli_utils/src/bench_utils.rs +++ b/cli/cli_utils/src/bench_utils.rs @@ -131,6 +131,7 @@ pub fn bench_rbtree_ck(bench_group_opt: Option<&mut BenchmarkGro ); } +#[allow(dead_code)] pub 
fn bench_rbtree_delete(bench_group_opt: Option<&mut BenchmarkGroup>) { exec_bench_w_input( &example_file("benchmarks", "RBTreeDel.roc"), diff --git a/cli/src/build.rs b/cli/src/build.rs index d0702ce3dd..e62ddb7043 100644 --- a/cli/src/build.rs +++ b/cli/src/build.rs @@ -3,6 +3,8 @@ use roc_build::{ link::{link, rebuild_host, LinkType}, program, }; +#[cfg(feature = "llvm")] +use roc_builtins::bitcode; use roc_can::builtins::builtin_defs_map; use roc_collections::all::MutMap; use roc_load::file::LoadingProblem; @@ -10,6 +12,7 @@ use roc_mono::ir::OptLevel; use std::path::PathBuf; use std::time::{Duration, SystemTime}; use target_lexicon::Triple; +#[cfg(feature = "llvm")] use tempfile::Builder; fn report_timing(buf: &mut String, label: &str, duration: Duration) { @@ -240,11 +243,19 @@ pub fn build_file<'a>( })?; BuildOutcome::NoProblems } else { + let mut inputs = vec![ + host_input_path.as_path().to_str().unwrap(), + app_o_file.to_str().unwrap(), + ]; + if matches!(opt_level, OptLevel::Development) { + inputs.push(bitcode::OBJ_PATH); + } + let (mut child, _) = // TODO use lld link( target, binary_path.clone(), - &[host_input_path.as_path().to_str().unwrap(), app_o_file.to_str().unwrap()], + &inputs, link_type ) .map_err(|_| { diff --git a/cli/src/main.rs b/cli/src/main.rs index 1a7016fc93..a2e91147b1 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -11,12 +11,13 @@ use std::path::{Path, PathBuf}; #[global_allocator] static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; -#[cfg(feature = "llvm")] -use roc_cli::build; use std::ffi::{OsStr, OsString}; +#[cfg(feature = "llvm")] +use roc_cli::build; + #[cfg(not(feature = "llvm"))] -fn build(_target: &Triple, _matches: &clap::ArgMatches, _config: BuildConfig) -> io::Result { +fn build(_matches: &clap::ArgMatches, _config: BuildConfig) -> io::Result { panic!("Building without LLVM is not currently supported."); } diff --git a/cli/tests/cli_run.rs b/cli/tests/cli_run.rs index aa7020700f..725b62dba2 100644 --- 
a/cli/tests/cli_run.rs +++ b/cli/tests/cli_run.rs @@ -18,6 +18,13 @@ mod cli_run { #[cfg(not(debug_assertions))] use roc_collections::all::MutMap; + #[cfg(target_os = "linux")] + const TEST_SURGICAL_LINKER: bool = true; + + // Surgical linker currently only supports linux. + #[cfg(not(target_os = "linux"))] + const TEST_SURGICAL_LINKER: bool = false; + #[cfg(not(target_os = "macos"))] const ALLOW_VALGRIND: bool = true; @@ -136,7 +143,6 @@ mod cli_run { ); } } - /// This macro does two things. /// /// First, it generates and runs a separate test for each of the given @@ -184,6 +190,19 @@ mod cli_run { example.expected_ending, example.use_valgrind, ); + + // Also check with the surgical linker. + + if TEST_SURGICAL_LINKER { + check_output_with_stdin( + &file_name, + example.stdin, + example.executable_filename, + &["--roc-linker"], + example.expected_ending, + example.use_valgrind, + ); + } } )* @@ -228,7 +247,7 @@ mod cli_run { }, hello_rust:"hello-rust" => Example { filename: "Hello.roc", - executable_filename: "hello-world", + executable_filename: "hello-rust", stdin: &[], expected_ending:"Hello, World!\n", use_valgrind: true, @@ -435,77 +454,77 @@ mod cli_run { } benchmarks! 
{ - nqueens => Example { - filename: "NQueens.roc", - executable_filename: "nqueens", - stdin: &["6"], - expected_ending: "4\n", - use_valgrind: true, - }, - cfold => Example { - filename: "CFold.roc", - executable_filename: "cfold", - stdin: &["3"], - expected_ending: "11 & 11\n", - use_valgrind: true, - }, - deriv => Example { - filename: "Deriv.roc", - executable_filename: "deriv", - stdin: &["2"], - expected_ending: "1 count: 6\n2 count: 22\n", - use_valgrind: true, - }, - rbtree_ck => Example { - filename: "RBTreeCk.roc", - executable_filename: "rbtree-ck", - stdin: &["100"], - expected_ending: "10\n", - use_valgrind: true, - }, - rbtree_insert => Example { - filename: "RBTreeInsert.roc", - executable_filename: "rbtree-insert", - stdin: &[], - expected_ending: "Node Black 0 {} Empty Empty\n", - use_valgrind: true, - }, - rbtree_del => Example { - filename: "RBTreeDel.roc", - executable_filename: "rbtree-del", - stdin: &["420"], - expected_ending: "30\n", - use_valgrind: true, - }, - astar => Example { - filename: "TestAStar.roc", - executable_filename: "test-astar", - stdin: &[], - expected_ending: "True\n", - use_valgrind: false, - }, - base64 => Example { - filename: "TestBase64.roc", - executable_filename: "test-base64", - stdin: &[], - expected_ending: "encoded: SGVsbG8gV29ybGQ=\ndecoded: Hello World\n", - use_valgrind: true, - }, - closure => Example { - filename: "Closure.roc", - executable_filename: "closure", - stdin: &[], - expected_ending: "", - use_valgrind: true, - }, - quicksort_app => Example { - filename: "QuicksortApp.roc", - executable_filename: "quicksortapp", - stdin: &[], - expected_ending: "todo put the correct quicksort answer here", - use_valgrind: true, - }, - } + nqueens => Example { + filename: "NQueens.roc", + executable_filename: "nqueens", + stdin: &["6"], + expected_ending: "4\n", + use_valgrind: true, + }, + cfold => Example { + filename: "CFold.roc", + executable_filename: "cfold", + stdin: &["3"], + expected_ending: "11 & 
11\n", + use_valgrind: true, + }, + deriv => Example { + filename: "Deriv.roc", + executable_filename: "deriv", + stdin: &["2"], + expected_ending: "1 count: 6\n2 count: 22\n", + use_valgrind: true, + }, + rbtree_ck => Example { + filename: "RBTreeCk.roc", + executable_filename: "rbtree-ck", + stdin: &["100"], + expected_ending: "10\n", + use_valgrind: true, + }, + rbtree_insert => Example { + filename: "RBTreeInsert.roc", + executable_filename: "rbtree-insert", + stdin: &[], + expected_ending: "Node Black 0 {} Empty Empty\n", + use_valgrind: true, + }, + // rbtree_del => Example { + // filename: "RBTreeDel.roc", + // executable_filename: "rbtree-del", + // stdin: &["420"], + // expected_ending: "30\n", + // use_valgrind: true, + // }, + astar => Example { + filename: "TestAStar.roc", + executable_filename: "test-astar", + stdin: &[], + expected_ending: "True\n", + use_valgrind: false, + }, + base64 => Example { + filename: "TestBase64.roc", + executable_filename: "test-base64", + stdin: &[], + expected_ending: "encoded: SGVsbG8gV29ybGQ=\ndecoded: Hello World\n", + use_valgrind: true, + }, + closure => Example { + filename: "Closure.roc", + executable_filename: "closure", + stdin: &[], + expected_ending: "", + use_valgrind: true, + }, + quicksort_app => Example { + filename: "QuicksortApp.roc", + executable_filename: "quicksortapp", + stdin: &[], + expected_ending: "todo put the correct quicksort answer here", + use_valgrind: true, + }, + } #[cfg(not(debug_assertions))] fn check_for_tests(examples_dir: &str, all_examples: &mut MutMap<&str, Example<'_>>) { @@ -562,10 +581,10 @@ mod cli_run { file.read_exact(buf).unwrap(); // Only app modules in this directory are considered benchmarks. - if "app".as_bytes() == buf { + if "app".as_bytes() == buf && !benchmark_file_name.contains("RBTreeDel") { all_benchmarks.remove(benchmark_file_name.as_str()).unwrap_or_else(|| { - panic!("The benchmark {}/{} does not have any corresponding tests in cli_run. 
Please add one, so if it ever stops working, we'll know about it right away!", benchmarks_dir, benchmark_file_name); - }); + panic!("The benchmark {}/{} does not have any corresponding tests in cli_run. Please add one, so if it ever stops working, we'll know about it right away!", benchmarks_dir, benchmark_file_name); + }); } } } diff --git a/cli/tests/repl_eval.rs b/cli/tests/repl_eval.rs index 87d53837a8..bfb04ae0e0 100644 --- a/cli/tests/repl_eval.rs +++ b/cli/tests/repl_eval.rs @@ -502,6 +502,11 @@ mod repl_eval { expect_success("\\x -> x", " : a -> a"); } + #[test] + fn sum_lambda() { + expect_success("\\x, y -> x + y", " : Num a, Num a -> Num a"); + } + #[test] fn stdlib_function() { expect_success("Num.abs", " : Num a -> Num a"); diff --git a/code_markup/Cargo.toml b/code_markup/Cargo.toml new file mode 100644 index 0000000000..2583ee0006 --- /dev/null +++ b/code_markup/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "roc_code_markup" +version = "0.1.0" +authors = ["The Roc Contributors"] +license = "UPL-1.0" +edition = "2018" +description = "Our own markup language for Roc code. Used by the editor and (soon) the docs." 
+ +[dependencies] +roc_ast = { path = "../ast" } +roc_module = { path = "../compiler/module" } +roc_utils = { path = "../utils" } +serde = { version = "1.0.123", features = ["derive"] } +palette = "0.5" +snafu = { version = "0.6", features = ["backtraces"] } +bumpalo = { version = "3.2", features = ["collections"] } + +[dev-dependencies] \ No newline at end of file diff --git a/code_markup/src/colors.rs b/code_markup/src/colors.rs new file mode 100644 index 0000000000..93c6c452b6 --- /dev/null +++ b/code_markup/src/colors.rs @@ -0,0 +1,22 @@ +use palette::{Hsv, LinSrgb}; + +pub type RgbaTup = (f32, f32, f32, f32); +pub const WHITE: RgbaTup = (1.0, 1.0, 1.0, 1.0); + +pub fn to_slice((r, g, b, a): RgbaTup) -> [f32; 4] { + [r, g, b, a] +} + +pub fn from_hsb(hue: usize, saturation: usize, brightness: usize) -> RgbaTup { + from_hsba(hue, saturation, brightness, 1.0) +} + +pub fn from_hsba(hue: usize, saturation: usize, brightness: usize, alpha: f32) -> RgbaTup { + let rgb = LinSrgb::from(Hsv::new( + hue as f32, + (saturation as f32) / 100.0, + (brightness as f32) / 100.0, + )); + + (rgb.red, rgb.green, rgb.blue, alpha) +} diff --git a/code_markup/src/lib.rs b/code_markup/src/lib.rs new file mode 100644 index 0000000000..9e5d220855 --- /dev/null +++ b/code_markup/src/lib.rs @@ -0,0 +1,5 @@ +pub mod colors; +pub mod markup; +pub mod markup_error; +pub mod slow_pool; +pub mod syntax_highlight; diff --git a/editor/src/editor/markup/attribute.rs b/code_markup/src/markup/attribute.rs similarity index 95% rename from editor/src/editor/markup/attribute.rs rename to code_markup/src/markup/attribute.rs index 304c4aadce..7df80cbc70 100644 --- a/editor/src/editor/markup/attribute.rs +++ b/code_markup/src/markup/attribute.rs @@ -1,8 +1,8 @@ #![allow(dead_code)] - -use crate::editor::ed_error::{CaretNotFound, EdResult}; use snafu::ensure; +use crate::markup_error::{CaretNotFound, MarkResult}; + #[derive(Debug, Copy, Clone)] pub struct Caret { pub offset_col: usize, @@ -65,10 +65,6 @@ 
pub struct Attributes { } impl Attributes { - pub fn new() -> Attributes { - Attributes { all: Vec::new() } - } - pub fn add(&mut self, attr: Attribute) { self.all.push(attr); } @@ -103,7 +99,7 @@ impl Attributes { carets } - pub fn delete_caret(&mut self, offset_col: usize, node_id: usize) -> EdResult<()> { + pub fn delete_caret(&mut self, offset_col: usize, node_id: usize) -> MarkResult<()> { let old_len = self.all.len(); self.all.retain(|attr| { @@ -121,3 +117,9 @@ impl Attributes { Ok(()) } } + +impl Default for Attributes { + fn default() -> Self { + Attributes { all: Vec::new() } + } +} diff --git a/editor/src/editor/markup/common_nodes.rs b/code_markup/src/markup/common_nodes.rs similarity index 84% rename from editor/src/editor/markup/common_nodes.rs rename to code_markup/src/markup/common_nodes.rs index f74648f3fa..89a8a8c2d9 100644 --- a/editor/src/editor/markup/common_nodes.rs +++ b/code_markup/src/markup/common_nodes.rs @@ -1,7 +1,6 @@ -use crate::{ - editor::{slow_pool::MarkNodeId, syntax_highlight::HighlightStyle}, - lang::{ast::ExprId, parse::ASTNodeId}, -}; +use roc_ast::lang::core::{ast::ASTNodeId, expr::expr2::ExprId}; + +use crate::{slow_pool::MarkNodeId, syntax_highlight::HighlightStyle}; use super::{attribute::Attributes, nodes, nodes::MarkupNode}; @@ -10,7 +9,7 @@ pub fn new_equals_mn(ast_node_id: ASTNodeId, parent_id_opt: Option) content: nodes::EQUALS.to_owned(), ast_node_id, syn_high_style: HighlightStyle::Operator, - attributes: Attributes::new(), + attributes: Attributes::default(), parent_id_opt, newlines_at_end: 0, } @@ -21,7 +20,7 @@ pub fn new_comma_mn(expr_id: ExprId, parent_id_opt: Option) -> Marku content: nodes::COMMA.to_owned(), ast_node_id: ASTNodeId::AExprId(expr_id), syn_high_style: HighlightStyle::Blank, - attributes: Attributes::new(), + attributes: Attributes::default(), parent_id_opt, newlines_at_end: 0, } @@ -31,7 +30,7 @@ pub fn new_blank_mn(ast_node_id: ASTNodeId, parent_id_opt: Option) - MarkupNode::Blank { 
ast_node_id, syn_high_style: HighlightStyle::Blank, - attributes: Attributes::new(), + attributes: Attributes::default(), parent_id_opt, newlines_at_end: 0, } @@ -45,7 +44,7 @@ pub fn new_blank_mn_w_nls( MarkupNode::Blank { ast_node_id, syn_high_style: HighlightStyle::Blank, - attributes: Attributes::new(), + attributes: Attributes::default(), parent_id_opt, newlines_at_end: nr_of_newlines, } @@ -56,7 +55,7 @@ pub fn new_colon_mn(expr_id: ExprId, parent_id_opt: Option) -> Marku content: nodes::COLON.to_owned(), ast_node_id: ASTNodeId::AExprId(expr_id), syn_high_style: HighlightStyle::Operator, - attributes: Attributes::new(), + attributes: Attributes::default(), parent_id_opt, newlines_at_end: 0, } @@ -67,7 +66,7 @@ pub fn new_left_accolade_mn(expr_id: ExprId, parent_id_opt: Option) content: nodes::LEFT_ACCOLADE.to_owned(), ast_node_id: ASTNodeId::AExprId(expr_id), syn_high_style: HighlightStyle::Bracket, - attributes: Attributes::new(), + attributes: Attributes::default(), parent_id_opt, newlines_at_end: 0, } @@ -78,7 +77,7 @@ pub fn new_right_accolade_mn(expr_id: ExprId, parent_id_opt: Option) content: nodes::RIGHT_ACCOLADE.to_owned(), ast_node_id: ASTNodeId::AExprId(expr_id), syn_high_style: HighlightStyle::Bracket, - attributes: Attributes::new(), + attributes: Attributes::default(), parent_id_opt, newlines_at_end: 0, } @@ -89,7 +88,7 @@ pub fn new_left_square_mn(expr_id: ExprId, parent_id_opt: Option) -> content: nodes::LEFT_SQUARE_BR.to_owned(), ast_node_id: ASTNodeId::AExprId(expr_id), syn_high_style: HighlightStyle::Bracket, - attributes: Attributes::new(), + attributes: Attributes::default(), parent_id_opt, newlines_at_end: 0, } @@ -100,7 +99,7 @@ pub fn new_right_square_mn(expr_id: ExprId, parent_id_opt: Option) - content: nodes::RIGHT_SQUARE_BR.to_owned(), ast_node_id: ASTNodeId::AExprId(expr_id), syn_high_style: HighlightStyle::Bracket, - attributes: Attributes::new(), + attributes: Attributes::default(), parent_id_opt, newlines_at_end: 0, } diff --git 
a/editor/src/editor/markup/mod.rs b/code_markup/src/markup/mod.rs similarity index 70% rename from editor/src/editor/markup/mod.rs rename to code_markup/src/markup/mod.rs index de015ebd80..e3ce137f80 100644 --- a/editor/src/editor/markup/mod.rs +++ b/code_markup/src/markup/mod.rs @@ -1,3 +1,4 @@ pub mod attribute; pub mod common_nodes; pub mod nodes; +pub mod top_level_def; diff --git a/editor/src/editor/markup/nodes.rs b/code_markup/src/markup/nodes.rs similarity index 92% rename from editor/src/editor/markup/nodes.rs rename to code_markup/src/markup/nodes.rs index 00b5358e88..cea08f288b 100644 --- a/editor/src/editor/markup/nodes.rs +++ b/code_markup/src/markup/nodes.rs @@ -1,33 +1,39 @@ -use super::attribute::Attributes; -use crate::editor::ed_error::EdResult; -use crate::editor::ed_error::ExpectedTextNode; -use crate::editor::ed_error::{NestedNodeMissingChild, NestedNodeRequired}; -use crate::editor::markup::common_nodes::new_blank_mn; -use crate::editor::markup::common_nodes::new_blank_mn_w_nls; -use crate::editor::markup::common_nodes::new_colon_mn; -use crate::editor::markup::common_nodes::new_comma_mn; -use crate::editor::markup::common_nodes::new_equals_mn; -use crate::editor::markup::common_nodes::new_left_accolade_mn; -use crate::editor::markup::common_nodes::new_left_square_mn; -use crate::editor::markup::common_nodes::new_right_accolade_mn; -use crate::editor::markup::common_nodes::new_right_square_mn; -use crate::editor::mvc::tld_value_update::tld_mark_node; -use crate::editor::slow_pool::MarkNodeId; -use crate::editor::slow_pool::SlowPool; -use crate::editor::syntax_highlight::HighlightStyle; -use crate::editor::util::index_of; -use crate::lang::ast::Def2; -use crate::lang::ast::DefId; -use crate::lang::ast::ExprId; -use crate::lang::ast::RecordField; -use crate::lang::ast::ValueDef; -use crate::lang::parse::ASTNodeId; -use crate::lang::parse::{AppHeader, AST}; -use crate::lang::pattern::get_identifier_string; -use crate::lang::{ast::Expr2, 
expr::Env, pool::PoolStr}; -use crate::ui::util::slice_get; +use crate::{ + markup::common_nodes::{ + new_blank_mn, new_colon_mn, new_comma_mn, new_equals_mn, new_left_accolade_mn, + new_left_square_mn, new_right_accolade_mn, new_right_square_mn, + }, + markup_error::MarkResult, + slow_pool::{MarkNodeId, SlowPool}, + syntax_highlight::HighlightStyle, +}; + +use super::{ + attribute::Attributes, common_nodes::new_blank_mn_w_nls, top_level_def::tld_mark_node, +}; + +use crate::markup_error::{ExpectedTextNode, NestedNodeMissingChild, NestedNodeRequired}; use bumpalo::Bump; +use roc_ast::{ + ast_error::ASTResult, + lang::{ + core::{ + ast::{ASTNodeId, AST}, + def::def2::{Def2, DefId}, + expr::{ + expr2::{Expr2, ExprId}, + record_field::RecordField, + }, + header::AppHeader, + pattern::get_identifier_string, + val_def::ValueDef, + }, + env::Env, + }, + mem_pool::pool_str::PoolStr, +}; use roc_module::symbol::Interns; +use roc_utils::{index_of, slice_get}; use std::fmt; #[derive(Debug)] @@ -95,7 +101,7 @@ impl MarkupNode { &self, child_id: MarkNodeId, mark_node_pool: &SlowPool, - ) -> EdResult<(usize, usize)> { + ) -> MarkResult<(usize, usize)> { match self { MarkupNode::Nested { children_ids, .. 
} => { let mut mark_child_index_opt: Option = None; @@ -122,12 +128,11 @@ impl MarkupNode { Ok((child_index, ast_child_index)) } else { // we want to find the index of the closest ast mark node to child_index - let indices_in_mark_res: EdResult> = child_ids_with_ast - .iter() - .map(|c_id| index_of(*c_id, children_ids)) - .collect(); + let mut indices_in_mark = vec![]; - let indices_in_mark = indices_in_mark_res?; + for &c_id in child_ids_with_ast.iter() { + indices_in_mark.push(index_of(c_id, children_ids)?); + } let mut last_diff = usize::MAX; let mut best_index = 0; @@ -186,7 +191,7 @@ impl MarkupNode { full_content } - pub fn get_content_mut(&mut self) -> EdResult<&mut String> { + pub fn get_content_mut(&mut self) -> MarkResult<&mut String> { match self { MarkupNode::Nested { .. } => ExpectedTextNode { function_name: "set_content".to_owned(), @@ -208,7 +213,7 @@ impl MarkupNode { .all(|chr| chr.is_ascii_alphanumeric()) } - pub fn add_child_at_index(&mut self, index: usize, child_id: MarkNodeId) -> EdResult<()> { + pub fn add_child_at_index(&mut self, index: usize, child_id: MarkNodeId) -> MarkResult<()> { if let MarkupNode::Nested { children_ids, .. 
} = self { children_ids.splice(index..index, vec![child_id]); } else { @@ -292,7 +297,7 @@ fn new_markup_node( content: text, ast_node_id: node_id, syn_high_style: highlight_style, - attributes: Attributes::new(), + attributes: Attributes::default(), parent_id_opt: None, newlines_at_end: 0, }; @@ -307,7 +312,7 @@ pub fn def2_to_markup<'a, 'b>( def2_node_id: DefId, mark_node_pool: &mut SlowPool, interns: &Interns, -) -> EdResult { +) -> ASTResult { let ast_node_id = ASTNodeId::ADefId(def2_node_id); let mark_node_id = match def2 { @@ -349,7 +354,7 @@ pub fn expr2_to_markup<'a, 'b>( expr2_node_id: ExprId, mark_node_pool: &mut SlowPool, interns: &Interns, -) -> EdResult { +) -> ASTResult { let ast_node_id = ASTNodeId::AExprId(expr2_node_id); let mark_node_id = match expr2 { @@ -497,7 +502,7 @@ pub fn expr2_to_markup<'a, 'b>( content: val_name, ast_node_id, syn_high_style: HighlightStyle::Variable, - attributes: Attributes::new(), + attributes: Attributes::default(), parent_id_opt: None, newlines_at_end: 0, }; @@ -606,7 +611,7 @@ fn header_mn(content: String, expr_id: ExprId, mark_node_pool: &mut SlowPool) -> content, ast_node_id: ASTNodeId::AExprId(expr_id), syn_high_style: HighlightStyle::PackageRelated, - attributes: Attributes::new(), + attributes: Attributes::default(), parent_id_opt: None, newlines_at_end: 0, }; @@ -624,7 +629,7 @@ fn header_val_mn( content, ast_node_id: ASTNodeId::AExprId(expr_id), syn_high_style: highlight_style, - attributes: Attributes::new(), + attributes: Attributes::default(), parent_id_opt: None, newlines_at_end: 0, }; @@ -798,7 +803,7 @@ pub fn ast_to_mark_nodes<'a, 'b>( ast: &AST, mark_node_pool: &mut SlowPool, interns: &Interns, -) -> EdResult> { +) -> ASTResult> { let mut all_mark_node_ids = vec![header_to_markup(&ast.header, mark_node_pool)]; for &def_id in ast.def_ids.iter() { diff --git a/code_markup/src/markup/top_level_def.rs b/code_markup/src/markup/top_level_def.rs new file mode 100644 index 0000000000..d2122fc790 --- /dev/null 
+++ b/code_markup/src/markup/top_level_def.rs @@ -0,0 +1,51 @@ +use roc_ast::{ + ast_error::ASTResult, + lang::{ + core::{ + ast::ASTNodeId, + pattern::{get_identifier_string, PatternId}, + }, + env::Env, + }, +}; +use roc_module::symbol::Interns; + +use crate::{ + markup::{attribute::Attributes, common_nodes::new_equals_mn, nodes::MarkupNode}, + slow_pool::{MarkNodeId, SlowPool}, + syntax_highlight::HighlightStyle, +}; + +pub fn tld_mark_node<'a>( + identifier_id: PatternId, + expr_mark_node_id: MarkNodeId, + ast_node_id: ASTNodeId, + mark_node_pool: &mut SlowPool, + env: &Env<'a>, + interns: &Interns, +) -> ASTResult { + let pattern2 = env.pool.get(identifier_id); + let val_name = get_identifier_string(pattern2, interns)?; + + let val_name_mn = MarkupNode::Text { + content: val_name, + ast_node_id, + syn_high_style: HighlightStyle::Variable, + attributes: Attributes::default(), + parent_id_opt: None, + newlines_at_end: 0, + }; + + let val_name_mn_id = mark_node_pool.add(val_name_mn); + + let equals_mn_id = mark_node_pool.add(new_equals_mn(ast_node_id, None)); + + let full_let_node = MarkupNode::Nested { + ast_node_id, + children_ids: vec![val_name_mn_id, equals_mn_id, expr_mark_node_id], + parent_id_opt: None, + newlines_at_end: 2, + }; + + Ok(full_let_node) +} diff --git a/code_markup/src/markup_error.rs b/code_markup/src/markup_error.rs new file mode 100644 index 0000000000..5de9768fba --- /dev/null +++ b/code_markup/src/markup_error.rs @@ -0,0 +1,55 @@ +use roc_utils::util_error::UtilError; +use snafu::{Backtrace, NoneError, ResultExt, Snafu}; + +use crate::slow_pool::MarkNodeId; + +#[derive(Debug, Snafu)] +#[snafu(visibility(pub))] +pub enum MarkError { + #[snafu(display( + "CaretNotFound: No carets were found in the expected node with id {}", + node_id + ))] + CaretNotFound { + node_id: MarkNodeId, + backtrace: Backtrace, + }, + #[snafu(display( + "ExpectedTextNode: the function {} expected a Text node, got {} instead.", + function_name, + node_type + ))] + 
ExpectedTextNode { + function_name: String, + node_type: String, + backtrace: Backtrace, + }, + #[snafu(display("NestedNodeMissingChild: expected to find child with id {} in Nested MarkupNode, but it was missing. Id's of the children are {:?}.", node_id, children_ids))] + NestedNodeMissingChild { + node_id: MarkNodeId, + children_ids: Vec, + backtrace: Backtrace, + }, + #[snafu(display( + "NestedNodeRequired: required a Nested node at this position, node was a {}.", + node_type + ))] + NestedNodeRequired { + node_type: String, + backtrace: Backtrace, + }, + #[snafu(display("UIError: {}", msg))] + UtilErrorBacktrace { msg: String, backtrace: Backtrace }, +} + +pub type MarkResult = std::result::Result; + +impl From for MarkError { + fn from(util_err: UtilError) -> Self { + let msg = format!("{}", util_err); + + // hack to handle MarkError derive + let dummy_res: Result<(), NoneError> = Err(NoneError {}); + dummy_res.context(UtilErrorBacktrace { msg }).unwrap_err() + } +} diff --git a/editor/src/editor/slow_pool.rs b/code_markup/src/slow_pool.rs similarity index 95% rename from editor/src/editor/slow_pool.rs rename to code_markup/src/slow_pool.rs index 72dc43336c..d69e1a1a9c 100644 --- a/editor/src/editor/slow_pool.rs +++ b/code_markup/src/slow_pool.rs @@ -1,6 +1,7 @@ -use crate::editor::markup::nodes::MarkupNode; use std::fmt; +use crate::markup::nodes::MarkupNode; + pub type MarkNodeId = usize; #[derive(Debug)] @@ -9,10 +10,6 @@ pub struct SlowPool { } impl SlowPool { - pub fn new() -> SlowPool { - SlowPool { nodes: Vec::new() } - } - pub fn add(&mut self, node: MarkupNode) -> MarkNodeId { let id = self.nodes.len(); @@ -72,3 +69,9 @@ impl fmt::Display for SlowPool { Ok(()) } } + +impl Default for SlowPool { + fn default() -> Self { + SlowPool { nodes: Vec::new() } + } +} diff --git a/editor/src/editor/syntax_highlight.rs b/code_markup/src/syntax_highlight.rs similarity index 78% rename from editor/src/editor/syntax_highlight.rs rename to 
code_markup/src/syntax_highlight.rs index 602a335b48..12c0ef33e1 100644 --- a/editor/src/editor/syntax_highlight.rs +++ b/code_markup/src/syntax_highlight.rs @@ -1,8 +1,8 @@ -use crate::graphics::colors as gr_colors; -use gr_colors::{from_hsb, RgbaTup}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; +use crate::colors::{self, from_hsb, RgbaTup}; + #[derive(Hash, Eq, PartialEq, Copy, Clone, Debug, Deserialize, Serialize)] pub enum HighlightStyle { Operator, // =+-<>... @@ -24,14 +24,14 @@ pub fn default_highlight_map() -> HashMap { let mut highlight_map = HashMap::new(); [ - (Operator, gr_colors::WHITE), + (Operator, colors::WHITE), (String, from_hsb(346, 65, 97)), - (FunctionName, gr_colors::WHITE), - (Type, gr_colors::WHITE), + (FunctionName, colors::WHITE), + (Type, colors::WHITE), (Bracket, from_hsb(347, 80, 100)), (Number, from_hsb(185, 50, 75)), - (PackageRelated, gr_colors::WHITE), - (Variable, gr_colors::WHITE), + (PackageRelated, colors::WHITE), + (Variable, colors::WHITE), (RecordField, from_hsb(258, 50, 90)), (Import, from_hsb(185, 50, 75)), (Provides, from_hsb(185, 50, 75)), diff --git a/compiler/build/src/link.rs b/compiler/build/src/link.rs index 40a8f84cbf..ff22fd9b00 100644 --- a/compiler/build/src/link.rs +++ b/compiler/build/src/link.rs @@ -1,7 +1,8 @@ use crate::target::arch_str; #[cfg(feature = "llvm")] use libloading::{Error, Library}; -#[cfg(feature = "llvm")] +use roc_builtins::bitcode; +// #[cfg(feature = "llvm")] use roc_mono::ir::OptLevel; use std::collections::HashMap; use std::env; @@ -93,7 +94,12 @@ pub fn build_zig_host_native( .env("PATH", env_path) .env("HOME", env_home); if let Some(shared_lib_path) = shared_lib_path { - command.args(&["build-exe", "-fPIE", shared_lib_path.to_str().unwrap()]); + command.args(&[ + "build-exe", + "-fPIE", + shared_lib_path.to_str().unwrap(), + bitcode::OBJ_PATH, + ]); } else { command.args(&["build-obj", "-fPIC"]); } @@ -109,7 +115,6 @@ pub fn build_zig_host_native( // include libc 
"--library", "c", - "--strip", // cross-compile? "-target", target, @@ -178,7 +183,12 @@ pub fn build_zig_host_native( .env("PATH", &env_path) .env("HOME", &env_home); if let Some(shared_lib_path) = shared_lib_path { - command.args(&["build-exe", "-fPIE", shared_lib_path.to_str().unwrap()]); + command.args(&[ + "build-exe", + "-fPIE", + shared_lib_path.to_str().unwrap(), + bitcode::OBJ_PATH, + ]); } else { command.args(&["build-obj", "-fPIC"]); } @@ -197,7 +207,6 @@ pub fn build_zig_host_native( // include libc "--library", "c", - "--strip", ]); if matches!(opt_level, OptLevel::Optimize) { command.args(&["-O", "ReleaseSafe"]); @@ -274,6 +283,7 @@ pub fn build_c_host_native( if let Some(shared_lib_path) = shared_lib_path { command.args(&[ shared_lib_path.to_str().unwrap(), + bitcode::OBJ_PATH, "-fPIE", "-pie", "-lm", @@ -370,7 +380,7 @@ pub fn rebuild_host( } else if cargo_host_src.exists() { // Compile and link Cargo.toml, if it exists let cargo_dir = host_input_path.parent().unwrap(); - let libhost_dir = + let cargo_out_dir = cargo_dir .join("target") .join(if matches!(opt_level, OptLevel::Optimize) { @@ -378,30 +388,30 @@ pub fn rebuild_host( } else { "debug" }); - let libhost = libhost_dir.join("libhost.a"); let mut command = Command::new("cargo"); command.arg("build").current_dir(cargo_dir); if matches!(opt_level, OptLevel::Optimize) { command.arg("--release"); } + let source_file = if shared_lib_path.is_some() { + command.env("RUSTFLAGS", "-C link-dead-code"); + command.args(&["--bin", "host"]); + "src/main.rs" + } else { + command.arg("--lib"); + "src/lib.rs" + }; let output = command.output().unwrap(); - validate_output("src/lib.rs", "cargo build", output); + validate_output(source_file, "cargo build", output); - // Cargo hosts depend on a c wrapper for the api. Compile host.c as well. if shared_lib_path.is_some() { - // If compiling to executable, let c deal with linking as well. 
- let output = build_c_host_native( - &env_path, - &env_home, - host_dest_native.to_str().unwrap(), - &[c_host_src.to_str().unwrap(), libhost.to_str().unwrap()], - opt_level, - shared_lib_path, - ); - validate_output("host.c", "clang", output); + // For surgical linking, just copy the dynamically linked rust app. + std::fs::copy(cargo_out_dir.join("host"), host_dest_native).unwrap(); } else { + // Cargo hosts depend on a c wrapper for the api. Compile host.c as well. + let output = build_c_host_native( &env_path, &env_home, @@ -418,7 +428,7 @@ pub fn rebuild_host( .args(&[ "-r", "-L", - libhost_dir.to_str().unwrap(), + cargo_out_dir.to_str().unwrap(), c_host_dest.to_str().unwrap(), "-lhost", "-o", diff --git a/compiler/build/src/program.rs b/compiler/build/src/program.rs index 226bfaf8be..7d9803e4be 100644 --- a/compiler/build/src/program.rs +++ b/compiler/build/src/program.rs @@ -2,11 +2,9 @@ use roc_gen_llvm::llvm::build::module_from_builtins; #[cfg(feature = "llvm")] pub use roc_gen_llvm::llvm::build::FunctionIterator; -#[cfg(feature = "llvm")] use roc_load::file::MonomorphizedModule; #[cfg(feature = "llvm")] use roc_mono::ir::OptLevel; -#[cfg(feature = "llvm")] use std::path::{Path, PathBuf}; use std::time::Duration; diff --git a/compiler/builtins/README.md b/compiler/builtins/README.md index 8e1eb0e291..7780ccb9ac 100644 --- a/compiler/builtins/README.md +++ b/compiler/builtins/README.md @@ -62,12 +62,6 @@ Its one thing to actually write these functions, its _another_ thing to let the ### builtins/mono/src/borrow.rs After we have all of this, we need to specify if the arguments we're passing are owned, borrowed or irrelvant. Towards the bottom of this file, add a new case for you builtin and specify each arg. Be sure to read the comment, as it explains this in more detail. 
-## Specifying the uniqueness of a function -### builtins/src/unique.rs -One of the cool things about Roc is that it evaluates if a value in memory is shared between scopes or if it is used in just one place. If the value is used in one place then it is 'unique', and it therefore can be mutated in place. For a value created by a function, the uniqueness of the output is determined in part by the uniqueness of the input arguments. For example `List.single : elem -> List elem` can return a unique list if the `elem` is also unique. - -We have to define the uniqueness constraints of a function just like we have to define a type signature. That is what happens in `unique.rs`. This can be tricky so it would be a good step to ask for help on if it is confusing. - ## Testing it ### solve/tests/solve_expr.rs To make sure that Roc is properly inferring the type of the new builtin, add a test to this file simlar to: diff --git a/compiler/builtins/bitcode/src/list.zig b/compiler/builtins/bitcode/src/list.zig index 22e76054ea..2b83458408 100644 --- a/compiler/builtins/bitcode/src/list.zig +++ b/compiler/builtins/bitcode/src/list.zig @@ -1,6 +1,7 @@ const std = @import("std"); const utils = @import("utils.zig"); const RocResult = utils.RocResult; +const UpdateMode = utils.UpdateMode; const mem = std.mem; const EqFn = fn (?[*]u8, ?[*]u8) callconv(.C) bool; @@ -52,6 +53,14 @@ pub const RocList = extern struct { }; } + pub fn makeUniqueExtra(self: RocList, alignment: u32, element_width: usize, update_mode: UpdateMode) RocList { + if (update_mode == .InPlace) { + return self; + } else { + return self.makeUnique(alignment, element_width); + } + } + pub fn makeUnique(self: RocList, alignment: u32, element_width: usize) RocList { if (self.isEmpty()) { return self; @@ -132,14 +141,14 @@ const Caller1 = fn (?[*]u8, ?[*]u8, ?[*]u8) callconv(.C) void; const Caller2 = fn (?[*]u8, ?[*]u8, ?[*]u8, ?[*]u8) callconv(.C) void; const Caller3 = fn (?[*]u8, ?[*]u8, ?[*]u8, ?[*]u8, ?[*]u8) 
callconv(.C) void; -pub fn listReverse(list: RocList, alignment: u32, element_width: usize) callconv(.C) RocList { +pub fn listReverse(list: RocList, alignment: u32, element_width: usize, update_mode: UpdateMode) callconv(.C) RocList { if (list.bytes) |source_ptr| { const size = list.len(); var i: usize = 0; const end: usize = size - 1; - if (list.isUnique()) { + if (update_mode == .InPlace or list.isUnique()) { // Working from the front and back so // we only need to go ~(n / 2) iterations. @@ -720,10 +729,13 @@ pub fn listSingle(alignment: u32, element: Opaque, element_width: usize) callcon return output; } -pub fn listAppend(list: RocList, alignment: u32, element: Opaque, element_width: usize) callconv(.C) RocList { +pub fn listAppend(list: RocList, alignment: u32, element: Opaque, element_width: usize, update_mode: UpdateMode) callconv(.C) RocList { const old_length = list.len(); var output = list.reallocate(alignment, old_length + 1, element_width); + // we'd need capacity to use update_mode here + _ = update_mode; + if (output.bytes) |target| { if (element) |source| { @memcpy(target + old_length * element_width, source, element_width); @@ -763,18 +775,24 @@ pub fn listSwap( element_width: usize, index_1: usize, index_2: usize, + update_mode: UpdateMode, ) callconv(.C) RocList { const size = list.len(); - if (index_1 >= size or index_2 >= size) { + if (index_1 == index_2 or index_1 >= size or index_2 >= size) { // Either index out of bounds so we just return return list; } - const newList = list.makeUnique(alignment, element_width); + const newList = blk: { + if (update_mode == .InPlace) { + break :blk list; + } else { + break :blk list.makeUnique(alignment, element_width); + } + }; - if (newList.bytes) |source_ptr| { - swapElements(source_ptr, element_width, index_1, index_2); - } + const source_ptr = @ptrCast([*]u8, newList.bytes); + swapElements(source_ptr, element_width, index_1, index_2); return newList; } @@ -815,6 +833,67 @@ pub fn listDrop( } } +pub fn 
listDropAt( + list: RocList, + alignment: u32, + element_width: usize, + drop_index: usize, + dec: Dec, +) callconv(.C) RocList { + if (list.bytes) |source_ptr| { + const size = list.len(); + + if (drop_index >= size) { + return list; + } + + if (drop_index < size) { + const element = source_ptr + drop_index * element_width; + dec(element); + } + + // NOTE + // we need to return an empty list explicitly, + // because we rely on the pointer field being null if the list is empty + // which also requires duplicating the utils.decref call to spend the RC token + if (size < 2) { + utils.decref(list.bytes, size * element_width, alignment); + return RocList.empty(); + } + + if (list.isUnique()) { + var i = drop_index; + while (i < size - 1) : (i += 1) { // stop one short: each step reads element i + 1, and the last slot has no successor + const copy_target = source_ptr + i * element_width; + const copy_source = copy_target + element_width; + @memcpy(copy_target, copy_source, element_width); + } + + var new_list = list; + + new_list.length -= 1; + return new_list; + } + + const output = RocList.allocate(alignment, size - 1, element_width); + const target_ptr = output.bytes orelse unreachable; + + const head_size = drop_index * element_width; + @memcpy(target_ptr, source_ptr, head_size); + + const tail_target = target_ptr + drop_index * element_width; + const tail_source = source_ptr + (drop_index + 1) * element_width; + const tail_size = (size - drop_index - 1) * element_width; + @memcpy(tail_target, tail_source, tail_size); + + utils.decref(list.bytes, size * element_width, alignment); + + return output; + } else { + return RocList.empty(); + } +} + pub fn listRange(width: utils.IntWidth, low: Opaque, high: Opaque) callconv(.C) RocList { return switch (width) { .U8 => helper1(u8, low, high), diff --git a/compiler/builtins/bitcode/src/main.zig b/compiler/builtins/bitcode/src/main.zig index 69bf1ca5ac..d82b8542a8 100644 --- a/compiler/builtins/bitcode/src/main.zig +++ b/compiler/builtins/bitcode/src/main.zig @@ -39,6 +39,7 @@ comptime { 
exportListFn(list.listSortWith, "sort_with"); exportListFn(list.listConcat, "concat"); exportListFn(list.listDrop, "drop"); + exportListFn(list.listDropAt, "drop_at"); exportListFn(list.listSet, "set"); exportListFn(list.listSetInPlace, "set_in_place"); exportListFn(list.listSwap, "swap"); @@ -101,6 +102,7 @@ comptime { exportStrFn(str.strToUtf8C, "to_utf8"); exportStrFn(str.fromUtf8C, "from_utf8"); exportStrFn(str.fromUtf8RangeC, "from_utf8_range"); + exportStrFn(str.repeat, "repeat"); } // Utils @@ -164,7 +166,12 @@ test "" { // https://github.com/ziglang/zig/blob/85755c51d529e7d9b406c6bdf69ce0a0f33f3353/lib/std/special/compiler_rt/muloti4.zig // // Thank you Zig Contributors! -export fn __muloti4(a: i128, b: i128, overflow: *c_int) callconv(.C) i128 { + +// Export it as weak in case it is already linked in by something else. +comptime { + @export(__muloti4, .{ .name = "__muloti4", .linkage = .Weak }); +} +fn __muloti4(a: i128, b: i128, overflow: *c_int) callconv(.C) i128 { // @setRuntimeSafety(std.builtin.is_test); const min = @bitCast(i128, @as(u128, 1 << (128 - 1))); diff --git a/compiler/builtins/bitcode/src/str.zig b/compiler/builtins/bitcode/src/str.zig index 2172e693d7..9ffb9a1375 100644 --- a/compiler/builtins/bitcode/src/str.zig +++ b/compiler/builtins/bitcode/src/str.zig @@ -1,5 +1,6 @@ const utils = @import("utils.zig"); const RocList = @import("list.zig").RocList; +const UpdateMode = utils.UpdateMode; const std = @import("std"); const mem = std.mem; const always_inline = std.builtin.CallOptions.Modifier.always_inline; @@ -866,6 +867,22 @@ pub fn startsWith(string: RocStr, prefix: RocStr) callconv(.C) bool { return true; } +// Str.repeat +pub fn repeat(string: RocStr, count: usize) callconv(.C) RocStr { + const bytes_len = string.len(); + const bytes_ptr = string.asU8ptr(); + + var ret_string = RocStr.allocate(.Clone, count * bytes_len); + var ret_string_ptr = ret_string.asU8ptr(); + + var i: usize = 0; + while (i < count) : (i += 1) { + 
@memcpy(ret_string_ptr + (i * bytes_len), bytes_ptr, bytes_len); + } + + return ret_string; +} + // Str.startsWithCodePt pub fn startsWithCodePt(string: RocStr, prefix: u32) callconv(.C) bool { const bytes_ptr = string.asU8ptr(); @@ -1131,10 +1148,10 @@ test "RocStr.joinWith: result is big" { // Str.toUtf8 pub fn strToUtf8C(arg: RocStr) callconv(.C) RocList { - return @call(.{ .modifier = always_inline }, strToBytes, .{arg}); + return strToBytes(arg); } -fn strToBytes(arg: RocStr) RocList { +inline fn strToBytes(arg: RocStr) RocList { if (arg.isEmpty()) { return RocList.empty(); } else if (arg.isSmallStr()) { @@ -1161,11 +1178,11 @@ const CountAndStart = extern struct { start: usize, }; -pub fn fromUtf8C(arg: RocList, output: *FromUtf8Result) callconv(.C) void { - output.* = @call(.{ .modifier = always_inline }, fromUtf8, .{arg}); +pub fn fromUtf8C(arg: RocList, update_mode: UpdateMode, output: *FromUtf8Result) callconv(.C) void { + output.* = fromUtf8(arg, update_mode); } -fn fromUtf8(arg: RocList) FromUtf8Result { +inline fn fromUtf8(arg: RocList, update_mode: UpdateMode) FromUtf8Result { const bytes = @ptrCast([*]const u8, arg.bytes)[0..arg.length]; if (unicode.utf8ValidateSlice(bytes)) { @@ -1178,13 +1195,23 @@ fn fromUtf8(arg: RocList) FromUtf8Result { const data_bytes = arg.len(); utils.decref(arg.bytes, data_bytes, RocStr.alignment); - return FromUtf8Result{ .is_ok = true, .string = string, .byte_index = 0, .problem_code = Utf8ByteProblem.InvalidStartByte }; + return FromUtf8Result{ + .is_ok = true, + .string = string, + .byte_index = 0, + .problem_code = Utf8ByteProblem.InvalidStartByte, + }; } else { - const byte_list = arg.makeUnique(RocStr.alignment, @sizeOf(u8)); + const byte_list = arg.makeUniqueExtra(RocStr.alignment, @sizeOf(u8), update_mode); const string = RocStr{ .str_bytes = byte_list.bytes, .str_len = byte_list.length }; - return FromUtf8Result{ .is_ok = true, .string = string, .byte_index = 0, .problem_code = Utf8ByteProblem.InvalidStartByte }; 
+ return FromUtf8Result{ + .is_ok = true, + .string = string, + .byte_index = 0, + .problem_code = Utf8ByteProblem.InvalidStartByte, + }; } } else { const temp = errorToProblem(@ptrCast([*]u8, arg.bytes), arg.length); @@ -1193,7 +1220,12 @@ fn fromUtf8(arg: RocList) FromUtf8Result { const data_bytes = arg.len(); utils.decref(arg.bytes, data_bytes, RocStr.alignment); - return FromUtf8Result{ .is_ok = false, .string = RocStr.empty(), .byte_index = temp.index, .problem_code = temp.problem }; + return FromUtf8Result{ + .is_ok = false, + .string = RocStr.empty(), + .byte_index = temp.index, + .problem_code = temp.problem, + }; } } @@ -1276,11 +1308,11 @@ pub const Utf8ByteProblem = enum(u8) { }; fn validateUtf8Bytes(bytes: [*]u8, length: usize) FromUtf8Result { - return fromUtf8(RocList{ .bytes = bytes, .length = length }); + return fromUtf8(RocList{ .bytes = bytes, .length = length }, .Immutable); } fn validateUtf8BytesX(str: RocList) FromUtf8Result { - return fromUtf8(str); + return fromUtf8(str, .Immutable); } fn expectOk(result: FromUtf8Result) !void { diff --git a/compiler/builtins/bitcode/src/utils.zig b/compiler/builtins/bitcode/src/utils.zig index 67240bdb2d..1371afe674 100644 --- a/compiler/builtins/bitcode/src/utils.zig +++ b/compiler/builtins/bitcode/src/utils.zig @@ -256,3 +256,8 @@ pub const Ordering = enum(u8) { GT = 1, LT = 2, }; + +pub const UpdateMode = extern enum(u8) { + Immutable = 0, + InPlace = 1, +}; diff --git a/compiler/builtins/docs/List.roc b/compiler/builtins/docs/List.roc index f8f9a2df43..964affa15b 100644 --- a/compiler/builtins/docs/List.roc +++ b/compiler/builtins/docs/List.roc @@ -31,6 +31,7 @@ interface List range, sortWith, drop, + dropAt, swap ] imports [] @@ -422,15 +423,18 @@ min : List (Num a) -> Result (Num a) [ ListWasEmpty ]* ## If the given index is outside the bounds of the list, returns the original ## list unmodified. ## -## To drop the element at a given index, instead of replacing it, see [List.drop]. 
+## To drop the element at a given index, instead of replacing it, see [List.dropAt]. set : List elem, Nat, elem -> List elem +## Drops n elements from the beginning of the list. +drop : List elem, Nat -> List elem + ## Drops the element at the given index from the list. ## ## This has no effect if the given index is outside the bounds of the list. ## ## To replace the element at a given index, instead of dropping it, see [List.set]. -drop : List elem, Nat -> List elem +dropAt : List elem, Nat -> List elem ## Adds a new element to the end of the list. ## diff --git a/compiler/builtins/src/bitcode.rs b/compiler/builtins/src/bitcode.rs index b45d822c69..e451a1e68d 100644 --- a/compiler/builtins/src/bitcode.rs +++ b/compiler/builtins/src/bitcode.rs @@ -28,6 +28,7 @@ pub const STR_EQUAL: &str = "roc_builtins.str.equal"; pub const STR_TO_UTF8: &str = "roc_builtins.str.to_utf8"; pub const STR_FROM_UTF8: &str = "roc_builtins.str.from_utf8"; pub const STR_FROM_UTF8_RANGE: &str = "roc_builtins.str.from_utf8_range"; +pub const STR_REPEAT: &str = "roc_builtins.str.repeat"; pub const DICT_HASH: &str = "roc_builtins.dict.hash"; pub const DICT_HASH_STR: &str = "roc_builtins.dict.hash_str"; @@ -62,6 +63,7 @@ pub const LIST_REPEAT: &str = "roc_builtins.list.repeat"; pub const LIST_APPEND: &str = "roc_builtins.list.append"; pub const LIST_PREPEND: &str = "roc_builtins.list.prepend"; pub const LIST_DROP: &str = "roc_builtins.list.drop"; +pub const LIST_DROP_AT: &str = "roc_builtins.list.drop_at"; pub const LIST_SWAP: &str = "roc_builtins.list.swap"; pub const LIST_SINGLE: &str = "roc_builtins.list.single"; pub const LIST_JOIN: &str = "roc_builtins.list.join"; diff --git a/compiler/builtins/src/std.rs b/compiler/builtins/src/std.rs index 98762b568b..0374e228e2 100644 --- a/compiler/builtins/src/std.rs +++ b/compiler/builtins/src/std.rs @@ -618,6 +618,13 @@ pub fn types() -> MutMap { Box::new(str_type()) ); + // repeat : Str, Nat -> Str + add_top_level_function_type!( + 
Symbol::STR_REPEAT, + vec![str_type(), nat_type()], + Box::new(str_type()) + ); + // fromUtf8 : List U8 -> Result Str [ BadUtf8 Utf8Problem ]* { let bad_utf8 = SolvedType::TagUnion( @@ -913,6 +920,13 @@ pub fn types() -> MutMap { Box::new(list_type(flex(TVAR1))), ); + // dropAt : List elem, Nat -> List elem + add_top_level_function_type!( + Symbol::LIST_DROP_AT, + vec![list_type(flex(TVAR1)), nat_type()], + Box::new(list_type(flex(TVAR1))), + ); + // swap : List elem, Nat, Nat -> List elem add_top_level_function_type!( Symbol::LIST_SWAP, diff --git a/compiler/can/src/builtins.rs b/compiler/can/src/builtins.rs index fe45f1f673..ea0bde537a 100644 --- a/compiler/can/src/builtins.rs +++ b/compiler/can/src/builtins.rs @@ -66,6 +66,7 @@ pub fn builtin_defs_map(symbol: Symbol, var_store: &mut VarStore) -> Option STR_FROM_UTF8_RANGE => str_from_utf8_range, STR_TO_UTF8 => str_to_utf8, STR_FROM_FLOAT=> str_from_float, + STR_REPEAT => str_repeat, LIST_LEN => list_len, LIST_GET => list_get, LIST_SET => list_set, @@ -86,6 +87,7 @@ pub fn builtin_defs_map(symbol: Symbol, var_store: &mut VarStore) -> Option LIST_MAP2 => list_map2, LIST_MAP3 => list_map3, LIST_DROP => list_drop, + LIST_DROP_AT => list_drop_at, LIST_SWAP => list_swap, LIST_MAP_WITH_INDEX => list_map_with_index, LIST_KEEP_IF => list_keep_if, @@ -1233,6 +1235,26 @@ fn str_split(symbol: Symbol, var_store: &mut VarStore) -> Def { ) } +/// Str.repeat : Str, Nat -> Str +fn str_repeat(symbol: Symbol, var_store: &mut VarStore) -> Def { + let str_var = var_store.fresh(); + let nat_var = var_store.fresh(); + + let body = RunLowLevel { + op: LowLevel::StrRepeat, + args: vec![(str_var, Var(Symbol::ARG_1)), (nat_var, Var(Symbol::ARG_2))], + ret_var: str_var, + }; + + defn( + symbol, + vec![(str_var, Symbol::ARG_1), (nat_var, Symbol::ARG_2)], + var_store, + body, + str_var, + ) +} + /// Str.concat : Str, Str -> Str fn str_concat(symbol: Symbol, var_store: &mut VarStore) -> Def { let str_var = var_store.fresh(); @@ -1958,6 
+1980,29 @@ fn list_drop(symbol: Symbol, var_store: &mut VarStore) -> Def { ) } +/// List.dropAt : List elem, Nat -> List elem +fn list_drop_at(symbol: Symbol, var_store: &mut VarStore) -> Def { + let list_var = var_store.fresh(); + let index_var = var_store.fresh(); + + let body = RunLowLevel { + op: LowLevel::ListDropAt, + args: vec![ + (list_var, Var(Symbol::ARG_1)), + (index_var, Var(Symbol::ARG_2)), + ], + ret_var: list_var, + }; + + defn( + symbol, + vec![(list_var, Symbol::ARG_1), (index_var, Symbol::ARG_2)], + var_store, + body, + list_var, + ) +} + /// List.append : List elem, elem -> List elem fn list_append(symbol: Symbol, var_store: &mut VarStore) -> Def { let list_var = var_store.fresh(); diff --git a/compiler/constrain/src/expr.rs b/compiler/constrain/src/expr.rs index 77b8a3c82c..4e920147d0 100644 --- a/compiler/constrain/src/expr.rs +++ b/compiler/constrain/src/expr.rs @@ -9,7 +9,7 @@ use roc_can::expected::PExpected; use roc_can::expr::Expr::{self, *}; use roc_can::expr::{Field, WhenBranch}; use roc_can::pattern::Pattern; -use roc_collections::all::{ImMap, Index, SendMap}; +use roc_collections::all::{ImMap, Index, MutSet, SendMap}; use roc_module::ident::{Lowercase, TagName}; use roc_module::symbol::{ModuleId, Symbol}; use roc_region::all::{Located, Region}; @@ -1438,13 +1438,15 @@ fn instantiate_rigids( annotation: &Type, introduced_vars: &IntroducedVariables, new_rigids: &mut Vec, - ftv: &mut ImMap, + ftv: &mut ImMap, // rigids defined before the current annotation loc_pattern: &Located, headers: &mut SendMap>, ) -> Type { let mut annotation = annotation.clone(); let mut rigid_substitution: ImMap = ImMap::default(); + let outside_rigids: MutSet = ftv.values().copied().collect(); + for (name, var) in introduced_vars.var_by_name.iter() { if let Some(existing_rigid) = ftv.get(name) { rigid_substitution.insert(*var, Type::Variable(*existing_rigid)); @@ -1464,7 +1466,12 @@ fn instantiate_rigids( &Located::at(loc_pattern.region, annotation.clone()), ) 
{ for (symbol, loc_type) in new_headers { - new_rigids.extend(loc_type.value.variables()); + for var in loc_type.value.variables() { + // a rigid is only new if this annotation is the first occurrence of this rigid + if !outside_rigids.contains(&var) { + new_rigids.push(var); + } + } headers.insert(symbol, loc_type); } } diff --git a/compiler/gen_dev/src/generic64/aarch64.rs b/compiler/gen_dev/src/generic64/aarch64.rs index 6149bff14c..32273fbf91 100644 --- a/compiler/gen_dev/src/generic64/aarch64.rs +++ b/compiler/gen_dev/src/generic64/aarch64.rs @@ -226,6 +226,7 @@ impl CallConv for AArch64Call { #[inline(always)] fn load_args<'a>( + _buf: &mut Vec<'a, u8>, _symbol_map: &mut MutMap>, _args: &'a [(Layout<'a>, Symbol)], _ret_layout: &Layout<'a>, diff --git a/compiler/gen_dev/src/generic64/mod.rs b/compiler/gen_dev/src/generic64/mod.rs index 9f98f512be..008633c202 100644 --- a/compiler/gen_dev/src/generic64/mod.rs +++ b/compiler/gen_dev/src/generic64/mod.rs @@ -9,7 +9,7 @@ use std::marker::PhantomData; pub mod aarch64; pub mod x86_64; -const PTR_SIZE: u32 = 64; +const PTR_SIZE: u32 = 8; pub trait CallConv { const GENERAL_PARAM_REGS: &'static [GeneralReg]; @@ -48,6 +48,7 @@ pub trait CallConv { // load_args updates the symbol map to know where every arg is stored. fn load_args<'a>( + buf: &mut Vec<'a, u8>, symbol_map: &mut MutMap>, args: &'a [(Layout<'a>, Symbol)], // ret_layout is needed because if it is a complex type, we pass a pointer as the first arg. @@ -422,7 +423,12 @@ impl< args: &'a [(Layout<'a>, Symbol)], ret_layout: &Layout<'a>, ) -> Result<(), String> { - CC::load_args(&mut self.symbol_storage_map, args, ret_layout)?; + CC::load_args( + &mut self.buf, + &mut self.symbol_storage_map, + args, + ret_layout, + )?; // Update used and free regs. 
for (sym, storage) in &self.symbol_storage_map { match storage { @@ -489,6 +495,25 @@ impl< ASM::mov_freg64_freg64(&mut self.buf, dst_reg, CC::FLOAT_RETURN_REGS[0]); Ok(()) } + Layout::Builtin(Builtin::Str) => { + if CC::returns_via_arg_pointer(ret_layout)? { + // This will happen on windows, return via pointer here. + Err("FnCall: Returning strings via pointer not yet implemented".to_string()) + } else { + let offset = self.claim_stack_size(16)?; + self.symbol_storage_map.insert( + *dst, + SymbolStorage::Base { + offset, + size: 16, + owned: true, + }, + ); + ASM::mov_base32_reg64(&mut self.buf, offset, CC::GENERAL_RETURN_REGS[0]); + ASM::mov_base32_reg64(&mut self.buf, offset + 8, CC::GENERAL_RETURN_REGS[1]); + Ok(()) + } + } x => Err(format!( "FnCall: receiving return type, {:?}, is not yet implemented", x @@ -893,6 +918,35 @@ impl< ASM::mov_freg64_imm64(&mut self.buf, &mut self.relocs, reg, val); Ok(()) } + Literal::Str(x) if x.len() < 16 => { + // Load small string. + let reg = self.get_tmp_general_reg()?; + + let offset = self.claim_stack_size(16)?; + self.symbol_storage_map.insert( + *sym, + SymbolStorage::Base { + offset, + size: 16, + owned: true, + }, + ); + let mut bytes = [0; 16]; + bytes[..x.len()].copy_from_slice(x.as_bytes()); + bytes[15] = (x.len() as u8) | 0b1000_0000; + + let mut num_bytes = [0; 8]; + num_bytes.copy_from_slice(&bytes[..8]); + let num = i64::from_ne_bytes(num_bytes); + ASM::mov_reg64_imm64(&mut self.buf, reg, num); + ASM::mov_base32_reg64(&mut self.buf, offset, reg); + + num_bytes.copy_from_slice(&bytes[8..]); + let num = i64::from_ne_bytes(num_bytes); + ASM::mov_reg64_imm64(&mut self.buf, reg, num); + ASM::mov_base32_reg64(&mut self.buf, offset + 8, reg); + Ok(()) + } x => Err(format!("loading literal, {:?}, is not yet implemented", x)), } } @@ -1012,6 +1066,19 @@ impl< Layout::Builtin(Builtin::Float64) => { ASM::mov_freg64_base32(&mut self.buf, CC::FLOAT_RETURN_REGS[0], *offset); } + Layout::Builtin(Builtin::Str) => { + if 
self.symbol_storage_map.contains_key(&Symbol::RET_POINTER) { + // This will happen on windows, return via pointer here. + return Err("Returning strings via pointer not yet implemented".to_string()); + } else { + ASM::mov_reg64_base32(&mut self.buf, CC::GENERAL_RETURN_REGS[0], *offset); + ASM::mov_reg64_base32( + &mut self.buf, + CC::GENERAL_RETURN_REGS[1], + *offset + 8, + ); + } + } Layout::Struct(field_layouts) => { let (offset, size) = (*offset, *size); // Nothing to do for empty struct @@ -1446,8 +1513,6 @@ macro_rules! single_register_integers { #[macro_export] macro_rules! single_register_floats { () => { - // Float16 is explicitly ignored because it is not supported by must hardware and may require special exceptions. - // Builtin::Float16 | Builtin::Float32 | Builtin::Float64 }; } diff --git a/compiler/gen_dev/src/generic64/x86_64.rs b/compiler/gen_dev/src/generic64/x86_64.rs index a39a828f8a..a30a1a4e9c 100644 --- a/compiler/gen_dev/src/generic64/x86_64.rs +++ b/compiler/gen_dev/src/generic64/x86_64.rs @@ -177,6 +177,7 @@ impl CallConv for X86_64SystemV { #[inline(always)] fn load_args<'a>( + buf: &mut Vec<'a, u8>, symbol_map: &mut MutMap>, args: &'a [(Layout<'a>, Symbol)], ret_layout: &Layout<'a>, @@ -231,6 +232,29 @@ impl CallConv for X86_64SystemV { ); } } + Layout::Builtin(Builtin::Str) => { + if general_i + 1 < Self::GENERAL_PARAM_REGS.len() { + // Load the value to the param reg. 
+ let dst1 = Self::GENERAL_PARAM_REGS[general_i]; + let dst2 = Self::GENERAL_PARAM_REGS[general_i + 1]; + base_offset += 16; + X86_64Assembler::mov_reg64_base32(buf, dst1, base_offset - 8); + X86_64Assembler::mov_reg64_base32(buf, dst2, base_offset); + symbol_map.insert( + *sym, + SymbolStorage::Base { + offset: base_offset, + size: 16, + owned: true, + }, + ); + general_i += 2; + } else { + return Err( + "loading strings args on the stack is not yet implemented".to_string() + ); + } + } Layout::Struct(&[]) => {} x => { return Err(format!( @@ -257,7 +281,7 @@ impl CallConv for X86_64SystemV { // For most return layouts we will do nothing. // In some cases, we need to put the return address as the first arg. match ret_layout { - Layout::Builtin(single_register_builtins!()) => {} + Layout::Builtin(single_register_builtins!() | Builtin::Str) => {} x => { return Err(format!( "receiving return type, {:?}, is not yet implemented", @@ -373,6 +397,32 @@ impl CallConv for X86_64SystemV { stack_offset += 8; } } + Layout::Builtin(Builtin::Str) => { + if general_i + 1 < Self::GENERAL_PARAM_REGS.len() { + // Load the value to the param reg. + let dst1 = Self::GENERAL_PARAM_REGS[general_i]; + let dst2 = Self::GENERAL_PARAM_REGS[general_i + 1]; + match symbol_map + .get(&args[i]) + .ok_or("function argument does not reference any symbol")? + { + SymbolStorage::Base { offset, .. 
} => { + X86_64Assembler::mov_reg64_base32(buf, dst1, *offset); + X86_64Assembler::mov_reg64_base32(buf, dst2, *offset + 8); + } + _ => { + return Err("Strings only support being loaded from base offsets" + .to_string()); + } + } + general_i += 2; + } else { + return Err( + "calling functions with strings on the stack is not yet implemented" + .to_string(), + ); + } + } Layout::Struct(&[]) => {} x => { return Err(format!( @@ -516,6 +566,7 @@ impl CallConv for X86_64WindowsFastcall { #[inline(always)] fn load_args<'a>( + _buf: &mut Vec<'a, u8>, symbol_map: &mut MutMap>, args: &'a [(Layout<'a>, Symbol)], ret_layout: &Layout<'a>, @@ -535,9 +586,18 @@ impl CallConv for X86_64WindowsFastcall { Layout::Builtin(single_register_integers!()) => { symbol_map .insert(*sym, SymbolStorage::GeneralReg(Self::GENERAL_PARAM_REGS[i])); + i += 1; } Layout::Builtin(single_register_floats!()) => { symbol_map.insert(*sym, SymbolStorage::FloatReg(Self::FLOAT_PARAM_REGS[i])); + i += 1; + } + Layout::Builtin(Builtin::Str) => { + // I think this just needs to be passed on the stack, so not a huge deal. + return Err( + "Passing str args with Windows fast call not yet implemented." + .to_string(), + ); } Layout::Struct(&[]) => {} x => { @@ -547,7 +607,6 @@ impl CallConv for X86_64WindowsFastcall { )); } } - i += 1; } else { base_offset += match layout { Layout::Builtin(single_register_builtins!()) => 8, @@ -580,7 +639,6 @@ impl CallConv for X86_64WindowsFastcall { ret_layout: &Layout<'a>, ) -> Result { let mut stack_offset = Self::SHADOW_SPACE_SIZE as i32; - let mut reg_i = 0; // For most return layouts we will do nothing. // In some cases, we need to put the return address as the first arg. match ret_layout { @@ -597,7 +655,7 @@ impl CallConv for X86_64WindowsFastcall { Layout::Builtin(single_register_integers!()) => { if i < Self::GENERAL_PARAM_REGS.len() { // Load the value to the param reg. 
- let dst = Self::GENERAL_PARAM_REGS[reg_i]; + let dst = Self::GENERAL_PARAM_REGS[i]; match symbol_map .get(&args[i]) .ok_or("function argument does not reference any symbol")? @@ -615,7 +673,6 @@ impl CallConv for X86_64WindowsFastcall { ) } } - reg_i += 1; } else { // Load the value to the stack. match symbol_map @@ -651,7 +708,7 @@ impl CallConv for X86_64WindowsFastcall { Layout::Builtin(single_register_floats!()) => { if i < Self::FLOAT_PARAM_REGS.len() { // Load the value to the param reg. - let dst = Self::FLOAT_PARAM_REGS[reg_i]; + let dst = Self::FLOAT_PARAM_REGS[i]; match symbol_map .get(&args[i]) .ok_or("function argument does not reference any symbol")? @@ -668,7 +725,6 @@ impl CallConv for X86_64WindowsFastcall { return Err("Cannot load general symbol into FloatReg".to_string()) } } - reg_i += 1; } else { // Load the value to the stack. match symbol_map @@ -700,6 +756,12 @@ impl CallConv for X86_64WindowsFastcall { stack_offset += 8; } } + Layout::Builtin(Builtin::Str) => { + // I think this just needs to be passed on the stack, so not a huge deal. + return Err( + "Passing str args with Windows fast call not yet implemented.".to_string(), + ); + } Layout::Struct(&[]) => {} x => { return Err(format!( diff --git a/compiler/gen_dev/src/lib.rs b/compiler/gen_dev/src/lib.rs index 010fa066d3..5ef03a3a42 100644 --- a/compiler/gen_dev/src/lib.rs +++ b/compiler/gen_dev/src/lib.rs @@ -93,12 +93,8 @@ where for (layout, sym) in proc.args { self.set_layout_map(*sym, layout)?; } - // let start = std::time::Instant::now(); self.scan_ast(&proc.body); self.create_free_map(); - // let duration = start.elapsed(); - // println!("Time to calculate lifetimes: {:?}", duration); - // println!("{:?}", self.last_seen_map()); self.build_stmt(&proc.body, &proc.ret_layout)?; self.finalize() } @@ -119,6 +115,11 @@ where self.free_symbols(stmt)?; Ok(()) } + Stmt::Refcounting(_modify, following) => { + // TODO: actually deal with refcounting. For hello world, we just skipped it. 
+ self.build_stmt(following, ret_layout)?; + Ok(()) + } Stmt::Switch { cond_symbol, cond_layout, @@ -298,6 +299,13 @@ where arg_layouts, ret_layout, ), + Symbol::STR_CONCAT => self.build_run_low_level( + sym, + &LowLevel::StrConcat, + arguments, + arg_layouts, + ret_layout, + ), x if x .module_string(&self.env().interns) .starts_with(ModuleName::APP) => @@ -470,6 +478,13 @@ where arg_layouts, ret_layout, ), + LowLevel::StrConcat => self.build_fn_call( + sym, + bitcode::STR_CONCAT.to_string(), + args, + arg_layouts, + ret_layout, + ), x => Err(format!("low level, {:?}. is not yet implemented", x)), } } diff --git a/compiler/gen_dev/tests/dev_num.rs b/compiler/gen_dev/tests/dev_num.rs index c223acbcdf..c82d2ca23f 100644 --- a/compiler/gen_dev/tests/dev_num.rs +++ b/compiler/gen_dev/tests/dev_num.rs @@ -1,12 +1,6 @@ -#[macro_use] -extern crate pretty_assertions; - #[macro_use] extern crate indoc; -extern crate bumpalo; -extern crate libc; - #[macro_use] mod helpers; diff --git a/compiler/gen_dev/tests/dev_str.rs b/compiler/gen_dev/tests/dev_str.rs new file mode 100644 index 0000000000..19da650a89 --- /dev/null +++ b/compiler/gen_dev/tests/dev_str.rs @@ -0,0 +1,954 @@ +// #[macro_use] +// extern crate indoc; + +#[macro_use] +mod helpers; + +#[cfg(all(test, any(target_os = "linux", target_os = "macos"), any(target_arch = "x86_64"/*, target_arch = "aarch64"*/)))] +mod dev_str { + // use roc_std::{RocList, RocStr}; + // #[test] + // fn str_split_bigger_delimiter_small_str() { + // assert_evals_to!( + // indoc!( + // r#" + // List.len (Str.split "hello" "JJJJ there") + // "# + // ), + // 1, + // i64 + // ); + + // assert_evals_to!( + // indoc!( + // r#" + // when List.first (Str.split "JJJ" "JJJJ there") is + // Ok str -> + // Str.countGraphemes str + + // _ -> + // -1 + + // "# + // ), + // 3, + // i64 + // ); + // } + + // #[test] + // fn str_split_str_concat_repeated() { + // assert_evals_to!( + // indoc!( + // r#" + // when List.first (Str.split "JJJJJ" "JJJJ there") 
is + // Ok str -> + // str + // |> Str.concat str + // |> Str.concat str + // |> Str.concat str + // |> Str.concat str + + // _ -> + // "Not Str!" + + // "# + // ), + // RocStr::from_slice(b"JJJJJJJJJJJJJJJJJJJJJJJJJ"), + // RocStr + // ); + // } + + // #[test] + // fn str_split_small_str_bigger_delimiter() { + // assert_evals_to!( + // indoc!( + // r#" + // when + // List.first + // (Str.split "JJJ" "0123456789abcdefghi") + // is + // Ok str -> str + // _ -> "" + // "# + // ), + // RocStr::from_slice(b"JJJ"), + // RocStr + // ); + // } + + // #[test] + // fn str_split_big_str_small_delimiter() { + // assert_evals_to!( + // indoc!( + // r#" + // Str.split "01234567789abcdefghi?01234567789abcdefghi" "?" + // "# + // ), + // RocList::from_slice(&[ + // RocStr::from_slice(b"01234567789abcdefghi"), + // RocStr::from_slice(b"01234567789abcdefghi") + // ]), + // RocList + // ); + + // assert_evals_to!( + // indoc!( + // r#" + // Str.split "01234567789abcdefghi 3ch 01234567789abcdefghi" "3ch" + // "# + // ), + // RocList::from_slice(&[ + // RocStr::from_slice(b"01234567789abcdefghi "), + // RocStr::from_slice(b" 01234567789abcdefghi") + // ]), + // RocList + // ); + // } + + // #[test] + // fn str_split_small_str_small_delimiter() { + // assert_evals_to!( + // indoc!( + // r#" + // Str.split "J!J!J" "!" 
+ // "# + // ), + // RocList::from_slice(&[ + // RocStr::from_slice(b"J"), + // RocStr::from_slice(b"J"), + // RocStr::from_slice(b"J") + // ]), + // RocList + // ); + // } + + // #[test] + // fn str_split_bigger_delimiter_big_strs() { + // assert_evals_to!( + // indoc!( + // r#" + // Str.split + // "string to split is shorter" + // "than the delimiter which happens to be very very long" + // "# + // ), + // RocList::from_slice(&[RocStr::from_slice(b"string to split is shorter")]), + // RocList + // ); + // } + + // #[test] + // fn str_split_empty_strs() { + // assert_evals_to!( + // indoc!( + // r#" + // Str.split "" "" + // "# + // ), + // RocList::from_slice(&[RocStr::from_slice(b"")]), + // RocList + // ); + // } + + // #[test] + // fn str_split_minimal_example() { + // assert_evals_to!( + // indoc!( + // r#" + // Str.split "a," "," + // "# + // ), + // RocList::from_slice(&[RocStr::from_slice(b"a"), RocStr::from_slice(b"")]), + // RocList + // ) + // } + + // #[test] + // fn str_split_small_str_big_delimiter() { + // assert_evals_to!( + // indoc!( + // r#" + // Str.split + // "1---- ---- ---- ---- ----2---- ---- ---- ---- ----" + // "---- ---- ---- ---- ----" + // |> List.len + // "# + // ), + // 3, + // i64 + // ); + + // assert_evals_to!( + // indoc!( + // r#" + // Str.split + // "1---- ---- ---- ---- ----2---- ---- ---- ---- ----" + // "---- ---- ---- ---- ----" + // "# + // ), + // RocList::from_slice(&[ + // RocStr::from_slice(b"1"), + // RocStr::from_slice(b"2"), + // RocStr::from_slice(b"") + // ]), + // RocList + // ); + // } + + // #[test] + // fn str_split_small_str_20_char_delimiter() { + // assert_evals_to!( + // indoc!( + // r#" + // Str.split + // "3|-- -- -- -- -- -- |4|-- -- -- -- -- -- |" + // "|-- -- -- -- -- -- |" + // "# + // ), + // RocList::from_slice(&[ + // RocStr::from_slice(b"3"), + // RocStr::from_slice(b"4"), + // RocStr::from_slice(b"") + // ]), + // RocList + // ); + // } + + // #[test] + // fn str_concat_big_to_big() { + // 
assert_evals_to!( + // indoc!( + // r#" + // Str.concat + // "First string that is fairly long. Longer strings make for different errors. " + // "Second string that is also fairly long. Two long strings test things that might not appear with short strings." + // "# + // ), + // RocStr::from_slice(b"First string that is fairly long. Longer strings make for different errors. Second string that is also fairly long. Two long strings test things that might not appear with short strings."), + // RocStr + // ); + // } + + #[test] + fn small_str_literal() { + assert_evals_to!( + "\"JJJJJJJJJJJJJJJ\"", + [ + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0b1000_1111 + ], + [u8; 16] + ); + } + + // #[test] + // fn small_str_zeroed_literal() { + // // Verifies that we zero out unused bytes in the string. + // // This is important so that string equality tests don't randomly + // // fail due to unused memory being there! + // assert_evals_to!( + // "\"J\"", + // [ + // 0x4a, + // 0x00, + // 0x00, + // 0x00, + // 0x00, + // 0x00, + // 0x00, + // 0x00, + // 0x00, + // 0x00, + // 0x00, + // 0x00, + // 0x00, + // 0x00, + // 0x00, + // 0b1000_0001 + // ], + // [u8; 16] + // ); + // } + + #[test] + fn small_str_concat_empty_first_arg() { + assert_evals_to!( + r#"Str.concat "" "JJJJJJJJJJJJJJJ""#, + [ + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0b1000_1111 + ], + [u8; 16] + ); + } + + #[test] + fn small_str_concat_empty_second_arg() { + assert_evals_to!( + r#"Str.concat "JJJJJJJJJJJJJJJ" """#, + [ + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0b1000_1111 + ], + [u8; 16] + ); + } + + // #[test] + // fn small_str_concat_small_to_big() { + // assert_evals_to!( + // r#"Str.concat "abc" " this is longer than 15 chars""#, + // RocStr::from_slice(b"abc this is longer 
than 15 chars"), + // RocStr + // ); + // } + + #[test] + fn small_str_concat_small_to_small_staying_small() { + assert_evals_to!( + r#"Str.concat "J" "JJJJJJJJJJJJJJ""#, + [ + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0x4a, + 0b1000_1111 + ], + [u8; 16] + ); + } + + // #[test] + // fn small_str_concat_small_to_small_overflow_to_big() { + // assert_evals_to!( + // r#"Str.concat "abcdefghijklm" "nopqrstuvwxyz""#, + // RocStr::from_slice(b"abcdefghijklmnopqrstuvwxyz"), + // RocStr + // ); + // } + + // #[test] + // fn str_concat_empty() { + // assert_evals_to!(r#"Str.concat "" """#, RocStr::default(), RocStr); + // } + + // #[test] + // fn small_str_is_empty() { + // assert_evals_to!(r#"Str.isEmpty "abc""#, false, bool); + // } + + // #[test] + // fn big_str_is_empty() { + // assert_evals_to!( + // r#"Str.isEmpty "this is more than 15 chars long""#, + // false, + // bool + // ); + // } + + // #[test] + // fn empty_str_is_empty() { + // assert_evals_to!(r#"Str.isEmpty """#, true, bool); + // } + + // #[test] + // fn str_starts_with() { + // assert_evals_to!(r#"Str.startsWith "hello world" "hell""#, true, bool); + // assert_evals_to!(r#"Str.startsWith "hello world" """#, true, bool); + // assert_evals_to!(r#"Str.startsWith "nope" "hello world""#, false, bool); + // assert_evals_to!(r#"Str.startsWith "hell" "hello world""#, false, bool); + // assert_evals_to!(r#"Str.startsWith "" "hello world""#, false, bool); + // } + + // #[test] + // fn str_starts_with_code_point() { + // assert_evals_to!( + // &format!(r#"Str.startsWithCodePt "foobar" {}"#, 'f' as u32), + // true, + // bool + // ); + // assert_evals_to!( + // &format!(r#"Str.startsWithCodePt "zoobar" {}"#, 'f' as u32), + // false, + // bool + // ); + // } + + // #[test] + // fn str_ends_with() { + // assert_evals_to!(r#"Str.endsWith "hello world" "world""#, true, bool); + // assert_evals_to!(r#"Str.endsWith "nope" "hello world""#, false, bool); + 
// assert_evals_to!(r#"Str.endsWith "" "hello world""#, false, bool); + // } + + // #[test] + // fn str_count_graphemes_small_str() { + // assert_evals_to!(r#"Str.countGraphemes "å🤔""#, 2, usize); + // } + + // #[test] + // fn str_count_graphemes_three_js() { + // assert_evals_to!(r#"Str.countGraphemes "JJJ""#, 3, usize); + // } + + // #[test] + // fn str_count_graphemes_big_str() { + // assert_evals_to!( + // r#"Str.countGraphemes "6🤔å🤔e¥🤔çppkd🙃1jdal🦯asdfa∆ltråø˚waia8918.,🏅jjc""#, + // 45, + // usize + // ); + // } + + // #[test] + // fn str_starts_with_same_big_str() { + // assert_evals_to!( + // r#"Str.startsWith "123456789123456789" "123456789123456789""#, + // true, + // bool + // ); + // } + + // #[test] + // fn str_starts_with_different_big_str() { + // assert_evals_to!( + // r#"Str.startsWith "12345678912345678910" "123456789123456789""#, + // true, + // bool + // ); + // } + + // #[test] + // fn str_starts_with_same_small_str() { + // assert_evals_to!(r#"Str.startsWith "1234" "1234""#, true, bool); + // } + + // #[test] + // fn str_starts_with_different_small_str() { + // assert_evals_to!(r#"Str.startsWith "1234" "12""#, true, bool); + // } + // #[test] + // fn str_starts_with_false_small_str() { + // assert_evals_to!(r#"Str.startsWith "1234" "23""#, false, bool); + // } + + // #[test] + // fn str_from_int() { + // assert_evals_to!( + // r#"Str.fromInt 1234"#, + // roc_std::RocStr::from_slice("1234".as_bytes()), + // roc_std::RocStr + // ); + // assert_evals_to!( + // r#"Str.fromInt 0"#, + // roc_std::RocStr::from_slice("0".as_bytes()), + // roc_std::RocStr + // ); + // assert_evals_to!( + // r#"Str.fromInt -1"#, + // roc_std::RocStr::from_slice("-1".as_bytes()), + // roc_std::RocStr + // ); + + // let max = format!("{}", i64::MAX); + // assert_evals_to!( + // r#"Str.fromInt Num.maxInt"#, + // RocStr::from_slice(max.as_bytes()), + // RocStr + // ); + + // let min = format!("{}", i64::MIN); + // assert_evals_to!( + // r#"Str.fromInt Num.minInt"#, + // 
RocStr::from_slice(min.as_bytes()), + // RocStr + // ); + // } + + // #[test] + // fn str_from_utf8_pass_single_ascii() { + // assert_evals_to!( + // indoc!( + // r#" + // when Str.fromUtf8 [ 97 ] is + // Ok val -> val + // Err _ -> "" + // "# + // ), + // roc_std::RocStr::from_slice("a".as_bytes()), + // roc_std::RocStr + // ); + // } + + // #[test] + // fn str_from_utf8_pass_many_ascii() { + // assert_evals_to!( + // indoc!( + // r#" + // when Str.fromUtf8 [ 97, 98, 99, 0x7E ] is + // Ok val -> val + // Err _ -> "" + // "# + // ), + // roc_std::RocStr::from_slice("abc~".as_bytes()), + // roc_std::RocStr + // ); + // } + + // #[test] + // fn str_from_utf8_pass_single_unicode() { + // assert_evals_to!( + // indoc!( + // r#" + // when Str.fromUtf8 [ 0xE2, 0x88, 0x86 ] is + // Ok val -> val + // Err _ -> "" + // "# + // ), + // roc_std::RocStr::from_slice("∆".as_bytes()), + // roc_std::RocStr + // ); + // } + + // #[test] + // fn str_from_utf8_pass_many_unicode() { + // assert_evals_to!( + // indoc!( + // r#" + // when Str.fromUtf8 [ 0xE2, 0x88, 0x86, 0xC5, 0x93, 0xC2, 0xAC ] is + // Ok val -> val + // Err _ -> "" + // "# + // ), + // roc_std::RocStr::from_slice("∆œ¬".as_bytes()), + // roc_std::RocStr + // ); + // } + + // #[test] + // fn str_from_utf8_pass_single_grapheme() { + // assert_evals_to!( + // indoc!( + // r#" + // when Str.fromUtf8 [ 0xF0, 0x9F, 0x92, 0x96 ] is + // Ok val -> val + // Err _ -> "" + // "# + // ), + // roc_std::RocStr::from_slice("💖".as_bytes()), + // roc_std::RocStr + // ); + // } + + // #[test] + // fn str_from_utf8_pass_many_grapheme() { + // assert_evals_to!( + // indoc!( + // r#" + // when Str.fromUtf8 [ 0xF0, 0x9F, 0x92, 0x96, 0xF0, 0x9F, 0xA4, 0xA0, 0xF0, 0x9F, 0x9A, 0x80 ] is + // Ok val -> val + // Err _ -> "" + // "# + // ), + // roc_std::RocStr::from_slice("💖🤠🚀".as_bytes()), + // roc_std::RocStr + // ); + // } + + // #[test] + // fn str_from_utf8_pass_all() { + // assert_evals_to!( + // indoc!( + // r#" + // when Str.fromUtf8 [ 
0xF0, 0x9F, 0x92, 0x96, 98, 0xE2, 0x88, 0x86 ] is + // Ok val -> val + // Err _ -> "" + // "# + // ), + // roc_std::RocStr::from_slice("💖b∆".as_bytes()), + // roc_std::RocStr + // ); + // } + + // #[test] + // fn str_from_utf8_fail_invalid_start_byte() { + // assert_evals_to!( + // indoc!( + // r#" + // when Str.fromUtf8 [ 97, 98, 0x80, 99 ] is + // Err (BadUtf8 InvalidStartByte byteIndex) -> + // if byteIndex == 2 then + // "a" + // else + // "b" + // _ -> "" + // "# + // ), + // roc_std::RocStr::from_slice("a".as_bytes()), + // roc_std::RocStr + // ); + // } + + // #[test] + // fn str_from_utf8_fail_unexpected_end_of_sequence() { + // assert_evals_to!( + // indoc!( + // r#" + // when Str.fromUtf8 [ 97, 98, 99, 0xC2 ] is + // Err (BadUtf8 UnexpectedEndOfSequence byteIndex) -> + // if byteIndex == 3 then + // "a" + // else + // "b" + // _ -> "" + // "# + // ), + // roc_std::RocStr::from_slice("a".as_bytes()), + // roc_std::RocStr + // ); + // } + + // #[test] + // fn str_from_utf8_fail_expected_continuation() { + // assert_evals_to!( + // indoc!( + // r#" + // when Str.fromUtf8 [ 97, 98, 99, 0xC2, 0x00 ] is + // Err (BadUtf8 ExpectedContinuation byteIndex) -> + // if byteIndex == 3 then + // "a" + // else + // "b" + // _ -> "" + // "# + // ), + // roc_std::RocStr::from_slice("a".as_bytes()), + // roc_std::RocStr + // ); + // } + + // #[test] + // fn str_from_utf8_fail_overlong_encoding() { + // assert_evals_to!( + // indoc!( + // r#" + // when Str.fromUtf8 [ 97, 0xF0, 0x80, 0x80, 0x80 ] is + // Err (BadUtf8 OverlongEncoding byteIndex) -> + // if byteIndex == 1 then + // "a" + // else + // "b" + // _ -> "" + // "# + // ), + // roc_std::RocStr::from_slice("a".as_bytes()), + // roc_std::RocStr + // ); + // } + + // #[test] + // fn str_from_utf8_fail_codepoint_too_large() { + // assert_evals_to!( + // indoc!( + // r#" + // when Str.fromUtf8 [ 97, 0xF4, 0x90, 0x80, 0x80 ] is + // Err (BadUtf8 CodepointTooLarge byteIndex) -> + // if byteIndex == 1 then + // "a" + // else 
+ // "b" + // _ -> "" + // "# + // ), + // roc_std::RocStr::from_slice("a".as_bytes()), + // roc_std::RocStr + // ); + // } + + // #[test] + // fn str_from_utf8_fail_surrogate_half() { + // assert_evals_to!( + // indoc!( + // r#" + // when Str.fromUtf8 [ 97, 98, 0xED, 0xA0, 0x80 ] is + // Err (BadUtf8 EncodesSurrogateHalf byteIndex) -> + // if byteIndex == 2 then + // "a" + // else + // "b" + // _ -> "" + // "# + // ), + // roc_std::RocStr::from_slice("a".as_bytes()), + // roc_std::RocStr + // ); + // } + + // #[test] + // fn str_equality() { + // assert_evals_to!(r#""a" == "a""#, true, bool); + // assert_evals_to!( + // r#""loremipsumdolarsitamet" == "loremipsumdolarsitamet""#, + // true, + // bool + // ); + // assert_evals_to!(r#""a" != "b""#, true, bool); + // assert_evals_to!(r#""a" == "b""#, false, bool); + // } + + // #[test] + // fn str_clone() { + // use roc_std::RocStr; + // let long = RocStr::from_slice("loremipsumdolarsitamet".as_bytes()); + // let short = RocStr::from_slice("x".as_bytes()); + // let empty = RocStr::from_slice("".as_bytes()); + + // debug_assert_eq!(long.clone(), long); + // debug_assert_eq!(short.clone(), short); + // debug_assert_eq!(empty.clone(), empty); + // } + + // #[test] + // fn nested_recursive_literal() { + // assert_evals_to!( + // indoc!( + // r#" + // Expr : [ Add Expr Expr, Val I64, Var I64 ] + + // expr : Expr + // expr = Add (Add (Val 3) (Val 1)) (Add (Val 1) (Var 1)) + + // printExpr : Expr -> Str + // printExpr = \e -> + // when e is + // Add a b -> + // "Add (" + // |> Str.concat (printExpr a) + // |> Str.concat ") (" + // |> Str.concat (printExpr b) + // |> Str.concat ")" + // Val v -> "Val " |> Str.concat (Str.fromInt v) + // Var v -> "Var " |> Str.concat (Str.fromInt v) + + // printExpr expr + // "# + // ), + // RocStr::from_slice(b"Add (Add (Val 3) (Val 1)) (Add (Val 1) (Var 1))"), + // RocStr + // ); + // } + + // #[test] + // fn str_join_comma_small() { + // assert_evals_to!( + // r#"Str.joinWith ["1", "2"] ", " 
"#, + // RocStr::from("1, 2"), + // RocStr + // ); + // } + + // #[test] + // fn str_join_comma_big() { + // assert_evals_to!( + // r#"Str.joinWith ["10000000", "2000000", "30000000"] ", " "#, + // RocStr::from("10000000, 2000000, 30000000"), + // RocStr + // ); + // } + + // #[test] + // fn str_join_comma_single() { + // assert_evals_to!(r#"Str.joinWith ["1"] ", " "#, RocStr::from("1"), RocStr); + // } + + // #[test] + // fn str_from_float() { + // assert_evals_to!(r#"Str.fromFloat 3.14"#, RocStr::from("3.14"), RocStr); + // } + + // #[test] + // fn str_to_utf8() { + // assert_evals_to!( + // r#"Str.toUtf8 "hello""#, + // RocList::from_slice(&[104, 101, 108, 108, 111]), + // RocList + // ); + // assert_evals_to!( + // r#"Str.toUtf8 "this is a long string""#, + // RocList::from_slice(&[ + // 116, 104, 105, 115, 32, 105, 115, 32, 97, 32, 108, 111, 110, 103, 32, 115, 116, + // 114, 105, 110, 103 + // ]), + // RocList + // ); + // } + + // #[test] + // fn str_from_utf8_range() { + // assert_evals_to!( + // indoc!( + // r#" + // bytes = Str.toUtf8 "hello" + // when Str.fromUtf8Range bytes { count: 5, start: 0 } is + // Ok utf8String -> utf8String + // _ -> "" + // "# + // ), + // RocStr::from("hello"), + // RocStr + // ); + // } + + // #[test] + // fn str_from_utf8_range_slice() { + // assert_evals_to!( + // indoc!( + // r#" + // bytes = Str.toUtf8 "hello" + // when Str.fromUtf8Range bytes { count: 4, start: 1 } is + // Ok utf8String -> utf8String + // _ -> "" + // "# + // ), + // RocStr::from("ello"), + // RocStr + // ); + // } + + // #[test] + // fn str_from_utf8_range_slice_not_end() { + // assert_evals_to!( + // indoc!( + // r#" + // bytes = Str.toUtf8 "hello" + // when Str.fromUtf8Range bytes { count: 3, start: 1 } is + // Ok utf8String -> utf8String + // _ -> "" + // "# + // ), + // RocStr::from("ell"), + // RocStr + // ); + // } + + // #[test] + // fn str_from_utf8_range_order_does_not_matter() { + // assert_evals_to!( + // indoc!( + // r#" + // bytes = 
Str.toUtf8 "hello" + // when Str.fromUtf8Range bytes { start: 1, count: 3 } is + // Ok utf8String -> utf8String + // _ -> "" + // "# + // ), + // RocStr::from("ell"), + // RocStr + // ); + // } + + // #[test] + // fn str_from_utf8_range_out_of_bounds_start_value() { + // assert_evals_to!( + // indoc!( + // r#" + // bytes = Str.toUtf8 "hello" + // when Str.fromUtf8Range bytes { start: 7, count: 3 } is + // Ok _ -> "" + // Err (BadUtf8 _ _) -> "" + // Err OutOfBounds -> "out of bounds" + // "# + // ), + // RocStr::from("out of bounds"), + // RocStr + // ); + // } + + // #[test] + // fn str_from_utf8_range_count_too_high() { + // assert_evals_to!( + // indoc!( + // r#" + // bytes = Str.toUtf8 "hello" + // when Str.fromUtf8Range bytes { start: 0, count: 6 } is + // Ok _ -> "" + // Err (BadUtf8 _ _) -> "" + // Err OutOfBounds -> "out of bounds" + // "# + // ), + // RocStr::from("out of bounds"), + // RocStr + // ); + // } + + // #[test] + // fn str_from_utf8_range_count_too_high_for_start() { + // assert_evals_to!( + // indoc!( + // r#" + // bytes = Str.toUtf8 "hello" + // when Str.fromUtf8Range bytes { start: 4, count: 3 } is + // Ok _ -> "" + // Err (BadUtf8 _ _) -> "" + // Err OutOfBounds -> "out of bounds" + // "# + // ), + // RocStr::from("out of bounds"), + // RocStr + // ); + // } +} diff --git a/compiler/gen_llvm/src/llvm/build.rs b/compiler/gen_llvm/src/llvm/build.rs index 4864b04fcb..ea3eef69d2 100644 --- a/compiler/gen_llvm/src/llvm/build.rs +++ b/compiler/gen_llvm/src/llvm/build.rs @@ -9,13 +9,14 @@ use crate::llvm::build_dict::{ use crate::llvm::build_hash::generic_hash; use crate::llvm::build_list::{ self, allocate_list, empty_list, empty_polymorphic_list, list_append, list_concat, - list_contains, list_drop, list_get_unsafe, list_join, list_keep_errs, list_keep_if, - list_keep_oks, list_len, list_map, list_map2, list_map3, list_map_with_index, list_prepend, - list_range, list_repeat, list_reverse, list_set, list_single, list_sort_with, list_swap, + 
list_contains, list_drop, list_drop_at, list_get_unsafe, list_join, list_keep_errs, + list_keep_if, list_keep_oks, list_len, list_map, list_map2, list_map3, list_map_with_index, + list_prepend, list_range, list_repeat, list_reverse, list_set, list_single, list_sort_with, + list_swap, }; use crate::llvm::build_str::{ empty_str, str_concat, str_count_graphemes, str_ends_with, str_from_float, str_from_int, - str_from_utf8, str_from_utf8_range, str_join_with, str_number_of_bytes, str_split, + str_from_utf8, str_from_utf8_range, str_join_with, str_number_of_bytes, str_repeat, str_split, str_starts_with, str_starts_with_code_point, str_to_utf8, }; use crate::llvm::compare::{generic_eq, generic_neq}; @@ -708,7 +709,7 @@ fn promote_to_main_function<'a, 'ctx, 'env>( env, main_fn_name, roc_main_fn, - &[], + top_level.arguments, top_level.result, main_fn_name, ); @@ -934,7 +935,9 @@ pub fn build_exp_call<'a, 'ctx, 'env>( CallType::LowLevel { op, update_mode } => { let bytes = update_mode.to_bytes(); let update_var = UpdateModeVar(&bytes); - let update_mode = func_spec_solutions.update_mode(update_var).ok(); + let update_mode = func_spec_solutions + .update_mode(update_var) + .unwrap_or(UpdateMode::Immutable); run_low_level( env, @@ -2185,7 +2188,10 @@ fn list_literal<'a, 'ctx, 'env>( let list_length = elems.len(); let list_length_intval = env.ptr_int().const_int(list_length as _, false); - if element_type.is_int_type() { + // TODO re-enable, currently causes morphic segfaults because it tries to update + // constants in-place... 
+ // if element_type.is_int_type() { + if false { let element_type = element_type.into_int_type(); let element_width = elem_layout.stack_size(env.ptr_bytes); let size = list_length * element_width as usize; @@ -2226,17 +2232,18 @@ fn list_literal<'a, 'ctx, 'env>( } ListLiteralElement::Symbol(symbol) => { let val = load_symbol(scope, symbol); - let intval = val.into_int_value(); - if intval.is_const() { - global_elements.push(intval); - } else { - is_all_constant = false; + // here we'd like to furthermore check for intval.is_const(). + // if all elements are const for LLVM, we could make the array a constant. + // BUT morphic does not know about this, and could allow us to modify that + // array in-place. That would cause a segfault. So, we'll have to find + // constants ourselves and cannot lean on LLVM here. - runtime_evaluated_elements.push((index, val)); + is_all_constant = false; - global_elements.push(element_type.get_undef()); - } + runtime_evaluated_elements.push((index, val)); + + global_elements.push(element_type.get_undef()); } }; } @@ -3210,6 +3217,141 @@ fn expose_function_to_host_help_c_abi_generic<'a, 'ctx, 'env>( c_function } +fn expose_function_to_host_help_c_abi_gen_test<'a, 'ctx, 'env>( + env: &Env<'a, 'ctx, 'env>, + ident_string: &str, + roc_function: FunctionValue<'ctx>, + arguments: &[Layout<'a>], + c_function_name: &str, +) -> FunctionValue<'ctx> { + let context = env.context; + + // a tagged union to indicate to the test loader that a panic occurred. 
+ // especially when running 32-bit binaries on a 64-bit machine, there + // does not seem to be a smarter solution + let wrapper_return_type = context.struct_type( + &[ + context.i64_type().into(), + roc_function.get_type().get_return_type().unwrap(), + ], + false, + ); + + let mut cc_argument_types = Vec::with_capacity_in(arguments.len(), env.arena); + for layout in arguments { + cc_argument_types.push(to_cc_type(env, layout)); + } + + // STEP 1: turn `f : a,b,c -> d` into `f : a,b,c, &d -> {}` if the C abi demands it + let mut argument_types = cc_argument_types; + let return_type = wrapper_return_type; + + let c_function_type = { + let output_type = return_type.ptr_type(AddressSpace::Generic); + argument_types.push(output_type.into()); + env.context.void_type().fn_type(&argument_types, false) + }; + + let c_function = add_func( + env.module, + c_function_name, + c_function_type, + Linkage::External, + C_CALL_CONV, + ); + + let subprogram = env.new_subprogram(c_function_name); + c_function.set_subprogram(subprogram); + + // STEP 2: build the exposed function's body + let builder = env.builder; + let context = env.context; + + let entry = context.append_basic_block(c_function, "entry"); + + builder.position_at_end(entry); + + debug_info_init!(env, c_function); + + // drop the final argument, which is the pointer we write the result into + let args_vector = c_function.get_params(); + let mut args = args_vector.as_slice(); + let args_length = args.len(); + + args = &args[..args.len() - 1]; + + let mut arguments_for_call = Vec::with_capacity_in(args.len(), env.arena); + + let it = args.iter().zip(roc_function.get_type().get_param_types()); + for (arg, fastcc_type) in it { + let arg_type = arg.get_type(); + if arg_type == fastcc_type { + // the C and Fast calling conventions agree + arguments_for_call.push(*arg); + } else { + let cast = complex_bitcast_check_size(env, *arg, fastcc_type, "to_fastcc_type"); + arguments_for_call.push(cast); + } + } + + let 
arguments_for_call = &arguments_for_call.into_bump_slice(); + + let call_result = { + let roc_wrapper_function = make_exception_catcher(env, roc_function); + debug_assert_eq!( + arguments_for_call.len(), + roc_wrapper_function.get_params().len() + ); + + builder.position_at_end(entry); + + let call_wrapped = builder.build_call( + roc_wrapper_function, + arguments_for_call, + "call_wrapped_function", + ); + call_wrapped.set_call_convention(FAST_CALL_CONV); + + call_wrapped.try_as_basic_value().left().unwrap() + }; + + let output_arg_index = args_length - 1; + + let output_arg = c_function + .get_nth_param(output_arg_index as u32) + .unwrap() + .into_pointer_value(); + + builder.build_store(output_arg, call_result); + builder.build_return(None); + + // STEP 3: build a {} -> u64 function that gives the size of the return type + let size_function_type = env.context.i64_type().fn_type(&[], false); + let size_function_name: String = format!("roc__{}_size", ident_string); + + let size_function = add_func( + env.module, + size_function_name.as_str(), + size_function_type, + Linkage::External, + C_CALL_CONV, + ); + + let subprogram = env.new_subprogram(&size_function_name); + size_function.set_subprogram(subprogram); + + let entry = context.append_basic_block(size_function, "entry"); + + builder.position_at_end(entry); + + debug_info_init!(env, size_function); + + let size: BasicValueEnum = return_type.size_of().unwrap().into(); + builder.build_return(Some(&size)); + + c_function +} + fn expose_function_to_host_help_c_abi<'a, 'ctx, 'env>( env: &Env<'a, 'ctx, 'env>, ident_string: &str, @@ -3220,16 +3362,24 @@ fn expose_function_to_host_help_c_abi<'a, 'ctx, 'env>( ) -> FunctionValue<'ctx> { let context = env.context; - // a generic version that writes the result into a passed *u8 pointer - if !env.is_gen_test { - expose_function_to_host_help_c_abi_generic( + if env.is_gen_test { + return expose_function_to_host_help_c_abi_gen_test( env, + ident_string, roc_function, 
arguments, - &format!("{}_generic", c_function_name), + c_function_name, ); } + // a generic version that writes the result into a passed *u8 pointer + expose_function_to_host_help_c_abi_generic( + env, + roc_function, + arguments, + &format!("{}_generic", c_function_name), + ); + let wrapper_return_type = if env.is_gen_test { context .struct_type( @@ -3256,11 +3406,9 @@ fn expose_function_to_host_help_c_abi<'a, 'ctx, 'env>( let cc_return = to_cc_return(env, &return_layout); let c_function_type = match cc_return { - CCReturn::Void if !env.is_gen_test => { - env.context.void_type().fn_type(&argument_types, false) - } - CCReturn::Return if !env.is_gen_test => return_type.fn_type(&argument_types, false), - _ => { + CCReturn::Void => env.context.void_type().fn_type(&argument_types, false), + CCReturn::Return => return_type.fn_type(&argument_types, false), + CCReturn::ByPointer => { let output_type = return_type.ptr_type(AddressSpace::Generic); argument_types.push(output_type.into()); env.context.void_type().fn_type(&argument_types, false) @@ -3294,13 +3442,13 @@ fn expose_function_to_host_help_c_abi<'a, 'ctx, 'env>( let args_length = args.len(); match cc_return { - CCReturn::Return if !env.is_gen_test => { + CCReturn::Return => { debug_assert_eq!(args.len(), roc_function.get_params().len()); } - CCReturn::Void if !env.is_gen_test => { + CCReturn::Void => { debug_assert_eq!(args.len(), roc_function.get_params().len()); } - _ => { + CCReturn::ByPointer => { args = &args[..args.len() - 1]; debug_assert_eq!(args.len(), roc_function.get_params().len()); } @@ -3323,44 +3471,25 @@ fn expose_function_to_host_help_c_abi<'a, 'ctx, 'env>( let arguments_for_call = &arguments_for_call.into_bump_slice(); let call_result = { - if env.is_gen_test { - let roc_wrapper_function = make_exception_catcher(env, roc_function); - debug_assert_eq!( - arguments_for_call.len(), - roc_wrapper_function.get_params().len() - ); + let call_unwrapped = + builder.build_call(roc_function, 
arguments_for_call, "call_unwrapped_function"); + call_unwrapped.set_call_convention(FAST_CALL_CONV); - builder.position_at_end(entry); + let call_unwrapped_result = call_unwrapped.try_as_basic_value().left().unwrap(); - let call_wrapped = builder.build_call( - roc_wrapper_function, - arguments_for_call, - "call_wrapped_function", - ); - call_wrapped.set_call_convention(FAST_CALL_CONV); - - call_wrapped.try_as_basic_value().left().unwrap() - } else { - let call_unwrapped = - builder.build_call(roc_function, arguments_for_call, "call_unwrapped_function"); - call_unwrapped.set_call_convention(FAST_CALL_CONV); - - let call_unwrapped_result = call_unwrapped.try_as_basic_value().left().unwrap(); - - // make_good_roc_result(env, call_unwrapped_result) - call_unwrapped_result - } + // make_good_roc_result(env, call_unwrapped_result) + call_unwrapped_result }; match cc_return { - CCReturn::Void if !env.is_gen_test => { + CCReturn::Void => { // TODO return empty struct here? builder.build_return(None); } - CCReturn::Return if !env.is_gen_test => { + CCReturn::Return => { builder.build_return(Some(&call_result)); } - _ => { + CCReturn::ByPointer => { let output_arg_index = args_length - 1; let output_arg = c_function @@ -4105,19 +4234,28 @@ pub fn build_proc<'a, 'ctx, 'env>( let func_solutions = mod_solutions.func_solutions(func_name).unwrap(); let mut it = func_solutions.specs(); - let func_spec = it.next().unwrap(); - debug_assert!( - it.next().is_none(), - "we expect only one specialization of this symbol" - ); + let evaluator = match it.next() { + Some(func_spec) => { + debug_assert!( + it.next().is_none(), + "we expect only one specialization of this symbol" + ); - let evaluator = function_value_by_func_spec( - env, - *func_spec, - symbol, - top_level.arguments, - &top_level.result, - ); + function_value_by_func_spec( + env, + *func_spec, + symbol, + top_level.arguments, + &top_level.result, + ) + } + None => { + // morphic did not generate a specialization for this 
function, + // therefore it must actually be unused. + // An example is our closure callers + panic!("morphic did not specialize {:?}", symbol); + } + }; let ident_string = proc.name.as_str(&env.interns); let fn_name: String = format!("{}_1", ident_string); @@ -4777,7 +4915,7 @@ fn run_low_level<'a, 'ctx, 'env>( layout: &Layout<'a>, op: LowLevel, args: &[Symbol], - update_mode: Option, + update_mode: UpdateMode, // expect_failed: *const (), ) -> BasicValueEnum<'ctx> { use LowLevel::*; @@ -4833,7 +4971,7 @@ fn run_low_level<'a, 'ctx, 'env>( let original_wrapper = load_symbol(scope, &args[0]).into_struct_value(); - str_from_utf8(env, parent, original_wrapper) + str_from_utf8(env, parent, original_wrapper, update_mode) } StrFromUtf8Range => { debug_assert_eq!(args.len(), 2); @@ -4853,6 +4991,12 @@ fn run_low_level<'a, 'ctx, 'env>( str_to_utf8(env, string.into_struct_value()) } + StrRepeat => { + // Str.repeat : Str, Nat -> Str + debug_assert_eq!(args.len(), 2); + + str_repeat(env, scope, args[0], args[1]) + } StrSplit => { // Str.split : Str, Str -> List Str debug_assert_eq!(args.len(), 2); @@ -4909,7 +5053,7 @@ fn run_low_level<'a, 'ctx, 'env>( let (list, list_layout) = load_symbol_and_layout(scope, &args[0]); - list_reverse(env, list, list_layout) + list_reverse(env, list, list_layout, update_mode) } ListConcat => { debug_assert_eq!(args.len(), 2); @@ -4951,7 +5095,7 @@ fn run_low_level<'a, 'ctx, 'env>( let original_wrapper = load_symbol(scope, &args[0]).into_struct_value(); let (elem, elem_layout) = load_symbol_and_layout(scope, &args[1]); - list_append(env, original_wrapper, elem, elem_layout) + list_append(env, original_wrapper, elem, elem_layout, update_mode) } ListSwap => { // List.swap : List elem, Nat, Nat -> List elem @@ -4971,6 +5115,7 @@ fn run_low_level<'a, 'ctx, 'env>( index_1.into_int_value(), index_2.into_int_value(), element_layout, + update_mode, ), _ => unreachable!("Invalid layout {:?} in List.swap", list_layout), } @@ -4996,6 +5141,27 @@ fn 
run_low_level<'a, 'ctx, 'env>( _ => unreachable!("Invalid layout {:?} in List.drop", list_layout), } } + ListDropAt => { + // List.dropAt : List elem, Nat -> List elem + debug_assert_eq!(args.len(), 2); + + let (list, list_layout) = load_symbol_and_layout(scope, &args[0]); + let original_wrapper = list.into_struct_value(); + + let count = load_symbol(scope, &args[1]); + + match list_layout { + Layout::Builtin(Builtin::EmptyList) => empty_list(env), + Layout::Builtin(Builtin::List(element_layout)) => list_drop_at( + env, + layout_ids, + original_wrapper, + count.into_int_value(), + element_layout, + ), + _ => unreachable!("Invalid layout {:?} in List.dropAt", list_layout), + } + } ListPrepend => { // List.prepend : List elem, elem -> List elem debug_assert_eq!(args.len(), 2); @@ -5298,7 +5464,7 @@ fn run_low_level<'a, 'ctx, 'env>( index.into_int_value(), element, element_layout, - update_mode.unwrap(), + update_mode, ), _ => unreachable!("invalid dict layout"), } @@ -5615,89 +5781,115 @@ fn build_foreign_symbol<'a, 'ctx, 'env>( let builder = env.builder; let context = env.context; - // Here we build two functions: - // - // - an C_CALL_CONV extern that will be provided by the host, e.g. `roc_fx_putLine` - // This is just a type signature that we make available to the linker, - // and can use in the wrapper - // - a FAST_CALL_CONV wrapper that we make here, e.g. 
`roc_fx_putLine_fastcc_wrapper` + let fastcc_function_name = format!("{}_fastcc_wrapper", foreign.as_str()); - let return_type = basic_type_from_layout(env, ret_layout); - let cc_return = to_cc_return(env, ret_layout); + let (fastcc_function, arguments) = match env.module.get_function(fastcc_function_name.as_str()) + { + Some(function_value) => { + let mut arguments = Vec::with_capacity_in(argument_symbols.len(), env.arena); - let mut cc_argument_types = Vec::with_capacity_in(argument_symbols.len() + 1, env.arena); - let mut fastcc_argument_types = Vec::with_capacity_in(argument_symbols.len(), env.arena); - let mut arguments = Vec::with_capacity_in(argument_symbols.len(), env.arena); + for symbol in argument_symbols { + let (value, _) = load_symbol_and_layout(scope, symbol); - for symbol in argument_symbols { - let (value, layout) = load_symbol_and_layout(scope, symbol); + arguments.push(value); + } - cc_argument_types.push(to_cc_type(env, layout)); - - let basic_type = basic_type_from_layout(env, layout); - fastcc_argument_types.push(basic_type); - - arguments.push(value); - } - - let cc_type = match cc_return { - CCReturn::Void => env.context.void_type().fn_type(&cc_argument_types, false), - CCReturn::ByPointer => { - cc_argument_types.push(return_type.ptr_type(AddressSpace::Generic).into()); - env.context.void_type().fn_type(&cc_argument_types, false) + (function_value, arguments) + } + None => { + // Here we build two functions: + // + // - an C_CALL_CONV extern that will be provided by the host, e.g. `roc_fx_putLine` + // This is just a type signature that we make available to the linker, + // and can use in the wrapper + // - a FAST_CALL_CONV wrapper that we make here, e.g. 
`roc_fx_putLine_fastcc_wrapper` + + let return_type = basic_type_from_layout(env, ret_layout); + let cc_return = to_cc_return(env, ret_layout); + + let mut cc_argument_types = + Vec::with_capacity_in(argument_symbols.len() + 1, env.arena); + let mut fastcc_argument_types = + Vec::with_capacity_in(argument_symbols.len(), env.arena); + let mut arguments = Vec::with_capacity_in(argument_symbols.len(), env.arena); + + for symbol in argument_symbols { + let (value, layout) = load_symbol_and_layout(scope, symbol); + + cc_argument_types.push(to_cc_type(env, layout)); + + let basic_type = basic_type_from_layout(env, layout); + fastcc_argument_types.push(basic_type); + + arguments.push(value); + } + + let cc_type = match cc_return { + CCReturn::Void => env.context.void_type().fn_type(&cc_argument_types, false), + CCReturn::ByPointer => { + cc_argument_types.push(return_type.ptr_type(AddressSpace::Generic).into()); + env.context.void_type().fn_type(&cc_argument_types, false) + } + CCReturn::Return => return_type.fn_type(&cc_argument_types, false), + }; + + let cc_function = get_foreign_symbol(env, foreign.clone(), cc_type); + + let fastcc_type = return_type.fn_type(&fastcc_argument_types, false); + + let fastcc_function = add_func( + env.module, + &fastcc_function_name, + fastcc_type, + Linkage::Private, + FAST_CALL_CONV, + ); + + let old = builder.get_insert_block().unwrap(); + + let entry = context.append_basic_block(fastcc_function, "entry"); + { + builder.position_at_end(entry); + let return_pointer = env.builder.build_alloca(return_type, "return_value"); + + let fastcc_parameters = fastcc_function.get_params(); + let mut cc_arguments = + Vec::with_capacity_in(fastcc_parameters.len() + 1, env.arena); + + for (param, cc_type) in fastcc_parameters.into_iter().zip(cc_argument_types.iter()) + { + if param.get_type() == *cc_type { + cc_arguments.push(param); + } else { + let as_cc_type = + complex_bitcast(env.builder, param, *cc_type, "to_cc_type"); + 
cc_arguments.push(as_cc_type); + } + } + + if let CCReturn::ByPointer = cc_return { + cc_arguments.push(return_pointer.into()); + } + + let call = env.builder.build_call(cc_function, &cc_arguments, "tmp"); + call.set_call_convention(C_CALL_CONV); + + let return_value = match cc_return { + CCReturn::Return => call.try_as_basic_value().left().unwrap(), + + CCReturn::ByPointer => env.builder.build_load(return_pointer, "read_result"), + CCReturn::Void => return_type.const_zero(), + }; + + builder.build_return(Some(&return_value)); + } + + builder.position_at_end(old); + + (fastcc_function, arguments) } - CCReturn::Return => return_type.fn_type(&cc_argument_types, false), }; - let cc_function = get_foreign_symbol(env, foreign.clone(), cc_type); - - let fastcc_type = return_type.fn_type(&fastcc_argument_types, false); - - let fastcc_function = add_func( - env.module, - &format!("{}_fastcc_wrapper", foreign.as_str()), - fastcc_type, - Linkage::Private, - FAST_CALL_CONV, - ); - - let old = builder.get_insert_block().unwrap(); - - let entry = context.append_basic_block(fastcc_function, "entry"); - { - builder.position_at_end(entry); - let return_pointer = env.builder.build_alloca(return_type, "return_value"); - - let fastcc_parameters = fastcc_function.get_params(); - let mut cc_arguments = Vec::with_capacity_in(fastcc_parameters.len() + 1, env.arena); - - for (param, cc_type) in fastcc_parameters.into_iter().zip(cc_argument_types.iter()) { - if param.get_type() == *cc_type { - cc_arguments.push(param); - } else { - let as_cc_type = complex_bitcast(env.builder, param, *cc_type, "to_cc_type"); - cc_arguments.push(as_cc_type); - } - } - - if let CCReturn::ByPointer = cc_return { - cc_arguments.push(return_pointer.into()); - } - - let call = env.builder.build_call(cc_function, &cc_arguments, "tmp"); - call.set_call_convention(C_CALL_CONV); - - let return_value = match cc_return { - CCReturn::Return => call.try_as_basic_value().left().unwrap(), - - CCReturn::ByPointer => 
env.builder.build_load(return_pointer, "read_result"), - CCReturn::Void => return_type.const_zero(), - }; - - builder.build_return(Some(&return_value)); - } - - builder.position_at_end(old); let call = env.builder.build_call(fastcc_function, &arguments, "tmp"); call.set_call_convention(FAST_CALL_CONV); return call.try_as_basic_value().left().unwrap(); diff --git a/compiler/gen_llvm/src/llvm/build_list.rs b/compiler/gen_llvm/src/llvm/build_list.rs index 2fe94d4cb5..ddc6cd645b 100644 --- a/compiler/gen_llvm/src/llvm/build_list.rs +++ b/compiler/gen_llvm/src/llvm/build_list.rs @@ -17,6 +17,16 @@ use morphic_lib::UpdateMode; use roc_builtins::bitcode; use roc_mono::layout::{Builtin, Layout, LayoutIds}; +pub fn pass_update_mode<'a, 'ctx, 'env>( + env: &Env<'a, 'ctx, 'env>, + update_mode: UpdateMode, +) -> BasicValueEnum<'ctx> { + match update_mode { + UpdateMode::Immutable => env.context.i8_type().const_zero().into(), + UpdateMode::InPlace => env.context.i8_type().const_int(1, false).into(), + } +} + fn list_returned_from_zig<'a, 'ctx, 'env>( env: &Env<'a, 'ctx, 'env>, output: BasicValueEnum<'ctx>, @@ -162,6 +172,7 @@ pub fn list_reverse<'a, 'ctx, 'env>( env: &Env<'a, 'ctx, 'env>, list: BasicValueEnum<'ctx>, list_layout: &Layout<'a>, + update_mode: UpdateMode, ) -> BasicValueEnum<'ctx> { let element_layout = match *list_layout { Layout::Builtin(Builtin::EmptyList) => { @@ -180,6 +191,7 @@ pub fn list_reverse<'a, 'ctx, 'env>( pass_list_cc(env, list), env.alignment_intvalue(&element_layout), layout_width(env, &element_layout), + pass_update_mode(env, update_mode), ], bitcode::LIST_REVERSE, ) @@ -228,6 +240,7 @@ pub fn list_append<'a, 'ctx, 'env>( original_wrapper: StructValue<'ctx>, element: BasicValueEnum<'ctx>, element_layout: &Layout<'a>, + update_mode: UpdateMode, ) -> BasicValueEnum<'ctx> { call_bitcode_fn_returns_list( env, @@ -236,6 +249,7 @@ pub fn list_append<'a, 'ctx, 'env>( env.alignment_intvalue(element_layout), pass_element_as_opaque(env, element), 
layout_width(env, element_layout), + pass_update_mode(env, update_mode), ], bitcode::LIST_APPEND, ) @@ -267,6 +281,7 @@ pub fn list_swap<'a, 'ctx, 'env>( index_1: IntValue<'ctx>, index_2: IntValue<'ctx>, element_layout: &Layout<'a>, + update_mode: UpdateMode, ) -> BasicValueEnum<'ctx> { call_bitcode_fn_returns_list( env, @@ -276,12 +291,13 @@ pub fn list_swap<'a, 'ctx, 'env>( layout_width(env, element_layout), index_1.into(), index_2.into(), + pass_update_mode(env, update_mode), ], bitcode::LIST_SWAP, ) } -/// List.drop : List elem, Nat, Nat -> List elem +/// List.drop : List elem, Nat -> List elem pub fn list_drop<'a, 'ctx, 'env>( env: &Env<'a, 'ctx, 'env>, layout_ids: &mut LayoutIds<'a>, @@ -303,6 +319,28 @@ pub fn list_drop<'a, 'ctx, 'env>( ) } +/// List.dropAt : List elem, Nat -> List elem +pub fn list_drop_at<'a, 'ctx, 'env>( + env: &Env<'a, 'ctx, 'env>, + layout_ids: &mut LayoutIds<'a>, + original_wrapper: StructValue<'ctx>, + count: IntValue<'ctx>, + element_layout: &Layout<'a>, +) -> BasicValueEnum<'ctx> { + let dec_element_fn = build_dec_wrapper(env, layout_ids, element_layout); + call_bitcode_fn_returns_list( + env, + &[ + pass_list_cc(env, original_wrapper.into()), + env.alignment_intvalue(element_layout), + layout_width(env, element_layout), + count.into(), + dec_element_fn.as_global_value().as_pointer_value().into(), + ], + bitcode::LIST_DROP_AT, + ) +} + /// List.set : List elem, Nat, elem -> List elem pub fn list_set<'a, 'ctx, 'env>( env: &Env<'a, 'ctx, 'env>, diff --git a/compiler/gen_llvm/src/llvm/build_str.rs b/compiler/gen_llvm/src/llvm/build_str.rs index 80bfa0fa3a..564f35625e 100644 --- a/compiler/gen_llvm/src/llvm/build_str.rs +++ b/compiler/gen_llvm/src/llvm/build_str.rs @@ -1,9 +1,12 @@ use crate::llvm::bitcode::{call_bitcode_fn, call_void_bitcode_fn}; use crate::llvm::build::{complex_bitcast, Env, Scope}; -use crate::llvm::build_list::{allocate_list, call_bitcode_fn_returns_list, store_list}; +use crate::llvm::build_list::{ + allocate_list, 
call_bitcode_fn_returns_list, pass_update_mode, store_list, +}; use inkwell::builder::Builder; use inkwell::values::{BasicValueEnum, FunctionValue, IntValue, PointerValue, StructValue}; use inkwell::AddressSpace; +use morphic_lib::UpdateMode; use roc_builtins::bitcode; use roc_module::symbol::Symbol; use roc_mono::layout::{Builtin, Layout}; @@ -12,6 +15,18 @@ use super::build::load_symbol; pub static CHAR_LAYOUT: Layout = Layout::Builtin(Builtin::Int8); +/// Str.repeat : Str, Nat -> Str +pub fn str_repeat<'a, 'ctx, 'env>( + env: &Env<'a, 'ctx, 'env>, + scope: &Scope<'a, 'ctx>, + str_symbol: Symbol, + count_symbol: Symbol, +) -> BasicValueEnum<'ctx> { + let str_c_abi = str_symbol_to_c_abi(env, scope, str_symbol); + let count = load_symbol(scope, &count_symbol); + call_bitcode_fn(env, &[str_c_abi.into(), count], bitcode::STR_REPEAT) +} + /// Str.split : Str, Str -> List Str pub fn str_split<'a, 'ctx, 'env>( env: &Env<'a, 'ctx, 'env>, @@ -338,6 +353,7 @@ pub fn str_from_utf8<'a, 'ctx, 'env>( env: &Env<'a, 'ctx, 'env>, _parent: FunctionValue<'ctx>, original_wrapper: StructValue<'ctx>, + update_mode: UpdateMode, ) -> BasicValueEnum<'ctx> { let builder = env.builder; @@ -353,6 +369,7 @@ pub fn str_from_utf8<'a, 'ctx, 'env>( env.str_list_c_abi().into(), "to_i128", ), + pass_update_mode(env, update_mode), result_ptr.into(), ], bitcode::STR_FROM_UTF8, diff --git a/compiler/gen_wasm/README.md b/compiler/gen_wasm/README.md index 50018f8758..9681e350c3 100644 --- a/compiler/gen_wasm/README.md +++ b/compiler/gen_wasm/README.md @@ -3,48 +3,57 @@ ## Plan - Initial bringup - - Get a wasm backend working for some of the number tests. - - Use a separate `gen_wasm` directory for now, to avoid trying to do bringup and integration at the same time. -- Improve the fundamentals + - [x] Get a wasm backend working for some of the number tests. + - [x] Use a separate `gen_wasm` directory for now, to avoid trying to do bringup and integration at the same time. 
+- Get the fundamentals working + - [x] Come up with a way to do control flow - [x] Flesh out the details of value representations between local variables and stack memory - - [ ] Set up a way to write tests with any return value rather than just i64 and f64 - - [ ] Figure out relocations for linking object files - - [ ] Think about the Wasm module builder library we're using, are we happy with it? + - [x] Set up a way to write tests with any return value rather than just i64 and f64 + - [x] Implement stack memory + - [x] Push and pop stack frames + - [x] Deal with returning structs + - [x] Distinguish which variables go in locals, own stack frame, caller stack frame, etc. + - [ ] Ensure early Return statements don't skip stack cleanup + - [ ] Vendor-in parity_wasm library so that we can use `bumpalo::Vec` + - [ ] Implement relocations + - Requires knowing the _byte_ offset of each call site. This is awkward as the backend builds a `Vec<Instruction>` rather than a `Vec<u8>`. It may be worth serialising each instruction as it is inserted. + +- Refactor for code sharing with CPU backends + + - [ ] Implement a `scan_ast` pre-pass like `Backend` does, but for reusing Wasm locals rather than CPU registers + - [ ] Extract a trait from `WasmBackend` that looks as similar as possible to `Backend`, to prepare for code sharing + - [ ] Refactor to actually share code between `WasmBackend` and `Backend` if it seems feasible + - Integration - Move wasm files to `gen_dev/src/wasm` - Share tests between wasm and x64, with some way of saying which tests work on which backends, and dispatching to different eval helpers based on that. - Get `build_module` in object_builder.rs to dispatch to the wasm generator (adding some Wasm options to the `Triple` struct) - Get `build_module` to write to a file, or maybe return `Vec`, instead of returning an Object structure -- Code sharing - - Try to ensure that both Wasm and x64 use the same `Backend` trait so that we can share code. 
- - We need to work towards this after we've progressed a bit more with Wasm and gained more understanding and experience of the differences. - - We will have to think about how to deal with the `Backend` code that doesn't apply to Wasm. Perhaps we will end up with more traits like `RegisterBackend` / `StackBackend` or `NativeBackend` / `WasmBackend`, and perhaps even some traits to do with backends that support jumps and those that don't. ## Structured control flow -🚨 **This is an area that could be tricky** 🚨 - One of the security features of WebAssembly is that it does not allow unrestricted "jumps" to anywhere you like. It does not have an instruction for that. All of the [control instructions][control-inst] can only implement "structured" control flow, and have names like `if`, `loop`, `block` that you'd normally associate with high-level languages. There are branch (`br`) instructions that can jump to labelled blocks within the same function, but the blocks have to be nested in sensible ways. [control-inst]: https://webassembly.github.io/spec/core/syntax/instructions.html#control-instructions -Implications: +This way of representing control flow is similar to parts of the Roc AST like `When`, `If` and `LetRec`. But Mono IR converts this to jumps and join points, which are more of a Control Flow Graph than a tree. We need to map back from graph to a tree again in the Wasm backend. -Roc, like most modern languages, is already enforcing structured control flow in the source program. Constructs from the Roc AST like `When`, `If` and `LetRec` can all be converted straightforwardly to Wasm constructs. +Our solution is to wrap all joinpoint/jump graphs in an outer `loop`, with nested `block`s inside it. -However the Mono IR converts this to jumps and join points, which are more of a Control Flow Graph than a tree. That doesn't map so directly to the Wasm structures. 
This is such a common issue for compiler back-ends that the WebAssembly compiler toolkit `binaryen` has an [API for control-flow graphs][cfg-api]. We're not using `binaryen` right now. It's a C++ library, though it does have a (very thin and somewhat hard-to-use) [Rust wrapper][binaryen-rs]. We should probably investigate this area sooner rather than later. If relooping turns out to be necessary or difficult, we might need to switch from parity_wasm to binaryen. +### Possible future optimisations -> By the way, it's not obvious how to pronounce "binaryen" but apparently it rhymes with "Targaryen", the family name from the "Game of Thrones" TV series +There are other algorithms available that may result in more optimised control flow. We are not focusing on that for our development backend, but here are some notes for future reference. + +The WebAssembly compiler toolkit `binaryen` has an [API for control-flow graphs][cfg-api]. We're not using `binaryen` right now. It's a C++ library, though it does have a (very thin and somewhat hard-to-use) [Rust wrapper][binaryen-rs]. Binaryen's control-flow graph API implements the "Relooper" algorithm developed by the Emscripten project and described in [this paper](https://github.com/emscripten-core/emscripten/blob/main/docs/paper.pdf). + +> By the way, apparently "binaryen" rhymes with "Targaryen", the family name from the "Game of Thrones" TV series + +There is also an improvement on Relooper called ["Stackifier"](https://medium.com/leaningtech/solving-the-structured-control-flow-problem-once-and-for-all-5123117b1ee2). It can reorder the joinpoints and jumps to make code more efficient. (It also has things Roc wouldn't need but C++ does, like support for "irreducible" graphs that include `goto`). 
[cfg-api]: https://github.com/WebAssembly/binaryen/wiki/Compiling-to-WebAssembly-with-Binaryen#cfg-api [binaryen-rs]: https://crates.io/crates/binaryen -Binaryen's control-flow graph API implements the "Relooper" algorithm developed by the Emscripten project and described in [this paper](https://github.com/emscripten-core/emscripten/blob/main/docs/paper.pdf). - -There is an alternative algorithm that is supposed to be an improvement on Relooper, called ["Stackifier"](https://medium.com/leaningtech/solving-the-structured-control-flow-problem-once-and-for-all-5123117b1ee2). - - ## Stack machine vs register machine Wasm's instruction set is based on a stack-machine VM. Whereas CPU instructions have named registers that they operate on, Wasm has no named registers at all. The instructions don't contain register names. Instructions can oly operate on whatever data is at the top of the stack. @@ -90,29 +99,30 @@ The Mono IR contains two functions, `Num.add` and `main`, so we generate two cor (func (;1;) (result i64) ; declare function index 1 (main) with no parameters and an i64 result (local i64 i64 i64 i64) ; declare 4 local variables, all with type i64, one for each symbol in the Mono IR - i64.const 1 ; load constant of type i64 and value 1 stack=[1] - local.set 0 ; store top of stack to local0 stack=[] local0=1 - i64.const 2 ; load constant of type i64 and value 2 stack=[2] local0=1 - local.set 1 ; store top of stack to local1 stack=[] local0=1 local1=2 - local.get 0 ; load local0 to top of stack stack=[1] local0=1 local1=2 - local.get 1 ; load local1 to top of stack stack=[1,2] local0=1 local1=2 - call 0 ; call function index 0 (which pops 2 and pushes 1) stack=[3] local0=1 local1=2 - local.set 2 ; store top of stack to local2 stack=[] local0=1 local1=2 local2=3 - i64.const 4 ; load constant of type i64 and value 4 stack=[4] local0=1 local1=2 local2=3 - local.set 3 ; store top of stack to local3 stack=[] local0=1 local1=2 local2=3 local3=4 - local.get 2 ; load 
local2 to top of stack stack=[3] local0=1 local1=2 local2=3 local3=4 - local.get 3 ; load local3 to top of stack stack=[3,4] local0=1 local1=2 local2=3 local3=4 - call 0 ; call function index 0 (which pops 2 and pushes 1) stack=[7] local0=1 local1=2 local2=3 local3=4 + i64.const 1 ; stack=[1] + local.set 0 ; stack=[] local0=1 + i64.const 2 ; stack=[2] local0=1 + local.set 1 ; stack=[] local0=1 local1=2 + local.get 0 ; stack=[1] local0=1 local1=2 + local.get 1 ; stack=[1,2] local0=1 local1=2 + call 0 ; stack=[3] local0=1 local1=2 + local.set 2 ; stack=[] local0=1 local1=2 local2=3 + i64.const 4 ; stack=[4] local0=1 local1=2 local2=3 + local.set 3 ; stack=[] local0=1 local1=2 local2=3 local3=4 + local.get 2 ; stack=[3] local0=1 local1=2 local2=3 local3=4 + local.get 3 ; stack=[3,4] local0=1 local1=2 local2=3 local3=4 + call 0 ; stack=[7] local0=1 local1=2 local2=3 local3=4 return) ; return the value at the top of the stack ``` -If we run this code through the `wasm-opt` tool from the [binaryen toolkit](https://github.com/WebAssembly/binaryen#tools), the unnecessary locals get optimised away. The command line below runs the minimum number of passes to achieve this (`--simplify-locals` must come first). +If we run this code through the `wasm-opt` tool from the [binaryen toolkit](https://github.com/WebAssembly/binaryen#tools), the unnecessary locals get optimised away (which is all of them in this example!). The command line below runs the minimum number of passes to achieve this (`--simplify-locals` must come first). ``` $ wasm-opt --simplify-locals --reorder-locals --vacuum example.wasm > opt.wasm ``` -The optimised functions have no local variables, and the code shrinks to about 60% of its original size. +The optimised functions have no local variables at all for this example. (Of course, this is an oversimplified toy example! It might not be so extreme in a real program.) 
+ ``` (func (;0;) (param i64 i64) (result i64) local.get 0 @@ -122,9 +132,20 @@ The optimised functions have no local variables, and the code shrinks to about 6 i64.const 1 i64.const 2 call 0 - i64.const 4) + i64.const 4 + call 0) ``` +### Reducing sets and gets + +It would be nice to find some cheap optimisation to reduce the number of `local.set` and `local.get` instructions. + +We don't need a `local` if the value we want is already at the top of the VM stack. In fact, for our example above, it just so happens that if we simply skip generating the `local.set` instructions, everything _does_ appear on the VM stack in the right order, which means we can skip the `local.get` too. It ends up being very close to the fully optimised version! I assume this is because the Mono IR within the function is in dependency order, but I'm not sure... + +Of course the trick is to do this reliably for more complex dependency graphs. I am investigating whether we can do it by optimistically assuming it's OK not to create a local, and then keeping track of which symbols are at which positions in the VM stack after every instruction. Then when we need to use a symbol we can first check if it's on the VM stack and only create a local if it's not. In cases where we _do_ need to create a local, we need to go back and insert a `local.set` instruction at an earlier point in the program. We can make this fast by waiting to do all of the insertions in one batch when we're finalising the procedure. + +For a while we thought it would be very helpful to reuse the same local for multiple symbols at different points in the program. And we already have similar code in the CPU backends for register allocation. But on further examination, it doesn't actually buy us much! In our example above, we would still have the same number of `local.set` and `local.get` instructions - they'd just be operating on two locals instead of four! That doesn't shrink much code. 
Only the declaration at the top of the function would shrink from `(local i64 i64 i64 i64)` to `(local i64 i64)`... and in fact that's only smaller in the text format, it's the same size in the binary format! So the `scan_ast` pass doesn't seem worthwhile for Wasm. + ## Memory WebAssembly programs have a "linear memory" for storing data, which is a block of memory assigned to it by the host. You can assign a min and max size to the memory, and the WebAssembly program can request 64kB pages from the host, just like a "normal" program would request pages from the OS. Addresses start at zero and go up to whatever the current size is. Zero is a perfectly normal address like any other, and dereferencing it is not a segfault. But addresses beyond the current memory size are out of bounds and dereferencing them will cause a panic. @@ -143,7 +164,7 @@ When we are talking about how we store values in _memory_, I'll use the term _st Of course our program can use another area of memory as a heap as well. WebAssembly doesn't mind how you divide up your memory. It just gives you some memory and some instructions for loading and storing. -## Function calls +## Calling conventions & stack memory In WebAssembly you call a function by pushing arguments to the stack and then issuing a `call` instruction, which specifies a function index. The VM knows how many values to pop off the stack by examining the _type_ of the function. In our example earlier, `Num.add` had the type `[i64 i64] → [i64]` so it expects to find two i64's on the stack and pushes one i64 back as the result. Remember, the runtime engine will validate the module before running it, and if your generated code is trying to call a function at a point in the program where the wrong value types are on the stack, it will fail validation. 
@@ -151,11 +172,17 @@ Function arguments are restricted to the four value types, `i32`, `i64`, `f32` a That's all great for primitive values but what happens when we want to pass more complex data structures between functions? -Well, remember, "stack memory" is not a special kind of memory in WebAssembly, it's just an area of our memory where we _decide_ that we want to implement a stack data structure. So we can implement it however we want. A good choice would be to make our stack frame look the same as it would when we're targeting a CPU, except without the return address (since there's no need for one). We can also decide to pass numbers through the machine stack rather than in stack memory, since that takes fewer instructions. +Well, remember, "stack memory" is not a special kind of memory in WebAssembly, and is separate from the VM stack. It's just an area of our memory where we implement a stack data structure. But there are some conventions that it makes sense to follow so that we can easily link to Wasm code generated from Zig or other languages. -The only other thing we need is a stack pointer. On CPU targets, there's often have a specific "stack pointer" register. WebAssembly has no equivalent to that, but we can use a `global` variable. +### Observations from compiled C code -The system I've outlined above is based on my experience of compiling C to WebAssembly via the Emscripten toolchain (which is built on top of clang). It's also in line with what the WebAssembly project describes [here](https://github.com/WebAssembly/design/blob/main/Rationale.md#locals). +- `global 0` is used as the stack pointer, and its value is normally copied to a `local` as well (presumably because locals tend to be assigned to CPU registers) +- Stack memory grows downwards +- If a C function returns a struct, the compiled WebAssembly function has no return value, but instead has an extra _argument_. 
The argument is an `i32` pointer to space allocated in the caller's stack, that the called function can write to. +- There is no maximum number of arguments for a WebAssembly function, and arguments are not passed via _stack memory_. This makes sense because the _VM stack_ has no size limit. It's like having a CPU with an unlimited number of registers. +- Stack memory is only used for allocating local variables, not for passing arguments. And it's only used for values that cannot be stored in one of WebAssembly's primitive values (`i32`, `i64`, `f32`, `f64`). + +These observations are based on experiments compiling C to WebAssembly via the Emscripten toolchain (which is built on top of clang). It's also in line with what the WebAssembly project describes [here](https://github.com/WebAssembly/design/blob/main/Rationale.md#locals). ## Modules vs Instances diff --git a/compiler/gen_wasm/src/backend.rs b/compiler/gen_wasm/src/backend.rs index 0b03f4aea8..4512203625 100644 --- a/compiler/gen_wasm/src/backend.rs +++ b/compiler/gen_wasm/src/backend.rs @@ -1,5 +1,5 @@ use parity_wasm::builder; -use parity_wasm::builder::{CodeLocation, ModuleBuilder}; +use parity_wasm::builder::{CodeLocation, FunctionDefinition, ModuleBuilder, SignatureBuilder}; use parity_wasm::elements::{ BlockType, Instruction, Instruction::*, Instructions, Local, ValueType, }; @@ -8,147 +8,28 @@ use roc_collections::all::MutMap; use roc_module::low_level::LowLevel; use roc_module::symbol::Symbol; use roc_mono::ir::{CallType, Expr, JoinPointId, Literal, Proc, Stmt}; -use roc_mono::layout::{Builtin, Layout, UnionLayout}; +use roc_mono::layout::{Builtin, Layout}; -use crate::*; +use crate::layout::WasmLayout; +use crate::storage::{StackMemoryLocation, SymbolStorage}; +use crate::{ + copy_memory, pop_stack_frame, push_stack_frame, round_up_to_alignment, CopyMemoryConfig, + LocalId, PTR_SIZE, PTR_TYPE, +}; // Don't allocate any constant data at address zero or near it. Would be valid, but bug-prone. 
// Follow Emscripten's example by using 1kB (4 bytes would probably do) const UNUSED_DATA_SECTION_BYTES: u32 = 1024; -#[derive(Clone, Copy, Debug)] -struct LocalId(u32); - #[derive(Clone, Copy, Debug)] struct LabelId(u32); -#[derive(Debug)] -struct SymbolStorage(LocalId, WasmLayout); - -// See README for background information on Wasm locals, memory and function calls -#[derive(Debug)] -pub enum WasmLayout { - // Most number types can fit in a Wasm local without any stack memory. - // Roc i8 is represented as an i32 local. Store the type and the original size. - LocalOnly(ValueType, u32), - - // A `local` pointing to stack memory - StackMemory(u32), - - // A `local` pointing to heap memory - HeapMemory, -} - -impl WasmLayout { - fn new(layout: &Layout) -> Self { - use ValueType::*; - let size = layout.stack_size(PTR_SIZE); - match layout { - Layout::Builtin(Builtin::Int128) => Self::StackMemory(size), - Layout::Builtin(Builtin::Int64) => Self::LocalOnly(I64, size), - Layout::Builtin(Builtin::Int32) => Self::LocalOnly(I32, size), - Layout::Builtin(Builtin::Int16) => Self::LocalOnly(I32, size), - Layout::Builtin(Builtin::Int8) => Self::LocalOnly(I32, size), - Layout::Builtin(Builtin::Int1) => Self::LocalOnly(I32, size), - Layout::Builtin(Builtin::Usize) => Self::LocalOnly(I32, size), - Layout::Builtin(Builtin::Decimal) => Self::StackMemory(size), - Layout::Builtin(Builtin::Float128) => Self::StackMemory(size), - Layout::Builtin(Builtin::Float64) => Self::LocalOnly(F64, size), - Layout::Builtin(Builtin::Float32) => Self::LocalOnly(F32, size), - Layout::Builtin(Builtin::Str) => Self::StackMemory(size), - Layout::Builtin(Builtin::Dict(_, _)) => Self::StackMemory(size), - Layout::Builtin(Builtin::Set(_)) => Self::StackMemory(size), - Layout::Builtin(Builtin::List(_)) => Self::StackMemory(size), - Layout::Builtin(Builtin::EmptyStr) => Self::StackMemory(size), - Layout::Builtin(Builtin::EmptyList) => Self::StackMemory(size), - Layout::Builtin(Builtin::EmptyDict) => 
Self::StackMemory(size), - Layout::Builtin(Builtin::EmptySet) => Self::StackMemory(size), - Layout::LambdaSet(lambda_set) => WasmLayout::new(&lambda_set.runtime_representation()), - Layout::Struct(_) => Self::StackMemory(size), - Layout::Union(UnionLayout::NonRecursive(_)) => Self::StackMemory(size), - Layout::Union(UnionLayout::Recursive(_)) => Self::HeapMemory, - Layout::Union(UnionLayout::NonNullableUnwrapped(_)) => Self::HeapMemory, - Layout::Union(UnionLayout::NullableWrapped { .. }) => Self::HeapMemory, - Layout::Union(UnionLayout::NullableUnwrapped { .. }) => Self::HeapMemory, - Layout::RecursivePointer => Self::HeapMemory, - } - } - - fn value_type(&self) -> ValueType { - match self { - Self::LocalOnly(type_, _) => *type_, - _ => PTR_TYPE, - } - } - - fn stack_memory(&self) -> u32 { - match self { - Self::StackMemory(size) => *size, - _ => 0, - } - } - - #[allow(dead_code)] - fn load(&self, offset: u32) -> Result { - use crate::backend::WasmLayout::*; - use ValueType::*; - - match self { - LocalOnly(I32, 4) => Ok(I32Load(ALIGN_4, offset)), - LocalOnly(I32, 2) => Ok(I32Load16S(ALIGN_2, offset)), - LocalOnly(I32, 1) => Ok(I32Load8S(ALIGN_1, offset)), - LocalOnly(I64, 8) => Ok(I64Load(ALIGN_8, offset)), - LocalOnly(F64, 8) => Ok(F64Load(ALIGN_8, offset)), - LocalOnly(F32, 4) => Ok(F32Load(ALIGN_4, offset)), - - // LocalOnly(F32, 2) => Ok(), // convert F16 to F32 (lowlevel function? Wasm-only?) - // StackMemory(size) => Ok(), // would this be some kind of memcpy in the IR? 
- HeapMemory => { - if PTR_TYPE == I64 { - Ok(I64Load(ALIGN_8, offset)) - } else { - Ok(I32Load(ALIGN_4, offset)) - } - } - - _ => Err(format!( - "Failed to generate load instruction for WasmLayout {:?}", - self - )), - } - } - - #[allow(dead_code)] - fn store(&self, offset: u32) -> Result { - use crate::backend::WasmLayout::*; - use ValueType::*; - - match self { - LocalOnly(I32, 4) => Ok(I32Store(ALIGN_4, offset)), - LocalOnly(I32, 2) => Ok(I32Store16(ALIGN_2, offset)), - LocalOnly(I32, 1) => Ok(I32Store8(ALIGN_1, offset)), - LocalOnly(I64, 8) => Ok(I64Store(ALIGN_8, offset)), - LocalOnly(F64, 8) => Ok(F64Store(ALIGN_8, offset)), - LocalOnly(F32, 4) => Ok(F32Store(ALIGN_4, offset)), - - // LocalOnly(F32, 2) => Ok(), // convert F32 to F16 (lowlevel function? Wasm-only?) - // StackMemory(size) => Ok(), // would this be some kind of memcpy in the IR? - HeapMemory => { - if PTR_TYPE == I64 { - Ok(I64Store(ALIGN_8, offset)) - } else { - Ok(I32Store(ALIGN_4, offset)) - } - } - - _ => Err(format!( - "Failed to generate store instruction for WasmLayout {:?}", - self - )), - } - } +enum LocalKind { + Parameter, + Variable, } +// TODO: use Bumpalo Vec once parity_wasm supports general iterators (>=0.43) pub struct WasmBackend<'a> { // Module: Wasm AST pub builder: ModuleBuilder, @@ -160,12 +41,12 @@ pub struct WasmBackend<'a> { // Functions: Wasm AST instructions: std::vec::Vec, - ret_type: ValueType, arg_types: std::vec::Vec, locals: std::vec::Vec, // Functions: internal state & IR mappings - stack_memory: u32, + stack_memory: i32, + stack_frame_pointer: Option, symbol_storage_map: MutMap, /// how many blocks deep are we (used for jumps) block_depth: u32, @@ -185,12 +66,12 @@ impl<'a> WasmBackend<'a> { // Functions: Wasm AST instructions: std::vec::Vec::with_capacity(256), - ret_type: ValueType::I32, arg_types: std::vec::Vec::with_capacity(8), locals: std::vec::Vec::with_capacity(32), // Functions: internal state & IR mappings stack_memory: 0, + stack_frame_pointer: None, 
symbol_storage_map: MutMap::default(), block_depth: 0, joinpoint_label_map: MutMap::default(), @@ -205,48 +86,18 @@ impl<'a> WasmBackend<'a> { // Functions: internal state & IR mappings self.stack_memory = 0; + self.stack_frame_pointer = None; self.symbol_storage_map.clear(); - // joinpoint_label_map.clear(); + self.joinpoint_label_map.clear(); + assert_eq!(self.block_depth, 0); } pub fn build_proc(&mut self, proc: Proc<'a>, sym: Symbol) -> Result { - let ret_layout = WasmLayout::new(&proc.ret_layout); - - if let WasmLayout::StackMemory { .. } = ret_layout { - return Err(format!( - "Not yet implemented: Returning values to callee stack memory {:?} {:?}", - proc.name, sym - )); - } - - self.ret_type = ret_layout.value_type(); - self.arg_types.reserve(proc.args.len()); - - for (layout, symbol) in proc.args { - let wasm_layout = WasmLayout::new(layout); - self.arg_types.push(wasm_layout.value_type()); - self.insert_local(wasm_layout, *symbol); - } + let signature_builder = self.start_proc(&proc); self.build_stmt(&proc.body, &proc.ret_layout)?; - let signature = builder::signature() - .with_params(self.arg_types.clone()) // requires std::Vec, not Bumpalo - .with_result(self.ret_type) - .build_sig(); - - // functions must end with an End instruction/opcode - let mut instructions = self.instructions.clone(); - instructions.push(Instruction::End); - - let function_def = builder::function() - .with_signature(signature) - .body() - .with_locals(self.locals.clone()) - .with_instructions(Instructions::new(instructions)) - .build() // body - .build(); // function - + let function_def = self.finalize_proc(signature_builder); let location = self.builder.push_function(function_def); let function_index = location.body; self.proc_symbol_map.insert(sym, location); @@ -255,48 +106,188 @@ impl<'a> WasmBackend<'a> { Ok(function_index) } - fn insert_local(&mut self, layout: WasmLayout, symbol: Symbol) -> LocalId { - self.stack_memory += layout.stack_memory(); - let index = 
self.symbol_storage_map.len(); - if index >= self.arg_types.len() { - self.locals.push(Local::new(1, layout.value_type())); + fn start_proc(&mut self, proc: &Proc<'a>) -> SignatureBuilder { + let ret_layout = WasmLayout::new(&proc.ret_layout); + + let signature_builder = if let WasmLayout::StackMemory { .. } = ret_layout { + self.arg_types.push(PTR_TYPE); + self.start_block(BlockType::NoResult); // block to ensure all paths pop stack memory (if any) + builder::signature() + } else { + let ret_type = ret_layout.value_type(); + self.start_block(BlockType::Value(ret_type)); // block to ensure all paths pop stack memory (if any) + builder::signature().with_result(ret_type) + }; + + for (layout, symbol) in proc.args { + self.insert_local(WasmLayout::new(layout), *symbol, LocalKind::Parameter); } - let local_id = LocalId(index as u32); - let storage = SymbolStorage(local_id, layout); - self.symbol_storage_map.insert(symbol, storage); - local_id + + signature_builder.with_params(self.arg_types.clone()) } - fn get_symbol_storage(&self, sym: &Symbol) -> Result<&SymbolStorage, String> { - self.symbol_storage_map.get(sym).ok_or_else(|| { - format!( + fn finalize_proc(&mut self, signature_builder: SignatureBuilder) -> FunctionDefinition { + self.end_block(); // end the block from start_proc, to ensure all paths pop stack memory (if any) + + let mut final_instructions = Vec::with_capacity(self.instructions.len() + 10); + + if self.stack_memory > 0 { + push_stack_frame( + &mut final_instructions, + self.stack_memory, + self.stack_frame_pointer.unwrap(), + ); + } + + final_instructions.extend(self.instructions.drain(0..)); + + if self.stack_memory > 0 { + pop_stack_frame( + &mut final_instructions, + self.stack_memory, + self.stack_frame_pointer.unwrap(), + ); + } + final_instructions.push(End); + + builder::function() + .with_signature(signature_builder.build_sig()) + .body() + .with_locals(self.locals.clone()) + .with_instructions(Instructions::new(final_instructions)) + 
.build() // body + .build() // function + } + + fn insert_local( + &mut self, + wasm_layout: WasmLayout, + symbol: Symbol, + kind: LocalKind, + ) -> Option { + let next_local_id = LocalId((self.arg_types.len() + self.locals.len()) as u32); + + match kind { + LocalKind::Parameter => { + self.arg_types.push(wasm_layout.value_type()); + } + LocalKind::Variable => { + self.locals.push(Local::new(1, wasm_layout.value_type())); + } + } + + let (maybe_local_id, storage) = match wasm_layout { + WasmLayout::LocalOnly(value_type, size) => ( + Some(next_local_id), + SymbolStorage::Local { + local_id: next_local_id, + value_type, + size, + }, + ), + + WasmLayout::HeapMemory => ( + Some(next_local_id), + SymbolStorage::Local { + local_id: next_local_id, + value_type: PTR_TYPE, + size: PTR_SIZE, + }, + ), + + WasmLayout::StackMemory { + size, + alignment_bytes, + } => { + let location = match kind { + LocalKind::Parameter => StackMemoryLocation::PointerArg(next_local_id), + + LocalKind::Variable => { + match self.stack_frame_pointer { + Some(_) => {} + None => { + self.stack_frame_pointer = Some(next_local_id); + } + }; + + let offset = + round_up_to_alignment(self.stack_memory, alignment_bytes as i32); + + self.stack_memory = offset + size as i32; + + StackMemoryLocation::FrameOffset(offset as u32) + } + }; + + ( + None, + SymbolStorage::StackMemory { + location, + size, + alignment_bytes, + }, + ) + } + }; + + self.symbol_storage_map.insert(symbol, storage); + + maybe_local_id + } + + fn get_symbol_storage(&self, sym: &Symbol) -> &SymbolStorage { + self.symbol_storage_map.get(sym).unwrap_or_else(|| { + panic!( "Symbol {:?} not found in function scope:\n{:?}", sym, self.symbol_storage_map ) }) } - fn load_from_symbol(&mut self, sym: &Symbol) -> Result<(), String> { - let SymbolStorage(LocalId(local_id), _) = self.get_symbol_storage(sym)?; - let id: u32 = *local_id; - self.instructions.push(GetLocal(id)); - Ok(()) + fn local_id_from_symbol(&self, sym: &Symbol) -> LocalId { + let 
storage = self.get_symbol_storage(sym); + match storage { + SymbolStorage::Local { local_id, .. } => *local_id, + _ => { + panic!("{:?} does not have a local_id", sym); + } + } + } + + /// Load a symbol, e.g. for passing to a function call + fn load_symbol(&mut self, sym: &Symbol) { + let storage = self.get_symbol_storage(sym).to_owned(); + match storage { + SymbolStorage::Local { local_id, .. } + | SymbolStorage::StackMemory { + location: StackMemoryLocation::PointerArg(local_id), + .. + } => { + self.instructions.push(GetLocal(local_id.0)); + } + + SymbolStorage::StackMemory { + location: StackMemoryLocation::FrameOffset(offset), + .. + } => { + self.instructions.extend([ + GetLocal(self.stack_frame_pointer.unwrap().0), + I32Const(offset as i32), + I32Add, + ]); + } + } } /// start a loop that leaves a value on the stack fn start_loop_with_return(&mut self, value_type: ValueType) { self.block_depth += 1; - - // self.instructions.push(Loop(BlockType::NoResult)); self.instructions.push(Loop(BlockType::Value(value_type))); } - fn start_block(&mut self) { + fn start_block(&mut self, block_type: BlockType) { self.block_depth += 1; - - // Our blocks always end with a `return` or `br`, - // so they never leave extra values on the stack - self.instructions.push(Block(BlockType::NoResult)); + self.instructions.push(Block(block_type)); } fn end_block(&mut self) { @@ -306,36 +297,80 @@ impl<'a> WasmBackend<'a> { fn build_stmt(&mut self, stmt: &Stmt<'a>, ret_layout: &Layout<'a>) -> Result<(), String> { match stmt { - // This pattern is a simple optimisation to get rid of one local and two instructions per proc. 
- // If we are just returning the expression result, then don't SetLocal and immediately GetLocal + // Simple optimisation: if we are just returning the expression, we don't need a local Stmt::Let(let_sym, expr, layout, Stmt::Ret(ret_sym)) if let_sym == ret_sym => { + let wasm_layout = WasmLayout::new(layout); + if let WasmLayout::StackMemory { + size, + alignment_bytes, + } = wasm_layout + { + // Map this symbol to the first argument (pointer into caller's stack) + // Saves us from having to copy it later + let storage = SymbolStorage::StackMemory { + location: StackMemoryLocation::PointerArg(LocalId(0)), + size, + alignment_bytes, + }; + self.symbol_storage_map.insert(*let_sym, storage); + } self.build_expr(let_sym, expr, layout)?; - self.instructions.push(Return); + self.instructions.push(Br(self.block_depth)); // jump to end of function (stack frame pop) Ok(()) } Stmt::Let(sym, expr, layout, following) => { let wasm_layout = WasmLayout::new(layout); - let local_id = self.insert_local(wasm_layout, *sym); + let maybe_local_id = self.insert_local(wasm_layout, *sym, LocalKind::Variable); self.build_expr(sym, expr, layout)?; - self.instructions.push(SetLocal(local_id.0)); + + if let Some(local_id) = maybe_local_id { + self.instructions.push(SetLocal(local_id.0)); + } self.build_stmt(following, ret_layout)?; Ok(()) } Stmt::Ret(sym) => { - if let Some(SymbolStorage(local_id, _)) = self.symbol_storage_map.get(sym) { - self.instructions.push(GetLocal(local_id.0)); - self.instructions.push(Return); - Ok(()) - } else { - Err(format!( - "Not yet implemented: returning values with layout {:?}", - ret_layout - )) + use crate::storage::SymbolStorage::*; + + let storage = self.symbol_storage_map.get(sym).unwrap(); + + match storage { + StackMemory { + location, + size, + alignment_bytes, + } => { + let (from_ptr, from_offset) = match location { + StackMemoryLocation::PointerArg(local_id) => (*local_id, 0), + StackMemoryLocation::FrameOffset(offset) => { + 
(self.stack_frame_pointer.unwrap(), *offset) + } + }; + + copy_memory( + &mut self.instructions, + CopyMemoryConfig { + from_ptr, + from_offset, + to_ptr: LocalId(0), + to_offset: 0, + size: *size, + alignment_bytes: *alignment_bytes, + }, + ); + } + + Local { local_id, .. } => { + self.instructions.push(GetLocal(local_id.0)); + self.instructions.push(Br(self.block_depth)); // jump to end of function (for stack frame pop) + } } + + Ok(()) } Stmt::Switch { @@ -351,19 +386,16 @@ impl<'a> WasmBackend<'a> { // create (number_of_branches - 1) new blocks. for _ in 0..branches.len() { - self.start_block() + self.start_block(BlockType::NoResult) } // the LocalId of the symbol that we match on - let matched_on = match self.symbol_storage_map.get(cond_symbol) { - Some(SymbolStorage(local_id, _)) => local_id.0, - None => unreachable!("symbol not defined: {:?}", cond_symbol), - }; + let matched_on = self.local_id_from_symbol(cond_symbol); // then, we jump whenever the value under scrutiny is equal to the value of a branch for (i, (value, _, _)) in branches.iter().enumerate() { // put the cond_symbol on the top of the stack - self.instructions.push(GetLocal(matched_on)); + self.instructions.push(GetLocal(matched_on.0)); self.instructions.push(I32Const(*value as i32)); @@ -398,12 +430,13 @@ impl<'a> WasmBackend<'a> { let mut jp_parameter_local_ids = std::vec::Vec::with_capacity(parameters.len()); for parameter in parameters.iter() { let wasm_layout = WasmLayout::new(¶meter.layout); - let local_id = self.insert_local(wasm_layout, parameter.symbol); - - jp_parameter_local_ids.push(local_id); + let maybe_local_id = + self.insert_local(wasm_layout, parameter.symbol, LocalKind::Variable); + let jp_param_id = maybe_local_id.unwrap(); + jp_parameter_local_ids.push(jp_param_id); } - self.start_block(); + self.start_block(BlockType::NoResult); self.joinpoint_label_map .insert(*id, (self.block_depth, jp_parameter_local_ids)); @@ -429,12 +462,8 @@ impl<'a> WasmBackend<'a> { // put the 
arguments on the stack for (symbol, local_id) in arguments.iter().zip(locals.iter()) { - let argument = match self.symbol_storage_map.get(symbol) { - Some(SymbolStorage(local_id, _)) => local_id.0, - None => unreachable!("symbol not defined: {:?}", symbol), - }; - - self.instructions.push(GetLocal(argument)); + let argument = self.local_id_from_symbol(symbol); + self.instructions.push(GetLocal(argument.0)); self.instructions.push(SetLocal(local_id.0)); } @@ -463,7 +492,7 @@ impl<'a> WasmBackend<'a> { }) => match call_type { CallType::ByName { name: func_sym, .. } => { for arg in *arguments { - self.load_from_symbol(arg)?; + self.load_symbol(arg); } let function_location = self.proc_symbol_map.get(func_sym).ok_or(format!( "Cannot find function {:?} called from {:?}", @@ -479,46 +508,102 @@ impl<'a> WasmBackend<'a> { x => Err(format!("the call type, {:?}, is not yet implemented", x)), }, + Expr::Struct(fields) => self.create_struct(sym, layout, fields), + x => Err(format!("Expression is not yet implemented {:?}", x)), } } fn load_literal(&mut self, lit: &Literal<'a>, layout: &Layout<'a>) -> Result<(), String> { - match lit { - Literal::Bool(x) => { - self.instructions.push(I32Const(*x as i32)); - Ok(()) + let instruction = match lit { + Literal::Bool(x) => I32Const(*x as i32), + Literal::Byte(x) => I32Const(*x as i32), + Literal::Int(x) => match layout { + Layout::Builtin(Builtin::Int64) => I64Const(*x as i64), + Layout::Builtin( + Builtin::Int32 + | Builtin::Int16 + | Builtin::Int8 + | Builtin::Int1 + | Builtin::Usize, + ) => I32Const(*x as i32), + x => { + return Err(format!("loading literal, {:?}, is not yet implemented", x)); + } + }, + Literal::Float(x) => match layout { + Layout::Builtin(Builtin::Float64) => F64Const((*x as f64).to_bits()), + Layout::Builtin(Builtin::Float32) => F32Const((*x as f32).to_bits()), + x => { + return Err(format!("loading literal, {:?}, is not yet implemented", x)); + } + }, + x => { + return Err(format!("loading literal, {:?}, is 
not yet implemented", x)); } - Literal::Byte(x) => { - self.instructions.push(I32Const(*x as i32)); - Ok(()) - } - Literal::Int(x) => { - let instruction = match layout { - Layout::Builtin(Builtin::Int64) => I64Const(*x as i64), - Layout::Builtin( - Builtin::Int32 - | Builtin::Int16 - | Builtin::Int8 - | Builtin::Int1 - | Builtin::Usize, - ) => I32Const(*x as i32), - x => panic!("loading literal, {:?}, is not yet implemented", x), - }; - self.instructions.push(instruction); - Ok(()) - } - Literal::Float(x) => { - let instruction = match layout { - Layout::Builtin(Builtin::Float64) => F64Const((*x as f64).to_bits()), - Layout::Builtin(Builtin::Float32) => F32Const((*x as f32).to_bits()), - x => panic!("loading literal, {:?}, is not yet implemented", x), - }; - self.instructions.push(instruction); - Ok(()) - } - x => Err(format!("loading literal, {:?}, is not yet implemented", x)), + }; + self.instructions.push(instruction); + Ok(()) + } + + fn create_struct( + &mut self, + sym: &Symbol, + layout: &Layout<'a>, + fields: &'a [Symbol], + ) -> Result<(), String> { + let storage = self.get_symbol_storage(sym).to_owned(); + + if let Layout::Struct(field_layouts) = layout { + match storage { + SymbolStorage::StackMemory { location, size, .. } => { + if size > 0 { + let (local_id, struct_offset) = + location.local_and_offset(self.stack_frame_pointer); + let mut field_offset = struct_offset; + for (field, _) in fields.iter().zip(field_layouts.iter()) { + field_offset += self.copy_symbol_to_pointer_at_offset( + local_id, + field_offset, + field, + ); + } + } else { + return Err(format!("Not supported yet: zero-size struct at {:?}", sym)); + } + } + _ => { + return Err(format!( + "Cannot create struct {:?} with storage {:?}", + sym, storage + )); + } + }; + } else { + // Struct expression but not Struct layout => single element. Copy it. 
+ let field_storage = self.get_symbol_storage(&fields[0]).to_owned(); + storage.copy_from( + &field_storage, + &mut self.instructions, + self.stack_frame_pointer, + ); } + Ok(()) + } + + fn copy_symbol_to_pointer_at_offset( + &mut self, + to_ptr: LocalId, + to_offset: u32, + from_symbol: &Symbol, + ) -> u32 { + let from_storage = self.get_symbol_storage(from_symbol).to_owned(); + from_storage.copy_to_memory( + &mut self.instructions, + to_ptr, + to_offset, + self.stack_frame_pointer, + ) } fn build_call_low_level( @@ -528,7 +613,7 @@ impl<'a> WasmBackend<'a> { return_layout: &Layout<'a>, ) -> Result<(), String> { for arg in args { - self.load_from_symbol(arg)?; + self.load_symbol(arg); } let wasm_layout = WasmLayout::new(return_layout); self.build_instructions_lowlevel(lowlevel, wasm_layout.value_type())?; @@ -546,7 +631,7 @@ impl<'a> WasmBackend<'a> { // For those, we'll need to pre-process each argument before the main op, // so simple arrays of instructions won't work. But there are common patterns. let instructions: &[Instruction] = match lowlevel { - // Wasm type might not be enough, may need to sign-extend i8 etc. Maybe in load_from_symbol? + // Wasm type might not be enough, may need to sign-extend i8 etc. Maybe in load_symbol? LowLevel::NumAdd => match return_value_type { ValueType::I32 => &[I32Add], ValueType::I64 => &[I64Add], diff --git a/compiler/gen_wasm/src/layout.rs b/compiler/gen_wasm/src/layout.rs new file mode 100644 index 0000000000..df59b80eb1 --- /dev/null +++ b/compiler/gen_wasm/src/layout.rs @@ -0,0 +1,82 @@ +use parity_wasm::elements::ValueType; +use roc_mono::layout::{Layout, UnionLayout}; + +use crate::{PTR_SIZE, PTR_TYPE}; + +// See README for background information on Wasm locals, memory and function calls +#[derive(Debug, Clone)] +pub enum WasmLayout { + // Primitive number value. Just a Wasm local, without any stack memory. + // For example, Roc i8 is represented as Wasm i32. Store the type and the original size. 
+ LocalOnly(ValueType, u32), + + // Local pointer to stack memory + StackMemory { size: u32, alignment_bytes: u32 }, + + // Local pointer to heap memory + HeapMemory, +} + +impl WasmLayout { + pub fn new(layout: &Layout) -> Self { + use roc_mono::layout::Builtin::*; + use UnionLayout::*; + use ValueType::*; + + let size = layout.stack_size(PTR_SIZE); + let alignment_bytes = layout.alignment_bytes(PTR_SIZE); + + match layout { + Layout::Builtin(Int32 | Int16 | Int8 | Int1 | Usize) => Self::LocalOnly(I32, size), + + Layout::Builtin(Int64) => Self::LocalOnly(I64, size), + + Layout::Builtin(Float32) => Self::LocalOnly(F32, size), + + Layout::Builtin(Float64) => Self::LocalOnly(F64, size), + + Layout::Builtin( + Int128 + | Decimal + | Float128 + | Str + | Dict(_, _) + | Set(_) + | List(_) + | EmptyStr + | EmptyList + | EmptyDict + | EmptySet, + ) + | Layout::Struct(_) + | Layout::LambdaSet(_) + | Layout::Union(NonRecursive(_)) => Self::StackMemory { + size, + alignment_bytes, + }, + + Layout::Union( + Recursive(_) + | NonNullableUnwrapped(_) + | NullableWrapped { .. } + | NullableUnwrapped { .. }, + ) + | Layout::RecursivePointer => Self::HeapMemory, + } + } + + pub fn value_type(&self) -> ValueType { + match self { + Self::LocalOnly(type_, _) => *type_, + _ => PTR_TYPE, + } + } + + #[allow(dead_code)] + pub fn stack_memory(&self) -> u32 { + match self { + Self::StackMemory { size, .. 
} => *size, + _ => 0, + } + } +} diff --git a/compiler/gen_wasm/src/lib.rs b/compiler/gen_wasm/src/lib.rs index 37eb3c1d5b..a5517cb4e4 100644 --- a/compiler/gen_wasm/src/lib.rs +++ b/compiler/gen_wasm/src/lib.rs @@ -1,9 +1,11 @@ mod backend; pub mod from_wasm32_memory; +mod layout; +mod storage; use bumpalo::Bump; use parity_wasm::builder; -use parity_wasm::elements::{Instruction, Internal, ValueType}; +use parity_wasm::elements::{Instruction, Instruction::*, Internal, ValueType}; use roc_collections::all::{MutMap, MutSet}; use roc_module::symbol::{Interns, Symbol}; @@ -22,6 +24,10 @@ pub const ALIGN_4: u32 = 2; pub const ALIGN_8: u32 = 3; pub const STACK_POINTER_GLOBAL_ID: u32 = 0; +pub const STACK_ALIGNMENT_BYTES: i32 = 16; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct LocalId(pub u32); pub struct Env<'a> { pub arena: &'a Bump, // not really using this much, parity_wasm works with std::vec a lot @@ -104,3 +110,85 @@ pub fn build_module_help<'a>( Ok((backend.builder, main_function_index)) } + +fn encode_alignment(bytes: u32) -> u32 { + match bytes { + 1 => ALIGN_1, + 2 => ALIGN_2, + 4 => ALIGN_4, + 8 => ALIGN_8, + _ => panic!("{:?}-byte alignment is not supported", bytes), + } +} + +pub struct CopyMemoryConfig { + from_ptr: LocalId, + from_offset: u32, + to_ptr: LocalId, + to_offset: u32, + size: u32, + alignment_bytes: u32, +} + +pub fn copy_memory(instructions: &mut Vec, config: CopyMemoryConfig) { + let alignment_flag = encode_alignment(config.alignment_bytes); + let mut i = 0; + while config.size - i >= 8 { + instructions.push(GetLocal(config.to_ptr.0)); + instructions.push(GetLocal(config.from_ptr.0)); + instructions.push(I64Load(alignment_flag, i + config.from_offset)); + instructions.push(I64Store(alignment_flag, i + config.to_offset)); + i += 8; + } + if config.size - i >= 4 { + instructions.push(GetLocal(config.to_ptr.0)); + instructions.push(GetLocal(config.from_ptr.0)); + instructions.push(I32Load(alignment_flag, i + config.from_offset)); 
+ instructions.push(I32Store(alignment_flag, i + config.to_offset)); + i += 4; + } + while config.size - i > 0 { + instructions.push(GetLocal(config.to_ptr.0)); + instructions.push(GetLocal(config.from_ptr.0)); + instructions.push(I32Load8U(alignment_flag, i + config.from_offset)); + instructions.push(I32Store8(alignment_flag, i + config.to_offset)); + i += 1; + } +} + +/// Round up to alignment_bytes (assumed to be a power of 2) +pub fn round_up_to_alignment(unaligned: i32, alignment_bytes: i32) -> i32 { + let mut aligned = unaligned; + aligned += alignment_bytes - 1; // if lower bits are non-zero, push it over the next boundary + aligned &= -alignment_bytes; // mask with a flag that has upper bits 1, lower bits 0 + aligned +} + +pub fn push_stack_frame( + instructions: &mut Vec, + size: i32, + local_frame_pointer: LocalId, +) { + let aligned_size = round_up_to_alignment(size, STACK_ALIGNMENT_BYTES); + instructions.extend([ + GetGlobal(STACK_POINTER_GLOBAL_ID), + I32Const(aligned_size), + I32Sub, + TeeLocal(local_frame_pointer.0), + SetGlobal(STACK_POINTER_GLOBAL_ID), + ]); +} + +pub fn pop_stack_frame( + instructions: &mut Vec, + size: i32, + local_frame_pointer: LocalId, +) { + let aligned_size = round_up_to_alignment(size, STACK_ALIGNMENT_BYTES); + instructions.extend([ + GetLocal(local_frame_pointer.0), + I32Const(aligned_size), + I32Add, + SetGlobal(STACK_POINTER_GLOBAL_ID), + ]); +} diff --git a/compiler/gen_wasm/src/storage.rs b/compiler/gen_wasm/src/storage.rs new file mode 100644 index 0000000000..80c0583234 --- /dev/null +++ b/compiler/gen_wasm/src/storage.rs @@ -0,0 +1,151 @@ +use crate::{copy_memory, CopyMemoryConfig, LocalId, ALIGN_1, ALIGN_2, ALIGN_4, ALIGN_8}; +use parity_wasm::elements::{Instruction, Instruction::*, ValueType}; + +#[derive(Debug, Clone)] +pub enum StackMemoryLocation { + FrameOffset(u32), + PointerArg(LocalId), +} + +impl StackMemoryLocation { + pub fn local_and_offset(&self, stack_frame_pointer: Option) -> (LocalId, u32) { + match 
self { + Self::PointerArg(local_id) => (*local_id, 0), + Self::FrameOffset(offset) => (stack_frame_pointer.unwrap(), *offset), + } + } +} + +#[derive(Debug, Clone)] +pub enum SymbolStorage { + // TODO: implicit storage in the VM stack + // TODO: const data storage + Local { + local_id: LocalId, + value_type: ValueType, + size: u32, + }, + StackMemory { + location: StackMemoryLocation, + size: u32, + alignment_bytes: u32, + }, +} + +impl SymbolStorage { + /// generate code to copy from another storage of the same type + pub fn copy_from( + &self, + from: &Self, + instructions: &mut Vec, + stack_frame_pointer: Option, + ) { + match (self, from) { + ( + Self::Local { + local_id: to_local_id, + value_type: to_value_type, + size: to_size, + }, + Self::Local { + local_id: from_local_id, + value_type: from_value_type, + size: from_size, + }, + ) => { + debug_assert!(to_value_type == from_value_type); + debug_assert!(to_size == from_size); + instructions.push(GetLocal(from_local_id.0)); + instructions.push(SetLocal(to_local_id.0)); + } + ( + Self::StackMemory { + location: to_location, + size: to_size, + alignment_bytes: to_alignment_bytes, + }, + Self::StackMemory { + location: from_location, + size: from_size, + alignment_bytes: from_alignment_bytes, + }, + ) => { + let (from_ptr, from_offset) = from_location.local_and_offset(stack_frame_pointer); + let (to_ptr, to_offset) = to_location.local_and_offset(stack_frame_pointer); + debug_assert!(*to_size == *from_size); + debug_assert!(*to_alignment_bytes == *from_alignment_bytes); + copy_memory( + instructions, + CopyMemoryConfig { + from_ptr, + from_offset, + to_ptr, + to_offset, + size: *from_size, + alignment_bytes: *from_alignment_bytes, + }, + ); + } + _ => { + panic!( + "Cannot copy different storage types {:?} to {:?}", + from, self + ); + } + } + } + + /// Generate code to copy to a memory address (such as a struct index) + pub fn copy_to_memory( + &self, + instructions: &mut Vec, + to_ptr: LocalId, + to_offset: u32, 
+ stack_frame_pointer: Option, + ) -> u32 { + match self { + Self::Local { + local_id, + value_type, + size, + .. + } => { + let store_instruction = match (value_type, size) { + (ValueType::I64, 8) => I64Store(ALIGN_8, to_offset), + (ValueType::I32, 4) => I32Store(ALIGN_4, to_offset), + (ValueType::I32, 2) => I32Store16(ALIGN_2, to_offset), + (ValueType::I32, 1) => I32Store8(ALIGN_1, to_offset), + (ValueType::F32, 4) => F32Store(ALIGN_4, to_offset), + (ValueType::F64, 8) => F64Store(ALIGN_8, to_offset), + _ => { + panic!("Cannot store {:?} with alignment of {:?}", value_type, size); + } + }; + instructions.push(GetLocal(to_ptr.0)); + instructions.push(GetLocal(local_id.0)); + instructions.push(store_instruction); + *size + } + + Self::StackMemory { + location, + size, + alignment_bytes, + } => { + let (from_ptr, from_offset) = location.local_and_offset(stack_frame_pointer); + copy_memory( + instructions, + CopyMemoryConfig { + from_ptr, + from_offset, + to_ptr, + to_offset, + size: *size, + alignment_bytes: *alignment_bytes, + }, + ); + *size + } + } + } +} diff --git a/compiler/gen_wasm/tests/helpers/wasm32_test_result.rs b/compiler/gen_wasm/tests/helpers/wasm32_test_result.rs index 184edf43f7..5cf709a597 100644 --- a/compiler/gen_wasm/tests/helpers/wasm32_test_result.rs +++ b/compiler/gen_wasm/tests/helpers/wasm32_test_result.rs @@ -1,11 +1,15 @@ use parity_wasm::builder; use parity_wasm::builder::ModuleBuilder; -use parity_wasm::elements::{Instruction, Instruction::*, Instructions, Internal, ValueType}; +use parity_wasm::elements::{ + Instruction, Instruction::*, Instructions, Internal, Local, ValueType, +}; use roc_gen_wasm::from_wasm32_memory::FromWasm32Memory; use roc_gen_wasm::*; use roc_std::{RocDec, RocList, RocOrder, RocStr}; +const STACK_POINTER_LOCAL_ID: u32 = 0; + pub trait Wasm32TestResult { fn insert_test_wrapper( module_builder: &mut ModuleBuilder, @@ -16,9 +20,11 @@ pub trait Wasm32TestResult { let signature = 
builder::signature().with_result(ValueType::I32).build_sig(); + let stack_frame_pointer = Local::new(1, ValueType::I32); let function_def = builder::function() .with_signature(signature) .body() + .with_locals(vec![stack_frame_pointer]) .with_instructions(Instructions::new(instructions)) .build() // body .build(); // function @@ -35,22 +41,15 @@ pub trait Wasm32TestResult { fn build_wrapper_body(main_function_index: u32) -> Vec; } -fn build_wrapper_body_prelude(stack_memory_size: usize) -> Vec { - vec![ - GetGlobal(STACK_POINTER_GLOBAL_ID), - I32Const(stack_memory_size as i32), - I32Sub, - SetGlobal(STACK_POINTER_GLOBAL_ID), - ] -} - macro_rules! build_wrapper_body_primitive { ($store_instruction: expr, $align: expr) => { fn build_wrapper_body(main_function_index: u32) -> Vec { - const MAX_ALIGNED_SIZE: usize = 16; - let mut instructions = build_wrapper_body_prelude(MAX_ALIGNED_SIZE); + let size: i32 = 8; + let mut instructions = Vec::with_capacity(16); + push_stack_frame(&mut instructions, size, LocalId(STACK_POINTER_LOCAL_ID)); instructions.extend([ - GetGlobal(STACK_POINTER_GLOBAL_ID), + // load result address to prepare for the store instruction later + GetLocal(STACK_POINTER_LOCAL_ID), // // Call the main function with no arguments. Get primitive back. Call(main_function_index), @@ -59,9 +58,10 @@ macro_rules! build_wrapper_body_primitive { $store_instruction($align, 0), // // Return the result pointer - GetGlobal(STACK_POINTER_GLOBAL_ID), - End, + GetLocal(STACK_POINTER_LOCAL_ID), ]); + pop_stack_frame(&mut instructions, size, LocalId(STACK_POINTER_LOCAL_ID)); + instructions.push(End); instructions } }; @@ -76,18 +76,28 @@ macro_rules! 
wasm_test_result_primitive { } fn build_wrapper_body_stack_memory(main_function_index: u32, size: usize) -> Vec { - let mut instructions = build_wrapper_body_prelude(size); + let mut instructions = Vec::with_capacity(16); + push_stack_frame( + &mut instructions, + size as i32, + LocalId(STACK_POINTER_LOCAL_ID), + ); instructions.extend([ // // Call the main function with the allocated address to write the result. // No value is returned to the VM stack. This is the same as in compiled C. - GetGlobal(STACK_POINTER_GLOBAL_ID), + GetLocal(STACK_POINTER_LOCAL_ID), Call(main_function_index), // // Return the result address - GetGlobal(STACK_POINTER_GLOBAL_ID), - End, + GetLocal(STACK_POINTER_LOCAL_ID), ]); + pop_stack_frame( + &mut instructions, + size as i32, + LocalId(STACK_POINTER_LOCAL_ID), + ); + instructions.push(End); instructions } @@ -163,3 +173,106 @@ where ) } } + +impl Wasm32TestResult for (T, U, V, W) +where + T: Wasm32TestResult + FromWasm32Memory, + U: Wasm32TestResult + FromWasm32Memory, + V: Wasm32TestResult + FromWasm32Memory, + W: Wasm32TestResult + FromWasm32Memory, +{ + fn build_wrapper_body(main_function_index: u32) -> Vec { + build_wrapper_body_stack_memory( + main_function_index, + T::ACTUAL_WIDTH + U::ACTUAL_WIDTH + V::ACTUAL_WIDTH + W::ACTUAL_WIDTH, + ) + } +} + +impl Wasm32TestResult for (T, U, V, W, X) +where + T: Wasm32TestResult + FromWasm32Memory, + U: Wasm32TestResult + FromWasm32Memory, + V: Wasm32TestResult + FromWasm32Memory, + W: Wasm32TestResult + FromWasm32Memory, + X: Wasm32TestResult + FromWasm32Memory, +{ + fn build_wrapper_body(main_function_index: u32) -> Vec { + build_wrapper_body_stack_memory( + main_function_index, + T::ACTUAL_WIDTH + U::ACTUAL_WIDTH + V::ACTUAL_WIDTH + W::ACTUAL_WIDTH + X::ACTUAL_WIDTH, + ) + } +} + +impl Wasm32TestResult for (T, U, V, W, X, Y) +where + T: Wasm32TestResult + FromWasm32Memory, + U: Wasm32TestResult + FromWasm32Memory, + V: Wasm32TestResult + FromWasm32Memory, + W: Wasm32TestResult + 
FromWasm32Memory, + X: Wasm32TestResult + FromWasm32Memory, + Y: Wasm32TestResult + FromWasm32Memory, +{ + fn build_wrapper_body(main_function_index: u32) -> Vec { + build_wrapper_body_stack_memory( + main_function_index, + T::ACTUAL_WIDTH + + U::ACTUAL_WIDTH + + V::ACTUAL_WIDTH + + W::ACTUAL_WIDTH + + X::ACTUAL_WIDTH + + Y::ACTUAL_WIDTH, + ) + } +} + +impl Wasm32TestResult for (T, U, V, W, X, Y, Z) +where + T: Wasm32TestResult + FromWasm32Memory, + U: Wasm32TestResult + FromWasm32Memory, + V: Wasm32TestResult + FromWasm32Memory, + W: Wasm32TestResult + FromWasm32Memory, + X: Wasm32TestResult + FromWasm32Memory, + Y: Wasm32TestResult + FromWasm32Memory, + Z: Wasm32TestResult + FromWasm32Memory, +{ + fn build_wrapper_body(main_function_index: u32) -> Vec { + build_wrapper_body_stack_memory( + main_function_index, + T::ACTUAL_WIDTH + + U::ACTUAL_WIDTH + + V::ACTUAL_WIDTH + + W::ACTUAL_WIDTH + + X::ACTUAL_WIDTH + + Y::ACTUAL_WIDTH + + Z::ACTUAL_WIDTH, + ) + } +} + +impl Wasm32TestResult for (T, U, V, W, X, Y, Z, A) +where + T: Wasm32TestResult + FromWasm32Memory, + U: Wasm32TestResult + FromWasm32Memory, + V: Wasm32TestResult + FromWasm32Memory, + W: Wasm32TestResult + FromWasm32Memory, + X: Wasm32TestResult + FromWasm32Memory, + Y: Wasm32TestResult + FromWasm32Memory, + Z: Wasm32TestResult + FromWasm32Memory, + A: Wasm32TestResult + FromWasm32Memory, +{ + fn build_wrapper_body(main_function_index: u32) -> Vec { + build_wrapper_body_stack_memory( + main_function_index, + T::ACTUAL_WIDTH + + U::ACTUAL_WIDTH + + V::ACTUAL_WIDTH + + W::ACTUAL_WIDTH + + X::ACTUAL_WIDTH + + Y::ACTUAL_WIDTH + + Z::ACTUAL_WIDTH + + A::ACTUAL_WIDTH, + ) + } +} diff --git a/compiler/gen_wasm/tests/wasm_records.rs b/compiler/gen_wasm/tests/wasm_records.rs index 9c776ecfaa..884e92c7db 100644 --- a/compiler/gen_wasm/tests/wasm_records.rs +++ b/compiler/gen_wasm/tests/wasm_records.rs @@ -307,94 +307,13 @@ mod wasm_records { // () // ); // } - // - // #[test] - // fn i64_record1_literal() { - // 
assert_evals_to!( - // indoc!( - // r#" - // { x: 3 } - // "# - // ), - // 3, - // i64 - // ); - // } - - // #[test] - // fn i64_record2_literal() { - // assert_evals_to!( - // indoc!( - // r#" - // { x: 3, y: 5 } - // "# - // ), - // (3, 5), - // (i64, i64) - // ); - // } - - // // #[test] - // // fn i64_record3_literal() { - // // assert_evals_to!( - // // indoc!( - // // r#" - // // { x: 3, y: 5, z: 17 } - // // "# - // // ), - // // (3, 5, 17), - // // (i64, i64, i64) - // // ); - // // } - - // #[test] - // fn f64_record2_literal() { - // assert_evals_to!( - // indoc!( - // r#" - // { x: 3.1, y: 5.1 } - // "# - // ), - // (3.1, 5.1), - // (f64, f64) - // ); - // } - - // // #[test] - // // fn f64_record3_literal() { - // // assert_evals_to!( - // // indoc!( - // // r#" - // // { x: 3.1, y: 5.1, z: 17.1 } - // // "# - // // ), - // // (3.1, 5.1, 17.1), - // // (f64, f64, f64) - // // ); - // // } - - // // #[test] - // // fn bool_record4_literal() { - // // assert_evals_to!( - // // indoc!( - // // r#" - // // record : { a : Bool, b : Bool, c : Bool, d : Bool } - // // record = { a: True, b: True, c : True, d : Bool } - - // // record - // // "# - // // ), - // // (true, false, false, true), - // // (bool, bool, bool, bool) - // // ); - // // } #[test] fn i64_record1_literal() { assert_evals_to!( indoc!( r#" - { a: 3 } + { x: 3 } "# ), 3, @@ -402,31 +321,86 @@ mod wasm_records { ); } - // // #[test] - // // fn i64_record9_literal() { - // // assert_evals_to!( - // // indoc!( - // // r#" - // // { a: 3, b: 5, c: 17, d: 1, e: 9, f: 12, g: 13, h: 14, i: 15 } - // // "# - // // ), - // // (3, 5, 17, 1, 9, 12, 13, 14, 15), - // // (i64, i64, i64, i64, i64, i64, i64, i64, i64) - // // ); - // // } + #[test] + fn i64_record2_literal() { + assert_evals_to!( + indoc!( + r#" + { x: 3, y: 5 } + "# + ), + (3, 5), + (i64, i64) + ); + } - // // #[test] - // // fn f64_record3_literal() { - // // assert_evals_to!( - // // indoc!( - // // r#" - // // { x: 3.1, y: 5.1, z: 17.1 } 
- // // "# - // // ), - // // (3.1, 5.1, 17.1), - // // (f64, f64, f64) - // // ); - // // } + #[test] + fn i64_record3_literal() { + assert_evals_to!( + indoc!( + r#" + { x: 3, y: 5, z: 17 } + "# + ), + (3, 5, 17), + (i64, i64, i64) + ); + } + + #[test] + fn f64_record2_literal() { + assert_evals_to!( + indoc!( + r#" + { x: 3.1, y: 5.1 } + "# + ), + (3.1, 5.1), + (f64, f64) + ); + } + + #[test] + fn f64_record3_literal() { + assert_evals_to!( + indoc!( + r#" + { x: 3.1, y: 5.1, z: 17.1 } + "# + ), + (3.1, 5.1, 17.1), + (f64, f64, f64) + ); + } + + #[test] + fn bool_record4_literal() { + assert_evals_to!( + indoc!( + r#" + record : { a : Bool, b : Bool, c : Bool, d : Bool } + record = { a: True, b: False, c : False, d : True } + + record + "# + ), + [true, false, false, true], + [bool; 4] + ); + } + + #[test] + fn i64_record9_literal() { + assert_evals_to!( + indoc!( + r#" + { a: 3, b: 5, c: 17, d: 1, e: 9, f: 12, g: 13, h: 14, i: 15 } + "# + ), + [3, 5, 17, 1, 9, 12, 13, 14, 15], + [i64; 9] + ); + } #[test] fn bool_literal() { @@ -667,135 +641,135 @@ mod wasm_records { // ); // } - // #[test] - // fn return_record_2() { - // assert_evals_to!( - // indoc!( - // r#" - // { x: 3, y: 5 } - // "# - // ), - // [3, 5], - // [i64; 2] - // ); - // } + #[test] + fn return_record_2() { + assert_evals_to!( + indoc!( + r#" + { x: 3, y: 5 } + "# + ), + [3, 5], + [i64; 2] + ); + } - // #[test] - // fn return_record_3() { - // assert_evals_to!( - // indoc!( - // r#" - // { x: 3, y: 5, z: 4 } - // "# - // ), - // (3, 5, 4), - // (i64, i64, i64) - // ); - // } + #[test] + fn return_record_3() { + assert_evals_to!( + indoc!( + r#" + { x: 3, y: 5, z: 4 } + "# + ), + (3, 5, 4), + (i64, i64, i64) + ); + } - // #[test] - // fn return_record_4() { - // assert_evals_to!( - // indoc!( - // r#" - // { a: 3, b: 5, c: 4, d: 2 } - // "# - // ), - // [3, 5, 4, 2], - // [i64; 4] - // ); - // } + #[test] + fn return_record_4() { + assert_evals_to!( + indoc!( + r#" + { a: 3, b: 5, c: 4, d: 2 } + 
"# + ), + [3, 5, 4, 2], + [i64; 4] + ); + } - // #[test] - // fn return_record_5() { - // assert_evals_to!( - // indoc!( - // r#" - // { a: 3, b: 5, c: 4, d: 2, e: 1 } - // "# - // ), - // [3, 5, 4, 2, 1], - // [i64; 5] - // ); - // } + #[test] + fn return_record_5() { + assert_evals_to!( + indoc!( + r#" + { a: 3, b: 5, c: 4, d: 2, e: 1 } + "# + ), + [3, 5, 4, 2, 1], + [i64; 5] + ); + } - // #[test] - // fn return_record_6() { - // assert_evals_to!( - // indoc!( - // r#" - // { a: 3, b: 5, c: 4, d: 2, e: 1, f: 7 } - // "# - // ), - // [3, 5, 4, 2, 1, 7], - // [i64; 6] - // ); - // } + #[test] + fn return_record_6() { + assert_evals_to!( + indoc!( + r#" + { a: 3, b: 5, c: 4, d: 2, e: 1, f: 7 } + "# + ), + [3, 5, 4, 2, 1, 7], + [i64; 6] + ); + } - // #[test] - // fn return_record_7() { - // assert_evals_to!( - // indoc!( - // r#" - // { a: 3, b: 5, c: 4, d: 2, e: 1, f: 7, g: 8 } - // "# - // ), - // [3, 5, 4, 2, 1, 7, 8], - // [i64; 7] - // ); - // } + #[test] + fn return_record_7() { + assert_evals_to!( + indoc!( + r#" + { a: 3, b: 5, c: 4, d: 2, e: 1, f: 7, g: 8 } + "# + ), + [3, 5, 4, 2, 1, 7, 8], + [i64; 7] + ); + } - // #[test] - // fn return_record_float_int() { - // assert_evals_to!( - // indoc!( - // r#" - // { a: 3.14, b: 0x1 } - // "# - // ), - // (3.14, 0x1), - // (f64, i64) - // ); - // } + #[test] + fn return_record_float_int() { + assert_evals_to!( + indoc!( + r#" + { a: 3.14, b: 0x1 } + "# + ), + (3.14, 0x1), + (f64, i64) + ); + } - // #[test] - // fn return_record_int_float() { - // assert_evals_to!( - // indoc!( - // r#" - // { a: 0x1, b: 3.14 } - // "# - // ), - // (0x1, 3.14), - // (i64, f64) - // ); - // } + #[test] + fn return_record_int_float() { + assert_evals_to!( + indoc!( + r#" + { a: 0x1, b: 3.14 } + "# + ), + (0x1, 3.14), + (i64, f64) + ); + } - // #[test] - // fn return_record_float_float() { - // assert_evals_to!( - // indoc!( - // r#" - // { a: 6.28, b: 3.14 } - // "# - // ), - // (6.28, 3.14), - // (f64, f64) - // ); - // } + #[test] + 
fn return_record_float_float() { + assert_evals_to!( + indoc!( + r#" + { a: 6.28, b: 3.14 } + "# + ), + (6.28, 3.14), + (f64, f64) + ); + } - // #[test] - // fn return_record_float_float_float() { - // assert_evals_to!( - // indoc!( - // r#" - // { a: 6.28, b: 3.14, c: 0.1 } - // "# - // ), - // (6.28, 3.14, 0.1), - // (f64, f64, f64) - // ); - // } + #[test] + fn return_record_float_float_float() { + assert_evals_to!( + indoc!( + r#" + { a: 6.28, b: 3.14, c: 0.1 } + "# + ), + (6.28, 3.14, 0.1), + (f64, f64, f64) + ); + } // #[test] // fn return_nested_record() { @@ -851,20 +825,20 @@ mod wasm_records { // ); // } - #[test] - fn update_single_element_record() { - assert_evals_to!( - indoc!( - r#" - rec = { foo: 42} + // #[test] + // fn update_single_element_record() { + // assert_evals_to!( + // indoc!( + // r#" + // rec = { foo: 42} - { rec & foo: rec.foo + 1 } - "# - ), - 43, - i64 - ); - } + // { rec & foo: rec.foo + 1 } + // "# + // ), + // 43, + // i64 + // ); + // } // #[test] // fn booleans_in_record() { @@ -899,6 +873,24 @@ mod wasm_records { // ); // } + #[test] + fn stack_memory_return_from_branch() { + // stack memory pointer should end up in the right place after returning from a branch + assert_evals_to!( + indoc!( + r#" + stackMemoryJunk = { x: 999, y: 111 } + if True then + { x: 123, y: 321 } + else + stackMemoryJunk + "# + ), + (123, 321), + (i64, i64) + ); + } + // #[test] // fn blue_and_present() { // assert_evals_to!( diff --git a/compiler/load/src/file.rs b/compiler/load/src/file.rs index 49b0a0c291..66481d3db8 100644 --- a/compiler/load/src/file.rs +++ b/compiler/load/src/file.rs @@ -37,13 +37,13 @@ use roc_types::subs::{Subs, VarStore, Variable}; use roc_types::types::{Alias, Type}; use std::collections::hash_map::Entry::{Occupied, Vacant}; use std::collections::{HashMap, HashSet}; -use std::fs; use std::io; use std::iter; use std::path::{Path, PathBuf}; use std::str::from_utf8_unchecked; use std::sync::Arc; use std::time::{Duration, 
SystemTime}; +use std::{env, fs}; /// Default name for the binary generated for an app, if an invalid one was specified. const DEFAULT_APP_OUTPUT_PATH: &str = "app"; @@ -1351,7 +1351,12 @@ where // doing .max(1) on the entire expression guards against // num_cpus returning 0, while also avoiding wrapping // unsigned subtraction overflow. - let num_workers = num_cpus::get().max(2) - 1; + let default_num_workers = num_cpus::get().max(2) - 1; + + let num_workers = match env::var("ROC_NUM_WORKERS") { + Ok(env_str) => env_str.parse::().unwrap_or(default_num_workers), + Err(_) => default_num_workers, + }; let worker_arenas = arena.alloc(bumpalo::collections::Vec::with_capacity_in( num_workers, diff --git a/compiler/module/src/low_level.rs b/compiler/module/src/low_level.rs index 583e6017c1..8753d21af0 100644 --- a/compiler/module/src/low_level.rs +++ b/compiler/module/src/low_level.rs @@ -15,6 +15,7 @@ pub enum LowLevel { StrFromUtf8, StrFromUtf8Range, StrToUtf8, + StrRepeat, StrFromFloat, ListLen, ListGetUnsafe, @@ -40,6 +41,7 @@ pub enum LowLevel { ListKeepErrs, ListSortWith, ListDrop, + ListDropAt, ListSwap, DictSize, DictEmpty, @@ -114,19 +116,19 @@ impl LowLevel { match self { StrConcat | StrJoinWith | StrIsEmpty | StrStartsWith | StrStartsWithCodePt | StrEndsWith | StrSplit | StrCountGraphemes | StrFromInt | StrFromUtf8 - | StrFromUtf8Range | StrToUtf8 | StrFromFloat | ListLen | ListGetUnsafe | ListSet - | ListDrop | ListSingle | ListRepeat | ListReverse | ListConcat | ListContains - | ListAppend | ListPrepend | ListJoin | ListRange | ListSwap | DictSize | DictEmpty - | DictInsert | DictRemove | DictContains | DictGetUnsafe | DictKeys | DictValues - | DictUnion | DictIntersection | DictDifference | SetFromList | NumAdd | NumAddWrap - | NumAddChecked | NumSub | NumSubWrap | NumSubChecked | NumMul | NumMulWrap - | NumMulChecked | NumGt | NumGte | NumLt | NumLte | NumCompare | NumDivUnchecked - | NumRemUnchecked | NumIsMultipleOf | NumAbs | NumNeg | NumSin | NumCos - 
| NumSqrtUnchecked | NumLogUnchecked | NumRound | NumToFloat | NumPow | NumCeiling - | NumPowInt | NumFloor | NumIsFinite | NumAtan | NumAcos | NumAsin | NumBitwiseAnd - | NumBitwiseXor | NumBitwiseOr | NumShiftLeftBy | NumShiftRightBy | NumBytesToU16 - | NumBytesToU32 | NumShiftRightZfBy | NumIntCast | Eq | NotEq | And | Or | Not - | Hash | ExpectTrue => false, + | StrFromUtf8Range | StrToUtf8 | StrRepeat | StrFromFloat | ListLen | ListGetUnsafe + | ListSet | ListDrop | ListDropAt | ListSingle | ListRepeat | ListReverse + | ListConcat | ListContains | ListAppend | ListPrepend | ListJoin | ListRange + | ListSwap | DictSize | DictEmpty | DictInsert | DictRemove | DictContains + | DictGetUnsafe | DictKeys | DictValues | DictUnion | DictIntersection + | DictDifference | SetFromList | NumAdd | NumAddWrap | NumAddChecked | NumSub + | NumSubWrap | NumSubChecked | NumMul | NumMulWrap | NumMulChecked | NumGt | NumGte + | NumLt | NumLte | NumCompare | NumDivUnchecked | NumRemUnchecked | NumIsMultipleOf + | NumAbs | NumNeg | NumSin | NumCos | NumSqrtUnchecked | NumLogUnchecked | NumRound + | NumToFloat | NumPow | NumCeiling | NumPowInt | NumFloor | NumIsFinite | NumAtan + | NumAcos | NumAsin | NumBitwiseAnd | NumBitwiseXor | NumBitwiseOr | NumShiftLeftBy + | NumShiftRightBy | NumBytesToU16 | NumBytesToU32 | NumShiftRightZfBy | NumIntCast + | Eq | NotEq | And | Or | Not | Hash | ExpectTrue => false, ListMap | ListMap2 | ListMap3 | ListMapWithIndex | ListKeepIf | ListWalk | ListWalkUntil | ListWalkBackwards | ListKeepOks | ListKeepErrs | ListSortWith diff --git a/compiler/module/src/symbol.rs b/compiler/module/src/symbol.rs index c3e0f46c3a..897fcdbca3 100644 --- a/compiler/module/src/symbol.rs +++ b/compiler/module/src/symbol.rs @@ -973,6 +973,7 @@ define_builtins! 
{ 16 STR_STARTS_WITH_CODE_PT: "startsWithCodePt" 17 STR_ALIAS_ANALYSIS_STATIC: "#aliasAnalysisStatic" // string with the static lifetime 18 STR_FROM_UTF8_RANGE: "fromUtf8Range" + 19 STR_REPEAT: "repeat" } 4 LIST: "List" => { 0 LIST_LIST: "List" imported // the List.List type alias @@ -1009,6 +1010,7 @@ define_builtins! { 31 LIST_SORT_WITH: "sortWith" 32 LIST_DROP: "drop" 33 LIST_SWAP: "swap" + 34 LIST_DROP_AT: "dropAt" } 5 RESULT: "Result" => { 0 RESULT_RESULT: "Result" imported // the Result.Result type alias diff --git a/compiler/mono/src/alias_analysis.rs b/compiler/mono/src/alias_analysis.rs index f8e5dddb71..b218875425 100644 --- a/compiler/mono/src/alias_analysis.rs +++ b/compiler/mono/src/alias_analysis.rs @@ -9,13 +9,16 @@ use roc_module::low_level::LowLevel; use roc_module::symbol::Symbol; use std::convert::TryFrom; -use crate::ir::{Call, CallType, Expr, ListLiteralElement, Literal, ModifyRc, Proc, Stmt}; -use crate::layout::{Builtin, Layout, ListLayout, UnionLayout}; +use crate::ir::{ + Call, CallType, Expr, HostExposedLayouts, ListLiteralElement, Literal, ModifyRc, Proc, Stmt, +}; +use crate::layout::{Builtin, Layout, ListLayout, RawFunctionLayout, UnionLayout}; // just using one module for now pub const MOD_APP: ModName = ModName(b"UserApp"); pub const STATIC_STR_NAME: ConstName = ConstName(&Symbol::STR_ALIAS_ANALYSIS_STATIC.to_ne_bytes()); +pub const STATIC_LIST_NAME: ConstName = ConstName(b"THIS IS A STATIC LIST"); const ENTRY_POINT_NAME: &[u8] = b"mainForHost"; @@ -128,25 +131,49 @@ where }; m.add_const(STATIC_STR_NAME, static_str_def)?; - // the entry point wrapper - let roc_main_bytes = func_name_bytes_help( - entry_point.symbol, - entry_point.layout.arguments.iter().copied(), - entry_point.layout.result, - ); - let roc_main = FuncName(&roc_main_bytes); + // a const that models all static lists + let static_list_def = { + let mut cbuilder = ConstDefBuilder::new(); + let block = cbuilder.add_block(); + let cell = cbuilder.add_new_heap_cell(block)?; 
- let entry_point_function = build_entry_point(entry_point.layout, roc_main)?; - let entry_point_name = FuncName(ENTRY_POINT_NAME); - m.add_func(entry_point_name, entry_point_function)?; + let unit_type = cbuilder.add_tuple_type(&[])?; + let bag = cbuilder.add_empty_bag(block, unit_type)?; + let value_id = cbuilder.add_make_tuple(block, &[cell, bag])?; + let root = BlockExpr(block, value_id); + let list_type_id = static_list_type(&mut cbuilder)?; + + cbuilder.build(list_type_id, root)? + }; + m.add_const(STATIC_LIST_NAME, static_list_def)?; let mut type_definitions = MutSet::default(); + let mut host_exposed_functions = Vec::new(); // all other functions for proc in procs { let bytes = func_name_bytes(proc); let func_name = FuncName(&bytes); + if let HostExposedLayouts::HostExposed { aliases, .. } = &proc.host_exposed_layouts { + for (_, (symbol, top_level, layout)) in aliases { + match layout { + RawFunctionLayout::Function(_, _, _) => { + let it = top_level.arguments.iter().copied(); + let bytes = func_name_bytes_help(*symbol, it, top_level.result); + + host_exposed_functions.push((bytes, top_level.arguments)); + } + RawFunctionLayout::ZeroArgumentThunk(_) => { + let it = std::iter::once(Layout::Struct(&[])); + let bytes = func_name_bytes_help(*symbol, it, top_level.result); + + host_exposed_functions.push((bytes, top_level.arguments)); + } + } + } + } + if DEBUG { eprintln!( "{:?}: {:?} with {:?} args", @@ -163,6 +190,19 @@ where m.add_func(func_name, spec)?; } + // the entry point wrapper + let roc_main_bytes = func_name_bytes_help( + entry_point.symbol, + entry_point.layout.arguments.iter().copied(), + entry_point.layout.result, + ); + let roc_main = FuncName(&roc_main_bytes); + + let entry_point_function = + build_entry_point(entry_point.layout, roc_main, &host_exposed_functions)?; + let entry_point_name = FuncName(ENTRY_POINT_NAME); + m.add_func(entry_point_name, entry_point_function)?; + for union_layout in type_definitions { let type_name_bytes = 
recursive_tag_union_name_bytes(&union_layout).as_bytes(); let type_name = TypeName(&type_name_bytes); @@ -202,23 +242,83 @@ where morphic_lib::solve(program) } -fn build_entry_point(layout: crate::ir::ProcLayout, func_name: FuncName) -> Result { +/// if you want an "escape hatch" which allows you construct "best-case scenario" values +/// of an arbitrary type in much the same way that 'unknown_with' allows you to construct +/// "worst-case scenario" values of an arbitrary type, you can use the following terrible hack: +/// use 'add_make_union' to construct an instance of variant 0 of a union type 'union {(), your_type}', +/// and then use 'add_unwrap_union' to extract variant 1 from the value you just constructed. +/// In the current implementation (but not necessarily in future versions), +/// I can promise this will effectively give you a value of type 'your_type' +/// all of whose heap cells are considered unique and mutable. +fn terrible_hack(builder: &mut FuncDefBuilder, block: BlockId, type_id: TypeId) -> Result { + let variant_types = vec![builder.add_tuple_type(&[])?, type_id]; + let unit = builder.add_make_tuple(block, &[])?; + let value = builder.add_make_union(block, &variant_types, 0, unit)?; + + builder.add_unwrap_union(block, value, 1) +} + +fn build_entry_point( + layout: crate::ir::ProcLayout, + func_name: FuncName, + host_exposed_functions: &[([u8; SIZE], &[Layout])], +) -> Result { let mut builder = FuncDefBuilder::new(); - let block = builder.add_block(); + let outer_block = builder.add_block(); - // to the modelling language, the arguments appear out of thin air - let argument_type = build_tuple_type(&mut builder, layout.arguments)?; - let argument = builder.add_unknown_with(block, &[], argument_type)?; + let mut cases = Vec::new(); - let name_bytes = [0; 16]; - let spec_var = CalleeSpecVar(&name_bytes); - let result = builder.add_call(block, spec_var, MOD_APP, func_name, argument)?; + { + let block = builder.add_block(); + + // to the modelling 
language, the arguments appear out of thin air + let argument_type = build_tuple_type(&mut builder, layout.arguments)?; + + // does not make any assumptions about the input + // let argument = builder.add_unknown_with(block, &[], argument_type)?; + + // assumes the input can be updated in-place + let argument = terrible_hack(&mut builder, block, argument_type)?; + + let name_bytes = [0; 16]; + let spec_var = CalleeSpecVar(&name_bytes); + let result = builder.add_call(block, spec_var, MOD_APP, func_name, argument)?; + + // to the modelling language, the result disappears into the void + let unit_type = builder.add_tuple_type(&[])?; + let unit_value = builder.add_unknown_with(block, &[result], unit_type)?; + + cases.push(BlockExpr(block, unit_value)); + } + + // add fake calls to host-exposed functions so they are specialized + for (name_bytes, layouts) in host_exposed_functions { + let host_exposed_func_name = FuncName(name_bytes); + + if host_exposed_func_name == func_name { + continue; + } + + let block = builder.add_block(); + + let type_id = layout_spec(&mut builder, &Layout::Struct(layouts))?; + + let argument = builder.add_unknown_with(block, &[], type_id)?; + + let spec_var = CalleeSpecVar(name_bytes); + let result = + builder.add_call(block, spec_var, MOD_APP, host_exposed_func_name, argument)?; + + let unit_type = builder.add_tuple_type(&[])?; + let unit_value = builder.add_unknown_with(block, &[result], unit_type)?; + + cases.push(BlockExpr(block, unit_value)); + } - // to the modelling language, the result disappears into the void let unit_type = builder.add_tuple_type(&[])?; - let unit_value = builder.add_unknown_with(block, &[result], unit_type)?; + let unit_value = builder.add_choice(outer_block, &cases)?; - let root = BlockExpr(block, unit_value); + let root = BlockExpr(outer_block, unit_value); let spec = builder.build(unit_type, unit_type, root)?; Ok(spec) @@ -818,6 +918,17 @@ fn lowlevel_spec( let new_cell = builder.add_new_heap_cell(block)?; 
builder.add_make_tuple(block, &[new_cell, bag]) } + ListReverse => { + let list = env.symbols[&arguments[0]]; + + let bag = builder.add_get_tuple_field(block, list, LIST_BAG_INDEX)?; + let cell = builder.add_get_tuple_field(block, list, LIST_CELL_INDEX)?; + + let _unit = builder.add_update(block, update_mode_var, cell)?; + + let new_cell = builder.add_new_heap_cell(block)?; + builder.add_make_tuple(block, &[new_cell, bag]) + } ListAppend => { let list = env.symbols[&arguments[0]]; let to_insert = env.symbols[&arguments[1]]; @@ -833,6 +944,27 @@ fn lowlevel_spec( let new_cell = builder.add_new_heap_cell(block)?; builder.add_make_tuple(block, &[new_cell, bag]) } + StrToUtf8 => { + let string = env.symbols[&arguments[0]]; + + let u8_type = builder.add_tuple_type(&[])?; + let bag = builder.add_empty_bag(block, u8_type)?; + let cell = builder.add_get_tuple_field(block, string, LIST_CELL_INDEX)?; + + builder.add_make_tuple(block, &[cell, bag]) + } + StrFromUtf8 => { + let list = env.symbols[&arguments[0]]; + + let cell = builder.add_get_tuple_field(block, list, LIST_CELL_INDEX)?; + let string = builder.add_make_tuple(block, &[cell])?; + + let byte_index = builder.add_make_tuple(block, &[])?; + let is_ok = builder.add_make_tuple(block, &[])?; + let problem_code = builder.add_make_tuple(block, &[])?; + + builder.add_make_tuple(block, &[byte_index, string, is_ok, problem_code]) + } DictEmpty => { match layout { Layout::Builtin(Builtin::EmptyDict) => { @@ -1117,9 +1249,11 @@ fn expr_spec<'a>( let list = new_list(builder, block, type_id)?; let mut bag = builder.add_get_tuple_field(block, list, LIST_BAG_INDEX)?; + let mut all_constants = true; for element in elems.iter() { let value_id = if let ListLiteralElement::Symbol(symbol) = element { + all_constants = false; env.symbols[symbol] } else { builder.add_make_tuple(block, &[]).unwrap() @@ -1128,9 +1262,13 @@ fn expr_spec<'a>( bag = builder.add_bag_insert(block, bag, value_id)?; } - let cell = 
builder.add_new_heap_cell(block)?; + if all_constants { + new_static_list(builder, block) + } else { + let cell = builder.add_new_heap_cell(block)?; - builder.add_make_tuple(block, &[cell, bag]) + builder.add_make_tuple(block, &[cell, bag]) + } } EmptyArray => { @@ -1296,6 +1434,14 @@ fn str_type(builder: &mut TC) -> Result { builder.add_tuple_type(&[cell_id]) } +fn static_list_type(builder: &mut TC) -> Result { + let unit_type = builder.add_tuple_type(&[])?; + let cell = builder.add_heap_cell_type(); + let bag = builder.add_bag_type(unit_type)?; + + builder.add_tuple_type(&[cell, bag]) +} + // const OK_TAG_ID: u8 = 1u8; // const ERR_TAG_ID: u8 = 0u8; @@ -1329,6 +1475,12 @@ fn new_static_string(builder: &mut FuncDefBuilder, block: BlockId) -> Result Result { + let module = MOD_APP; + + builder.add_const_ref(block, module, STATIC_LIST_NAME) +} + fn new_num(builder: &mut FuncDefBuilder, block: BlockId) -> Result { // we model all our numbers as unit values builder.add_make_tuple(block, &[]) diff --git a/compiler/mono/src/borrow.rs b/compiler/mono/src/borrow.rs index 89d6ae2446..e6221e798b 100644 --- a/compiler/mono/src/borrow.rs +++ b/compiler/mono/src/borrow.rs @@ -993,6 +993,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] { // List.append should own its first argument ListAppend => arena.alloc_slice_copy(&[owned, owned]), ListDrop => arena.alloc_slice_copy(&[owned, irrelevant]), + ListDropAt => arena.alloc_slice_copy(&[owned, irrelevant]), ListSwap => arena.alloc_slice_copy(&[owned, irrelevant, irrelevant]), Eq | NotEq => arena.alloc_slice_copy(&[borrowed, borrowed]), @@ -1013,6 +1014,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] { StrFromUtf8 => arena.alloc_slice_copy(&[owned]), StrFromUtf8Range => arena.alloc_slice_copy(&[borrowed, irrelevant]), StrToUtf8 => arena.alloc_slice_copy(&[owned]), + StrRepeat => arena.alloc_slice_copy(&[borrowed, irrelevant]), StrFromInt | StrFromFloat => 
arena.alloc_slice_copy(&[irrelevant]), Hash => arena.alloc_slice_copy(&[borrowed, irrelevant]), DictSize => arena.alloc_slice_copy(&[borrowed]), diff --git a/compiler/mono/src/ir.rs b/compiler/mono/src/ir.rs index bc620bdf74..c611ef99a2 100644 --- a/compiler/mono/src/ir.rs +++ b/compiler/mono/src/ir.rs @@ -94,6 +94,12 @@ impl<'a> CapturedSymbols<'a> { } } +impl<'a> Default for CapturedSymbols<'a> { + fn default() -> Self { + CapturedSymbols::None + } +} + #[derive(Clone, Debug, PartialEq)] pub struct PendingSpecialization<'a> { solved_type: SolvedType, @@ -6208,6 +6214,8 @@ fn reuse_function_symbol<'a>( layout_cache, ); + // even though this function may not itself capture, + // unification may still cause it to have an extra argument construct_closure_data( env, lambda_set, @@ -6437,8 +6445,6 @@ fn call_by_name<'a>( assign_to_symbols(env, procs, layout_cache, iter, result) } } else { - let argument_layouts = lambda_set.extend_argument_list(env.arena, arg_layouts); - call_by_name_help( env, procs, @@ -6446,7 +6452,7 @@ fn call_by_name<'a>( proc_name, loc_args, lambda_set, - argument_layouts, + arg_layouts, ret_layout, layout_cache, assigned, @@ -6494,10 +6500,6 @@ fn call_by_name_help<'a>( let original_fn_var = fn_var; let arena = env.arena; - // debug_assert!(!procs.module_thunks.contains(&proc_name), "{:?}", proc_name); - - let top_level_layout = ProcLayout::new(env.arena, argument_layouts, *ret_layout); - // the arguments given to the function, stored in symbols let mut field_symbols = Vec::with_capacity_in(loc_args.len(), arena); field_symbols.extend( @@ -6506,7 +6508,13 @@ fn call_by_name_help<'a>( .map(|(_, arg_expr)| possible_reuse_symbol(env, procs, &arg_expr.value)), ); - let field_symbols = field_symbols.into_bump_slice(); + // If required, add an extra argument to the layout that is the captured environment + // afterwards, we MUST make sure the number of arguments in the layout matches the + // number of arguments actually passed. 
+ let top_level_layout = { + let argument_layouts = lambda_set.extend_argument_list(env.arena, argument_layouts); + ProcLayout::new(env.arena, argument_layouts, *ret_layout) + }; // the variables of the given arguments let mut pattern_vars = Vec::with_capacity_in(loc_args.len(), arena); @@ -6535,6 +6543,8 @@ fn call_by_name_help<'a>( proc_name, ); + let field_symbols = field_symbols.into_bump_slice(); + let call = self::Call { call_type: CallType::ByName { name: proc_name, @@ -6573,6 +6583,9 @@ fn call_by_name_help<'a>( "see call_by_name for background (scroll down a bit), function is {:?}", proc_name, ); + + let field_symbols = field_symbols.into_bump_slice(); + let call = self::Call { call_type: CallType::ByName { name: proc_name, @@ -6625,6 +6638,8 @@ fn call_by_name_help<'a>( proc_name, ); + let field_symbols = field_symbols.into_bump_slice(); + let call = self::Call { call_type: CallType::ByName { name: proc_name, @@ -6643,6 +6658,19 @@ fn call_by_name_help<'a>( None => { let opt_partial_proc = procs.partial_procs.get(&proc_name); + /* + debug_assert_eq!( + argument_layouts.len(), + field_symbols.len(), + "Function {:?} is called with {} arguments, but the layout expects {}", + proc_name, + field_symbols.len(), + argument_layouts.len(), + ); + */ + + let field_symbols = field_symbols.into_bump_slice(); + match opt_partial_proc { Some(partial_proc) => { // TODO should pending_procs hold a Rc to avoid this .clone()? 
@@ -6657,18 +6685,22 @@ fn call_by_name_help<'a>( match specialize(env, procs, proc_name, layout_cache, pending, partial_proc) { - Ok((proc, layout)) => call_specialized_proc( - env, - procs, - proc_name, - proc, - layout, - field_symbols, - loc_args, - layout_cache, - assigned, - hole, - ), + Ok((proc, layout)) => { + // now we just call our freshly-specialized function + call_specialized_proc( + env, + procs, + proc_name, + proc, + lambda_set, + layout, + field_symbols, + loc_args, + layout_cache, + assigned, + hole, + ) + } Err(SpecializeFailure { attempted_layout, problem: _, @@ -6684,6 +6716,7 @@ fn call_by_name_help<'a>( procs, proc_name, proc, + lambda_set, attempted_layout, field_symbols, loc_args, @@ -6833,6 +6866,7 @@ fn call_specialized_proc<'a>( procs: &mut Procs<'a>, proc_name: Symbol, proc: Proc<'a>, + lambda_set: LambdaSet<'a>, layout: RawFunctionLayout<'a>, field_symbols: &'a [Symbol], loc_args: std::vec::Vec<(Variable, Located)>, @@ -6871,6 +6905,8 @@ fn call_specialized_proc<'a>( arguments: field_symbols, }; + // the closure argument is already added here (to get the right specialization) + // but now we need to remove it because the `match_on_lambda_set` will add it again build_call(env, call, assigned, Layout::LambdaSet(lambda_set), hole) } RawFunctionLayout::ZeroArgumentThunk(_) => { @@ -6878,30 +6914,75 @@ fn call_specialized_proc<'a>( } } } else { - debug_assert_eq!( - function_layout.arguments.len(), - field_symbols.len(), - "function {:?} with layout {:?} expects {:?} arguments, but is applied to {:?}", - proc_name, - function_layout, - function_layout.arguments.len(), - field_symbols.len(), - ); - let call = self::Call { - call_type: CallType::ByName { - name: proc_name, - ret_layout: function_layout.result, - arg_layouts: function_layout.arguments, - specialization_id: env.next_call_specialization_id(), - }, - arguments: field_symbols, - }; - let iter = loc_args.into_iter().rev().zip(field_symbols.iter().rev()); - let result = 
build_call(env, call, assigned, function_layout.result, hole); + match procs + .partial_procs + .get(&proc_name) + .map(|pp| &pp.captured_symbols) + { + Some(&CapturedSymbols::Captured(captured_symbols)) => { + let symbols = Vec::from_iter_in(captured_symbols.iter().map(|x| x.0), env.arena) + .into_bump_slice(); - assign_to_symbols(env, procs, layout_cache, iter, result) + let closure_data_symbol = env.unique_symbol(); + + // the closure argument is already added here (to get the right specialization) + // but now we need to remove it because the `match_on_lambda_set` will add it again + let mut argument_layouts = + Vec::from_iter_in(function_layout.arguments.iter().copied(), env.arena); + argument_layouts.pop().unwrap(); + + debug_assert_eq!(argument_layouts.len(), field_symbols.len(),); + + let new_hole = match_on_lambda_set( + env, + lambda_set, + closure_data_symbol, + field_symbols, + argument_layouts.into_bump_slice(), + function_layout.result, + assigned, + hole, + ); + + let result = construct_closure_data( + env, + lambda_set, + proc_name, + symbols, + closure_data_symbol, + env.arena.alloc(new_hole), + ); + + assign_to_symbols(env, procs, layout_cache, iter, result) + } + _ => { + debug_assert_eq!( + function_layout.arguments.len(), + field_symbols.len(), + "function {:?} with layout {:#?} expects {:?} arguments, but is applied to {:?}", + proc_name, + function_layout, + function_layout.arguments.len(), + field_symbols.len(), + ); + + let call = self::Call { + call_type: CallType::ByName { + name: proc_name, + ret_layout: function_layout.result, + arg_layouts: function_layout.arguments, + specialization_id: env.next_call_specialization_id(), + }, + arguments: field_symbols, + }; + + let result = build_call(env, call, assigned, function_layout.result, hole); + + assign_to_symbols(env, procs, layout_cache, iter, result) + } + } } } diff --git a/compiler/mono/src/layout.rs b/compiler/mono/src/layout.rs index 959d679cb3..507bcfaf7e 100644 --- 
a/compiler/mono/src/layout.rs +++ b/compiler/mono/src/layout.rs @@ -599,7 +599,7 @@ impl<'a> LambdaSet<'a> { // this can happen when there is a type error somewhere Ok(LambdaSet { set: &[], - representation: arena.alloc(Layout::Union(UnionLayout::NonRecursive(&[]))), + representation: arena.alloc(Layout::Struct(&[])), }) } _ => panic!("called LambdaSet.from_var on invalid input"), diff --git a/compiler/solve/tests/solve_expr.rs b/compiler/solve/tests/solve_expr.rs index 4f93fe449e..e43ac44ebc 100644 --- a/compiler/solve/tests/solve_expr.rs +++ b/compiler/solve/tests/solve_expr.rs @@ -3709,6 +3709,18 @@ mod solve_expr { ); } + #[test] + fn list_drop_at() { + infer_eq_without_problem( + indoc!( + r#" + List.dropAt + "# + ), + "List a, Nat -> List a", + ); + } + #[test] fn function_that_captures_nothing_is_not_captured() { // we should make sure that a function that doesn't capture anything it not itself captured diff --git a/compiler/test_gen/src/gen_list.rs b/compiler/test_gen/src/gen_list.rs index 582b9acceb..47ac4b3e96 100644 --- a/compiler/test_gen/src/gen_list.rs +++ b/compiler/test_gen/src/gen_list.rs @@ -198,6 +198,43 @@ fn list_drop() { assert_evals_to!("List.drop [1,2] 5", RocList::from_slice(&[]), RocList); } +#[test] +fn list_drop_at() { + assert_evals_to!( + "List.dropAt [1, 2, 3] 0", + RocList::from_slice(&[2, 3]), + RocList + ); + assert_evals_to!( + "List.dropAt [0, 0, 0] 3", + RocList::from_slice(&[0, 0, 0]), + RocList + ); + assert_evals_to!("List.dropAt [] 1", RocList::from_slice(&[]), RocList); + assert_evals_to!("List.dropAt [0] 0", RocList::from_slice(&[]), RocList); +} + +#[test] +fn list_drop_at_mutable() { + assert_evals_to!( + indoc!( + r#" + list : List I64 + list = [ if True then 4 else 4, 5, 6 ] + + { newList: List.dropAt list 0, original: list } + "# + ), + ( + // new_list + RocList::from_slice(&[5, 6]), + // original + RocList::from_slice(&[4, 5, 6]), + ), + (RocList, RocList,) + ); +} + #[test] fn list_swap() { 
assert_evals_to!("List.swap [] 0 1", RocList::from_slice(&[]), RocList); @@ -2031,3 +2068,31 @@ fn map_with_index_multi_record() { RocList<((), ())> ); } + +#[test] +fn empty_list_of_function_type() { + // see https://github.com/rtfeldman/roc/issues/1732 + assert_evals_to!( + indoc!( + r#" + myList : List (Str -> Str) + myList = [] + + myClosure : Str -> Str + myClosure = \_ -> "bar" + + choose = + if False then + myList + else + [ myClosure ] + + when List.get choose 0 is + Ok f -> f "foo" + Err _ -> "bad!" + "# + ), + RocStr::from_slice(b"bar"), + RocStr + ); +} diff --git a/compiler/test_gen/src/gen_primitives.rs b/compiler/test_gen/src/gen_primitives.rs index d183b95d0b..d6a3a38049 100644 --- a/compiler/test_gen/src/gen_primitives.rs +++ b/compiler/test_gen/src/gen_primitives.rs @@ -2906,3 +2906,104 @@ fn do_pass_bool_byte_closure_layout() { RocStr ); } + +#[test] +fn nested_rigid_list() { + assert_evals_to!( + indoc!( + r#" + app "test" provides [ main ] to "./platform" + + foo : List a -> List a + foo = \list -> + p2 : List a + p2 = list + + p2 + + main = + when foo [] is + _ -> "hello world" + "# + ), + RocStr::from_slice(b"hello world"), + RocStr + ); +} + +#[test] +fn nested_rigid_alias() { + assert_evals_to!( + indoc!( + r#" + app "test" provides [ main ] to "./platform" + + Identity a : [ @Identity a ] + + foo : Identity a -> Identity a + foo = \list -> + p2 : Identity a + p2 = list + + p2 + + main = + when foo (@Identity "foo") is + _ -> "hello world" + "# + ), + RocStr::from_slice(b"hello world"), + RocStr + ); +} + +#[test] +fn nested_rigid_tag_union() { + assert_evals_to!( + indoc!( + r#" + app "test" provides [ main ] to "./platform" + + foo : [ @Identity a ] -> [ @Identity a ] + foo = \list -> + p2 : [ @Identity a ] + p2 = list + + p2 + + main = + when foo (@Identity "foo") is + _ -> "hello world" + "# + ), + RocStr::from_slice(b"hello world"), + RocStr + ); +} + +#[test] +fn call_that_needs_closure_parameter() { + // here both p2 is lifted to the 
top-level, which means that `list` must be + // passed to it from `manyAux`. + assert_evals_to!( + indoc!( + r#" + Step state a : [ Loop state, Done a ] + + manyAux : List a -> [ Pair (Step (List a) (List a))] + manyAux = \list -> + p2 = \_ -> Pair (Done list) + + p2 "foo" + + manyAuxTest = (manyAux [ ]) == Pair (Loop [97]) + + runTest = \t -> if t then "PASS" else "FAIL" + + runTest manyAuxTest + "# + ), + RocStr::from_slice(b"FAIL"), + RocStr + ); +} diff --git a/compiler/test_gen/src/gen_str.rs b/compiler/test_gen/src/gen_str.rs index 2b5bdb45ad..292946403e 100644 --- a/compiler/test_gen/src/gen_str.rs +++ b/compiler/test_gen/src/gen_str.rs @@ -949,3 +949,31 @@ fn str_from_utf8_range_count_too_high_for_start() { RocStr ); } + +#[test] +fn str_repeat_small() { + assert_evals_to!( + indoc!(r#"Str.repeat "Roc" 3"#), + RocStr::from("RocRocRoc"), + RocStr + ); +} + +#[test] +fn str_repeat_big() { + assert_evals_to!( + indoc!(r#"Str.repeat "more than 16 characters" 2"#), + RocStr::from("more than 16 charactersmore than 16 characters"), + RocStr + ); +} + +#[test] +fn str_repeat_empty_string() { + assert_evals_to!(indoc!(r#"Str.repeat "" 3"#), RocStr::from(""), RocStr); +} + +#[test] +fn str_repeat_zero_times() { + assert_evals_to!(indoc!(r#"Str.repeat "Roc" 0"#), RocStr::from(""), RocStr); +} diff --git a/compiler/test_mono/generated/empty_list_of_function_type.txt b/compiler/test_mono/generated/empty_list_of_function_type.txt new file mode 100644 index 0000000000..89aa3d309b --- /dev/null +++ b/compiler/test_mono/generated/empty_list_of_function_type.txt @@ -0,0 +1,43 @@ +procedure List.3 (#Attr.2, #Attr.3): + let Test.20 = lowlevel ListLen #Attr.2; + let Test.17 = lowlevel NumLt #Attr.3 Test.20; + if Test.17 then + let Test.19 = lowlevel ListGetUnsafe #Attr.2 #Attr.3; + let Test.18 = Ok Test.19; + ret Test.18; + else + let Test.16 = Struct {}; + let Test.15 = Err Test.16; + ret Test.15; + +procedure Test.2 (Test.6): + let Test.24 = "bar"; + ret Test.24; + +procedure 
Test.0 (): + let Test.1 = Array []; + joinpoint Test.22 Test.3: + let Test.14 = 0i64; + let Test.7 = CallByName List.3 Test.3 Test.14; + dec Test.3; + let Test.11 = 1i64; + let Test.12 = GetTagId Test.7; + let Test.13 = lowlevel Eq Test.11 Test.12; + if Test.13 then + let Test.5 = UnionAtIndex (Id 1) (Index 0) Test.7; + let Test.9 = "foo"; + let Test.8 = CallByName Test.2 Test.9; + dec Test.9; + ret Test.8; + else + let Test.10 = "bad!"; + ret Test.10; + in + let Test.25 = false; + if Test.25 then + jump Test.22 Test.1; + else + dec Test.1; + let Test.23 = Struct {}; + let Test.21 = Array [Test.23]; + jump Test.22 Test.21; diff --git a/compiler/test_mono/src/lib.rs b/compiler/test_mono/src/lib.rs index 2d05f9cd16..cbcdbed066 100644 --- a/compiler/test_mono/src/lib.rs +++ b/compiler/test_mono/src/lib.rs @@ -1084,6 +1084,33 @@ fn specialize_lowlevel() { ) } +#[mono_test] +fn empty_list_of_function_type() { + // see https://github.com/rtfeldman/roc/issues/1732 + indoc!( + r#" + app "test" provides [ main ] to "./platform" + + main = + myList : List (Str -> Str) + myList = [] + + myClosure : Str -> Str + myClosure = \_ -> "bar" + + choose = + if False then + myList + else + [ myClosure ] + + when List.get choose 0 is + Ok f -> f "foo" + Err _ -> "bad!" 
+ "# + ) +} + // #[ignore] // #[mono_test] // fn static_str_closure() { diff --git a/editor/Cargo.toml b/editor/Cargo.toml index 15078a6894..c32c9372f7 100644 --- a/editor/Cargo.toml +++ b/editor/Cargo.toml @@ -5,13 +5,14 @@ authors = ["The Roc Contributors"] license = "UPL-1.0" edition = "2018" description = "An editor for Roc" -exclude = ["src/shaders/*.spv"] [dependencies] +roc_ast = { path = "../ast" } roc_collections = { path = "../compiler/collections" } roc_load = { path = "../compiler/load" } roc_builtins = { path = "../compiler/builtins" } roc_can = { path = "../compiler/can" } +roc_code_markup = { path = "../code_markup"} roc_parse = { path = "../compiler/parse" } roc_region = { path = "../compiler/region" } roc_module = { path = "../compiler/module" } @@ -29,13 +30,13 @@ arraystring = "0.3.0" libc = "0.2" page_size = "0.4" winit = "0.24" -wgpu = "0.9" +wgpu = "0.10" glyph_brush = "0.7" log = "0.4" zerocopy = "0.3" env_logger = "0.8" futures = "0.3" -wgpu_glyph = "0.13" +wgpu_glyph = "0.14" cgmath = "0.18.0" snafu = { version = "0.6", features = ["backtraces"] } colored = "2" diff --git a/editor/src/editor/ed_error.rs b/editor/src/editor/ed_error.rs index 82b53f6bce..357693bb69 100644 --- a/editor/src/editor/ed_error.rs +++ b/editor/src/editor/ed_error.rs @@ -1,7 +1,9 @@ -use crate::lang::parse::ASTNodeId; -use crate::ui::ui_error::UIResult; -use crate::{editor::slow_pool::MarkNodeId, ui::text::text_pos::TextPos}; +use crate::ui::text::text_pos::TextPos; use colored::*; +use roc_ast::ast_error::ASTError; +use roc_ast::lang::core::ast::ASTNodeId; +use roc_code_markup::markup_error::MarkError; +use roc_code_markup::slow_pool::MarkNodeId; use snafu::{Backtrace, ErrorCompat, NoneError, ResultExt, Snafu}; //import errors as follows: @@ -212,8 +214,12 @@ pub enum EdError { #[snafu(display("StringParseError: {}", msg))] StringParseError { msg: String, backtrace: Backtrace }, + #[snafu(display("ASTError: {}", msg))] + ASTErrorBacktrace { msg: String, backtrace: 
Backtrace }, #[snafu(display("UIError: {}", msg))] UIErrorBacktrace { msg: String, backtrace: Backtrace }, + #[snafu(display("MarkError: {}", msg))] + MarkErrorBacktrace { msg: String, backtrace: Backtrace }, } pub type EdResult = std::result::Result; @@ -285,9 +291,22 @@ impl From for EdError { } } -pub fn from_ui_res(ui_res: UIResult) -> EdResult { - match ui_res { - Ok(t) => Ok(t), - Err(ui_err) => Err(EdError::from(ui_err)), +impl From for EdError { + fn from(mark_err: MarkError) -> Self { + let msg = format!("{}", mark_err); + + // hack to handle EdError derive + let dummy_res: Result<(), NoneError> = Err(NoneError {}); + dummy_res.context(MarkErrorBacktrace { msg }).unwrap_err() + } +} + +impl From for EdError { + fn from(ast_err: ASTError) -> Self { + let msg = format!("{}", ast_err); + + // hack to handle EdError derive + let dummy_res: Result<(), NoneError> = Err(NoneError {}); + dummy_res.context(ASTErrorBacktrace { msg }).unwrap_err() } } diff --git a/editor/src/editor/grid_node_map.rs b/editor/src/editor/grid_node_map.rs index 2cece54f43..e342d624c1 100644 --- a/editor/src/editor/grid_node_map.rs +++ b/editor/src/editor/grid_node_map.rs @@ -2,21 +2,20 @@ use crate::editor::ed_error::EdResult; use crate::editor::ed_error::NestedNodeWithoutChildren; use crate::editor::ed_error::{NoDefMarkNodeBeforeLineNr, NodeIdNotInGridNodeMap}; use crate::editor::mvc::ed_model::EdModel; -use crate::editor::slow_pool::MarkNodeId; -use crate::editor::slow_pool::SlowPool; use crate::editor::util::first_last_index_of; use crate::editor::util::index_of; -use crate::lang::parse::ASTNodeId; use crate::ui::text::selection::Selection; use crate::ui::text::text_pos::TextPos; use crate::ui::ui_error::{LineInsertionFailed, OutOfBounds, UIResult}; use crate::ui::util::{slice_get, slice_get_mut}; +use roc_ast::lang::core::ast::ASTNodeId; +use roc_code_markup::markup::nodes::get_root_mark_node_id; +use roc_code_markup::slow_pool::MarkNodeId; +use roc_code_markup::slow_pool::SlowPool; 
use snafu::OptionExt; use std::cmp::Ordering; use std::fmt; -use super::markup::nodes::get_root_mark_node_id; - #[derive(Debug)] pub struct GridNodeMap { pub lines: Vec>, diff --git a/editor/src/editor/main.rs b/editor/src/editor/main.rs index eeec775167..02afb87bfe 100644 --- a/editor/src/editor/main.rs +++ b/editor/src/editor/main.rs @@ -17,14 +17,14 @@ use crate::graphics::{ primitives::rect::Rect, primitives::text::{build_glyph_brush, example_code_glyph_rect, queue_text_draw, Text}, }; -use crate::lang::expr::Env; -use crate::lang::pool::Pool; use crate::ui::text::caret_w_select::CaretPos; use crate::ui::util::path_to_string; use bumpalo::Bump; use cgmath::Vector2; use fs_extra::dir::{copy, ls, CopyOptions, DirEntryAttr, DirEntryValue}; use pipelines::RectResources; +use roc_ast::lang::env::Env; +use roc_ast::mem_pool::pool::Pool; use roc_can::builtins::builtin_defs_map; use roc_collections::all::MutMap; use roc_load; @@ -79,7 +79,7 @@ fn run_event_loop(project_dir_path_opt: Option<&Path>) -> Result<(), Box) -> Result<(), Box) -> Result<(), Box { size = new_size; - swap_chain = gpu_device.create_swap_chain( - &surface, - &wgpu::SwapChainDescriptor { - usage: wgpu::TextureUsage::RENDER_ATTACHMENT, + surface.configure( + &gpu_device, + &wgpu::SurfaceConfiguration { + usage: wgpu::TextureUsages::RENDER_ATTACHMENT, format: render_format, width: size.width, height: size.height, @@ -283,11 +283,15 @@ fn run_event_loop(project_dir_path_opt: Option<&Path>) -> Result<(), Box) -> Result<(), Box) -> Result<(), Box) -> Result<(), Box) -> Result<(), Box) -> Result<(), Box( ) -> EdResult> { let mut module = EdModule::new(code_str, env, code_arena)?; - let mut mark_node_pool = SlowPool::new(); + let mut mark_node_pool = SlowPool::default(); let markup_ids = if code_str.is_empty() { EmptyCodeString {}.fail() } else { - ast_to_mark_nodes( + Ok(ast_to_mark_nodes( code_arena, &mut module.env, &module.ast, &mut mark_node_pool, &loaded_module.interns, - ) + )?) 
}?; let mut code_lines = CodeLines::default(); @@ -152,7 +153,7 @@ impl<'a> EdModel<'a> { if let Some(parent_id) = curr_mark_node.get_parent_id_opt() { let parent = self.mark_node_pool.get(parent_id); - parent.get_child_indices(curr_mark_node_id, &self.mark_node_pool) + Ok(parent.get_child_indices(curr_mark_node_id, &self.mark_node_pool)?) } else { MissingParent { node_id: curr_mark_node_id, @@ -180,7 +181,7 @@ pub struct EdModule<'a> { impl<'a> EdModule<'a> { pub fn new(code_str: &'a str, mut env: Env<'a>, ast_arena: &'a Bump) -> EdResult> { if !code_str.is_empty() { - let parse_res = AST::parse_from_string(code_str, &mut env, ast_arena); + let parse_res = parse_ast::parse_from_string(code_str, &mut env, ast_arena); match parse_res { Ok(ast) => Ok(EdModule { env, ast }), @@ -201,8 +202,6 @@ pub mod test_ed_model { use crate::editor::main::load_module; use crate::editor::mvc::ed_model; use crate::editor::resources::strings::HELLO_WORLD; - use crate::lang::expr::Env; - use crate::lang::pool::Pool; use crate::ui::text::caret_w_select::test_caret_w_select::convert_dsl_to_selection; use crate::ui::text::caret_w_select::test_caret_w_select::convert_selection_to_dsl; use crate::ui::text::caret_w_select::CaretPos; @@ -211,6 +210,8 @@ pub mod test_ed_model { use crate::ui::ui_error::UIResult; use bumpalo::Bump; use ed_model::EdModel; + use roc_ast::lang::env::Env; + use roc_ast::mem_pool::pool::Pool; use roc_load::file::LoadedModule; use roc_module::symbol::IdentIds; use roc_module::symbol::ModuleIds; diff --git a/editor/src/editor/mvc/ed_update.rs b/editor/src/editor/mvc/ed_update.rs index fb2267a5c3..4bca32f0db 100644 --- a/editor/src/editor/mvc/ed_update.rs +++ b/editor/src/editor/mvc/ed_update.rs @@ -4,14 +4,13 @@ use std::process::Command; use std::process::Stdio; use crate::editor::code_lines::CodeLines; -use crate::editor::ed_error::from_ui_res; use crate::editor::ed_error::EdResult; use crate::editor::ed_error::MissingSelection; use 
crate::editor::grid_node_map::GridNodeMap; -use crate::editor::markup::attribute::Attributes; +/*use crate::editor::markup::attribute::Attributes; use crate::editor::markup::nodes; use crate::editor::markup::nodes::MarkupNode; -use crate::editor::markup::nodes::EQUALS; +use crate::editor::markup::nodes::EQUALS;*/ use crate::editor::mvc::app_update::InputOutcome; use crate::editor::mvc::ed_model::EdModel; use crate::editor::mvc::ed_model::SelectedBlock; @@ -27,7 +26,7 @@ use crate::editor::mvc::string_update::start_new_string; use crate::editor::mvc::string_update::update_small_string; use crate::editor::mvc::string_update::update_string; use crate::editor::mvc::tld_value_update::{start_new_tld_value, update_tld_val_name}; -use crate::editor::slow_pool::MarkNodeId; +/*use crate::editor::slow_pool::MarkNodeId; use crate::editor::slow_pool::SlowPool; use crate::editor::sound::play_sound; use crate::editor::syntax_highlight::HighlightStyle; @@ -39,7 +38,7 @@ use crate::lang::parse::ASTNodeId; use crate::lang::pool::Pool; use crate::lang::pool::PoolStr; use crate::lang::types::Type2; -use crate::lang::{constrain::Constraint, solve}; +use crate::lang::{constrain::Constraint, solve};*/ use crate::ui::text::caret_w_select::CaretWSelect; use crate::ui::text::lines::MoveCaretFun; use crate::ui::text::selection::validate_raw_sel; @@ -52,7 +51,25 @@ use crate::ui::util::path_to_string; use crate::ui::util::write_to_file; use crate::window::keyboard_input::Modifiers; use bumpalo::Bump; +use roc_ast::constrain::constrain_expr; +use roc_ast::constrain::Constraint; +use roc_ast::lang::core::ast::ASTNodeId; +use roc_ast::lang::core::def::def2::Def2; +use roc_ast::lang::core::def::def2::DefId; +use roc_ast::lang::core::expr::expr2::Expr2; +use roc_ast::lang::core::expr::expr2::ExprId; +use roc_ast::lang::core::types::Type2; +use roc_ast::mem_pool::pool::Pool; +use roc_ast::mem_pool::pool_str::PoolStr; +use roc_ast::solve_type; use roc_can::expected::Expected; +use 
roc_code_markup::markup::attribute::Attributes; +use roc_code_markup::markup::nodes; +use roc_code_markup::markup::nodes::MarkupNode; +use roc_code_markup::markup::nodes::EQUALS; +use roc_code_markup::slow_pool::MarkNodeId; +use roc_code_markup::slow_pool::SlowPool; +use roc_code_markup::syntax_highlight::HighlightStyle; use roc_collections::all::MutMap; use roc_module::ident::Lowercase; use roc_module::symbol::Symbol; @@ -494,8 +511,8 @@ impl<'a> EdModel<'a> { rigid_variables: MutMap, constraint: Constraint, var_store: VarStore, - ) -> (Solved, solve::Env, Vec) { - let env = solve::Env { + ) -> (Solved, solve_type::Env, Vec) { + let env = solve_type::Env { vars_by_symbol: MutMap::default(), aliases, }; @@ -513,7 +530,7 @@ impl<'a> EdModel<'a> { // Run the solver to populate Subs. let (solved_subs, solved_env) = - solve::run(&arena, mempool, &env, &mut problems, subs, &constraint); + solve_type::run(&arena, mempool, &env, &mut problems, subs, &constraint); (solved_subs, solved_env, problems) } @@ -579,7 +596,7 @@ impl<'a> EdModel<'a> { let blank_replacement = MarkupNode::Blank { ast_node_id: sel_block.ast_node_id, - attributes: Attributes::new(), + attributes: Attributes::default(), syn_high_style: HighlightStyle::Blank, parent_id_opt: expr2_level_mark_node.get_parent_id_opt(), newlines_at_end, @@ -812,7 +829,7 @@ pub fn get_node_context<'a>(ed_model: &'a EdModel) -> EdResult> fn if_modifiers(modifiers: &Modifiers, shortcut_result: UIResult<()>) -> EdResult<()> { if modifiers.cmd_or_ctrl() { - from_ui_res(shortcut_result) + Ok(shortcut_result?) 
} else { Ok(()) } diff --git a/editor/src/editor/mvc/ed_view.rs b/editor/src/editor/mvc/ed_view.rs index 6fee1301f5..b9d4693b9e 100644 --- a/editor/src/editor/mvc/ed_view.rs +++ b/editor/src/editor/mvc/ed_view.rs @@ -7,7 +7,6 @@ use crate::editor::render_debug::build_debug_graphics; use crate::editor::resources::strings::START_TIP; use crate::graphics::primitives::rect::Rect; use crate::graphics::primitives::text::{owned_section_from_text, Text}; -use crate::lang::pool::Pool; use crate::ui::text::caret_w_select::make_caret_rect; use crate::ui::text::caret_w_select::make_selection_rect; use crate::ui::text::caret_w_select::CaretWSelect; @@ -15,6 +14,7 @@ use crate::ui::text::selection::Selection; use crate::ui::tooltip::ToolTip; use crate::ui::ui_error::MissingGlyphDims; use cgmath::Vector2; +use roc_ast::mem_pool::pool::Pool; use snafu::OptionExt; use winit::dpi::PhysicalSize; diff --git a/editor/src/editor/mvc/int_update.rs b/editor/src/editor/mvc/int_update.rs index 237da57d86..75a8354b60 100644 --- a/editor/src/editor/mvc/int_update.rs +++ b/editor/src/editor/mvc/int_update.rs @@ -1,17 +1,18 @@ +use roc_ast::lang::core::expr::expr2::Expr2::SmallInt; +use roc_ast::lang::core::expr::expr2::IntStyle; +use roc_ast::lang::core::expr::expr2::IntVal; +use roc_ast::mem_pool::pool_str::PoolStr; +use roc_code_markup::markup::attribute::Attributes; +use roc_code_markup::markup::nodes::MarkupNode; +use roc_code_markup::slow_pool::MarkNodeId; +use roc_code_markup::syntax_highlight::HighlightStyle; + use crate::editor::ed_error::EdResult; use crate::editor::ed_error::StringParseError; -use crate::editor::markup::attribute::Attributes; -use crate::editor::markup::nodes::MarkupNode; use crate::editor::mvc::app_update::InputOutcome; use crate::editor::mvc::ed_model::EdModel; use crate::editor::mvc::ed_update::get_node_context; use crate::editor::mvc::ed_update::NodeContext; -use crate::editor::slow_pool::MarkNodeId; -use crate::editor::syntax_highlight::HighlightStyle; -use 
crate::lang::ast::Expr2::SmallInt; -use crate::lang::ast::IntVal; -use crate::lang::ast::{IntStyle, IntVal::*}; -use crate::lang::pool::PoolStr; use crate::ui::text::lines::SelectableLines; // digit_char should be verified to be a digit before calling this function @@ -48,7 +49,7 @@ pub fn start_new_int(ed_model: &mut EdModel, digit_char: &char) -> EdResult EdResult<()> { + use IntVal::*; + *number = match number { I64(_) => I64(check_parse_res(updated_str.parse::())?), U64(_) => U64(check_parse_res(updated_str.parse::())?), diff --git a/editor/src/editor/mvc/let_update.rs b/editor/src/editor/mvc/let_update.rs index 697531bb5a..a678c0dbfb 100644 --- a/editor/src/editor/mvc/let_update.rs +++ b/editor/src/editor/mvc/let_update.rs @@ -1,18 +1,19 @@ +use roc_ast::lang::core::ast::ASTNodeId; +use roc_ast::lang::core::expr::expr2::Expr2; +use roc_ast::lang::core::pattern::Pattern2; +use roc_ast::lang::core::val_def::ValueDef; +use roc_code_markup::markup::attribute::Attributes; +use roc_code_markup::markup::common_nodes::new_blank_mn_w_nls; +use roc_code_markup::markup::common_nodes::new_equals_mn; +use roc_code_markup::markup::nodes::MarkupNode; +use roc_code_markup::syntax_highlight::HighlightStyle; use roc_module::symbol::Symbol; use crate::editor::ed_error::EdResult; -use crate::editor::markup::attribute::Attributes; -use crate::editor::markup::common_nodes::new_blank_mn_w_nls; -use crate::editor::markup::common_nodes::new_equals_mn; -use crate::editor::markup::nodes::MarkupNode; use crate::editor::mvc::app_update::InputOutcome; use crate::editor::mvc::ed_model::EdModel; use crate::editor::mvc::ed_update::get_node_context; use crate::editor::mvc::ed_update::NodeContext; -use crate::editor::syntax_highlight::HighlightStyle; -use crate::lang::ast::{Expr2, ValueDef}; -use crate::lang::parse::ASTNodeId; -use crate::lang::pattern::Pattern2; pub fn start_new_let_value(ed_model: &mut EdModel, new_char: &char) -> EdResult { let NodeContext { @@ -66,7 +67,7 @@ pub fn 
start_new_let_value(ed_model: &mut EdModel, new_char: &char) -> EdResult< content: val_name_string, ast_node_id, syn_high_style: HighlightStyle::Variable, - attributes: Attributes::new(), + attributes: Attributes::default(), parent_id_opt: Some(curr_mark_node_id), newlines_at_end: curr_mark_node_nls, }; diff --git a/editor/src/editor/mvc/list_update.rs b/editor/src/editor/mvc/list_update.rs index 65e6fce10b..fef31b7627 100644 --- a/editor/src/editor/mvc/list_update.rs +++ b/editor/src/editor/mvc/list_update.rs @@ -1,19 +1,18 @@ -use crate::editor::ed_error::EdResult; -use crate::editor::ed_error::{MissingParent, UnexpectedASTNode}; -use crate::editor::markup::common_nodes::{ +use roc_ast::lang::core::ast::{ast_node_to_string, ASTNodeId}; +use roc_ast::lang::core::expr::expr2::{Expr2, ExprId}; +use roc_ast::mem_pool::pool_vec::PoolVec; +use roc_code_markup::markup::common_nodes::{ new_blank_mn, new_comma_mn, new_left_square_mn, new_right_square_mn, }; -use crate::editor::markup::nodes; -use crate::editor::markup::nodes::MarkupNode; +use roc_code_markup::markup::nodes::{self, MarkupNode}; +use roc_code_markup::slow_pool::MarkNodeId; + +use crate::editor::ed_error::EdResult; +use crate::editor::ed_error::{MissingParent, UnexpectedASTNode}; use crate::editor::mvc::app_update::InputOutcome; use crate::editor::mvc::ed_model::EdModel; use crate::editor::mvc::ed_update::get_node_context; use crate::editor::mvc::ed_update::NodeContext; -use crate::editor::slow_pool::MarkNodeId; -use crate::lang::ast::ExprId; -use crate::lang::ast::{ast_node_to_string, Expr2}; -use crate::lang::parse::ASTNodeId; -use crate::lang::pool::PoolVec; use crate::ui::text::text_pos::TextPos; pub fn start_new_list(ed_model: &mut EdModel) -> EdResult { diff --git a/editor/src/editor/mvc/lookup_update.rs b/editor/src/editor/mvc/lookup_update.rs index 03a4905942..5d5c9f5396 100644 --- a/editor/src/editor/mvc/lookup_update.rs +++ b/editor/src/editor/mvc/lookup_update.rs @@ -1,9 +1,10 @@ +use 
roc_ast::lang::core::expr::expr2::{Expr2, ExprId}; +use roc_ast::mem_pool::pool_str::PoolStr; +use roc_code_markup::slow_pool::MarkNodeId; + use crate::editor::ed_error::EdResult; use crate::editor::mvc::app_update::InputOutcome; use crate::editor::mvc::ed_model::EdModel; -use crate::editor::slow_pool::MarkNodeId; -use crate::lang::ast::{Expr2, ExprId}; -use crate::lang::pool::PoolStr; use crate::ui::text::lines::SelectableLines; pub fn update_invalid_lookup( diff --git a/editor/src/editor/mvc/record_update.rs b/editor/src/editor/mvc/record_update.rs index 0b8c911ab6..d27bb0c5d0 100644 --- a/editor/src/editor/mvc/record_update.rs +++ b/editor/src/editor/mvc/record_update.rs @@ -1,23 +1,26 @@ use crate::editor::ed_error::EdResult; use crate::editor::ed_error::MissingParent; use crate::editor::ed_error::RecordWithoutFields; -use crate::editor::markup::attribute::Attributes; -use crate::editor::markup::common_nodes::new_blank_mn; -use crate::editor::markup::common_nodes::new_left_accolade_mn; -use crate::editor::markup::common_nodes::new_right_accolade_mn; -use crate::editor::markup::nodes; -use crate::editor::markup::nodes::MarkupNode; use crate::editor::mvc::app_update::InputOutcome; use crate::editor::mvc::ed_model::EdModel; use crate::editor::mvc::ed_update::get_node_context; use crate::editor::mvc::ed_update::NodeContext; -use crate::editor::slow_pool::MarkNodeId; -use crate::editor::syntax_highlight::HighlightStyle; use crate::editor::util::index_of; -use crate::lang::ast::{Expr2, ExprId, RecordField}; -use crate::lang::parse::ASTNodeId; -use crate::lang::pool::{PoolStr, PoolVec}; use crate::ui::text::text_pos::TextPos; +use roc_ast::lang::core::ast::ASTNodeId; +use roc_ast::lang::core::expr::expr2::Expr2; +use roc_ast::lang::core::expr::expr2::ExprId; +use roc_ast::lang::core::expr::record_field::RecordField; +use roc_ast::mem_pool::pool_str::PoolStr; +use roc_ast::mem_pool::pool_vec::PoolVec; +use roc_code_markup::markup::attribute::Attributes; +use 
roc_code_markup::markup::common_nodes::new_blank_mn; +use roc_code_markup::markup::common_nodes::new_left_accolade_mn; +use roc_code_markup::markup::common_nodes::new_right_accolade_mn; +use roc_code_markup::markup::nodes; +use roc_code_markup::markup::nodes::MarkupNode; +use roc_code_markup::slow_pool::MarkNodeId; +use roc_code_markup::syntax_highlight::HighlightStyle; use snafu::OptionExt; pub fn start_new_record(ed_model: &mut EdModel) -> EdResult { @@ -123,7 +126,7 @@ pub fn update_empty_record( content: new_input.to_owned(), ast_node_id, syn_high_style: HighlightStyle::RecordField, - attributes: Attributes::new(), + attributes: Attributes::default(), parent_id_opt, newlines_at_end: 0, }; @@ -232,7 +235,7 @@ pub fn update_record_colon( content: record_colon.to_owned(), ast_node_id: ASTNodeId::AExprId(record_ast_node_id), syn_high_style: HighlightStyle::Operator, - attributes: Attributes::new(), + attributes: Attributes::default(), parent_id_opt: Some(parent_id), newlines_at_end: 0, }; diff --git a/editor/src/editor/mvc/string_update.rs b/editor/src/editor/mvc/string_update.rs index 2c35ca89fd..8ee2ddd881 100644 --- a/editor/src/editor/mvc/string_update.rs +++ b/editor/src/editor/mvc/string_update.rs @@ -1,16 +1,17 @@ +use roc_ast::lang::core::expr::expr2::ArrString; +use roc_ast::lang::core::expr::expr2::Expr2; +use roc_ast::lang::core::str::update_str_expr; +use roc_ast::mem_pool::pool_str::PoolStr; +use roc_code_markup::markup::attribute::Attributes; +use roc_code_markup::markup::nodes; +use roc_code_markup::markup::nodes::MarkupNode; +use roc_code_markup::syntax_highlight::HighlightStyle; + use crate::editor::ed_error::EdResult; -use crate::editor::markup::attribute::Attributes; -use crate::editor::markup::nodes; -use crate::editor::markup::nodes::MarkupNode; use crate::editor::mvc::app_update::InputOutcome; use crate::editor::mvc::ed_model::EdModel; use crate::editor::mvc::ed_update::get_node_context; use crate::editor::mvc::ed_update::NodeContext; -use 
crate::editor::syntax_highlight::HighlightStyle; -use crate::lang::ast::update_str_expr; -use crate::lang::ast::ArrString; -use crate::lang::ast::Expr2; -use crate::lang::pool::PoolStr; pub fn update_small_string( new_char: &char, @@ -149,7 +150,7 @@ pub fn start_new_string(ed_model: &mut EdModel) -> EdResult { content: nodes::STRING_QUOTES.to_owned(), ast_node_id, syn_high_style: HighlightStyle::String, - attributes: Attributes::new(), + attributes: Attributes::default(), parent_id_opt, newlines_at_end: curr_mark_node_nls, }; diff --git a/editor/src/editor/mvc/tld_value_update.rs b/editor/src/editor/mvc/tld_value_update.rs index 8cdb85feba..6ca2f8e0fd 100644 --- a/editor/src/editor/mvc/tld_value_update.rs +++ b/editor/src/editor/mvc/tld_value_update.rs @@ -1,23 +1,28 @@ +use roc_ast::{ + lang::{ + core::{ + ast::ASTNodeId, + def::def2::Def2, + expr::expr2::Expr2, + pattern::{get_identifier_string, Pattern2}, + }, + env::Env, + }, + mem_pool::pool::NodeId, +}; +use roc_code_markup::{ + markup::{ + attribute::Attributes, + common_nodes::{new_blank_mn_w_nls, new_equals_mn}, + nodes::{set_parent_for_all, MarkupNode}, + }, + slow_pool::{MarkNodeId, SlowPool}, + syntax_highlight::HighlightStyle, +}; use roc_module::symbol::{Interns, Symbol}; use crate::{ - editor::{ - ed_error::{EdResult, FailedToUpdateIdentIdName, KeyNotFound}, - markup::{ - attribute::Attributes, - common_nodes::{new_blank_mn_w_nls, new_equals_mn}, - nodes::{set_parent_for_all, MarkupNode}, - }, - slow_pool::{MarkNodeId, SlowPool}, - syntax_highlight::HighlightStyle, - }, - lang::{ - ast::{Def2, Expr2}, - expr::Env, - parse::ASTNodeId, - pattern::{get_identifier_string, Pattern2}, - pool::NodeId, - }, + editor::ed_error::{EdResult, FailedToUpdateIdentIdName, KeyNotFound}, ui::text::text_pos::TextPos, }; @@ -44,7 +49,7 @@ pub fn tld_mark_node<'a>( content: val_name, ast_node_id, syn_high_style: HighlightStyle::Variable, - attributes: Attributes::new(), + attributes: Attributes::default(), 
parent_id_opt: None, newlines_at_end: 0, }; diff --git a/editor/src/editor/render_ast.rs b/editor/src/editor/render_ast.rs index 3fa42bc1de..00d035a692 100644 --- a/editor/src/editor/render_ast.rs +++ b/editor/src/editor/render_ast.rs @@ -1,11 +1,10 @@ -use super::markup::nodes::{MarkupNode, BLANK_PLACEHOLDER}; -use super::slow_pool::MarkNodeId; use crate::editor::mvc::ed_view::RenderedWgpu; -use crate::editor::slow_pool::SlowPool; use crate::editor::{ed_error::EdResult, theme::EdTheme, util::map_get}; use crate::graphics::primitives::rect::Rect; use crate::graphics::primitives::text as gr_text; use cgmath::Vector2; +use roc_code_markup::markup::nodes::{MarkupNode, BLANK_PLACEHOLDER}; +use roc_code_markup::slow_pool::{MarkNodeId, SlowPool}; use winit::dpi::PhysicalSize; use crate::{editor::config::Config, graphics::colors}; diff --git a/editor/src/editor/render_debug.rs b/editor/src/editor/render_debug.rs index 36e4377d80..2fc1f23863 100644 --- a/editor/src/editor/render_debug.rs +++ b/editor/src/editor/render_debug.rs @@ -1,11 +1,11 @@ use crate::editor::ed_error::EdResult; -use crate::editor::markup::nodes::tree_as_string; use crate::editor::mvc::ed_model::EdModel; use crate::graphics::colors; use crate::graphics::colors::from_hsb; use crate::graphics::primitives::text as gr_text; -use crate::lang::ast::def2_to_string; use cgmath::Vector2; +use roc_ast::lang::core::def::def2::def2_to_string; +use roc_code_markup::markup::nodes::tree_as_string; use winit::dpi::PhysicalSize; use crate::editor::config::Config; diff --git a/editor/src/editor/theme.rs b/editor/src/editor/theme.rs index f0809cd890..30e45a47a9 100644 --- a/editor/src/editor/theme.rs +++ b/editor/src/editor/theme.rs @@ -1,8 +1,8 @@ use gr_colors::{from_hsb, RgbaTup}; +use roc_code_markup::syntax_highlight::{default_highlight_map, HighlightStyle}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; -use crate::editor::syntax_highlight::{default_highlight_map, HighlightStyle}; use 
crate::graphics::colors as gr_colors; use crate::ui::theme::UITheme; diff --git a/editor/src/graphics/lowlevel/buffer.rs b/editor/src/graphics/lowlevel/buffer.rs index f9cf9fc9a4..723e5957ef 100644 --- a/editor/src/graphics/lowlevel/buffer.rs +++ b/editor/src/graphics/lowlevel/buffer.rs @@ -105,7 +105,7 @@ pub fn create_rect_buffers( let vertex_buffer = gpu_device.create_buffer(&wgpu::BufferDescriptor { label: None, size: Vertex::SIZE * 4 * nr_of_rects, - usage: wgpu::BufferUsage::VERTEX | wgpu::BufferUsage::COPY_DST, + usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST, mapped_at_creation: false, }); @@ -114,7 +114,7 @@ pub fn create_rect_buffers( let index_buffer = gpu_device.create_buffer(&wgpu::BufferDescriptor { label: None, size: u32_size * 6 * nr_of_rects, - usage: wgpu::BufferUsage::INDEX | wgpu::BufferUsage::COPY_DST, + usage: wgpu::BufferUsages::INDEX | wgpu::BufferUsages::COPY_DST, mapped_at_creation: false, }); @@ -148,7 +148,7 @@ impl StagingBuffer { StagingBuffer { buffer: device.create_buffer_init(&BufferInitDescriptor { contents: bytemuck::cast_slice(data), - usage: wgpu::BufferUsage::COPY_SRC, + usage: wgpu::BufferUsages::COPY_SRC, label: Some("Staging Buffer"), }), size: size_of_slice(data) as wgpu::BufferAddress, diff --git a/editor/src/graphics/lowlevel/ortho.rs b/editor/src/graphics/lowlevel/ortho.rs index 0b5871bb93..2f4577871a 100644 --- a/editor/src/graphics/lowlevel/ortho.rs +++ b/editor/src/graphics/lowlevel/ortho.rs @@ -2,7 +2,7 @@ use cgmath::{Matrix4, Ortho}; use wgpu::util::DeviceExt; use wgpu::{ BindGroup, BindGroupLayout, BindGroupLayoutDescriptor, BindGroupLayoutEntry, Buffer, - ShaderStage, + ShaderStages, }; // orthographic projection is used to transform pixel coords to the coordinate system used by wgpu @@ -45,7 +45,7 @@ pub fn update_ortho_buffer( let new_ortho_buffer = gpu_device.create_buffer_init(&wgpu::util::BufferInitDescriptor { label: Some("Ortho uniform buffer"), contents: 
bytemuck::cast_slice(&[new_uniforms]), - usage: wgpu::BufferUsage::UNIFORM | wgpu::BufferUsage::COPY_SRC, + usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_SRC, }); // get a command encoder for the current frame @@ -83,14 +83,14 @@ pub fn init_ortho( let ortho_buffer = gpu_device.create_buffer_init(&wgpu::util::BufferInitDescriptor { label: Some("Ortho uniform buffer"), contents: bytemuck::cast_slice(&[uniforms]), - usage: wgpu::BufferUsage::UNIFORM | wgpu::BufferUsage::COPY_DST, + usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, }); // bind groups consist of extra resources that are provided to the shaders let ortho_bind_group_layout = gpu_device.create_bind_group_layout(&BindGroupLayoutDescriptor { entries: &[BindGroupLayoutEntry { binding: 0, - visibility: ShaderStage::VERTEX, + visibility: ShaderStages::VERTEX, ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Uniform, has_dynamic_offset: false, diff --git a/editor/src/graphics/lowlevel/pipelines.rs b/editor/src/graphics/lowlevel/pipelines.rs index 810766ccfd..5bc768a33f 100644 --- a/editor/src/graphics/lowlevel/pipelines.rs +++ b/editor/src/graphics/lowlevel/pipelines.rs @@ -9,9 +9,9 @@ pub struct RectResources { pub fn make_rect_pipeline( gpu_device: &wgpu::Device, - swap_chain_descr: &wgpu::SwapChainDescriptor, + surface_config: &wgpu::SurfaceConfiguration, ) -> RectResources { - let ortho = init_ortho(swap_chain_descr.width, swap_chain_descr.height, gpu_device); + let ortho = init_ortho(surface_config.width, surface_config.height, gpu_device); let pipeline_layout = gpu_device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { bind_group_layouts: &[&ortho.bind_group_layout], @@ -21,11 +21,10 @@ pub fn make_rect_pipeline( let pipeline = create_render_pipeline( gpu_device, &pipeline_layout, - swap_chain_descr.format, + surface_config.format, &wgpu::ShaderModuleDescriptor { label: None, source: 
wgpu::ShaderSource::Wgsl(Cow::Borrowed(include_str!("../shaders/shader.wgsl"))), - flags: wgpu::ShaderFlags::all(), }, ); @@ -61,7 +60,7 @@ pub fn create_render_pipeline( }, alpha: wgpu::BlendComponent::REPLACE, }), - write_mask: wgpu::ColorWrite::ALL, + write_mask: wgpu::ColorWrites::ALL, }], }), primitive: wgpu::PrimitiveState::default(), diff --git a/editor/src/graphics/lowlevel/vertex.rs b/editor/src/graphics/lowlevel/vertex.rs index 64cf9fac1f..f17a840bb2 100644 --- a/editor/src/graphics/lowlevel/vertex.rs +++ b/editor/src/graphics/lowlevel/vertex.rs @@ -18,7 +18,7 @@ impl Vertex { pub const SIZE: wgpu::BufferAddress = std::mem::size_of::() as wgpu::BufferAddress; pub const DESC: wgpu::VertexBufferLayout<'static> = wgpu::VertexBufferLayout { array_stride: Self::SIZE, - step_mode: wgpu::InputStepMode::Vertex, + step_mode: wgpu::VertexStepMode::Vertex, attributes: &[ // position wgpu::VertexAttribute { diff --git a/editor/src/lang/ast.rs b/editor/src/lang/ast.rs deleted file mode 100644 index 3fffb3cdad..0000000000 --- a/editor/src/lang/ast.rs +++ /dev/null @@ -1,742 +0,0 @@ -#![allow(clippy::manual_map)] - -use std::collections::{HashMap, HashSet}; -use std::hash::BuildHasherDefault; - -use crate::editor::ed_error::{EdResult, UnexpectedASTNode}; -use crate::lang::pattern::{Pattern2, PatternId}; -use crate::lang::pool::Pool; -use crate::lang::pool::{NodeId, PoolStr, PoolVec, ShallowClone}; -use crate::lang::types::{Type2, TypeId}; -use arraystring::{typenum::U30, ArrayString}; -use roc_can::expr::Recursive; -use roc_collections::all::WyHash; -use roc_module::low_level::LowLevel; -use roc_module::operator::CalledVia; -use roc_module::symbol::Symbol; -use roc_types::subs::Variable; - -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub enum Problem { - RanOutOfNodeIds, -} - -pub type Res = Result; - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum IntStyle { - Decimal, - Octal, - Hex, - Binary, -} - -impl IntStyle { - pub fn from_base(base: 
roc_parse::ast::Base) -> Self { - use roc_parse::ast::Base; - match base { - Base::Decimal => Self::Decimal, - Base::Octal => Self::Octal, - Base::Hex => Self::Hex, - Base::Binary => Self::Binary, - } - } -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum IntVal { - I64(i64), - U64(u64), - I32(i32), - U32(u32), - I16(i16), - U16(u16), - I8(i8), - U8(u8), -} - -#[derive(Debug, Copy, Clone, PartialEq)] -pub enum FloatVal { - F64(f64), - F32(f32), -} - -#[derive(Debug)] -pub enum RecordField { - InvalidLabelOnly(PoolStr, Variable), - LabelOnly(PoolStr, Variable, Symbol), - LabeledValue(PoolStr, Variable, ExprId), -} - -#[test] -fn size_of_intval() { - assert_eq!(std::mem::size_of::(), 16); -} - -pub type ArrString = ArrayString; - -/// An Expr that fits in 32B. -/// It has a 1B discriminant and variants which hold payloads of at most 31B. -#[derive(Debug)] -pub enum Expr2 { - /// A negative number literal without a dot - SmallInt { - number: IntVal, // 16B - var: Variable, // 4B - style: IntStyle, // 1B - text: PoolStr, // 8B - }, - // TODO(rvcas): rename this eventually - /// A large (over 64-bit) negative number literal without a dot. - /// This variant can't use IntVal because if IntVal stored 128-bit - /// integers, it would be 32B on its own because of alignment. - I128 { - number: i128, // 16B - var: Variable, // 4B - style: IntStyle, // 1B - text: PoolStr, // 8B - }, - // TODO(rvcas): rename this eventually - /// A large (over 64-bit) nonnegative number literal without a dot - /// This variant can't use IntVal because if IntVal stored 128-bit - /// integers, it would be 32B on its own because of alignment. 
- U128 { - number: u128, // 16B - var: Variable, // 4B - style: IntStyle, // 1B - text: PoolStr, // 8B - }, - /// A floating-point literal (with a dot) - Float { - number: FloatVal, // 16B - var: Variable, // 4B - text: PoolStr, // 8B - }, - /// string literals of length up to 30B - SmallStr(ArrString), // 31B - /// string literals of length 31B or more - Str(PoolStr), // 8B - // Lookups - Var(Symbol), // 8B - InvalidLookup(PoolStr), // 8B - - List { - elem_var: Variable, // 4B - elems: PoolVec, // 8B - }, - If { - cond_var: Variable, // 4B - expr_var: Variable, // 4B - branches: PoolVec<(ExprId, ExprId)>, // 8B - final_else: ExprId, // 4B - }, - When { - cond_var: Variable, // 4B - expr_var: Variable, // 4B - branches: PoolVec, // 8B - cond: ExprId, // 4B - }, - LetRec { - defs: PoolVec, // 8B - body_var: Variable, // 8B - body_id: ExprId, // 4B - }, - LetFunction { - def_id: NodeId, // 4B - body_var: Variable, // 8B - body_id: ExprId, // 4B - }, - LetValue { - def_id: NodeId, // 4B - body_id: ExprId, // 4B - body_var: Variable, // 4B - }, - Call { - args: PoolVec<(Variable, ExprId)>, // 8B - expr: ExprId, // 4B - expr_var: Variable, // 4B - fn_var: Variable, // 4B - closure_var: Variable, // 4B - called_via: CalledVia, // 2B - }, - RunLowLevel { - op: LowLevel, // 1B - args: PoolVec<(Variable, ExprId)>, // 8B - ret_var: Variable, // 4B - }, - Closure { - args: PoolVec<(Variable, NodeId)>, // 8B - name: Symbol, // 8B - body: ExprId, // 4B - function_type: Variable, // 4B - recursive: Recursive, // 1B - extra: NodeId, // 4B - }, - // Product Types - Record { - record_var: Variable, // 4B - fields: PoolVec, // 8B - }, - /// Empty record constant - EmptyRecord, - /// Look up exactly one field on a record, e.g. (expr).foo. - Access { - field: PoolStr, // 4B - expr: ExprId, // 4B - record_var: Variable, // 4B - ext_var: Variable, // 4B - field_var: Variable, // 4B - }, - - /// field accessor as a function, e.g. 
(.foo) expr - Accessor { - function_var: Variable, // 4B - closure_var: Variable, // 4B - field: PoolStr, // 4B - record_var: Variable, // 4B - ext_var: Variable, // 4B - field_var: Variable, // 4B - }, - Update { - symbol: Symbol, // 8B - updates: PoolVec, // 8B - record_var: Variable, // 4B - ext_var: Variable, // 4B - }, - - // Sum Types - GlobalTag { - name: PoolStr, // 4B - variant_var: Variable, // 4B - ext_var: Variable, // 4B - arguments: PoolVec<(Variable, ExprId)>, // 8B - }, - PrivateTag { - name: Symbol, // 8B - variant_var: Variable, // 4B - ext_var: Variable, // 4B - arguments: PoolVec<(Variable, ExprId)>, // 8B - }, - Blank, // Rendered as empty box in editor - - // Compiles, but will crash if reached - RuntimeError(/* TODO make a version of RuntimeError that fits in 15B */), -} - -// A top level definition, not inside a function. For example: `main = "Hello, world!"` -#[derive(Debug)] -pub enum Def2 { - // ValueDef example: `main = "Hello, world!"`. identifier -> `main`, expr -> "Hello, world!" 
- ValueDef { - identifier_id: NodeId, - expr_id: NodeId, - }, - Blank, -} - -#[derive(Debug)] -pub enum ValueDef { - WithAnnotation { - pattern_id: PatternId, // 4B - expr_id: ExprId, // 4B - type_id: TypeId, - rigids: Rigids, - expr_var: Variable, // 4B - }, - NoAnnotation { - pattern_id: PatternId, // 4B - expr_id: ExprId, // 4B - expr_var: Variable, // 4B - }, -} - -impl ShallowClone for ValueDef { - fn shallow_clone(&self) -> Self { - match self { - Self::WithAnnotation { - pattern_id, - expr_id, - type_id, - rigids, - expr_var, - } => Self::WithAnnotation { - pattern_id: *pattern_id, - expr_id: *expr_id, - type_id: *type_id, - rigids: rigids.shallow_clone(), - expr_var: *expr_var, - }, - Self::NoAnnotation { - pattern_id, - expr_id, - expr_var, - } => Self::NoAnnotation { - pattern_id: *pattern_id, - expr_id: *expr_id, - expr_var: *expr_var, - }, - } - } -} - -impl ValueDef { - pub fn get_expr_id(&self) -> ExprId { - match self { - ValueDef::WithAnnotation { expr_id, .. } => *expr_id, - ValueDef::NoAnnotation { expr_id, .. } => *expr_id, - } - } - - pub fn get_pattern_id(&self) -> NodeId { - match self { - ValueDef::WithAnnotation { pattern_id, .. } => *pattern_id, - ValueDef::NoAnnotation { pattern_id, .. 
} => *pattern_id, - } - } -} - -pub fn value_def_to_string(val_def: &ValueDef, pool: &Pool) -> String { - match val_def { - ValueDef::WithAnnotation { - pattern_id, - expr_id, - type_id, - rigids, - expr_var, - } => { - format!("WithAnnotation {{ pattern_id: {:?}, expr_id: {:?}, type_id: {:?}, rigids: {:?}, expr_var: {:?}}}", pool.get(*pattern_id), expr2_to_string(*expr_id, pool), pool.get(*type_id), rigids, expr_var) - } - ValueDef::NoAnnotation { - pattern_id, - expr_id, - expr_var, - } => { - format!( - "NoAnnotation {{ pattern_id: {:?}, expr_id: {:?}, expr_var: {:?}}}", - pool.get(*pattern_id), - expr2_to_string(*expr_id, pool), - expr_var - ) - } - } -} - -#[derive(Debug)] -pub enum FunctionDef { - WithAnnotation { - name: Symbol, // 8B - arguments: PoolVec<(PatternId, Type2)>, // 8B - rigids: NodeId, // 4B - return_type: TypeId, // 4B - body: ExprId, // 4B - }, - NoAnnotation { - name: Symbol, // 8B - arguments: PoolVec<(PatternId, Variable)>, // 8B - return_var: Variable, // 4B - body: ExprId, // 4B - }, -} - -impl ShallowClone for FunctionDef { - fn shallow_clone(&self) -> Self { - match self { - Self::WithAnnotation { - name, - arguments, - rigids, - return_type, - body, - } => Self::WithAnnotation { - name: *name, - arguments: arguments.shallow_clone(), - rigids: *rigids, - return_type: *return_type, - body: *body, - }, - - Self::NoAnnotation { - name, - arguments, - return_var, - body, - } => Self::NoAnnotation { - name: *name, - arguments: arguments.shallow_clone(), - return_var: *return_var, - body: *body, - }, - } - } -} - -#[derive(Debug)] -pub struct Rigids { - pub names: PoolVec<(Option, Variable)>, // 8B - padding: [u8; 1], -} - -#[allow(clippy::needless_collect)] -impl Rigids { - pub fn new( - named: HashMap<&str, Variable, BuildHasherDefault>, - unnamed: HashSet>, - pool: &mut Pool, - ) -> Self { - let names = PoolVec::with_capacity((named.len() + unnamed.len()) as u32, pool); - - let mut temp_names = Vec::new(); - - 
temp_names.extend(named.iter().map(|(name, var)| (Some(*name), *var))); - - temp_names.extend(unnamed.iter().map(|var| (None, *var))); - - for (node_id, (opt_name, variable)) in names.iter_node_ids().zip(temp_names) { - let poolstr = opt_name.map(|name| PoolStr::new(name, pool)); - - pool[node_id] = (poolstr, variable); - } - - Self { - names, - padding: Default::default(), - } - } - - pub fn named(&self, pool: &mut Pool) -> PoolVec<(PoolStr, Variable)> { - let named = self - .names - .iter(pool) - .filter_map(|(opt_pool_str, var)| { - if let Some(pool_str) = opt_pool_str { - Some((*pool_str, *var)) - } else { - None - } - }) - .collect::>(); - - PoolVec::new(named.into_iter(), pool) - } - - pub fn unnamed(&self, pool: &mut Pool) -> PoolVec { - let unnamed = self - .names - .iter(pool) - .filter_map(|(opt_pool_str, var)| { - if opt_pool_str.is_none() { - Some(*var) - } else { - None - } - }) - .collect::>(); - - PoolVec::new(unnamed.into_iter(), pool) - } -} - -/// This is overflow data from a Closure variant, which needs to store -/// more than 32B of total data -#[derive(Debug)] -pub struct ClosureExtra { - pub return_type: Variable, // 4B - pub captured_symbols: PoolVec<(Symbol, Variable)>, // 8B - pub closure_type: Variable, // 4B - pub closure_ext_var: Variable, // 4B -} - -#[derive(Debug)] -pub struct WhenBranch { - pub patterns: PoolVec, // 4B - pub body: ExprId, // 3B - pub guard: Option, // 4B -} - -// TODO make the inner types private? 
-pub type ExprId = NodeId; - -pub type DefId = NodeId; - -use RecordField::*; - -use super::parse::ASTNodeId; -impl RecordField { - pub fn get_record_field_var(&self) -> &Variable { - match self { - InvalidLabelOnly(_, var) => var, - LabelOnly(_, var, _) => var, - LabeledValue(_, var, _) => var, - } - } - - pub fn get_record_field_pool_str(&self) -> &PoolStr { - match self { - InvalidLabelOnly(pool_str, _) => pool_str, - LabelOnly(pool_str, _, _) => pool_str, - LabeledValue(pool_str, _, _) => pool_str, - } - } - - pub fn get_record_field_pool_str_mut(&mut self) -> &mut PoolStr { - match self { - InvalidLabelOnly(pool_str, _) => pool_str, - LabelOnly(pool_str, _, _) => pool_str, - LabeledValue(pool_str, _, _) => pool_str, - } - } - - pub fn get_record_field_val_node_id(&self) -> Option { - match self { - InvalidLabelOnly(_, _) => None, - LabelOnly(_, _, _) => None, - LabeledValue(_, _, field_val_id) => Some(*field_val_id), - } - } -} - -pub fn ast_node_to_string(node_id: ASTNodeId, pool: &Pool) -> String { - match node_id { - ASTNodeId::ADefId(def_id) => def2_to_string(def_id, pool), - ASTNodeId::AExprId(expr_id) => expr2_to_string(expr_id, pool), - } -} - -pub fn expr2_to_string(node_id: ExprId, pool: &Pool) -> String { - let mut full_string = String::new(); - let expr2 = pool.get(node_id); - - expr2_to_string_helper(expr2, 0, pool, &mut full_string); - - full_string -} - -fn get_spacing(indent_level: usize) -> String { - std::iter::repeat(" ") - .take(indent_level) - .collect::>() - .join("") -} - -fn expr2_to_string_helper( - expr2: &Expr2, - indent_level: usize, - pool: &Pool, - out_string: &mut String, -) { - out_string.push_str(&get_spacing(indent_level)); - - match expr2 { - Expr2::SmallStr(arr_string) => out_string.push_str(&format!( - "{}{}{}", - "SmallStr(\"", - arr_string.as_str(), - "\")", - )), - Expr2::Str(pool_str) => { - out_string.push_str(&format!("{}{}{}", "Str(\"", pool_str.as_str(pool), "\")",)) - } - Expr2::Blank => 
out_string.push_str("Blank"), - Expr2::EmptyRecord => out_string.push_str("EmptyRecord"), - Expr2::Record { record_var, fields } => { - out_string.push_str("Record:\n"); - out_string.push_str(&var_to_string(record_var, indent_level + 1)); - - out_string.push_str(&format!("{}fields: [\n", get_spacing(indent_level + 1))); - - let mut first_child = true; - - for field in fields.iter(pool) { - if !first_child { - out_string.push_str(", ") - } else { - first_child = false; - } - - match field { - RecordField::InvalidLabelOnly(pool_str, var) => { - out_string.push_str(&format!( - "{}({}, Var({:?})", - get_spacing(indent_level + 2), - pool_str.as_str(pool), - var, - )); - } - RecordField::LabelOnly(pool_str, var, symbol) => { - out_string.push_str(&format!( - "{}({}, Var({:?}), Symbol({:?})", - get_spacing(indent_level + 2), - pool_str.as_str(pool), - var, - symbol - )); - } - RecordField::LabeledValue(pool_str, var, val_node_id) => { - out_string.push_str(&format!( - "{}({}, Var({:?}), Expr2(\n", - get_spacing(indent_level + 2), - pool_str.as_str(pool), - var, - )); - - let val_expr2 = pool.get(*val_node_id); - expr2_to_string_helper(val_expr2, indent_level + 3, pool, out_string); - out_string.push_str(&format!("{})\n", get_spacing(indent_level + 2))); - } - } - } - - out_string.push_str(&format!("{}]\n", get_spacing(indent_level + 1))); - } - Expr2::List { elem_var, elems } => { - out_string.push_str("List:\n"); - out_string.push_str(&var_to_string(elem_var, indent_level + 1)); - out_string.push_str(&format!("{}elems: [\n", get_spacing(indent_level + 1))); - - let mut first_elt = true; - - for elem_expr2_id in elems.iter(pool) { - if !first_elt { - out_string.push_str(", ") - } else { - first_elt = false; - } - - let elem_expr2 = pool.get(*elem_expr2_id); - - expr2_to_string_helper(elem_expr2, indent_level + 2, pool, out_string) - } - - out_string.push_str(&format!("{}]\n", get_spacing(indent_level + 1))); - } - Expr2::InvalidLookup(pool_str) => { - 
out_string.push_str(&format!("InvalidLookup({})", pool_str.as_str(pool))); - } - Expr2::SmallInt { text, .. } => { - out_string.push_str(&format!("SmallInt({})", text.as_str(pool))); - } - Expr2::LetValue { - def_id, body_id, .. - } => { - out_string.push_str(&format!( - "LetValue(def_id: >>{:?}), body_id: >>{:?})", - value_def_to_string(pool.get(*def_id), pool), - pool.get(*body_id) - )); - } - other => todo!("Implement for {:?}", other), - } - - out_string.push('\n'); -} - -pub fn def2_to_string(node_id: DefId, pool: &Pool) -> String { - let mut full_string = String::new(); - let def2 = pool.get(node_id); - - match def2 { - Def2::ValueDef { - identifier_id, - expr_id, - } => { - full_string.push_str(&format!( - "Def2::ValueDef(identifier_id: >>{:?}), expr_id: >>{:?})", - pool.get(*identifier_id), - expr2_to_string(*expr_id, pool) - )); - } - Def2::Blank => { - full_string.push_str("Def2::Blank"); - } - } - - full_string -} - -fn var_to_string(some_var: &Variable, indent_level: usize) -> String { - format!("{}Var({:?})\n", get_spacing(indent_level + 1), some_var) -} - -// get string from SmallStr or Str -pub fn get_string_from_expr2(node_id: ExprId, pool: &Pool) -> EdResult { - match pool.get(node_id) { - Expr2::SmallStr(arr_string) => Ok(arr_string.as_str().to_string()), - Expr2::Str(pool_str) => Ok(pool_str.as_str(pool).to_owned()), - other => UnexpectedASTNode { - required_node_type: "SmallStr or Str", - encountered_node_type: format!("{:?}", other), - } - .fail()?, - } -} - -pub fn update_str_expr( - node_id: ExprId, - new_char: char, - insert_index: usize, - pool: &mut Pool, -) -> EdResult<()> { - let str_expr = pool.get_mut(node_id); - - enum Either { - MyString(String), - MyPoolStr(PoolStr), - Done, - } - - let insert_either = match str_expr { - Expr2::SmallStr(arr_string) => { - let insert_res = arr_string.try_insert(insert_index as u8, new_char); - - match insert_res { - Ok(_) => Either::Done, - _ => { - let mut new_string = 
arr_string.as_str().to_string(); - new_string.insert(insert_index, new_char); - - Either::MyString(new_string) - } - } - } - Expr2::Str(old_pool_str) => Either::MyPoolStr(*old_pool_str), - other => UnexpectedASTNode { - required_node_type: "SmallStr or Str", - encountered_node_type: format!("{:?}", other), - } - .fail()?, - }; - - match insert_either { - Either::MyString(new_string) => { - let new_pool_str = PoolStr::new(&new_string, pool); - - pool.set(node_id, Expr2::Str(new_pool_str)) - } - Either::MyPoolStr(old_pool_str) => { - let mut new_string = old_pool_str.as_str(pool).to_owned(); - - new_string.insert(insert_index, new_char); - - let new_pool_str = PoolStr::new(&new_string, pool); - - pool.set(node_id, Expr2::Str(new_pool_str)) - } - Either::Done => (), - } - - Ok(()) -} - -#[test] -fn size_of_expr() { - assert_eq!(std::mem::size_of::(), crate::lang::pool::NODE_BYTES); -} - -impl ShallowClone for Rigids { - fn shallow_clone(&self) -> Self { - Self { - names: self.names.shallow_clone(), - padding: self.padding, - } - } -} diff --git a/editor/src/lang/expr.rs b/editor/src/lang/expr.rs deleted file mode 100644 index a50c315f94..0000000000 --- a/editor/src/lang/expr.rs +++ /dev/null @@ -1,1555 +0,0 @@ -#![allow(clippy::all)] -#![allow(dead_code)] -#![allow(unused_imports)] -use bumpalo::{collections::Vec as BumpVec, Bump}; -use std::collections::HashMap; -use std::iter::FromIterator; - -use crate::lang::ast::{ - expr2_to_string, value_def_to_string, ClosureExtra, Def2, Expr2, ExprId, FloatVal, IntStyle, - IntVal, RecordField, ValueDef, WhenBranch, -}; -use crate::lang::def::{ - canonicalize_defs, sort_can_defs, CanDefs, Declaration, Def, PendingDef, References, -}; -use crate::lang::pattern::{to_pattern2, Pattern2, PatternId}; -use crate::lang::pool::{NodeId, Pool, PoolStr, PoolVec, ShallowClone}; -use crate::lang::scope::Scope; -use crate::lang::types::{Alias, Annotation2, Type2, TypeId}; - -use roc_can::expr::Recursive; -use 
roc_can::num::{finish_parsing_base, finish_parsing_float, finish_parsing_int}; -use roc_can::operator::desugar_expr; -use roc_collections::all::default_hasher; -use roc_collections::all::{MutMap, MutSet}; -use roc_module::ident::{Ident, Lowercase, ModuleName}; -use roc_module::low_level::LowLevel; -use roc_module::operator::CalledVia; -use roc_module::symbol::{IdentIds, ModuleId, ModuleIds, Symbol}; -use roc_parse::ast; -use roc_parse::ast::Expr; -use roc_parse::ast::StrLiteral; -use roc_parse::parser::{loc, Parser, State, SyntaxError}; -use roc_parse::pattern::PatternType; -use roc_problem::can::{Problem, RuntimeError}; -use roc_region::all::{Located, Region}; -use roc_types::subs::{VarStore, Variable}; - -#[derive(Clone, Debug, PartialEq, Default)] -pub struct IntroducedVariables { - // Rigids must be unique within a type annoation. - // E.g. in `identity : a -> a`, there should only be one - // variable (a rigid one, with name "a"). - // Hence `rigids : Map` - // - // But then between annotations, the same name can occur multiple times, - // but a variable can only have one name. Therefore - // `ftv : Map`. 
- pub wildcards: Vec, - pub var_by_name: MutMap, - pub name_by_var: MutMap, - pub host_exposed_aliases: MutMap, -} - -impl IntroducedVariables { - pub fn insert_named(&mut self, name: Lowercase, var: Variable) { - self.var_by_name.insert(name.clone(), var); - self.name_by_var.insert(var, name); - } - - pub fn insert_wildcard(&mut self, var: Variable) { - self.wildcards.push(var); - } - - pub fn insert_host_exposed_alias(&mut self, symbol: Symbol, var: Variable) { - self.host_exposed_aliases.insert(symbol, var); - } - - pub fn union(&mut self, other: &Self) { - self.wildcards.extend(other.wildcards.iter().cloned()); - self.var_by_name.extend(other.var_by_name.clone()); - self.name_by_var.extend(other.name_by_var.clone()); - self.host_exposed_aliases - .extend(other.host_exposed_aliases.clone()); - } - - pub fn var_by_name(&self, name: &Lowercase) -> Option<&Variable> { - self.var_by_name.get(name) - } - - pub fn name_by_var(&self, var: Variable) -> Option<&Lowercase> { - self.name_by_var.get(&var) - } -} - -#[derive(Clone, Default, Debug, PartialEq)] -pub struct Output { - pub references: References, - pub tail_call: Option, - pub introduced_variables: IntroducedVariables, - pub aliases: MutMap>, - pub non_closures: MutSet, -} - -impl Output { - pub fn union(&mut self, other: Self) { - self.references.union_mut(other.references); - - if let (None, Some(later)) = (self.tail_call, other.tail_call) { - self.tail_call = Some(later); - } - - self.aliases.extend(other.aliases); - self.non_closures.extend(other.non_closures); - } -} - -#[derive(Debug)] -pub struct Env<'a> { - pub home: ModuleId, - pub var_store: &'a mut VarStore, - pub pool: &'a mut Pool, - pub arena: &'a Bump, - - pub problems: BumpVec<'a, Problem>, - - pub dep_idents: MutMap, - pub module_ids: &'a ModuleIds, - pub ident_ids: IdentIds, - pub exposed_ident_ids: IdentIds, - - pub closures: MutMap, - /// Symbols which were referenced by qualified lookups. 
- pub qualified_lookups: MutSet, - - pub top_level_symbols: MutSet, - - pub closure_name_symbol: Option, - pub tailcallable_symbol: Option, -} - -impl<'a> Env<'a> { - pub fn new( - home: ModuleId, - arena: &'a Bump, - pool: &'a mut Pool, - var_store: &'a mut VarStore, - dep_idents: MutMap, - module_ids: &'a ModuleIds, - exposed_ident_ids: IdentIds, - ) -> Env<'a> { - Env { - home, - arena, - pool, - problems: BumpVec::new_in(arena), - var_store, - dep_idents, - module_ids, - ident_ids: exposed_ident_ids.clone(), // we start with these, but will add more later - exposed_ident_ids, - closures: MutMap::default(), - qualified_lookups: MutSet::default(), - tailcallable_symbol: None, - closure_name_symbol: None, - top_level_symbols: MutSet::default(), - } - } - - pub fn add(&mut self, item: T, region: Region) -> NodeId { - let id = self.pool.add(item); - self.set_region(id, region); - - id - } - - pub fn problem(&mut self, problem: Problem) { - self.problems.push(problem); - } - - pub fn set_region(&mut self, _node_id: NodeId, _region: Region) { - dbg!("Don't Forget to set the region eventually"); - } - - pub fn register_closure(&mut self, symbol: Symbol, references: References) { - self.closures.insert(symbol, references); - } - - /// Generates a unique, new symbol like "$1" or "$5", - /// using the home module as the module_id. - /// - /// This is used, for example, during canonicalization of an Expr::Closure - /// to generate a unique symbol to refer to that closure. 
- pub fn gen_unique_symbol(&mut self) -> Symbol { - let ident_id = self.ident_ids.gen_unique(); - - Symbol::new(self.home, ident_id) - } - - /// Returns Err if the symbol resolved, but it was not exposed by the given module - pub fn qualified_lookup( - &mut self, - module_name: &str, - ident: &str, - region: Region, - ) -> Result { - debug_assert!( - !module_name.is_empty(), - "Called env.qualified_lookup with an unqualified ident: {:?}", - ident - ); - - let module_name: ModuleName = module_name.into(); - - match self.module_ids.get_id(&module_name) { - Some(&module_id) => { - let ident: Ident = ident.into(); - - // You can do qualified lookups on your own module, e.g. - // if I'm in the Foo module, I can do a `Foo.bar` lookup. - if module_id == self.home { - match self.ident_ids.get_id(&ident) { - Some(ident_id) => { - let symbol = Symbol::new(module_id, *ident_id); - - self.qualified_lookups.insert(symbol); - - Ok(symbol) - } - None => Err(RuntimeError::LookupNotInScope( - Located { - value: ident, - region, - }, - self.ident_ids - .idents() - .map(|(_, string)| string.as_ref().into()) - .collect(), - )), - } - } else { - match self - .dep_idents - .get(&module_id) - .and_then(|exposed_ids| exposed_ids.get_id(&ident)) - { - Some(ident_id) => { - let symbol = Symbol::new(module_id, *ident_id); - - self.qualified_lookups.insert(symbol); - - Ok(symbol) - } - None => Err(RuntimeError::ValueNotExposed { - module_name: ModuleName::from(module_name), - ident, - region, - }), - } - } - } - None => Err(RuntimeError::ModuleNotImported { - module_name, - imported_modules: self - .module_ids - .available_modules() - .map(|string| string.as_ref().into()) - .collect(), - region, - }), - } - } -} - -const ZERO: Region = Region::zero(); - -pub fn as_expr_id<'a>( - env: &mut Env<'a>, - scope: &mut Scope, - expr_id: ExprId, - parse_expr: &'a roc_parse::ast::Expr<'a>, - region: Region, -) -> Output { - let (expr, output) = to_expr2(env, scope, parse_expr, region); - - 
env.pool[expr_id] = expr; - env.set_region(expr_id, region); - - output -} - -pub fn to_expr_id<'a>( - env: &mut Env<'a>, - scope: &mut Scope, - parse_expr: &'a roc_parse::ast::Expr<'a>, - region: Region, -) -> (ExprId, Output) { - let (expr, output) = to_expr2(env, scope, parse_expr, region); - - (env.add(expr, region), output) -} - -pub fn str_to_def2<'a>( - arena: &'a Bump, - input: &'a str, - env: &mut Env<'a>, - scope: &mut Scope, - region: Region, -) -> Result, SyntaxError<'a>> { - match roc_parse::test_helpers::parse_defs_with(arena, input.trim()) { - Ok(vec_loc_def) => Ok(defs_to_defs2( - arena, - env, - scope, - arena.alloc(vec_loc_def), - region, - )), - Err(fail) => Err(fail), - } -} - -pub fn str_to_expr2<'a>( - arena: &'a Bump, - input: &'a str, - env: &mut Env<'a>, - scope: &mut Scope, - region: Region, -) -> Result<(Expr2, self::Output), SyntaxError<'a>> { - match roc_parse::test_helpers::parse_loc_with(arena, input.trim()) { - Ok(loc_expr) => Ok(loc_expr_to_expr2(arena, loc_expr, env, scope, region)), - Err(fail) => Err(fail), - } -} - -fn loc_expr_to_expr2<'a>( - arena: &'a Bump, - loc_expr: Located>, - env: &mut Env<'a>, - scope: &mut Scope, - region: Region, -) -> (Expr2, self::Output) { - let desugared_loc_expr = desugar_expr(arena, arena.alloc(loc_expr)); - - to_expr2(env, scope, arena.alloc(desugared_loc_expr.value), region) -} - -pub fn to_expr2<'a>( - env: &mut Env<'a>, - scope: &mut Scope, - parse_expr: &'a roc_parse::ast::Expr<'a>, - region: Region, -) -> (Expr2, self::Output) { - use roc_parse::ast::Expr::*; - - match parse_expr { - Float(string) => { - match finish_parsing_float(string) { - Ok(float) => { - let expr = Expr2::Float { - number: FloatVal::F64(float), - var: env.var_store.fresh(), - text: PoolStr::new(string, &mut env.pool), - }; - - (expr, Output::default()) - } - Err((raw, error)) => { - // emit runtime error - let runtime_error = RuntimeError::InvalidFloat(error, ZERO, raw.into()); - - 
env.problem(Problem::RuntimeError(runtime_error.clone())); - // - // Expr::RuntimeError(runtime_error) - todo!() - } - } - } - Num(string) => { - match finish_parsing_int(string) { - Ok(int) => { - let expr = Expr2::SmallInt { - number: IntVal::I64(int), - var: env.var_store.fresh(), - // TODO non-hardcode - style: IntStyle::Decimal, - text: PoolStr::new(string, &mut env.pool), - }; - - (expr, Output::default()) - } - Err((raw, error)) => { - // emit runtime error - let runtime_error = RuntimeError::InvalidInt( - error, - roc_parse::ast::Base::Decimal, - ZERO, - raw.into(), - ); - - env.problem(Problem::RuntimeError(runtime_error.clone())); - // - // Expr::RuntimeError(runtime_error) - todo!() - } - } - } - NonBase10Int { - string, - base, - is_negative, - } => { - match finish_parsing_base(string, *base, *is_negative) { - Ok(int) => { - let expr = Expr2::SmallInt { - number: IntVal::I64(int), - var: env.var_store.fresh(), - // TODO non-hardcode - style: IntStyle::from_base(*base), - text: PoolStr::new(string, &mut env.pool), - }; - - (expr, Output::default()) - } - Err((raw, error)) => { - // emit runtime error - let runtime_error = RuntimeError::InvalidInt(error, *base, ZERO, raw.into()); - - env.problem(Problem::RuntimeError(runtime_error.clone())); - // - // Expr::RuntimeError(runtime_error) - todo!() - } - } - } - - Str(literal) => flatten_str_literal(env, scope, &literal), - - List { items, .. 
} => { - let mut output = Output::default(); - let output_ref = &mut output; - - let elems: PoolVec = PoolVec::with_capacity(items.len() as u32, env.pool); - - for (node_id, item) in elems.iter_node_ids().zip(items.iter()) { - let (expr, sub_output) = to_expr2(env, scope, &item.value, item.region); - - output_ref.union(sub_output); - - let expr_id = env.pool.add(expr); - env.pool[node_id] = expr_id; - } - - let expr = Expr2::List { - elem_var: env.var_store.fresh(), - elems, - }; - - (expr, output) - } - - GlobalTag(tag) => { - // a global tag without any arguments - ( - Expr2::GlobalTag { - name: PoolStr::new(tag, env.pool), - variant_var: env.var_store.fresh(), - ext_var: env.var_store.fresh(), - arguments: PoolVec::empty(env.pool), - }, - Output::default(), - ) - } - PrivateTag(name) => { - // a private tag without any arguments - let ident_id = env.ident_ids.get_or_insert(&(*name).into()); - let name = Symbol::new(env.home, ident_id); - ( - Expr2::PrivateTag { - name, - variant_var: env.var_store.fresh(), - ext_var: env.var_store.fresh(), - arguments: PoolVec::empty(env.pool), - }, - Output::default(), - ) - } - - RecordUpdate { - fields, - update: loc_update, - final_comments: _, - } => { - let (can_update, update_out) = - to_expr2(env, scope, &loc_update.value, loc_update.region); - - if let Expr2::Var(symbol) = &can_update { - match canonicalize_fields(env, scope, fields) { - Ok((can_fields, mut output)) => { - output.references.union_mut(update_out.references); - - let answer = Expr2::Update { - record_var: env.var_store.fresh(), - ext_var: env.var_store.fresh(), - symbol: *symbol, - updates: can_fields, - }; - - (answer, output) - } - Err(CanonicalizeRecordProblem::InvalidOptionalValue { - field_name: _, - field_region: _, - record_region: _, - }) => { - // let runtime_error = roc_problem::can::RuntimeError::InvalidOptionalValue { - // field_name, - // field_region, - // record_region, - // }; - // - // env.problem(Problem::RuntimeError(runtime_error)); - 
- todo!() - } - } - } else { - // only (optionally qualified) variables can be updated, not arbitrary expressions - - // let error = roc_problem::can::RuntimeError::InvalidRecordUpdate { - // region: can_update.region, - // }; - // - // let answer = Expr::RuntimeError(error.clone()); - // - // env.problems.push(Problem::RuntimeError(error)); - // - // (answer, Output::default()) - todo!() - } - } - - Record { - fields, - final_comments: _, - } => { - if fields.is_empty() { - (Expr2::EmptyRecord, Output::default()) - } else { - match canonicalize_fields(env, scope, fields) { - Ok((can_fields, output)) => ( - Expr2::Record { - record_var: env.var_store.fresh(), - fields: can_fields, - }, - output, - ), - Err(CanonicalizeRecordProblem::InvalidOptionalValue { - field_name: _, - field_region: _, - record_region: _, - }) => { - // let runtime_error = RuntimeError::InvalidOptionalValue { - // field_name, - // field_region, - // record_region, - // }; - // - // env.problem(runtime_error); - // ( - // Expr::RuntimeError( - // ), - // Output::default(), - // - // ) - todo!() - } - } - } - } - - Access(record_expr, field) => { - // TODO - let region = ZERO; - let (record_expr_id, output) = to_expr_id(env, scope, record_expr, region); - - ( - Expr2::Access { - record_var: env.var_store.fresh(), - field_var: env.var_store.fresh(), - ext_var: env.var_store.fresh(), - expr: record_expr_id, - field: PoolStr::new(field, env.pool), - }, - output, - ) - } - - AccessorFunction(field) => ( - Expr2::Accessor { - function_var: env.var_store.fresh(), - record_var: env.var_store.fresh(), - ext_var: env.var_store.fresh(), - closure_var: env.var_store.fresh(), - field_var: env.var_store.fresh(), - field: PoolStr::new(field, env.pool), - }, - Output::default(), - ), - - If(branches, final_else) => { - let mut new_branches = Vec::with_capacity(branches.len()); - let mut output = Output::default(); - - for (condition, then_branch) in branches.iter() { - let (cond, cond_output) = to_expr2(env, 
scope, &condition.value, condition.region); - - let (then_expr, then_output) = - to_expr2(env, scope, &then_branch.value, then_branch.region); - - output.references.union_mut(cond_output.references); - output.references.union_mut(then_output.references); - - new_branches.push((env.pool.add(cond), env.pool.add(then_expr))); - } - - let (else_expr, else_output) = - to_expr2(env, scope, &final_else.value, final_else.region); - - output.references.union_mut(else_output.references); - - let expr = Expr2::If { - cond_var: env.var_store.fresh(), - expr_var: env.var_store.fresh(), - branches: PoolVec::new(new_branches.into_iter(), env.pool), - final_else: env.pool.add(else_expr), - }; - - (expr, output) - } - - When(loc_cond, branches) => { - // Infer the condition expression's type. - let cond_var = env.var_store.fresh(); - let (can_cond, mut output) = to_expr2(env, scope, &loc_cond.value, loc_cond.region); - - // the condition can never be a tail-call - output.tail_call = None; - - let can_branches = PoolVec::with_capacity(branches.len() as u32, env.pool); - - for (node_id, branch) in can_branches.iter_node_ids().zip(branches.iter()) { - let (can_when_branch, branch_references) = - canonicalize_when_branch(env, scope, *branch, &mut output); - - output.references.union_mut(branch_references); - - env.pool[node_id] = can_when_branch; - } - - // A "when" with no branches is a runtime error, but it will mess things up - // if code gen mistakenly thinks this is a tail call just because its condition - // happened to be one. (The condition gave us our initial output value.) - if branches.is_empty() { - output.tail_call = None; - } - - // Incorporate all three expressions into a combined Output value. 
- let expr = Expr2::When { - expr_var: env.var_store.fresh(), - cond_var, - cond: env.pool.add(can_cond), - branches: can_branches, - }; - - (expr, output) - } - - Closure(loc_arg_patterns, loc_body_expr) => { - // The globally unique symbol that will refer to this closure once it gets converted - // into a top-level procedure for code gen. - // - // In the Foo module, this will look something like Foo.$1 or Foo.$2. - let symbol = env - .closure_name_symbol - .unwrap_or_else(|| env.gen_unique_symbol()); - env.closure_name_symbol = None; - - // The body expression gets a new scope for canonicalization. - // Shadow `scope` to make sure we don't accidentally use the original one for the - // rest of this block, but keep the original around for later diffing. - let original_scope = scope; - let mut scope = original_scope.shallow_clone(); - let can_args = PoolVec::with_capacity(loc_arg_patterns.len() as u32, env.pool); - let mut output = Output::default(); - - let mut bound_by_argument_patterns = MutSet::default(); - - for (node_id, loc_pattern) in can_args.iter_node_ids().zip(loc_arg_patterns.iter()) { - let (new_output, can_arg) = to_pattern2( - env, - &mut scope, - roc_parse::pattern::PatternType::FunctionArg, - &loc_pattern.value, - loc_pattern.region, - ); - - bound_by_argument_patterns - .extend(new_output.references.bound_symbols.iter().copied()); - - output.union(new_output); - - let pattern_id = env.add(can_arg, loc_pattern.region); - env.pool[node_id] = (env.var_store.fresh(), pattern_id); - } - - let (body_expr, new_output) = - to_expr2(env, &mut scope, &loc_body_expr.value, loc_body_expr.region); - - let mut captured_symbols: MutSet = - new_output.references.lookups.iter().copied().collect(); - - // filter out the closure's name itself - captured_symbols.remove(&symbol); - - // symbols bound either in this pattern or deeper down are not captured! 
- captured_symbols.retain(|s| !new_output.references.bound_symbols.contains(s)); - captured_symbols.retain(|s| !bound_by_argument_patterns.contains(s)); - - // filter out top-level symbols - // those will be globally available, and don't need to be captured - captured_symbols.retain(|s| !env.top_level_symbols.contains(s)); - - // filter out imported symbols - // those will be globally available, and don't need to be captured - captured_symbols.retain(|s| s.module_id() == env.home); - - // TODO any Closure that has an empty `captured_symbols` list could be excluded! - - output.union(new_output); - - // filter out aliases - captured_symbols.retain(|s| !output.references.referenced_aliases.contains(s)); - - // filter out functions that don't close over anything - captured_symbols.retain(|s| !output.non_closures.contains(s)); - - // Now that we've collected all the references, check to see if any of the args we defined - // went unreferenced. If any did, report them as unused arguments. - for (sub_symbol, region) in scope.symbols() { - if !original_scope.contains_symbol(sub_symbol) { - if !output.references.has_lookup(sub_symbol) { - // The body never referenced this argument we declared. It's an unused argument! - env.problem(Problem::UnusedArgument(symbol, sub_symbol, region)); - } - - // We shouldn't ultimately count arguments as referenced locals. Otherwise, - // we end up with weird conclusions like the expression (\x -> x + 1) - // references the (nonexistant) local variable x! - output.references.lookups.remove(&sub_symbol); - } - } - - env.register_closure(symbol, output.references.clone()); - - let mut captured_symbols: Vec<_> = captured_symbols - .into_iter() - .map(|s| (s, env.var_store.fresh())) - .collect(); - - // sort symbols, so we know the order in which they're stored in the closure record - captured_symbols.sort(); - - // store that this function doesn't capture anything. 
It will be promoted to a - // top-level function, and does not need to be captured by other surrounding functions. - if captured_symbols.is_empty() { - output.non_closures.insert(symbol); - } - - let captured_symbols = PoolVec::new(captured_symbols.into_iter(), env.pool); - - let extra = ClosureExtra { - return_type: env.var_store.fresh(), // 4B - captured_symbols, // 8B - closure_type: env.var_store.fresh(), // 4B - closure_ext_var: env.var_store.fresh(), // 4B - }; - - ( - Expr2::Closure { - function_type: env.var_store.fresh(), - name: symbol, - recursive: Recursive::NotRecursive, - args: can_args, - body: env.add(body_expr, loc_body_expr.region), - extra: env.pool.add(extra), - }, - output, - ) - } - - Apply(loc_fn, loc_args, application_style) => { - // The expression that evaluates to the function being called, e.g. `foo` in - // (foo) bar baz - let fn_region = loc_fn.region; - - // Canonicalize the function expression and its arguments - let (fn_expr, mut output) = to_expr2(env, scope, &loc_fn.value, fn_region); - - // The function's return type - let args = PoolVec::with_capacity(loc_args.len() as u32, env.pool); - - for (node_id, loc_arg) in args.iter_node_ids().zip(loc_args.iter()) { - let (arg_expr_id, arg_out) = to_expr_id(env, scope, &loc_arg.value, loc_arg.region); - - env.pool[node_id] = (env.var_store.fresh(), arg_expr_id); - - output.references.union_mut(arg_out.references); - } - - // Default: We're not tail-calling a symbol (by name), we're tail-calling a function value. - output.tail_call = None; - - let expr = match fn_expr { - Expr2::Var(ref symbol) => { - output.references.calls.insert(*symbol); - - // we're tail-calling a symbol by name, check if it's the tail-callable symbol - output.tail_call = match &env.tailcallable_symbol { - Some(tc_sym) if *tc_sym == *symbol => Some(*symbol), - Some(_) | None => None, - }; - - // IDEA: Expr2::CallByName? 
- let fn_expr_id = env.add(fn_expr, fn_region); - Expr2::Call { - args, - expr: fn_expr_id, - expr_var: env.var_store.fresh(), - fn_var: env.var_store.fresh(), - closure_var: env.var_store.fresh(), - called_via: *application_style, - } - } - Expr2::RuntimeError() => { - // We can't call a runtime error; bail out by propagating it! - return (fn_expr, output); - } - Expr2::GlobalTag { - variant_var, - ext_var, - name, - .. - } => Expr2::GlobalTag { - variant_var, - ext_var, - name, - arguments: args, - }, - Expr2::PrivateTag { - variant_var, - ext_var, - name, - .. - } => Expr2::PrivateTag { - variant_var, - ext_var, - name, - arguments: args, - }, - _ => { - // This could be something like ((if True then fn1 else fn2) arg1 arg2). - let fn_expr_id = env.add(fn_expr, fn_region); - Expr2::Call { - args, - expr: fn_expr_id, - expr_var: env.var_store.fresh(), - fn_var: env.var_store.fresh(), - closure_var: env.var_store.fresh(), - called_via: *application_style, - } - } - }; - - (expr, output) - } - - Defs(loc_defs, loc_ret) => { - let (unsorted, mut scope, defs_output, symbols_introduced) = canonicalize_defs( - env, - Output::default(), - &scope, - loc_defs, - PatternType::DefExpr, - ); - - // The def as a whole is a tail call iff its return expression is a tail call. - // Use its output as a starting point because its tail_call already has the right answer! - let (ret_expr, mut output) = to_expr2(env, &mut scope, &loc_ret.value, loc_ret.region); - - output - .introduced_variables - .union(&defs_output.introduced_variables); - - output.references.union_mut(defs_output.references); - - // Now that we've collected all the references, check to see if any of the new idents - // we defined went unused by the return expression. If any were unused, report it. 
- for (symbol, region) in symbols_introduced { - if !output.references.has_lookup(symbol) { - env.problem(Problem::UnusedDef(symbol, region)); - } - } - - let (can_defs, output) = sort_can_defs(env, unsorted, output); - - match can_defs { - Ok(decls) => { - let mut expr = ret_expr; - - for declaration in decls.into_iter().rev() { - expr = decl_to_let(env.pool, env.var_store, declaration, expr); - } - - (expr, output) - } - Err(_err) => { - // TODO: fix this to be something from Expr2 - // (RuntimeError(err), output) - todo!() - } - } - } - - PrecedenceConflict { .. } => { - // use roc_problem::can::RuntimeError::*; - // - // let problem = PrecedenceProblem::BothNonAssociative( - // *whole_region, - // binop1.clone(), - // binop2.clone(), - // ); - // - // env.problem(Problem::PrecedenceProblem(problem.clone())); - // - // ( - // RuntimeError(InvalidPrecedence(problem, region)), - // Output::default(), - // ) - todo!() - } - MalformedClosure => { - // use roc_problem::can::RuntimeError::*; - // (RuntimeError(MalformedClosure(region)), Output::default()) - todo!() - } - MalformedIdent(_name, _problem) => { - // use roc_problem::can::RuntimeError::*; - // - // let problem = MalformedIdentifier((*name).into(), region); - // env.problem(Problem::RuntimeError(problem.clone())); - // - // (RuntimeError(problem), Output::default()) - todo!() - } - Var { module_name, ident } => canonicalize_lookup(env, scope, module_name, ident, region), - - // Below this point, we shouln't see any of these nodes anymore because - // operator desugaring should have removed them! 
- bad_expr @ ParensAround(_) => { - panic!( - "A ParensAround did not get removed during operator desugaring somehow: {:#?}", - bad_expr - ); - } - bad_expr @ SpaceBefore(_, _) => { - panic!( - "A SpaceBefore did not get removed during operator desugaring somehow: {:#?}", - bad_expr - ); - } - bad_expr @ SpaceAfter(_, _) => { - panic!( - "A SpaceAfter did not get removed during operator desugaring somehow: {:#?}", - bad_expr - ); - } - bad_expr @ BinOps { .. } => { - panic!( - "A binary operator chain did not get desugared somehow: {:#?}", - bad_expr - ); - } - bad_expr @ UnaryOp(_, _) => { - panic!( - "A unary operator did not get desugared somehow: {:#?}", - bad_expr - ); - } - - rest => todo!("not yet implemented {:?}", rest), - } -} - -pub fn defs_to_defs2<'a>( - arena: &'a Bump, - env: &mut Env<'a>, - scope: &mut Scope, - parsed_defs: &'a BumpVec>>, - region: Region, -) -> Vec { - use roc_parse::ast::Expr::*; - - parsed_defs - .iter() - .map(|loc| to_def2_from_def(arena, env, scope, &loc.value, region)) - .collect() -} - -pub fn to_def2_from_def<'a>( - arena: &'a Bump, - env: &mut Env<'a>, - scope: &mut Scope, - parsed_def: &'a roc_parse::ast::Def<'a>, - region: Region, -) -> Def2 { - use roc_parse::ast::Def::*; - - match parsed_def { - SpaceBefore(inner_def, _) => to_def2_from_def(arena, env, scope, inner_def, region), - SpaceAfter(inner_def, _) => to_def2_from_def(arena, env, scope, inner_def, region), - Body(&loc_pattern, &loc_expr) => { - // TODO loc_pattern use identifier - let expr2 = loc_expr_to_expr2(arena, loc_expr, env, scope, region).0; - let expr_id = env.pool.add(expr2); - - use roc_parse::ast::Pattern::*; - - match loc_pattern.value { - Identifier(_) => { - let (_, pattern2) = to_pattern2( - env, - scope, - PatternType::TopLevelDef, - &loc_pattern.value, - region, - ); - let pattern_id = env.pool.add(pattern2); - - // TODO support with annotation - Def2::ValueDef { - identifier_id: pattern_id, - expr_id, - } - } - other => { - unimplemented!( - 
"I don't yet know how to convert the pattern {:?} into an expr2", - other - ) - } - } - } - other => { - unimplemented!( - "I don't know how to make an expr2 from this def yet: {:?}", - other - ) - } - } -} - -fn flatten_str_literal<'a>( - env: &mut Env<'a>, - scope: &mut Scope, - literal: &StrLiteral<'a>, -) -> (Expr2, Output) { - use roc_parse::ast::StrLiteral::*; - - match literal { - PlainLine(str_slice) => { - // TODO use smallstr - let expr = Expr2::Str(PoolStr::new(str_slice, &mut env.pool)); - - (expr, Output::default()) - } - Line(segments) => flatten_str_lines(env, scope, &[segments]), - Block(lines) => flatten_str_lines(env, scope, lines), - } -} - -enum StrSegment { - Interpolation(Expr2), - Plaintext(PoolStr), -} - -fn flatten_str_lines<'a>( - env: &mut Env<'a>, - scope: &mut Scope, - lines: &[&[roc_parse::ast::StrSegment<'a>]], -) -> (Expr2, Output) { - use roc_parse::ast::StrSegment::*; - - let mut buf = String::new(); - let mut segments = Vec::new(); - let mut output = Output::default(); - - for line in lines { - for segment in line.iter() { - match segment { - Plaintext(string) => { - buf.push_str(string); - } - Unicode(loc_hex_digits) => match u32::from_str_radix(loc_hex_digits.value, 16) { - Ok(code_pt) => match std::char::from_u32(code_pt) { - Some(ch) => { - buf.push(ch); - } - None => { - // env.problem(Problem::InvalidUnicodeCodePt(loc_hex_digits.region)); - // - // return ( - // Expr::RuntimeError(RuntimeError::InvalidUnicodeCodePt( - // loc_hex_digits.region, - // )), - // output, - // ); - todo!() - } - }, - Err(_) => { - // env.problem(Problem::InvalidHexadecimal(loc_hex_digits.region)); - // - // return ( - // Expr::RuntimeError(RuntimeError::InvalidHexadecimal( - // loc_hex_digits.region, - // )), - // output, - // ); - todo!() - } - }, - Interpolated(loc_expr) => { - if roc_can::expr::is_valid_interpolation(loc_expr.value) { - // Interpolations desugar to Str.concat calls - output.references.calls.insert(Symbol::STR_CONCAT); - - if 
!buf.is_empty() { - segments.push(StrSegment::Plaintext(PoolStr::new(&buf, &mut env.pool))); - - buf = String::new(); - } - - let (loc_expr, new_output) = - to_expr2(env, scope, loc_expr.value, loc_expr.region); - - output.union(new_output); - - segments.push(StrSegment::Interpolation(loc_expr)); - } else { - // env.problem(Problem::InvalidInterpolation(loc_expr.region)); - // - // return ( - // Expr::RuntimeError(RuntimeError::InvalidInterpolation(loc_expr.region)), - // output, - // ); - todo!() - } - } - EscapedChar(escaped) => buf.push(roc_can::expr::unescape_char(escaped)), - } - } - } - - if !buf.is_empty() { - segments.push(StrSegment::Plaintext(PoolStr::new(&buf, &mut env.pool))); - } - - (desugar_str_segments(env, segments), output) -} - -/// Resolve string interpolations by desugaring a sequence of StrSegments -/// into nested calls to Str.concat -fn desugar_str_segments<'a>(env: &mut Env<'a>, segments: Vec) -> Expr2 { - use StrSegment::*; - - let pool = &mut env.pool; - let var_store = &mut env.var_store; - - let mut iter = segments.into_iter().rev(); - let mut expr = match iter.next() { - Some(Plaintext(pool_str)) => Expr2::Str(pool_str), - Some(Interpolation(expr_id)) => expr_id, - None => { - // No segments? Empty string! 
- - let pool_str = PoolStr::new("", pool); - Expr2::Str(pool_str) - } - }; - - for seg in iter { - let new_expr = match seg { - Plaintext(string) => Expr2::Str(string), - Interpolation(expr_id) => expr_id, - }; - - let concat_expr_id = pool.add(Expr2::Var(Symbol::STR_CONCAT)); - - let args = vec![ - (var_store.fresh(), pool.add(new_expr)), - (var_store.fresh(), pool.add(expr)), - ]; - let args = PoolVec::new(args.into_iter(), pool); - - let new_call = Expr2::Call { - args, - expr: concat_expr_id, - expr_var: var_store.fresh(), - fn_var: var_store.fresh(), - closure_var: var_store.fresh(), - called_via: CalledVia::Space, - }; - - expr = new_call - } - - expr -} - -enum CanonicalizeRecordProblem { - InvalidOptionalValue { - field_name: PoolStr, - field_region: Region, - record_region: Region, - }, -} - -enum FieldVar { - VarAndExprId(Variable, ExprId), - OnlyVar(Variable), -} - -fn canonicalize_fields<'a>( - env: &mut Env<'a>, - scope: &mut Scope, - fields: &'a [Located>>], -) -> Result<(PoolVec, Output), CanonicalizeRecordProblem> { - let mut can_fields: MutMap<&'a str, FieldVar> = MutMap::default(); - let mut output = Output::default(); - - for loc_field in fields.iter() { - match canonicalize_field(env, scope, &loc_field.value) { - Ok(can_field) => { - match can_field { - CanonicalField::LabelAndValue { - label, - value_expr, - value_output, - var, - } => { - let expr_id = env.pool.add(value_expr); - - let replaced = - can_fields.insert(label, FieldVar::VarAndExprId(var, expr_id)); - - if let Some(_old) = replaced { - // env.problems.push(Problem::DuplicateRecordFieldValue { - // field_name: label, - // field_region: loc_field.region, - // record_region: region, - // replaced_region: old.region, - // }); - todo!() - } - - output.references.union_mut(value_output.references); - } - CanonicalField::InvalidLabelOnly { label, var } => { - let replaced = can_fields.insert(label, FieldVar::OnlyVar(var)); - - if let Some(_old) = replaced { - todo!() - } - } - } - } - - 
Err(CanonicalizeFieldProblem::InvalidOptionalValue { - field_name: _, - field_region: _, - }) => { - // env.problem(Problem::InvalidOptionalValue { - // field_name: field_name.clone(), - // field_region, - // record_region: region, - // }); - // return Err(CanonicalizeRecordProblem::InvalidOptionalValue { - // field_name, - // field_region, - // record_region: region, - // }); - todo!() - } - } - } - - let pool_vec = PoolVec::with_capacity(can_fields.len() as u32, env.pool); - - for (node_id, (string, field_var)) in pool_vec.iter_node_ids().zip(can_fields.into_iter()) { - let name = PoolStr::new(string, env.pool); - - match field_var { - FieldVar::VarAndExprId(var, expr_id) => { - env.pool[node_id] = RecordField::LabeledValue(name, var, expr_id); - } - FieldVar::OnlyVar(var) => { - env.pool[node_id] = RecordField::InvalidLabelOnly(name, var); - } // TODO RecordField::LabelOnly - } - } - - Ok((pool_vec, output)) -} - -enum CanonicalizeFieldProblem { - InvalidOptionalValue { - field_name: PoolStr, - field_region: Region, - }, -} -enum CanonicalField<'a> { - LabelAndValue { - label: &'a str, - value_expr: Expr2, - value_output: Output, - var: Variable, - }, - InvalidLabelOnly { - label: &'a str, - var: Variable, - }, // TODO make ValidLabelOnly -} -fn canonicalize_field<'a>( - env: &mut Env<'a>, - scope: &mut Scope, - field: &'a roc_parse::ast::AssignedField<'a, roc_parse::ast::Expr<'a>>, -) -> Result, CanonicalizeFieldProblem> { - use roc_parse::ast::AssignedField::*; - - match field { - // Both a label and a value, e.g. 
`{ name: "blah" }` - RequiredValue(label, _, loc_expr) => { - let field_var = env.var_store.fresh(); - let (loc_can_expr, output) = to_expr2(env, scope, &loc_expr.value, loc_expr.region); - - Ok(CanonicalField::LabelAndValue { - label: label.value, - value_expr: loc_can_expr, - value_output: output, - var: field_var, - }) - } - - OptionalValue(label, _, loc_expr) => Err(CanonicalizeFieldProblem::InvalidOptionalValue { - field_name: PoolStr::new(label.value, env.pool), - field_region: Region::span_across(&label.region, &loc_expr.region), - }), - - // A label with no value, e.g. `{ name }` (this is sugar for { name: name }) - LabelOnly(label) => { - let field_var = env.var_store.fresh(); - // TODO return ValidLabel if label points to in scope variable - Ok(CanonicalField::InvalidLabelOnly { - label: label.value, - var: field_var, - }) - } - - SpaceBefore(sub_field, _) | SpaceAfter(sub_field, _) => { - canonicalize_field(env, scope, sub_field) - } - - Malformed(_string) => { - panic!("TODO canonicalize malformed record field"); - } - } -} - -#[inline(always)] -fn canonicalize_when_branch<'a>( - env: &mut Env<'a>, - scope: &mut Scope, - branch: &'a roc_parse::ast::WhenBranch<'a>, - output: &mut Output, -) -> (WhenBranch, References) { - let patterns = PoolVec::with_capacity(branch.patterns.len() as u32, env.pool); - - let original_scope = scope; - let mut scope = original_scope.shallow_clone(); - - // TODO report symbols not bound in all patterns - for (node_id, loc_pattern) in patterns.iter_node_ids().zip(branch.patterns.iter()) { - let (new_output, can_pattern) = to_pattern2( - env, - &mut scope, - roc_parse::pattern::PatternType::WhenBranch, - &loc_pattern.value, - loc_pattern.region, - ); - - output.union(new_output); - - env.set_region(node_id, loc_pattern.region); - env.pool[node_id] = can_pattern; - } - - let (value, mut branch_output) = - to_expr2(env, &mut scope, &branch.value.value, branch.value.region); - let value_id = env.pool.add(value); - 
env.set_region(value_id, branch.value.region); - - let guard = match &branch.guard { - None => None, - Some(loc_expr) => { - let (can_guard, guard_branch_output) = - to_expr2(env, &mut scope, &loc_expr.value, loc_expr.region); - - let expr_id = env.pool.add(can_guard); - env.set_region(expr_id, loc_expr.region); - - branch_output.union(guard_branch_output); - Some(expr_id) - } - }; - - // Now that we've collected all the references for this branch, check to see if - // any of the new idents it defined were unused. If any were, report it. - for (symbol, region) in scope.symbols() { - let symbol = symbol; - - if !output.references.has_lookup(symbol) - && !branch_output.references.has_lookup(symbol) - && !original_scope.contains_symbol(symbol) - { - env.problem(Problem::UnusedDef(symbol, region)); - } - } - - let references = branch_output.references.clone(); - output.union(branch_output); - - ( - WhenBranch { - patterns, - body: value_id, - guard, - }, - references, - ) -} - -fn canonicalize_lookup( - env: &mut Env<'_>, - scope: &mut Scope, - module_name: &str, - ident: &str, - region: Region, -) -> (Expr2, Output) { - use Expr2::*; - - let mut output = Output::default(); - let can_expr = if module_name.is_empty() { - // Since module_name was empty, this is an unqualified var. - // Look it up in scope! - match scope.lookup(&(*ident).into(), region) { - Ok(symbol) => { - output.references.lookups.insert(symbol); - - Var(symbol) - } - Err(problem) => { - env.problem(Problem::RuntimeError(problem.clone())); - - RuntimeError() - } - } - } else { - // Since module_name was nonempty, this is a qualified var. - // Look it up in the env! - match env.qualified_lookup(module_name, ident, region) { - Ok(symbol) => { - output.references.lookups.insert(symbol); - - Var(symbol) - } - Err(problem) => { - // Either the module wasn't imported, or - // it was imported but it doesn't expose this ident. 
- env.problem(Problem::RuntimeError(problem.clone())); - - RuntimeError() - } - } - }; - - // If it's valid, this ident should be in scope already. - - (can_expr, output) -} - -fn decl_to_let(pool: &mut Pool, var_store: &mut VarStore, decl: Declaration, ret: Expr2) -> Expr2 { - match decl { - Declaration::Declare(def) => match def { - Def::AnnotationOnly { .. } => todo!(), - Def::Value(value_def) => { - let def_id = pool.add(value_def); - - let body_id = pool.add(ret); - - Expr2::LetValue { - def_id, - body_id, - body_var: var_store.fresh(), - } - } - Def::Function(function_def) => { - let def_id = pool.add(function_def); - let body_id = pool.add(ret); - - Expr2::LetFunction { - def_id, - body_id, - body_var: var_store.fresh(), - } - } - }, - Declaration::DeclareRec(defs) => { - let mut function_defs = vec![]; - - for def in defs { - match def { - Def::AnnotationOnly { .. } => todo!(), - Def::Function(function_def) => function_defs.push(function_def), - Def::Value(_) => unreachable!(), - } - } - - let body_id = pool.add(ret); - - Expr2::LetRec { - defs: PoolVec::new(function_defs.into_iter(), pool), - body_var: var_store.fresh(), - body_id, - } - } - Declaration::InvalidCycle(_entries, _) => { - // TODO: replace with something from Expr2 - // Expr::RuntimeError(RuntimeError::CircularDef(entries)) - todo!() - } - Declaration::Builtin(_) => { - // Builtins should only be added to top-level decls, not to let-exprs! 
- unreachable!() - } - } -} diff --git a/editor/src/lang/mod.rs b/editor/src/lang/mod.rs deleted file mode 100644 index fa747386e1..0000000000 --- a/editor/src/lang/mod.rs +++ /dev/null @@ -1,12 +0,0 @@ -pub mod ast; -pub mod constrain; -mod def; -pub mod expr; -mod module; -pub mod parse; -pub mod pattern; -pub mod pool; -pub mod roc_file; -pub mod scope; -pub mod solve; -pub mod types; diff --git a/editor/src/lang/parse.rs b/editor/src/lang/parse.rs deleted file mode 100644 index 9c7253bb75..0000000000 --- a/editor/src/lang/parse.rs +++ /dev/null @@ -1,98 +0,0 @@ -use std::fmt::Debug; - -use crate::{ - editor::ed_error::ASTNodeIdWithoutExprId, editor::ed_error::EdResult, lang::scope::Scope, -}; -use bumpalo::Bump; -use roc_parse::parser::SyntaxError; -use roc_region::all::Region; - -use super::{ - ast::{DefId, Expr2, ExprId}, - expr::{str_to_def2, Env}, -}; - -#[derive(Debug)] -pub struct AST { - pub header: AppHeader, - pub def_ids: Vec, -} - -#[derive(Debug, PartialEq, Copy, Clone)] -pub enum ASTNodeId { - ADefId(DefId), - AExprId(ExprId), -} - -impl ASTNodeId { - pub fn to_expr_id(&self) -> EdResult { - match self { - ASTNodeId::AExprId(expr_id) => Ok(*expr_id), - _ => ASTNodeIdWithoutExprId { ast_node_id: *self }.fail()?, - } - } - - pub fn to_def_id(&self) -> EdResult { - match self { - ASTNodeId::ADefId(def_id) => Ok(*def_id), - _ => ASTNodeIdWithoutExprId { ast_node_id: *self }.fail()?, - } - } -} - -#[derive(Debug)] -pub struct AppHeader { - pub app_name: String, - pub packages_base: String, - pub imports: Vec, - pub provides: Vec, - pub ast_node_id: ExprId, // TODO probably want to use HeaderId -} - -impl AST { - pub fn parse_from_string<'a>( - code_str: &'a str, - env: &mut Env<'a>, - ast_arena: &'a Bump, - ) -> Result> { - let blank_line_indx = code_str - .find("\n\n") - .expect("I was expecting a double newline to split header and rest of code."); - - let header_str = &code_str[0..blank_line_indx]; - let tail_str = &code_str[blank_line_indx..]; - - 
let mut scope = Scope::new(env.home, env.pool, env.var_store); - let region = Region::new(0, 0, 0, 0); - - let mut def_ids = Vec::::new(); - - let def2_vec = str_to_def2(ast_arena, tail_str, env, &mut scope, region)?; - - for def2 in def2_vec { - let def_id = env.pool.add(def2); - - def_ids.push(def_id); - } - - let ast_node_id = env.pool.add(Expr2::Blank); - - Ok(AST { - header: AppHeader::parse_from_string(header_str, ast_node_id), - def_ids, - }) - } -} - -impl AppHeader { - // TODO don't use mock struct and actually parse string - pub fn parse_from_string(_header_str: &str, ast_node_id: ExprId) -> Self { - AppHeader { - app_name: "\"untitled-app\"".to_owned(), - packages_base: "\"platform\"".to_owned(), - imports: vec![], - provides: vec!["main".to_owned()], - ast_node_id, - } - } -} diff --git a/editor/src/lang/pool.rs b/editor/src/lang/pool.rs deleted file mode 100644 index 2504cfcffc..0000000000 --- a/editor/src/lang/pool.rs +++ /dev/null @@ -1,657 +0,0 @@ -/// A pool of 32-byte nodes. The node value 0 is reserved for the pool's -/// use, and valid nodes may never have that value. -/// -/// Internally, the pool is divided into pages of 4096 bytes. It stores nodes -/// into one page at a time, and when it runs out, it uses mmap to reserve an -/// anonymous memory page in which to store nodes. -/// -/// Since nodes are 32 bytes, one page can store 128 nodes; you can access a -/// particular node by its NodeId, which is an opaque wrapper around a pointer. -/// -/// Pages also use the node value 0 (all 0 bits) to mark nodes as unoccupied. -/// This is important for performance. -use libc::{c_void, MAP_ANONYMOUS, MAP_PRIVATE, PROT_READ, PROT_WRITE}; -use roc_can::expected::Expected; -use roc_can::expected::PExpected; -use std::any::type_name; -use std::cmp::Ordering; -use std::marker::PhantomData; -use std::mem::size_of; -use std::ptr::null; - -pub const NODE_BYTES: usize = 32; - -// Each page has 128 slots. 
Each slot holds one 32B node -// This means each page is 4096B, which is the size of a memory page -// on typical systems where the compiler will be run. -// -// Nice things about this system include: -// * Allocating a new page is as simple as asking the OS for a memory page. -// * Since each node is 32B, each node's memory address will be a multiple of 16. -// * Thanks to the free lists and our consistent chunk sizes, we should -// end up with very little fragmentation. -// * Finding a slot for a given node should be very fast: see if the relevant -// free list has any openings; if not, try the next size up. -// -// Less nice things include: -// * This system makes it very hard to ever give a page back to the OS. -// We could try doing the Mesh Allocator strategy: whenever we allocate -// something, assign it to a random slot in the page, and then periodically -// try to merge two pages into one (by locking and remapping them in the OS) -// and then returning the redundant physical page back to the OS. This should -// work in theory, but is pretty complicated, and we'd need to schedule it. -// Keep in mind that we can't use the Mesh Allocator itself because it returns -// usize pointers, which would be too big for us to have 16B nodes. -// On the plus side, we could be okay with higher memory usage early on, -// and then later use the Mesh strategy to reduce long-running memory usage. -// -// With this system, we can allocate up to 4B nodes. If we wanted to keep -// a generational index in there, like https://crates.io/crates/sharded-slab -// does, we could use some of the 32 bits for that. For example, if we wanted -// to have a 5-bit generational index (supporting up to 32 generations), then -// we would have 27 bits remaining, meaning we could only support at most -// 134M nodes. Since the editor has a separate Pool for each module, is that -// enough for any single module we'll encounter in practice? 
Probably, and -// especially if we allocate super large collection literals on the heap instead -// of in the pool. -// -// Another possible design is to try to catch reuse bugs using an "ASan" like -// approach: in development builds, whenever we "free" a particular slot, we -// can add it to a dev-build-only "freed nodes" list and don't hand it back -// out (so, we leak the memory.) Then we can (again, in development builds only) -// check to see if we're about to store something in zeroed-out memory; if so, check -// to see if it was - -#[derive(Debug, Eq)] -pub struct NodeId { - index: u32, - _phantom: PhantomData, -} - -impl Clone for NodeId { - fn clone(&self) -> Self { - NodeId { - index: self.index, - _phantom: PhantomData::default(), - } - } -} - -impl PartialEq for NodeId { - fn eq(&self, other: &Self) -> bool { - self.index == other.index - } -} - -impl Copy for NodeId {} - -#[derive(Debug)] -pub struct Pool { - nodes: *mut [u8; NODE_BYTES], - num_nodes: u32, - capacity: u32, - // free_1node_slots: Vec>, -} - -impl Pool { - pub fn with_capacity(nodes: u32) -> Self { - // round up number of nodes requested to nearest page size in bytes - let bytes_per_page = page_size::get(); - let node_bytes = NODE_BYTES * nodes as usize; - let leftover = node_bytes % bytes_per_page; - let bytes_to_mmap = if leftover == 0 { - node_bytes - } else { - node_bytes + bytes_per_page - leftover - }; - - let nodes = unsafe { - // mmap anonymous memory pages - that is, contiguous virtual memory - // addresses from the OS which will be lazily translated into - // physical memory one 4096-byte page at a time, once we actually - // try to read or write in that page's address range. - libc::mmap( - null::() as *mut c_void, - bytes_to_mmap, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, - 0, - 0, - ) - } as *mut [u8; NODE_BYTES]; - - // This is our actual capacity, in nodes. - // It might be higher than the requested capacity due to rounding up - // to nearest page size. 
- let capacity = (bytes_to_mmap / NODE_BYTES) as u32; - - Pool { - nodes, - num_nodes: 0, - capacity, - } - } - - pub fn add(&mut self, node: T) -> NodeId { - // It's only safe to store this if T fits in S. - debug_assert!( - size_of::() <= NODE_BYTES, - "{} has a size of {}, but it needs to be at most {}", - type_name::(), - size_of::(), - NODE_BYTES - ); - - let node_id = self.reserve(1); - let node_ptr = unsafe { self.nodes.offset(node_id.index as isize) } as *mut T; - - unsafe { *node_ptr = node }; - - node_id - } - - /// Reserves the given number of contiguous node slots, and returns - /// the NodeId of the first one. We only allow reserving 2^32 in a row. - fn reserve(&mut self, nodes: u32) -> NodeId { - // TODO once we have a free list, look in there for an open slot first! - let index = self.num_nodes; - - if index < self.capacity { - self.num_nodes = index + nodes; - - NodeId { - index, - _phantom: PhantomData::default(), - } - } else { - todo!("pool ran out of capacity. TODO reallocate the nodes pointer to map to a bigger space. 
Can use mremap on Linux, but must memcpy lots of bytes on macOS and Windows."); - } - } - - pub fn get<'a, 'b, T>(&'a self, node_id: NodeId) -> &'b T { - unsafe { - let node_ptr = self.nodes.offset(node_id.index as isize) as *const T; - - &*node_ptr - } - } - - pub fn get_mut(&mut self, node_id: NodeId) -> &mut T { - unsafe { - let node_ptr = self.nodes.offset(node_id.index as isize) as *mut T; - - &mut *node_ptr - } - } - - pub fn set(&mut self, node_id: NodeId, element: T) { - unsafe { - let node_ptr = self.nodes.offset(node_id.index as isize) as *mut T; - - *node_ptr = element; - } - } - - // A node is available iff its bytes are all zeroes - #[allow(dead_code)] - fn is_available(&self, node_id: NodeId) -> bool { - debug_assert_eq!(size_of::(), NODE_BYTES); - - unsafe { - let node_ptr = self.nodes.offset(node_id.index as isize) as *const [u8; NODE_BYTES]; - - *node_ptr == [0; NODE_BYTES] - } - } -} - -impl std::ops::Index> for Pool { - type Output = T; - - fn index(&self, node_id: NodeId) -> &Self::Output { - self.get(node_id) - } -} - -impl std::ops::IndexMut> for Pool { - fn index_mut(&mut self, node_id: NodeId) -> &mut Self::Output { - self.get_mut(node_id) - } -} - -impl Drop for Pool { - fn drop(&mut self) { - unsafe { - libc::munmap( - self.nodes as *mut c_void, - NODE_BYTES * self.capacity as usize, - ); - } - } -} - -/// A string containing at most 2^32 pool-allocated bytes. 
-#[derive(Debug, Copy, Clone)] -pub struct PoolStr { - first_node_id: NodeId<()>, - len: u32, -} - -#[test] -fn pool_str_size() { - assert_eq!(size_of::(), 8); -} - -impl PoolStr { - pub fn new(string: &str, pool: &mut Pool) -> Self { - debug_assert!(string.len() <= u32::MAX as usize); - - let chars_per_node = NODE_BYTES / size_of::(); - - let number_of_nodes = f64::ceil(string.len() as f64 / chars_per_node as f64) as u32; - - if number_of_nodes > 0 { - let first_node_id = pool.reserve(number_of_nodes); - let index = first_node_id.index as isize; - let next_node_ptr = unsafe { pool.nodes.offset(index) } as *mut c_void; - - unsafe { - libc::memcpy( - next_node_ptr, - string.as_ptr() as *const c_void, - string.len(), - ); - } - - PoolStr { - first_node_id, - len: string.len() as u32, - } - } else { - PoolStr { - first_node_id: NodeId { - index: 0, - _phantom: PhantomData::default(), - }, - len: 0, - } - } - } - - pub fn as_str(&self, pool: &Pool) -> &str { - unsafe { - let node_ptr = pool.nodes.offset(self.first_node_id.index as isize) as *const u8; - - let node_slice: &[u8] = std::slice::from_raw_parts(node_ptr, self.len as usize); - - std::str::from_utf8_unchecked(&node_slice[0..self.len as usize]) - } - } - - #[allow(clippy::len_without_is_empty)] - pub fn len(&self, pool: &Pool) -> usize { - let contents = self.as_str(pool); - - contents.len() - } - - pub fn is_empty(&self, pool: &Pool) -> bool { - self.len(pool) == 0 - } -} - -impl ShallowClone for PoolStr { - fn shallow_clone(&self) -> Self { - // Question: should this fully clone, or is a shallow copy - // (and the aliasing it entails) OK? - Self { - first_node_id: self.first_node_id, - len: self.len, - } - } -} - -/// An array of at most 2^32 pool-allocated nodes. 
-#[derive(Debug)] -pub struct PoolVec { - first_node_id: NodeId, - len: u32, -} - -#[test] -fn pool_vec_size() { - assert_eq!(size_of::>(), 8); -} - -impl<'a, T: 'a + Sized> PoolVec { - pub fn empty(pool: &mut Pool) -> Self { - Self::new(std::iter::empty(), pool) - } - - pub fn with_capacity(len: u32, pool: &mut Pool) -> Self { - debug_assert!( - size_of::() <= NODE_BYTES, - "{} has a size of {}", - type_name::(), - size_of::() - ); - - if len == 0 { - Self::empty(pool) - } else { - let first_node_id = pool.reserve(len); - - PoolVec { first_node_id, len } - } - } - - pub fn len(&self) -> usize { - self.len as usize - } - - pub fn is_empty(&self) -> bool { - self.len == 0 - } - - pub fn new>(nodes: I, pool: &mut Pool) -> Self { - debug_assert!(nodes.len() <= u32::MAX as usize); - debug_assert!(size_of::() <= NODE_BYTES); - - let len = nodes.len() as u32; - - if len > 0 { - let first_node_id = pool.reserve(len); - let index = first_node_id.index as isize; - let mut next_node_ptr = unsafe { pool.nodes.offset(index) } as *mut T; - - for (indx_inc, node) in nodes.enumerate() { - unsafe { - *next_node_ptr = node; - - next_node_ptr = pool.nodes.offset(index + (indx_inc as isize) + 1) as *mut T; - } - } - - PoolVec { first_node_id, len } - } else { - PoolVec { - first_node_id: NodeId { - index: 0, - _phantom: PhantomData::default(), - }, - len: 0, - } - } - } - - pub fn iter(&self, pool: &'a Pool) -> impl ExactSizeIterator { - self.pool_list_iter(pool) - } - - pub fn iter_mut(&self, pool: &'a mut Pool) -> impl ExactSizeIterator { - self.pool_list_iter_mut(pool) - } - - pub fn iter_node_ids(&self) -> impl ExactSizeIterator> { - self.pool_list_iter_node_ids() - } - - /// Private version of into_iter which exposes the implementation detail - /// of PoolVecIter. We don't want that struct to be public, but we - /// actually do want to have this separate function for code reuse - /// in the iterator's next() method. 
- #[inline(always)] - fn pool_list_iter(&self, pool: &'a Pool) -> PoolVecIter<'a, T> { - PoolVecIter { - pool, - current_node_id: self.first_node_id, - len_remaining: self.len, - } - } - - #[inline(always)] - fn pool_list_iter_mut(&self, pool: &'a Pool) -> PoolVecIterMut<'a, T> { - PoolVecIterMut { - pool, - current_node_id: self.first_node_id, - len_remaining: self.len, - } - } - - #[inline(always)] - fn pool_list_iter_node_ids(&self) -> PoolVecIterNodeIds { - PoolVecIterNodeIds { - current_node_id: self.first_node_id, - len_remaining: self.len, - } - } - - pub fn free(self, pool: &'a mut Pool) { - // zero out the memory - unsafe { - let index = self.first_node_id.index as isize; - let node_ptr = pool.nodes.offset(index) as *mut c_void; - let bytes = self.len as usize * NODE_BYTES; - - libc::memset(node_ptr, 0, bytes); - } - - // TODO insert it into the pool's free list - } -} - -impl ShallowClone for PoolVec { - fn shallow_clone(&self) -> Self { - // Question: should this fully clone, or is a shallow copy - // (and the aliasing it entails) OK? 
- Self { - first_node_id: self.first_node_id, - len: self.len, - } - } -} - -struct PoolVecIter<'a, T> { - pool: &'a Pool, - current_node_id: NodeId, - len_remaining: u32, -} - -impl<'a, T> ExactSizeIterator for PoolVecIter<'a, T> -where - T: 'a, -{ - fn len(&self) -> usize { - self.len_remaining as usize - } -} - -impl<'a, T> Iterator for PoolVecIter<'a, T> -where - T: 'a, -{ - type Item = &'a T; - - fn next(&mut self) -> Option { - let len_remaining = self.len_remaining; - - match len_remaining.cmp(&1) { - Ordering::Greater => { - // Get the current node - let index = self.current_node_id.index; - let node_ptr = unsafe { self.pool.nodes.offset(index as isize) } as *const T; - - // Advance the node pointer to the next node in the current page - self.current_node_id = NodeId { - index: index + 1, - _phantom: PhantomData::default(), - }; - self.len_remaining = len_remaining - 1; - - Some(unsafe { &*node_ptr }) - } - Ordering::Equal => { - self.len_remaining = 0; - - // Don't advance the node pointer's node, because that might - // advance past the end of the page! 
- - let index = self.current_node_id.index; - let node_ptr = unsafe { self.pool.nodes.offset(index as isize) } as *const T; - - Some(unsafe { &*node_ptr }) - } - Ordering::Less => { - // len_remaining was 0 - None - } - } - } -} - -struct PoolVecIterMut<'a, T> { - pool: &'a Pool, - current_node_id: NodeId, - len_remaining: u32, -} - -impl<'a, T> ExactSizeIterator for PoolVecIterMut<'a, T> -where - T: 'a, -{ - fn len(&self) -> usize { - self.len_remaining as usize - } -} - -impl<'a, T> Iterator for PoolVecIterMut<'a, T> -where - T: 'a, -{ - type Item = &'a mut T; - - fn next(&mut self) -> Option { - let len_remaining = self.len_remaining; - - match len_remaining.cmp(&1) { - Ordering::Greater => { - // Get the current node - let index = self.current_node_id.index; - let node_ptr = unsafe { self.pool.nodes.offset(index as isize) } as *mut T; - - // Advance the node pointer to the next node in the current page - self.current_node_id = NodeId { - index: index + 1, - _phantom: PhantomData::default(), - }; - self.len_remaining = len_remaining - 1; - - Some(unsafe { &mut *node_ptr }) - } - Ordering::Equal => { - self.len_remaining = 0; - - // Don't advance the node pointer's node, because that might - // advance past the end of the page! 
- - let index = self.current_node_id.index; - let node_ptr = unsafe { self.pool.nodes.offset(index as isize) } as *mut T; - - Some(unsafe { &mut *node_ptr }) - } - Ordering::Less => { - // len_remaining was 0 - None - } - } - } -} - -struct PoolVecIterNodeIds { - current_node_id: NodeId, - len_remaining: u32, -} - -impl ExactSizeIterator for PoolVecIterNodeIds { - fn len(&self) -> usize { - self.len_remaining as usize - } -} - -impl Iterator for PoolVecIterNodeIds { - type Item = NodeId; - - fn next(&mut self) -> Option { - let len_remaining = self.len_remaining; - - match len_remaining.cmp(&1) { - Ordering::Greater => { - // Get the current node - let current = self.current_node_id; - let index = current.index; - - // Advance the node pointer to the next node in the current page - self.current_node_id = NodeId { - index: index + 1, - _phantom: PhantomData::default(), - }; - self.len_remaining = len_remaining - 1; - - Some(current) - } - Ordering::Equal => { - self.len_remaining = 0; - - // Don't advance the node pointer's node, because that might - // advance past the end of the page! 
- - Some(self.current_node_id) - } - Ordering::Less => { - // len_remaining was 0 - None - } - } - } -} - -#[test] -fn pool_vec_iter_test() { - let expected_vec: Vec = vec![2, 4, 8, 16]; - - let mut test_pool = Pool::with_capacity(1024); - let pool_vec = PoolVec::new(expected_vec.clone().into_iter(), &mut test_pool); - - let current_vec: Vec = pool_vec.iter(&test_pool).copied().collect(); - - assert_eq!(current_vec, expected_vec); -} -/// Clones the outer node, but does not clone any nodeids -pub trait ShallowClone { - fn shallow_clone(&self) -> Self; -} - -impl ShallowClone for Expected { - fn shallow_clone(&self) -> Self { - use Expected::*; - - match self { - NoExpectation(t) => NoExpectation(t.shallow_clone()), - ForReason(reason, t, region) => ForReason(reason.clone(), t.shallow_clone(), *region), - FromAnnotation(loc_pat, n, source, t) => { - FromAnnotation(loc_pat.clone(), *n, *source, t.shallow_clone()) - } - } - } -} - -impl ShallowClone for PExpected { - fn shallow_clone(&self) -> Self { - use PExpected::*; - - match self { - NoExpectation(t) => NoExpectation(t.shallow_clone()), - ForReason(reason, t, region) => ForReason(reason.clone(), t.shallow_clone(), *region), - } - } -} diff --git a/editor/src/lib.rs b/editor/src/lib.rs index f6f3053840..5ee7ea9fd5 100644 --- a/editor/src/lib.rs +++ b/editor/src/lib.rs @@ -3,14 +3,12 @@ #![allow(clippy::large_enum_variant, clippy::upper_case_acronyms)] #[cfg_attr(test, macro_use)] -extern crate indoc; extern crate pest; #[cfg_attr(test, macro_use)] extern crate pest_derive; mod editor; mod graphics; -pub mod lang; //TODO remove pub for unused warnings mod ui; mod window; diff --git a/editor/tests/solve_expr2.rs b/editor/tests/solve_expr2.rs deleted file mode 100644 index 88f5e5248b..0000000000 --- a/editor/tests/solve_expr2.rs +++ /dev/null @@ -1,372 +0,0 @@ -#[macro_use] -extern crate pretty_assertions; -#[macro_use] -extern crate indoc; - -use bumpalo::Bump; -use roc_can::expected::Expected; -use 
roc_collections::all::MutMap; -use roc_editor::lang::solve; -use roc_editor::lang::{ - constrain::constrain_expr, - constrain::Constraint, - expr::{str_to_expr2, Env}, - pool::Pool, - scope::Scope, - types::Type2, -}; -use roc_module::ident::Lowercase; -use roc_module::symbol::Interns; -use roc_module::symbol::Symbol; -use roc_module::symbol::{IdentIds, ModuleIds}; -use roc_region::all::Region; -use roc_types::solved_types::Solved; -use roc_types::subs::{Subs, Variable}; -use roc_types::{pretty_print::content_to_string, subs::VarStore}; - -fn run_solve<'a>( - arena: &'a Bump, - mempool: &mut Pool, - aliases: MutMap, - rigid_variables: MutMap, - constraint: Constraint, - var_store: VarStore, -) -> (Solved, solve::Env, Vec) { - let env = solve::Env { - vars_by_symbol: MutMap::default(), - aliases, - }; - - let mut subs = Subs::new(var_store); - - for (var, name) in rigid_variables { - subs.rigid_var(var, name); - } - - // Now that the module is parsed, canonicalized, and constrained, - // we need to type check it. - let mut problems = Vec::new(); - - // Run the solver to populate Subs. 
- let (solved_subs, solved_env) = - solve::run(arena, mempool, &env, &mut problems, subs, &constraint); - - (solved_subs, solved_env, problems) -} - -fn infer_eq(actual: &str, expected_str: &str) { - let mut env_pool = Pool::with_capacity(1024); - let env_arena = Bump::new(); - let code_arena = Bump::new(); - - let mut var_store = VarStore::default(); - let var = var_store.fresh(); - let dep_idents = IdentIds::exposed_builtins(8); - let exposed_ident_ids = IdentIds::default(); - let mut module_ids = ModuleIds::default(); - let mod_id = module_ids.get_or_insert(&"ModId123".into()); - - let mut env = Env::new( - mod_id, - &env_arena, - &mut env_pool, - &mut var_store, - dep_idents, - &module_ids, - exposed_ident_ids, - ); - - let mut scope = Scope::new(env.home, env.pool, env.var_store); - - let region = Region::zero(); - - let expr2_result = str_to_expr2(&code_arena, actual, &mut env, &mut scope, region); - - match expr2_result { - Ok((expr, _)) => { - let constraint = constrain_expr( - &code_arena, - &mut env, - &expr, - Expected::NoExpectation(Type2::Variable(var)), - Region::zero(), - ); - - let Env { - pool, - var_store: ref_var_store, - mut dep_idents, - .. 
- } = env; - - // extract the var_store out of the env again - let mut var_store = VarStore::default(); - std::mem::swap(ref_var_store, &mut var_store); - - let (mut solved, _, _) = run_solve( - &code_arena, - pool, - Default::default(), - Default::default(), - constraint, - var_store, - ); - - let subs = solved.inner_mut(); - - let content = subs.get_content_without_compacting(var); - - // Connect the ModuleId to it's IdentIds - dep_idents.insert(mod_id, env.ident_ids); - - let interns = Interns { - module_ids: env.module_ids.clone(), - all_ident_ids: dep_idents, - }; - - let actual_str = content_to_string(content, subs, mod_id, &interns); - - assert_eq!(actual_str, expected_str); - } - Err(e) => panic!("syntax error {:?}", e), - } -} - -#[test] -fn constrain_str() { - infer_eq( - indoc!( - r#" - "type inference!" - "# - ), - "Str", - ) -} - -// This will be more useful once we actually map -// strings less than 15 chars to SmallStr -#[test] -fn constrain_small_str() { - infer_eq( - indoc!( - r#" - "a" - "# - ), - "Str", - ) -} - -#[test] -fn constrain_empty_record() { - infer_eq( - indoc!( - r#" - {} - "# - ), - "{}", - ) -} - -#[test] -fn constrain_small_int() { - infer_eq( - indoc!( - r#" - 12 - "# - ), - "Num *", - ) -} - -#[test] -fn constrain_float() { - infer_eq( - indoc!( - r#" - 3.14 - "# - ), - "Float *", - ) -} - -#[test] -fn constrain_record() { - infer_eq( - indoc!( - r#" - { x : 1, y : "hi" } - "# - ), - "{ x : Num *, y : Str }", - ) -} - -#[test] -fn constrain_empty_list() { - infer_eq( - indoc!( - r#" - [] - "# - ), - "List *", - ) -} - -#[test] -fn constrain_list() { - infer_eq( - indoc!( - r#" - [ 1, 2 ] - "# - ), - "List (Num *)", - ) -} - -#[test] -fn constrain_list_of_records() { - infer_eq( - indoc!( - r#" - [ { x: 1 }, { x: 3 } ] - "# - ), - "List { x : Num * }", - ) -} - -#[test] -fn constrain_global_tag() { - infer_eq( - indoc!( - r#" - Foo - "# - ), - "[ Foo ]*", - ) -} - -#[test] -fn constrain_private_tag() { - infer_eq( - indoc!( - r#" 
- @Foo - "# - ), - "[ @Foo ]*", - ) -} - -#[test] -fn constrain_call_and_accessor() { - infer_eq( - indoc!( - r#" - .foo { foo: "bar" } - "# - ), - "Str", - ) -} - -#[test] -fn constrain_access() { - infer_eq( - indoc!( - r#" - { foo: "bar" }.foo - "# - ), - "Str", - ) -} - -#[test] -fn constrain_if() { - infer_eq( - indoc!( - r#" - if True then Green else Red - "# - ), - "[ Green, Red ]*", - ) -} - -#[test] -fn constrain_when() { - infer_eq( - indoc!( - r#" - when if True then Green else Red is - Green -> Blue - Red -> Purple - "# - ), - "[ Blue, Purple ]*", - ) -} - -#[test] -fn constrain_let_value() { - infer_eq( - indoc!( - r#" - person = { name: "roc" } - - person - "# - ), - "{ name : Str }", - ) -} - -#[test] -fn constrain_update() { - infer_eq( - indoc!( - r#" - person = { name: "roc" } - - { person & name: "bird" } - "# - ), - "{ name : Str }", - ) -} - -#[ignore = "TODO: implement builtins in the editor"] -#[test] -fn constrain_run_low_level() { - infer_eq( - indoc!( - r#" - List.map [ { name: "roc" }, { name: "bird" } ] .name - "# - ), - "List Str", - ) -} - -#[test] -fn constrain_closure() { - infer_eq( - indoc!( - r#" - x = 1 - - \{} -> x - "# - ), - "{}* -> Num *", - ) -} diff --git a/examples/cli/platform/Cargo.toml b/examples/cli/platform/Cargo.toml index ad2bc7c449..eba1dfa680 100644 --- a/examples/cli/platform/Cargo.toml +++ b/examples/cli/platform/Cargo.toml @@ -5,8 +5,16 @@ authors = ["The Roc Contributors"] license = "UPL-1.0" edition = "2018" +links = "app" + [lib] -crate-type = ["staticlib"] +name = "host" +path = "src/lib.rs" +crate-type = ["staticlib", "rlib"] + +[[bin]] +name = "host" +path = "src/main.rs" [dependencies] roc_std = { path = "../../../roc_std" } diff --git a/examples/cli/platform/build.rs b/examples/cli/platform/build.rs new file mode 100644 index 0000000000..73159e387c --- /dev/null +++ b/examples/cli/platform/build.rs @@ -0,0 +1,4 @@ +fn main() { + println!("cargo:rustc-link-lib=dylib=app"); + 
println!("cargo:rustc-link-search=."); +} diff --git a/examples/cli/platform/host.c b/examples/cli/platform/host.c index 0378c69589..645d900c8e 100644 --- a/examples/cli/platform/host.c +++ b/examples/cli/platform/host.c @@ -1,7 +1,3 @@ -#include - extern int rust_main(); -int main() { - return rust_main(); -} +int main() { return rust_main(); } diff --git a/examples/cli/platform/src/lib.rs b/examples/cli/platform/src/lib.rs index 2b24da5ff7..d316e264d8 100644 --- a/examples/cli/platform/src/lib.rs +++ b/examples/cli/platform/src/lib.rs @@ -27,12 +27,12 @@ extern "C" { } #[no_mangle] -pub unsafe fn roc_alloc(size: usize, _alignment: u32) -> *mut c_void { +pub unsafe extern "C" fn roc_alloc(size: usize, _alignment: u32) -> *mut c_void { libc::malloc(size) } #[no_mangle] -pub unsafe fn roc_realloc( +pub unsafe extern "C" fn roc_realloc( c_ptr: *mut c_void, new_size: usize, _old_size: usize, @@ -42,12 +42,12 @@ pub unsafe fn roc_realloc( } #[no_mangle] -pub unsafe fn roc_dealloc(c_ptr: *mut c_void, _alignment: u32) { +pub unsafe extern "C" fn roc_dealloc(c_ptr: *mut c_void, _alignment: u32) { libc::free(c_ptr) } #[no_mangle] -pub unsafe fn roc_panic(c_ptr: *mut c_void, tag_id: u32) { +pub unsafe extern "C" fn roc_panic(c_ptr: *mut c_void, tag_id: u32) { match tag_id { 0 => { let slice = CStr::from_ptr(c_ptr as *const c_char); @@ -60,7 +60,17 @@ pub unsafe fn roc_panic(c_ptr: *mut c_void, tag_id: u32) { } #[no_mangle] -pub fn rust_main() -> isize { +pub unsafe extern "C" fn roc_memcpy(dst: *mut c_void, src: *mut c_void, n: usize) -> *mut c_void { + libc::memcpy(dst, src, n) +} + +#[no_mangle] +pub unsafe extern "C" fn roc_memset(dst: *mut c_void, c: i32, n: usize) -> *mut c_void { + libc::memset(dst, c, n) +} + +#[no_mangle] +pub extern "C" fn rust_main() -> i32 { let size = unsafe { roc_main_size() } as usize; let layout = Layout::array::(size).unwrap(); @@ -81,7 +91,7 @@ pub fn rust_main() -> isize { 0 } -unsafe fn call_the_closure(closure_data_ptr: *const u8) -> i64 
{ +unsafe extern "C" fn call_the_closure(closure_data_ptr: *const u8) -> i64 { let size = size_Fx_result() as usize; let layout = Layout::array::(size).unwrap(); let buffer = std::alloc::alloc(layout) as *mut u8; @@ -99,7 +109,7 @@ unsafe fn call_the_closure(closure_data_ptr: *const u8) -> i64 { } #[no_mangle] -pub fn roc_fx_getLine() -> RocStr { +pub extern "C" fn roc_fx_getLine() -> RocStr { use std::io::{self, BufRead}; let stdin = io::stdin(); @@ -109,7 +119,7 @@ pub fn roc_fx_getLine() -> RocStr { } #[no_mangle] -pub fn roc_fx_putLine(line: RocStr) -> () { +pub extern "C" fn roc_fx_putLine(line: RocStr) -> () { let bytes = line.as_slice(); let string = unsafe { std::str::from_utf8_unchecked(bytes) }; println!("{}", string); diff --git a/examples/cli/platform/src/main.rs b/examples/cli/platform/src/main.rs new file mode 100644 index 0000000000..51175f934b --- /dev/null +++ b/examples/cli/platform/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + std::process::exit(host::rust_main()); +} diff --git a/examples/hello-rust/.gitignore b/examples/hello-rust/.gitignore index 6b820fd903..8485821d7c 100644 --- a/examples/hello-rust/.gitignore +++ b/examples/hello-rust/.gitignore @@ -1 +1 @@ -hello-world +hello-rust diff --git a/examples/hello-rust/Hello.roc b/examples/hello-rust/Hello.roc index d78f48ff19..cd7092308d 100644 --- a/examples/hello-rust/Hello.roc +++ b/examples/hello-rust/Hello.roc @@ -1,4 +1,4 @@ -app "hello-world" +app "hello-rust" packages { base: "platform" } imports [] provides [ main ] to base diff --git a/examples/hello-rust/platform/Cargo.toml b/examples/hello-rust/platform/Cargo.toml index ad2bc7c449..72f534c88e 100644 --- a/examples/hello-rust/platform/Cargo.toml +++ b/examples/hello-rust/platform/Cargo.toml @@ -4,9 +4,16 @@ version = "0.1.0" authors = ["The Roc Contributors"] license = "UPL-1.0" edition = "2018" +links = "app" [lib] -crate-type = ["staticlib"] +name = "host" +path = "src/lib.rs" +crate-type = ["staticlib", "rlib"] + +[[bin]] +name = 
"host" +path = "src/main.rs" [dependencies] roc_std = { path = "../../../roc_std" } diff --git a/examples/hello-rust/platform/build.rs b/examples/hello-rust/platform/build.rs new file mode 100644 index 0000000000..73159e387c --- /dev/null +++ b/examples/hello-rust/platform/build.rs @@ -0,0 +1,4 @@ +fn main() { + println!("cargo:rustc-link-lib=dylib=app"); + println!("cargo:rustc-link-search=."); +} diff --git a/examples/hello-rust/platform/host.c b/examples/hello-rust/platform/host.c index 9b91965724..b9214bcf33 100644 --- a/examples/hello-rust/platform/host.c +++ b/examples/hello-rust/platform/host.c @@ -1,12 +1,3 @@ -#include -#include - extern int rust_main(); -int main() { return rust_main(); } - -void *roc_memcpy(void *dest, const void *src, size_t n) { - return memcpy(dest, src, n); -} - -void *roc_memset(void *str, int c, size_t n) { return memset(str, c, n); } \ No newline at end of file +int main() { return rust_main(); } \ No newline at end of file diff --git a/examples/hello-rust/platform/src/lib.rs b/examples/hello-rust/platform/src/lib.rs index 6a78b4db0c..341556bb4b 100644 --- a/examples/hello-rust/platform/src/lib.rs +++ b/examples/hello-rust/platform/src/lib.rs @@ -1,7 +1,6 @@ #![allow(non_snake_case)] use core::ffi::c_void; -use core::mem::MaybeUninit; use libc::c_char; use roc_std::RocStr; use std::ffi::CStr; @@ -12,12 +11,12 @@ extern "C" { } #[no_mangle] -pub unsafe fn roc_alloc(size: usize, _alignment: u32) -> *mut c_void { +pub unsafe extern "C" fn roc_alloc(size: usize, _alignment: u32) -> *mut c_void { return libc::malloc(size); } #[no_mangle] -pub unsafe fn roc_realloc( +pub unsafe extern "C" fn roc_realloc( c_ptr: *mut c_void, new_size: usize, _old_size: usize, @@ -27,12 +26,12 @@ pub unsafe fn roc_realloc( } #[no_mangle] -pub unsafe fn roc_dealloc(c_ptr: *mut c_void, _alignment: u32) { +pub unsafe extern "C" fn roc_dealloc(c_ptr: *mut c_void, _alignment: u32) { return libc::free(c_ptr); } #[no_mangle] -pub unsafe fn roc_panic(c_ptr: *mut 
c_void, tag_id: u32) { +pub unsafe extern "C" fn roc_panic(c_ptr: *mut c_void, tag_id: u32) { match tag_id { 0 => { let slice = CStr::from_ptr(c_ptr as *const c_char); @@ -45,7 +44,17 @@ pub unsafe fn roc_panic(c_ptr: *mut c_void, tag_id: u32) { } #[no_mangle] -pub fn rust_main() -> isize { +pub unsafe extern "C" fn roc_memcpy(dst: *mut c_void, src: *mut c_void, n: usize) -> *mut c_void { + libc::memcpy(dst, src, n) +} + +#[no_mangle] +pub unsafe extern "C" fn roc_memset(dst: *mut c_void, c: i32, n: usize) -> *mut c_void { + libc::memset(dst, c, n) +} + +#[no_mangle] +pub extern "C" fn rust_main() -> i32 { unsafe { let roc_str = roc_main(); diff --git a/examples/hello-rust/platform/src/main.rs b/examples/hello-rust/platform/src/main.rs new file mode 100644 index 0000000000..51175f934b --- /dev/null +++ b/examples/hello-rust/platform/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + std::process::exit(host::rust_main()); +} diff --git a/examples/hello-web/platform/host.js b/examples/hello-web/platform/host.js index 0d953eabff..a90ff4187b 100644 --- a/examples/hello-web/platform/host.js +++ b/examples/hello-web/platform/host.js @@ -11,6 +11,12 @@ async function roc_web_platform_run(wasm_filename, callback) { const importObj = { wasi_snapshot_preview1: { + proc_exit: (code) => { + if (code !== 0) { + console.error(`Exited with code ${code}`); + } + exit_code = code; + }, roc_panic: (_pointer, _tag_id) => { throw 'Roc panicked!'; } diff --git a/examples/hello-web/platform/host.zig b/examples/hello-web/platform/host.zig index 5d588d6912..bfd519e9de 100644 --- a/examples/hello-web/platform/host.zig +++ b/examples/hello-web/platform/host.zig @@ -48,25 +48,21 @@ export fn roc_dealloc(c_ptr: *c_void, alignment: u32) callconv(.C) void { const mem = std.mem; const Allocator = mem.Allocator; -extern fn roc__mainForHost_1_exposed(*RocCallResult) void; - -const RocCallResult = extern struct { flag: u64, content: RocStr }; +extern fn roc__mainForHost_1_exposed(*RocStr) void; const Unit 
= extern struct {}; extern fn js_display_roc_string(str_bytes: ?[*]u8, str_len: usize) void; pub fn main() u8 { - // make space for the result - var callresult = RocCallResult{ .flag = 0, .content = RocStr.empty() }; - // actually call roc to populate the callresult + var callresult = RocStr.empty(); roc__mainForHost_1_exposed(&callresult); // display the result using JavaScript - js_display_roc_string(callresult.content.str_bytes, callresult.content.str_len); + js_display_roc_string(callresult.asU8ptr(), callresult.len()); - callresult.content.deinit(); + callresult.deinit(); return 0; } diff --git a/linker/README.md b/linker/README.md index 0d1e2d77ee..e5342c64ea 100644 --- a/linker/README.md +++ b/linker/README.md @@ -31,8 +31,6 @@ This linker is run in 2 phases: preprocessing and surigical linking. ## TODO (In a lightly prioritized order) -- Run CLI tests and/or benchmarks with the Roc Linker. -- Test with an executable completely generated by Cargo (It will hopefully work out of the box like zig). - Add Macho support - Honestly should be almost exactly the same code. This means we likely need to do a lot of refactoring to minimize the duplicate code. @@ -41,4 +39,7 @@ This linker is run in 2 phases: preprocessing and surigical linking. - As a prereq, we need roc building on Windows (I'm not sure it does currently). - Definitely a solid bit different than elf, but hopefully after refactoring for Macho, won't be that crazy to add. - Look at enabling completely in memory linking that could be used with `roc run` and/or `roc repl` +- Look more into roc hosts and keeping certain functions. Currently I just disabled linker garbage collection. + This works but adds 1.2MB (40%) to even a tiny app. It may be a size issue for large rust hosts. + Roc, for reference, adds 13MB (20%) when linked without garbage collection. - Add a feature to the compiler to make this linker optional. 
diff --git a/linker/src/lib.rs b/linker/src/lib.rs index 1a043fbce1..be4aa891c9 100644 --- a/linker/src/lib.rs +++ b/linker/src/lib.rs @@ -28,6 +28,7 @@ use target_lexicon::Triple; use tempfile::Builder; mod metadata; +use metadata::VirtualOffset; pub const CMD_PREPROCESS: &str = "preprocess"; pub const CMD_SURGERY: &str = "surgery"; @@ -196,9 +197,9 @@ fn generate_dynamic_lib( let text_section = out_object.section_id(write::StandardSection::Text); for sym in exposed_to_host { - // TODO properly generate this list. for name in &[ format!("roc__{}_1_exposed", sym), + format!("roc__{}_1_exposed_generic", sym), format!("roc__{}_1_Fx_caller", sym), format!("roc__{}_1_Fx_size", sym), format!("roc__{}_1_Fx_result_size", sym), @@ -316,7 +317,9 @@ fn preprocess_impl( for sym in exec_obj.symbols().filter(|sym| { sym.is_definition() && sym.name().is_ok() && sym.name().unwrap().starts_with("roc_") }) { - let name = sym.name().unwrap().to_string(); + // remove potentially trailing "@version". + let name = sym.name().unwrap().split('@').next().unwrap().to_string(); + // special exceptions for memcpy and memset. if &name == "roc_memcpy" { md.roc_symbol_vaddresses @@ -367,9 +370,6 @@ fn preprocess_impl( println!("PLT File Offset: {:+x}", plt_offset); } - // TODO: it looks like we may need to support global data host relocations. - // Rust host look to be using them by default instead of the plt. - // I think this is due to first linking into a static lib and then linking to the c wrapper. 
let plt_relocs = (match exec_obj.dynamic_relocations() { Some(relocs) => relocs, None => { @@ -379,7 +379,7 @@ fn preprocess_impl( } }) .map(|(_, reloc)| reloc) - .filter(|reloc| reloc.kind() == RelocationKind::Elf(7)); + .filter(|reloc| matches!(reloc.kind(), RelocationKind::Elf(7))); let app_syms: Vec = exec_obj .dynamic_symbols() @@ -387,6 +387,28 @@ fn preprocess_impl( sym.is_undefined() && sym.name().is_ok() && sym.name().unwrap().starts_with("roc_") }) .collect(); + + let got_app_syms: Vec<(String, usize)> = (match exec_obj.dynamic_relocations() { + Some(relocs) => relocs, + None => { + println!("Executable never calls any application functions."); + println!("No work to do. Probably an invalid input."); + return Ok(-1); + } + }) + .map(|(_, reloc)| reloc) + .filter(|reloc| matches!(reloc.kind(), RelocationKind::Elf(6))) + .map(|reloc| { + for symbol in app_syms.iter() { + if reloc.target() == RelocationTarget::Symbol(symbol.index()) { + return Some((symbol.name().unwrap().to_string(), symbol.index().0)); + } + } + None + }) + .flatten() + .collect(); + for sym in app_syms.iter() { let name = sym.name().unwrap().to_string(); md.app_functions.push(name.clone()); @@ -536,7 +558,7 @@ fn preprocess_impl( .unwrap() .push(metadata::SurgeryEntry { file_offset: offset, - virtual_offset: inst.next_ip(), + virtual_offset: VirtualOffset::Relative(inst.next_ip()), size: op_size, }); } @@ -878,7 +900,7 @@ fn preprocess_impl( sec_offset as usize + md.added_byte_count as usize, sec_size as usize / mem::size_of::>(), ); - for rel in relocations.iter_mut() { + for (i, rel) in relocations.iter_mut().enumerate() { let r_offset = rel.r_offset.get(NativeEndian); if virtual_shift_start <= r_offset { rel.r_offset = endian::U64::new(LittleEndian, r_offset + md.added_byte_count); @@ -890,6 +912,28 @@ fn preprocess_impl( .set(LittleEndian, r_addend + md.added_byte_count as i64); } } + // If the relocation goes to a roc function, we need to surgically link it and change it to relative. 
+ let r_type = rel.r_type(NativeEndian, false); + if r_type == elf::R_X86_64_GLOB_DAT { + let r_sym = rel.r_sym(NativeEndian, false); + for (name, index) in got_app_syms.iter() { + if *index as u32 == r_sym { + rel.set_r_info(LittleEndian, false, 0, elf::R_X86_64_RELATIVE); + let addend_addr = sec_offset as usize + + i * mem::size_of::>() + // This 16 skips the first 2 fields and gets to the addend field. + + 16; + md.surgeries + .get_mut(name) + .unwrap() + .push(metadata::SurgeryEntry { + file_offset: addend_addr as u64, + virtual_offset: VirtualOffset::Absolute, + size: 8, + }); + } + } + } } } @@ -1461,7 +1505,7 @@ fn surgery_impl( let dynsym_offset = md.dynamic_symbol_table_section_offset + md.added_byte_count; for func_name in md.app_functions { - let virt_offset = match app_func_vaddr_map.get(&func_name) { + let func_virt_offset = match app_func_vaddr_map.get(&func_name) { Some(offset) => *offset as u64, None => { println!("Function, {}, was not defined by the app", &func_name); @@ -1471,7 +1515,7 @@ fn surgery_impl( if verbose { println!( "Updating calls to {} to the address: {:+x}", - &func_name, virt_offset + &func_name, func_virt_offset ); } @@ -1479,11 +1523,13 @@ fn surgery_impl( if verbose { println!("\tPerforming surgery: {:+x?}", s); } + let surgery_virt_offset = match s.virtual_offset { + VirtualOffset::Relative(vs) => (vs + md.added_byte_count) as i64, + VirtualOffset::Absolute => 0, + }; match s.size { 4 => { - let target = (virt_offset as i64 - - (s.virtual_offset + md.added_byte_count) as i64) - as i32; + let target = (func_virt_offset as i64 - surgery_virt_offset) as i32; if verbose { println!("\tTarget Jump: {:+x}", target); } @@ -1492,6 +1538,16 @@ fn surgery_impl( ..(s.file_offset + md.added_byte_count) as usize + 4] .copy_from_slice(&data); } + 8 => { + let target = func_virt_offset as i64 - surgery_virt_offset; + if verbose { + println!("\tTarget Jump: {:+x}", target); + } + let data = target.to_le_bytes(); + exec_mmap[(s.file_offset + 
md.added_byte_count) as usize + ..(s.file_offset + md.added_byte_count) as usize + 8] + .copy_from_slice(&data); + } x => { println!("Surgery size not yet supported: {}", x); return Ok(-1); @@ -1505,7 +1561,8 @@ fn surgery_impl( let plt_off = (*plt_off + md.added_byte_count) as usize; let plt_vaddr = *plt_vaddr + md.added_byte_count; let jmp_inst_len = 5; - let target = (virt_offset as i64 - (plt_vaddr as i64 + jmp_inst_len as i64)) as i32; + let target = + (func_virt_offset as i64 - (plt_vaddr as i64 + jmp_inst_len as i64)) as i32; if verbose { println!("\tPLT: {:+x}, {:+x}", plt_off, plt_vaddr); println!("\tTarget Jump: {:+x}", target); @@ -1524,7 +1581,7 @@ fn surgery_impl( dynsym_offset as usize + *i as usize * mem::size_of::>(), ); sym.st_shndx = endian::U16::new(LittleEndian, new_text_section_index as u16); - sym.st_value = endian::U64::new(LittleEndian, virt_offset as u64); + sym.st_value = endian::U64::new(LittleEndian, func_virt_offset as u64); sym.st_size = endian::U64::new( LittleEndian, match app_func_size_map.get(&func_name) { diff --git a/linker/src/metadata.rs b/linker/src/metadata.rs index 16f06232cd..f24bf9a626 100644 --- a/linker/src/metadata.rs +++ b/linker/src/metadata.rs @@ -1,10 +1,16 @@ use roc_collections::all::MutMap; use serde::{Deserialize, Serialize}; +#[derive(Serialize, Deserialize, PartialEq, Debug)] +pub enum VirtualOffset { + Absolute, + Relative(u64), +} + #[derive(Serialize, Deserialize, PartialEq, Debug)] pub struct SurgeryEntry { pub file_offset: u64, - pub virtual_offset: u64, + pub virtual_offset: VirtualOffset, pub size: u8, } diff --git a/linker/tests/fib/.gitignore b/linker/tests/fib/.gitignore deleted file mode 100644 index a3c0d77f6d..0000000000 --- a/linker/tests/fib/.gitignore +++ /dev/null @@ -1,11 +0,0 @@ -fib - -zig-cache -zig-out - -*.o - -dynhost -preprocessedhost -metadata -libapp.so \ No newline at end of file diff --git a/linker/tests/fib/Main.roc b/linker/tests/fib/Main.roc deleted file mode 100644 index 
646fdbea75..0000000000 --- a/linker/tests/fib/Main.roc +++ /dev/null @@ -1,15 +0,0 @@ -app "fib" - packages { base: "platform" } - imports [] - provides [ main ] to base - -main : U64 -> U64 -main = \index -> - fibHelp index 0 1 - -fibHelp : U64, U64, U64 -> U64 -fibHelp = \index, parent, grandparent -> - if index == 0 then - parent - else - fibHelp (index - 1) grandparent (parent + grandparent) \ No newline at end of file diff --git a/linker/tests/fib/README.md b/linker/tests/fib/README.md deleted file mode 100644 index 0f1af10077..0000000000 --- a/linker/tests/fib/README.md +++ /dev/null @@ -1,48 +0,0 @@ -# Hello, World! - -To run, `cd` into this directory and run: - -```bash -$ cargo run Hello.roc -``` - -To run in release mode instead, do: - -```bash -$ cargo run --release Hello.roc -``` - -## Troubleshooting - -If you encounter `cannot find -lc++`, run the following for ubuntu `sudo apt install libc++-dev`. - -## Design Notes - -This demonstrates the basic design of hosts: Roc code gets compiled into a pure -function (in this case, a thunk that always returns `"Hello, World!"`) and -then the host calls that function. Fundamentally, that's the whole idea! The host -might not even have a `main` - it could be a library, a plugin, anything. -Everything else is built on this basic "hosts calling linked pure functions" design. - -For example, things get more interesting when the compiled Roc function returns -a `Task` - that is, a tagged union data structure containing function pointers -to callback closures. This lets the Roc pure function describe arbitrary -chainable effects, which the host can interpret to perform I/O as requested by -the Roc program. (The tagged union `Task` would have a variant for each supported -I/O operation.) - -In this trivial example, it's very easy to line up the API between the host and -the Roc program. In a more involved host, this would be much trickier - especially -if the API were changing frequently during development. 
- -The idea there is to have a first-class concept of "glue code" which host authors -can write (it would be plain Roc code, but with some extra keywords that aren't -available in normal modules - kinda like `port module` in Elm), and which -describe both the Roc-host/C boundary as well as the Roc-host/Roc-app boundary. -Roc application authors only care about the Roc-host/Roc-app portion, and the -host author only cares about the Roc-host/C boundary when implementing the host. - -Using this glue code, the Roc compiler can generate C header files describing the -boundary. This not only gets us host compatibility with C compilers, but also -Rust FFI for free, because [`rust-bindgen`](https://github.com/rust-lang/rust-bindgen) -generates correct Rust FFI bindings from C headers. diff --git a/linker/tests/fib/platform/Package-Config.roc b/linker/tests/fib/platform/Package-Config.roc deleted file mode 100644 index d93cd7c258..0000000000 --- a/linker/tests/fib/platform/Package-Config.roc +++ /dev/null @@ -1,10 +0,0 @@ -platform tests/fib - requires {}{ main : U64 -> U64 } - exposes [] - packages {} - imports [] - provides [ mainForHost ] - effects fx.Effect {} - -mainForHost : U64 -> U64 -mainForHost = \arg -> main arg # workaround for https://github.com/rtfeldman/roc/issues/1622 \ No newline at end of file diff --git a/linker/tests/fib/platform/app.zig b/linker/tests/fib/platform/app.zig deleted file mode 100644 index 105908633f..0000000000 --- a/linker/tests/fib/platform/app.zig +++ /dev/null @@ -1 +0,0 @@ -export fn roc__mainForHost_1_exposed(_i: i64, _result: *u64) void {} diff --git a/linker/tests/fib/platform/build.zig b/linker/tests/fib/platform/build.zig deleted file mode 100644 index deb36d6c78..0000000000 --- a/linker/tests/fib/platform/build.zig +++ /dev/null @@ -1,33 +0,0 @@ -const Builder = @import("std").build.Builder; - -pub fn build(b: *Builder) void { - // Standard target options allows the person running `zig build` to choose - // what target to build 
for. Here we do not override the defaults, which - // means any target is allowed, and the default is native. Other options - // for restricting supported target set are available. - const target = b.standardTargetOptions(.{}); - - // Standard release options allow the person running `zig build` to select - // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. - const mode = b.standardReleaseOptions(); - - const app = b.addSharedLibrary("app", "app.zig", .unversioned); - app.setTarget(target); - app.setBuildMode(mode); - app.install(); - - const exe = b.addExecutable("dynhost", "host.zig"); - exe.pie = true; - exe.strip = true; - exe.setTarget(target); - exe.setBuildMode(mode); - exe.linkLibrary(app); - exe.linkLibC(); - exe.install(); - - const run_cmd = exe.run(); - run_cmd.step.dependOn(b.getInstallStep()); - - const run_step = b.step("run", "Run the app"); - run_step.dependOn(&run_cmd.step); -} diff --git a/linker/tests/fib/platform/host.zig b/linker/tests/fib/platform/host.zig deleted file mode 100644 index c439c889c7..0000000000 --- a/linker/tests/fib/platform/host.zig +++ /dev/null @@ -1,73 +0,0 @@ -const std = @import("std"); -const testing = std.testing; -const expectEqual = testing.expectEqual; -const expect = testing.expect; - -comptime { - // This is a workaround for https://github.com/ziglang/zig/issues/8218 - // which is only necessary on macOS. - // - // Once that issue is fixed, we can undo the changes in - // 177cf12e0555147faa4d436e52fc15175c2c4ff0 and go back to passing - // -fcompiler-rt in link.rs instead of doing this. Note that this - // workaround is present in many host.zig files, so make sure to undo - // it everywhere! 
- if (std.builtin.os.tag == .macos) { - _ = @import("compiler_rt"); - } -} - -extern fn malloc(size: usize) callconv(.C) ?*c_void; -extern fn realloc(c_ptr: [*]align(@alignOf(u128)) u8, size: usize) callconv(.C) ?*c_void; -extern fn free(c_ptr: [*]align(@alignOf(u128)) u8) callconv(.C) void; - -export fn roc_alloc(size: usize, alignment: u32) callconv(.C) ?*c_void { - return malloc(size); -} - -export fn roc_realloc(c_ptr: *c_void, old_size: usize, new_size: usize, alignment: u32) callconv(.C) ?*c_void { - return realloc(@alignCast(16, @ptrCast([*]u8, c_ptr)), new_size); -} - -export fn roc_dealloc(c_ptr: *c_void, alignment: u32) callconv(.C) void { - free(@alignCast(16, @ptrCast([*]u8, c_ptr))); -} - -export fn roc_panic(c_ptr: *c_void, tag_id: u32) callconv(.C) void { - const stderr = std.io.getStdErr().writer(); - const msg = @ptrCast([*:0]const u8, c_ptr); - stderr.print("Application crashed with message\n\n {s}\n\nShutting down\n", .{msg}) catch unreachable; - std.process.exit(0); -} - -const mem = std.mem; -const Allocator = mem.Allocator; - -extern fn roc__mainForHost_1_exposed(i64, *i64) void; - -const Unit = extern struct {}; - -pub export fn main() u8 { - const stdout = std.io.getStdOut().writer(); - const fib_number_to_find: u64 = 10; // find the nth Fibonacci number - const iterations: usize = 50; // number of times to repeatedly find that Fibonacci number - - // make space for the result - var callresult = 0; - var remaining_iterations = iterations; - - while (remaining_iterations > 0) { - // actually call roc to populate the callresult - roc__mainForHost_1_exposed(fib_number_to_find, &callresult); - - remaining_iterations -= 1; - } - - // stdout the final result - stdout.print( - "After calling the Roc app {d} times, the Fibonacci number at index {d} is {d}\n", - .{ iterations, fib_number_to_find, callresult }, - ) catch unreachable; - - return 0; -} diff --git a/nightly_benches/benches/events_bench.rs b/nightly_benches/benches/events_bench.rs index 
c722eeae7c..be889672f2 100644 --- a/nightly_benches/benches/events_bench.rs +++ b/nightly_benches/benches/events_bench.rs @@ -1,6 +1,6 @@ // Keep this benchmark. It's commented because it requires nightly rust. use cli_utils::bench_utils::{ - bench_cfold, bench_deriv, bench_nqueens, bench_rbtree_ck, bench_rbtree_delete, bench_quicksort + bench_cfold, bench_deriv, bench_nqueens, bench_quicksort, bench_rbtree_ck, bench_rbtree_delete, }; use criterion_perf_events::Perf; use perfcnt::linux::HardwareEventType as Hardware; @@ -18,7 +18,7 @@ fn bench_group(c: &mut Criterion, hw_event_str: &str) { bench_cfold, bench_deriv, bench_rbtree_ck, - bench_rbtree_delete, + // bench_rbtree_delete, bench_quicksort, ]; diff --git a/utils/Cargo.toml b/utils/Cargo.toml new file mode 100644 index 0000000000..75c3f928a4 --- /dev/null +++ b/utils/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "roc_utils" +version = "0.1.0" +authors = ["The Roc Contributors"] +license = "UPL-1.0" +edition = "2018" +description = "Utility functions used all over the code base." 
+ +[dependencies] +snafu = { version = "0.6", features = ["backtraces"] } + +[dev-dependencies] \ No newline at end of file diff --git a/utils/src/lib.rs b/utils/src/lib.rs new file mode 100644 index 0000000000..bbab75e59b --- /dev/null +++ b/utils/src/lib.rs @@ -0,0 +1,95 @@ +use snafu::OptionExt; +use std::{collections::HashMap, slice::SliceIndex}; +use util_error::{IndexOfFailed, KeyNotFound, OutOfBounds, UtilResult}; + +pub mod util_error; + +// replace HashMap method that returns Option with one that returns Result and proper Error +pub fn map_get<'a, K: ::std::fmt::Debug + std::hash::Hash + std::cmp::Eq, V>( + hash_map: &'a HashMap, + key: &K, +) -> UtilResult<&'a V> { + let value = hash_map.get(key).context(KeyNotFound { + key_str: format!("{:?}", key), + })?; + + Ok(value) +} + +pub fn index_of(elt: T, slice: &[T]) -> UtilResult { + let index = slice + .iter() + .position(|slice_elt| *slice_elt == elt) + .with_context(|| { + let elt_str = format!("{:?}", elt); + let collection_str = format!("{:?}", slice); + + IndexOfFailed { + elt_str, + collection_str, + } + })?; + + Ok(index) +} + +// replace slice method that return Option with one that return Result and proper Error +pub fn slice_get(index: usize, slice: &[T]) -> UtilResult<&>::Output> { + let elt_ref = slice.get(index).context(OutOfBounds { + index, + collection_name: "Slice", + len: slice.len(), + })?; + + Ok(elt_ref) +} + +pub fn slice_get_mut( + index: usize, + slice: &mut [T], +) -> UtilResult<&mut >::Output> { + let slice_len = slice.len(); + + let elt_ref = slice.get_mut(index).context(OutOfBounds { + index, + collection_name: "Slice", + len: slice_len, + })?; + + Ok(elt_ref) +} + +// returns the index of the first occurrence of element and index of the last occurrence +pub fn first_last_index_of( + elt: T, + slice: &[T], +) -> UtilResult<(usize, usize)> { + let mut first_index_opt = None; + let mut last_index_opt = None; + + for (index, list_elt) in slice.iter().enumerate() { + if *list_elt == 
elt { + if first_index_opt.is_none() { + first_index_opt = Some(index); + last_index_opt = Some(index); + } else { + last_index_opt = Some(index) + } + } else if last_index_opt.is_some() { + break; + } + } + + if let (Some(first_index), Some(last_index)) = (first_index_opt, last_index_opt) { + Ok((first_index, last_index)) + } else { + let elt_str = format!("{:?}", elt); + let collection_str = format!("{:?}", slice); + + IndexOfFailed { + elt_str, + collection_str, + } + .fail() + } +} diff --git a/utils/src/util_error.rs b/utils/src/util_error.rs new file mode 100644 index 0000000000..d19c230f11 --- /dev/null +++ b/utils/src/util_error.rs @@ -0,0 +1,35 @@ +use snafu::{Backtrace, Snafu}; + +#[derive(Debug, Snafu)] +#[snafu(visibility(pub))] +pub enum UtilError { + #[snafu(display( + "IndexOfFailed: Element {} was not found in collection {}.", + elt_str, + collection_str + ))] + IndexOfFailed { + elt_str: String, + collection_str: String, + backtrace: Backtrace, + }, + #[snafu(display("KeyNotFound: key {} was not found in HashMap.", key_str,))] + KeyNotFound { + key_str: String, + backtrace: Backtrace, + }, + #[snafu(display( + "OutOfBounds: index {} was out of bounds for {} with length {}.", + index, + collection_name, + len + ))] + OutOfBounds { + index: usize, + collection_name: String, + len: usize, + backtrace: Backtrace, + }, +} + +pub type UtilResult = std::result::Result; diff --git a/vendor/morphic_lib/Cargo.toml b/vendor/morphic_lib/Cargo.toml index 5406cf3647..594ad4c408 100644 --- a/vendor/morphic_lib/Cargo.toml +++ b/vendor/morphic_lib/Cargo.toml @@ -5,6 +5,7 @@ authors = ["William Brandon", "Wilson Berkow", "Frank Dai", "Benjamin Driscoll"] edition = "2018" [dependencies] -thiserror = "1.0.24" +thiserror = "1.0" sha2 = "0.9.4" -smallvec = "1.6.1" +smallvec = "1.6" +typed-arena = "2.0" diff --git a/vendor/morphic_lib/src/analyze.rs b/vendor/morphic_lib/src/analyze.rs new file mode 100644 index 0000000000..fbb69785ef --- /dev/null +++ 
b/vendor/morphic_lib/src/analyze.rs @@ -0,0 +1,1770 @@ +use smallvec::SmallVec; +use std::collections::{BTreeSet, HashMap, HashSet}; +use std::convert::TryInto; +use typed_arena::Arena; + +use crate::api; +use crate::ir; +use crate::name_cache::{EntryPointId, FuncId}; +use crate::type_cache::{TypeCache, TypeData, TypeId}; +use crate::util::flat_slices::FlatSlices; +use crate::util::id_type::Count; +use crate::util::id_vec::IdVec; +use crate::util::norm_pair::NormPair; +use crate::util::op_graph; +use crate::util::replace_none::replace_none; +use crate::util::strongly_connected::{strongly_connected, SccKind}; + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +struct SubSlots { + end_indices: SmallVec<[u32; 10]>, +} + +impl SubSlots { + fn from_slot_counts(slot_counts: impl Iterator) -> Self { + let mut total = 0; + let end_indices = slot_counts + .map(|count| { + total += count; + total + }) + .collect(); + Self { end_indices } + } + + fn slot_count(&self) -> u32 { + self.end_indices.last().cloned().unwrap_or(0) + } + + /// Returns bounds `a, b` for a range of slot indices `a..b` + fn sub_slots(&self, index: u32) -> (u32, u32) { + let start = if index == 0 { + 0 + } else { + self.end_indices[index as usize - 1] + }; + let end = self.end_indices[index as usize]; + (start, end) + } +} + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +enum TypeSlots { + Named, + Tuple { field_slots: SubSlots }, + Union { variant_slots: SubSlots }, + HeapCell, + Bag { item_slots: u32 }, +} + +impl TypeSlots { + fn slot_count(&self) -> u32 { + match self { + TypeSlots::Named => 1, + TypeSlots::Tuple { field_slots } => field_slots.slot_count(), + TypeSlots::Union { variant_slots } => variant_slots.slot_count(), + TypeSlots::HeapCell => 1, + TypeSlots::Bag { item_slots } => *item_slots, + } + } +} + +#[derive(Clone, Debug)] +struct SlotCache { + type_cache: TypeCache, + slots: IdVec, +} + +impl SlotCache { + fn new(type_cache: TypeCache) -> Self { + let mut 
slots: IdVec<_, TypeSlots> = IdVec::new(); + // NOTE: This only works because 'type_cache.types' is guaranteed to assign ids in + // topological order. + for (id, type_) in type_cache.types.iter() { + let this_slots = match type_ { + TypeData::Named { named: _ } => TypeSlots::Named, + TypeData::Tuple { fields } => { + let field_slots = SubSlots::from_slot_counts( + fields.iter().map(|field| slots[field].slot_count()), + ); + TypeSlots::Tuple { field_slots } + } + TypeData::Union { variants } => { + let variant_slots = SubSlots::from_slot_counts( + variants.iter().map(|variant| slots[variant].slot_count()), + ); + TypeSlots::Union { variant_slots } + } + TypeData::HeapCell => TypeSlots::HeapCell, + TypeData::Bag { item } => { + let item_slots = slots[item].slot_count(); + TypeSlots::Bag { item_slots } + } + }; + let pushed_id = slots.push(this_slots); + debug_assert_eq!(pushed_id, id); + } + Self { type_cache, slots } + } + + fn type_cache(&self) -> &TypeCache { + &self.type_cache + } + + fn slots(&self) -> &IdVec { + &self.slots + } +} + +id_type! { + HeapCellId(u32); +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +enum QueryPoint { + Update(api::UpdateModeVarId), + EntryArg(u32), + CallArg(api::CalleeSpecVarId, u32), + CallRet(api::CalleeSpecVarId, u32), +} + +#[derive(Clone, Debug)] +struct BackRefState { + // TODO: use a more efficient representation + overlay: HashMap>, + parents: Vec, +} + +id_type! { + BackRefStateVersionId(u32); +} + +#[derive(Clone, Copy, Debug)] +struct CallInfo { + callee: FuncId, + ret_slots: u32, +} + +#[derive(Clone)] +struct ForwardState<'a> { + slots_arena: &'a Arena, + value_slots: IdVec>, + // Represents value slots for "iteration n - 1" in an SCC + // TODO: Should this be array-of-structs instead of struct-of-arrays? 
+ value_slots_inductive: IdVec>, + call_arg_aliases: IdVec>>>, + call_arg_origins: IdVec>>, + // TODO: Find a better place to store the data mapping in `calls` + calls: IdVec>, + update_origins: IdVec>, + arg_slots: Option<&'a [HeapCellId]>, + heap_cells: IdVec, + back_ref_states: IdVec, + block_versions: IdVec>, + block_versions_inductive: IdVec>, + entry_version: BackRefStateVersionId, + fates: HashMap, +} + +type Set = HashSet; + +#[derive(Clone, Debug, PartialEq, Eq)] +enum Origin { + /// This heap cell might have been obtained from a `const_ref` op. + /// In this case we don't care what arg slots it might have also been obtained from, because we + /// definitely can't mutate it. + FromConst, + /// This heap cell was definitely not obtained from a `const_ref` op. + /// In this case we care about the (potentially empty) set of arg slots we might have obtained + /// it from. + FromArgSlots(Set), +} + +impl Origin { + pub fn union_with(&mut self, other: &Origin) { + match (&mut *self, other) { + (Origin::FromConst, _) => {} + (Origin::FromArgSlots(_), Origin::FromConst) => *self = Origin::FromConst, + (Origin::FromArgSlots(slots1), Origin::FromArgSlots(slots2)) => slots1.extend(slots2), + } + } +} + +impl Default for Origin { + fn default() -> Self { + Origin::FromArgSlots(Set::new()) + } +} + +#[derive(Clone, Debug)] +struct ForwardData { + origin: Origin, + // invariant: does not contain current heap cell id (all heap cells implicitly alias themselves, + // so storing reflexive alias edges would be redundant). + // invariant: aliases are symmetric; if we alias another heap cell, that heap cell should also + // alias us. 
+ aliases: Set, +} + +fn result_slot_count(sc: &mut SlotCache, val: &op_graph::Node) -> u32 { + sc.slots()[val.op.result_type].slot_count() +} + +fn id_result_slot_count(sc: &mut SlotCache, graph: &ir::Graph, val_id: ir::ValueId) -> u32 { + sc.slots()[graph.values().node(val_id).op.result_type].slot_count() +} + +type HeapCellSlotMapping = HashMap>; + +#[derive(Clone, Debug, PartialEq, Eq)] +struct ForwardSccSlotSummary { + pre_aliases: Set, + inductive_aliases: Set<(ir::ValueId, u32)>, + internal_aliases: Set<(ir::ValueId, u32)>, + back_refs: HashSet, +} + +type ForwardSccSummary = HashMap>; + +fn block_values_inclusive( + graph: &ir::Graph, + block: ir::BlockId, +) -> impl Iterator + '_ { + graph + .blocks() + .block_info(block) + .param + .iter() // iterator impl on Option + .cloned() + .chain(graph.blocks().block_values(block)) +} + +impl<'a> ForwardState<'a> { + fn add_heap_cell(&mut self) -> HeapCellId { + self.heap_cells.push(ForwardData { + origin: Origin::default(), + aliases: Set::new(), + }) + } + + // Factored into a separate function to allow use when other (disjoint) fields of 'self' are borrowed + fn back_refs_in_states( + back_ref_states: &mut IdVec, + version: BackRefStateVersionId, + heap_cell: HeapCellId, + ) -> &mut HashSet { + // TODO: Optimize this so that it does not traverse the whole parent chain when the heap + // cell is guaranteed to not have any back ref annotations before a certain point (i.e., + // when we have some information about when the heap cell was created). + // TODO: Remove query points from back ref sets when they are set to 'DirectTouch'. 
+ if back_ref_states[version].overlay.contains_key(&heap_cell) { + return back_ref_states[version] + .overlay + .get_mut(&heap_cell) + .unwrap(); + } + let back_refs = match &back_ref_states[version].parents as &[_] { + &[parent] => Self::back_refs_in_states(back_ref_states, parent, heap_cell).clone(), + parents => { + let num_parents = parents.len(); + let mut back_refs = HashSet::new(); + for parent_i in 0..num_parents { + let parent = back_ref_states[version].parents[parent_i]; + let parent_back_refs = + Self::back_refs_in_states(back_ref_states, parent, heap_cell); + back_refs.extend(parent_back_refs.iter()); + } + back_refs + } + }; + back_ref_states[version] + .overlay + .entry(heap_cell) + .or_insert(back_refs) // always inserts + } + + fn back_refs( + &mut self, + version: BackRefStateVersionId, + heap_cell: HeapCellId, + ) -> &mut HashSet { + Self::back_refs_in_states(&mut self.back_ref_states, version, heap_cell) + } + + fn add_heap_cells(&mut self, n: u32) -> &'a mut [HeapCellId] { + self.slots_arena + .alloc_extend(std::iter::repeat_with(|| self.add_heap_cell()).take(n as usize)) + } + + fn copy_heap_cell(&mut self, cell: HeapCellId, n: u32) -> &'a mut [HeapCellId] { + self.slots_arena + .alloc_extend(std::iter::repeat(cell).take(n as usize)) + } + + fn add_alias(&mut self, cell1: HeapCellId, cell2: HeapCellId) { + if cell1 == cell2 { + return; + } + self.heap_cells[cell1].aliases.insert(cell2); + self.heap_cells[cell2].aliases.insert(cell1); + } + + fn copy_aliases(&mut self, src: HeapCellId, dst: HeapCellId) { + self.add_alias(src, dst); + + // A trick so that we can iterate over `aliases` and call `add_alias` at the same time. At + // the end of the function we put `src_aliases` back in place. 
+ // + // TODO: revist this if we start using "small" sets + let src_aliases = std::mem::take(&mut self.heap_cells[src].aliases); + for &other in &src_aliases { + debug_assert_ne!(other, src); + self.add_alias(other, dst); + } + self.heap_cells[src].aliases = src_aliases; + } + + fn copy_non_alias_data( + &mut self, + src_version: BackRefStateVersionId, + src: HeapCellId, + dst_version: BackRefStateVersionId, + dst: HeapCellId, + ) { + if src == dst { + return; + } + + let (src_data, dst_data) = self.heap_cells.get2_mut(src, dst).unwrap(); + src_data.origin.union_with(&dst_data.origin); + + let src_back_refs = std::mem::take(self.back_refs(src_version, src)); + let dst_back_refs = self.back_refs(dst_version, dst); + dst_back_refs.extend(src_back_refs.iter()); + debug_assert!(self.back_refs(src_version, src).is_empty()); + *self.back_refs(src_version, src) = src_back_refs; + } + + fn copy_data( + &mut self, + src_version: BackRefStateVersionId, + src: HeapCellId, + dst_version: BackRefStateVersionId, + dst: HeapCellId, + ) { + self.copy_non_alias_data(src_version, src, dst_version, dst); + self.copy_aliases(src, dst); + } + + fn touch(&mut self, version: BackRefStateVersionId, heap_cell: HeapCellId) { + let back_refs = std::mem::take(self.back_refs(version, heap_cell)); + for &query_point in &back_refs { + self.fates.insert(query_point, Fate::DirectTouch); + } + *self.back_refs(version, heap_cell) = back_refs; + } + + fn recursive_touch(&mut self, version: BackRefStateVersionId, heap_cells: &[HeapCellId]) { + for &heap_cell in heap_cells { + self.touch(version, heap_cell); + } + } + + fn add_back_refs( + &mut self, + version: BackRefStateVersionId, + heap_cell: HeapCellId, + query_point: QueryPoint, + mut other_filter: impl FnMut(HeapCellId) -> bool, + ) { + let aliases = std::mem::take(&mut self.heap_cells[heap_cell].aliases); + for other in std::iter::once(heap_cell).chain(aliases.iter().cloned()) { + if other_filter(other) { + self.back_refs(version, 
other).insert(query_point); + } + } + self.heap_cells[heap_cell].aliases = aliases; + } + + fn analyze_value( + &mut self, + sc: &mut SlotCache, + ctx: &mut SccAnalysisContext, + graph: &ir::Graph, + version: BackRefStateVersionId, + val_id: ir::ValueId, + ) { + let val_node = graph.values().node(val_id); + let input_slot_arrs: SmallVec<[_; 16]> = val_node + .inputs + .iter() + .map(|input| { + self.value_slots[input].expect("values should be processed in topological order") + }) + .collect(); + let op = match &val_node.op.kind { + ir::ValueKind::Op(op) => op, + ir::ValueKind::BlockParam => { + unreachable!("block param should never appear in the values of a block") + } + }; + let ret_slots: &[_] = match op { + ir::OpKind::UnknownWith => { + let new_cell = self.add_heap_cell(); + self.heap_cells[new_cell].origin = Origin::FromConst; + for input_slots in input_slot_arrs { + self.recursive_touch(version, input_slots); + for &input_cell in input_slots { + self.copy_aliases(input_cell, new_cell); + } + } + let slot_count = result_slot_count(sc, &val_node); + self.copy_heap_cell(new_cell, slot_count) + } + + ir::OpKind::Call { + callee_spec_var, + callee, + } => { + debug_assert_eq!(input_slot_arrs.len(), 1); + let arg_slots = input_slot_arrs[0]; + // TODO: optimize this entire case! 
+ let mut heap_cell_slots = HashMap::>::new(); + for (slot_i, &heap_cell) in arg_slots.iter().enumerate() { + heap_cell_slots + .entry(heap_cell) + .or_insert_with(SmallVec::new) + .push(slot_i.try_into().unwrap()); + } + let mut arg_aliases = HashSet::new(); + for (heap_cell, slot_indices) in &heap_cell_slots { + // Wire up to ocurrences of the same heap cell in the argument slots + for (i, &slot_i) in slot_indices.iter().enumerate() { + for &slot_j in &slot_indices[..i] { + arg_aliases.insert(NormPair::new(slot_i, slot_j)); + } + } + // Wire up to distinct aliased heap cells in the argument slots + for &other in &self.heap_cells[heap_cell].aliases { + if let Some(other_slot_indices) = heap_cell_slots.get(&other) { + for &this_slot_i in slot_indices { + for &other_slot_i in other_slot_indices { + arg_aliases.insert(NormPair::new(this_slot_i, other_slot_i)); + } + } + } + } + } + + let ret_slots: &[_] = self.add_heap_cells(result_slot_count(sc, &val_node)); + + if let Some(basic_analysis) = ctx.get_analysis(sc, *callee, None) { + for (arg_slot_i, slot_analysis) in basic_analysis.arg_slots.iter().enumerate() { + if matches!(slot_analysis.fate, Fate::DirectTouch) { + self.touch(version, arg_slots[arg_slot_i]); + } + } + for (ret_slot_i, slot_analysis) in basic_analysis.ret_slots.iter().enumerate() { + let ret_heap_cell = ret_slots[ret_slot_i]; + if slot_analysis.from_const { + self.heap_cells[ret_heap_cell].origin = Origin::FromConst; + } + // Temporarily violate symmetry invariant + for &arg_slot_i in &slot_analysis.arg_aliases { + let arg_heap_cell = arg_slots[arg_slot_i as usize]; + self.heap_cells[ret_heap_cell].aliases.insert(arg_heap_cell); + let (arg_heap_cell_data, ret_heap_cell_data) = self + .heap_cells + .get2_mut(arg_heap_cell, ret_heap_cell) + .unwrap(); + for &alias_of_arg in &arg_heap_cell_data.aliases { + ret_heap_cell_data.aliases.insert(alias_of_arg); + } + self.copy_non_alias_data( + version, + arg_heap_cell, + version, + ret_heap_cell, + ); + } + 
for &other_ret_slot_i in &slot_analysis.ret_aliases { + self.heap_cells[ret_heap_cell] + .aliases + .insert(ret_slots[other_ret_slot_i as usize]); + } + } + } + + for &arg_alias in &arg_aliases { + if let Some(part_analysis) = ctx.get_analysis(sc, *callee, Some(arg_alias)) { + for (arg_slot_i, slot_analysis) in + part_analysis.arg_slots.iter().enumerate() + { + if matches!(slot_analysis.fate, Fate::DirectTouch) { + self.touch(version, arg_slots[arg_slot_i]); + } + } + for (ret_slot_i, slot_analysis) in + part_analysis.ret_slots.iter().enumerate() + { + // Temporarily violate symmetry invariant + let ret_heap_cell = ret_slots[ret_slot_i]; + for &arg_slot_i in &slot_analysis.arg_aliases { + let arg_heap_cell = arg_slots[arg_slot_i as usize]; + self.heap_cells[ret_heap_cell].aliases.insert(arg_heap_cell); + } + for &other_ret_slot_i in &slot_analysis.ret_aliases { + self.heap_cells[ret_heap_cell] + .aliases + .insert(ret_slots[other_ret_slot_i as usize]); + } + } + } + } + + // Restore symmetry invariant + for &ret_heap_cell in ret_slots { + let aliases = std::mem::take(&mut self.heap_cells[ret_heap_cell].aliases); + for &other in &aliases { + debug_assert_ne!(other, ret_heap_cell); + self.heap_cells[other].aliases.insert(ret_heap_cell); + } + debug_assert!(self.heap_cells[ret_heap_cell].aliases.is_empty()); + self.heap_cells[ret_heap_cell].aliases = aliases; + } + + // We don't use 'replace_none' here because we may write these values multiple times + // during fixed-point iteration. 
+ self.call_arg_aliases[callee_spec_var] = Some(arg_aliases); + self.call_arg_origins[callee_spec_var] = Some( + arg_slots + .iter() + .map(|heap_cell| self.heap_cells[heap_cell].origin.clone()) + .collect(), + ); + self.calls[callee_spec_var] = Some(CallInfo { + callee: *callee, + ret_slots: ret_slots.len().try_into().unwrap(), + }); + + for (arg_slot_i, &arg_heap_cell) in arg_slots.iter().enumerate() { + self.add_back_refs( + version, + arg_heap_cell, + QueryPoint::CallArg(*callee_spec_var, arg_slot_i.try_into().unwrap()), + // TODO: don't use a linear search here + |other| !ret_slots.contains(&other), + ); + } + for (ret_slot_i, &ret_heap_cell) in ret_slots.iter().enumerate() { + self.add_back_refs( + version, + ret_heap_cell, + QueryPoint::CallRet(*callee_spec_var, ret_slot_i.try_into().unwrap()), + // TODO: don't use a linear search here + |other| !arg_slots.contains(&other), + ); + } + + ret_slots + } + + ir::OpKind::ConstRef { const_: _ } => { + debug_assert_eq!(input_slot_arrs.len(), 0); + let slot_count = result_slot_count(sc, &val_node); + let new_heap_cells: &[_] = self.add_heap_cells(slot_count); + for heap_cell in new_heap_cells { + self.heap_cells[heap_cell].origin = Origin::FromConst; + } + new_heap_cells + } + + ir::OpKind::NewHeapCell => { + debug_assert_eq!(input_slot_arrs.len(), 0); + let new_cell = self.add_heap_cell(); + std::slice::from_ref(self.slots_arena.alloc(new_cell)) + } + + ir::OpKind::RecursiveTouch => { + debug_assert_eq!(input_slot_arrs.len(), 1); + self.recursive_touch(version, input_slot_arrs[0]); + &[] + } + + ir::OpKind::UpdateWriteOnly { update_mode_var } => { + debug_assert_eq!(input_slot_arrs.len(), 1); + debug_assert_eq!(input_slot_arrs[0].len(), 1); + let heap_cell = input_slot_arrs[0][0]; + // We don't use 'replace_none' here because we may write this value multiple times + // during fixed-point iteration. 
+ self.update_origins[*update_mode_var] = + Some(self.heap_cells[heap_cell].origin.clone()); + self.add_back_refs( + version, + heap_cell, + QueryPoint::Update(*update_mode_var), + |_| true, + ); + &[] + } + + ir::OpKind::EmptyBag => { + debug_assert_eq!(input_slot_arrs.len(), 0); + let slot_count = result_slot_count(sc, &val_node); + self.add_heap_cells(slot_count) + } + + ir::OpKind::BagInsert => { + debug_assert_eq!(input_slot_arrs.len(), 2); + let slot_count = result_slot_count(sc, &val_node); + let slots = self.add_heap_cells(slot_count); + for input_slots in input_slot_arrs { + for (&input_cell, &new_cell) in input_slots.iter().zip(slots.iter()) { + self.copy_data(version, input_cell, version, new_cell); + } + } + slots + } + + ir::OpKind::BagGet => { + debug_assert_eq!(input_slot_arrs.len(), 1); + input_slot_arrs[0] + } + + ir::OpKind::BagRemove => { + debug_assert_eq!(input_slot_arrs.len(), 1); + self.slots_arena.alloc_extend( + input_slot_arrs[0] + .iter() + .chain(input_slot_arrs[0].iter()) + .cloned(), + ) + } + + ir::OpKind::MakeTuple => self.slots_arena.alloc_extend( + input_slot_arrs + .iter() + .flat_map(|slots| slots.iter().cloned()), + ), + + ir::OpKind::GetTupleField { field_idx } => { + debug_assert_eq!(input_slot_arrs.len(), 1); + let input_type = graph.values().node(val_node.inputs[0]).op.result_type; + let field_slots = if let TypeSlots::Tuple { field_slots } = &sc.slots()[input_type] + { + field_slots + } else { + unreachable!() + }; + let (start, end) = field_slots.sub_slots(*field_idx); + &input_slot_arrs[0][start as usize..end as usize] + } + + ir::OpKind::MakeUnion { variant_idx } => { + debug_assert_eq!(input_slot_arrs.len(), 1); + let variant_slots = if let TypeSlots::Union { variant_slots } = + &sc.slots()[val_node.op.result_type] + { + variant_slots + } else { + unreachable!() + }; + let (start, end) = variant_slots.sub_slots(*variant_idx); + debug_assert_eq!((end - start) as usize, input_slot_arrs[0].len()); + self.slots_arena + 
.alloc_extend((0..variant_slots.slot_count()).map(|i| { + if start <= i && i < end { + input_slot_arrs[0][(i - start) as usize] + } else { + self.add_heap_cell() + } + })) + } + + ir::OpKind::UnwrapUnion { variant_idx } => { + debug_assert_eq!(input_slot_arrs.len(), 1); + let input_type = graph.values().node(val_node.inputs[0]).op.result_type; + let variant_slots = + if let TypeSlots::Union { variant_slots } = &sc.slots()[input_type] { + variant_slots + } else { + unreachable!() + }; + let (start, end) = variant_slots.sub_slots(*variant_idx); + &input_slot_arrs[0][start as usize..end as usize] + } + + ir::OpKind::MakeNamed => { + debug_assert_eq!(input_slot_arrs.len(), 1); + let new_cell = self.add_heap_cell(); + for &input_cell in input_slot_arrs[0] { + self.copy_data(version, input_cell, version, new_cell); + } + let slot_count = result_slot_count(sc, &val_node); + self.copy_heap_cell(new_cell, slot_count) + } + + ir::OpKind::UnwrapNamed => { + debug_assert_eq!(input_slot_arrs.len(), 1); + let slot_count = result_slot_count(sc, &val_node); + self.copy_heap_cell(input_slot_arrs[0][0], slot_count) + } + }; + replace_none(&mut self.value_slots[val_id], ret_slots).unwrap(); + } + + fn target_arg_slots(&self, graph: &ir::Graph, pred: ir::Predecessor) -> &'a [HeapCellId] { + match pred { + ir::Predecessor::Block(block) => { + self.value_slots[graph.blocks().block_info(block).target_arg.unwrap()].unwrap() + } + ir::Predecessor::Entry => self.arg_slots.unwrap(), + } + } + + fn merge_slots( + &mut self, + new_version: BackRefStateVersionId, + slot_count: u32, + sources: impl Iterator, + ) -> &'a [HeapCellId] + where + F: for<'b> FnMut(&'b Self) -> BackRefStateVersionId, + G: for<'b> FnMut(&'b Self) -> I, + I: Iterator, + { + let min_new_id = self.heap_cells.count(); + let merged_slots: &[_] = self.add_heap_cells(slot_count); + for (mut source_version_fn, mut source) in sources { + let source_version = source_version_fn(self); + // We need to add all of the alias edges up 
front between slots in the source and slots + // in the value under construction because we might only know that two slots in the + // value under construction alias because of their transitive relationship through + // source slots. For instance, consider the following code: + // + // let x = []; + // let result: (_, _) = choice { (x, x) } or { ([], []) }; + // + // We know that the two slots in result (the value under construction) might alias + // because they alias the first and second tuple elements in the first choice branch + // respectively, and those elements alias. + let mut i = 0; + for source_heap_cell in source(self) { + let merged_heap_cell = merged_slots[i as usize]; + self.copy_data( + source_version, + source_heap_cell, + new_version, + merged_heap_cell, + ); + i += 1; + } + debug_assert_eq!(i, slot_count); + + // Consider the following code: + // + // let x = []; + // let result: (_, _) = choice { (x, []) } or { ([], x) }; + // + // The first and second slots in result (the value under construction) cannot alias. If + // we do not remove the edge between the first slot of result and the first slot of the + // tuple in the first choice branch, then on the next iteration of the loop the + // algorithm will see a transitive alias edge between them. + // + // Note also that we need to remove *all* symmetric edges pointing back from heap cells + // which predate the value under construction, not just symmetric edges pointing back + // from the heap cells which appear directly in the predecessor value. For example, + // consider the following code: + // + // let x = []; + // let y = /* something that aliases x, but has a distinct heap cell */; + // let result: (_, _) = choice { (x, []) } or { ([], y) }; + // + // After processing the first branch of the choice, we need to remove the newly-created + // edges pointing back from both x *and* y. 
+ for &merged_heap_cell in merged_slots { + let merged_aliases = std::mem::take(&mut self.heap_cells[merged_heap_cell].aliases); + for &other in &merged_aliases { + // This removes edges back from heap cells which predate the value under + // construction, but does not remove edges between heap cells in the value under + // construction. Preserving edges within the value under construction is + // important for handling cases like the following example (also discussed + // above) correctly: + // + // let x = []; + // let result: (_, _) = choice { (x, x) } or { ([], []) }; + if other < min_new_id { + // Temporarily violate symmetry invariant + self.heap_cells[other].aliases.remove(&merged_heap_cell); + } + } + debug_assert!(self.heap_cells[merged_heap_cell].aliases.is_empty()); + self.heap_cells[merged_heap_cell].aliases = merged_aliases; + } + } + for &merged_heap_cell in merged_slots { + let merged_aliases = std::mem::take(&mut self.heap_cells[merged_heap_cell].aliases); + for &other in &merged_aliases { + if other < min_new_id { + // Restore symmetry invariant + self.heap_cells[other].aliases.insert(merged_heap_cell); + } + } + debug_assert!(self.heap_cells[merged_heap_cell].aliases.is_empty()); + self.heap_cells[merged_heap_cell].aliases = merged_aliases; + } + merged_slots + } + + fn predecessor_back_refs(&self, pred: ir::Predecessor) -> BackRefStateVersionId { + match pred { + ir::Predecessor::Entry => self.entry_version, + ir::Predecessor::Block(block) => self.block_versions[block].unwrap(), + } + } + + fn analyze_block( + &mut self, + sc: &mut SlotCache, + ctx: &mut SccAnalysisContext, + graph: &ir::Graph, + block: ir::BlockId, + ) { + let block_info = graph.blocks().block_info(block); + let new_version = self.back_ref_states.push(BackRefState { + overlay: HashMap::new(), + parents: block_info + .predecessors + .iter() + .map(|&pred| self.predecessor_back_refs(pred)) + .collect(), + }); + if let Some(param_id) = block_info.param { + let param_slots = if 
block_info.predecessors.len() == 1 { + self.target_arg_slots(graph, block_info.predecessors[0]) + } else { + let slot_count = id_result_slot_count(sc, graph, param_id); + self.merge_slots( + new_version, + slot_count, + block_info.predecessors.iter().map(|&pred| { + let pred_version = move |this: &Self| this.predecessor_back_refs(pred); + // TODO: is this correct if a block is its own predecessor, without any + // indirection? + let pred_slots = + move |this: &Self| this.target_arg_slots(graph, pred).iter().cloned(); + (pred_version, pred_slots) + }), + ) + }; + replace_none(&mut self.value_slots[param_id], param_slots).unwrap(); + } + for val_id in graph.blocks().block_values(block) { + self.analyze_value(sc, ctx, graph, new_version, val_id); + } + replace_none(&mut self.block_versions[block], new_version).unwrap(); + } + + // TODO: Everything to do with SCC analysis can be significantly optimized + + fn heap_cell_slot_mapping( + &self, + graph: &ir::Graph, + blocks: impl Iterator, + ) -> HeapCellSlotMapping { + let mut heap_cell_to_slots = HashMap::new(); + for block in blocks { + for val_id in block_values_inclusive(graph, block) { + for (i, &heap_cell) in self.value_slots[val_id].unwrap().iter().enumerate() { + heap_cell_to_slots + .entry(heap_cell) + .or_insert_with(SmallVec::new) + .push((val_id, i.try_into().unwrap())); + } + } + } + heap_cell_to_slots + } + + fn summarize_scc( + &mut self, + graph: &ir::Graph, + blocks: impl Iterator, + min_new_id: Count, + heap_cell_slots_inductive: &HeapCellSlotMapping, + heap_cell_slots_current: &HeapCellSlotMapping, + ) -> ForwardSccSummary { + let mut summary = ForwardSccSummary::new(); + for block in blocks { + for val_id in block_values_inclusive(graph, block) { + let block_version = self.block_versions[block].unwrap(); + let slot_summaries = self.value_slots[val_id] + .unwrap() + .iter() + .enumerate() + .map(|(slot_i, &heap_cell)| { + let mut val_summary = ForwardSccSlotSummary { + pre_aliases: Set::new(), + 
inductive_aliases: Set::new(), + internal_aliases: Set::new(), + back_refs: Self::back_refs_in_states( + &mut self.back_ref_states, + block_version, + heap_cell, + ) + .clone(), + }; + let aliased_heap_cells = std::iter::once(heap_cell) + .chain(self.heap_cells[heap_cell].aliases.iter().cloned()); + for aliased in aliased_heap_cells { + if aliased < min_new_id { + val_summary.pre_aliases.insert(aliased); + } + for &aliased_slot in heap_cell_slots_current + .get(&aliased) + .iter() + .cloned() + .flatten() + { + if aliased_slot == (val_id, slot_i.try_into().unwrap()) { + continue; + } + val_summary.internal_aliases.insert(aliased_slot); + } + for &aliased_slot in heap_cell_slots_inductive + .get(&aliased) + .iter() + .cloned() + .flatten() + { + if aliased_slot == (val_id, slot_i.try_into().unwrap()) { + continue; + } + val_summary.inductive_aliases.insert(aliased_slot); + } + } + val_summary + }) + .collect(); + summary.insert(val_id, slot_summaries); + } + } + summary + } + + fn disconnect_heap_cell(&mut self, heap_cell: HeapCellId) { + let aliases = std::mem::take(&mut self.heap_cells[heap_cell].aliases); + for &other in &aliases { + self.heap_cells[other].aliases.remove(&heap_cell); + } + } + + fn analyze_block_scc( + &mut self, + sc: &mut SlotCache, + ctx: &mut SccAnalysisContext, + graph: &ir::Graph, + scc_id: ir::SccId, + ) { + let scc = graph.sccs().get(scc_id); + match scc.info { + SccKind::Acyclic => { + debug_assert!(scc.items.len() == 1); + self.analyze_block(sc, ctx, graph, scc.items[0]); + } + SccKind::Cyclic => { + let init_version_parents = scc + .items + .iter() + .flat_map(|&block| { + graph + .blocks() + .block_info(block) + .predecessors + .iter() + .filter_map(|&pred| match pred { + ir::Predecessor::Entry => Some(self.entry_version), + ir::Predecessor::Block(pred_block) => { + self.block_versions[pred_block] + } + }) + }) + .collect::>() + .into_iter() + .collect::>(); + let init_version = self.back_ref_states.push(BackRefState { + overlay: 
HashMap::new(), + parents: init_version_parents, + }); + + let min_new_id = self.heap_cells.count(); + for &block in scc.items { + replace_none(&mut self.block_versions[block], init_version).unwrap(); + for val_id in block_values_inclusive(graph, block) { + let slot_count = id_result_slot_count(sc, graph, val_id); + let init_slots = self.add_heap_cells(slot_count); + replace_none(&mut self.value_slots[val_id], init_slots).unwrap(); + } + } + let mut prev_iter_summary = None; + let mut prev_iter_heap_cell_slot_mapping = + self.heap_cell_slot_mapping(graph, scc.items.iter().cloned()); + let mut prev_iter_min_new_id = min_new_id; + loop { + let curr_iter_min_new_id = self.heap_cells.count(); + // Now: + // - Main layer stores previous iteration state + // - Inductive layer stores irrelevant data + for &block in scc.items { + let version = std::mem::take(&mut self.block_versions[block]); + debug_assert!(version.is_some()); + self.block_versions_inductive[block] = version; + for val_id in block_values_inclusive(graph, block) { + let slots = std::mem::take(&mut self.value_slots[val_id]); + debug_assert!(slots.is_some()); + self.value_slots_inductive[val_id] = slots; + } + // Now: + // - Main layer stores previous iteration state, except for current block, + // for which it stores 'None' + // - Inductive layer stores a mix of irrelevant data and current iteration + // state, except for current block, for which it stores previous iteration + // state + self.analyze_block(sc, ctx, graph, block); + // Now: + // - Main layer stores previous iteration state, except for current block, + // for which it stores current iteration state + // - Inductive layer stores a mix of irrelevant data and current iteration + // state, except for current block, for which it stores previous iteration + // state + std::mem::swap( + &mut self.block_versions[block], + &mut self.block_versions_inductive[block], + ); + for val_id in block_values_inclusive(graph, block) { + std::mem::swap( + &mut 
self.value_slots[val_id], + &mut self.value_slots_inductive[val_id], + ); + } + // Now: + // - Main layer stores previous iteration state + // - Inductive layer stores a mix of irrelevant data and current iteration + // state. In particular, for all blocks processed so far (including this + // one) it stores current iteration state. + } + // Now: + // - Main layer stores previous iteration state + // - Inductive layer stores current iteration state + for &block in scc.items { + std::mem::swap( + &mut self.block_versions[block], + &mut self.block_versions_inductive[block], + ); + for val_id in block_values_inclusive(graph, block) { + std::mem::swap( + &mut self.value_slots[val_id], + &mut self.value_slots_inductive[val_id], + ); + } + } + // Now: + // - Main layer stores current iteration state + // - Inductive layer stores previous iteration state + let curr_iter_heap_cell_slot_mapping = + self.heap_cell_slot_mapping(graph, scc.items.iter().cloned()); + let curr_iter_summary = self.summarize_scc( + graph, + scc.items.iter().cloned(), + min_new_id, + &prev_iter_heap_cell_slot_mapping, + &curr_iter_heap_cell_slot_mapping, + ); + if Some(&curr_iter_summary) == prev_iter_summary.as_ref() { + break; + } + + // Garbage collect connectsions to irrelevant heap cells from previous iteration + for heap_cell in + (prev_iter_min_new_id.0 .0..curr_iter_min_new_id.0 .0).map(HeapCellId) + { + self.disconnect_heap_cell(heap_cell); + } + + prev_iter_summary = Some(curr_iter_summary); + prev_iter_heap_cell_slot_mapping = curr_iter_heap_cell_slot_mapping; + prev_iter_min_new_id = curr_iter_min_new_id; + } + } + } + } + + fn analyze_graph( + slots_arena: &'a Arena, + sc: &mut SlotCache, + ctx: &mut SccAnalysisContext, + graph: &ir::Graph, + arg_alias: Option>, + ) -> (Self, &'a [HeapCellId]) { + let mut heap_cells = IdVec::new(); + let arg_slots = graph + .blocks() + .block_info(graph.entry_block()) + .param + .map(|arg_val_id| { + let slot_count = id_result_slot_count(sc, graph, 
arg_val_id); + let arg_slots: &[_] = slots_arena.alloc_extend((0..slot_count).map(|i| { + let mut origin_arg_slots = Set::new(); + origin_arg_slots.insert(i); + heap_cells.push(ForwardData { + origin: Origin::FromArgSlots(origin_arg_slots), + aliases: Set::new(), + }) + })); + if let Some(arg_alias) = arg_alias { + let fst = arg_slots[*arg_alias.fst() as usize]; + let snd = arg_slots[*arg_alias.snd() as usize]; + heap_cells[fst].aliases.insert(snd); + heap_cells[snd].aliases.insert(fst); + } + arg_slots + }); + + let mut back_ref_states = IdVec::new(); + let entry_version = back_ref_states.push(BackRefState { + overlay: arg_slots + .iter() + .cloned() + .flatten() + .enumerate() + .map(|(slot_i, &heap_cell)| { + let mut heap_cell_back_refs = HashSet::new(); + heap_cell_back_refs.insert(QueryPoint::EntryArg(slot_i.try_into().unwrap())); + (heap_cell, heap_cell_back_refs) + }) + .collect(), + parents: Vec::new(), + }); + + let mut state = ForwardState { + slots_arena, + value_slots: IdVec::filled_with(graph.values().count(), || None), + value_slots_inductive: IdVec::filled_with(graph.values().count(), || None), + call_arg_aliases: IdVec::filled_with(graph.callee_spec_vars(), || None), + call_arg_origins: IdVec::filled_with(graph.callee_spec_vars(), || None), + calls: IdVec::filled_with(graph.callee_spec_vars(), || None), + update_origins: IdVec::filled_with(graph.update_mode_vars(), || None), + arg_slots, + heap_cells, + back_ref_states, + block_versions: IdVec::filled_with(graph.blocks().block_count(), || None), + block_versions_inductive: IdVec::filled_with(graph.blocks().block_count(), || None), + entry_version, + fates: HashMap::new(), + }; + + for scc_id in graph.sccs().count().iter() { + state.analyze_block_scc(sc, ctx, graph, scc_id); + } + + let exit_version = state.back_ref_states.push(BackRefState { + overlay: HashMap::new(), + parents: graph + .exit_blocks() + .iter() + .map(|&block| state.block_versions[block].unwrap()) + .collect(), + }); + let 
ret_slot_count = sc.slots()[graph.ret_type()].slot_count(); + let ret_heap_cells = state.merge_slots( + exit_version, + ret_slot_count, + graph.exit_blocks().iter().map(|&block| { + let block_version = move |this: &Self| this.block_versions[block].unwrap(); + let block_slots = move |this: &Self| { + this.target_arg_slots(graph, ir::Predecessor::Block(block)) + .iter() + .cloned() + }; + (block_version, block_slots) + }), + ); + + for (ret_slot_i, &ret_heap_cell) in ret_heap_cells.iter().enumerate() { + for &query_point in + Self::back_refs_in_states(&mut state.back_ref_states, exit_version, ret_heap_cell) + .iter() + { + if let Fate::Other { ret_slots, .. } = state.fates.entry(query_point).or_default() { + ret_slots.insert(ret_slot_i.try_into().unwrap()); + } + } + } + + (state, ret_heap_cells) + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +enum Fate { + DirectTouch, + Other { + indirect_touch: bool, + ret_slots: Set, + }, +} + +impl Fate { + fn union_with(&mut self, other: &Fate) { + match (&mut *self, other) { + (Fate::DirectTouch, _) => {} + (Fate::Other { .. 
}, Fate::DirectTouch) => { + *self = Fate::DirectTouch; + } + ( + Fate::Other { + indirect_touch: indirect_touch_1, + ret_slots: ret_slots_1, + }, + Fate::Other { + indirect_touch: indirect_touch_2, + ret_slots: ret_slots_2, + }, + ) => { + *indirect_touch_1 = *indirect_touch_1 || *indirect_touch_2; + ret_slots_1.extend(ret_slots_2); + } + } + } +} + +impl Default for Fate { + fn default() -> Self { + Fate::Other { + indirect_touch: false, + ret_slots: Set::new(), + } + } +} + +fn analyze_func( + sc: &mut SlotCache, + ctx: &mut SccAnalysisContext, + func_def: &ir::FuncDef, + arg_alias: Option>, +) -> FuncAnalysis { + let slots_arena = Arena::new(); + let (forward, ret_slots) = + ForwardState::analyze_graph(&slots_arena, sc, ctx, &func_def.graph, arg_alias); + let mut heap_cell_to_arg_slot = HashMap::::new(); + for (slot_i, &heap_cell) in forward.arg_slots.unwrap().iter().enumerate() { + let existing = heap_cell_to_arg_slot.insert(heap_cell, slot_i.try_into().unwrap()); + debug_assert!(existing.is_none()); + } + let mut heap_cell_to_ret_slots = HashMap::>::new(); + for (slot_i, &heap_cell) in ret_slots.iter().enumerate() { + heap_cell_to_ret_slots + .entry(heap_cell) + .or_insert_with(SmallVec::new) + .push(slot_i.try_into().unwrap()); + } + let arg_slot_analyses = (0..forward.arg_slots.unwrap().len()) + .map(|arg_slot_i| ArgSlotAnalysis { + fate: forward + .fates + .get(&QueryPoint::EntryArg(arg_slot_i.try_into().unwrap())) + .cloned() + .unwrap_or_default(), + }) + .collect(); + let ret_slot_analyses = ret_slots + .iter() + .enumerate() + .map(|(this_ret_slot_i, &heap_cell)| { + let mut arg_aliases = Set::new(); + let mut ret_aliases = Set::new(); + for other in std::iter::once(heap_cell) + .chain(forward.heap_cells[heap_cell].aliases.iter().cloned()) + { + if let Some(&arg_slot_i) = heap_cell_to_arg_slot.get(&other) { + arg_aliases.insert(arg_slot_i); + } + if let Some(ret_slots_i) = heap_cell_to_ret_slots.get(&other) { + for &ret_slot_i in ret_slots_i { + if 
ret_slot_i as usize != this_ret_slot_i { + ret_aliases.insert(ret_slot_i); + } + } + } + } + RetSlotAnalysis { + from_const: matches!(forward.heap_cells[heap_cell].origin, Origin::FromConst), + arg_aliases, + ret_aliases, + } + }) + .collect(); + FuncAnalysis { + graph_analysis: GraphAnalysis { + updates: IdVec::filled_with_indexed( + func_def.graph.update_mode_vars(), + |update_mode_var| UpdateAnalysis { + origin: forward.update_origins[update_mode_var] + .as_ref() + .unwrap() + .clone(), + fate: forward + .fates + .get(&QueryPoint::Update(update_mode_var)) + .cloned() + .unwrap_or_default(), + }, + ), + calls: IdVec::filled_with_indexed( + func_def.graph.callee_spec_vars(), + |callee_spec_var| { + // let call_fates = backward.call_fates[callee_spec_var].as_ref().unwrap(); + let call_info = forward.calls[callee_spec_var].unwrap(); + CallAnalysis { + callee: call_info.callee, + arg_aliases: forward.call_arg_aliases[callee_spec_var] + .as_ref() + .unwrap() + .clone(), + arg_slots: forward.call_arg_origins[callee_spec_var] + .as_ref() + .unwrap() + .iter() + .enumerate() + .map(|(arg_slot_i, origin)| ArgAnalysis { + origin: origin.clone(), + fate: forward + .fates + .get(&QueryPoint::CallArg( + callee_spec_var, + arg_slot_i.try_into().unwrap(), + )) + .cloned() + .unwrap_or_default(), + }) + .collect(), + ret_slots: (0..call_info.ret_slots) + .map(|ret_slot_i| { + forward + .fates + .get(&QueryPoint::CallRet(callee_spec_var, ret_slot_i)) + .cloned() + .unwrap_or_default() + }) + .collect(), + } + }, + ), + }, + arg_slots: arg_slot_analyses, + ret_slots: ret_slot_analyses, + } +} + +id_type! 
{ + FuncSccId(u32); +} + +#[derive(Clone, Debug, PartialEq, Eq)] +struct UpdateAnalysis { + origin: Origin, + fate: Fate, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +struct ArgAnalysis { + origin: Origin, + fate: Fate, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +struct CallAnalysis { + // Find a better place to store the callee + callee: FuncId, + arg_aliases: Set>, + arg_slots: SmallVec<[ArgAnalysis; 4]>, + ret_slots: SmallVec<[Fate; 4]>, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +struct GraphAnalysis { + updates: IdVec, + calls: IdVec, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +struct ArgSlotAnalysis { + fate: Fate, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +struct RetSlotAnalysis { + from_const: bool, + arg_aliases: Set, + ret_aliases: Set, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +struct FuncAnalysis { + graph_analysis: GraphAnalysis, + arg_slots: SmallVec<[ArgSlotAnalysis; 4]>, + ret_slots: SmallVec<[RetSlotAnalysis; 4]>, +} + +#[derive(Clone, Debug)] +struct GlobalAnalysisContext<'a> { + func_defs: &'a IdVec, + sccs: &'a FlatSlices, + func_to_scc: &'a IdVec, + committed: IdVec>, FuncAnalysis>>, +} + +impl<'a> GlobalAnalysisContext<'a> { + fn analyze( + &mut self, + sc: &mut SlotCache, + func: FuncId, + arg_alias: Option>, + ) -> &FuncAnalysis { + debug_assert!(!self.committed[func].contains_key(&arg_alias)); + let scc = self.func_to_scc[func]; + let scc_kind = *self.sccs.get(scc).info; + let mut scc_ctx = SccAnalysisContext { + global: &mut *self, + scc, + prev_iter: HashMap::new(), + curr_iter: HashMap::new(), + }; + match scc_kind { + SccKind::Acyclic => { + scc_ctx.get_analysis(sc, func, arg_alias); + debug_assert_eq!(scc_ctx.curr_iter.len(), 1); + } + SccKind::Cyclic => loop { + scc_ctx.get_analysis(sc, func, arg_alias); + // TODO: only compare "signature" information here, not internal annotations on body + // values. 
+ if scc_ctx.curr_iter == scc_ctx.prev_iter { + break; + } + scc_ctx.prev_iter = std::mem::take(&mut scc_ctx.curr_iter); + }, + }; + let results = scc_ctx.curr_iter; + for ((analyzed_func, analyzed_arg_alias), analysis) in results { + let existing = self.committed[analyzed_func] + .insert(analyzed_arg_alias, analysis.unwrap_complete()); + debug_assert!(existing.is_none()); + } + &self.committed[func][&arg_alias] + } +} + +#[allow(clippy::large_enum_variant)] +#[derive(Debug, PartialEq, Eq)] +enum AnalysisState { + Pending, + Complete(FuncAnalysis), +} + +impl AnalysisState { + fn unwrap_complete(self) -> FuncAnalysis { + match self { + AnalysisState::Pending => unreachable!(), + AnalysisState::Complete(analysis) => analysis, + } + } + + fn unwrap_complete_ref(&self) -> &FuncAnalysis { + match self { + AnalysisState::Pending => unreachable!(), + AnalysisState::Complete(analysis) => analysis, + } + } +} + +#[derive(Debug)] +struct SccAnalysisContext<'a, 'b> { + global: &'b mut GlobalAnalysisContext<'a>, + scc: FuncSccId, + // Invariant: 'prev_iter' should contain no 'Pending' analyses + prev_iter: HashMap<(FuncId, Option>), AnalysisState>, + curr_iter: HashMap<(FuncId, Option>), AnalysisState>, +} + +impl<'a, 'b> SccAnalysisContext<'a, 'b> { + fn get_analysis<'c>( + &'c mut self, + sc: &mut SlotCache, + func: FuncId, + arg_alias: Option>, + ) -> Option<&'c FuncAnalysis> { + if self.global.committed[func].contains_key(&arg_alias) { + // TODO: is there a way to avoid the double lookup here while passing the borrow + // checker? + return Some(&self.global.committed[func][&arg_alias]); + } + if self.global.func_to_scc[func] != self.scc { + return Some(self.global.analyze(sc, func, arg_alias)); + } + // TODO: can we resolve this clippy error while passing the borrow checker? + #[allow(clippy::map_entry)] + if self.curr_iter.contains_key(&(func, arg_alias)) { + // TODO: as above, can we avoid the double lookup? 
+ match &self.curr_iter[&(func, arg_alias)] { + AnalysisState::Complete(analysis) => Some(analysis), + AnalysisState::Pending => self + .prev_iter + .get(&(func, arg_alias)) + .map(AnalysisState::unwrap_complete_ref), + } + } else { + self.curr_iter + .insert((func, arg_alias), AnalysisState::Pending); + let analysis = analyze_func(sc, self, &self.global.func_defs[func], arg_alias); + match self.curr_iter.entry((func, arg_alias)) { + std::collections::hash_map::Entry::Occupied(mut occupied) => { + *occupied.get_mut() = AnalysisState::Complete(analysis); + Some(occupied.into_mut().unwrap_complete_ref()) + } + std::collections::hash_map::Entry::Vacant(_) => { + unreachable!() + } + } + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +struct Query { + // TODO: improve sparsity of contextual information, prune everything inessential + arg_aliases: BTreeSet>, + // For the purposes of `arg_slots_touched`, an arg slot being 'FromConst' is the same as being + // touched after the call. 
+ arg_slots_touched: SmallVec<[bool; 8]>, + ret_slots_touched: SmallVec<[bool; 8]>, +} + +impl Query { + fn to_spec(&self, func: FuncId) -> api::FuncSpec { + use sha2::{Digest, Sha256}; + let mut hasher = Sha256::new(); + hasher.update(func.0.to_le_bytes()); + hasher.update((self.arg_aliases.len() as u64).to_le_bytes()); + for arg_alias in &self.arg_aliases { + hasher.update(arg_alias.fst().to_le_bytes()); + hasher.update(arg_alias.snd().to_le_bytes()); + } + hasher.update((self.arg_slots_touched.len() as u64).to_le_bytes()); + for &arg_touched in &self.arg_slots_touched { + hasher.update(&[arg_touched as u8]); + } + hasher.update((self.ret_slots_touched.len() as u64).to_le_bytes()); + for &ret_touched in &self.ret_slots_touched { + hasher.update(&[ret_touched as u8]); + } + api::FuncSpec(hasher.finalize().into()) + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) struct FuncSolution { + pub(crate) update_modes: IdVec, + pub(crate) callee_specs: IdVec, +} + +#[derive(Clone, Debug)] +pub(crate) struct FuncSolutions { + pub(crate) solutions: IdVec>>, +} + +fn resolve_origin<'a>(query: &Query, mut origins: impl Iterator) -> bool { + origins.any(|origin| match origin { + Origin::FromConst => true, + Origin::FromArgSlots(arg_slots) => arg_slots + .iter() + .any(|&arg_slot| query.arg_slots_touched[arg_slot as usize]), + }) +} + +fn resolve_fate<'a>(query: &Query, mut fates: impl Iterator) -> bool { + fates.any(|fate| match fate { + Fate::DirectTouch => true, + Fate::Other { + indirect_touch, + ret_slots, + } => { + *indirect_touch + || ret_slots + .iter() + .any(|&ret_slot| query.ret_slots_touched[ret_slot as usize]) + } + }) +} + +impl FuncSolutions { + fn resolve( + &mut self, + analyses: &IdVec>, FuncAnalysis>>, + func: FuncId, + query: &Query, + ) -> api::FuncSpec { + let spec = query.to_spec(func); + if let std::collections::hash_map::Entry::Vacant(vacant) = self.solutions[func].entry(spec) + { + let func_analyses = &analyses[func]; + let basic_analysis = 
&func_analyses[&None].graph_analysis; + vacant.insert(None); + let query_analyses: SmallVec<[&GraphAnalysis; 8]> = std::iter::once(basic_analysis) + .chain( + query + .arg_aliases + .iter() + .map(|&arg_alias| &func_analyses[&Some(arg_alias)].graph_analysis), + ) + .collect(); + let update_modes = + IdVec::filled_with_indexed(basic_analysis.updates.count(), |update_mode_var| { + let touched = resolve_origin( + query, + query_analyses + .iter() + .map(|analysis| &analysis.updates[update_mode_var].origin), + ) || resolve_fate( + query, + query_analyses + .iter() + .map(|analysis| &analysis.updates[update_mode_var].fate), + ); + if touched { + api::UpdateMode::Immutable + } else { + api::UpdateMode::InPlace + } + }); + let callee_specs = + IdVec::filled_with_indexed(basic_analysis.calls.count(), |callee_spec_var| { + let mut sub_arg_aliases = BTreeSet::new(); + for analysis in &query_analyses { + sub_arg_aliases.extend(&analysis.calls[callee_spec_var].arg_aliases); + } + let num_arg_slots = basic_analysis.calls[callee_spec_var].arg_slots.len(); + let num_ret_slots = basic_analysis.calls[callee_spec_var].ret_slots.len(); + let sub_arg_slots_touched = (0..num_arg_slots) + .map(|arg_slot_i| { + resolve_origin( + query, + query_analyses.iter().map(|analysis| { + &analysis.calls[callee_spec_var].arg_slots[arg_slot_i].origin + }), + ) || resolve_fate( + query, + query_analyses.iter().map(|analysis| { + &analysis.calls[callee_spec_var].arg_slots[arg_slot_i].fate + }), + ) + }) + .collect(); + let sub_ret_slots_touched = (0..num_ret_slots) + .map(|ret_slot_i| { + resolve_fate( + query, + query_analyses.iter().map(|analysis| { + &analysis.calls[callee_spec_var].ret_slots[ret_slot_i] + }), + ) + }) + .collect(); + let sub_query = Query { + arg_aliases: sub_arg_aliases, + arg_slots_touched: sub_arg_slots_touched, + ret_slots_touched: sub_ret_slots_touched, + }; + self.resolve( + analyses, + basic_analysis.calls[callee_spec_var].callee, + &sub_query, + ) + }); + let solution = 
FuncSolution { + update_modes, + callee_specs, + }; + self.solutions[func].insert(spec, Some(solution)); + } + spec + } +} + +#[derive(Clone, Debug)] +pub(crate) struct ProgramSolutions { + pub(crate) funcs: FuncSolutions, + pub(crate) entry_points: IdVec, +} + +pub(crate) fn analyze(tc: TypeCache, program: &ir::Program) -> ProgramSolutions { + let mut sc = SlotCache::new(tc); + + let func_sccs: FlatSlices = + strongly_connected(program.funcs.count(), |func_id| { + let func_def = &program.funcs[func_id]; + let values = func_def.graph.values(); + values + .count() + .iter() + .filter_map(move |val_id| match &values.node(val_id).op.kind { + ir::ValueKind::Op(ir::OpKind::Call { + callee, + callee_spec_var: _, + }) => Some(*callee), + + _ => None, + }) + }); + + let mut func_to_scc = IdVec::filled_with(program.funcs.count(), || FuncSccId(u32::MAX)); + for scc_id in func_sccs.count().iter() { + for &func in func_sccs.get(scc_id).items { + func_to_scc[func] = scc_id; + } + } + + let mut ctx = GlobalAnalysisContext { + func_defs: &program.funcs, + sccs: &func_sccs, + func_to_scc: &func_to_scc, + committed: IdVec::filled_with(program.funcs.count(), HashMap::new), + }; + + for (_, &func) in &program.entry_points { + if !ctx.committed[func].contains_key(&None) { + ctx.analyze(&mut sc, func, None); + } + } + + let mut func_solutions = FuncSolutions { + solutions: IdVec::filled_with(program.funcs.count(), HashMap::new), + }; + + let entry_point_solutions = program.entry_points.map(|_, &func| { + func_solutions.resolve( + &ctx.committed, + func, + &Query { + arg_aliases: BTreeSet::new(), + arg_slots_touched: SmallVec::new(), + ret_slots_touched: SmallVec::new(), + }, + ) + }); + + ProgramSolutions { + funcs: func_solutions, + entry_points: entry_point_solutions, + } +} diff --git a/vendor/morphic_lib/src/api.rs b/vendor/morphic_lib/src/api.rs index 6dc57d1c6f..893e2a1c48 100644 --- a/vendor/morphic_lib/src/api.rs +++ b/vendor/morphic_lib/src/api.rs @@ -1,7 +1,9 @@ use 
sha2::{digest::Digest, Sha256}; use smallvec::SmallVec; use std::collections::{btree_map::Entry, BTreeMap}; +use std::rc::Rc; +use crate::analyze; use crate::preprocess; use crate::render_api_ir; use crate::util::blocks::Blocks; @@ -1372,13 +1374,15 @@ pub enum UpdateMode { pub const SPEC_HASH_BYTES: usize = 32; #[repr(transparent)] -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct FuncSpec(pub [u8; SPEC_HASH_BYTES]); /// The solution table for an individual specialization. pub struct FuncSpecSolutions { - func_def: FuncDef, - callee_specs: IdVec, + // TODO: eliminate the RC here (this will require introducing a lifetime, and is therefore a + // breaking API change) + func_def: Rc, + solution: analyze::FuncSolution, } impl FuncSpecSolutions { @@ -1392,7 +1396,7 @@ impl FuncSpecSolutions { .callee_spec_vars .get_by_val(&var.into()) { - Some(id) => Ok(self.callee_specs[id]), + Some(id) => Ok(self.solution.callee_specs[id]), None => Err(ErrorKind::CalleeSpecVarNotFound(var.into()).into()), } } @@ -1407,7 +1411,7 @@ impl FuncSpecSolutions { .update_mode_vars .get_by_val(&var.into()) { - Some(_id) => Ok(UpdateMode::Immutable), + Some(id) => Ok(self.solution.update_modes[id]), None => Err(ErrorKind::UpdateModeVarNotFound(var.into()).into()), } } @@ -1416,20 +1420,20 @@ impl FuncSpecSolutions { /// Zero or more specializations for a single function, and the solution table for each /// specialization. 
pub struct FuncSolutions { - spec: FuncSpec, - spec_solutions: FuncSpecSolutions, + spec_solutions: BTreeMap, } impl FuncSolutions { pub fn specs(&self) -> impl Iterator { - std::iter::once(&self.spec) + self.spec_solutions.keys() } pub fn spec(&self, spec: &FuncSpec) -> Result<&FuncSpecSolutions> { - if &self.spec != spec { - return Err(ErrorKind::FuncSpecNotFound(*spec).into()); + if let Some(solution) = self.spec_solutions.get(spec) { + Ok(solution) + } else { + Err(ErrorKind::FuncSpecNotFound(*spec).into()) } - Ok(&self.spec_solutions) } } @@ -1501,7 +1505,7 @@ impl ModSolutions { /// Specializations and solution tables generated for the entire program. pub struct Solutions { mods: BTreeMap, - entry_points: BTreeMap, + entry_points: BTreeMap, } impl Solutions { @@ -1521,9 +1525,8 @@ impl Solutions { // TODO: The clone here is unnecessary -- avoid it! // (might require something like a transmute) match self.entry_points.get(&entry_point.into()) { - Some((mod_name, func_name)) => { - let spec = hash_func_name(mod_name.borrowed(), func_name.borrowed()); - Ok((mod_name.borrowed(), func_name.borrowed(), spec)) + Some((mod_name, func_name, spec)) => { + Ok((mod_name.borrowed(), func_name.borrowed(), *spec)) } None => Err(ErrorKind::EntryPointNotFound(entry_point.into()).into()), } @@ -1552,11 +1555,14 @@ fn populate_specs( results.into_mapped(|_, spec| spec.unwrap()) } -pub fn solve(program: Program) -> Result { - preprocess::preprocess(&program).map_err(ErrorKind::PreprocessError)?; +pub fn solve(api_program: Program) -> Result { + let (nc, tc, program) = + preprocess::preprocess(&api_program).map_err(ErrorKind::PreprocessError)?; + + let mut solutions = analyze::analyze(tc, &program); Ok(Solutions { - mods: program + mods: api_program .mods .into_iter() .map(|(mod_name, mod_def)| { @@ -1565,16 +1571,27 @@ pub fn solve(program: Program) -> Result { .func_defs .into_iter() .map(|(func_name, func_def)| { - let callee_specs = populate_specs( - 
func_def.builder.expr_builder.callee_spec_vars.count(), - &func_def.builder.expr_builder.vals, - ); + // TODO: avoid the clones here + let func_id = nc + .funcs + .get_by_val(&(mod_name.clone(), func_name.clone())) + .unwrap(); + let func_def = Rc::new(func_def); let func_sols = FuncSolutions { - spec: hash_func_name(mod_name.borrowed(), func_name.borrowed()), - spec_solutions: FuncSpecSolutions { - func_def, - callee_specs, - }, + spec_solutions: std::mem::take( + &mut solutions.funcs.solutions[func_id], + ) + .into_iter() + .map(|(spec, solution)| { + ( + spec, + FuncSpecSolutions { + func_def: func_def.clone(), + solution: solution.unwrap(), + }, + ) + }) + .collect(), }; (func_name, func_sols) }) @@ -1600,7 +1617,15 @@ pub fn solve(program: Program) -> Result { (mod_name, mod_sols) }) .collect(), - entry_points: program.entry_points, + entry_points: api_program + .entry_points + .into_iter() + .map(|(entry_point_name, (mod_name, func_name))| { + let entry_point_id = nc.entry_points.get_by_val(&entry_point_name).unwrap(); + let spec = solutions.entry_points[entry_point_id]; + (entry_point_name, (mod_name, func_name, spec)) + }) + .collect(), }) } diff --git a/vendor/morphic_lib/src/ir.rs b/vendor/morphic_lib/src/ir.rs index b6408ebc98..d283d84bed 100644 --- a/vendor/morphic_lib/src/ir.rs +++ b/vendor/morphic_lib/src/ir.rs @@ -6,10 +6,12 @@ use smallvec::SmallVec; use crate::api::{CalleeSpecVarId, UpdateModeVarId}; -use crate::name_cache::{ConstId, FuncId}; +use crate::name_cache::{ConstId, EntryPointId, FuncId, NamedTypeId}; use crate::type_cache::TypeId; use crate::util::blocks::Blocks; -use crate::util::flat_slices::{FlatSlices, Slice}; +use crate::util::flat_slices::FlatSlices; +use crate::util::id_type::Count; +use crate::util::id_vec::IdVec; use crate::util::op_graph::OpGraph; use crate::util::strongly_connected::{strongly_connected, SccKind}; @@ -123,6 +125,13 @@ pub(crate) enum JumpTarget { Ret, } +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, 
Ord, Hash)] +pub(crate) enum Predecessor { + Block(BlockId), + Entry, +} + +pub(crate) const PREDECESSORS_INLINE_COUNT: usize = 8; pub(crate) const JUMP_TARGETS_INLINE_COUNT: usize = 8; #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -132,6 +141,8 @@ pub(crate) struct BlockInfo { /// /// Invariant: If `param` is `Some`, it must point to a `BlockParam` value, not an `Op`. pub(crate) param: Option, + /// Blocks which jump to this block + pub(crate) predecessors: SmallVec<[Predecessor; PREDECESSORS_INLINE_COUNT]>, /// List of zero or more jump targets to nondeterministically choose from. pub(crate) jump_targets: SmallVec<[JumpTarget; JUMP_TARGETS_INLINE_COUNT]>, /// Optional argument which will be passed to the chosen jump target. @@ -149,6 +160,7 @@ id_type! { pub(crate) struct GraphBuilder { values: OpGraph, blocks: Blocks, + exit_blocks: SmallVec<[BlockId; 1]>, } impl GraphBuilder { @@ -156,6 +168,7 @@ impl GraphBuilder { GraphBuilder { values: OpGraph::new(), blocks: Blocks::new(), + exit_blocks: SmallVec::new(), } } @@ -182,6 +195,7 @@ impl GraphBuilder { self.values.count().0, BlockInfo { param: None, + predecessors: SmallVec::new(), jump_targets: SmallVec::new(), target_arg: None, }, @@ -200,6 +214,7 @@ impl GraphBuilder { self.values.count().0, BlockInfo { param: Some(param_id), + predecessors: SmallVec::new(), jump_targets: SmallVec::new(), target_arg: None, }, @@ -213,8 +228,22 @@ impl GraphBuilder { target_arg: Option, jump_targets: SmallVec<[JumpTarget; JUMP_TARGETS_INLINE_COUNT]>, ) { + for target in &jump_targets { + match target { + &JumpTarget::Block(successor) => { + self.blocks + .block_info_mut(successor) + .predecessors + .push(Predecessor::Block(block)); + } + JumpTarget::Ret => { + self.exit_blocks.push(block); + } + } + } let info = self.blocks.block_info_mut(block); info.target_arg = target_arg; + debug_assert!(info.jump_targets.is_empty()); info.jump_targets = jump_targets; } @@ -226,23 +255,37 @@ impl GraphBuilder { 
&self.blocks } - pub(crate) fn build(self, entry_block: BlockId) -> Graph { + pub(crate) fn build( + mut self, + entry_block: BlockId, + ret_type: TypeId, + update_mode_vars: Count, + callee_spec_vars: Count, + ) -> Graph { debug_assert!(entry_block < self.blocks.block_count()); - let rev_sccs = strongly_connected(self.blocks.block_count(), |block| { + self.blocks + .block_info_mut(entry_block) + .predecessors + .push(Predecessor::Entry); + let sccs = strongly_connected(self.blocks.block_count(), |block| { self.blocks .block_info(block) - .jump_targets + .predecessors .iter() - .filter_map(|&jump_target| match jump_target { - JumpTarget::Ret => None, - JumpTarget::Block(target) => Some(target), + .filter_map(|&pred| match pred { + Predecessor::Entry => None, + Predecessor::Block(pred_block) => Some(pred_block), }) }); Graph { values: self.values, blocks: self.blocks, entry_block, - rev_sccs, + exit_blocks: self.exit_blocks, + ret_type, + sccs, + update_mode_vars, + callee_spec_vars, } } } @@ -252,14 +295,15 @@ pub(crate) struct Graph { values: OpGraph, blocks: Blocks, entry_block: BlockId, - - // Invariant: `rev_sccs` must be stored in *reverse* topological order. If an SCC 'A' can jump - // to an SCC 'B', then 'A' must appear *after* 'B' in `rev_sccs`. - // - // We don't store the SCCs in topological order because control flow graph edges point from - // *source block* to *target block*, so running Tarjan's algorithm on the control flow graph - // gives us a reverse topological sort rather than a topological sort. - rev_sccs: FlatSlices, + // We use an inline capacity of 1 here because, in the current implementation of `preprocess`, + // there is always exactly one exit block per function. However, this is no fundamental reason + // this must be so. + exit_blocks: SmallVec<[BlockId; 1]>, + ret_type: TypeId, + // Invariant: `sccs` is strored in topological order. 
+ sccs: FlatSlices, + update_mode_vars: Count, + callee_spec_vars: Count, } impl Graph { @@ -275,20 +319,24 @@ impl Graph { self.entry_block } - pub(crate) fn rev_sccs(&self) -> &FlatSlices { - &self.rev_sccs + pub(crate) fn exit_blocks(&self) -> &[BlockId] { + &self.exit_blocks } - /// Iterate over sccs in topological order. - /// - /// IF an SCC 'A' can jump to an SCC 'B', then 'A' is guaranteed to appear *before* 'B' in the - /// returned iterator. - pub(crate) fn iter_sccs(&self) -> impl Iterator> + '_ { - self.rev_sccs - .count() - .iter() - .rev() - .map(move |scc_id| self.rev_sccs.get(scc_id)) + pub(crate) fn ret_type(&self) -> TypeId { + self.ret_type + } + + pub(crate) fn sccs(&self) -> &FlatSlices { + &self.sccs + } + + pub(crate) fn update_mode_vars(&self) -> Count { + self.update_mode_vars + } + + pub(crate) fn callee_spec_vars(&self) -> Count { + self.callee_spec_vars } } @@ -301,3 +349,11 @@ pub(crate) struct FuncDef { pub(crate) struct ConstDef { pub(crate) graph: Graph, } + +#[derive(Clone, Debug)] +pub(crate) struct Program { + pub(crate) named_types: IdVec, + pub(crate) funcs: IdVec, + pub(crate) consts: IdVec, + pub(crate) entry_points: IdVec, +} diff --git a/vendor/morphic_lib/src/lib.rs b/vendor/morphic_lib/src/lib.rs index a1470770dc..3b72a6227b 100644 --- a/vendor/morphic_lib/src/lib.rs +++ b/vendor/morphic_lib/src/lib.rs @@ -4,6 +4,7 @@ #[macro_use] mod util; +mod analyze; mod api; mod bindings; mod ir; diff --git a/vendor/morphic_lib/src/preprocess.rs b/vendor/morphic_lib/src/preprocess.rs index f52e0f648b..36eccb21bd 100644 --- a/vendor/morphic_lib/src/preprocess.rs +++ b/vendor/morphic_lib/src/preprocess.rs @@ -265,7 +265,9 @@ struct FuncSig { ret_type: TypeId, } -pub(crate) fn preprocess(program: &api::Program) -> Result<(), Error> { +pub(crate) fn preprocess( + program: &api::Program, +) -> Result<(NameCache, TypeCache, ir::Program), Error> { let mut nc = NameCache::default(); let mut tc = TypeCache::default(); @@ -348,15 +350,15 @@ 
pub(crate) fn preprocess(program: &api::Program) -> Result<(), Error> { const_sigs: &const_sigs, }; - for (func_id, func_def) in &funcs { + let preprocessed_funcs = funcs.try_map(|func_id, func_def| { preprocess_func_def(&mut tc, ctx, func_def, &func_body_types[func_id]) - .map_err(Error::annotate_func_def(&nc, func_id))?; - } + .map_err(Error::annotate_func_def(&nc, func_id)) + })?; - for (const_id, const_def) in &consts { + let preprocessed_consts = consts.try_map(|const_id, const_def| { preprocess_const_def(&mut tc, ctx, const_def, &const_body_types[const_id]) - .map_err(Error::annotate_const_def(&nc, const_id))?; - } + .map_err(Error::annotate_const_def(&nc, const_id)) + })?; let mut entry_points = IdVec::::new(); for (entry_point_name, (mod_, func)) in &program.entry_points { @@ -383,7 +385,16 @@ pub(crate) fn preprocess(program: &api::Program) -> Result<(), Error> { debug_assert_eq!(nc_id, pushed_id); } - Ok(()) + Ok(( + nc, + tc, + ir::Program { + named_types: typedef_contents, + funcs: preprocessed_funcs, + consts: preprocessed_consts, + entry_points, + }, + )) } #[derive(Clone, Copy, Debug)] @@ -1165,7 +1176,12 @@ fn preprocess_func_def( )?; graph_builder.set_jump_targets(final_block, Some(ret_val), smallvec![ir::JumpTarget::Ret]); Ok(ir::FuncDef { - graph: graph_builder.build(entry_block), + graph: graph_builder.build( + entry_block, + body_types[func_def.ret_type], + func_def.builder.expr_builder.update_mode_vars.count(), + func_def.builder.expr_builder.callee_spec_vars.count(), + ), }) } @@ -1199,6 +1215,11 @@ fn preprocess_const_def( )?; graph_builder.set_jump_targets(final_block, Some(ret_val), smallvec![ir::JumpTarget::Ret]); Ok(ir::ConstDef { - graph: graph_builder.build(entry_block), + graph: graph_builder.build( + entry_block, + body_types[const_def.type_], + const_def.builder.expr_builder.update_mode_vars.count(), + const_def.builder.expr_builder.callee_spec_vars.count(), + ), }) } diff --git a/vendor/morphic_lib/src/type_cache.rs 
b/vendor/morphic_lib/src/type_cache.rs index beb16b182b..d9b1b9dd18 100644 --- a/vendor/morphic_lib/src/type_cache.rs +++ b/vendor/morphic_lib/src/type_cache.rs @@ -7,6 +7,8 @@ id_type! { pub TypeId(u32); } +// TODO: Add slot information + #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum TypeData { Named { named: NamedTypeId }, diff --git a/vendor/morphic_lib/src/util/blocks.rs b/vendor/morphic_lib/src/util/blocks.rs index 22c182cbfa..56a1ce60a0 100644 --- a/vendor/morphic_lib/src/util/blocks.rs +++ b/vendor/morphic_lib/src/util/blocks.rs @@ -14,6 +14,7 @@ struct BlockFrag { min_val: ValId, /// Exclusive bound max_val: ValId, + prev: Option, next: Option, } @@ -57,6 +58,7 @@ impl Blocks { let frag = BlockFrag { min_val: start_hint.clone(), max_val: start_hint, + prev: None, next: None, }; let frag_id = self.frags.push(frag); @@ -77,6 +79,7 @@ impl Blocks { let new_tail = BlockFrag { min_val: val_id.clone(), max_val: ValId::from_index_or_panic(val_id.to_index() + 1), + prev: Some(block.tail), next: None, }; let new_tail_id = self.frags.push(new_tail); @@ -113,4 +116,24 @@ impl Blocks { Some(this_val) }) } + + pub fn block_values_rev(&self, block_id: BlockId) -> impl Iterator + '_ { + let mut frag = &self.frags[self.blocks[block_id].tail]; + let mut val = frag.max_val.clone(); + std::iter::from_fn(move || { + while val.to_index() <= frag.min_val.to_index() { + match frag.prev { + Some(prev) => { + frag = &self.frags[prev]; + val = frag.max_val.clone(); + } + None => { + return None; + } + } + } + val = ValId::from_index_unchecked(val.to_index() - 1); + Some(val.clone()) + }) + } } diff --git a/vendor/morphic_lib/src/util/flat_slices.rs b/vendor/morphic_lib/src/util/flat_slices.rs index fe72cce7e8..ea56c2a624 100644 --- a/vendor/morphic_lib/src/util/flat_slices.rs +++ b/vendor/morphic_lib/src/util/flat_slices.rs @@ -99,14 +99,16 @@ impl FlatSlices { items: &mut self.flat_data[start..end], } } + + pub fn iter(&self) -> impl Iterator)> { + 
self.count().iter().map(move |i| (i.clone(), self.get(i))) + } } impl fmt::Debug for FlatSlices { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_map() - .entries(self.count().iter().map(|i| (i.clone(), self.get(i)))) - .finish() + f.debug_map().entries(self.iter()).finish() } } diff --git a/vendor/morphic_lib/src/util/get2_mut.rs b/vendor/morphic_lib/src/util/get2_mut.rs new file mode 100644 index 0000000000..8359e118ab --- /dev/null +++ b/vendor/morphic_lib/src/util/get2_mut.rs @@ -0,0 +1,16 @@ +use std::cmp::Ordering; + +// inspired by https://docs.rs/generational-arena/0.2.8/generational_arena/struct.Arena.html#method.get2_mut +pub fn get2_mut(slice: &mut [T], i: usize, j: usize) -> Option<(&mut T, &mut T)> { + match i.cmp(&j) { + Ordering::Less => { + let (l, r) = slice.split_at_mut(j); + Some((&mut l[i], &mut r[0])) + } + Ordering::Greater => { + let (l, r) = slice.split_at_mut(i); + Some((&mut r[0], &mut l[j])) + } + Ordering::Equal => None, + } +} diff --git a/vendor/morphic_lib/src/util/id_bi_map.rs b/vendor/morphic_lib/src/util/id_bi_map.rs index 00b04c2142..876574c49f 100644 --- a/vendor/morphic_lib/src/util/id_bi_map.rs +++ b/vendor/morphic_lib/src/util/id_bi_map.rs @@ -67,4 +67,8 @@ impl IdBiMap { pub fn get_by_val(&self, val: &V) -> Option { self.val_to_key.get(val).cloned() } + + pub fn iter(&self) -> impl Iterator { + self.key_to_val.iter() + } } diff --git a/vendor/morphic_lib/src/util/id_vec.rs b/vendor/morphic_lib/src/util/id_vec.rs index ab222559ea..52c6553502 100644 --- a/vendor/morphic_lib/src/util/id_vec.rs +++ b/vendor/morphic_lib/src/util/id_vec.rs @@ -6,6 +6,7 @@ use std::ops::{Index, IndexMut}; use std::slice; use std::vec; +use crate::util::get2_mut::get2_mut; use crate::util::id_type::{Count, Id}; #[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -129,6 +130,13 @@ impl IdVec { } } + pub fn filled_with_indexed(count: Count, mut f: impl FnMut(K) -> V) -> Self { + IdVec { + key: PhantomData, + items: 
count.iter().map(|k| f(k)).collect(), + } + } + pub fn items(&self) -> &[V] { &self.items } @@ -234,6 +242,10 @@ impl IdVec { } Some(Self::from_items(items)) } + + pub fn get2_mut(&mut self, i: K, j: K) -> Option<(&mut V, &mut V)> { + get2_mut(&mut self.items, i.to_index(), j.to_index()) + } } impl> Index for IdVec { diff --git a/vendor/morphic_lib/src/util/mod.rs b/vendor/morphic_lib/src/util/mod.rs index d5fb572b08..a7f5146c48 100644 --- a/vendor/morphic_lib/src/util/mod.rs +++ b/vendor/morphic_lib/src/util/mod.rs @@ -9,8 +9,10 @@ pub mod forward_trait; pub mod blocks; pub mod flat_slices; +pub mod get2_mut; pub mod id_bi_map; pub mod id_vec; +pub mod norm_pair; pub mod op_graph; pub mod replace_none; pub mod strongly_connected; diff --git a/vendor/morphic_lib/src/util/norm_pair.rs b/vendor/morphic_lib/src/util/norm_pair.rs new file mode 100644 index 0000000000..f370b40cfa --- /dev/null +++ b/vendor/morphic_lib/src/util/norm_pair.rs @@ -0,0 +1,33 @@ +/// A normalized unordered pair, where the first component is always <= the second +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct NormPair(T, T); + +impl NormPair { + pub fn new(fst: T, snd: T) -> Self { + if fst <= snd { + NormPair(fst, snd) + } else { + NormPair(snd, fst) + } + } + + pub fn fst(&self) -> &T { + &self.0 + } + + pub fn snd(&self) -> &T { + &self.1 + } + + pub fn into_fst(self) -> T { + self.0 + } + + pub fn into_snd(self) -> T { + self.1 + } + + pub fn into_tuple(self) -> (T, T) { + (self.0, self.1) + } +} diff --git a/vendor/morphic_lib/tests/recursive.rs b/vendor/morphic_lib/tests/recursive.rs index 9e50172a50..629e19852d 100644 --- a/vendor/morphic_lib/tests/recursive.rs +++ b/vendor/morphic_lib/tests/recursive.rs @@ -1,6 +1,6 @@ use morphic_lib::{ BlockExpr, CalleeSpecVar, EntryPointName, Error, ExprContext, FuncDefBuilder, FuncName, - ModDefBuilder, ModName, ProgramBuilder, TypeContext, UpdateModeVar, + ModDefBuilder, ModName, ProgramBuilder, TypeContext, 
UpdateMode, UpdateModeVar, }; #[test] @@ -83,7 +83,8 @@ fn test_recursive() { .func_solutions(FuncName(b"rec"))? .spec(&rec_spec)?; - let _update_mode = rec_sol.update_mode(UpdateModeVar(b"mode"))?; + let update_mode = rec_sol.update_mode(UpdateModeVar(b"mode"))?; + assert_eq!(update_mode, UpdateMode::InPlace); Ok(()) } diff --git a/vendor/morphic_lib/tests/structures.rs b/vendor/morphic_lib/tests/structures.rs new file mode 100644 index 0000000000..7143e3f9e1 --- /dev/null +++ b/vendor/morphic_lib/tests/structures.rs @@ -0,0 +1,73 @@ +use morphic_lib::{ + BlockExpr, EntryPointName, Error, ExprContext, FuncDefBuilder, FuncName, ModDefBuilder, + ModName, ProgramBuilder, TypeContext, UpdateMode, UpdateModeVar, +}; + +#[test] +fn test_structures() { + fn run() -> Result<(), Error> { + let main_def = { + let mut f = FuncDefBuilder::new(); + let b = f.add_block(); + let h1 = f.add_new_heap_cell(b)?; + let h2 = f.add_new_heap_cell(b)?; + let t = f.add_make_tuple(b, &[h1, h2])?; + let heap_cell_type = f.add_heap_cell_type(); + let u1 = f.add_make_union(b, &[heap_cell_type, heap_cell_type], 0, h1)?; + let u2 = f.add_make_union(b, &[heap_cell_type, heap_cell_type], 1, h2)?; + let h3 = f.add_get_tuple_field(b, t, 0)?; + let h4 = f.add_get_tuple_field(b, t, 1)?; + let h5 = f.add_unwrap_union(b, u1, 0)?; + let h6 = f.add_unwrap_union(b, u2, 1)?; + f.add_touch(b, h3)?; + f.add_update_write_only(b, UpdateModeVar(b"mode1"), h1)?; + f.add_update_write_only(b, UpdateModeVar(b"mode2"), h2)?; + f.add_update_write_only(b, UpdateModeVar(b"mode3"), h5)?; + f.add_update_write_only(b, UpdateModeVar(b"mode4"), h6)?; + f.add_touch(b, h4)?; + let unit = f.add_make_tuple(b, &[])?; + let unit_type = f.add_tuple_type(&[])?; + f.build(unit_type, unit_type, BlockExpr(b, unit))? + }; + + let main_mod = { + let mut m = ModDefBuilder::new(); + m.add_func(FuncName(b"main"), main_def)?; + m.build()? 
+ }; + + let program = { + let mut p = ProgramBuilder::new(); + p.add_mod(ModName(b"main"), main_mod)?; + p.add_entry_point(EntryPointName(b"main"), ModName(b"main"), FuncName(b"main"))?; + p.build()? + }; + + let program_sol = morphic_lib::solve(program)?; + + let (_, _, main_spec) = program_sol.entry_point_solution(EntryPointName(b"main"))?; + + let main_mod_sol = program_sol.mod_solutions(ModName(b"main"))?; + + let main_def_sol = main_mod_sol + .func_solutions(FuncName(b"main"))? + .spec(&main_spec)?; + + let mode1 = main_def_sol.update_mode(UpdateModeVar(b"mode1"))?; + let mode2 = main_def_sol.update_mode(UpdateModeVar(b"mode2"))?; + let mode3 = main_def_sol.update_mode(UpdateModeVar(b"mode3"))?; + let mode4 = main_def_sol.update_mode(UpdateModeVar(b"mode4"))?; + + assert_eq!(mode1, UpdateMode::InPlace); + assert_eq!(mode2, UpdateMode::Immutable); + assert_eq!(mode3, UpdateMode::InPlace); + assert_eq!(mode4, UpdateMode::Immutable); + + Ok(()) + } + + let result = run(); + if let Err(err) = result { + panic!("error: {}", err); + } +} diff --git a/www/public/index.html b/www/public/index.html index 7144f8368a..cccb3ab181 100644 --- a/www/public/index.html +++ b/www/public/index.html @@ -16,9 +16,10 @@

Roc's initial release is still under development, and this website is a placeholder until that release is ready.

In the meantime, if you'd like to learn more about Roc, here are some videos: