Merge branch 'main' into fix-docs-css

This commit is contained in:
Luke Boswell 2024-01-24 15:32:09 +11:00 committed by GitHub
commit 4e7b822ec3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
452 changed files with 19402 additions and 36091 deletions

View file

@ -9,15 +9,31 @@ concurrency:
env:
# use .tar.gz for quick testing
ARCHIVE_FORMAT: .tar.gz
BASIC_CLI_BRANCH: main
ARCHIVE_FORMAT: .tar.br
# Make a new basic-cli git tag and set it here before starting this workflow
RELEASE_TAG: 0.7.1
jobs:
fetch-releases:
prepare:
runs-on: [ubuntu-20.04]
steps:
- uses: actions/checkout@v3
with:
repository: roc-lang/basic-cli
- name: check if provided RELEASE_TAG is fresh
run: |
git fetch --tags
TAG_DATE=$(git log -1 --format=%ai ${{ env.RELEASE_TAG }})
CURRENT_DATE=$(date +%Y-%m-%d)
TAG_AGE=$(( ($(date -d $CURRENT_DATE +%s) - $(date -d "$TAG_DATE" +%s) )/(60*60*24) ))
if [ $TAG_AGE -gt 4 ]; then
echo "The provided RELEASE_TAG (${{ env.RELEASE_TAG }}) seems stale, it is $TAG_AGE days old. Did you set it correctly at the top of this workflow?"
exit 1
fi
# get latest nightly releases
- run: curl -fOL https://github.com/roc-lang/roc/releases/download/nightly/roc_nightly-linux_x86_64-latest.tar.gz
- run: curl -fOL https://github.com/roc-lang/roc/releases/download/nightly/roc_nightly-linux_arm64-latest.tar.gz
- run: curl -fOL https://github.com/roc-lang/roc/releases/download/nightly/roc_nightly-macos_x86_64-latest.tar.gz
@ -30,7 +46,7 @@ jobs:
build-linux-x86_64-files:
runs-on: [ubuntu-20.04]
needs: [fetch-releases]
needs: [prepare]
steps:
- uses: actions/checkout@v3
@ -47,14 +63,14 @@ jobs:
with:
name: linux-x86_64-files
path: |
basic-cli/src/metadata_linux-x64.rm
basic-cli/src/linux-x64.rh
basic-cli/src/linux-x64.o
basic-cli/platform/metadata_linux-x64.rm
basic-cli/platform/linux-x64.rh
basic-cli/platform/linux-x64.o
build-linux-arm64-files:
runs-on: [self-hosted, Linux, ARM64]
needs: [fetch-releases]
needs: [prepare]
steps:
- uses: actions/checkout@v3
@ -74,11 +90,11 @@ jobs:
with:
name: linux-arm64-files
path: |
basic-cli/src/linux-arm64.o
basic-cli/platform/linux-arm64.o
build-macos-x86_64-files:
runs-on: [macos-11] # I expect the generated files to work on macOS 12 and up
needs: [fetch-releases]
needs: [prepare]
steps:
- uses: actions/checkout@v3
@ -92,12 +108,12 @@ jobs:
with:
name: macos-x86_64-files
path: |
basic-cli/src/macos-x64.o
basic-cli/platform/macos-x64.o
build-macos-apple-silicon-files:
name: build apple silicon .o file
runs-on: [self-hosted, macOS, ARM64]
needs: [fetch-releases]
needs: [prepare]
steps:
- uses: actions/checkout@v3
@ -111,7 +127,7 @@ jobs:
with:
name: macos-apple-silicon-files
path: |
basic-cli/src/macos-arm64.o
basic-cli/platform/macos-arm64.o
create-release-archive:
needs: [build-linux-x86_64-files, build-linux-arm64-files, build-macos-x86_64-files, build-macos-apple-silicon-files]
@ -140,17 +156,25 @@ jobs:
- run: git clone https://github.com/roc-lang/basic-cli.git
- run: cp macos-apple-silicon-files/* ./basic-cli/src
- run: cp macos-apple-silicon-files/* ./basic-cli/platform
- run: cp linux-x86_64-files/* ./basic-cli/src
- run: cp linux-x86_64-files/* ./basic-cli/platform
- run: cp linux-arm64-files/* ./basic-cli/src
- run: cp linux-arm64-files/* ./basic-cli/platform
- run: cp macos-x86_64-files/* ./basic-cli/src
- run: cp macos-x86_64-files/* ./basic-cli/platform
- run: ./roc_nightly/roc build --bundle=${{ env.ARCHIVE_FORMAT }} ./basic-cli/src/main.roc
- name: bundle basic-cli release archive
run: ./roc_nightly/roc build --bundle=${{ env.ARCHIVE_FORMAT }} ./basic-cli/platform/main.roc
- run: echo "TAR_FILENAME=$(ls -d basic-cli/src/* | grep ${{ env.ARCHIVE_FORMAT }})" >> $GITHUB_ENV
- name: build basic-cli docs
env:
ROC_DOCS_URL_ROOT: /packages/basic-cli/${{ env.RELEASE_TAG }}
run: |
./roc_nightly/roc docs ./basic-cli/platform/main.roc
tar -czvf docs.tar.gz generated-docs/
- run: echo "TAR_FILENAME=$(ls -d basic-cli/platform/* | grep ${{ env.ARCHIVE_FORMAT }})" >> $GITHUB_ENV
- name: Upload platform archive
uses: actions/upload-artifact@v3
@ -159,6 +183,13 @@ jobs:
path: |
${{ env.TAR_FILENAME }}
- name: Upload docs archive
uses: actions/upload-artifact@v3
with:
name: release-assets-docs
path: |
docs.tar.gz
test-release-ubuntu:
needs: [create-release-archive]
runs-on: [ubuntu-20.04]
@ -194,18 +225,19 @@ jobs:
- name: Install ncat for tests if we dont have it yet
run: if ! dpkg -l | grep -qw ncat; then sudo apt install -y ncat; fi
- name: prep testing
- name: prepare testing
run: |
mv roc_nightly basic-cli-platform/.
cd basic-cli-platform
mkdir src
find . -maxdepth 1 -type f -exec mv {} src/ \;
mkdir platform
# move all files to platform dir
find . -maxdepth 1 -type f -exec mv {} platform/ \;
mkdir temp-basic-cli
cd temp-basic-cli
git clone https://github.com/roc-lang/basic-cli.git
cd basic-cli
git checkout ${{ env.BASIC_CLI_BRANCH }}
git checkout ${{ env.RELEASE_TAG }}
cp -r examples ../..
cp -r ci ../..
cp -r LICENSE ../..

View file

@ -53,6 +53,10 @@ jobs:
latestTag=$(git describe --tags $(git rev-list --tags --max-count=1))
git checkout $latestTag
# temp issue with new string interpolation syntax
# TODO undo when 0.7.2 or 0.8.0 is released
- run: sed -i 's/\$//g' basic-cli/examples/tcp-client.roc
- name: Run all tests with latest roc nightly and latest basic-cli release
run: |
sed -i 's/x86_64/arm64/g' ./ci/test_latest_release.sh

View file

@ -1,6 +1,6 @@
on:
pull_request:
name: CI manager
# cancel current runs when a new commit is pushed
@ -21,7 +21,7 @@ jobs:
id: filecheck
run: |
git fetch origin ${{ github.base_ref }}
if git diff --name-only origin/${{ github.base_ref }} HEAD | grep -qvE '(\.md$|\.css$|\.html$)'; then
if git diff --name-only origin/${{ github.base_ref }} HEAD | grep -qvE '(\.md$|\.css$|\.html$|^AUTHORS$)'; then
echo "run_tests=full" >> $GITHUB_OUTPUT
else
echo "run_tests=none" >> $GITHUB_OUTPUT
@ -52,7 +52,7 @@ jobs:
start-macos-x86-64-tests:
needs: check-changes
if: needs.check-changes.outputs.run_tests == 'full'
uses: ./.github/workflows/ubuntu_x86_64.yml
uses: ./.github/workflows/macos_x86_64.yml
start-ubuntu-x86-64-tests:
needs: check-changes
@ -74,7 +74,7 @@ jobs:
if: needs.check-changes.outputs.run_tests == 'full'
uses: ./.github/workflows/benchmarks.yml
finish-full:
ran-full:
runs-on: ubuntu-22.04
needs: [
start-nix-linux-x86-64-tests,
@ -90,10 +90,29 @@ jobs:
steps:
- run: echo "all workflows succeeded!"
finish-none:
ran-none:
runs-on: ubuntu-22.04
needs: [check-changes]
if: needs.check-changes.outputs.run_tests == 'none'
steps:
- run: echo "Only non-code files changed. CI manager did not run any workflows."
# we need a single end job for the required checks under branch protection rules
finish:
runs-on: ubuntu-22.04
needs: [ran-full, ran-none]
if: |
always()
steps:
- name: Check previous job results
run: |
if [ "${{ needs.ran-full.result }}" != "success" ] && [ "${{ needs.ran-none.result }}" != "success" ]; then
echo "One or more jobs failed."
exit 1
fi
- run: echo "Workflow succeeded :)"

View file

@ -1,33 +1,33 @@
on:
workflow_call:
workflow_call:
name: Macos x86-64 rust tests
env:
RUST_BACKTRACE: 1
RUST_BACKTRACE: 1
jobs:
test-rust-macos-x86-64:
runs-on: [self-hosted, macOS, X64]
timeout-minutes: 90
env:
RUSTC_WRAPPER: /Users/username1/.cargo/bin/sccache
steps:
- uses: actions/checkout@v3
test-rust-macos-x86-64:
runs-on: [self-hosted, macOS, X64]
timeout-minutes: 90
env:
RUSTC_WRAPPER: /Users/username1/.cargo/bin/sccache
steps:
- uses: actions/checkout@v3
- name: set LLVM_SYS_160_PREFIX
run: echo "LLVM_SYS_160_PREFIX=$(brew --prefix llvm@16)" >> $GITHUB_ENV
- name: set LLVM_SYS_160_PREFIX
run: echo "LLVM_SYS_160_PREFIX=$(brew --prefix llvm@16)" >> $GITHUB_ENV
- name: Update PATH to use zig 11
run: |
echo "PATH=/Users/username1/Downloads/zig-macos-x86_64-0.11.0:$PATH" >> $GITHUB_ENV
- run: zig version
- name: Update PATH to use zig 11
run: |
echo "PATH=/Users/username1/Downloads/zig-macos-x86_64-0.11.0:$PATH" >> $GITHUB_ENV
- name: test_gen llvm tests
run: cargo nextest-gen-llvm --release --no-fail-fast --locked -E "package(test_gen) - test(gen_str::str_append_scalar)"
- run: zig version
- name: regular rust tests
run: cargo test --locked --release -- --skip opaque_wrap_function --skip gen_list::bool_list_literal --skip platform_switching_swift --skip swift_ui --skip gen_str::str_append_scalar --skip gen_tags::phantom_polymorphic_record && sccache --show-stats
# swift tests are skipped because of "Could not find or use auto-linked library 'swiftCompatibilityConcurrency'" on macos-11 x86_64 CI machine
# this issue may be caused by using older versions of XCode
- name: test_gen llvm tests
run: cargo nextest-gen-llvm --release --no-fail-fast --locked -E "package(test_gen) - test(gen_str::str_append_scalar)"
- name: regular rust tests
run: cargo test --locked --release -- --skip opaque_wrap_function --skip gen_list::bool_list_literal --skip platform_switching_swift --skip swift_ui --skip gen_tags::phantom_polymorphic_record && sccache --show-stats
# swift tests are skipped because of "Could not find or use auto-linked library 'swiftCompatibilityConcurrency'" on macos-11 x86_64 CI machine
# this issue may be caused by using older versions of XCode

View file

@ -25,7 +25,7 @@ jobs:
run: ./ci/write_version.sh
- name: build release with lto
run: cargo build --profile=release-with-lto --locked --bin roc
run: cargo build --profile=release-with-lto --locked --bin roc --bin roc_ls
- name: get commit SHA
run: echo "SHA=$(git rev-parse --short "$GITHUB_SHA")" >> $GITHUB_ENV

View file

@ -25,7 +25,7 @@ jobs:
run: ./ci/write_version.sh
- name: build release with lto
run: RUSTFLAGS="-C target-cpu=x86-64" cargo build --profile=release-with-lto --locked --bin roc
run: RUSTFLAGS="-C target-cpu=x86-64" cargo build --profile=release-with-lto --locked --bin roc --bin roc_ls
# target-cpu=x86-64 -> For maximal compatibility with all CPUs. This was also faster in our tests: https://roc.zulipchat.com/#narrow/stream/231635-compiler-development/topic/.2Ecargo.2Fconfig.2Etoml/near/325726299
- name: get commit SHA

View file

@ -42,11 +42,7 @@ jobs:
run: ./ci/write_version.sh
- name: build nightly release
run: cargo build --locked --profile=release-with-lto --bin roc
# this makes the roc binary a lot smaller
- name: strip debug info
run: strip ./target/release-with-lto/roc
run: cargo build --locked --profile=release-with-lto --bin roc --bin roc_ls
- name: package release
run: ./ci/package_release.sh ${{ env.RELEASE_FOLDER_NAME }}

View file

@ -1,63 +1,62 @@
on:
# pull_request:
workflow_dispatch:
schedule:
- cron: '0 9 * * *' # 9=9am utc+0
# pull_request:
workflow_dispatch:
schedule:
- cron: "0 9 * * *" # 9=9am utc+0
name: Nightly Release macOS x86_64
env:
LLVM_SYS_160_PREFIX: /usr/local/opt/llvm@16
LLVM_SYS_160_PREFIX: /usr/local/opt/llvm@16
jobs:
test-build-upload:
name: build, test, package and upload nightly release
runs-on: [self-hosted, macOS, X64]
timeout-minutes: 120
steps:
- uses: actions/checkout@v3
test-build-upload:
name: build, test, package and upload nightly release
runs-on: [self-hosted, macOS, X64]
timeout-minutes: 120
steps:
- uses: actions/checkout@v3
- name: Update PATH to use zig 11
run: |
echo "PATH=/Users/username1/Downloads/zig-macos-x86_64-0.11.0:$PATH" >> $GITHUB_ENV
- run: zig version
- name: write version to file
run: ./ci/write_version.sh
- name: execute rust tests
run: cargo test --release --locked -- --skip opaque_wrap_function --skip gen_list::bool_list_literal --skip platform_switching_swift --skip swift_ui --skip gen_str::str_append_scalar --skip gen_tags::phantom_polymorphic_record
# swift tests are skipped because of "Could not find or use auto-linked library 'swiftCompatibilityConcurrency'" on macos-11 x86_64 CI machine
# this issue may be caused by using older versions of XCode
- name: Update PATH to use zig 11
run: |
echo "PATH=/Users/username1/Downloads/zig-macos-x86_64-0.11.0:$PATH" >> $GITHUB_ENV
- name: build release
run: RUSTFLAGS="-C target-cpu=x86-64" cargo build --profile=release-with-lto --locked --bin roc
# target-cpu=x86-64 -> For maximal compatibility for all CPU's.
- name: get commit SHA
run: echo "SHA=$(git rev-parse --short "$GITHUB_SHA")" >> $GITHUB_ENV
- name: get date
run: echo "DATE=$(date "+%Y-%m-%d")" >> $GITHUB_ENV
- name: build file name
env:
DATE: ${{ env.DATE }}
SHA: ${{ env.SHA }}
run: echo "RELEASE_FOLDER_NAME=roc_nightly-macos_x86_64-$DATE-$SHA" >> $GITHUB_ENV
- run: zig version
# this makes the roc binary a lot smaller
- name: strip debug info
run: strip ./target/release-with-lto/roc
- name: package release
run: ./ci/package_release.sh ${{ env.RELEASE_FOLDER_NAME }}
- name: Upload artifact. Actually uploading to github releases has to be done manually.
uses: actions/upload-artifact@v3
with:
name: ${{ env.RELEASE_FOLDER_NAME }}.tar.gz
path: ${{ env.RELEASE_FOLDER_NAME }}.tar.gz
retention-days: 4
- name: write version to file
run: ./ci/write_version.sh
- name: execute rust tests
run: cargo test --release --locked -- --skip opaque_wrap_function --skip gen_list::bool_list_literal --skip platform_switching_swift --skip swift_ui --skip gen_tags::phantom_polymorphic_record
# swift tests are skipped because of "Could not find or use auto-linked library 'swiftCompatibilityConcurrency'" on macos-11 x86_64 CI machine
# this issue may be caused by using older versions of XCode
- name: build release
run: RUSTFLAGS="-C target-cpu=x86-64" cargo build --profile=release-with-lto --locked --bin roc --bin roc_ls
# target-cpu=x86-64 -> For maximal compatibility with all CPUs.
- name: get commit SHA
run: echo "SHA=$(git rev-parse --short "$GITHUB_SHA")" >> $GITHUB_ENV
- name: get date
run: echo "DATE=$(date "+%Y-%m-%d")" >> $GITHUB_ENV
- name: build file name
env:
DATE: ${{ env.DATE }}
SHA: ${{ env.SHA }}
run: echo "RELEASE_FOLDER_NAME=roc_nightly-macos_x86_64-$DATE-$SHA" >> $GITHUB_ENV
# this makes the roc binary a lot smaller
- name: strip debug info
run: strip ./target/release-with-lto/roc
- name: package release
run: ./ci/package_release.sh ${{ env.RELEASE_FOLDER_NAME }}
- name: Upload artifact. Actually uploading to github releases has to be done manually.
uses: actions/upload-artifact@v3
with:
name: ${{ env.RELEASE_FOLDER_NAME }}.tar.gz
path: ${{ env.RELEASE_FOLDER_NAME }}.tar.gz
retention-days: 4

View file

@ -20,6 +20,9 @@ jobs:
- name: execute tests with --release
run: nix develop -c cargo test --locked --release
- name: roc test all builtins
run: nix develop -c ./ci/roc_test_builtins.sh
- name: test wasm32 cli_run
run: nix develop -c cargo test --locked --release --features="wasm32-cli-run"

View file

@ -32,8 +32,12 @@ jobs:
- name: test building default.nix
run: nix-build
# for skipped tests: see issue 6274
- name: execute tests with --release
run: nix develop -c cargo test --locked --release
run: nix develop -c cargo test --locked --release -- --skip cli_run::inspect_gui --skip cli_run::hello_gui
- name: roc test all builtins
run: nix develop -c ./ci/roc_test_builtins.sh
- name: make a libapp.so for the next step
run: nix develop -c cargo run -- gen-stub-lib examples/platform-switching/rocLovesRust.roc

View file

@ -20,6 +20,9 @@ jobs:
run: nix develop -c cargo test --locked --release -p roc_cli -- --skip hello_gui
# see 5932 for hello_gui
- name: roc test all builtins
run: nix develop -c ./ci/roc_test_builtins.sh
- name: make a libapp.so for the next step
run: nix develop -c cargo run -- gen-stub-lib examples/platform-switching/rocLovesRust.roc

View file

@ -47,11 +47,8 @@ jobs:
- name: test gen-wasm single threaded # gen-wasm has some multithreading problems to do with the wasmer runtime
run: cargo test --locked --release --package test_gen --no-default-features --features gen-wasm -- --test-threads=1 && sccache --show-stats
- name: run `roc test` on Str builtins
run: cargo run --locked --release -- test crates/compiler/builtins/roc/Str.roc && sccache --show-stats
- name: run `roc test` on Dict builtins
run: cargo run --locked --release -- test crates/compiler/builtins/roc/Dict.roc && sccache --show-stats
- name: roc test all builtins
run: ./ci/roc_test_builtins.sh
- name: wasm repl test
run: crates/repl_test/test_wasm.sh && sccache --show-stats

39
.gitignore vendored
View file

@ -1,3 +1,27 @@
### Do not modify these first three ignore rules. Needed to ignore files with no extension ###
# Ignore all files including binary files that have no extension
*
# Unignore all files with extensions
!*.*
# Unignore all directories
!*/
# Specifically keep these files with no extension
!Earthfile
!AUTHORS
!LICENSE*
!LEGAL*
!Dockerfile
# .reuse/dep5 see https://reuse.software/
!dep5
# NotARocFile is used for formatter test
!NotARocFile
# also includes keeping one exe benchmark file
!dynhost_benchmarks*
### Add specific file extensions and directories below ###
# Ignore the following directories and file extensions
target
generated-docs
zig-cache
@ -63,10 +87,12 @@ result
# Only keep Cargo.lock dependencies for the main compiler.
# Examples and test only crates should be fine to be unlocked.
# This remove unneccessary lock file versioning.
# This removes unnecessary lock file versioning.
# It also ensures the compiler can always pull in 1 version of things and doesn't get restricted by sub lockfiles.
/**/Cargo.lock
!/Cargo.lock
!/examples/static-site-gen/platform/Cargo.lock
# static-site-gen exception is because of https://github.com/tkaitchuck/aHash/issues/195
# snapshot tests temp file
*.pending-snap
@ -81,4 +107,13 @@ www/dist
# ignore the examples folder in the WIP website, this is copied from roc-lang/examples in when building the site
www/content/examples
www/examples-main.zip
www/examples-main
www/examples-main
examples/static-site-gen/**/*.html
# glue auto-generated fixture code
crates/glue/tests/fixtures/*/Cargo.toml
crates/glue/tests/fixtures/*/build.rs
crates/glue/tests/fixtures/*/host.c
crates/glue/tests/fixtures/*/src/main.rs
crates/glue/tests/fixtures/*/test_glue/

View file

@ -123,7 +123,7 @@ Luca Cervello <luca.cervello@gmail.com>
Josh Mak <joshmak@berkeley.edu>
Jakub Kozłowski <kubukoz@gmail.com>
Travis Staloch <twostepted@gmail.com>
Nick Gravgaard <nick@nickgravgaard.com>
Nick Gravgaard <nick@nick-gravgaard.com>
Keerthana Kasthuril <76804118+keerthanak-tw@users.noreply.github.com>
Salman Shaik <salmansiddiq.shaik@gmail.com>
Austin Clements <austinclementsbass@gmail.com>

View file

@ -76,7 +76,7 @@ To run the test suite (via `cargo test`), you additionally need to install:
- [`valgrind`](https://www.valgrind.org/) (needs special treatment to [install on macOS](https://stackoverflow.com/a/61359781))
Alternatively, you can use `cargo test --no-fail-fast` or `cargo test -p specific_tests` to skip over the valgrind failures & tests.
For debugging LLVM IR, we use [DebugIR](https://github.com/vaivaswatha/debugir). This dependency is only required to build with the `--debug` flag, and for normal development you should be fine without it.
For emitting LLVM IR for debugging purposes, the `--emit-llvm-ir` flag can be used.
### libxcb libraries

View file

@ -26,7 +26,7 @@ Check [Building from source](BUILDING_FROM_SOURCE.md) for instructions.
Most contributors execute the following commands before pushing their code:
```sh
cargo test
cargo test --release
cargo fmt --all -- --check
cargo clippy --workspace --tests -- --deny warnings
```

136
Cargo.lock generated
View file

@ -19,13 +19,14 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "ahash"
version = "0.8.3"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f"
checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a"
dependencies = [
"cfg-if",
"once_cell",
"version_check",
"zerocopy",
]
[[package]]
@ -61,6 +62,12 @@ dependencies = [
"alloc-no-stdlib",
]
[[package]]
name = "allocator-api2"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5"
[[package]]
name = "android-tzdata"
version = "0.1.1"
@ -124,13 +131,6 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "arena-pool"
version = "0.0.1"
dependencies = [
"roc_error_macros",
]
[[package]]
name = "arrayref"
version = "0.3.7"
@ -209,7 +209,7 @@ dependencies = [
"cfg-if",
"libc",
"miniz_oxide",
"object 0.32.1",
"object",
"rustc-demangle",
]
@ -707,7 +707,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
dependencies = [
"cfg-if",
"hashbrown 0.14.1",
"hashbrown 0.14.3",
"lock_api",
"once_cell",
"parking_lot_core",
@ -722,6 +722,17 @@ dependencies = [
"powerfmt",
]
[[package]]
name = "derive_more"
version = "0.99.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "diff"
version = "0.1.13"
@ -827,6 +838,12 @@ dependencies = [
"regex",
]
[[package]]
name = "equivalent"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
[[package]]
name = "errno"
version = "0.3.5"
@ -1041,7 +1058,7 @@ dependencies = [
"futures-sink",
"futures-util",
"http",
"indexmap",
"indexmap 1.9.3",
"slab",
"tokio",
"tokio-util",
@ -1062,20 +1079,14 @@ checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
[[package]]
name = "hashbrown"
version = "0.13.2"
version = "0.14.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e"
checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
dependencies = [
"ahash",
"bumpalo",
"allocator-api2",
]
[[package]]
name = "hashbrown"
version = "0.14.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7dfda62a12f55daeae5015f81b0baea145391cb4520f86c248fc615d72640d12"
[[package]]
name = "heck"
version = "0.4.1"
@ -1258,6 +1269,16 @@ dependencies = [
"hashbrown 0.12.3",
]
[[package]]
name = "indexmap"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f"
dependencies = [
"equivalent",
"hashbrown 0.14.3",
]
[[package]]
name = "indoc"
version = "1.0.9"
@ -1636,24 +1657,16 @@ dependencies = [
[[package]]
name = "object"
version = "0.30.4"
version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03b4680b86d9cfafba8fc491dc9b6df26b68cf40e9e6cd73909194759a63c385"
checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441"
dependencies = [
"crc32fast",
"flate2",
"hashbrown 0.13.2",
"indexmap",
"memchr",
]
[[package]]
name = "object"
version = "0.32.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0"
dependencies = [
"hashbrown 0.14.3",
"indexmap 2.1.0",
"memchr",
"ruzstd",
]
[[package]]
@ -2152,9 +2165,9 @@ dependencies = [
[[package]]
name = "reqwest"
version = "0.11.22"
version = "0.11.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "046cd98826c46c2ac8ddecae268eb5c2e58628688a5fc7a2643704a73faba95b"
checksum = "37b1ae8d9ac08420c66222fb9096fc5de435c3c48542bc5336c51892cffafb41"
dependencies = [
"base64 0.21.4",
"bytes",
@ -2396,7 +2409,7 @@ dependencies = [
"bitvec",
"bumpalo",
"fnv",
"hashbrown 0.13.2",
"hashbrown 0.14.3",
"im",
"im-rc",
"smallvec",
@ -2526,7 +2539,7 @@ version = "0.0.1"
dependencies = [
"bumpalo",
"capstone",
"object 0.30.4",
"object",
"packed_struct",
"roc_builtins",
"roc_can",
@ -2591,7 +2604,7 @@ dependencies = [
"cli_utils",
"dircpy",
"fnv",
"indexmap",
"indexmap 2.1.0",
"indoc",
"libc",
"libloading",
@ -2682,7 +2695,7 @@ dependencies = [
"libc",
"mach_object",
"memmap2",
"object 0.30.4",
"object",
"roc_collections",
"roc_error_macros",
"roc_load",
@ -2785,7 +2798,7 @@ dependencies = [
"arrayvec 0.7.4",
"bitvec",
"bumpalo",
"hashbrown 0.13.2",
"hashbrown 0.14.3",
"parking_lot",
"roc_builtins",
"roc_can",
@ -3282,6 +3295,17 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "ruzstd"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58c4eb8a81997cf040a091d1f7e1938aeab6749d3a0dfa73af43cdc32393483d"
dependencies = [
"byteorder",
"derive_more",
"twox-hash",
]
[[package]]
name = "ryu"
version = "1.0.15"
@ -4135,6 +4159,16 @@ version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed"
[[package]]
name = "twox-hash"
version = "1.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675"
dependencies = [
"cfg-if",
"static_assertions",
]
[[package]]
name = "typed-arena"
version = "2.0.2"
@ -4683,3 +4717,23 @@ name = "yansi"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
[[package]]
name = "zerocopy"
version = "0.7.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.38",
]

View file

@ -101,11 +101,11 @@ fnv = "1.0.7"
fs_extra = "1.3.0"
futures = "0.3.26"
glyph_brush = "0.7.7"
hashbrown = { version = "0.13.2", features = ["bumpalo"] }
hashbrown = { version = "0.14.3" }
iced-x86 = { version = "1.18.0", default-features = false, features = ["std", "decoder", "op_code_info", "instr_info"] }
im = "15.1.0"
im-rc = "15.1.0"
indexmap = "1.9.2"
indexmap = "2.1.0"
indoc = "1.0.9"
insta = "1.28.0"
js-sys = "0.3.61"
@ -120,7 +120,7 @@ maplit = "1.0.2"
memmap2 = "0.5.10"
mimalloc = { version = "0.1.34", default-features = false }
nonempty = "0.8.1"
object = { version = "0.30.3", features = ["read", "write"] }
object = { version = "0.32.2", features = ["read", "write"] }
packed_struct = "0.10.1"
page_size = "0.5.0"
palette = "0.6.1"
@ -139,7 +139,7 @@ quote = "1.0.23"
rand = "0.8.5"
regex = "1.7.1"
remove_dir_all = "0.8.1"
reqwest = { version = "0.11.20", default-features = false, features = ["blocking", "rustls-tls"] } # default-features=false removes libopenssl as a dependency on Linux, which might not be available!
reqwest = { version = "0.11.23", default-features = false, features = ["blocking", "rustls-tls"] } # default-features=false removes libopenssl as a dependency on Linux, which might not be available!
rlimit = "0.9.1"
rustyline = { git = "https://github.com/roc-lang/rustyline", rev = "e74333c" }
rustyline-derive = { git = "https://github.com/roc-lang/rustyline", rev = "e74333c" }

View file

@ -53,9 +53,7 @@ build-nightly-release:
COPY --dir .git LICENSE LEGAL_DETAILS ci ./
# version.txt is used by the CLI: roc --version
RUN ./ci/write_version.sh
RUN RUSTFLAGS=$RUSTFLAGS cargo build --profile=release-with-lto --locked --bin roc
# strip debug info
RUN strip ./target/release-with-lto/roc
RUN RUSTFLAGS=$RUSTFLAGS cargo build --profile=release-with-lto --locked --bin roc --bin roc_ls
RUN ./ci/package_release.sh $RELEASE_FOLDER_NAME
RUN ls
SAVE ARTIFACT ./$RELEASE_FOLDER_NAME.tar.gz AS LOCAL $RELEASE_FOLDER_NAME.tar.gz

View file

@ -277,4 +277,33 @@ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
===========================================================
* ankerl::unordered_dense - https://github.com/martinus/unordered_dense
A rather direct port of the source into Roc is currently the implementation for our Dict type.
Source code is in crates/compiler/builtins/roc/Dict.roc
MIT License
Copyright (c) 2022 Martin Leitner-Ankerl
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View file

@ -6,7 +6,7 @@
- [**tutorial**](https://roc-lang.org/tutorial)
- [**docs** for the standard library](https://www.roc-lang.org/builtins)
- [**examples**](https://github.com/roc-lang/examples/tree/main/examples)
- [**faq**: frequently asked questions](https://github.com/roc-lang/roc/blob/main/FAQ.md)
- [**faq**: frequently asked questions](https://github.com/roc-lang/roc/blob/main/www/content/faq.md)
- [**group chat**](https://roc.zulipchat.com) for help, questions and discussions
If you'd like to contribute, check out [good first issues](https://github.com/roc-lang/roc/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22). Don't hesitate to ask for help on our [group chat](https://roc.zulipchat.com), we're friendly!
@ -17,14 +17,17 @@ You can 💜 **sponsor** 💜 Roc on:
- [GitHub](https://github.com/sponsors/roc-lang)
- [Liberapay](https://liberapay.com/roc_lang)
We are very grateful for our corporate sponsors [Vendr](https://www.vendr.com/), [RWX](https://www.rwx.com), [Tweede golf](https://tweedegolf.nl/en), and [ohne-makler](https://www.ohne-makler.net):
We are very grateful for our corporate sponsors [Vendr](https://www.vendr.com/), [RWX](https://www.rwx.com), [Tweede golf](https://tweedegolf.nl/en), [ohne-makler](https://www.ohne-makler.net), and [Decem](https://www.decem.com.au):
[<img src="https://user-images.githubusercontent.com/1094080/223597445-81755626-a080-4299-a38c-3c92e7548489.png" height="60" alt="Vendr logo"/>](https://www.vendr.com)
&nbsp;&nbsp;&nbsp;&nbsp;
[<img src="https://github.com/roc-lang/roc/assets/1094080/82c0868e-d23f-42a0-ac2d-c6e6b2e16575" height="60" alt="RWX logo"/>](https://www.rwx.com)
&nbsp;&nbsp;&nbsp;&nbsp;
[<img src="https://user-images.githubusercontent.com/1094080/183123052-856815b1-8cc9-410a-83b0-589f03613188.svg" height="60" alt="tweede golf logo"/>](https://tweedegolf.nl/en)
&nbsp;&nbsp;&nbsp;&nbsp;
[<img src="https://www.ohne-makler.net/static/img/brand/logo.svg" height="60" alt="ohne-makler logo"/>](https://www.ohne-makler.net)
&nbsp;&nbsp;&nbsp;&nbsp;
[<img src="https://github.com/roc-lang/roc/assets/1094080/fd2a759c-7f6d-4f57-9eca-9601deba87b6" height="60" alt="Decem logo"/>](https://www.decem.com.au)
If you would like your company to become a corporate sponsor of Roc's development, please [DM Richard Feldman on Zulip](https://roc.zulipchat.com/#narrow/pm-with/281383-user281383)!
@ -39,7 +42,7 @@ We'd also like to express our gratitude to our generous [individual sponsors](ht
* [Lucas Rosa](https://github.com/rvcas)
* [Jonas Schell](https://github.com/Ocupe)
* [Christopher Dolan](https://github.com/cdolan)
* [Nick Gravgaard](https://github.com/nickgravgaard)
* [Nick Gravgaard](https://github.com/nick-gravgaard)
* [Zeljko Nesic](https://github.com/popara)
* [Shritesh Bhattarai](https://github.com/shritesh)
* [Richard Feldman](https://github.com/rtfeldman)

View file

@ -3,7 +3,6 @@ use data_encoding::HEXUPPER;
use is_executable::IsExecutable;
use regex::Regex;
use ring::digest::{Context, Digest, SHA256};
use std::fs::File;
use std::io::Read;
use std::{
collections::{HashMap, HashSet, VecDeque},
@ -22,7 +21,7 @@ fn main() {
delete_old_bench_results();
if optional_args.check_executables_changed {
println!("Doing a test run to verify benchmarks are working correctly and generate executables.");
println!("\nDoing a test run to verify benchmarks are working correctly and generate executables.\n");
std::env::set_var("BENCH_DRY_RUN", "1");
@ -79,6 +78,7 @@ fn finish(all_regressed_benches: HashSet<String>, nr_repeat_benchmarks: usize) {
// returns all benchmarks that have regressed
fn do_all_benches(nr_repeat_benchmarks: usize) -> HashSet<String> {
delete_old_bench_results();
do_benchmark("main");
let mut all_regressed_benches = do_benchmark("branch");
@ -91,6 +91,7 @@ fn do_all_benches(nr_repeat_benchmarks: usize) -> HashSet<String> {
for _ in 1..nr_repeat_benchmarks {
delete_old_bench_results();
do_benchmark("main");
let regressed_benches = do_benchmark("branch");
@ -110,17 +111,25 @@ fn do_all_benches(nr_repeat_benchmarks: usize) -> HashSet<String> {
// returns Vec with names of regressed benchmarks
fn do_benchmark(branch_name: &'static str) -> HashSet<String> {
let mut cmd_child = Command::new(format!(
"./bench-folder-{}/target/release/deps/time_bench",
branch_name
))
.args(&["--bench", "--noplot"])
.stdout(Stdio::piped())
.stderr(Stdio::inherit())
.spawn()
.unwrap_or_else(|_| panic!("Failed to benchmark {}.", branch_name));
let mut bench_cmd =
Command::new(format!(
"./bench-folder-{}/target/release/deps/time_bench",
branch_name
));
let stdout = cmd_child.stdout.as_mut().unwrap();
let bench_cmd_w_args =
bench_cmd.args(&["--bench", "--noplot"]);
let bench_cmd_as_str = format!("{bench_cmd_w_args:?}");
let mut bench_cmd_child =
bench_cmd_w_args
.stdout(Stdio::piped())
.stderr(Stdio::inherit())
.spawn()
.unwrap_or_else(|_| panic!("Failed to benchmark {}.", branch_name));
let stdout = bench_cmd_child.stdout.as_mut().unwrap();
let stdout_reader = BufReader::new(stdout);
let stdout_lines = stdout_reader.lines();
@ -147,6 +156,18 @@ fn do_benchmark(branch_name: &'static str) -> HashSet<String> {
println!(">>bench {:?}: {:?}", branch_name, line_str);
}
let exit_status = bench_cmd_child.wait().expect("Failed to wait on cmd_child");
if !exit_status.success() {
panic!(
"Error: time-bench execution failed with exit code {}.\n\
See output above for error info.\n\
Command was:\n\t{}",
exit_status,
bench_cmd_as_str
);
}
regressed_benches
}
@ -190,20 +211,21 @@ fn sha256_digest<R: Read>(mut reader: R) -> Result<Digest, io::Error> {
}
fn sha_file(file_path: &Path) -> Result<String, io::Error> {
// Debug info is dependent on the dir in which executable was created,
// so we need to strip that to be able to compare binaries.
let no_debug_info_file_path = file_path.to_str().unwrap().to_string() + ("_no_debug_info");
std::fs::copy(file_path, &no_debug_info_file_path)?;
// only checking disassembly because of #6386
let disassembly_output = Command::new("objdump")
.args(["-d", file_path.to_str().unwrap()])
.output()
.expect("failed to execute objdump");
let strip_output = Command::new("strip")
.args(["--strip-debug", &no_debug_info_file_path])
.output()
.expect("failed to execute process");
assert!(disassembly_output.status.success());
assert!(strip_output.status.success());
let mut reader = BufReader::new(disassembly_output.stdout.as_slice());
// the first line contains the path, we want to skip it
let mut _discard_lines = String::new();
reader.read_line(&mut _discard_lines)?;
reader.read_line(&mut _discard_lines)?;
let no_debug_info_file = File::open(no_debug_info_file_path)?;
let reader = BufReader::new(no_debug_info_file);
let digest = sha256_digest(reader)?;
Ok(HEXUPPER.encode(digest.as_ref()))
@ -250,6 +272,7 @@ fn check_if_bench_executables_changed() -> bool {
let main_bench_hashes = calc_hashes_for_folder(&main_benches_path_str);
let branch_benches_path_str = [BENCH_FOLDER_BRANCH, bench_folder_str].join("");
let branch_bench_hashes = calc_hashes_for_folder(&branch_benches_path_str);
if main_bench_hashes.keys().len() == branch_bench_hashes.keys().len() {

View file

@ -4,6 +4,9 @@
set -euxo pipefail
git clone https://github.com/roc-lang/basic-cli.git
cd basic-cli
git checkout $RELEASE_TAG
cd ..
if [ "$(uname -s)" == "Linux" ]; then
@ -13,7 +16,7 @@ if [ "$(uname -s)" == "Linux" ]; then
timeout 300s sudo apt-get install -y musl-tools
fi
cd basic-cli/src # we cd to install the target for the right rust version
cd basic-cli/platform # we cd to install the target for the right rust version
if [ "$(uname -m)" == "x86_64" ]; then
rustup target add x86_64-unknown-linux-musl
elif [ "$(uname -m)" == "aarch64" ]; then

View file

@ -3,15 +3,21 @@
# https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/
set -euxo pipefail
cp target/release-with-lto/roc ./roc # to be able to delete "target" later
# this makes the binaries a lot smaller
strip ./target/release-with-lto/roc
strip ./target/release-with-lto/roc_ls
# to be able to delete "target" later
cp target/release-with-lto/roc ./roc
cp target/release-with-lto/roc_ls ./roc_lang_server
# delete unnecessary files and folders
git clean -fdx --exclude roc
git clean -fdx --exclude roc --exclude roc_lang_server
mkdir $1
mv roc LICENSE LEGAL_DETAILS $1
mv roc roc_lang_server LICENSE LEGAL_DETAILS $1
mkdir $1/examples
mv examples/helloWorld.roc examples/platform-switching examples/cli $1/examples

10
ci/roc_test_builtins.sh Executable file
View file

@ -0,0 +1,10 @@
#!/usr/bin/env bash

# Runs `cargo run --locked --release -- test <file>` for every builtin `.roc`
# module that contains at least one line starting (after optional whitespace)
# with `expect`. Modules without such a line are skipped.
#
# https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/
set -euxo pipefail

for builtin_module in crates/compiler/builtins/roc/*.roc; do
    # Only invoke cargo when the module actually has something to test.
    if grep -qE '^\s*expect' "$builtin_module"; then
        cargo run --locked --release -- test "$builtin_module"
    fi
done

23
ci/update_basic_cli_url.sh Executable file
View file

@ -0,0 +1,23 @@
#!/usr/bin/env bash

# Replaces every occurrence of OLD_URL with NEW_URL in all files tracked by git.
#
# Usage: ./update_basic_cli_url.sh OLD_URL NEW_URL
#
# https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/
set -euxo pipefail

# Check if the correct number of arguments is given
if [ "$#" -ne 2 ]; then
    echo "Usage: ./update_basic_cli_url.sh OLD_URL NEW_URL" >&2
    exit 1
fi

OLD_URL=$1
NEW_URL=$2

# Hand both URLs to perl through the environment instead of splicing them into
# the perl program text. That way characters such as `|`, `$` or `@` inside a
# URL can neither terminate the s||| expression nor be interpolated by perl.
export OLD_URL NEW_URL

# Use git ls-files to list all files tracked by Git, excluding those in .gitignore.
# The list is NUL-delimited so that paths containing spaces, newlines or glob
# characters are passed to perl intact. `pipefail` still aborts the script if
# `git ls-files` itself fails.
git ls-files -z | while IFS= read -r -d '' file; do
    # \Q...\E makes perl treat OLD_URL as a literal string, not as a regex.
    perl -pi -e 's|\Q$ENV{OLD_URL}\E|$ENV{NEW_URL}|g' "$file"
done

echo "Replaced all old basic-cli URLs with the new one."

View file

@ -1,19 +1,12 @@
# Running the benchmarks
Install cargo criterion:
If you're not using nix, install cargo criterion:
```sh
cargo install cargo-criterion
```
To prevent stack overflow on the `CFold` benchmark:
```sh
ulimit -s unlimited
```
In the `cli` folder execute:
In the `crates/cli` folder execute:
```sh
cargo criterion

View file

@ -50,7 +50,8 @@ pub const CMD_GLUE: &str = "glue";
pub const CMD_GEN_STUB_LIB: &str = "gen-stub-lib";
pub const CMD_PREPROCESS_HOST: &str = "preprocess-host";
pub const FLAG_DEBUG: &str = "debug";
pub const FLAG_EMIT_LLVM_IR: &str = "emit-llvm-ir";
pub const FLAG_PROFILING: &str = "profiling";
pub const FLAG_BUNDLE: &str = "bundle";
pub const FLAG_DEV: &str = "dev";
pub const FLAG_OPTIMIZE: &str = "optimize";
@ -102,9 +103,15 @@ pub fn build_app() -> Command {
.action(ArgAction::SetTrue)
.required(false);
let flag_debug = Arg::new(FLAG_DEBUG)
.long(FLAG_DEBUG)
.help("Store LLVM debug information in the generated program")
let flag_emit_llvm_ir = Arg::new(FLAG_EMIT_LLVM_IR)
.long(FLAG_EMIT_LLVM_IR)
.help("Emit a `.ll` file containing the LLVM IR of the program")
.action(ArgAction::SetTrue)
.required(false);
// `--profiling` is an opt-in boolean flag (absent = false). `build` reads it
// via `matches.get_flag(FLAG_PROFILING)` to decide whether debug info is
// emitted. The help text previously misspelled "optimized".
let flag_profiling = Arg::new(FLAG_PROFILING)
    .long(FLAG_PROFILING)
    .help("Keep debug info in the final generated program even in optimized builds")
    .action(ArgAction::SetTrue)
    .required(false);
@ -163,7 +170,8 @@ pub fn build_app() -> Command {
.arg(flag_max_threads.clone())
.arg(flag_opt_size.clone())
.arg(flag_dev.clone())
.arg(flag_debug.clone())
.arg(flag_emit_llvm_ir.clone())
.arg(flag_profiling.clone())
.arg(flag_time.clone())
.arg(flag_linker.clone())
.arg(flag_prebuilt.clone())
@ -212,7 +220,8 @@ pub fn build_app() -> Command {
.arg(flag_max_threads.clone())
.arg(flag_opt_size.clone())
.arg(flag_dev.clone())
.arg(flag_debug.clone())
.arg(flag_emit_llvm_ir.clone())
.arg(flag_profiling.clone())
.arg(flag_time.clone())
.arg(flag_linker.clone())
.arg(flag_prebuilt.clone())
@ -234,7 +243,8 @@ pub fn build_app() -> Command {
.arg(flag_max_threads.clone())
.arg(flag_opt_size.clone())
.arg(flag_dev.clone())
.arg(flag_debug.clone())
.arg(flag_emit_llvm_ir.clone())
.arg(flag_profiling.clone())
.arg(flag_time.clone())
.arg(flag_linker.clone())
.arg(flag_prebuilt.clone())
@ -247,7 +257,8 @@ pub fn build_app() -> Command {
.arg(flag_max_threads.clone())
.arg(flag_opt_size.clone())
.arg(flag_dev.clone())
.arg(flag_debug.clone())
.arg(flag_emit_llvm_ir.clone())
.arg(flag_profiling.clone())
.arg(flag_time.clone())
.arg(flag_linker.clone())
.arg(flag_prebuilt.clone())
@ -376,7 +387,8 @@ pub fn build_app() -> Command {
.arg(flag_max_threads)
.arg(flag_opt_size)
.arg(flag_dev)
.arg(flag_debug)
.arg(flag_emit_llvm_ir)
.arg(flag_profiling)
.arg(flag_time)
.arg(flag_linker)
.arg(flag_prebuilt)
@ -453,7 +465,6 @@ pub fn test(matches: &ArgMatches, triple: Triple) -> io::Result<i32> {
let arena = &arena;
let target = &triple;
let opt_level = opt_level;
let target_info = TargetInfo::from(target);
// TODO may need to determine this dynamically based on dev builds.
let function_kind = FunctionKind::LambdaSet;
@ -696,7 +707,13 @@ pub fn build(
CodeGenBackend::Llvm(backend_mode)
};
let emit_debug_info = matches.get_flag(FLAG_DEBUG);
let emit_llvm_ir = matches.get_flag(FLAG_EMIT_LLVM_IR);
if emit_llvm_ir && !matches!(code_gen_backend, CodeGenBackend::Llvm(_)) {
user_error!("Cannot emit llvm ir while using a dev backend.");
}
let emit_debug_info = matches.get_flag(FLAG_PROFILING)
|| matches!(opt_level, OptLevel::Development | OptLevel::Normal);
let emit_timings = matches.get_flag(FLAG_TIME);
let threading = match matches.get_one::<usize>(FLAG_MAX_THREADS) {
@ -745,6 +762,7 @@ pub fn build(
backend: code_gen_backend,
opt_level,
emit_debug_info,
emit_llvm_ir,
};
let load_config = standard_load_config(&triple, build_ordering, threading);

View file

@ -145,7 +145,7 @@ fn main() -> io::Result<()> {
let triple = target.to_triple();
let function_kind = FunctionKind::LambdaSet;
let (platform_path, stub_lib, stub_dll_symbols) = roc_linker::generate_stub_lib(
&input_path,
input_path,
RocCacheDir::Persistent(cache::roc_cache_dir().as_path()),
&triple,
function_kind,
@ -333,7 +333,10 @@ fn main() -> io::Result<()> {
}
}
FormatMode::WriteToStdout => {
std::io::stdout().lock().write_all(src.as_bytes()).unwrap();
std::io::stdout()
.lock()
.write_all(formatted_src.as_bytes())
.unwrap();
0
}

View file

@ -554,19 +554,20 @@ mod cli_run {
r#"
This expectation failed:
18 expect x != x
^^^^^^
19 expect words == []
^^^^^^^^^^^
When it failed, these variables had these values:
x : Num *
x = 42
words : List Str
words = ["this", "will", "for", "sure", "be", "a", "large", "string", "so", "when", "we", "split", "it", "it", "will", "use", "seamless", "slices", "which", "affect", "printing"]
[#UserApp] 42
[#UserApp] "Fjoer en ferdjer frieten oan dyn geve lea"
[#UserApp] "abc"
[#UserApp] 10
[#UserApp] (A (B C))
[<ignored for tests>:22] x = 42
[<ignored for tests>:23] "Fjoer en ferdjer frieten oan dyn geve lea" = "Fjoer en ferdjer frieten oan dyn geve lea"
[<ignored for tests>:24] "this is line 24" = "this is line 24"
[<ignored for tests>:13] x = "abc"
[<ignored for tests>:13] x = 10
[<ignored for tests>:13] x = (A (B C))
Program finished!
"#
),
@ -873,7 +874,7 @@ mod cli_run {
This roc file can print it's own source code. The source is:
app "ingested-file"
packages { pf: "https://github.com/roc-lang/basic-cli/releases/download/0.6.0/QOQW08n38nHHrVVkJNiPIjzjvbR3iMjXeFY5w1aT46w.tar.br" }
packages { pf: "https://github.com/roc-lang/basic-cli/releases/download/0.7.1/Icc3xJoIixF3hCcfXrDwLCu4wQHtNdPyoJkEbkgIElA.tar.br" }
imports [
pf.Stdout,
"ingested-file.roc" as ownCode : Str,
@ -900,7 +901,7 @@ mod cli_run {
&[],
&[],
&[],
"30256\n",
"162088\n",
UseValgrind::No,
TestCliCommands::Run,
)
@ -942,7 +943,7 @@ mod cli_run {
test_roc_app_slim(
"examples",
"inspect-logging.roc",
r#"{friends: [{2}, {2}, {0, 1}], people: [{age: 27, favoriteColor: Blue, firstName: "John", hasBeard: Bool.true, lastName: "Smith"}, {age: 47, favoriteColor: Green, firstName: "Debby", hasBeard: Bool.false, lastName: "Johnson"}, {age: 33, favoriteColor: (RGB (255, 255, 0)), firstName: "Jane", hasBeard: Bool.false, lastName: "Doe"}]}
r#"(@Community {friends: [{2}, {2}, {0, 1}], people: [(@Person {age: 27, favoriteColor: Blue, firstName: "John", hasBeard: Bool.true, lastName: "Smith"}), (@Person {age: 47, favoriteColor: Green, firstName: "Debby", hasBeard: Bool.false, lastName: "Johnson"}), (@Person {age: 33, favoriteColor: (RGB (255, 255, 0)), firstName: "Jane", hasBeard: Bool.false, lastName: "Doe"})]})
"#,
UseValgrind::Yes,
)
@ -975,13 +976,13 @@ mod cli_run {
// TODO fix QuicksortApp and then remove this!
match roc_filename {
"QuicksortApp.roc" => {
"quicksortApp.roc" => {
eprintln!(
"WARNING: skipping testing benchmark {roc_filename} because the test is broken right now!"
);
return;
}
"TestAStar.roc" => {
"testAStar.roc" => {
if cfg!(feature = "wasm32-cli-run") {
eprintln!(
"WARNING: skipping testing benchmark {roc_filename} because it currently does not work on wasm32 due to dictionaries."
@ -1136,20 +1137,20 @@ mod cli_run {
#[test]
#[cfg_attr(windows, ignore)]
fn nqueens() {
test_benchmark("NQueens.roc", &["6"], "4\n", UseValgrind::Yes)
test_benchmark("nQueens.roc", &["6"], "4\n", UseValgrind::Yes)
}
#[test]
#[cfg_attr(windows, ignore)]
fn cfold() {
test_benchmark("CFold.roc", &["3"], "11 & 11\n", UseValgrind::Yes)
test_benchmark("cFold.roc", &["3"], "11 & 11\n", UseValgrind::Yes)
}
#[test]
#[cfg_attr(windows, ignore)]
fn deriv() {
test_benchmark(
"Deriv.roc",
"deriv.roc",
&["2"],
"1 count: 6\n2 count: 22\n",
UseValgrind::Yes,
@ -1159,14 +1160,14 @@ mod cli_run {
#[test]
#[cfg_attr(windows, ignore)]
fn rbtree_ck() {
test_benchmark("RBTreeCk.roc", &["100"], "10\n", UseValgrind::Yes)
test_benchmark("rBTreeCk.roc", &["100"], "10\n", UseValgrind::Yes)
}
#[test]
#[cfg_attr(windows, ignore)]
fn rbtree_insert() {
test_benchmark(
"RBTreeInsert.roc",
"rBTreeInsert.roc",
&[],
"Node Black 0 {} Empty Empty\n",
UseValgrind::Yes,
@ -1178,25 +1179,25 @@ mod cli_run {
#[test]
fn rbtree_del() {
test_benchmark(
"RBTreeDel.roc",
"rBTreeDel.roc",
&["420"],
&[],
"30\n",
true
UseValgrind::Yes,
)
}*/
}
*/
#[test]
#[cfg_attr(windows, ignore)]
fn astar() {
test_benchmark("TestAStar.roc", &[], "True\n", UseValgrind::No)
test_benchmark("testAStar.roc", &[], "True\n", UseValgrind::No)
}
#[test]
#[cfg_attr(windows, ignore)]
fn base64() {
test_benchmark(
"TestBase64.roc",
"testBase64.roc",
&[],
"encoded: SGVsbG8gV29ybGQ=\ndecoded: Hello World\n",
UseValgrind::Yes,
@ -1206,19 +1207,19 @@ mod cli_run {
#[test]
#[cfg_attr(windows, ignore)]
fn closure() {
test_benchmark("Closure.roc", &[], "", UseValgrind::No)
test_benchmark("closure.roc", &[], "", UseValgrind::No)
}
#[test]
#[cfg_attr(windows, ignore)]
fn issue2279() {
test_benchmark("Issue2279.roc", &[], "Hello, world!\n", UseValgrind::Yes)
test_benchmark("issue2279.roc", &[], "Hello, world!\n", UseValgrind::Yes)
}
#[test]
fn quicksort_app() {
test_benchmark(
"QuicksortApp.roc",
"quicksortApp.roc",
&[],
"todo put the correct quicksort answer here",
UseValgrind::Yes,
@ -1332,7 +1333,7 @@ mod cli_run {
&[],
indoc!(
r#"
TYPE MISMATCH tests/known_bad/TypeError.roc
TYPE MISMATCH in tests/known_bad/TypeError.roc
Something is off with the body of the main definition:
@ -1361,6 +1362,29 @@ mod cli_run {
);
}
#[test]
fn known_type_error_with_long_path() {
check_compile_error(
&known_bad_file("UnusedImportButWithALongFileNameForTesting.roc"),
&[],
indoc!(
r#"
UNUSED IMPORT in ...nown_bad/UnusedImportButWithALongFileNameForTesting.roc
Nothing from Symbol is used in this module.
3 imports [Symbol.{ Ident }]
^^^^^^^^^^^^^^^^
Since Symbol isn't used, you don't need to import it.
0 errors and 1 warning found in <ignored for test> ms."#
),
);
}
#[test]
fn exposed_not_defined() {
check_compile_error(
@ -1368,7 +1392,7 @@ mod cli_run {
&[],
indoc!(
r#"
MISSING DEFINITION tests/known_bad/ExposedNotDefined.roc
MISSING DEFINITION in tests/known_bad/ExposedNotDefined.roc
bar is listed as exposed, but it isn't defined in this module.
@ -1389,7 +1413,7 @@ mod cli_run {
&[],
indoc!(
r#"
UNUSED IMPORT tests/known_bad/UnusedImport.roc
UNUSED IMPORT in tests/known_bad/UnusedImport.roc
Nothing from Symbol is used in this module.
@ -1412,7 +1436,7 @@ mod cli_run {
&[],
indoc!(
r#"
UNKNOWN GENERATES FUNCTION tests/known_bad/UnknownGeneratesWith.roc
UNKNOWN GENERATES FUNCTION in tests/known_bad/UnknownGeneratesWith.roc
I don't know how to generate the foobar function.

View file

@ -1,11 +0,0 @@
app
*.o
*.dSYM
dynhost
libapp.so
metadata
preprocessedhost
packages-test
multi-dep-str/multi-dep-str
multi-dep-thunk/multi-dep-thunk

View file

@ -1 +0,0 @@
Main

View file

@ -52,9 +52,9 @@ export fn roc_panic(msg: *RocStr, tag_id: u32) callconv(.C) void {
std.process.exit(1);
}
export fn roc_dbg(loc: *RocStr, msg: *RocStr) callconv(.C) void {
export fn roc_dbg(loc: *RocStr, msg: *RocStr, src: *RocStr) callconv(.C) void {
const stderr = std.io.getStdErr().writer();
stderr.print("[{s}] {s}\n", .{ loc.asSlice(), msg.asSlice() }) catch unreachable;
stderr.print("[{s}] {s} = {s}\n", .{ loc.asSlice(), src.asSlice(), msg.asSlice() }) catch unreachable;
}
extern fn kill(pid: c_int, sig: c_int) c_int;

View file

@ -1 +0,0 @@
Main

View file

@ -52,9 +52,9 @@ export fn roc_panic(msg: *RocStr, tag_id: u32) callconv(.C) void {
std.process.exit(1);
}
export fn roc_dbg(loc: *RocStr, msg: *RocStr) callconv(.C) void {
export fn roc_dbg(loc: *RocStr, msg: *RocStr, src: *RocStr) callconv(.C) void {
const stderr = std.io.getStdErr().writer();
stderr.print("[{s}] {s}\n", .{ loc.asSlice(), msg.asSlice() }) catch unreachable;
stderr.print("[{s}] {s} = {s}\n", .{ loc.asSlice(), src.asSlice(), msg.asSlice() }) catch unreachable;
}
extern fn kill(pid: c_int, sig: c_int) c_int;

View file

@ -52,9 +52,9 @@ export fn roc_panic(msg: *RocStr, tag_id: u32) callconv(.C) void {
std.process.exit(1);
}
export fn roc_dbg(loc: *RocStr, msg: *RocStr) callconv(.C) void {
export fn roc_dbg(loc: *RocStr, msg: *RocStr, src: *RocStr) callconv(.C) void {
const stderr = std.io.getStdErr().writer();
stderr.print("[{s}] {s}\n", .{ loc.asSlice(), msg.asSlice() }) catch unreachable;
stderr.print("[{s}] {s} = {s}\n", .{ loc.asSlice(), src.asSlice(), msg.asSlice() }) catch unreachable;
}
extern fn kill(pid: c_int, sig: c_int) c_int;

View file

@ -0,0 +1,7 @@
interface UnusedImportButWithALongFileNameForTesting
exposes [plainText, emText]
imports [Symbol.{ Ident }]
plainText = \str -> PlainText str
emText = \str -> EmText str

View file

@ -1,12 +0,0 @@
*.dSYM
libhost.a
libapp.so
dynhost
preprocessedhost
metadata
expects
benchmarks/rbtree-ck
benchmarks/rbtree-insert
benchmarks/test-astar
benchmarks/test-base64

View file

@ -1,2 +0,0 @@
fibonacci
quicksort

View file

@ -66,9 +66,9 @@ export fn roc_panic(msg: *RocStr, tag_id: u32) callconv(.C) void {
std.process.exit(1);
}
export fn roc_dbg(loc: *RocStr, msg: *RocStr) callconv(.C) void {
export fn roc_dbg(loc: *RocStr, msg: *RocStr, src: *RocStr) callconv(.C) void {
const stderr = std.io.getStdErr().writer();
stderr.print("[{s}] {s}\n", .{ loc.asSlice(), msg.asSlice() }) catch unreachable;
stderr.print("[{s}] {s} = {s}\n", .{ loc.asSlice(), src.asSlice(), msg.asSlice() }) catch unreachable;
}
export fn roc_memset(dst: [*]u8, value: i32, size: usize) callconv(.C) void {

View file

@ -63,9 +63,9 @@ export fn roc_panic(msg: *RocStr, tag_id: u32) callconv(.C) void {
std.process.exit(1);
}
export fn roc_dbg(loc: *RocStr, msg: *RocStr) callconv(.C) void {
export fn roc_dbg(loc: *RocStr, msg: *RocStr, src: *RocStr) callconv(.C) void {
const stderr = std.io.getStdErr().writer();
stderr.print("[{s}] {s}\n", .{ loc.asSlice(), msg.asSlice() }) catch unreachable;
stderr.print("[{s}] {s} = {s}\n", .{ loc.asSlice(), src.asSlice(), msg.asSlice() }) catch unreachable;
}
export fn roc_memset(dst: [*]u8, value: i32, size: usize) callconv(.C) void {

View file

@ -1,12 +0,0 @@
cfold
closure
deriv
issue2279
nqueens
quicksortapp
RBTreeCk
RBTreeDel
RBTreeInsert
TestAStar
TestBase64
*.wasm

View file

@ -15,7 +15,7 @@ closure1 = \_ ->
Task.succeed (foo toUnitBorrowed "a long string such that it's malloced")
|> Task.map \_ -> {}
toUnitBorrowed = \x -> Str.countGraphemes x
toUnitBorrowed = \x -> Str.countUtf8Bytes x
foo = \f, x -> f x

View file

@ -68,9 +68,9 @@ export fn roc_panic(msg: *RocStr, tag_id: u32) callconv(.C) void {
std.process.exit(1);
}
export fn roc_dbg(loc: *RocStr, msg: *RocStr) callconv(.C) void {
export fn roc_dbg(loc: *RocStr, msg: *RocStr, src: *RocStr) callconv(.C) void {
const stderr = std.io.getStdErr().writer();
stderr.print("[{s}] {s}\n", .{ loc.asSlice(), msg.asSlice() }) catch unreachable;
stderr.print("[{s}] {s} = {s}\n", .{ loc.asSlice(), src.asSlice(), msg.asSlice() }) catch unreachable;
}
export fn roc_memset(dst: [*]u8, value: i32, size: usize) callconv(.C) void {

View file

@ -248,4 +248,4 @@ del = \t, k ->
rebalanceLeft cx lx ky vy ry
Delmin (Del ry Bool.false) ky vy ->
Del (Node cx lx ky vy ry) Bool.false
Del (Node cx lx ky vy ry) Bool.false

View file

@ -14,10 +14,14 @@ polyDbg = \x ->
x
main =
str = "this will for sure be a large string so when we split it it will use seamless slices which affect printing"
words = Str.split str " "
expect words == []
x = 42
expect x != x
dbg x
dbg "Fjoer en ferdjer frieten oan dyn geve lea"
dbg "this is line 24"
r = {x : polyDbg "abc", y: polyDbg 10u8, z : polyDbg (A (B C))}

View file

@ -52,10 +52,10 @@ export fn roc_panic(msg: *RocStr, tag_id: u32) callconv(.C) void {
std.process.exit(1);
}
export fn roc_dbg(loc: *RocStr, msg: *RocStr) callconv(.C) void {
export fn roc_dbg(loc: *RocStr, msg: *RocStr, src: *RocStr) callconv(.C) void {
// This platform uses stdout for testing purposes instead of the normal stderr.
const stdout = std.io.getStdOut().writer();
stdout.print("[{s}] {s}\n", .{ loc.asSlice(), msg.asSlice() }) catch unreachable;
stdout.print("[{s}] {s} = {s}\n", .{ loc.asSlice(), src.asSlice(), msg.asSlice() }) catch unreachable;
}
export fn roc_memset(dst: [*]u8, value: i32, size: usize) callconv(.C) void {

File diff suppressed because one or more lines are too long

View file

@ -255,7 +255,12 @@ pub fn run_cmd<'a, I: IntoIterator<Item = &'a str>, E: IntoIterator<Item = (&'a
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()
.unwrap_or_else(|_| panic!("failed to execute cmd `{cmd_name}` in CLI test"));
.unwrap_or_else(|err| {
panic!(
"Encountered error:\n\t{:?}\nWhile executing cmd:\n\t{:?}",
err, cmd_str
)
});
{
let stdin = child.stdin.as_mut().expect("Failed to open stdin");
@ -269,7 +274,7 @@ pub fn run_cmd<'a, I: IntoIterator<Item = &'a str>, E: IntoIterator<Item = (&'a
let output = child
.wait_with_output()
.unwrap_or_else(|_| panic!("failed to execute cmd `{cmd_name}` in CLI test"));
.unwrap_or_else(|_| panic!("Failed to execute cmd:\n\t`{:?}`", cmd_str));
Out {
cmd_str,

View file

@ -1173,6 +1173,16 @@ fn lowlevel_spec<'a>(
_ => unreachable!(),
}
}
ListClone => {
let list = env.symbols[&arguments[0]];
let bag = builder.add_get_tuple_field(block, list, LIST_BAG_INDEX)?;
let cell = builder.add_get_tuple_field(block, list, LIST_CELL_INDEX)?;
let _unit = builder.add_update(block, update_mode_var, cell)?;
with_new_heap_cell(builder, block, bag)
}
ListSwap => {
let list = env.symbols[&arguments[0]];

View file

@ -1,12 +0,0 @@
[package]
name = "arena-pool"
description = "An implementation of an arena allocator designed for the compiler's workloads."
authors.workspace = true
edition.workspace = true
license.workspace = true
repository.workspace = true
version.workspace = true
[dependencies]
roc_error_macros = { path = "../../error_macros" }

View file

@ -1,2 +0,0 @@
//! An implementation of an [arena allocator](https://mgravell.github.io/Pipelines.Sockets.Unofficial/docs/arenas.html) designed for the compiler's workloads.
pub mod pool;

View file

@ -1,397 +0,0 @@
use roc_error_macros::internal_error;
use std::marker::PhantomPinned;
use std::ptr::{copy_nonoverlapping, NonNull};
/// A handle to a single `T`.
///
/// The handle is only dereferenced through `get` / `get_mut`, both of which
/// first ask the supplied arena to `verify_ownership` of the stored pointer.
pub struct ArenaRef<T> {
// Raw, non-null pointer to the referenced value.
ptr: NonNull<T>,
// `PhantomPinned` marker: opts this type out of `Unpin`.
_pin: PhantomPinned,
}
impl<T> ArenaRef<T> {
    /// Borrows the referenced value immutably.
    ///
    /// `arena` is asked to verify that it owns the stored pointer before the
    /// pointer is followed.
    pub fn get<'a, A: AsArena<T>>(&'a self, arena: &A) -> &'a T {
        let target = self.ptr;

        arena.verify_ownership(target);

        // SAFETY: the ownership check above has passed, so the pointee lives
        // in memory that is only released once the pool which created it
        // (together with every Arena it detached) has been deallocated.
        unsafe { &*target.as_ptr() }
    }

    /// Borrows the referenced value mutably.
    ///
    /// `arena` is asked to verify that it owns the stored pointer before the
    /// pointer is followed.
    pub fn get_mut<'a, A: AsArena<T>>(&'a mut self, arena: &A) -> &'a mut T {
        let target = self.ptr;

        arena.verify_ownership(target);

        // SAFETY: same reasoning as in `get`; exclusivity of the returned
        // borrow is tied to the `&'a mut self` receiver.
        unsafe { &mut *target.as_ptr() }
    }
}
/// A Vec-like buffer of `T` whose storage is obtained from an `Arena`
/// (via `Arena::alloc_vec`) instead of the global allocator.
///
/// NOTE(review): this comment used to say that the initial capacity is the
/// maximum capacity forever and that exceeding it panics. `push` contains a
/// branch that doubles `capacity` and requests a new buffer from the arena,
/// so that description does not match the code - confirm the intended contract.
pub struct ArenaVec<T> {
// Start of the element buffer handed out by the arena.
buffer_ptr: NonNull<T>,
// Number of elements stored so far; `get`/`get_mut` bounds-check against it.
len: usize,
// Number of elements the current buffer was requested for.
capacity: usize,
// `PhantomPinned` marker: opts this type out of `Unpin`.
_pin: PhantomPinned,
}
impl<T> ArenaVec<T> {
    /// Creates an empty `ArenaVec` (capacity 0) whose buffer pointer lives in `arena`.
    pub fn new_in(arena: &mut Arena<T>) -> Self {
        // We can't start with a NonNull::dangling pointer because when we go
        // to push elements into this, they'll try to verify the dangling
        // pointer resides in the arena it was given, which will likely panic.
        //
        // Instead, we'll take a pointer inside the array but never use it
        // other than for verification, because our capacity is 0.
        Self::with_capacity_in(0, arena)
    }

    /// Creates an empty `ArenaVec` with room for `capacity` elements,
    /// using a buffer requested from `arena`.
    pub fn with_capacity_in(capacity: usize, arena: &mut Arena<T>) -> Self {
        let ptr = arena.alloc_vec(capacity);

        Self {
            // NOTE(review): relies on `alloc_vec` never returning null - confirm.
            buffer_ptr: unsafe { NonNull::new_unchecked(ptr) },
            capacity,
            len: 0,
            _pin: PhantomPinned,
        }
    }

    /// Appends `val`, growing the buffer (inside `arena`) when it is full.
    ///
    /// `arena` must be the arena this vec's buffer was allocated from; this is
    /// checked via `verify_ownership` before anything is read or written.
    pub fn push(&mut self, val: T, arena: &mut Arena<T>) {
        // Verify that this is the arena where we originally got our buffer,
        // and is therefore safe to read and to write to. (If we have sufficient
        // capacity, we'll write to it, and otherwise we'll read from it when
        // copying our buffer over to the new reserved block.)
        arena.verify_ownership(self.buffer_ptr);

        // The buffer has a free slot only while len is strictly below
        // capacity; when they are equal the buffer is full and index `len`
        // would be one past the end.
        if self.len < self.capacity {
            // We're all set!
            //
            // This empty branch is just here for branch prediction,
            // since this should be the most common case in practice.
        } else {
            // Double our capacity and reserve a new block. A vec created with
            // capacity 0 (see `new_in`) must still grow, hence the `max(1)`.
            let new_capacity = (self.capacity * 2).max(1);
            let ptr = arena.alloc_vec(new_capacity);

            // SAFETY: the existing buffer holds self.len initialized elements,
            // and the new one has room for at least that many, so copying
            // that many between them is safe.
            unsafe {
                // Copy all elements from the current buffer into the new one
                copy_nonoverlapping(self.buffer_ptr.as_ptr(), ptr, self.len);
            }

            self.buffer_ptr = unsafe { NonNull::new_unchecked(ptr) };

            // Only record the new capacity once the new buffer is in place.
            self.capacity = new_capacity;
        }

        // Store the element in the appropriate memory address.
        //
        // SAFETY: `self.len < self.capacity` holds here, so the slot is inside
        // the buffer. The slot is uninitialized, so it is written with
        // `write`, which neither reads nor drops the previous contents.
        unsafe {
            self.buffer_ptr.as_ptr().add(self.len).write(val);
        }

        self.len += 1;
    }

    /// Returns a shared reference to the element at `index`,
    /// or `None` if `index` is out of bounds.
    pub fn get<'a>(&'a self, index: usize, arena: &Arena<T>) -> Option<&'a T> {
        arena.verify_ownership(self.buffer_ptr);

        if index < self.len {
            // SAFETY: we know this pointer is safe to follow because we've
            // done a bounds check, and because we know it will only get
            // deallocated once the pool where it was created gets deallocated
            // (along with all of the Arenas it detached), and we just verified
            // that this buffer belongs to the given arena.
            Some(unsafe { &*self.buffer_ptr.as_ptr().add(index) })
        } else {
            None
        }
    }

    /// Returns a mutable reference to the element at `index`,
    /// or `None` if `index` is out of bounds.
    pub fn get_mut<'a>(&'a mut self, index: usize, arena: &Arena<T>) -> Option<&'a mut T> {
        arena.verify_ownership(self.buffer_ptr);

        if index < self.len {
            // SAFETY: we know this pointer is safe to follow because we've
            // done a bounds check, and because we know it will only get
            // deallocated once the pool where it was created gets deallocated
            // (along with all of the Arenas it detached), and we just verified
            // that this buffer belongs to the given arena.
            Some(unsafe { &mut *self.buffer_ptr.as_ptr().add(index) })
        } else {
            None
        }
    }
}
/// Owner of the memory that leased `Arena`s allocate into.
///
/// Created together with an `ArenaIter` by `ArenaPool::new` /
/// `ArenaPool::with_chunk_size`; arenas are handed back with `reabsorb`.
#[derive(PartialEq, Eq)]
pub struct ArenaPool<T> {
// Single allocation with capacity `elems_per_arena * num_arenas`;
// `ArenaIter` hands out consecutive slices of it, one per arena.
first_chunk: Vec<T>,
// Extra chunks taken over from arenas in `reabsorb`.
extra_chunks: Vec<Vec<T>>,
// Starts at `num_arenas`, decremented by `reabsorb`; `Drop` asserts it is 0.
num_leased: usize,
// The `chunk_size` passed at construction.
default_chunk_capacity: usize,
}
impl<T> ArenaPool<T> {
    const DEFAULT_CHUNK_SIZE: usize = 1024;

    /// Creates a pool together with an iterator that yields its arenas.
    ///
    /// Be careful! Both of these arguments are of type usize.
    ///
    /// * `num_arenas` - how many arenas the returned iterator will yield.
    /// * `elems_per_arena` - how many elements fit in each arena's first chunk.
    ///
    /// The yielded Arenas can be given to different threads, where they can be
    /// used to allocate ArenaRef and ArenaVec values. Those values can then be
    /// dereferenced by the Arena that created them, or by this pool once those
    /// Arenas have been reabsorbed back into it.
    ///
    /// (A word of warning: if you try to use this pool to dereference ArenaRef
    /// and ArenaVec values which were allocated by arenas that have *not* yet
    /// been reabsorbed, it may work some of the time and panic other times,
    /// depending on whether the arena needed to allocate extra chunks beyond
    /// its initial chunk. tl;dr - doing that may panic, so don't try it!)
    ///
    /// Before this pool gets dropped, you must call reabsorb() on every
    /// arena that has been leased - otherwise, you'll get a panic when this
    /// gets dropped! The memory safety of the system depends on all arenas
    /// having been reabsorbed before the pool gets deallocated, which is why
    /// the pool's Drop implementation enforces it.
    pub fn new(num_arenas: usize, elems_per_arena: usize) -> (ArenaPool<T>, ArenaIter<T>) {
        Self::with_chunk_size(num_arenas, elems_per_arena, Self::DEFAULT_CHUNK_SIZE)
    }

    /// Same as `new`, but with an explicit `chunk_size`, which is stored as
    /// the pool's `default_chunk_capacity`: the size intended for the extra
    /// chunks an arena allocates once its main buffer is exhausted.
    ///
    /// Things will run fastest if that main buffer never runs out, though!
    pub fn with_chunk_size(
        num_arenas: usize,
        elems_per_arena: usize,
        chunk_size: usize,
    ) -> (ArenaPool<T>, ArenaIter<T>) {
        // One contiguous allocation backs the first chunk of every arena.
        let mut pool = Self {
            first_chunk: Vec::with_capacity(elems_per_arena * num_arenas),
            extra_chunks: Vec::new(),
            // Every arena the iterator can yield counts as leased up front.
            num_leased: num_arenas,
            default_chunk_capacity: chunk_size,
        };

        // The iterator carves `first_chunk` into `num_arenas` slices of
        // `elems_per_arena` elements each, starting at the buffer's base.
        let iter = ArenaIter {
            ptr: pool.first_chunk.as_mut_ptr(),
            quantity_remaining: num_arenas,
            first_chunk_capacity: elems_per_arena,
        };

        (pool, iter)
    }

    /// Return an arena to the pool. (This would have been called "return" but
    /// that's a reserved keyword.)
    pub fn reabsorb(&mut self, arena: Arena<T>) {
        let pool_start = self.first_chunk.as_ptr();
        let pool_capacity = self.first_chunk.capacity();

        // Make sure the arena being handed back was actually leased
        // by this pool in the first place!
        verify_ownership(
            pool_start,
            pool_capacity,
            &self.extra_chunks,
            arena.first_chunk_ptr,
        );

        // Take over the arena's extra chunks so their memory stays live after
        // the arena is gone - pointers into them may still be dereferenced.
        self.extra_chunks.extend(arena.extra_chunks);

        self.num_leased -= 1;
    }
}
impl<T> Drop for ArenaPool<T> {
    /// Enforces that every leased arena has been reabsorbed.
    ///
    /// An arena that is still out could be used, together with outstanding
    /// IDs, to read memory this pool is about to free. Panicking here is
    /// preferred over allowing that use-after-free.
    fn drop(&mut self) {
        let still_leased = self.num_leased;

        assert_eq!(still_leased, 0);
    }
}
/// Iterator that hands out the arenas of a pool, one `Arena` per `next` call.
pub struct ArenaIter<T> {
// Start of the first-chunk slice that the next arena will receive.
ptr: *mut T,
// How many arenas have not been handed out yet.
quantity_remaining: usize,
// Number of `T` slots in each arena's first chunk.
first_chunk_capacity: usize,
}
/// Yields the pool's arenas one at a time. Each arena receives the next
/// `first_chunk_capacity`-element slice of the shared buffer.
impl<T> Iterator for ArenaIter<T> {
    type Item = Arena<T>;

    /// Returns the next empty arena, or `None` once all of them have been
    /// handed out.
    fn next(&mut self) -> Option<Arena<T>> {
        if self.quantity_remaining == 0 {
            return None;
        }

        let arena = Arena {
            first_chunk_ptr: self.ptr,
            first_chunk_len: 0,
            first_chunk_cap: self.first_chunk_capacity,
            extra_chunks: Vec::new(),
        };

        // Advance past the slice that was just given away.
        self.quantity_remaining -= 1;
        self.ptr = unsafe { self.ptr.add(self.first_chunk_capacity) };

        Some(arena)
    }
}
/// A single arena: a fixed-size first chunk plus any number of overflow
/// chunks that are created once the first chunk is full.
#[derive(PartialEq, Eq)]
pub struct Arena<T> {
// Start of this arena's first chunk.
first_chunk_ptr: *mut T,
// Number of first-chunk slots handed out so far.
first_chunk_len: usize,
// Total number of slots in the first chunk.
first_chunk_cap: usize,
// Overflow chunks; `ArenaPool::reabsorb` moves these into the pool.
extra_chunks: Vec<Vec<T>>,
}
impl<T> Arena<T> {
    /// Moves `val` into the arena and returns a reference to its slot.
    ///
    /// The first chunk is used while it has room; after that, values go into
    /// extra chunks (each created with `first_chunk_cap` capacity). A chunk is
    /// never pushed to beyond its capacity, so previously returned pointers
    /// are not invalidated by a reallocation.
    ///
    /// Values stored in the first chunk are written with a raw pointer write,
    /// so this type never runs their destructors.
    pub fn alloc(&mut self, val: T) -> ArenaRef<T> {
        let ptr: *mut T = if self.first_chunk_len < self.first_chunk_cap {
            // The next free slot is at index `first_chunk_len`. Compute it
            // BEFORE bumping the length; doing it afterwards would skip slot
            // 0 and eventually yield a one-past-the-end pointer.
            let slot = unsafe { self.first_chunk_ptr.add(self.first_chunk_len) };

            // SAFETY: `first_chunk_len < first_chunk_cap`, so `slot` lies
            // inside this arena's first chunk. The slot is uninitialized, so
            // `write` (which does not drop the old contents) is required.
            unsafe { slot.write(val) };
            self.first_chunk_len += 1;

            slot
        } else {
            // The first chunk is full, so fall back to the extra chunks.
            // Make sure the last one has room for one more element.
            let needs_new_chunk = match self.extra_chunks.last() {
                Some(chunk) => chunk.len() >= chunk.capacity(),
                None => true,
            };

            if needs_new_chunk {
                self.extra_chunks
                    .push(Vec::with_capacity(self.first_chunk_cap));
            }

            let chunk = self.extra_chunks.last_mut().unwrap();
            let index = chunk.len();

            chunk.push(val);

            // Pointer to the element we just pushed.
            &mut chunk[index] as *mut T
        };

        ArenaRef {
            ptr: unsafe { NonNull::new_unchecked(ptr) },
            _pin: PhantomPinned,
        }
    }

    /// Reserves `num_elems` contiguous, UNINITIALIZED slots and returns a
    /// pointer to the first one.
    ///
    /// NOTE(review): when the slots come from an extra chunk, the chunk's
    /// length is advanced over them so later allocations cannot overlap. The
    /// chunk `Vec` will therefore drop those elements when it is dropped, so
    /// the caller must initialize every reserved slot before then. Confirm
    /// this matches how the caller uses the returned pointer.
    fn alloc_vec(&mut self, num_elems: usize) -> *mut T {
        let fits_in_first_chunk = self
            .first_chunk_len
            .checked_add(num_elems)
            .map_or(false, |end| end <= self.first_chunk_cap);

        if fits_in_first_chunk {
            // The reserved region STARTS at the current length, so take the
            // pointer before advancing the length past it.
            let start = unsafe { self.first_chunk_ptr.add(self.first_chunk_len) };

            self.first_chunk_len += num_elems;

            start
        } else {
            let new_chunk_cap = self.first_chunk_cap.max(num_elems);

            // Make sure the last extra chunk has `num_elems` free slots.
            let needs_new_chunk = match self.extra_chunks.last() {
                Some(chunk) => chunk.capacity() - chunk.len() < num_elems,
                None => true,
            };

            if needs_new_chunk {
                self.extra_chunks.push(Vec::with_capacity(new_chunk_cap));
            }

            let chunk = self.extra_chunks.last_mut().unwrap();
            let index = chunk.len();

            // SAFETY: the check above (or the fresh chunk's capacity of at
            // least `num_elems`) guarantees `index + num_elems <= capacity`,
            // so both the pointer offset and the new length stay in bounds.
            // Indexing `chunk[index]` instead would always panic, because
            // `index == chunk.len()`.
            unsafe {
                let start = chunk.as_mut_ptr().add(index);

                chunk.set_len(index + num_elems);

                start
            }
        }
    }
}
/// Implemented by anything that owns arena memory and can check whether a
/// pointer falls inside it.
pub trait AsArena<T> {
/// Checks that `ptr` points into memory owned by `self`.
fn verify_ownership(&self, ptr: NonNull<T>);
}
impl<T> AsArena<T> for ArenaPool<T> {
    /// Checks `ptr` against the pool's whole first chunk and every extra
    /// chunk the pool currently holds.
    fn verify_ownership(&self, ptr: NonNull<T>) {
        let base = self.first_chunk.as_ptr();
        let capacity = self.first_chunk.capacity();
        let candidate = ptr.as_ptr();

        verify_ownership(base, capacity, &self.extra_chunks, candidate);
    }
}
impl<T> AsArena<T> for Arena<T> {
    /// Checks `ptr` against this arena's first chunk and its extra chunks.
    fn verify_ownership(&self, ptr: NonNull<T>) {
        let base = self.first_chunk_ptr;
        let capacity = self.first_chunk_cap;
        let candidate = ptr.as_ptr();

        verify_ownership(base, capacity, &self.extra_chunks, candidate);
    }
}
/// Verifies that `ptr` points into the first chunk or into one of
/// `extra_chunks`, and reports an internal error otherwise.
///
/// `first_chunk_cap` and each chunk's capacity are counted in ELEMENTS, so
/// they are scaled by `size_of::<T>()` to get each chunk's byte range. Adding
/// the element count directly to the start address would reject valid
/// pointers whenever `T` is larger than one byte.
fn verify_ownership<T>(
    first_chunk_ptr: *const T,
    first_chunk_cap: usize,
    extra_chunks: &[Vec<T>],
    ptr: *const T,
) {
    // Zero-sized types are treated as one byte wide, so a non-empty chunk
    // still contains its own start address.
    let elem_size = std::mem::size_of::<T>().max(1);
    let addr = ptr as usize;

    // True when `addr` lies within `cap` elements of `start`.
    // Saturating/subtracting instead of adding avoids address overflow
    // (a `Vec` of a zero-sized type reports `usize::MAX` capacity).
    let in_chunk = |start: *const T, cap: usize| -> bool {
        let start_addr = start as usize;
        let byte_len = cap.saturating_mul(elem_size);

        addr >= start_addr && addr - start_addr < byte_len
    };

    if in_chunk(first_chunk_ptr, first_chunk_cap) {
        // Inside the first chunk's address space, so it's verified.
        return;
    }

    if extra_chunks
        .iter()
        .any(|chunk| in_chunk(chunk.as_ptr(), chunk.capacity()))
    {
        // Found it in one of the extra chunks; verification passed.
        return;
    }

    // The address wasn't within any of our chunks' bounds.
    // Fail loudly to avoid use-after-free errors!
    internal_error!("Pointer ownership verification failed.");
}

View file

@ -1,17 +0,0 @@
// #[macro_use]
// extern crate pretty_assertions;
extern crate arena_pool;
// Smoke tests for the public constructors of `ArenaPool`.
#[cfg(test)]
mod test_arena_pool {
use arena_pool::pool::{ArenaIter, ArenaPool};
// Building (and immediately dropping) a pool with zero arenas must not panic.
#[test]
fn empty_pool() {
// Neither of these does anything, but they
// at least shouldn't panic or anything.
let _: (ArenaPool<()>, ArenaIter<()>) = ArenaPool::new(0, 0);
let _: (ArenaPool<()>, ArenaIter<()>) = ArenaPool::with_chunk_size(0, 0, 0);
}
}

View file

@ -85,6 +85,7 @@ pub struct CodeGenOptions {
pub backend: CodeGenBackend,
pub opt_level: OptLevel,
pub emit_debug_info: bool,
pub emit_llvm_ir: bool,
}
type GenFromMono<'a> = (CodeObject, CodeGenTiming, ExpectMetadata<'a>);
@ -101,6 +102,7 @@ pub fn gen_from_mono_module<'a>(
) -> GenFromMono<'a> {
let path = roc_file_path;
let debug = code_gen_options.emit_debug_info;
let emit_llvm_ir = code_gen_options.emit_llvm_ir;
let opt = code_gen_options.opt_level;
match code_gen_options.backend {
@ -120,15 +122,23 @@ pub fn gen_from_mono_module<'a>(
wasm_dev_stack_bytes,
backend_mode,
),
CodeGenBackend::Llvm(backend_mode) => {
gen_from_mono_module_llvm(arena, loaded, path, target, opt, backend_mode, debug)
}
CodeGenBackend::Llvm(backend_mode) => gen_from_mono_module_llvm(
arena,
loaded,
path,
target,
opt,
backend_mode,
debug,
emit_llvm_ir,
),
}
}
// TODO how should imported modules factor into this? What if those use builtins too?
// TODO this should probably use more helper functions
// TODO make this polymorphic in the llvm functions so it can be reused for another backend.
#[allow(clippy::too_many_arguments)]
fn gen_from_mono_module_llvm<'a>(
arena: &'a bumpalo::Bump,
loaded: MonomorphizedModule<'a>,
@ -137,6 +147,7 @@ fn gen_from_mono_module_llvm<'a>(
opt_level: OptLevel,
backend_mode: LlvmBackendMode,
emit_debug_info: bool,
emit_llvm_ir: bool,
) -> GenFromMono<'a> {
use crate::target::{self, convert_opt_level};
use inkwell::attributes::{Attribute, AttributeLoc};
@ -151,9 +162,6 @@ fn gen_from_mono_module_llvm<'a>(
let context = Context::create();
let module = arena.alloc(module_from_builtins(target, &context, "app"));
// strip Zig debug stuff
// module.strip_debug_info();
// mark our zig-defined builtins as internal
let app_ll_file = {
let mut temp = PathBuf::from(roc_file_path);
@ -245,8 +253,9 @@ fn gen_from_mono_module_llvm<'a>(
env.dibuilder.finalize();
// we don't use the debug info, and it causes weird errors.
module.strip_debug_info();
if !emit_debug_info {
module.strip_debug_info();
}
// Uncomment this to see the module's optimized LLVM instruction output:
// env.module.print_to_stderr();
@ -265,6 +274,11 @@ fn gen_from_mono_module_llvm<'a>(
);
}
if emit_llvm_ir {
eprintln!("Emitting LLVM IR to {}", &app_ll_file.display());
module.print_to_file(&app_ll_file).unwrap();
}
// Uncomment this to see the module's optimized LLVM instruction output:
// env.module.print_to_stderr();
@ -359,65 +373,6 @@ fn gen_from_mono_module_llvm<'a>(
assert!(bc_to_object.status.success(), "{bc_to_object:#?}");
MemoryBuffer::create_from_file(&app_o_file).expect("memory buffer creation works")
} else if emit_debug_info {
module.strip_debug_info();
let mut app_ll_dbg_file = PathBuf::from(roc_file_path);
app_ll_dbg_file.set_extension("dbg.ll");
let mut app_o_file = PathBuf::from(roc_file_path);
app_o_file.set_extension("o");
use std::process::Command;
// write the ll code to a file, so we can modify it
module.print_to_file(&app_ll_file).unwrap();
// run the debugir https://github.com/vaivaswatha/debugir tool
match Command::new("debugir")
.args(["-instnamer", app_ll_file.to_str().unwrap()])
.output()
{
Ok(_) => {}
Err(error) => {
use std::io::ErrorKind;
match error.kind() {
ErrorKind::NotFound => internal_error!(
r"I could not find the `debugir` tool on the PATH, install it from https://github.com/vaivaswatha/debugir"
),
_ => internal_error!("{:?}", error),
}
}
}
use target_lexicon::Architecture;
match target.architecture {
Architecture::X86_64
| Architecture::X86_32(_)
| Architecture::Aarch64(_)
| Architecture::Wasm32 => {
// write the .o file. Note that this builds the .o for the local machine,
// and ignores the `target_machine` entirely.
//
// different systems name this executable differently, so we shotgun for
// the most common ones and then give up.
let ll_to_object = Command::new("llc")
.args([
"-relocation-model=pic",
"-filetype=obj",
app_ll_dbg_file.to_str().unwrap(),
"-o",
app_o_file.to_str().unwrap(),
])
.output()
.unwrap();
assert!(ll_to_object.stderr.is_empty(), "{ll_to_object:#?}");
}
_ => unreachable!(),
}
MemoryBuffer::create_from_file(&app_o_file).expect("memory buffer creation works")
} else {
// Emit the .o file
@ -847,7 +802,7 @@ fn build_loaded_file<'a>(
platform_main_roc.with_file_name(roc_linker::preprocessed_host_filename(target).unwrap())
};
let mut output_exe_path = match out_path {
let output_exe_path = match out_path {
Some(path) => {
// true iff the path ends with a directory separator,
// e.g. '/' on UNIX, '/' or '\\' on Windows
@ -875,12 +830,22 @@ fn build_loaded_file<'a>(
if ends_with_sep {
let filename = app_module_path.file_name().unwrap_or_default();
with_executable_extension(&path.join(filename), operating_system)
with_output_extension(
&path.join(filename),
operating_system,
linking_strategy,
link_type,
)
} else {
path.to_path_buf()
}
}
None => with_executable_extension(&app_module_path, operating_system),
None => with_output_extension(
&app_module_path,
operating_system,
linking_strategy,
link_type,
),
};
// We don't need to spawn a rebuild thread when using a prebuilt host.
@ -1039,7 +1004,6 @@ fn build_loaded_file<'a>(
}
(LinkingStrategy::Additive, _) | (LinkingStrategy::Legacy, LinkType::None) => {
// Just copy the object file to the output folder.
output_exe_path.set_extension(operating_system.object_file_ext());
std::fs::write(&output_exe_path, &*roc_app_bytes).unwrap();
}
(LinkingStrategy::Legacy, _) => {
@ -1326,6 +1290,7 @@ pub fn build_str_test<'a>(
backend: CodeGenBackend::Llvm(LlvmBackendMode::Binary),
opt_level: OptLevel::Normal,
emit_debug_info: false,
emit_llvm_ir: false,
};
let emit_timings = false;
@ -1368,6 +1333,17 @@ pub fn build_str_test<'a>(
)
}
fn with_executable_extension(path: &Path, os: OperatingSystem) -> PathBuf {
path.with_extension(os.executable_file_ext().unwrap_or_default())
fn with_output_extension(
path: &Path,
os: OperatingSystem,
linking_strategy: LinkingStrategy,
link_type: LinkType,
) -> PathBuf {
match (linking_strategy, link_type) {
(LinkingStrategy::Additive, _) | (LinkingStrategy::Legacy, LinkType::None) => {
// Additive linking and no linking both output the object file type.
path.with_extension(os.object_file_ext())
}
_ => path.with_extension(os.executable_file_ext().unwrap_or_default()),
}
}

View file

@ -1 +0,0 @@
builtins.ll

View file

@ -1,5 +0,0 @@
zig-out
zig-cache
src/zig-cache
benchmark/zig-cache
dec

View file

@ -11,7 +11,7 @@ fn roc_alloc(_: usize, _: u32) callconv(.C) ?*anyopaque {
fn roc_panic(_: *anyopaque, _: u32) callconv(.C) void {
@panic("Not needed for dec benchmark");
}
fn roc_dbg(_: *anyopaque, _: *anyopaque) callconv(.C) void {
fn roc_dbg(_: *anyopaque, _: *anyopaque, _: *anyopaque) callconv(.C) void {
@panic("Not needed for dec benchmark");
}

View file

@ -7,7 +7,7 @@ const CrossTarget = std.zig.CrossTarget;
const Arch = std.Target.Cpu.Arch;
pub fn build(b: *Build) void {
// const mode = b.standardOptimizeOption(.{ .preferred_optimize_mode = .ReleaseFast });
// const mode = b.standardOptimizeOption(.{ .preferred_optimize_mode = .Debug });
const mode = b.standardOptimizeOption(.{ .preferred_optimize_mode = .ReleaseFast });
// Options
@ -58,6 +58,9 @@ fn generateLlvmIrFile(
) void {
const obj = b.addObject(.{ .name = object_name, .root_source_file = main_path, .optimize = mode, .target = target, .use_llvm = true });
obj.strip = true;
obj.disable_stack_probing = true;
if (target.cpu_arch != .wasm32)
obj.bundle_compiler_rt = true;
// Generating the bin seems required to get zig to generate the llvm ir.
_ = obj.getEmittedBin();
@ -89,6 +92,9 @@ fn generateObjectFile(
obj.strip = true;
obj.link_function_sections = true;
obj.force_pic = true;
obj.disable_stack_probing = true;
if (target.cpu_arch != .wasm32)
obj.bundle_compiler_rt = true;
const obj_file = obj.getEmittedBin();
@ -110,7 +116,7 @@ fn makeLinux32Target() CrossTarget {
target.cpu_arch = std.Target.Cpu.Arch.x86;
target.os_tag = std.Target.Os.Tag.linux;
target.abi = std.Target.Abi.musl;
target.abi = std.Target.Abi.none;
return target;
}
@ -120,7 +126,7 @@ fn makeLinuxAarch64Target() CrossTarget {
target.cpu_arch = std.Target.Cpu.Arch.aarch64;
target.os_tag = std.Target.Os.Tag.linux;
target.abi = std.Target.Abi.musl;
target.abi = std.Target.Abi.none;
return target;
}
@ -130,7 +136,7 @@ fn makeLinuxX64Target() CrossTarget {
target.cpu_arch = std.Target.Cpu.Arch.x86_64;
target.os_tag = std.Target.Os.Tag.linux;
target.abi = std.Target.Abi.musl;
target.abi = std.Target.Abi.none;
return target;
}
@ -140,7 +146,7 @@ fn makeWindows64Target() CrossTarget {
target.cpu_arch = std.Target.Cpu.Arch.x86_64;
target.os_tag = std.Target.Os.Tag.windows;
target.abi = std.Target.Abi.gnu;
target.abi = std.Target.Abi.none;
return target;
}

View file

@ -1,478 +0,0 @@
const std = @import("std");
const builtin = @import("builtin");
const math = std.math;
// Eventually, we need to statically ingest compiler-rt and get it working with the surgical linker, then these should not be needed anymore.
// Until then, we are manually ingesting used parts of compiler-rt here.
//
// Taken from
// https://github.com/ziglang/zig/tree/4976b58ab16069f8d3267b69ed030f29685c1abe/lib/compiler_rt/
// Thank you Zig Contributors!
// Libcalls that involve u128 on Windows x86-64 are expected by LLVM to use the
// calling convention of @Vector(2, u64), rather than what's standard.
pub const want_windows_v2u64_abi = builtin.os.tag == .windows and builtin.cpu.arch == .x86_64 and @import("builtin").object_format != .c;
const v2u64 = @Vector(2, u64);
// Export these as weak in case they are already linked in by something else.
comptime {
// Only the standard-ABI versions are exported, and only on targets that
// do not want the Windows x86-64 @Vector(2, u64) calling convention.
if (!want_windows_v2u64_abi) {
@export(__muloti4, .{ .name = "__muloti4", .linkage = .Weak });
@export(__lshrti3, .{ .name = "__lshrti3", .linkage = .Weak });
@export(__divti3, .{ .name = "__divti3", .linkage = .Weak });
@export(__modti3, .{ .name = "__modti3", .linkage = .Weak });
@export(__umodti3, .{ .name = "__umodti3", .linkage = .Weak });
@export(__udivti3, .{ .name = "__udivti3", .linkage = .Weak });
@export(__fixdfti, .{ .name = "__fixdfti", .linkage = .Weak });
@export(__fixsfti, .{ .name = "__fixsfti", .linkage = .Weak });
@export(__fixunsdfti, .{ .name = "__fixunsdfti", .linkage = .Weak });
@export(__fixunssfti, .{ .name = "__fixunssfti", .linkage = .Weak });
}
}
/// Multiplies two i128 values with wrapping, and sets `overflow.*` to 1 if
/// the exact product does not fit in an i128 (0 otherwise).
pub fn __muloti4(a: i128, b: i128, overflow: *c_int) callconv(.C) i128 {
    // The widening implementation is only chosen when a double-width value
    // fits in a usize; otherwise the division-based check is used.
    const use_fast_path = 2 * @bitSizeOf(i128) <= @bitSizeOf(usize);

    return if (use_fast_path)
        muloXi4_genericFast(i128, a, b, overflow)
    else
        muloXi4_genericSmall(i128, a, b, overflow);
}
/// Signed 128-bit division; delegates to `div`.
pub fn __divti3(a: i128, b: i128) callconv(.C) i128 {
return div(a, b);
}
/// Same as `__divti3`, but takes and returns the operands as @Vector(2, u64).
fn __divti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
return @as(v2u64, @bitCast(div(@as(i128, @bitCast(a)), @as(i128, @bitCast(b)))));
}
/// Signed division built on the unsigned `udivmod`: divide the magnitudes,
/// then negate the quotient when the operand signs differ.
inline fn div(a: i128, b: i128) i128 {
    // Sign masks: all ones for a negative operand, zero otherwise.
    const sign_a = a >> (128 - 1);
    const sign_b = b >> (128 - 1);

    // Conditional negation gives the magnitude of each operand.
    const abs_a = (a ^ sign_a) -% sign_a;
    const abs_b = (b ^ sign_b) -% sign_b;

    const quotient = udivmod(u128, @as(u128, @bitCast(abs_a)), @as(u128, @bitCast(abs_b)), null);

    // The result is negative exactly when the operand signs differ.
    const sign_q = sign_a ^ sign_b;
    return (@as(i128, @bitCast(quotient)) ^ sign_q) -% sign_q;
}
/// Unsigned 128-bit division; returns the quotient from `udivmod`.
pub fn __udivti3(a: u128, b: u128) callconv(.C) u128 {
return udivmod(u128, a, b, null);
}
/// Same as `__udivti3`, but takes and returns the operands as @Vector(2, u64).
fn __udivti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
return @as(v2u64, @bitCast(udivmod(u128, @as(u128, @bitCast(a)), @as(u128, @bitCast(b)), null)));
}
/// Unsigned 128-bit remainder; discards the quotient from `udivmod`.
pub fn __umodti3(a: u128, b: u128) callconv(.C) u128 {
// Filled in by udivmod through the remainder out-pointer.
var r: u128 = undefined;
_ = udivmod(u128, a, b, &r);
return r;
}
/// Same as `__umodti3`, but takes and returns the operands as @Vector(2, u64).
fn __umodti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
// Filled in by udivmod through the remainder out-pointer.
var r: u128 = undefined;
_ = udivmod(u128, @as(u128, @bitCast(a)), @as(u128, @bitCast(b)), &r);
return @as(v2u64, @bitCast(r));
}
/// Signed 128-bit remainder; delegates to `mod`.
pub fn __modti3(a: i128, b: i128) callconv(.C) i128 {
return mod(a, b);
}
/// Same as `__modti3`, but takes and returns the operands as @Vector(2, u64).
fn __modti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
return @as(v2u64, @bitCast(mod(@as(i128, @bitCast(a)), @as(i128, @bitCast(b)))));
}
/// Signed remainder built on the unsigned `udivmod`: take the remainder of
/// the magnitudes, then give it the sign of the dividend `a`.
inline fn mod(a: i128, b: i128) i128 {
    // Sign masks: -1 for a negative operand, 0 otherwise.
    const sign_a = a >> (128 - 1);
    const sign_b = b >> (128 - 1);

    // Negate each operand when its mask is -1, yielding its magnitude.
    const abs_a = (a ^ sign_a) -% sign_a;
    const abs_b = (b ^ sign_b) -% sign_b;

    var remainder: u128 = undefined;
    _ = udivmod(u128, @as(u128, @bitCast(abs_a)), @as(u128, @bitCast(abs_b)), &remainder);

    // Negate the remainder when the dividend was negative.
    return (@as(i128, @bitCast(remainder)) ^ sign_a) -% sign_a;
}
/// Converts an f64 to i128 via `floatToInt`.
pub fn __fixdfti(a: f64) callconv(.C) i128 {
return floatToInt(i128, a);
}
/// Same as `__fixdfti`, but returns the result as @Vector(2, u64).
fn __fixdfti_windows_x86_64(a: f64) callconv(.C) v2u64 {
return @as(v2u64, @bitCast(floatToInt(i128, a)));
}
/// Converts an f32 to i128 via `floatToInt`.
pub fn __fixsfti(a: f32) callconv(.C) i128 {
return floatToInt(i128, a);
}
/// Same as `__fixsfti`, but returns the result as @Vector(2, u64).
fn __fixsfti_windows_x86_64(a: f32) callconv(.C) v2u64 {
return @as(v2u64, @bitCast(floatToInt(i128, a)));
}
/// Converts an f64 to u128 via `floatToInt`.
pub fn __fixunsdfti(a: f64) callconv(.C) u128 {
return floatToInt(u128, a);
}
/// Same as `__fixunsdfti`, but returns the result as @Vector(2, u64).
fn __fixunsdfti_windows_x86_64(a: f64) callconv(.C) v2u64 {
return @as(v2u64, @bitCast(floatToInt(u128, a)));
}
/// Converts an f32 to u128 via `floatToInt`.
pub fn __fixunssfti(a: f32) callconv(.C) u128 {
return floatToInt(u128, a);
}
/// Same as `__fixunssfti`, but returns the result as @Vector(2, u64).
fn __fixunssfti_windows_x86_64(a: f32) callconv(.C) v2u64 {
return @as(v2u64, @bitCast(floatToInt(u128, a)));
}
// mulo - multiplication overflow
// * return a*%b.
// * return if a*b overflows => 1 else => 0
// - muloXi4_genericSmall as default
// - muloXi4_genericFast for 2*bitsize <= usize
/// Wrapping multiply with overflow detection that needs no wider integer
/// type: the product is divided back by `a` and compared with `b`.
/// Sets `overflow.*` to 1 on overflow and to 0 otherwise.
inline fn muloXi4_genericSmall(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST {
    const min_value = math.minInt(ST);
    const product: ST = a *% b;

    // Hacker's Delight, section Overflow, subsection Multiplication.
    // The pair (a < 0, b == min) is tested first on purpose: for a == -1 and
    // b == min the wrapped product is min again, and dividing min by -1 would
    // itself overflow. The short-circuit `or` keeps that division from running.
    const overflowed = (a < 0 and b == min_value) or
        (a != 0 and @divTrunc(product, a) != b);

    overflow.* = if (overflowed) 1 else 0;
    return product;
}
/// Multiply with overflow detection that widens both operands to a type of
/// twice the width, multiplies exactly, and range-checks the result.
/// Sets `overflow.*` to 1 on overflow and to 0 otherwise; returns the
/// product truncated back to `ST`.
inline fn muloXi4_genericFast(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST {
    // Double-width type that can hold any product of two ST values.
    const Wide = switch (ST) {
        i32 => i64,
        i64 => i128,
        i128 => i256,
        else => unreachable,
    };
    const lower_bound = math.minInt(ST);
    const upper_bound = math.maxInt(ST);

    const wide_product: Wide = @as(Wide, a) * @as(Wide, b);
    const out_of_range = wide_product < lower_bound or upper_bound < wide_product;

    overflow.* = if (out_of_range) 1 else 0;
    return @as(ST, @truncate(wide_product));
}
// Indices of the low and high halves when a double-width integer is
// reinterpreted as a two-element array; they depend on the target's endianness.
const native_endian = builtin.cpu.arch.endian();
const low = switch (native_endian) {
.Big => 1,
.Little => 0,
};
const high = 1 - low;
/// Divides `a` by `b` and returns the quotient. When `maybe_rem` is non-null,
/// the remainder is also stored through it.
///
/// Both operands are reinterpreted as two half-width unsigned words
/// (`n` for the numerator, `d` for the denominator). A series of special
/// cases is resolved directly with half-width arithmetic; everything else
/// falls through to the shift-and-subtract loop at the bottom, which runs
/// `sr` iterations.
///
/// Callers in this file always pass `u128` for `DoubleInt`.
pub fn udivmod(comptime DoubleInt: type, a: DoubleInt, b: DoubleInt, maybe_rem: ?*DoubleInt) DoubleInt {
// @setRuntimeSafety(builtin.is_test);
const double_int_bits = @typeInfo(DoubleInt).Int.bits;
const single_int_bits = @divExact(double_int_bits, 2);
const SingleInt = std.meta.Int(.unsigned, single_int_bits);
const SignedDoubleInt = std.meta.Int(.signed, double_int_bits);
const Log2SingleInt = std.math.Log2Int(SingleInt);
const n = @as([2]SingleInt, @bitCast(a));
const d = @as([2]SingleInt, @bitCast(b));
var q: [2]SingleInt = undefined;
var r: [2]SingleInt = undefined;
var sr: c_uint = undefined;
// special cases, X is unknown, K != 0
if (n[high] == 0) {
if (d[high] == 0) {
// 0 X
// ---
// 0 X
if (maybe_rem) |rem| {
rem.* = n[low] % d[low];
}
return n[low] / d[low];
}
// 0 X
// ---
// K X
if (maybe_rem) |rem| {
rem.* = n[low];
}
return 0;
}
// n[high] != 0
if (d[low] == 0) {
if (d[high] == 0) {
// K X
// ---
// 0 0
// Here the divisor is zero, so the operations below divide by zero.
if (maybe_rem) |rem| {
rem.* = n[high] % d[low];
}
return n[high] / d[low];
}
// d[high] != 0
if (n[low] == 0) {
// K 0
// ---
// K 0
if (maybe_rem) |rem| {
r[high] = n[high] % d[high];
r[low] = 0;
rem.* = @as(DoubleInt, @bitCast(r));
}
return n[high] / d[high];
}
// K K
// ---
// K 0
if ((d[high] & (d[high] - 1)) == 0) {
// d is a power of 2
if (maybe_rem) |rem| {
r[low] = n[low];
r[high] = n[high] & (d[high] - 1);
rem.* = @as(DoubleInt, @bitCast(r));
}
return n[high] >> @as(Log2SingleInt, @intCast(@ctz(d[high])));
}
// K K
// ---
// K 0
sr = @as(c_uint, @bitCast(@as(c_int, @clz(d[high])) - @as(c_int, @clz(n[high]))));
// 0 <= sr <= single_int_bits - 2 or sr large
if (sr > single_int_bits - 2) {
if (maybe_rem) |rem| {
rem.* = a;
}
return 0;
}
sr += 1;
// 1 <= sr <= single_int_bits - 1
// q.all = a << (double_int_bits - sr);
q[low] = 0;
q[high] = n[low] << @as(Log2SingleInt, @intCast(single_int_bits - sr));
// r.all = a >> sr;
r[high] = n[high] >> @as(Log2SingleInt, @intCast(sr));
r[low] = (n[high] << @as(Log2SingleInt, @intCast(single_int_bits - sr))) | (n[low] >> @as(Log2SingleInt, @intCast(sr)));
} else {
// d[low] != 0
if (d[high] == 0) {
// K X
// ---
// 0 K
if ((d[low] & (d[low] - 1)) == 0) {
// d is a power of 2
if (maybe_rem) |rem| {
rem.* = n[low] & (d[low] - 1);
}
if (d[low] == 1) {
return a;
}
sr = @ctz(d[low]);
q[high] = n[high] >> @as(Log2SingleInt, @intCast(sr));
q[low] = (n[high] << @as(Log2SingleInt, @intCast(single_int_bits - sr))) | (n[low] >> @as(Log2SingleInt, @intCast(sr)));
return @as(DoubleInt, @bitCast(q));
}
// K X
// ---
// 0 K
sr = 1 + single_int_bits + @as(c_uint, @clz(d[low])) - @as(c_uint, @clz(n[high]));
// 2 <= sr <= double_int_bits - 1
// q.all = a << (double_int_bits - sr);
// r.all = a >> sr;
if (sr == single_int_bits) {
q[low] = 0;
q[high] = n[low];
r[high] = 0;
r[low] = n[high];
} else if (sr < single_int_bits) {
// 2 <= sr <= single_int_bits - 1
q[low] = 0;
q[high] = n[low] << @as(Log2SingleInt, @intCast(single_int_bits - sr));
r[high] = n[high] >> @as(Log2SingleInt, @intCast(sr));
r[low] = (n[high] << @as(Log2SingleInt, @intCast(single_int_bits - sr))) | (n[low] >> @as(Log2SingleInt, @intCast(sr)));
} else {
// single_int_bits + 1 <= sr <= double_int_bits - 1
q[low] = n[low] << @as(Log2SingleInt, @intCast(double_int_bits - sr));
q[high] = (n[high] << @as(Log2SingleInt, @intCast(double_int_bits - sr))) | (n[low] >> @as(Log2SingleInt, @intCast(sr - single_int_bits)));
r[high] = 0;
r[low] = n[high] >> @as(Log2SingleInt, @intCast(sr - single_int_bits));
}
} else {
// K X
// ---
// K K
sr = @as(c_uint, @bitCast(@as(c_int, @clz(d[high])) - @as(c_int, @clz(n[high]))));
// 0 <= sr <= single_int_bits - 1 or sr large
if (sr > single_int_bits - 1) {
if (maybe_rem) |rem| {
rem.* = a;
}
return 0;
}
sr += 1;
// 1 <= sr <= single_int_bits
// q.all = a << (double_int_bits - sr);
// r.all = a >> sr;
q[low] = 0;
if (sr == single_int_bits) {
q[high] = n[low];
r[high] = 0;
r[low] = n[high];
} else {
r[high] = n[high] >> @as(Log2SingleInt, @intCast(sr));
r[low] = (n[high] << @as(Log2SingleInt, @intCast(single_int_bits - sr))) | (n[low] >> @as(Log2SingleInt, @intCast(sr)));
q[high] = n[low] << @as(Log2SingleInt, @intCast(single_int_bits - sr));
}
}
}
// Not a special case
// q and r are initialized with:
// q.all = a << (double_int_bits - sr);
// r.all = a >> sr;
// 1 <= sr <= double_int_bits - 1
var carry: u32 = 0;
var r_all: DoubleInt = undefined;
while (sr > 0) : (sr -= 1) {
// r:q = ((r:q) << 1) | carry
r[high] = (r[high] << 1) | (r[low] >> (single_int_bits - 1));
r[low] = (r[low] << 1) | (q[high] >> (single_int_bits - 1));
q[high] = (q[high] << 1) | (q[low] >> (single_int_bits - 1));
q[low] = (q[low] << 1) | carry;
// carry = 0;
// if (r.all >= b)
// {
// r.all -= b;
// carry = 1;
// }
r_all = @as(DoubleInt, @bitCast(r));
// Branch-free form of the commented-out block above: s is all ones
// when r_all >= b, and zero otherwise.
const s: SignedDoubleInt = @as(SignedDoubleInt, @bitCast(b -% r_all -% 1)) >> (double_int_bits - 1);
carry = @as(u32, @intCast(s & 1));
r_all -= b & @as(DoubleInt, @bitCast(s));
r = @as([2]SingleInt, @bitCast(r_all));
}
const q_all = (@as(DoubleInt, @bitCast(q)) << 1) | carry;
if (maybe_rem) |rem| {
rem.* = r_all;
}
return q_all;
}
/// Converts the float `a` to the integer type `I` by decoding its bits.
///
/// - A value whose unbiased exponent is negative converts to 0.
/// - A negative value converts to 0 when `I` is unsigned.
/// - A value too large for `I` saturates to `I`'s maximum (or minimum, for a
///   negative value and a signed `I`).
/// - Otherwise the significand is shifted into place, which discards any
///   fractional bits.
pub inline fn floatToInt(comptime I: type, a: anytype) I {
const Log2Int = math.Log2Int;
const Int = @import("std").meta.Int;
const F = @TypeOf(a);
const float_bits = @typeInfo(F).Float.bits;
const int_bits = @typeInfo(I).Int.bits;
// Unsigned integer type with the same width as the float.
const rep_t = Int(.unsigned, float_bits);
const sig_bits = math.floatMantissaBits(F);
const exp_bits = math.floatExponentBits(F);
const fractional_bits = floatFractionalBits(F);
// const implicit_bit = if (F != f80) (@as(rep_t, 1) << sig_bits) else 0;
const implicit_bit = @as(rep_t, 1) << sig_bits;
const max_exp = (1 << (exp_bits - 1));
const exp_bias = max_exp - 1;
const sig_mask = (@as(rep_t, 1) << sig_bits) - 1;
// Break a into sign, exponent, significand
const a_rep: rep_t = @as(rep_t, @bitCast(a));
const negative = (a_rep >> (float_bits - 1)) != 0;
// Shift the sign bit out to the left, then the significand out to the right.
const exponent = @as(i32, @intCast((a_rep << 1) >> (sig_bits + 1))) - exp_bias;
const significand: rep_t = (a_rep & sig_mask) | implicit_bit;
// If the exponent is negative, the result rounds to zero.
if (exponent < 0) return 0;
// If the value is too large for the integer type, saturate.
switch (@typeInfo(I).Int.signedness) {
.unsigned => {
if (negative) return 0;
if (@as(c_uint, @intCast(exponent)) >= @min(int_bits, max_exp)) return math.maxInt(I);
},
.signed => if (@as(c_uint, @intCast(exponent)) >= @min(int_bits - 1, max_exp)) {
return if (negative) math.minInt(I) else math.maxInt(I);
},
}
// If 0 <= exponent < sig_bits, right shift to get the result.
// Otherwise, shift left.
var result: I = undefined;
if (exponent < fractional_bits) {
result = @as(I, @intCast(significand >> @as(Log2Int(rep_t), @intCast(fractional_bits - exponent))));
} else {
result = @as(I, @intCast(significand)) << @as(Log2Int(I), @intCast(exponent - fractional_bits));
}
// Two's-complement negation of the magnitude for negative signed results.
if ((@typeInfo(I).Int.signedness == .signed) and negative)
return ~result +% 1;
return result;
}
/// Returns the number of fractional bits in the mantissa of floating point type T.
pub inline fn floatFractionalBits(comptime T: type) comptime_int {
    comptime std.debug.assert(@typeInfo(T) == .Float);

    // Standard IEEE floats keep their integer part (0.m or 1.m) implicit,
    // while f80 stores it explicitly in the most significant mantissa bit.
    // The values below correspond to `MANT_DIG - 1` from C.
    const total_bits = @typeInfo(T).Float.bits;

    return switch (total_bits) {
        16 => 10,
        32 => 23,
        64 => 52,
        80 => 63,
        128 => 112,
        else => @compileError("unknown floating point type " ++ @typeName(T)),
    };
}
/// Logical right shift of a 128-bit value by `b` bits; delegates to `lshrXi3`.
pub fn __lshrti3(a: i128, b: i32) callconv(.C) i128 {
return lshrXi3(i128, a, b);
}
// Logical shift right: shift in 0 from left to right
// Precondition: 0 <= b < T.bit_count
inline fn lshrXi3(comptime T: type, a: T, b: i32) T {
    // View the value as two unsigned halves so zeros are shifted in from the top.
    const Halves = HalveInt(T, false);
    const ShiftAmount = std.math.Log2Int(Halves.HalfT);

    // A zero shift returns the input unchanged.
    if (b == 0) return a;

    const src = Halves{ .all = a };
    var dst: Halves = undefined;

    if (b >= Halves.bits) {
        // The whole low half is shifted out; only bits of the high half remain.
        dst.s.high = 0;
        dst.s.low = src.s.high >> @as(ShiftAmount, @intCast(b - Halves.bits));
    } else {
        // Bits leaving the high half move into the top of the low half.
        dst.s.high = src.s.high >> @as(ShiftAmount, @intCast(b));
        dst.s.low = src.s.high << @as(ShiftAmount, @intCast(Halves.bits - b));
        dst.s.low |= src.s.low >> @as(ShiftAmount, @intCast(b));
    }

    return dst.all;
}
/// Allows to access underlying bits as two equally sized lower and higher
/// signed or unsigned integers.
fn HalveInt(comptime T: type, comptime signed_half: bool) type {
return extern union {
// Width of each half, in bits.
pub const bits = @divExact(@typeInfo(T).Int.bits, 2);
pub const HalfTU = std.meta.Int(.unsigned, bits);
pub const HalfTS = std.meta.Int(.signed, bits);
// Type of the halves, selected by `signed_half`.
pub const HalfT = if (signed_half) HalfTS else HalfTU;
// The full-width value.
all: T,
// The same bits as two halves; field order follows the target's
// endianness so that `low` always names the least significant half.
s: if (native_endian == .Little)
extern struct { low: HalfT, high: HalfT }
else
extern struct { high: HalfT, low: HalfT },
};
}

View file

@ -0,0 +1,11 @@
const std = @import("std");
const builtin = @import("builtin");
const RocStr = @import("str.zig").RocStr;
// Optional debug implementation, called during `roc test`.
// Prints "[loc] src = msg" to stderr; does nothing on wasm32.
pub fn dbg_impl(loc: *const RocStr, msg: *const RocStr, src: *const RocStr) callconv(.C) void {
    if (builtin.target.cpu.arch == .wasm32) return;

    const err_writer = std.io.getStdErr().writer();
    const fields = .{ loc.asSlice(), src.asSlice(), msg.asSlice() };

    err_writer.print("[{s}] {s} = {s}\n", fields) catch unreachable;
}

View file

@ -252,7 +252,6 @@ pub const RocDec = extern struct {
if (answer.has_overflowed) {
roc_panic("Decimal addition overflowed!", 0);
unreachable;
} else {
return answer.value;
}
@ -283,7 +282,6 @@ pub const RocDec = extern struct {
if (answer.has_overflowed) {
roc_panic("Decimal subtraction overflowed!", 0);
unreachable;
} else {
return answer.value;
}
@ -347,7 +345,6 @@ pub const RocDec = extern struct {
if (answer.has_overflowed) {
roc_panic("Decimal multiplication overflowed!", 0);
unreachable;
} else {
return answer.value;
}
@ -369,7 +366,7 @@ pub const RocDec = extern struct {
// (n / 0) is an error
if (denominator_i128 == 0) {
@panic("TODO runtime exception for dividing by 0!");
roc_panic("Decimal division by 0!", 0);
}
// If they're both negative, or if neither is negative, the final answer
@ -397,7 +394,7 @@ pub const RocDec = extern struct {
if (denominator_i128 == one_point_zero_i128) {
return self;
} else {
@panic("TODO runtime exception for overflow when dividing!");
roc_panic("Decimal division overflow in numerator!", 0);
}
};
const numerator_u128 = @as(u128, @intCast(numerator_abs_i128));
@ -410,7 +407,7 @@ pub const RocDec = extern struct {
if (numerator_i128 == one_point_zero_i128) {
return other;
} else {
@panic("TODO runtime exception for overflow when dividing!");
roc_panic("Decimal division overflow in denominator!", 0);
}
};
const denominator_u128 = @as(u128, @intCast(denominator_abs_i128));
@ -422,7 +419,7 @@ pub const RocDec = extern struct {
if (answer.hi == 0 and answer.lo <= math.maxInt(i128)) {
unsigned_answer = @as(i128, @intCast(answer.lo));
} else {
@panic("TODO runtime exception for overflow when dividing!");
roc_panic("Decimal division overflow!", 0);
}
return RocDec{ .num = if (is_answer_negative) -unsigned_answer else unsigned_answer };
@ -463,6 +460,10 @@ pub const RocDec = extern struct {
return RocDec{ .num = out };
}
pub fn log(self: RocDec) RocDec {
return fromF64(@log(self.toF64())).?;
}
// I believe the output of the trig functions is always in range of Dec.
// If not, we probably should just make it saturate the Dec.
// I don't think this should crash or return errors.
@ -632,7 +633,7 @@ fn mul_and_decimalize(a: u128, b: u128) i128 {
const d = answer[0];
if (overflowed == 1) {
@panic("TODO runtime exception for overflow!");
roc_panic("Decimal multiplication overflow!", 0);
}
// Final 512bit value is d, c, b, a
@ -1190,6 +1191,10 @@ test "div: 500 / 1000" {
try expectEqual(RocDec.fromStr(roc_str), number1.div(number2));
}
test "log: 1" {
try expectEqual(RocDec.fromU64(0), RocDec.log(RocDec.fromU64(1)));
}
// exports
pub fn fromStr(arg: RocStr) callconv(.C) num_.NumParseResult(i128) {
@ -1208,7 +1213,7 @@ pub fn fromF64C(arg: f64) callconv(.C) i128 {
if (@call(.always_inline, RocDec.fromF64, .{arg})) |dec| {
return dec.num;
} else {
@panic("TODO runtime exception failing convert f64 to RocDec");
roc_panic("Decimal conversion from f64 failed!", 0);
}
}
@ -1217,7 +1222,7 @@ pub fn fromF32C(arg_f32: f32) callconv(.C) i128 {
if (@call(.always_inline, RocDec.fromF64, .{arg_f64})) |dec| {
return dec.num;
} else {
@panic("TODO runtime exception failing convert f64 to RocDec");
// Reached when RocDec.fromF64 returns null for the widened f32 value.
// Message wording matches the f64 and Integer conversion failures.
roc_panic("Decimal conversion from f32 failed!", 0);
}
}
@ -1232,7 +1237,7 @@ pub fn exportFromInt(comptime T: type, comptime name: []const u8) void {
const answer = @mulWithOverflow(this, RocDec.one_point_zero_i128);
if (answer[1] == 1) {
@panic("TODO runtime exception failing convert integer to RocDec");
roc_panic("Decimal conversion from Integer failed!", 0);
} else {
return answer[0];
}
@ -1258,11 +1263,15 @@ pub fn neqC(arg1: RocDec, arg2: RocDec) callconv(.C) bool {
}
pub fn negateC(arg: RocDec) callconv(.C) i128 {
return if (@call(.always_inline, RocDec.negate, .{arg})) |dec| dec.num else @panic("TODO overflow for negating RocDec");
return if (@call(.always_inline, RocDec.negate, .{arg})) |dec| dec.num else {
roc_panic("Decimal negation overflow!", 0);
};
}
pub fn absC(arg: RocDec) callconv(.C) i128 {
const result = @call(.always_inline, RocDec.abs, .{arg}) catch @panic("TODO overflow for calling absolute value on RocDec");
const result = @call(.always_inline, RocDec.abs, .{arg}) catch {
roc_panic("Decimal absolute value overflow!", 0);
};
return result.num;
}
@ -1282,6 +1291,10 @@ pub fn divC(arg1: RocDec, arg2: RocDec) callconv(.C) i128 {
return @call(.always_inline, RocDec.div, .{ arg1, arg2 }).num;
}
// C ABI wrapper around `RocDec.log`; returns the i128 stored in the result's `num` field.
pub fn logC(arg: RocDec) callconv(.C) i128 {
    const result = @call(.always_inline, RocDec.log, .{arg});
    return result.num;
}
// C ABI wrapper around `RocDec.sin`; returns the i128 stored in the result's `num` field.
pub fn sinC(arg: RocDec) callconv(.C) i128 {
    const result = @call(.always_inline, RocDec.sin, .{arg});
    return result.num;
}

File diff suppressed because it is too large Load diff

View file

@ -1,87 +0,0 @@
const std = @import("std");
const builtin = @import("builtin");
const arch = builtin.cpu.arch;
const musl = @import("libc/musl.zig");
const folly = @import("libc/folly.zig");
const cpuid = @import("libc/cpuid.zig");
comptime {
    // Export our implementation as the strong `memcpy` symbol, except:
    //  - in test builds: the tests are built with -lc, so memcpy is not needed;
    //  - on wasm32: our wasm llvm pipeline always links in memcpy, so our impl
    //    would conflict. TODO: remove this workaround.
    const should_export = !builtin.is_test and arch != .wasm32;
    if (should_export) {
        @export(memcpy, .{ .name = "memcpy", .linkage = .Strong });
    }
}
// Function-pointer type with the C `memcpy` parameter list (dest, src, len), returning a `[*]u8`.
const Memcpy = *const fn (noalias [*]u8, noalias [*]const u8, len: usize) callconv(.C) [*]u8;
// The function `memcpy` forwards to on x86_64.
// It only has a value on x86_64; for every other architecture the initializer is `unreachable`.
pub var memcpy_target: Memcpy = switch (arch) {
.x86_64 => dispatch_memcpy,
else => unreachable,
};
// The function exported as `memcpy`.
// Only non-Windows x86_64 goes through `memcpy_target` (the special optimized
// memcpy that can use avx2); Windows and every other architecture use musl's memcpy.
pub fn memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8 {
    const use_optimized = comptime (builtin.os.tag != .windows and arch == .x86_64);
    if (use_optimized) {
        return memcpy_target(dest, src, len);
    }
    return musl.memcpy(dest, src, len);
}
// Which implementation `dispatch_memcpy` forwards to.
// `.uninitialized` means the CPU features have not been probed yet.
const MemcpyDecision = enum {
uninitialized,
folly_prefetchw,
folly_prefetcht0,
musl,
};
// Cached result of the CPU feature probe; written by `dispatch_memcpy` while it is still `.uninitialized`.
var memcpy_decision: MemcpyDecision = .uninitialized;
// Chooses a memcpy implementation from the CPU features and forwards the call to it.
// The probe runs while `memcpy_decision` is `.uninitialized` and its result is cached:
//   avx2 and prefetchw -> folly.memcpy_prefetchw
//   avx2 only          -> folly.memcpy_prefetcht0
//   no avx2            -> musl.memcpy
//
// TODO: Switch this to overwrite the memcpy_target pointer once the surgical linker can support it.
// Then dispatch will just happen on the first call instead of every call.
fn dispatch_memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8 {
    switch (arch) {
        .x86_64 => {
            if (memcpy_decision == .uninitialized) {
                if (!cpuid.supports_avx2()) {
                    memcpy_decision = .musl;
                } else if (cpuid.supports_prefetchw()) {
                    memcpy_decision = .folly_prefetchw;
                } else {
                    memcpy_decision = .folly_prefetcht0;
                }
            }
            return switch (memcpy_decision) {
                .folly_prefetchw => folly.memcpy_prefetchw(dest, src, len),
                .folly_prefetcht0 => folly.memcpy_prefetcht0(dest, src, len),
                .musl => musl.memcpy(dest, src, len),
                // The probe above always stores one of the three values handled here.
                .uninitialized => unreachable,
            };
        },
        else => unreachable,
    }
}

View file

@ -1,7 +0,0 @@
const builtin = @import("builtin");
const os = builtin.os;
// Prefix put in front of symbol names in the global assembly templates:
// "_" on macOS, empty on every other OS.
pub const function_prefix = if (os.tag == .macos) "_" else "";

View file

@ -1,53 +0,0 @@
// Check if AVX2 is supported.
// Takes no arguments. Returns 1 in EAX if AVX2 is supported, 0 otherwise.
// NOTE(review): only the CPUID feature bit is tested. This routine does not check
// that leaf 7 is available or that the OS has enabled the AVX register state - confirm
// that this is acceptable for the targets this runs on.
.global {[function_prefix]s}supports_avx2;
{[function_prefix]s}supports_avx2:
// Save RBX: the cpuid instruction overwrites EBX and the value is restored before returning.
push %rbx
// Call the CPUID instruction with the EAX register set to 7 and ECX set to 0.
// This will get the CPUID information for the current CPU.
mov $7, %eax
mov $0, %ecx
cpuid
// The AVX2 feature flag is located in the EBX register at bit 5.
// bt copies that bit into the carry flag, jc branches when it is set.
bt $5, %ebx
jc .avx2_supported
// AVX2 is not supported.
pop %rbx
mov $0, %eax
ret
.avx2_supported:
// AVX2 is supported.
pop %rbx
mov $1, %eax
ret
// Check if prefetchw is supported.
// Takes no arguments. Returns 1 in EAX if the prefetchw instruction is supported, 0 otherwise.
.global {[function_prefix]s}supports_prefetchw;
{[function_prefix]s}supports_prefetchw:
// Save RBX: the cpuid instruction overwrites EBX and the value is restored before returning.
push %rbx
// Call the CPUID instruction with the EAX register set to 0x80000001 and ECX set to 0.
// This will get the CPUID information for the current CPU.
mov $0x80000001, %eax
mov $0, %ecx
cpuid
// The prefetchw feature flag is located in the ECX register at bit 8.
// bt copies that bit into the carry flag, jc branches when it is set.
bt $8, %ecx
jc .prefetchw_supported
// prefetchw is not supported.
pop %rbx
mov $0, %eax
ret
.prefetchw_supported:
// prefetchw is supported.
pop %rbx
mov $1, %eax
ret

View file

@ -1,18 +0,0 @@
const std = @import("std");
const builtin = @import("builtin");
const arch = builtin.cpu.arch;
const function_prefix = @import("assembly_util.zig").function_prefix;
// I couldn't manage to define this in a PIE friendly way with inline assembly.
// Instead, I am defining it as global assembly functions.
// `cpuid.S` is used as a format template: `function_prefix` is substituted into the symbol names.
// Only x86_64 is handled; any other architecture reaches `unreachable` in this comptime block.
comptime {
switch (arch) {
.x86_64 => {
asm (std.fmt.comptimePrint(@embedFile("cpuid.S"), .{ .function_prefix = function_prefix }));
},
else => unreachable,
}
}
// Symbols defined by the global assembly in cpuid.S.
// Each one runs `cpuid` and reports whether the corresponding feature bit is set.
pub extern fn supports_avx2() bool;
pub extern fn supports_prefetchw() bool;

View file

@ -1,2 +0,0 @@
pub const memcpy_prefetchw = @import("folly/memcpy.zig").__folly_memcpy_prefetchw;
pub const memcpy_prefetcht0 = @import("folly/memcpy.zig").__folly_memcpy_prefetcht0;

View file

@ -1,437 +0,0 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* __folly_memcpy: An optimized memcpy implementation that uses prefetch and
* AVX2 instructions.
*
* This implementation of memcpy acts as a memmove: while overlapping copies
* are undefined in memcpy, in some implementations they're the same function and
* legacy programs rely on this behavior.
*
* This implementation uses prefetch to avoid dtlb misses. This can
* substantially reduce dtlb store misses in cases where the destination
* location is absent from L1 cache and where the copy size is small enough
* that the hardware prefetcher doesn't have a large impact.
*
* The number of branches is limited by the use of overlapping loads & stores.
* This helps with copies where the source and destination cache lines are already
* present in L1 because there are fewer instructions to execute and fewer
* branches to potentially mispredict.
* e.g. to copy the last 4 <= n <= 7 bytes: copy the first & last 4 bytes (overlapped):
* movl (%rsi), %r8d
* movl -4(%rsi,%rdx), %r9d
* movl %r8d, (%rdi)
* movl %r9d, -4(%rdi,%rdx)
*
*
* For sizes up to 256 all source data is first read into registers and then written:
* - n <= 16: overlapping movs
* - n <= 32: overlapping unaligned 16-byte SSE XMM load/stores
* - n <= 256: overlapping unaligned 32-byte AVX YMM load/stores
*
* Large copies (> 256 bytes) use unaligned loads + aligned stores.
* This is observed to always be faster than rep movsb, so the rep movsb
* instruction is not used.
* - The head & tail may be unaligned => they're always written using unaligned stores.
*
* If the copy size is humongous (> 32 KiB) and the source and destination are both
* aligned, this memcpy will use non-temporal operations (AVX2). This can have
* a substantial speedup for copies where data is absent from L1, but it
* is significantly slower if the source and destination data were already
* in L1. The use of non-temporal operations also has the effect that after
* the copy is complete, the data will be moved out of L1, even if the data was
* present before the copy started.
*
* For n > 256 and overlapping src & dst buffers (memmove):
* - use unaligned loads + aligned stores, but not non-temporal stores
* - for dst < src forward copy in 128 byte batches:
* - unaligned load the first 32 bytes & last 4 x 32 bytes
* - forward copy (unaligned load + aligned stores) 4 x 32 bytes at a time
* - unaligned store the first 32 bytes & last 4 x 32 bytes
* - for dst > src backward copy in 128 byte batches:
* - unaligned load the first 4 x 32 bytes & last 32 bytes
* - backward copy (unaligned load + aligned stores) 4 x 32 bytes at a time
* - unaligned store the first 4 x 32 bytes & last 32 bytes
*
* @author Logan Evans <lpe@fb.com>
*/
// Copies 1 to 7 bytes.
// Entered through the .L_GE1_LE7 label from __folly_memcpy once len is known to be
// non-zero and below 8. On entry: %rdi = dest, %rsi = src, %rdx = len, and %rax
// already holds dest (the return value) because the caller set it before jumping here.
// Every size class reads all source bytes before writing, using two overlapping
// loads/stores (first and last chunk) instead of a loop.
// .type {[function_prefix]s}__folly_memcpy_short_{[prefetch]s}, @function not supported by windows
{[function_prefix]s}__folly_memcpy_short_{[prefetch]s}:
.cfi_startproc
.L_GE1_LE7_{[prefetch]s}:
cmp $1, %rdx
je .L_EQ1_{[prefetch]s}
cmp $4, %rdx
jae .L_GE4_LE7_{[prefetch]s}
// 2 or 3 bytes: first 2 bytes and last 2 bytes (overlapping when len is 2 or 3).
.L_GE2_LE3_{[prefetch]s}:
movw (%rsi), %r8w
movw -2(%rsi,%rdx), %r9w
movw %r8w, (%rdi)
movw %r9w, -2(%rdi,%rdx)
ret
.balign 2
// Exactly 1 byte.
.L_EQ1_{[prefetch]s}:
movb (%rsi), %r8b
movb %r8b, (%rdi)
ret
// Aligning the target of a jump to an even address has a measurable
// speedup in microbenchmarks.
.balign 2
// 4 to 7 bytes: first 4 bytes and last 4 bytes (overlapping).
.L_GE4_LE7_{[prefetch]s}:
movl (%rsi), %r8d
movl -4(%rsi,%rdx), %r9d
movl %r8d, (%rdi)
movl %r9d, -4(%rdi,%rdx)
ret
.cfi_endproc
// .size {[function_prefix]s}__folly_memcpy_short_{[prefetch]s}, .-{[function_prefix]s}__folly_memcpy_short_{[prefetch]s} not supported by windows
// memcpy is an alternative entrypoint into the function named __folly_memcpy.
// The compiler is able to call memcpy since the name is global while
// stacktraces will show __folly_memcpy since that is the name of the function.
// This is intended to aid in debugging by making it obvious which version of
// memcpy is being used.
//
// Register use on entry: %rdi = dest, %rsi = src, %rdx = len.
// %rax is set to the original dest immediately and is the return value.
// For len > 32 the head of the source is loaded into ymm0-ymm3 and the tail into
// ymm7-ymm4 (in that order, only as far as len requires) before anything is stored.
.balign 64
.globl {[function_prefix]s}__folly_memcpy_{[prefetch]s}
// .type {[function_prefix]s}__folly_memcpy_{[prefetch]s}, @function not supported by windows
{[function_prefix]s}__folly_memcpy_{[prefetch]s}:
.cfi_startproc
mov %rdi, %rax // return: $rdi
test %rdx, %rdx
je .L_EQ0_{[prefetch]s}
// Prefetch the first and the last destination byte.
{[prefetch]s} (%rdi)
{[prefetch]s} -1(%rdi,%rdx)
cmp $8, %rdx
jb .L_GE1_LE7_{[prefetch]s}
.L_GE8_{[prefetch]s}:
cmp $32, %rdx
ja .L_GE33_{[prefetch]s}
.L_GE8_LE32_{[prefetch]s}:
cmp $16, %rdx
ja .L_GE17_LE32_{[prefetch]s}
// 8 to 16 bytes: first 8 and last 8 bytes, overlapping.
.L_GE8_LE16_{[prefetch]s}:
mov (%rsi), %r8
mov -8(%rsi,%rdx), %r9
mov %r8, (%rdi)
mov %r9, -8(%rdi,%rdx)
.L_EQ0_{[prefetch]s}:
ret
.balign 2
// 17 to 32 bytes: first 16 and last 16 bytes, overlapping.
.L_GE17_LE32_{[prefetch]s}:
movdqu (%rsi), %xmm0
movdqu -16(%rsi,%rdx), %xmm1
movdqu %xmm0, (%rdi)
movdqu %xmm1, -16(%rdi,%rdx)
ret
.balign 2
// Store ladder for 33 to 256 bytes. The matching loads happen at .L_GE33 below.
// Each label stores one head/tail pair of 32 bytes and falls through to the next.
.L_GE193_LE256_{[prefetch]s}:
vmovdqu %ymm3, 96(%rdi)
vmovdqu %ymm4, -128(%rdi,%rdx)
.L_GE129_LE192_{[prefetch]s}:
vmovdqu %ymm2, 64(%rdi)
vmovdqu %ymm5, -96(%rdi,%rdx)
.L_GE65_LE128_{[prefetch]s}:
vmovdqu %ymm1, 32(%rdi)
vmovdqu %ymm6, -64(%rdi,%rdx)
.L_GE33_LE64_{[prefetch]s}:
vmovdqu %ymm0, (%rdi)
vmovdqu %ymm7, -32(%rdi,%rdx)
vzeroupper
ret
.balign 2
// Load phase for len > 32: load one more head/tail pair per 64 bytes of length,
// then jump back into the store ladder above once len is covered.
.L_GE33_{[prefetch]s}:
vmovdqu (%rsi), %ymm0
vmovdqu -32(%rsi,%rdx), %ymm7
cmp $64, %rdx
jbe .L_GE33_LE64_{[prefetch]s}
{[prefetch]s} 64(%rdi)
vmovdqu 32(%rsi), %ymm1
vmovdqu -64(%rsi,%rdx), %ymm6
cmp $128, %rdx
jbe .L_GE65_LE128_{[prefetch]s}
{[prefetch]s} 128(%rdi)
vmovdqu 64(%rsi), %ymm2
vmovdqu -96(%rsi,%rdx), %ymm5
cmp $192, %rdx
jbe .L_GE129_LE192_{[prefetch]s}
{[prefetch]s} 192(%rdi)
vmovdqu 96(%rsi), %ymm3
vmovdqu -128(%rsi,%rdx), %ymm4
cmp $256, %rdx
jbe .L_GE193_LE256_{[prefetch]s}
.L_GE257_{[prefetch]s}:
{[prefetch]s} 256(%rdi)
// Check if there is an overlap. If there is an overlap then the caller
// has a bug since this is undefined behavior. However, for legacy
// reasons this behavior is expected by some callers.
//
// All copies through 256 bytes will operate as a memmove since for
// those sizes all reads are performed before any writes.
//
// This check uses the idea that there is an overlap if
// (%rdi < (%rsi + %rdx)) && (%rsi < (%rdi + %rdx)),
// or equivalently, there is no overlap if
// ((%rsi + %rdx) <= %rdi) || ((%rdi + %rdx) <= %rsi).
//
// %r9 will be used after .L_ALIGNED_DST_LOOP to calculate how many
// bytes remain to be copied.
// (%rsi + %rdx <= %rdi) => no overlap
lea (%rsi,%rdx), %r9
cmp %rdi, %r9
jbe .L_NO_OVERLAP_{[prefetch]s}
// (%rdi + %rdx <= %rsi) => no overlap
lea (%rdi,%rdx), %r8
cmp %rsi, %r8
// If no info is available in branch predictor's cache, Intel CPUs assume
// forward jumps are not taken. Use a forward jump as overlapping buffers
// are unlikely.
ja .L_OVERLAP_{[prefetch]s}
.balign 2
.L_NO_OVERLAP_{[prefetch]s}:
// Store the first 128 bytes (already loaded) with unaligned stores.
vmovdqu %ymm0, (%rdi)
vmovdqu %ymm1, 32(%rdi)
vmovdqu %ymm2, 64(%rdi)
vmovdqu %ymm3, 96(%rdi)
// Align %rdi to a 32 byte boundary.
// %rcx = 128 - 31 & %rdi
mov $128, %rcx
and $31, %rdi
sub %rdi, %rcx
// Advance src and dest by %rcx (dest is rebuilt from the original pointer in %rax)
// and shrink the remaining length accordingly.
lea (%rsi,%rcx), %rsi
lea (%rax,%rcx), %rdi
sub %rcx, %rdx
// %r8 is the end condition for the loop.
lea -128(%rsi,%rdx), %r8
// This threshold is half of L1 cache on a Skylake machine, which means that
// potentially all of L1 will be populated by this copy once it is executed
// (dst and src are cached for temporal copies).
// NON_TEMPORAL_STORE_THRESHOLD = $32768
// cmp NON_TEMPORAL_STORE_THRESHOLD, %rdx
cmp $32768, %rdx
jae .L_NON_TEMPORAL_LOOP_{[prefetch]s}
.balign 2
// Main loop: 128 bytes per iteration, unaligned loads and aligned stores.
.L_ALIGNED_DST_LOOP_{[prefetch]s}:
{[prefetch]s} 128(%rdi)
{[prefetch]s} 192(%rdi)
vmovdqu (%rsi), %ymm0
vmovdqu 32(%rsi), %ymm1
vmovdqu 64(%rsi), %ymm2
vmovdqu 96(%rsi), %ymm3
add $128, %rsi
vmovdqa %ymm0, (%rdi)
vmovdqa %ymm1, 32(%rdi)
vmovdqa %ymm2, 64(%rdi)
vmovdqa %ymm3, 96(%rdi)
add $128, %rdi
cmp %r8, %rsi
jb .L_ALIGNED_DST_LOOP_{[prefetch]s}
.L_ALIGNED_DST_LOOP_END_{[prefetch]s}:
// %r9 holds the source end, so %r9 - %rsi is the number of bytes still to copy.
// The last 128 bytes were loaded into ymm4-ymm7 up front and are stored unaligned here.
sub %rsi, %r9
mov %r9, %rdx
vmovdqu %ymm4, -128(%rdi,%rdx)
vmovdqu %ymm5, -96(%rdi,%rdx)
vmovdqu %ymm6, -64(%rdi,%rdx)
vmovdqu %ymm7, -32(%rdi,%rdx)
vzeroupper
ret
.balign 2
.L_NON_TEMPORAL_LOOP_{[prefetch]s}:
// The non-temporal loads need a 32 byte aligned source. Fall back to the regular loop otherwise.
testb $31, %sil
jne .L_ALIGNED_DST_LOOP_{[prefetch]s}
// This is prefetching the source data unlike ALIGNED_DST_LOOP which
// prefetches the destination data. This choice is again informed by
// benchmarks. With a non-temporal store the entirety of the cache line
// is being written so the previous data can be discarded without being
// fetched.
// NOTE(review): the second offset is 196 while every other prefetch pair in this
// file uses 128 and 192. This looks like a typo for 192 - confirm against upstream
// before changing it.
prefetchnta 128(%rsi)
prefetchnta 196(%rsi)
vmovntdqa (%rsi), %ymm0
vmovntdqa 32(%rsi), %ymm1
vmovntdqa 64(%rsi), %ymm2
vmovntdqa 96(%rsi), %ymm3
add $128, %rsi
vmovntdq %ymm0, (%rdi)
vmovntdq %ymm1, 32(%rdi)
vmovntdq %ymm2, 64(%rdi)
vmovntdq %ymm3, 96(%rdi)
add $128, %rdi
cmp %r8, %rsi
jb .L_NON_TEMPORAL_LOOP_{[prefetch]s}
sfence
jmp .L_ALIGNED_DST_LOOP_END_{[prefetch]s}
.L_OVERLAP_{[prefetch]s}:
.balign 2
cmp %rdi, %rsi
jb .L_OVERLAP_BWD_{[prefetch]s} // %rsi < %rdi => backward-copy
je .L_RET_{[prefetch]s} // %rsi == %rdi => return, nothing to copy
// Source & destination buffers overlap. Forward copy.
// Keep the first 32 source bytes in ymm8. They are stored last, after the loop.
vmovdqu (%rsi), %ymm8
// Align %rdi to a 32 byte boundary.
// %rcx = 32 - 31 & %rdi
mov $32, %rcx
and $31, %rdi
sub %rdi, %rcx
lea (%rsi,%rcx), %rsi
lea (%rax,%rcx), %rdi
sub %rcx, %rdx
// %r8 is the end condition for the loop.
lea -128(%rsi,%rdx), %r8
.L_OVERLAP_FWD_ALIGNED_DST_LOOP_{[prefetch]s}:
{[prefetch]s} 128(%rdi)
{[prefetch]s} 192(%rdi)
vmovdqu (%rsi), %ymm0
vmovdqu 32(%rsi), %ymm1
vmovdqu 64(%rsi), %ymm2
vmovdqu 96(%rsi), %ymm3
add $128, %rsi
vmovdqa %ymm0, (%rdi)
vmovdqa %ymm1, 32(%rdi)
vmovdqa %ymm2, 64(%rdi)
vmovdqa %ymm3, 96(%rdi)
add $128, %rdi
cmp %r8, %rsi
jb .L_OVERLAP_FWD_ALIGNED_DST_LOOP_{[prefetch]s}
// Same tail handling as the non-overlapping path: %r9 - %rsi bytes remain.
sub %rsi, %r9
mov %r9, %rdx
vmovdqu %ymm4, -128(%rdi,%rdx)
vmovdqu %ymm5, -96(%rdi,%rdx)
vmovdqu %ymm6, -64(%rdi,%rdx)
vmovdqu %ymm7, -32(%rdi,%rdx)
vmovdqu %ymm8, (%rax) // %rax == the original (unaligned) %rdi
vzeroupper
.L_RET_{[prefetch]s}:
ret
.L_OVERLAP_BWD_{[prefetch]s}:
// Save last 32 bytes.
vmovdqu -32(%rsi, %rdx), %ymm8
// %r9 = address of the last 32 destination bytes, written from ymm8 at the end.
lea -32(%rdi, %rdx), %r9
// %r8 is the end condition for the loop.
lea 128(%rsi), %r8
// Align %rdi+%rdx (destination end) to a 32 byte boundary.
// %rcx = (%rdi + %rdx - 32) & 31
mov %r9, %rcx
and $31, %rcx
// Set %rsi & %rdi to the end of the 32 byte aligned range.
sub %rcx, %rdx
add %rdx, %rsi
add %rdx, %rdi
.L_OVERLAP_BWD_ALIGNED_DST_LOOP_{[prefetch]s}:
{[prefetch]s} -128(%rdi)
{[prefetch]s} -192(%rdi)
vmovdqu -32(%rsi), %ymm4
vmovdqu -64(%rsi), %ymm5
vmovdqu -96(%rsi), %ymm6
vmovdqu -128(%rsi), %ymm7
sub $128, %rsi
vmovdqa %ymm4, -32(%rdi)
vmovdqa %ymm5, -64(%rdi)
vmovdqa %ymm6, -96(%rdi)
vmovdqa %ymm7, -128(%rdi)
sub $128, %rdi
cmp %r8, %rsi
ja .L_OVERLAP_BWD_ALIGNED_DST_LOOP_{[prefetch]s}
// Finish with the first 128 bytes (loaded into ymm0-ymm3 at .L_GE33) and the saved last 32 bytes.
vmovdqu %ymm0, (%rax) // %rax == the original unaligned %rdi
vmovdqu %ymm1, 32(%rax)
vmovdqu %ymm2, 64(%rax)
vmovdqu %ymm3, 96(%rax)
vmovdqu %ymm8, (%r9)
vzeroupper
ret
.cfi_endproc
// .size {[function_prefix]s}__folly_memcpy_{[prefetch]s}, .-{[function_prefix]s}__folly_memcpy_{[prefetch]s} not supported by windows

View file

@ -1,18 +0,0 @@
const std = @import("std");
const builtin = @import("builtin");
const arch = builtin.cpu.arch;
const function_prefix = @import("../assembly_util.zig").function_prefix;
// Instantiate the assembly template once per prefetch instruction.
// `prefetch` is substituted into the template both as the instruction mnemonic and as a
// suffix on every label and symbol, so the two instantiations do not collide.
// Only x86_64 is handled; any other architecture reaches `unreachable` in this comptime block.
comptime {
switch (arch) {
.x86_64 => {
inline for ([_][]const u8{ "prefetchw", "prefetcht0" }) |prefetch| {
asm (std.fmt.comptimePrint(@embedFile("memcpy-x86_64.S"), .{ .prefetch = prefetch, .function_prefix = function_prefix }));
}
},
else => unreachable,
}
}
// Symbols defined by the two template instantiations above.
// They are declared with the SysV calling convention, which the assembly relies on
// (it reads its arguments from %rdi, %rsi and %rdx).
pub extern fn __folly_memcpy_prefetchw(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.SysV) [*]u8;
pub extern fn __folly_memcpy_prefetcht0(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.SysV) [*]u8;

View file

@ -1 +0,0 @@
pub const memcpy = @import("musl/memcpy.zig").memcpy;

View file

@ -1,193 +0,0 @@
musl as a whole is licensed under the following standard MIT license:
----------------------------------------------------------------------
Copyright © 2005-2020 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------
Authors/contributors include:
A. Wilcox
Ada Worcester
Alex Dowad
Alex Suykov
Alexander Monakov
Andre McCurdy
Andrew Kelley
Anthony G. Basile
Aric Belsito
Arvid Picciani
Bartosz Brachaczek
Benjamin Peterson
Bobby Bingham
Boris Brezillon
Brent Cook
Chris Spiegel
Clément Vasseur
Daniel Micay
Daniel Sabogal
Daurnimator
David Carlier
David Edelsohn
Denys Vlasenko
Dmitry Ivanov
Dmitry V. Levin
Drew DeVault
Emil Renner Berthing
Fangrui Song
Felix Fietkau
Felix Janda
Gianluca Anzolin
Hauke Mehrtens
He X
Hiltjo Posthuma
Isaac Dunham
Jaydeep Patil
Jens Gustedt
Jeremy Huntwork
Jo-Philipp Wich
Joakim Sindholt
John Spencer
Julien Ramseier
Justin Cormack
Kaarle Ritvanen
Khem Raj
Kylie McClain
Leah Neukirchen
Luca Barbato
Luka Perkov
M Farkas-Dyck (Strake)
Mahesh Bodapati
Markus Wichmann
Masanori Ogino
Michael Clark
Michael Forney
Mikhail Kremnyov
Natanael Copa
Nicholas J. Kain
orc
Pascal Cuoq
Patrick Oppenlander
Petr Hosek
Petr Skocik
Pierre Carrier
Reini Urban
Rich Felker
Richard Pennington
Ryan Fairfax
Samuel Holland
Segev Finer
Shiz
sin
Solar Designer
Stefan Kristiansson
Stefan O'Rear
Szabolcs Nagy
Timo Teräs
Trutz Behn
Valentin Ochs
Will Dietz
William Haddon
William Pitcock
Portions of this software are derived from third-party works licensed
under terms compatible with the above MIT license:
The TRE regular expression implementation (src/regex/reg* and
src/regex/tre*) is Copyright © 2001-2008 Ville Laurikari and licensed
under a 2-clause BSD license (license text in the source files). The
included version has been heavily modified by Rich Felker in 2012, in
the interests of size, simplicity, and namespace cleanliness.
Much of the math library code (src/math/* and src/complex/*) is
Copyright © 1993,2004 Sun Microsystems or
Copyright © 2003-2011 David Schultz or
Copyright © 2003-2009 Steven G. Kargl or
Copyright © 2003-2009 Bruce D. Evans or
Copyright © 2008 Stephen L. Moshier or
Copyright © 2017-2018 Arm Limited
and labelled as such in comments in the individual source files. All
have been licensed under extremely permissive terms.
The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008
The Android Open Source Project and is licensed under a two-clause BSD
license. It was taken from Bionic libc, used on Android.
The AArch64 memcpy and memset code (src/string/aarch64/*) are
Copyright © 1999-2019, Arm Limited.
The implementation of DES for crypt (src/crypt/crypt_des.c) is
Copyright © 1994 David Burren. It is licensed under a BSD license.
The implementation of blowfish crypt (src/crypt/crypt_blowfish.c) was
originally written by Solar Designer and placed into the public
domain. The code also comes with a fallback permissive license for use
in jurisdictions that may not recognize the public domain.
The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011
Valentin Ochs and is licensed under an MIT-style license.
The x86_64 port was written by Nicholas J. Kain and is licensed under
the standard MIT terms.
The mips and microblaze ports were originally written by Richard
Pennington for use in the ellcc project. The original code was adapted
by Rich Felker for build system and code conventions during upstream
integration. It is licensed under the standard MIT terms.
The mips64 port was contributed by Imagination Technologies and is
licensed under the standard MIT terms.
The powerpc port was also originally written by Richard Pennington,
and later supplemented and integrated by John Spencer. It is licensed
under the standard MIT terms.
All other files which have no copyright comments are original works
produced specifically for use as part of this library, written either
by Rich Felker, the main author of the library, or by one or more
contibutors listed above. Details on authorship of individual files
can be found in the git version control history of the project. The
omission of copyright and license comments in each file is in the
interest of source tree size.
In addition, permission is hereby granted for all public header files
(include/* and arch/*/bits/*) and crt files intended to be linked into
applications (crt/*, ldso/dlstart.c, and arch/*/crt_arch.h) to omit
the copyright notice and permission notice otherwise required by the
license, and to use these files without any requirement of
attribution. These files include substantial contributions from:
Bobby Bingham
John Spencer
Nicholas J. Kain
Rich Felker
Richard Pennington
Stefan Kristiansson
Szabolcs Nagy
all of whom have explicitly granted such permission.
This file previously contained text expressing a belief that most of
the files covered by the above exception were sufficiently trivial not
to be subject to copyright, resulting in confusion over whether it
negated the permissions granted in the license. In the spirit of
permissive licensing, and of not having licensing issues being an
obstacle to adoption, that text has been removed.

View file

@ -1,2 +0,0 @@
These files all come from [musl libc](https://musl.libc.org/).
Roc uses a few of them directly instead of depending on musl libc as a whole.

View file

@ -1,30 +0,0 @@
// 32-bit x86 memcpy from musl.
// Arguments are read from the stack (dest, src, n); the return value in %eax is dest.
// %esi and %edi are saved on entry and restored before returning.
.global {[function_prefix]s}musl_memcpy
// Windows does not support the type directive.
// .type {[function_prefix]s}musl_memcpy,@function
{[function_prefix]s}musl_memcpy:
push %esi
push %edi
// After the two pushes the arguments sit at 12, 16 and 20 bytes above %esp.
mov 12(%esp),%edi
mov 16(%esp),%esi
mov 20(%esp),%ecx
mov %edi,%eax
// Fewer than 4 bytes, or dest already 4 byte aligned: skip the alignment loop.
cmp $4,%ecx
jc 1f
test $3,%edi
jz 1f
// Copy single bytes until dest is 4 byte aligned.
2: movsb
dec %ecx
test $3,%edi
jnz 2b
// Copy n / 4 dwords with rep movsl, keeping the byte count in %edx for the tail.
1: mov %ecx,%edx
shr $2,%ecx
rep
movsl
// Copy the remaining n % 4 bytes one at a time.
and $3,%edx
jz 1f
2: movsb
dec %edx
jnz 2b
1: pop %edi
pop %esi
ret

View file

@ -1,23 +0,0 @@
// x86_64 memcpy from musl.
// Reads dest from %rdi, src from %rsi and n from %rdx; the return value in %rax is dest.
.global {[function_prefix]s}musl_memcpy
// Windows does not support the type directive.
// .type {[function_prefix]s}musl_memcpy,@function
{[function_prefix]s}musl_memcpy:
mov %rdi,%rax
// Fewer than 8 bytes, or dest already 8 byte aligned: skip the alignment loop.
cmp $8,%rdx
jc 1f
test $7,%edi
jz 1f
// Copy single bytes until dest is 8 byte aligned.
2: movsb
dec %rdx
test $7,%edi
jnz 2b
// Copy n / 8 quadwords with rep movsq.
1: mov %rdx,%rcx
shr $3,%rcx
rep
movsq
// Copy the remaining n % 8 bytes one at a time.
and $7,%edx
jz 1f
2: movsb
dec %edx
jnz 2b
1: ret

View file

@ -1,223 +0,0 @@
const std = @import("std");
const builtin = @import("builtin");
const arch = builtin.cpu.arch;
const function_prefix = @import("../assembly_util.zig").function_prefix;
// Emit the musl assembly memcpy for the current architecture as global assembly.
// The `.S` files are format templates: `function_prefix` is substituted into the symbol name.
// Architectures without an assembly version emit nothing here.
comptime {
switch (arch) {
.x86_64 => {
asm (std.fmt.comptimePrint(@embedFile("memcpy-x86_64.S"), .{ .function_prefix = function_prefix }));
},
.x86 => {
asm (std.fmt.comptimePrint(@embedFile("memcpy-x86.S"), .{ .function_prefix = function_prefix }));
},
// TODO: add assembly implementations for other platforms.
else => {},
}
}
// The memcpy implementation used for this target:
// the musl assembly on non-Windows x86_64 and x86, the Zig fallback everywhere else.
pub const memcpy = if (builtin.os.tag != .windows and (arch == .x86_64 or arch == .x86))
    musl_memcpy
else
    fallback_memcpy;

// Symbol defined by the global assembly emitted for x86_64 and x86.
pub extern fn musl_memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8;
// Portable memcpy used where no assembly implementation is selected.
// Copies `len` bytes from `src` to `dest` and returns `dest`.
// Both pointers are `noalias`, so the buffers must not overlap.
//
// Strategy:
//   1. Copy single bytes until `s` is 4-byte aligned (or nothing is left).
//   2. If `d` is then 4-byte aligned as well, copy whole u32 words.
//   3. Otherwise, when at least 32 bytes remain, copy 1-3 bytes so that `d`
//      becomes aligned, then read aligned words around `s` and stitch
//      neighbouring words together with shifts so every store is aligned.
//   4. Copy whatever is left byte by byte.
//
// Note: this is written to only support little endian targets.
// To support big endian, `<<` and `>>` would need to be swapped.
pub fn fallback_memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8 {
    var d = dest;
    var s = src;
    var n = len;

    // Step 1: align the source.
    // The number of bytes to copy is the distance to the next 4-byte boundary,
    // not `s % 4`; everything below relies on `s` being aligned afterwards.
    while (n != 0 and @intFromPtr(s) % 4 != 0) : (n -= 1) {
        d[0] = s[0];
        d += 1;
        s += 1;
    }

    // Step 2: source and destination are both aligned, copy word by word.
    if (@intFromPtr(d) % 4 == 0) {
        var d4 = @as([*]align(4) u8, @alignCast(d));
        var s4 = @as([*]align(4) const u8, @alignCast(s));
        while (n >= 16) : (n -= 16) {
            const d_u32 = @as([*]u32, @ptrCast(d4));
            const s_u32 = @as([*]const u32, @ptrCast(s4));
            d_u32[0] = s_u32[0];
            d_u32[1] = s_u32[1];
            d_u32[2] = s_u32[2];
            d_u32[3] = s_u32[3];

            d4 += 16;
            s4 += 16;
        }
        // Fewer than 16 bytes remain; the set bits of `n` say which chunks are left.
        if (n & 8 != 0) {
            const d_u32 = @as([*]u32, @ptrCast(d4));
            const s_u32 = @as([*]const u32, @ptrCast(s4));
            d_u32[0] = s_u32[0];
            d_u32[1] = s_u32[1];

            d4 += 8;
            s4 += 8;
        }
        if (n & 4 != 0) {
            const d_u32 = @as([*]u32, @ptrCast(d4));
            const s_u32 = @as([*]const u32, @ptrCast(s4));
            d_u32[0] = s_u32[0];

            d4 += 4;
            s4 += 4;
        }
        d = d4;
        s = s4;
        if (n & 2 != 0) {
            d[0] = s[0];
            d += 1;
            s += 1;
            d[0] = s[0];
            d += 1;
            s += 1;
        }
        if (n & 1 != 0) {
            d[0] = s[0];
        }
        return dest;
    }

    // Step 3: the source is aligned but the destination is not.
    // `w` always holds the aligned source word whose tail bytes have not been
    // written yet; each output word combines the rest of `w` with the start of
    // the next aligned word `x`.
    if (n >= 32) {
        switch (@intFromPtr(d) % 4) {
            1 => {
                var w = @as([*]const u32, @ptrCast(@alignCast(s)))[0];
                // Three single bytes bring `d` to a 4-byte boundary.
                d[0] = s[0];
                d += 1;
                s += 1;
                d[0] = s[0];
                d += 1;
                s += 1;
                d[0] = s[0];
                d += 1;
                s += 1;
                n -= 3;
                // The loop reads up to s + 16, so it needs 17 bytes available.
                while (n >= 17) : (n -= 16) {
                    const d_u32 = @as([*]u32, @ptrCast(@alignCast(d)));
                    const s_u32 = @as([*]const u32, @ptrCast(@alignCast(s + 1)));
                    var x = s_u32[0];
                    d_u32[0] = (w >> 24) | (x << 8);
                    w = s_u32[1];
                    d_u32[1] = (x >> 24) | (w << 8);
                    x = s_u32[2];
                    d_u32[2] = (w >> 24) | (x << 8);
                    w = s_u32[3];
                    d_u32[3] = (x >> 24) | (w << 8);

                    d += 16;
                    s += 16;
                }
            },
            2 => {
                var w = @as([*]const u32, @ptrCast(@alignCast(s)))[0];
                // Two single bytes bring `d` to a 4-byte boundary.
                d[0] = s[0];
                d += 1;
                s += 1;
                d[0] = s[0];
                d += 1;
                s += 1;
                n -= 2;
                // The loop reads up to s + 17, so it needs 18 bytes available.
                while (n >= 18) : (n -= 16) {
                    const d_u32 = @as([*]u32, @ptrCast(@alignCast(d)));
                    const s_u32 = @as([*]const u32, @ptrCast(@alignCast(s + 2)));
                    var x = s_u32[0];
                    d_u32[0] = (w >> 16) | (x << 16);
                    w = s_u32[1];
                    d_u32[1] = (x >> 16) | (w << 16);
                    x = s_u32[2];
                    d_u32[2] = (w >> 16) | (x << 16);
                    w = s_u32[3];
                    d_u32[3] = (x >> 16) | (w << 16);

                    d += 16;
                    s += 16;
                }
            },
            3 => {
                var w = @as([*]const u32, @ptrCast(@alignCast(s)))[0];
                // One single byte brings `d` to a 4-byte boundary.
                d[0] = s[0];
                d += 1;
                s += 1;
                n -= 1;
                // The loop reads up to s + 18, so it needs 19 bytes available.
                while (n >= 19) : (n -= 16) {
                    const d_u32 = @as([*]u32, @ptrCast(@alignCast(d)));
                    const s_u32 = @as([*]const u32, @ptrCast(@alignCast(s + 3)));
                    var x = s_u32[0];
                    d_u32[0] = (w >> 8) | (x << 24);
                    w = s_u32[1];
                    d_u32[1] = (x >> 8) | (w << 24);
                    x = s_u32[2];
                    d_u32[2] = (w >> 8) | (x << 24);
                    w = s_u32[3];
                    d_u32[3] = (x >> 8) | (w << 24);

                    d += 16;
                    s += 16;
                }
            },
            // `d % 4 == 0` returned from the aligned branch above.
            else => unreachable,
        }
    }

    // Step 4: fewer than 32 bytes remain; copy them byte by byte, largest chunk first.
    if (n & 16 != 0) {
        comptime var i = 0;
        inline while (i < 16) : (i += 1) {
            d[0] = s[0];
            d += 1;
            s += 1;
        }
    }
    if (n & 8 != 0) {
        comptime var i = 0;
        inline while (i < 8) : (i += 1) {
            d[0] = s[0];
            d += 1;
            s += 1;
        }
    }
    if (n & 4 != 0) {
        comptime var i = 0;
        inline while (i < 4) : (i += 1) {
            d[0] = s[0];
            d += 1;
            s += 1;
        }
    }
    if (n & 2 != 0) {
        d[0] = s[0];
        d += 1;
        s += 1;
        d[0] = s[0];
        d += 1;
        s += 1;
    }
    if (n & 1 != 0) {
        d[0] = s[0];
    }
    return dest;
}

View file

@ -21,16 +21,18 @@ const SEAMLESS_SLICE_BIT: usize =
pub const RocList = extern struct {
bytes: ?[*]u8,
length: usize,
// This technically points to directly after the refcount.
// This is an optimization that enables use one code path for regular lists and slices for geting the refcount ptr.
capacity_or_ref_ptr: usize,
// For normal lists, contains the capacity.
// For seamless slices contains the pointer to the original allocation.
// This pointer is to the first element of the original list.
// Note: when storing an allocation pointer, the pointer must be right shifted by one.
capacity_or_alloc_ptr: usize,
pub inline fn len(self: RocList) usize {
return self.length;
}
pub fn getCapacity(self: RocList) usize {
const list_capacity = self.capacity_or_ref_ptr;
const list_capacity = self.capacity_or_alloc_ptr;
const slice_capacity = self.length;
const slice_mask = self.seamlessSliceMask();
const capacity = (list_capacity & ~slice_mask) | (slice_capacity & slice_mask);
@ -38,14 +40,14 @@ pub const RocList = extern struct {
}
pub fn isSeamlessSlice(self: RocList) bool {
return @as(isize, @bitCast(self.capacity_or_ref_ptr)) < 0;
return @as(isize, @bitCast(self.capacity_or_alloc_ptr)) < 0;
}
// This returns all ones if the list is a seamless slice.
// Otherwise, it returns all zeros.
// This is done without branching for optimization purposes.
pub fn seamlessSliceMask(self: RocList) usize {
return @as(usize, @bitCast(@as(isize, @bitCast(self.capacity_or_ref_ptr)) >> (@bitSizeOf(isize) - 1)));
return @as(usize, @bitCast(@as(isize, @bitCast(self.capacity_or_alloc_ptr)) >> (@bitSizeOf(isize) - 1)));
}
pub fn isEmpty(self: RocList) bool {
@ -53,7 +55,7 @@ pub const RocList = extern struct {
}
pub fn empty() RocList {
return RocList{ .bytes = null, .length = 0, .capacity_or_ref_ptr = 0 };
return RocList{ .bytes = null, .length = 0, .capacity_or_alloc_ptr = 0 };
}
pub fn eql(self: RocList, other: RocList) bool {
@ -99,21 +101,22 @@ pub const RocList = extern struct {
return list;
}
// returns a pointer to just after the refcount.
// It is just after the refcount as an optimization for other shared code paths.
// For regular list, it just returns their bytes pointer.
// For seamless slices, it returns the pointer stored in capacity_or_ref_ptr.
pub fn getRefcountPtr(self: RocList) ?[*]u8 {
const list_ref_ptr = @intFromPtr(self.bytes);
const slice_ref_ptr = self.capacity_or_ref_ptr << 1;
// returns a pointer to the original allocation.
// This pointer points to the first element of the allocation.
// The pointer is to just after the refcount.
// For big lists, it just returns their bytes pointer.
// For seamless slices, it returns the pointer stored in capacity_or_alloc_ptr.
pub fn getAllocationPtr(self: RocList) ?[*]u8 {
const list_alloc_ptr = @intFromPtr(self.bytes);
const slice_alloc_ptr = self.capacity_or_alloc_ptr << 1;
const slice_mask = self.seamlessSliceMask();
const ref_ptr = (list_ref_ptr & ~slice_mask) | (slice_ref_ptr & slice_mask);
return @as(?[*]u8, @ptrFromInt(ref_ptr));
const alloc_ptr = (list_alloc_ptr & ~slice_mask) | (slice_alloc_ptr & slice_mask);
return @as(?[*]u8, @ptrFromInt(alloc_ptr));
}
pub fn decref(self: RocList, alignment: u32) void {
// We use the raw capacity to ensure we always decrement the refcount of seamless slices.
utils.decref(self.getRefcountPtr(), self.capacity_or_ref_ptr, alignment);
utils.decref(self.getAllocationPtr(), self.capacity_or_alloc_ptr, alignment);
}
pub fn elements(self: RocList, comptime T: type) ?[*]T {
@ -187,7 +190,7 @@ pub const RocList = extern struct {
return RocList{
.bytes = utils.allocateWithRefcount(data_bytes, alignment),
.length = length,
.capacity_or_ref_ptr = capacity,
.capacity_or_alloc_ptr = capacity,
};
}
@ -204,7 +207,7 @@ pub const RocList = extern struct {
return RocList{
.bytes = utils.allocateWithRefcount(data_bytes, alignment),
.length = length,
.capacity_or_ref_ptr = length,
.capacity_or_alloc_ptr = length,
};
}
@ -216,13 +219,13 @@ pub const RocList = extern struct {
) RocList {
if (self.bytes) |source_ptr| {
if (self.isUnique() and !self.isSeamlessSlice()) {
const capacity = self.capacity_or_ref_ptr;
const capacity = self.capacity_or_alloc_ptr;
if (capacity >= new_length) {
return RocList{ .bytes = self.bytes, .length = new_length, .capacity_or_ref_ptr = capacity };
return RocList{ .bytes = self.bytes, .length = new_length, .capacity_or_alloc_ptr = capacity };
} else {
const new_capacity = utils.calculateCapacity(capacity, new_length, element_width);
const new_source = utils.unsafeReallocate(source_ptr, alignment, capacity, new_capacity, element_width);
return RocList{ .bytes = new_source, .length = new_length, .capacity_or_ref_ptr = new_capacity };
return RocList{ .bytes = new_source, .length = new_length, .capacity_or_alloc_ptr = new_capacity };
}
}
return self.reallocateFresh(alignment, new_length, element_width);
@ -500,8 +503,8 @@ pub fn listReleaseExcessCapacity(
update_mode: UpdateMode,
) callconv(.C) RocList {
const old_length = list.len();
// We use the direct list.capacity_or_ref_ptr to make sure both that there is no extra capacity and that it isn't a seamless slice.
if ((update_mode == .InPlace or list.isUnique()) and list.capacity_or_ref_ptr == old_length) {
// We use the direct list.capacity_or_alloc_ptr to make sure both that there is no extra capacity and that it isn't a seamless slice.
if ((update_mode == .InPlace or list.isUnique()) and list.capacity_or_alloc_ptr == old_length) {
return list;
} else if (old_length == 0) {
list.decref(alignment);
@ -649,14 +652,14 @@ pub fn listSublist(
output.length = keep_len;
return output;
} else {
const list_ref_ptr = (@intFromPtr(source_ptr) >> 1) | SEAMLESS_SLICE_BIT;
const slice_ref_ptr = list.capacity_or_ref_ptr;
const list_alloc_ptr = (@intFromPtr(source_ptr) >> 1) | SEAMLESS_SLICE_BIT;
const slice_alloc_ptr = list.capacity_or_alloc_ptr;
const slice_mask = list.seamlessSliceMask();
const ref_ptr = (list_ref_ptr & ~slice_mask) | (slice_ref_ptr & slice_mask);
const alloc_ptr = (list_alloc_ptr & ~slice_mask) | (slice_alloc_ptr & slice_mask);
return RocList{
.bytes = source_ptr + start * element_width,
.length = keep_len,
.capacity_or_ref_ptr = ref_ptr,
.capacity_or_alloc_ptr = alloc_ptr,
};
}
}
@ -959,16 +962,24 @@ pub fn listIsUnique(
return list.isEmpty() or list.isUnique();
}
// C-ABI entry point that forwards to `RocList.makeUnique`, passing along the
// caller-supplied alignment and element width, and returns the resulting list.
pub fn listClone(
    list: RocList,
    alignment: u32,
    element_width: usize,
) callconv(.C) RocList {
    const unique_list = list.makeUnique(alignment, element_width);
    return unique_list;
}
// C-ABI entry point that reports a list's capacity by delegating to
// `RocList.getCapacity`.
pub fn listCapacity(
    list: RocList,
) callconv(.C) usize {
    const capacity = list.getCapacity();
    return capacity;
}
pub fn listRefcountPtr(
pub fn listAllocationPtr(
list: RocList,
) callconv(.C) ?[*]u8 {
return list.getRefcountPtr();
return list.getAllocationPtr();
}
test "listConcat: non-unique with unique overlapping" {

View file

@ -4,11 +4,7 @@ const math = std.math;
const utils = @import("utils.zig");
const expect = @import("expect.zig");
const panic_utils = @import("panic.zig");
comptime {
_ = @import("compiler_rt.zig");
_ = @import("libc.zig");
}
const dbg_utils = @import("dbg.zig");
const ROC_BUILTINS = "roc_builtins";
const NUM = "num";
@ -17,6 +13,13 @@ const STR = "str";
// Dec Module
const dec = @import("dec.zig");
// Backing storage for the `_fltused` symbol exported below.
var FLTUSED: i32 = 0;
// Only Windows targets get the `_fltused` export; it is weak, so another definition can take precedence.
// NOTE(review): presumably this satisfies the `_fltused` reference that MSVC-style toolchains
// emit for floating-point code — confirm.
comptime {
if (builtin.os.tag == .windows) {
@export(FLTUSED, .{ .name = "_fltused", .linkage = .Weak });
}
}
comptime {
exportDecFn(dec.absC, "abs");
exportDecFn(dec.acosC, "acos");
@ -32,6 +35,7 @@ comptime {
exportDecFn(dec.fromF64C, "from_float.f64");
exportDecFn(dec.fromStr, "from_str");
exportDecFn(dec.fromU64C, "from_u64");
exportDecFn(dec.logC, "log");
exportDecFn(dec.mulC, "mul_with_overflow");
exportDecFn(dec.mulOrPanicC, "mul_or_panic");
exportDecFn(dec.mulSaturatedC, "mul_saturated");
@ -71,8 +75,9 @@ comptime {
exportListFn(list.listReplaceInPlace, "replace_in_place");
exportListFn(list.listSwap, "swap");
exportListFn(list.listIsUnique, "is_unique");
exportListFn(list.listClone, "clone");
exportListFn(list.listCapacity, "capacity");
exportListFn(list.listRefcountPtr, "refcount_ptr");
exportListFn(list.listAllocationPtr, "allocation_ptr");
exportListFn(list.listReleaseExcessCapacity, "release_excess_capacity");
}
@ -106,19 +111,6 @@ comptime {
exportNumFn(num.greaterThanU128, "greater_than.u128");
exportNumFn(num.greaterThanOrEqualU128, "greater_than_or_equal.u128");
exportNumFn(num.compareI128, "compare.i128");
exportNumFn(num.compareU128, "compare.u128");
exportNumFn(num.lessThanI128, "less_than.i128");
exportNumFn(num.lessThanOrEqualI128, "less_than_or_equal.i128");
exportNumFn(num.greaterThanI128, "greater_than.i128");
exportNumFn(num.greaterThanOrEqualI128, "greater_than_or_equal.i128");
exportNumFn(num.lessThanU128, "less_than.u128");
exportNumFn(num.lessThanOrEqualU128, "less_than_or_equal.u128");
exportNumFn(num.greaterThanU128, "greater_than.u128");
exportNumFn(num.greaterThanOrEqualU128, "greater_than_or_equal.u128");
inline for (INTEGERS, 0..) |T, i| {
num.exportPow(T, ROC_BUILTINS ++ "." ++ NUM ++ ".pow_int.");
num.exportDivCeil(T, ROC_BUILTINS ++ "." ++ NUM ++ ".div_ceil.");
@ -186,15 +178,12 @@ comptime {
const str = @import("str.zig");
comptime {
exportStrFn(str.init, "init");
exportStrFn(str.strToScalarsC, "to_scalars");
exportStrFn(str.strSplit, "str_split");
exportStrFn(str.countSegments, "count_segments");
exportStrFn(str.countGraphemeClusters, "count_grapheme_clusters");
exportStrFn(str.countUtf8Bytes, "count_utf8_bytes");
exportStrFn(str.isEmpty, "is_empty");
exportStrFn(str.getCapacity, "capacity");
exportStrFn(str.startsWith, "starts_with");
exportStrFn(str.startsWithScalar, "starts_with_scalar");
exportStrFn(str.endsWith, "ends_with");
exportStrFn(str.strConcatC, "concat");
exportStrFn(str.strJoinWithC, "joinWith");
@ -203,8 +192,6 @@ comptime {
exportStrFn(str.substringUnsafe, "substring_unsafe");
exportStrFn(str.getUnsafe, "get_unsafe");
exportStrFn(str.reserve, "reserve");
exportStrFn(str.getScalarUnsafe, "get_scalar_unsafe");
exportStrFn(str.appendScalar, "append_scalar");
exportStrFn(str.strToUtf8C, "to_utf8");
exportStrFn(str.fromUtf8RangeC, "from_utf8_range");
exportStrFn(str.repeat, "repeat");
@ -213,8 +200,7 @@ comptime {
exportStrFn(str.strTrimEnd, "trim_end");
exportStrFn(str.strCloneTo, "clone_to");
exportStrFn(str.withCapacity, "with_capacity");
exportStrFn(str.strGraphemes, "graphemes");
exportStrFn(str.strRefcountPtr, "refcount_ptr");
exportStrFn(str.strAllocationPtr, "allocation_ptr");
exportStrFn(str.strReleaseExcessCapacity, "release_excess_capacity");
inline for (INTEGERS) |T| {
@ -244,6 +230,7 @@ comptime {
exportUtilsFn(utils.dictPseudoSeed, "dict_pseudo_seed");
@export(panic_utils.panic, .{ .name = "roc_builtins.utils." ++ "panic", .linkage = .Weak });
@export(dbg_utils.dbg_impl, .{ .name = "roc_builtins.utils." ++ "dbg_impl", .linkage = .Weak });
if (builtin.target.cpu.arch != .wasm32) {
exportUtilsFn(expect.expectFailedStartSharedBuffer, "expect_failed_start_shared_buffer");
@ -259,6 +246,9 @@ comptime {
if (builtin.target.cpu.arch == .aarch64) {
@export(__roc_force_setjmp, .{ .name = "__roc_force_setjmp", .linkage = .Weak });
@export(__roc_force_longjmp, .{ .name = "__roc_force_longjmp", .linkage = .Weak });
} else if (builtin.os.tag == .windows) {
@export(__roc_force_setjmp_windows, .{ .name = "__roc_force_setjmp", .linkage = .Weak });
@export(__roc_force_longjmp_windows, .{ .name = "__roc_force_longjmp", .linkage = .Weak });
}
}
@ -274,14 +264,103 @@ pub extern fn _longjmp([*c]c_int, c_int) noreturn;
pub extern fn sigsetjmp([*c]c_int, c_int) c_int;
pub extern fn siglongjmp([*c]c_int, c_int) noreturn;
pub extern fn longjmperror() void;
// Zig won't expose the externs (and hence link correctly) unless we force them to be used.
// These C-ABI wrappers do nothing except forward their arguments to `setjmp` / `longjmp`.
fn __roc_force_setjmp(it: [*c]c_int) callconv(.C) c_int {
return setjmp(it);
}
fn __roc_force_longjmp(a0: [*c]c_int, a1: c_int) callconv(.C) noreturn {
longjmp(a0, a1);
}
// Extern declarations for the `windows_setjmp` / `windows_longjmp` symbols.
pub extern fn windows_setjmp([*c]c_int) c_int;
pub extern fn windows_longjmp([*c]c_int, c_int) noreturn;
// C-ABI wrappers that forward straight to the Windows-specific routines above.
fn __roc_force_setjmp_windows(it: [*c]c_int) callconv(.C) c_int {
return windows_setjmp(it);
}
fn __roc_force_longjmp_windows(a0: [*c]c_int, a1: c_int) callconv(.C) noreturn {
windows_longjmp(a0, a1);
}
// Hand-written `windows_setjmp` / `windows_longjmp`, emitted as global assembly (AT&T syntax)
// when the target OS is Windows. Both routines address the jump buffer through %rcx.
// Buffer layout, as used by the code below:
//   0x00 %rdx | 0x08 %rbx | 0x10 stack pointer | 0x18 %rbp | 0x20 %rsi | 0x28 %rdi
//   0x30-0x48 %r12-%r15 | 0x50 return address | 0x60-0xf0 %xmm6-%xmm15
// `windows_setjmp` stores those values, records the stack location just past its own return
// address, and returns 0 in %eax.
// `windows_longjmp` reloads the saved registers and stack pointer, then jumps to the saved
// return address.
// NOTE(review): `windows_longjmp` overwrites %rdx from the buffer and then copies the restored
// %rbx into %rax. If the second argument arrives in %rdx (Windows x64 convention — confirm),
// the value passed to longjmp never reaches the resumed setjmp call site, which instead sees
// whatever %rbx held when setjmp ran. Confirm whether this is intended.
// NOTE(review): the "0x10 is not used yet" remarks inside the assembly look stale, since
// offset 0x10 holds the saved stack pointer.
comptime {
if (builtin.os.tag == .windows) {
asm (
\\.global windows_longjmp;
\\windows_longjmp:
\\ movq 0x00(%rcx), %rdx
\\ movq 0x08(%rcx), %rbx
\\ # note 0x10 is not used yet!
\\ movq 0x18(%rcx), %rbp
\\ movq 0x20(%rcx), %rsi
\\ movq 0x28(%rcx), %rdi
\\ movq 0x30(%rcx), %r12
\\ movq 0x38(%rcx), %r13
\\ movq 0x40(%rcx), %r14
\\ movq 0x48(%rcx), %r15
\\
\\ # restore stack pointer
\\ movq 0x10(%rcx), %rsp
\\
\\ # load jmp address
\\ movq 0x50(%rcx), %r8
\\
\\ # set up return value
\\ movq %rbx, %rax
\\
\\ movdqu 0x60(%rcx), %xmm6
\\ movdqu 0x70(%rcx), %xmm7
\\ movdqu 0x80(%rcx), %xmm8
\\ movdqu 0x90(%rcx), %xmm9
\\ movdqu 0xa0(%rcx), %xmm10
\\ movdqu 0xb0(%rcx), %xmm11
\\ movdqu 0xc0(%rcx), %xmm12
\\ movdqu 0xd0(%rcx), %xmm13
\\ movdqu 0xe0(%rcx), %xmm14
\\ movdqu 0xf0(%rcx), %xmm15
\\
\\ jmp *%r8
\\
\\.global windows_setjmp;
\\windows_setjmp:
\\ movq %rdx, 0x00(%rcx)
\\ movq %rbx, 0x08(%rcx)
\\ # note 0x10 is not used yet!
\\ movq %rbp, 0x18(%rcx)
\\ movq %rsi, 0x20(%rcx)
\\ movq %rdi, 0x28(%rcx)
\\ movq %r12, 0x30(%rcx)
\\ movq %r13, 0x38(%rcx)
\\ movq %r14, 0x40(%rcx)
\\ movq %r15, 0x48(%rcx)
\\
\\ # the stack location right after the windows_setjmp call
\\ leaq 0x08(%rsp), %r8
\\ movq %r8, 0x10(%rcx)
\\
\\ movq (%rsp), %r8
\\ movq %r8, 0x50(%rcx)
\\
\\ movdqu %xmm6, 0x60(%rcx)
\\ movdqu %xmm7, 0x70(%rcx)
\\ movdqu %xmm8, 0x80(%rcx)
\\ movdqu %xmm9, 0x90(%rcx)
\\ movdqu %xmm10, 0xa0(%rcx)
\\ movdqu %xmm11, 0xb0(%rcx)
\\ movdqu %xmm12, 0xc0(%rcx)
\\ movdqu %xmm13, 0xd0(%rcx)
\\ movdqu %xmm14, 0xe0(%rcx)
\\ movdqu %xmm15, 0xf0(%rcx)
\\
\\ xorl %eax, %eax
\\ ret
\\
);
}
}
// Export helpers - Must be run inside a comptime
fn exportBuiltinFn(comptime func: anytype, comptime func_name: []const u8) void {
@export(func, .{ .name = "roc_builtins." ++ func_name, .linkage = .Strong });

View file

@ -233,7 +233,9 @@ pub fn exportCeiling(comptime F: type, comptime T: type, comptime name: []const
pub fn exportDivCeil(comptime T: type, comptime name: []const u8) void {
comptime var f = struct {
fn func(a: T, b: T) callconv(.C) T {
return math.divCeil(T, a, b) catch @panic("TODO runtime exception for dividing by 0!");
return math.divCeil(T, a, b) catch {
roc_panic("Integer division by 0!", 0);
};
}
}.func;
@export(f, .{ .name = name ++ @typeName(T), .linkage = .Strong });
@ -379,8 +381,7 @@ pub fn exportAddOrPanic(comptime T: type, comptime name: []const u8) void {
fn func(self: T, other: T) callconv(.C) T {
const result = addWithOverflow(T, self, other);
if (result.has_overflowed) {
roc_panic("integer addition overflowed!", 0);
unreachable;
roc_panic("Integer addition overflowed!", 0);
} else {
return result.value;
}
@ -437,8 +438,7 @@ pub fn exportSubOrPanic(comptime T: type, comptime name: []const u8) void {
fn func(self: T, other: T) callconv(.C) T {
const result = subWithOverflow(T, self, other);
if (result.has_overflowed) {
roc_panic("integer subtraction overflowed!", 0);
unreachable;
roc_panic("Integer subtraction overflowed!", 0);
} else {
return result.value;
}
@ -622,8 +622,7 @@ pub fn exportMulOrPanic(comptime T: type, comptime W: type, comptime name: []con
fn func(self: T, other: T) callconv(.C) T {
const result = @call(.always_inline, mulWithOverflow, .{ T, W, self, other });
if (result.has_overflowed) {
roc_panic("integer multiplication overflowed!", 0);
unreachable;
roc_panic("Integer multiplication overflowed!", 0);
} else {
return result.value;
}
@ -634,8 +633,8 @@ pub fn exportMulOrPanic(comptime T: type, comptime W: type, comptime name: []con
pub fn exportCountLeadingZeroBits(comptime T: type, comptime name: []const u8) void {
comptime var f = struct {
fn func(self: T) callconv(.C) usize {
return @as(usize, @clz(self));
fn func(self: T) callconv(.C) u8 {
return @as(u8, @clz(self));
}
}.func;
@export(f, .{ .name = name ++ @typeName(T), .linkage = .Strong });
@ -643,8 +642,8 @@ pub fn exportCountLeadingZeroBits(comptime T: type, comptime name: []const u8) v
pub fn exportCountTrailingZeroBits(comptime T: type, comptime name: []const u8) void {
comptime var f = struct {
fn func(self: T) callconv(.C) usize {
return @as(usize, @ctz(self));
fn func(self: T) callconv(.C) u8 {
return @as(u8, @ctz(self));
}
}.func;
@export(f, .{ .name = name ++ @typeName(T), .linkage = .Strong });
@ -652,8 +651,8 @@ pub fn exportCountTrailingZeroBits(comptime T: type, comptime name: []const u8)
pub fn exportCountOneBits(comptime T: type, comptime name: []const u8) void {
comptime var f = struct {
fn func(self: T) callconv(.C) usize {
return @as(usize, @popCount(self));
fn func(self: T) callconv(.C) u8 {
return @as(u8, @popCount(self));
}
}.func;
@export(f, .{ .name = name ++ @typeName(T), .linkage = .Strong });

View file

@ -2,14 +2,14 @@ const std = @import("std");
const RocStr = @import("str.zig").RocStr;
// Signals to the host that the program has panicked
extern fn roc_panic(msg: *const RocStr, tag_id: u32) callconv(.C) void;
extern fn roc_panic(msg: *const RocStr, tag_id: u32) callconv(.C) noreturn;
pub fn panic_help(msg: []const u8, tag_id: u32) void {
pub fn panic_help(msg: []const u8, tag_id: u32) noreturn {
var str = RocStr.init(msg.ptr, msg.len);
roc_panic(&str, tag_id);
}
// must export this explicitly because right now it is not used from zig code
pub fn panic(msg: *const RocStr, alignment: u32) callconv(.C) void {
pub fn panic(msg: *const RocStr, alignment: u32) callconv(.C) noreturn {
return roc_panic(msg, alignment);
}

File diff suppressed because it is too large Load diff

View file

@ -20,11 +20,11 @@ extern fn roc_realloc(c_ptr: *anyopaque, new_size: usize, old_size: usize, align
// This should never be passed a null pointer.
extern fn roc_dealloc(c_ptr: *anyopaque, alignment: u32) callconv(.C) void;
extern fn roc_dbg(file_path: *anyopaque, message: *anyopaque) callconv(.C) void;
extern fn roc_dbg(loc: *anyopaque, message: *anyopaque, src: *anyopaque) callconv(.C) void;
// Since roc_dbg is never used by the builtins, we need to export a function that uses it to stop DCE.
pub fn test_dbg(file_path: *anyopaque, message: *anyopaque) callconv(.C) void {
roc_dbg(file_path, message);
pub fn test_dbg(loc: *anyopaque, src: *anyopaque, message: *anyopaque) callconv(.C) void {
roc_dbg(loc, message, src);
}
extern fn kill(pid: c_int, sig: c_int) c_int;
@ -47,9 +47,10 @@ fn testing_roc_mmap(addr: ?*anyopaque, length: c_uint, prot: c_int, flags: c_int
return mmap(addr, length, prot, flags, fd, offset);
}
fn testing_roc_dbg(file_path: *anyopaque, message: *anyopaque) callconv(.C) void {
fn testing_roc_dbg(loc: *anyopaque, message: *anyopaque, src: *anyopaque) callconv(.C) void {
_ = message;
_ = file_path;
_ = src;
_ = loc;
}
comptime {

View file

@ -57,7 +57,7 @@ DecodeError : [TooShort]
## Return type of a [Decoder].
##
## This is can be useful when creating a [custom](#custom) decoder or when
## This can be useful when creating a [custom](#custom) decoder or when
## using [fromBytesPartial](#fromBytesPartial). For example writing unit tests,
## such as;
## ```

File diff suppressed because it is too large Load diff

View file

@ -34,8 +34,7 @@ interface Inspect
custom,
apply,
toInspector,
DbgFormatter,
toDbgStr,
toStr,
]
imports [
Bool.{ Bool },
@ -99,6 +98,12 @@ inspect = \val ->
(@Inspector valFn) = toInspector val
valFn (init {})
## Convert any value that implements `Inspect` into a `Str` by running it
## through `inspect` and then `toDbgStr`.
toStr : val -> Str where val implements Inspect
toStr = \val ->
    toDbgStr (inspect val)
# The current default formatter for inspect.
# This just returns a simple string for debugging.
# More powerful formatters will likely be wanted in the future.

View file

@ -32,6 +32,7 @@ interface List
product,
walkWithIndex,
walkUntil,
walkWithIndexUntil,
walkFrom,
walkFromUntil,
range,
@ -434,7 +435,8 @@ repeatHelp = \value, count, accum ->
## ```
reverse : List a -> List a
reverse = \list ->
reverseHelp list 0 (Num.subSaturated (List.len list) 1)
end = List.len list |> Num.subSaturated 1
reverseHelp (List.clone list) 0 end
reverseHelp = \list, left, right ->
if left < right then
@ -442,6 +444,9 @@ reverseHelp = \list, left, right ->
else
list
# Ensures that the list is unique (will re-use if already unique)
clone : List a -> List a
## Join the given lists together into one list.
## ```
## expect List.join [[1], [2, 3], [], [4, 5]] == [1, 2, 3, 4, 5]
@ -520,6 +525,25 @@ walkWithIndexHelp = \list, state, f, index, length ->
else
state
## Like [walkUntil], but the step function is also given the index of the
## element it is currently looking at. Whether the walk ran to the end or was
## stopped early with `Break`, the state it finished with is returned.
walkWithIndexUntil : List elem, state, (state, elem, Nat -> [Continue state, Break state]) -> state
walkWithIndexUntil = \list, state, f ->
    outcome = walkWithIndexUntilHelp list state f 0 (List.len list)

    when outcome is
        Continue finalState -> finalState
        Break finalState -> finalState
## internal helper: steps through `list` from `index` up to (not including)
## `length`, threading the state through `f` and stopping at the first `Break`.
walkWithIndexUntilHelp : List elem, s, (s, elem, Nat -> [Continue s, Break b]), Nat, Nat -> [Continue s, Break b]
walkWithIndexUntilHelp = \list, state, f, index, length ->
    if index >= length then
        # Ran off the end without the callback asking to stop.
        Continue state
    else
        elem = List.getUnsafe list index

        when f state elem index is
            Break b -> Break b
            Continue nextState ->
                walkWithIndexUntilHelp list nextState f (Num.addWrap index 1) length
## Note that in other languages, `walkBackwards` is sometimes called `reduceRight`,
## `fold`, `foldRight`, or `foldr`.
walkBackwards : List elem, state, (state, elem -> state) -> state

View file

@ -48,6 +48,7 @@ interface Num
isLte,
isGt,
isGte,
isApproxEq,
sin,
cos,
tan,
@ -560,8 +561,6 @@ tau = 2 * pi
# ------- Functions
## Convert a number to a [Str].
##
## This is the same as calling `Num.format {}` - so for more details on
## exact formatting, see `Num.format`.
## ```
## Num.toStr 42
## ```
@ -573,7 +572,6 @@ tau = 2 * pi
## When this function is given a non-[finite](Num.isFinite)
## [F64] or [F32] value, the returned string will be `"NaN"`, `"∞"`, or `"-∞"`.
##
## To get strings in hexadecimal, octal, or binary format, use `Num.format`.
toStr : Num * -> Str
intCast : Int a -> Int b
@ -664,6 +662,22 @@ isLte : Num a, Num a -> Bool
## is [defined to be unordered](https://en.wikipedia.org/wiki/NaN#Comparison_with_NaN).)
isGte : Num a, Num a -> Bool
## Returns `Bool.true` if the two numbers compare as equal, or if they differ
## by no more than `atol + rtol * Num.abs refValue`.
##
## The relative tolerance `rtol` (default `0.00001`) and the absolute tolerance
## `atol` (default `0.00000001`) can be provided to change the threshold.
##
## If either argument is [*NaN*](Num.isNaN), returns `Bool.false` no matter what. (*NaN*
## is [defined to be unordered](https://en.wikipedia.org/wiki/NaN#Comparison_with_NaN).)
isApproxEq : Frac a, Frac a, { rtol ? Frac a, atol ? Frac a } -> Bool
isApproxEq = \value, refValue, { rtol ? 0.00001, atol ? 0.00000001 } ->
    (value <= refValue && value >= refValue)
    || (Num.absDiff value refValue <= atol + rtol * Num.abs refValue)
## Returns `Bool.true` if the number is `0`, and `Bool.false` otherwise.
isZero : Num a -> Bool
@ -1054,7 +1068,7 @@ shiftLeftBy : Int a, U8 -> Int a
##
## The most significant bits are copied from the current.
## ```
## shiftRightBy 0b0000_0011 2 == 0b0000_1100
## shiftRightBy 0b0000_1100 2 == 0b0000_0011
##
## 0b0001_0100 |> shiftRightBy 2 == 0b0000_0101
##
@ -1065,16 +1079,16 @@ shiftRightBy : Int a, U8 -> Int a
## Bitwise logical right shift of a number by another
##
## The most significant bits always become 0. This means that shifting left is
## The most significant bits always become 0. This means that shifting right is
## like dividing by factors of two for unsigned integers.
## ```
## shiftRightBy 0b0010_1000 2 == 0b0000_1010
## shiftRightZfBy 0b0010_1000 2 == 0b0000_1010
##
## 0b0010_1000 |> shiftRightBy 2 == 0b0000_1010
## 0b0010_1000 |> shiftRightZfBy 2 == 0b0000_1010
##
## 0b1001_0000 |> shiftRightBy 2 == 0b0010_0100
## 0b1001_0000 |> shiftRightZfBy 2 == 0b0010_0100
## ```
## In some languages `shiftRightBy` is implemented as a binary operator `>>`.
## In some languages `shiftRightZfBy` is implemented as a binary operator `>>`.
shiftRightZfBy : Int a, U8 -> Int a
## Round off the given fraction to the nearest integer.
@ -1112,7 +1126,7 @@ powInt : Int a, Int a -> Int a
##
## 8
## ```
countLeadingZeroBits : Int a -> Nat
countLeadingZeroBits : Int a -> U8
## Counts the number of least-significant (trailing in a big-Endian sense) zeroes in an integer.
##
@ -1125,7 +1139,7 @@ countLeadingZeroBits : Int a -> Nat
##
## 8
## ```
countTrailingZeroBits : Int a -> Nat
countTrailingZeroBits : Int a -> U8
## Counts the number of set bits in an integer.
##
@ -1138,7 +1152,7 @@ countTrailingZeroBits : Int a -> Nat
##
## 0
## ```
countOneBits : Int a -> Nat
countOneBits : Int a -> U8
addWrap : Int range, Int range -> Int range
@ -1433,12 +1447,11 @@ toU32 : Int * -> U32
toU64 : Int * -> U64
toU128 : Int * -> U128
## Converts an [Int] to a [Nat]. If the given number doesn't fit in [Nat], it will be truncated.
## Converts an [Int] to a [Nat]. If the given number doesn't fit in [Nat], it will be truncated!
## Since [Nat] has a different maximum number depending on the system you're building
## for, this may give a different answer on different systems.
##
## For example, on a 32-bit system, `Num.maxNat` will return the same answer as
## `Num.maxU32`. This means that calling `Num.toNat 9_000_000_000` on a 32-bit
## For example, calling `Num.toNat 9_000_000_000` on a 32-bit
## system will return `Num.maxU32` instead of 9 billion, because 9 billion is
## higher than `Num.maxU32` and will not fit in a [Nat] on a 32-bit system.
##

View file

@ -2,9 +2,14 @@ interface Set
exposes [
Set,
empty,
withCapacity,
reserve,
releaseExcessCapacity,
single,
walk,
walkUntil,
keepIf,
dropIf,
insert,
len,
isEmpty,
@ -43,7 +48,7 @@ Set k := Dict.Dict k {} where k implements Hash & Eq
},
]
isEq : Set k, Set k -> Bool where k implements Hash & Eq
isEq : Set k, Set k -> Bool
isEq = \xs, ys ->
if len xs != len ys then
Bool.false
@ -54,7 +59,7 @@ isEq = \xs, ys ->
else
Break Bool.false
hashSet : hasher, Set k -> hasher where k implements Hash & Eq, hasher implements Hasher
hashSet : hasher, Set k -> hasher where hasher implements Hasher
hashSet = \hasher, @Set inner -> Hash.hash hasher inner
toInspectorSet : Set k -> Inspector f where k implements Inspect & Hash & Eq, f implements InspectFormatter
@ -72,13 +77,25 @@ toInspectorSet = \set ->
empty : {} -> Set *
empty = \{} -> @Set (Dict.empty {})
## Return a dictionary with space allocated for a number of entries. This
## Return a set with space allocated for a number of entries. This
## may provide a performance optimization if you know how many entries will be
## inserted.
withCapacity : Nat -> Set *
withCapacity = \cap ->
@Set (Dict.withCapacity cap)
## Enlarge the set so that it has room for at least `requested` additional elements.
reserve : Set k, Nat -> Set k
reserve = \@Set dict, requested ->
    dict
    |> Dict.reserve requested
    |> @Set
## Shrink the memory footprint of a set so that its capacity is as small as possible.
## This requires regenerating the metadata if the size changes.
## Some overhead remains, because the underlying dictionary metadata is always a power of 2.
releaseExcessCapacity : Set k -> Set k
releaseExcessCapacity = \@Set dict ->
    dict
    |> Dict.releaseExcessCapacity
    |> @Set
## Creates a new `Set` with a single value.
## ```
## singleItemSet = Set.single "Apple"
@ -86,7 +103,7 @@ withCapacity = \cap ->
##
## expect countValues == 1
## ```
single : k -> Set k where k implements Hash & Eq
single : k -> Set k
single = \key ->
Dict.single key {} |> @Set
@ -102,7 +119,7 @@ single = \key ->
##
## expect countValues == 3
## ```
insert : Set k, k -> Set k where k implements Hash & Eq
insert : Set k, k -> Set k
insert = \@Set dict, key ->
Dict.insert dict key {} |> @Set
@ -187,7 +204,7 @@ expect
## expect has10 == Bool.false
## expect has20 == Bool.true
## ```
remove : Set k, k -> Set k where k implements Hash & Eq
remove : Set k, k -> Set k
remove = \@Set dict, key ->
Dict.remove dict key |> @Set
@ -206,7 +223,7 @@ remove = \@Set dict, key ->
## expect hasApple == Bool.true
## expect hasBanana == Bool.false
## ```
contains : Set k, k -> Bool where k implements Hash & Eq
contains : Set k, k -> Bool
contains = \@Set dict, key ->
Dict.contains dict key
@ -219,7 +236,7 @@ contains = \@Set dict, key ->
##
## expect Set.toList numbers == values
## ```
toList : Set k -> List k where k implements Hash & Eq
toList : Set k -> List k
toList = \@Set dict ->
Dict.keys dict
@ -233,11 +250,12 @@ toList = \@Set dict ->
##
## expect Set.fromList [Pear, Apple, Banana] == values
## ```
fromList : List k -> Set k where k implements Hash & Eq
fromList : List k -> Set k
fromList = \list ->
initial = @Set (Dict.withCapacity (List.len list))
List.walk list initial insert
list
|> List.map \k -> (k, {})
|> Dict.fromList
|> @Set
## Combine two `Set` collections by keeping the
## [union](https://en.wikipedia.org/wiki/Union_(set_theory))
@ -249,7 +267,7 @@ fromList = \list ->
##
## expect Set.union set1 set2 == Set.fromList [Left, Right]
## ```
union : Set k, Set k -> Set k where k implements Hash & Eq
union : Set k, Set k -> Set k
union = \@Set dict1, @Set dict2 ->
Dict.insertAll dict1 dict2 |> @Set
@ -262,7 +280,7 @@ union = \@Set dict1, @Set dict2 ->
##
## expect Set.intersection set1 set2 == Set.single Left
## ```
intersection : Set k, Set k -> Set k where k implements Hash & Eq
intersection : Set k, Set k -> Set k
intersection = \@Set dict1, @Set dict2 ->
Dict.keepShared dict1 dict2 |> @Set
@ -276,7 +294,7 @@ intersection = \@Set dict1, @Set dict2 ->
##
## expect Set.difference first second == Set.fromList [Up, Down]
## ```
difference : Set k, Set k -> Set k where k implements Hash & Eq
difference : Set k, Set k -> Set k
difference = \@Set dict1, @Set dict2 ->
Dict.removeAll dict1 dict2 |> @Set
@ -299,14 +317,14 @@ difference = \@Set dict1, @Set dict2 ->
##
## expect result == 2
## ```
walk : Set k, state, (state, k -> state) -> state where k implements Hash & Eq
walk : Set k, state, (state, k -> state) -> state
walk = \@Set dict, state, step ->
Dict.walk dict state (\s, k, _ -> step s k)
## Convert each value in the set to something new, by calling a conversion
## function on each of them which receives the old value. Then return a
## new set containing the converted values.
map : Set a, (a -> b) -> Set b where a implements Hash & Eq, b implements Hash & Eq
map : Set a, (a -> b) -> Set b
map = \set, transform ->
init = withCapacity (capacity set)
@ -318,7 +336,7 @@ map = \set, transform ->
## (using [Set.union]) into one set.
##
## You may know a similar function named `concatMap` in other languages.
joinMap : Set a, (a -> Set b) -> Set b where a implements Hash & Eq, b implements Hash & Eq
joinMap : Set a, (a -> Set b) -> Set b
joinMap = \set, transform ->
init = withCapacity (capacity set) # Might be a pessimization
@ -340,10 +358,32 @@ joinMap = \set, transform ->
##
## expect result == FoundTheAnswer
## ```
walkUntil : Set k, state, (state, k -> [Continue state, Break state]) -> state where k implements Hash & Eq
walkUntil : Set k, state, (state, k -> [Continue state, Break state]) -> state
walkUntil = \@Set dict, state, step ->
Dict.walkUntil dict state (\s, k, _ -> step s k)
## Call the given predicate on every element of the `Set` and return a `Set`
## containing only the elements for which it returned `Bool.true`.
## ```
## expect Set.fromList [1,2,3,4,5]
##     |> Set.keepIf \k -> k >= 3
##     |> Bool.isEq (Set.fromList [3,4,5])
## ```
keepIf : Set k, (k -> Bool) -> Set k
keepIf = \@Set dict, predicate ->
    dict
    |> Dict.keepIf (\(k, _v) -> predicate k)
    |> @Set
## Call the given predicate on every element of the `Set` and return a `Set`
## containing only the elements for which it returned `Bool.false`.
## ```
## expect Set.fromList [1,2,3,4,5]
##     |> Set.dropIf \k -> k >= 3
##     |> Bool.isEq (Set.fromList [1,2])
## ```
dropIf : Set k, (k -> Bool) -> Set k
dropIf = \@Set dict, predicate ->
    dict
    |> Dict.dropIf (\(k, _v) -> predicate k)
    |> @Set
expect
first =
single "Keep Me"
@ -443,3 +483,13 @@ expect
|> insert orderOne
wrapperOne == wrapperTwo
expect
Set.fromList [1, 2, 3, 4, 5]
|> Set.keepIf \k -> k >= 3
|> Bool.isEq (Set.fromList [3, 4, 5])
expect
Set.fromList [1, 2, 3, 4, 5]
|> Set.dropIf \k -> k >= 3
|> Bool.isEq (Set.fromList [1, 2])

View file

@ -1,90 +1,331 @@
## Roc strings are sequences of text values. This module includes functions for combining strings,
## as well as breaking them up into smaller units—most commonly [extended grapheme clusters](http://www.unicode.org/glossary/#extended_grapheme_cluster)
## (referred to in this module's documentation as "graphemes" rather than "characters" for clarity;
## "characters" can mean very different things in different languages).
## Strings represent text. For example, `"Hi!"` is a string.
##
## This module focuses on graphemes (as opposed to, say, Unicode code points or LATIN-1 bytes)
## because graphemes avoid common classes of bugs. Breaking strings up using code points often
## leads to bugs around things like emoji, where multiple code points combine to form a
## single rendered glyph. Graphemes avoid these bugs by treating multi-code-point things like
## emojis as indivisible units.
## This guide starts at a high level and works down to the in-memory representation of strings and their [performance characteristics](#performance). For reasons that will be explained later in this guide, some string operations are in the `Str` module while others (notably [capitalization](#capitalization), [code points](#code-points), [graphemes](#graphemes), and sorting) are in separate packages. There's also a list of recommendations for [when to use code points, graphemes, and UTF-8](#when-to-use).
##
## Because graphemes can have variable length (there's no upper limit on how many code points one
## grapheme can represent), it takes linear time to count the number of graphemes in a string,
## and also linear time to find an individual grapheme within a string by its position (or "index")
## among the string's other graphemes. The only way to get constant-time access to these is in a way
## that can result in bugs if the string contains multi-code-point things like emojis, which is why
## this module does not offer those.
## ## Syntax
##
## The most common way to represent strings is using quotation marks:
##
## ## Working with Unicode strings in Roc
##
## Unicode can represent text values which span multiple languages, symbols, and emoji.
## Here are some valid Roc strings:
## ```
## "Roc!"
## "鹏"
## "🕊"
## "Hello, World!"
## ```
## Every Unicode string is a sequence of [extended grapheme clusters](http://www.unicode.org/glossary/#extended_grapheme_cluster).
## An extended grapheme cluster represents what a person reading a string might
## call a "character" - like "A" or "ö" or "👩‍👩‍👦‍👦".
## Because the term "character" means different things in different areas of
## programming, and "extended grapheme cluster" is a mouthful, in Roc we use the
## term "grapheme" as a shorthand for the more precise "extended grapheme cluster."
##
## You can get the number of graphemes in a string by calling `Str.countGraphemes` on it:
## Using this syntax, the whole string must go on one line. You can write multiline strings using triple quotes:
##
## ```
## Str.countGraphemes "Roc!"
## Str.countGraphemes "折り紙"
## Str.countGraphemes "🕊"
## text =
## """
## In memory, this string will not have any spaces
## at its start. That's because the first line
## starts at the same indentation level as the
## opening quotation mark. Actually, none of these
## lines will be indented.
##
## However, this line will be indented!
## """
## ```
## > The `countGraphemes` function walks through the entire string to get its answer,
## > so if you want to check whether a string is empty, you'll get much better performance
## > by calling `Str.isEmpty myStr` instead of `Str.countGraphemes myStr == 0`.
##
## ### Escape sequences
## In triple-quoted strings, both the opening and closing `"""` must be at the same indentation level. Lines in the string begin at that indentation level; the spaces that indent the multiline string itself are not considered content.
##
## ### Interpolation
##
## *String interpolation* is syntax for inserting a string into another string.
##
## If you put a `\` in a Roc string literal, it begins an *escape sequence*.
## An escape sequence is a convenient way to insert certain strings into other strings.
## For example, suppose you write this Roc string:
## ```
## "I took the one less traveled by,\nAnd that has made all the difference."
## name = "Sam"
##
## "Hi, my name is $(name)!"
## ```
## The `"\n"` in the middle will insert a line break into this string. There are
## other ways of getting a line break in there, but `"\n"` is the most common.
##
## Another way you could insert a newline is by writing `\u(0A)` instead of `\n`.
## That would result in the same string, because the `\u` escape sequence inserts
## [Unicode code points](https://unicode.org/glossary/#code_point) directly into
## the string. The Unicode code point 10 is a newline, and 10 is `0A` in hexadecimal.
## `\u` escape sequences are always followed by a hexadecimal number inside `(` and `)`
## like this.
## This will evaluate to the string `"Hi, my name is Sam!"`
##
## As another example, `"R\u(6F)c"` is the same string as `"Roc"`, because
## `"\u(6F)"` corresponds to the Unicode code point for lowercase `o`. If you
## want to [spice things up a bit](https://en.wikipedia.org/wiki/Metal_umlaut),
## you can write `"R\u(F6)c"` as an alternative way to get the string `"Röc"`.
## You can put any expression you like inside the parentheses, as long as it's all on one line:
##
## Roc strings also support these escape sequences:
##
## * `\\` - an actual backslash (writing a single `\` always begins an escape sequence!)
## * `\"` - an actual quotation mark (writing a `"` without a `\` ends the string)
## * `\r` - [carriage return](https://en.wikipedia.org/wiki/Carriage_Return)
## * `\t` - [horizontal tab](https://en.wikipedia.org/wiki/Tab_key#Tab_characters)
## * `\v` - [vertical tab](https://en.wikipedia.org/wiki/Tab_key#Tab_characters)
##
## You can also use escape sequences to insert named strings into other strings, like so:
## ```
## name = "Lee"
## city = "Roctown"
## greeting = "Hello there, \(name)! Welcome to \(city)."
## colors = ["red", "green", "blue"]
##
## "The colors are $(colors |> Str.joinWith ", ")!"
## ```
## Here, `greeting` will become the string `"Hello there, Lee! Welcome to Roctown."`.
## This is known as [string interpolation](https://en.wikipedia.org/wiki/String_interpolation),
## and you can use it as many times as you like inside a string. The name
## between the parentheses must refer to a `Str` value that is currently in
## scope, and it must be a name - it can't be an arbitrary expression like a function call.
##
## Interpolation can be used in multiline strings, but the part inside the parentheses must still be on one line.
##
## ### Escapes
##
## There are a few special escape sequences in strings:
##
## * `\n` becomes a [newline](https://en.wikipedia.org/wiki/Newline)
## * `\r` becomes a [carriage return](https://en.wikipedia.org/wiki/Carriage_return#Computers)
## * `\t` becomes a [tab](https://en.wikipedia.org/wiki/Tab_key#Tab_characters)
## * `\"` becomes a normal `"` (this lets you write `"` inside a single-line string)
## * `\\` becomes a normal `\` (this lets you write `\` without it being treated as an escape)
## * `\$` becomes a normal `$` (this lets you write `$` followed by `(` without it being treated as [interpolation](#interpolation))
##
## These work in both single-line and multiline strings. We'll also discuss another escape later, for inserting [Unicode code points](#code-points) into a string.
##
## ### Single quote syntax
##
## Try putting `'👩'` into `roc repl`. You should see this:
##
## ```
## » '👩'
##
## 128105 : Int *
## ```
##
## The single-quote `'` syntax lets you represent a Unicode code point (discussed in the next section) in source code, in a way that renders as the actual text it represents rather than as a number literal. This lets you see what it looks like in the source code rather than looking at a number.
##
## At runtime, the single-quoted value will be treated the same as an ordinary number literal—in other words, `'👩'` is syntax sugar for writing `128105`. You can verify this in `roc repl`:
##
## ```
## » '👩' == 128105
##
## Bool.true : Bool
## ```
##
## Double quotes (`"`), on the other hand, are not type-compatible with integers—not only because strings can be empty (`""` is valid, but `''` is not) but also because there may be more than one code point involved in any given string!
##
## There are also some special escape sequences in single-quote strings:
##
## * `\n` becomes a [newline](https://en.wikipedia.org/wiki/Newline)
## * `\r` becomes a [carriage return](https://en.wikipedia.org/wiki/Carriage_return#Computers)
## * `\t` becomes a [tab](https://en.wikipedia.org/wiki/Tab_key#Tab_characters)
## * `\'` becomes a normal `'` (this lets you write `'` inside a single-quote string)
## * `\\` becomes a normal `\` (this lets you write `\` without it being treated as an escape)
##
## Most often this single-quote syntax is used when writing parsers; most Roc programs never use it at all.
##
## ## Unicode
##
## Roc strings represent text using [Unicode](https://unicode.org). This guide will provide only a basic overview of Unicode (the [Unicode glossary](http://www.unicode.org/glossary/) has over 500 entries in it), but it will include the most relevant differences between these concepts:
##
## * Code points
## * Graphemes
## * UTF-8
##
## It will also explain why some operations are included in Roc's builtin [Str](https://www.roc-lang.org/builtins/Str)
## module, and why others are in separate packages like [roc-lang/unicode](https://github.com/roc-lang/unicode).
##
## ### Graphemes
##
## Let's start with the following string:
##
## `"👩‍👩‍👦‍👦"`
##
## Some might call this a "character." After all, in a monospace font, it looks to be about the same width as the letter "A" or the punctuation mark "!"—both of which are commonly called "characters." Unfortunately, the term "character" in programming has changed meanings many times across the years and across programming languages, and today it's become a major source of confusion.
##
## Unicode uses the less ambiguous term [*grapheme*](https://www.unicode.org/glossary/#grapheme), which it defines as a "user-perceived character" (as opposed to one of the several historical ways the term "character" has been used in programming) or, alternatively, "A minimally distinctive unit of writing in the context of a particular writing system."
##
## By Unicode's definition, each of the following is an individual grapheme:
##
## * `a`
## * `鹏`
## * `👩‍👩‍👦‍👦`
##
## Note that although *grapheme* is less ambiguous than *character*, its definition is still open to interpretation. To address this, Unicode has formally specified [text segmentation rules](https://www.unicode.org/reports/tr29/) which define grapheme boundaries in precise technical terms. We won't get into those rules here, but since they can change with new Unicode releases, functions for working with graphemes are in the [roc-lang/unicode](https://github.com/roc-lang/unicode) package rather than in the builtin [`Str`](https://www.roc-lang.org/builtins/Str) module. This allows them to be updated without being blocked on a new release of the Roc language.
##
## ### Code Points
##
## Every Unicode text value can be broken down into [Unicode code points](http://www.unicode.org/glossary/#code_point), which are integers between `0` and `1_114_111` that describe components of the text. In memory, every Roc string is a sequence of these integers stored in a format called UTF-8, which will be discussed [later](#utf8).
##
## The string `"👩‍👩‍👦‍👦"` happens to be made up of these code points:
##
## ```
## [128105, 8205, 128105, 8205, 128102, 8205, 128102]
## ```
##
## From this we can see that:
##
## - One grapheme can be made up of multiple code points. In fact, there is no upper limit on how many code points can go into a single grapheme! (Some programming languages use the term "character" to refer to individual code points; this can be confusing for graphemes like 👩‍👩‍👦‍👦 because it visually looks like "one character" but no single code point can represent it.)
## - Sometimes code points repeat within an individual grapheme. Here, 128105 appears twice, as does 128102, and there's an 8205 in between each of the other code points.
##
## ### Combining Code Points
##
## The reason every other code point in 👩‍👩‍👦‍👦 is 8205 is that code point 8205 joins together other code points. This emoji, known as ["Family: Woman, Woman, Boy, Boy"](https://emojipedia.org/family-woman-woman-boy-boy), is made by combining several emoji using [zero-width joiners](https://emojipedia.org/zero-width-joiner)—which are represented by code point 8205 in memory, and which have no visual representation on their own.
##
## Here are those code points again, this time with comments about what they represent:
##
## ```
## [128105] # "👩"
## [8205] # (joiner)
## [128105] # "👩"
## [8205] # (joiner)
## [128102] # "👦"
## [8205] # (joiner)
## [128102] # "👦"
## ```
##
## One way to read this is "woman emoji joined to woman emoji joined to boy emoji joined to boy emoji." Without the joins, it would be:
##
## ```
## "👩👩👦👦"
## ```
##
## With the joins, however, it is instead:
##
## ```
## "👩‍👩‍👦‍👦"
## ```
##
## Even though 👩‍👩‍👦‍👦 is visually smaller when rendered, it takes up almost twice as much memory as 👩👩👦👦 does! That's because it has all the same code points, plus the zero-width joiners in between them.
##
## ### String equality and normalization
##
## Besides emoji like 👩‍👩‍👦‍👦, another classic example of multiple code points being combined to render as one grapheme has to do with accent marks. Try putting these two strings into `roc repl`:
##
## ```
## "caf\u(e9)"
## "cafe\u(301)"
## ```
##
## The `\u(e9)` syntax is a way of inserting code points into string literals. In this case, it's the same as inserting the hexadecimal number `0xe9` as a code point onto the end of the string `"caf"`. Since Unicode code point `0xe9` happens to be `é`, the string `"caf\u(e9)"` ends up being identical in memory to the string `"café"`.
##
## We can verify this too:
##
## ```
## » "caf\u(e9)" == "café"
##
## Bool.true : Bool
## ```
##
## As it turns out, `"cafe\u(301)"` is another way to represent the same word. The Unicode code point 0x301 represents a ["combining acute accent"](https://unicodeplus.com/U+0301)—which essentially means that it will add an accent mark to whatever came before it. In this case, since `"cafe\u(301)"` has an `e` before the `"\u(301)"`, that `e` ends up with an accent mark on it and becomes `é`.
##
## Although these two strings get rendered identically to one another, they are different in memory because their code points are different! We can also confirm this in `roc repl`:
##
## ```
## » "caf\u(e9)" == "cafe\u(301)"
##
## Bool.false : Bool
## ```
##
## As you can imagine, this can be a source of bugs. Not only are they considered unequal, they also hash differently, meaning `"caf\u(e9)"` and `"cafe\u(301)"` can both be separate entries in the same [`Set`](https://www.roc-lang.org/builtins/Set).
##
## One way to prevent problems like these is to perform [Unicode normalization](https://www.unicode.org/reports/tr15/), a process which converts conceptually equivalent strings (like `"caf\u(e9)"` and `"cafe\u(301)"`) into one canonical in-memory representation. This makes equality checks on them pass, among other benefits.
##
## It would be technically possible for Roc to perform string normalization automatically on every equality check. Unfortunately, although some programs might want to treat `"caf\u(e9)"` and `"cafe\u(301)"` as equivalent, for other programs it might actually be important to be able to tell them apart. If these equality checks always passed, then there would be no way to tell them apart!
##
## As such, normalization must be performed explicitly when desired. Like graphemes, Unicode normalization rules can change with new releases of Unicode. As such, these functions are in separate packages instead of builtins (normalization is planned to be in [roc-lang/unicode](https://github.com/roc-lang/unicode) in the future, but it has not yet been implemented) so that updates to these functions based on new Unicode releases can happen without waiting on new releases of the Roc language.
##
## ### Capitalization
##
## We've already seen two examples of Unicode definitions that can change with new Unicode releases: graphemes and normalization. Another is capitalization; these rules can change with new Unicode releases (most often in the form of additions of new languages, but breaking changes to capitalization rules for existing languages are also possible), and so they are not included in builtin [`Str`](https://www.roc-lang.org/builtins/Str).
##
## This might seem particularly surprising, since capitalization functions are commonly included in standard libraries. However, it turns out that "capitalizing an arbitrary string" is impossible to do correctly without additional information.
##
## For example, what is the capitalized version of this string?
##
## ```
## "i"
## ```
##
## * In English, the correct answer is `"I"`.
## * In Turkish, the correct answer is `"İ"`.
##
## Similarly, the correct lowercased version of the string `"I"` is `"i"` in English and `"ı"` in Turkish.
##
## Turkish is not the only language to use this [dotless i](https://en.wikipedia.org/wiki/Dotless_I), and it's an example of how a function which capitalizes strings cannot give correct answers without the additional information of which language's capitalization rules should be used.
##
## Many languages defer to the operating system's [localization](https://en.wikipedia.org/wiki/Internationalization_and_localization) settings for this information. In that design, calling a program's capitalization function with an input string of `"i"` might give an answer of `"I"` on one machine and `"İ"` on a different machine, even though it was the same program running on both systems. Naturally, this can cause bugs—but more than that, writing tests to prevent bugs like this usually requires extra complexity compared to writing ordinary tests.
##
## In general, Roc programs should give the same answers for the same inputs even when run on different machines. There are exceptions to this (e.g. a program running out of system resources on one machine, while being able to make more progress on a machine that has more resources), but the operating system's language localization is not among them.
##
## For these reasons, capitalization functions are not in [`Str`](https://www.roc-lang.org/builtins/Str). There is a planned `roc-lang` package to handle use cases like capitalization and sorting—sorting can also vary by language as well as by things like country—but implementation work has not yet started on this package.
##
## ### UTF-8
##
## Earlier, we discussed how Unicode code points can be described as [`U32`](https://www.roc-lang.org/builtins/Num#U32) integers. However, many common code points are very low integers, and can fit into a `U8` instead of needing an entire `U32` to represent them in memory. UTF-8 takes advantage of this, using a variable-width encoding to represent code points in 1-4 bytes, which saves a lot of memory in the typical case—especially compared to [UTF-16](https://en.wikipedia.org/wiki/UTF-16), which always uses at least 2 bytes to represent each code point, or [UTF-32](https://en.wikipedia.org/wiki/UTF-32), which always uses the maximum 4 bytes.
##
## This guide won't cover all the details of UTF-8, but the basic idea is this:
##
## - If a code point is 127 or lower, UTF-8 stores it in 1 byte.
## - If it's between 128 and 2047, UTF-8 stores it in 2 bytes.
## - If it's between 2048 and 65535, UTF-8 stores it in 3 bytes.
## - If it's higher than that, UTF-8 stores it in 4 bytes.
##
## The specific [UTF-8 encoding](https://en.wikipedia.org/wiki/UTF-8#Encoding) of these bytes involves using 1 to 5 bits of each byte for metadata about multi-byte sequences.
##
## A valuable feature of UTF-8 is that it is backwards-compatible with the [ASCII](https://en.wikipedia.org/wiki/ASCII) encoding that was widely used for many years. ASCII existed before Unicode did, and only used the integers 0 to 127 to represent its equivalent of code points. The Unicode code points 0 to 127 represent the same semantic information as ASCII (e.g. the number 65 represents the letter "A" in both ASCII and in Unicode), and since UTF-8 represents code points 0 to 127 using one byte, all valid ASCII strings can be successfully parsed as UTF-8 without any need for conversion.
##
## Since many textual computer encodings—including [CSV](https://en.wikipedia.org/wiki/CSV), [XML](https://en.wikipedia.org/wiki/XML), and [JSON](https://en.wikipedia.org/wiki/JSON)—do not use any code points above 127 for their delimiters, it is often possible to write parsers for these formats using only `Str` functions which present UTF-8 as raw `U8` sequences, such as [`Str.walkUtf8`](https://www.roc-lang.org/builtins/Str#walkUtf8) and [`Str.toUtf8`](https://www.roc-lang.org/builtins/Str#toUtf8). In the typical case where they do not need to parse out individual Unicode code points, they can get everything they need from `Str` UTF-8 functions without needing to depend on other packages.
##
## ### When to use code points, graphemes, and UTF-8
##
## Deciding when to use code points, graphemes, and UTF-8 can be nonobvious to say the least!
##
## The way Roc organizes the `Str` module and supporting packages is designed to help answer this question. Every situation is different, but the following rules of thumb are typical:
##
## * Most often, using `Str` values along with helper functions like [`split`](https://www.roc-lang.org/builtins/Str#split), [`joinWith`](https://www.roc-lang.org/builtins/Str#joinWith), and so on, is the best option.
## * If you are specifically implementing a parser, working in UTF-8 bytes is usually the best option, using functions like [`walkUtf8`](https://www.roc-lang.org/builtins/Str#walkUtf8), [`toUtf8`](https://www.roc-lang.org/builtins/Str#toUtf8), and so on. (Note that single-quote literals produce number literals, so ASCII-range literals like `'a'` give an integer literal that works with a UTF-8 `U8`.)
## * If you are implementing a Unicode library like [roc-lang/unicode](https://github.com/roc-lang/unicode), working in terms of code points will be unavoidable. Aside from basic readability considerations like `\u(...)` in string literals, if you have the option to avoid working in terms of code points, it is almost always correct to avoid them.
## * If it seems like a good idea to split a string into "characters" (graphemes), you should definitely stop and reconsider whether this is really the best design. Almost always, doing this is some combination of more error-prone or slower (usually both) than doing something else that does not require taking graphemes into consideration.
##
## For this reason (among others), grapheme functions live in [roc-lang/unicode](https://github.com/roc-lang/unicode) rather than in [`Str`](https://www.roc-lang.org/builtins/Str). They are more niche than they seem, so they should not be reached for all the time!
##
## ## Performance
##
## This section deals with how Roc strings are represented in memory, and their performance characteristics.
##
## A normal heap-allocated roc `Str` is represented on the stack as:
## - A "capacity" unsigned integer, which represents how many bytes are allocated on the heap to hold the string's contents.
## - A "length" unsigned integer, which represents how many of the "capacity" bytes are actually in use. (A `Str` can have more bytes allocated on the heap than are actually in use.)
## - The memory address of the first byte in the string's actual contents.
##
## Each of these three fields is the same size: 64 bits on a 64-bit system, and 32 bits on a 32-bit system. The actual contents of the string are stored in one contiguous sequence of bytes, encoded as UTF-8, often on the heap but sometimes elsewhere—more on this later. Empty strings do not have heap allocations, so an empty `Str` on a 64-bit system still takes up 24 bytes on the stack (due to its three 64-bit fields).
##
## ### Reference counting and opportunistic mutation
##
## Like lists, dictionaries, and sets, Roc strings are automatically reference-counted and can benefit from opportunistic in-place mutation. The reference count is stored on the heap immediately before the first byte of the string's contents, and it has the same size as a memory address. This means it can count so high that it's impossible to write a Roc program which overflows a reference count, because having that many simultaneous references (each of which is a memory address) would have exhausted the operating system's address space first.
##
## When the string's reference count is 1, functions like [`Str.concat`](https://www.roc-lang.org/builtins/Str#concat) and [`Str.replaceEach`](https://www.roc-lang.org/builtins/Str#replaceEach) mutate the string in-place rather than allocating a new string. This preserves semantic immutability because it is unobservable in terms of the operation's output; if the reference count is 1, it means that memory would have otherwise been deallocated immediately anyway, and it's more efficient to reuse it instead of deallocating it and then immediately making a new allocation.
##
## The contents of statically-known strings (today that means string literals) are stored in the readonly section of the binary, so they do not need heap allocations or reference counts. They are not eligible for in-place mutation, since mutating the readonly section of the binary would cause an operating system [access violation](https://en.wikipedia.org/wiki/Segmentation_fault).
##
## ### Small String Optimization
##
## Roc uses a "small string optimization" when representing certain strings in memory.
##
## If you have a sufficiently long string, then on a 64-bit system it will be represented on the stack using 24 bytes, and on a 32-bit system it will take 12 bytes—plus however many bytes are in the string itself—on the heap. However, if there is a string shorter than either of these stack sizes (so, a string of up to 23 bytes on a 64-bit system, and up to 11 bytes on a 32-bit system), then that string will be stored entirely on the stack rather than having a separate heap allocation at all.
##
## This can be much more memory-efficient! However, `List` does not have this optimization (it has some runtime cost, and in the case of `List` it's not anticipated to come up nearly as often), which means when converting a small string to `List U8` it can result in a heap allocation.
##
## Note that this optimization is based entirely on how many UTF-8 bytes the string takes up in memory. It doesn't matter how many [graphemes](#graphemes), [code points](#code-points) or anything else it has; the only factor that determines whether a particular string is eligible for the small string optimization is the number of UTF-8 bytes it takes up in memory!
##
## ### Seamless Slices
##
## Try putting this into `roc repl`:
##
## ```
## » "foo/bar/baz" |> Str.split "/"
##
## ["foo", "bar", "baz"] : List Str
## ```
##
## All of these strings are small enough that the [small string optimization](#small) will apply, so none of them will be allocated on the heap.
##
## Now let's suppose they were long enough that this optimization no longer applied:
##
## ```
## » "a much, much, much, much/longer/string compared to the last one!" |> Str.split "/"
##
## ["a much, much, much, much", "longer", "string compared to the last one!"] : List Str
## ```
##
## Here, the only strings small enough for the small string optimization are `"/"` and `"longer"`. They will be allocated on the stack.
##
## The first and last strings in the returned list `"a much, much, much, much"` and `"string compared to the last one!"` will not be allocated on the heap either. Instead, they will be *seamless slices*, which means they will share memory with the original input string.
##
## * `"a much, much, much, much"` will share the first 24 bytes of the original string.
## * `"string compared to the last one!"` will share the last 32 bytes of the original string.
##
## All of these strings are semantically immutable, so sharing these bytes is an implementation detail that should only affect performance. By design, there is no way at either compile time or runtime to tell whether a string is a seamless slice. This allows the optimization's behavior to change in the future without affecting Roc programs' semantic behavior.
##
## Seamless slices create additional references to the original string, which make it ineligible for opportunistic mutation (along with the slices themselves; slices are never eligible for mutation), and which also make it take longer before the original string can be deallocated. A case where this might be noticeable in terms of performance would be:
## 1. A function takes a very large string as an argument and returns a much smaller slice into that string.
## 2. The smaller slice is used for a long time in the program, whereas the much larger original string stops being used.
## 3. In this situation, it might have been better for total program memory usage (although not necessarily overall performance) if the original large string could have been deallocated sooner, even at the expense of having to copy the smaller string into a new allocation instead of reusing the bytes with a seamless slice.
##
## If a situation like this comes up, a slice can be turned into a separate string by using [`Str.concat`](https://www.roc-lang.org/builtins/Str#concat) to concatenate the slice onto an empty string (or one created with [`Str.withCapacity`](https://www.roc-lang.org/builtins/Str#withCapacity)).
##
## Currently, the only way to get seamless slices of strings is by calling certain `Str` functions which return them. In general, `Str` functions which accept a string and return a subset of that string tend to do this. [`Str.trim`](https://www.roc-lang.org/builtins/Str#trim) is another example of a function which returns a seamless slice.
interface Str
exposes [
Utf8Problem,
@ -94,9 +335,7 @@ interface Str
joinWith,
split,
repeat,
countGraphemes,
countUtf8Bytes,
startsWithScalar,
toUtf8,
fromUtf8,
fromUtf8Range,
@ -119,7 +358,6 @@ interface Str
toI16,
toU8,
toI8,
toScalars,
replaceEach,
replaceFirst,
replaceLast,
@ -129,12 +367,8 @@ interface Str
walkUtf8WithIndex,
reserve,
releaseExcessCapacity,
appendScalar,
walkScalars,
walkScalarsUntil,
withCapacity,
withPrefix,
graphemes,
contains,
]
imports [
@ -265,8 +499,7 @@ joinWith : List Str, Str -> Str
## Split a string around a separator.
##
## Passing `""` for the separator is not useful;
## it returns the original string wrapped in a [List]. To split a string
## into its individual [graphemes](https://stackoverflow.com/a/27331885/4200103), use `Str.graphemes`
## it returns the original string wrapped in a [List].
## ```
## expect Str.split "1,2,3" "," == ["1","2","3"]
## expect Str.split "1,2,3" "" == ["1,2,3"]
@ -285,78 +518,6 @@ split : Str, Str -> List Str
## ```
repeat : Str, Nat -> Str
## Counts the number of [extended grapheme clusters](http://www.unicode.org/glossary/#extended_grapheme_cluster)
## in the string.
##
## Note that the number of extended grapheme clusters can be different from the number
## of visual glyphs rendered! Consider the following examples:
## ```
## expect Str.countGraphemes "Roc" == 3
## expect Str.countGraphemes "👩‍👩‍👦‍👦" == 4
## expect Str.countGraphemes "🕊" == 1
## ```
## Note that "👩‍👩‍👦‍👦" takes up 4 graphemes (even though visually it appears as a single
## glyph) because under the hood it's represented using an emoji modifier sequence.
## In contrast, "🕊" only takes up 1 grapheme because under the hood it's represented
## using a single Unicode code point.
countGraphemes : Str -> Nat
## Split a string into its constituent graphemes.
##
## This function breaks a string into its individual [graphemes](https://stackoverflow.com/a/27331885/4200103),
## returning them as a list of strings. This is useful for working with text that
## contains complex characters, such as emojis.
##
## Examples:
## ```
## expect Str.graphemes "Roc" == ["R", "o", "c"]
## expect Str.graphemes "नमस्ते" == ["न", "म", "स्", "ते"]
## expect Str.graphemes "👩‍👩‍👦‍👦" == ["👩‍", "👩‍", "👦‍", "👦"]
## ```
##
## Note that the "👩‍👩‍👦‍👦" example consists of 4 grapheme clusters, although it visually
## appears as a single glyph. This is because it uses an emoji modifier sequence.
graphemes : Str -> List Str
## If the string begins with a [Unicode code point](http://www.unicode.org/glossary/#code_point)
## equal to the given [U32], returns [Bool.true]. Otherwise returns [Bool.false].
##
## If the given string is empty, or if the given [U32] is not a valid
## code point, returns [Bool.false].
## ```
## expect Str.startsWithScalar "鹏 means 'roc'" 40527 # "鹏" is Unicode scalar 40527
## expect !Str.startsWithScalar "9" 9 # the Unicode scalar for "9" is 57, not 9
## expect !Str.startsWithScalar "" 40527
## ```
##
## ## Performance Details
##
## This runs slightly faster than [Str.startsWith], so
## if you want to check whether a string begins with something that's representable
## in a single code point, you can use (for example) `Str.startsWithScalar '鹏'`
## instead of `Str.startsWith "鹏"`. ('鹏' evaluates to the [U32] value `40527`.)
## This will not work for graphemes which take up multiple code points, however;
## `Str.startsWithScalar '👩‍👩‍👦‍👦'` would be a compiler error because 👩‍👩‍👦‍👦 takes up
## multiple code points and cannot be represented as a single [U32].
## You'd need to use `Str.startsWith "👩‍👩‍👦‍👦"` instead.
startsWithScalar : Str, U32 -> Bool
## Returns a [List] of the [Unicode scalar values](https://unicode.org/glossary/#unicode_scalar_value)
## in the given string.
##
## (Roc strings contain only scalar values, not [surrogate code points](https://unicode.org/glossary/#surrogate_code_point),
## so this is equivalent to returning a list of the string's [code points](https://unicode.org/glossary/#code_point).)
## ```
## expect Str.toScalars "Roc" == [82, 111, 99]
## expect Str.toScalars "鹏" == [40527]
## expect Str.toScalars "சி" == [2970, 3007]
## expect Str.toScalars "🐦" == [128038]
## expect Str.toScalars "👩‍👩‍👦‍👦" == [128105, 8205, 128105, 8205, 128102, 8205, 128102]
## expect Str.toScalars "I ♥ Roc" == [73, 32, 9829, 32, 82, 111, 99]
## expect Str.toScalars "" == []
## ```
toScalars : Str -> List U32
## Returns a [List] of the string's [U8] UTF-8 [code units](https://unicode.org/glossary/#code_unit).
## (To split the string into a [List] of smaller [Str] values instead of [U8] values,
## see [Str.split].)
@ -880,8 +1041,11 @@ walkUtf8WithIndexHelp = \string, state, step, index, length ->
## state for each byte.
##
## ```
## result = walkUtf8 "hello, world!" "" (\state, byte -> state ++ String.fromCodePoint byte)
## expect result == Ok "hello, world!"
## sumOfUtf8Bytes =
## Str.walkUtf8 "Hello, World!" 0 \total, byte ->
## total + Num.toU64 byte
##
## expect sumOfUtf8Bytes == 1129
## ```
walkUtf8 : Str, state, (state, U8 -> state) -> state
walkUtf8 = \str, initial, step ->
@ -904,80 +1068,6 @@ expect (walkUtf8 "鹏" [] List.append) == [233, 185, 143]
## Note: This will also convert seamless slices to regular strings.
releaseExcessCapacity : Str -> Str
## is UB when the scalar is invalid
appendScalarUnsafe : Str, U32 -> Str
## Append a [U32] scalar to the given string. If the given scalar is not a valid
## unicode value, it returns [Err InvalidScalar].
## ```
## expect Str.appendScalar "H" 105 == Ok "Hi"
## expect Str.appendScalar "😢" 0xabcdef == Err InvalidScalar
## ```
appendScalar : Str, U32 -> Result Str [InvalidScalar]
appendScalar = \string, scalar ->
if isValidScalar scalar then
Ok (appendScalarUnsafe string scalar)
else
Err InvalidScalar
isValidScalar : U32 -> Bool
isValidScalar = \scalar ->
scalar <= 0xD7FF || (scalar >= 0xE000 && scalar <= 0x10FFFF)
getScalarUnsafe : Str, Nat -> { scalar : U32, bytesParsed : Nat }
## Walks over the unicode [U32] values for the given [Str] and calls a function
## to update state for each.
## ```
## f : List U32, U32 -> List U32
## f = \state, scalar -> List.append state scalar
## expect Str.walkScalars "ABC" [] f == [65, 66, 67]
## ```
walkScalars : Str, state, (state, U32 -> state) -> state
walkScalars = \string, init, step ->
walkScalarsHelp string init step 0 (Str.countUtf8Bytes string)
walkScalarsHelp : Str, state, (state, U32 -> state), Nat, Nat -> state
walkScalarsHelp = \string, state, step, index, length ->
if index < length then
{ scalar, bytesParsed } = getScalarUnsafe string index
newState = step state scalar
walkScalarsHelp string newState step (Num.addWrap index bytesParsed) length
else
state
## Walks over the unicode [U32] values for the given [Str] and calls a function
## to update state for each.
## ```
## f : List U32, U32 -> [Break (List U32), Continue (List U32)]
## f = \state, scalar ->
## check = 66
## if scalar == check then
## Break [check]
## else
## Continue (List.append state scalar)
## expect Str.walkScalarsUntil "ABC" [] f == [66]
## expect Str.walkScalarsUntil "AxC" [] f == [65, 120, 67]
## ```
walkScalarsUntil : Str, state, (state, U32 -> [Break state, Continue state]) -> state
walkScalarsUntil = \string, init, step ->
walkScalarsUntilHelp string init step 0 (Str.countUtf8Bytes string)
walkScalarsUntilHelp : Str, state, (state, U32 -> [Break state, Continue state]), Nat, Nat -> state
walkScalarsUntilHelp = \string, state, step, index, length ->
if index < length then
{ scalar, bytesParsed } = getScalarUnsafe string index
when step state scalar is
Continue newState ->
walkScalarsUntilHelp string newState step (Num.addWrap index bytesParsed) length
Break newState ->
newState
else
state
strToNum : Str -> { berrorcode : U8, aresult : Num * }
strToNumHelp : Str -> Result (Num a) [InvalidNumStr]

View file

@ -43,7 +43,7 @@ interface TotallyNotJson
## An opaque type with the `EncoderFormatting` and
## `DecoderFormatting` abilities.
Json := { fieldNameMapping : FieldNameMapping }
Json := {}
implements [
EncoderFormatting {
u8: encodeU8,
@ -89,21 +89,11 @@ Json := { fieldNameMapping : FieldNameMapping }
]
## Returns a JSON `Encoder` and `Decoder`
json = @Json { fieldNameMapping: Default }
json = @Json {}
## Returns a JSON `Encoder` and `Decoder` with configuration options
jsonWithOptions = \{ fieldNameMapping ? Default } ->
@Json { fieldNameMapping }
## Mapping between Roc record fields and JSON object names
FieldNameMapping : [
Default, # no transformation
SnakeCase, # snake_case
PascalCase, # PascalCase
KebabCase, # kebab-case
CamelCase, # camelCase
Custom (Str -> Str), # caller-supplied function applied to each name
]
jsonWithOptions = \{} ->
@Json {}
# TODO encode as JSON numbers as base 10 decimal digits
# e.g. the REPL `Num.toStr 12e42f64` gives
@ -171,14 +161,6 @@ encodeBool = \b ->
else
List.concat bytes (Str.toUtf8 "false")
# Test encode boolean
expect
input = [Bool.true, Bool.false]
actual = Encode.toBytes input json
expected = Str.toUtf8 "[true,false]"
actual == expected
encodeString = \str ->
Encode.custom \bytes, @Json {} ->
List.concat bytes (encodeStrBytes str)
@ -248,38 +230,10 @@ escapedByteToJson = \b ->
0x09 -> [0x5c, 't'] # U+0009 Tab; the JSON escape is `\t` (`\r` is carriage return, U+000D)
_ -> [b] # any byte not matched by an earlier branch is emitted unchanged
expect escapedByteToJson '\n' == ['\\', 'n']
expect escapedByteToJson '\\' == ['\\', '\\']
expect escapedByteToJson '"' == ['\\', '"']
# Test encode small string
expect
input = "G'day"
actual = Encode.toBytes input json
expected = Str.toUtf8 "\"G'day\""
actual == expected
# Test encode large string
expect
input = "the quick brown fox jumps over the lazy dog"
actual = Encode.toBytes input json
expected = Str.toUtf8 "\"the quick brown fox jumps over the lazy dog\""
actual == expected
# Test encode with escapes e.g. "\r" encodes to "\\r"
expect
input = "the quick brown fox jumps over the lazy doga\r\nbc\\\"xz"
actual = Encode.toBytes input json
expected = Str.toUtf8 "\"the quick brown fox jumps over the lazy doga\\r\\nbc\\\\\\\"xz\""
actual == expected
encodeList = \lst, encodeElem ->
Encode.custom \bytes, @Json { fieldNameMapping } ->
Encode.custom \bytes, @Json {} ->
writeList = \{ buffer, elemsLeft }, elem ->
bufferWithElem = appendWith buffer (encodeElem elem) (@Json { fieldNameMapping })
bufferWithElem = appendWith buffer (encodeElem elem) (@Json {})
bufferWithSuffix =
if elemsLeft > 1 then
List.append bufferWithElem (Num.toU8 ',')
@ -293,27 +247,16 @@ encodeList = \lst, encodeElem ->
List.append withList (Num.toU8 ']')
# Test encode list of floats
expect
input : List F64
input = [-1, 0.00001, 1e12, 2.0e-2, 0.0003, 43]
actual = Encode.toBytes input json
expected = Str.toUtf8 "[-1,0.00001,1000000000000,0.02,0.0003,43]"
actual == expected
encodeRecord = \fields ->
Encode.custom \bytes, @Json { fieldNameMapping } ->
Encode.custom \bytes, @Json {} ->
writeRecord = \{ buffer, fieldsLeft }, { key, value } ->
fieldName = toObjectNameUsingMap key fieldNameMapping
fieldName = key
bufferWithKeyValue =
List.append buffer (Num.toU8 '"')
|> List.concat (Str.toUtf8 fieldName)
|> List.append (Num.toU8 '"')
|> List.append (Num.toU8 ':') # Note we need to encode using the json config here
|> appendWith value (@Json { fieldNameMapping })
|> appendWith value (@Json {})
bufferWithSuffix =
if fieldsLeft > 1 then
@ -328,52 +271,11 @@ encodeRecord = \fields ->
List.append bytesWithRecord (Num.toU8 '}')
# Test encode for a record with two strings ignoring whitespace
expect
input = { fruitCount: 2, ownerName: "Farmer Joe" }
encoder = jsonWithOptions { fieldNameMapping: PascalCase }
actual = Encode.toBytes input encoder
expected = Str.toUtf8 "{\"FruitCount\":2,\"OwnerName\":\"Farmer Joe\"}"
actual == expected
# Test encode of record with an array of strings and a boolean field
expect
input = { fruitFlavours: ["Apples", "Bananas", "Pears"], isFresh: Bool.true }
encoder = jsonWithOptions { fieldNameMapping: KebabCase }
actual = Encode.toBytes input encoder
expected = Str.toUtf8 "{\"fruit-flavours\":[\"Apples\",\"Bananas\",\"Pears\"],\"is-fresh\":true}"
actual == expected
# Test encode of record with a string and number field
expect
input = { firstSegment: "ab", secondSegment: 10u8 }
encoder = jsonWithOptions { fieldNameMapping: SnakeCase }
actual = Encode.toBytes input encoder
expected = Str.toUtf8 "{\"first_segment\":\"ab\",\"second_segment\":10}"
actual == expected
# Test encode of record of a record
expect
input = { outer: { inner: "a" }, other: { one: "b", two: 10u8 } }
encoder = jsonWithOptions { fieldNameMapping: Custom toYellingCase }
actual = Encode.toBytes input encoder
expected = Str.toUtf8 "{\"OTHER\":{\"ONE\":\"b\",\"TWO\":10},\"OUTER\":{\"INNER\":\"a\"}}"
actual == expected
toYellingCase = \str ->
Str.graphemes str
|> List.map toUppercase
|> Str.joinWith ""
encodeTuple = \elems ->
Encode.custom \bytes, @Json { fieldNameMapping } ->
Encode.custom \bytes, @Json {} ->
writeTuple = \{ buffer, elemsLeft }, elemEncoder ->
bufferWithElem =
appendWith buffer elemEncoder (@Json { fieldNameMapping })
appendWith buffer elemEncoder (@Json {})
bufferWithSuffix =
if elemsLeft > 1 then
@ -387,20 +289,11 @@ encodeTuple = \elems ->
{ buffer: bytesWithRecord } = List.walk elems { buffer: bytesHead, elemsLeft: List.len elems } writeTuple
List.append bytesWithRecord (Num.toU8 ']')
# Test encode of tuple
expect
input = ("The Answer is", 42)
actual = Encode.toBytes input json
expected = Str.toUtf8 "[\"The Answer is\",42]"
actual == expected
encodeTag = \name, payload ->
Encode.custom \bytes, @Json { fieldNameMapping } ->
Encode.custom \bytes, @Json {} ->
# Idea: encode `A v1 v2` as `{"A": [v1, v2]}`
writePayload = \{ buffer, itemsLeft }, encoder ->
bufferWithValue = appendWith buffer encoder (@Json { fieldNameMapping })
bufferWithValue = appendWith buffer encoder (@Json {})
bufferWithSuffix =
if itemsLeft > 1 then
List.append bufferWithValue (Num.toU8 ',')
@ -422,15 +315,6 @@ encodeTag = \name, payload ->
List.append bytesWithPayload (Num.toU8 ']')
|> List.append (Num.toU8 '}')
# Test encode of tag
expect
input = TheAnswer "is" 42
encoder = jsonWithOptions { fieldNameMapping: KebabCase }
actual = Encode.toBytes input encoder
expected = Str.toUtf8 "{\"TheAnswer\":[\"is\",42]}"
actual == expected
decodeU8 = Decode.custom \bytes, @Json {} ->
{ taken, rest } = takeJsonNumber bytes
@ -1334,7 +1218,7 @@ expect
# JSON OBJECTS -----------------------------------------------------------------
decodeRecord = \initialState, stepField, finalizer -> Decode.custom \bytes, @Json { fieldNameMapping } ->
decodeRecord = \initialState, stepField, finalizer -> Decode.custom \bytes, @Json {} ->
# Recursively build up record from object field:value pairs
decodeFields = \recordState, bytesBeforeField ->
@ -1361,8 +1245,7 @@ decodeRecord = \initialState, stepField, finalizer -> Decode.custom \bytes, @Jso
# Decode the json value
{ val: updatedRecord, rest: bytesAfterValue } <-
(
fieldName =
fromObjectNameUsingMap objectName fieldNameMapping
fieldName = objectName
# Retrieve value decoder for the current field
when stepField recordState fieldName is
@ -1375,7 +1258,7 @@ decodeRecord = \initialState, stepField, finalizer -> Decode.custom \bytes, @Jso
Keep valueDecoder ->
# Decode the value using the decoder from the recordState
# Note we need to pass json config options recursively here
Decode.decodeWith valueBytes valueDecoder (@Json { fieldNameMapping })
Decode.decodeWith valueBytes valueDecoder (@Json {})
)
|> tryDecode
@ -1444,327 +1327,3 @@ ObjectState : [
AfterClosingBrace Nat,
InvalidObject,
]
# Test decode of record with two strings ignoring whitespace
expect
input = Str.toUtf8 " {\n\"FruitCount\"\t:2\n, \"OwnerName\": \"Farmer Joe\" } "
decoder = jsonWithOptions { fieldNameMapping: PascalCase }
actual = Decode.fromBytesPartial input decoder
expected = Ok { fruitCount: 2, ownerName: "Farmer Joe" }
actual.result == expected
# Test decode of record with an array of strings and a boolean field
expect
input = Str.toUtf8 "{\"fruit-flavours\": [\"Apples\",\"Bananas\",\"Pears\"], \"is-fresh\": true }"
decoder = jsonWithOptions { fieldNameMapping: KebabCase }
actual = Decode.fromBytesPartial input decoder
expected = Ok { fruitFlavours: ["Apples", "Bananas", "Pears"], isFresh: Bool.true }
actual.result == expected
# Test decode of record with a string and number field
expect
input = Str.toUtf8 "{\"first_segment\":\"ab\",\"second_segment\":10}"
decoder = jsonWithOptions { fieldNameMapping: SnakeCase }
actual = Decode.fromBytesPartial input decoder
expected = Ok { firstSegment: "ab", secondSegment: 10u8 }
actual.result == expected
# Test decode of record of a record
expect
input = Str.toUtf8 "{\"OUTER\":{\"INNER\":\"a\"},\"OTHER\":{\"ONE\":\"b\",\"TWO\":10}}"
decoder = jsonWithOptions { fieldNameMapping: Custom fromYellingCase }
actual = Decode.fromBytesPartial input decoder
expected = Ok { outer: { inner: "a" }, other: { one: "b", two: 10u8 } }
actual.result == expected
fromYellingCase = \str ->
Str.graphemes str
|> List.map toLowercase
|> Str.joinWith ""
expect fromYellingCase "YELLING" == "yelling"
# Complex example from IETF RFC 8259 (2017)
complexExampleJson = Str.toUtf8 "{\"Image\":{\"Animated\":false,\"Height\":600,\"Ids\":[116,943,234,38793],\"Thumbnail\":{\"Height\":125,\"Url\":\"http:\\/\\/www.example.com\\/image\\/481989943\",\"Width\":100},\"Title\":\"View from 15th Floor\",\"Width\":800}}"
complexExampleRecord = {
image: {
width: 800,
height: 600,
title: "View from 15th Floor",
thumbnail: {
url: "http://www.example.com/image/481989943",
height: 125,
width: 100,
},
animated: Bool.false,
ids: [116, 943, 234, 38793],
},
}
# Test decode of Complex Example
expect
input = complexExampleJson
decoder = jsonWithOptions { fieldNameMapping: PascalCase }
actual = Decode.fromBytes input decoder
expected = Ok complexExampleRecord
actual == expected
# Test encode of Complex Example
expect
input = complexExampleRecord
encoder = jsonWithOptions { fieldNameMapping: PascalCase }
actual = Encode.toBytes input encoder
expected = complexExampleJson
actual == expected
# Convert a JSON object name into a Roc field name using the given
# `FieldNameMapping`. `Default` returns the name unchanged, each named case
# delegates to the matching `from*Case` helper, and `Custom` applies the
# function it carries.
fromObjectNameUsingMap : Str, FieldNameMapping -> Str
fromObjectNameUsingMap = \objectName, fieldNameMapping ->
when fieldNameMapping is
Default -> objectName
SnakeCase -> fromSnakeCase objectName
PascalCase -> fromPascalCase objectName
KebabCase -> fromKebabCase objectName
CamelCase -> fromCamelCase objectName
Custom transformation -> transformation objectName
# Convert a Roc field name into a JSON object name using the given
# `FieldNameMapping`. `Default` returns the name unchanged, each named case
# delegates to the matching `to*Case` helper, and `Custom` applies the
# function it carries.
toObjectNameUsingMap : Str, FieldNameMapping -> Str
toObjectNameUsingMap = \fieldName, fieldNameMapping ->
when fieldNameMapping is
Default -> fieldName
SnakeCase -> toSnakeCase fieldName
PascalCase -> toPascalCase fieldName
KebabCase -> toKebabCase fieldName
CamelCase -> toCamelCase fieldName
Custom transformation -> transformation fieldName
# Convert a `snake_case` JSON Object name to a Roc Field name
fromSnakeCase = \str ->
snakeToCamel str
# Convert a `PascalCase` JSON Object name to a Roc Field name
fromPascalCase = \str ->
pascalToCamel str
# Convert a `kebab-case` JSON Object name to a Roc Field name
fromKebabCase = \str ->
kebabToCamel str
# Convert a `camelCase` JSON Object name to a Roc Field name
fromCamelCase = \str ->
# Nothing to change as Roc field names are camelCase by default
str
# Convert a `camelCase` Roc Field name to a `snake_case` JSON Object name
toSnakeCase = \str ->
camelToSnake str
# Convert a `camelCase` Roc Field name to a `PascalCase` JSON Object name
toPascalCase = \str ->
camelToPascal str
# Convert a `camelCase` Roc Field name to a `kebab-case` JSON Object name
toKebabCase = \str ->
camelToKebeb str
# Convert a `camelCase` Roc Field name to a `camelCase` JSON Object name
toCamelCase = \str ->
# Nothing to change as Roc field names are camelCase by default
str
# Convert a `snake_case` string to `camelCase`: split on "_", keep the first
# segment as-is, apply `uppercaseFirst` to every later segment, and join the
# pieces back together with no separator.
snakeToCamel : Str -> Str
snakeToCamel = \str ->
segments = Str.split str "_"
when segments is
[first, ..] ->
segments
|> List.dropFirst 1
|> List.map uppercaseFirst
|> List.prepend first
|> Str.joinWith ""
_ -> str
expect snakeToCamel "snake_case_string" == "snakeCaseString"
# Convert a `PascalCase` string to `camelCase` by passing its first grapheme
# through `toLowercase` and leaving the remaining graphemes untouched.
# If the string has no graphemes it is returned unchanged.
pascalToCamel : Str -> Str
pascalToCamel = \str ->
segments = Str.graphemes str
when segments is
[a, ..] ->
first = toLowercase a
rest = List.dropFirst segments 1
Str.joinWith (List.prepend rest first) ""
_ -> str
expect pascalToCamel "PascalCaseString" == "pascalCaseString"
# Convert a `kebab-case` string to `camelCase`: split on "-", keep the first
# segment as-is, apply `uppercaseFirst` to every later segment, and join the
# pieces back together with no separator.
kebabToCamel : Str -> Str
kebabToCamel = \str ->
segments = Str.split str "-"
when segments is
[first, ..] ->
segments
|> List.dropFirst 1
|> List.map uppercaseFirst
|> List.prepend first
|> Str.joinWith ""
_ -> str
expect kebabToCamel "kebab-case-string" == "kebabCaseString"
# Convert a `camelCase` string to `PascalCase` by passing its first grapheme
# through `toUppercase` and leaving the remaining graphemes untouched.
# If the string has no graphemes it is returned unchanged.
camelToPascal : Str -> Str
camelToPascal = \str ->
segments = Str.graphemes str
when segments is
[a, ..] ->
first = toUppercase a
rest = List.dropFirst segments 1
Str.joinWith (List.prepend rest first) ""
_ -> str
expect camelToPascal "someCaseString" == "SomeCaseString"
# Convert a `camelCase` string to `kebab-case`. Splits the string into
# graphemes, lets `camelToKebabHelp` build the output list, and joins it.
# NOTE: the name is a misspelling of "camelToKebab"; `toKebabCase` calls it
# by this spelling, so the two must be renamed together.
camelToKebeb : Str -> Str
camelToKebeb = \str ->
rest = Str.graphemes str
taken = List.withCapacity (List.len rest)
camelToKebabHelp { taken, rest }
|> .taken
|> Str.joinWith ""
# Recursive worker for `camelToKebeb`. Moves one grapheme at a time from
# `rest` to `taken` until `rest` is empty. A grapheme for which `isUpperCase`
# holds is emitted as "-" followed by its `toLowercase` form; anything else
# is copied as-is. An upper-case first grapheme therefore yields a leading "-".
camelToKebabHelp : { taken : List Str, rest : List Str } -> { taken : List Str, rest : List Str }
camelToKebabHelp = \{ taken, rest } ->
when rest is
[] -> { taken, rest }
[a, ..] if isUpperCase a ->
camelToKebabHelp {
taken: List.concat taken ["-", toLowercase a],
rest: List.dropFirst rest 1,
}
[a, ..] ->
camelToKebabHelp {
taken: List.append taken a,
rest: List.dropFirst rest 1,
}
expect camelToKebeb "someCaseString" == "some-case-string"
# Convert a `camelCase` string to `snake_case`. Splits the string into
# graphemes, lets `camelToSnakeHelp` build the output list, and joins it.
camelToSnake : Str -> Str
camelToSnake = \str ->
rest = Str.graphemes str
taken = List.withCapacity (List.len rest)
camelToSnakeHelp { taken, rest }
|> .taken
|> Str.joinWith ""
# Recursive worker for `camelToSnake`. Moves one grapheme at a time from
# `rest` to `taken` until `rest` is empty. A grapheme for which `isUpperCase`
# holds is emitted as "_" followed by its `toLowercase` form; anything else
# is copied as-is. An upper-case first grapheme therefore yields a leading "_".
camelToSnakeHelp : { taken : List Str, rest : List Str } -> { taken : List Str, rest : List Str }
camelToSnakeHelp = \{ taken, rest } ->
when rest is
[] -> { taken, rest }
[a, ..] if isUpperCase a ->
camelToSnakeHelp {
taken: List.concat taken ["_", toLowercase a],
rest: List.dropFirst rest 1,
}
[a, ..] ->
camelToSnakeHelp {
taken: List.append taken a,
rest: List.dropFirst rest 1,
}
expect camelToSnake "someCaseString" == "some_case_string"
# Return the string with its first grapheme passed through `toUppercase`;
# the remaining graphemes are left untouched. If the string has no graphemes
# it is returned unchanged.
uppercaseFirst : Str -> Str
uppercaseFirst = \str ->
segments = Str.graphemes str
when segments is
[a, ..] ->
first = toUppercase a
rest = List.dropFirst segments 1
Str.joinWith (List.prepend rest first) ""
_ -> str
# Map a single ASCII lowercase letter ("a" to "z") to its uppercase form.
# Any other string, including longer strings and non-ASCII letters, is
# returned unchanged.
toUppercase : Str -> Str
toUppercase = \str ->
when str is
"a" -> "A"
"b" -> "B"
"c" -> "C"
"d" -> "D"
"e" -> "E"
"f" -> "F"
"g" -> "G"
"h" -> "H"
"i" -> "I"
"j" -> "J"
"k" -> "K"
"l" -> "L"
"m" -> "M"
"n" -> "N"
"o" -> "O"
"p" -> "P"
"q" -> "Q"
"r" -> "R"
"s" -> "S"
"t" -> "T"
"u" -> "U"
"v" -> "V"
"w" -> "W"
"x" -> "X"
"y" -> "Y"
"z" -> "Z"
_ -> str
# Map a single ASCII uppercase letter ("A" to "Z") to its lowercase form.
# Any other string, including longer strings and non-ASCII letters, is
# returned unchanged.
toLowercase : Str -> Str
toLowercase = \str ->
when str is
"A" -> "a"
"B" -> "b"
"C" -> "c"
"D" -> "d"
"E" -> "e"
"F" -> "f"
"G" -> "g"
"H" -> "h"
"I" -> "i"
"J" -> "j"
"K" -> "k"
"L" -> "l"
"M" -> "m"
"N" -> "n"
"O" -> "o"
"P" -> "p"
"Q" -> "q"
"R" -> "r"
"S" -> "s"
"T" -> "t"
"U" -> "u"
"V" -> "v"
"W" -> "w"
"X" -> "x"
"Y" -> "y"
"Z" -> "z"
_ -> str
# Return `Bool.true` only when the string is exactly one of the 26 ASCII
# capital letters "A" to "Z"; every other string yields `Bool.false`.
isUpperCase : Str -> Bool
isUpperCase = \str ->
when str is
"A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" -> Bool.true
_ -> Bool.false

View file

@ -1,3 +1,3 @@
package "builtins"
exposes [Str, Num, Bool, Result, List, Dict, Set, Decode, Encode, Hash, Box, TotallyNotJson]
exposes [Str, Num, Bool, Result, List, Dict, Set, Decode, Encode, Hash, Box, TotallyNotJson, Inspect]
packages {}

Some files were not shown because too many files have changed in this diff Show more