From 159f95aad11fc5e185526dfa2e282dcc8d08fde3 Mon Sep 17 00:00:00 2001 From: Brendan Hansknecht Date: Thu, 1 Jun 2023 18:46:02 -0700 Subject: [PATCH] add musl memcpy and avx2 dispatching --- crates/compiler/builtins/bitcode/src/libc.zig | 21 +- .../builtins/bitcode/src/libc/cpuid.zig | 43 ++++ .../builtins/bitcode/src/libc/musl/COPYRIGHT | 193 ++++++++++++++++++ .../builtins/bitcode/src/libc/musl/README.md | 2 + .../builtins/bitcode/src/libc/musl/memcpy.zig | 35 ++++ 5 files changed, 292 insertions(+), 2 deletions(-) create mode 100644 crates/compiler/builtins/bitcode/src/libc/cpuid.zig create mode 100644 crates/compiler/builtins/bitcode/src/libc/musl/COPYRIGHT create mode 100644 crates/compiler/builtins/bitcode/src/libc/musl/README.md create mode 100644 crates/compiler/builtins/bitcode/src/libc/musl/memcpy.zig diff --git a/crates/compiler/builtins/bitcode/src/libc.zig b/crates/compiler/builtins/bitcode/src/libc.zig index d7485e5734..9f643bf229 100644 --- a/crates/compiler/builtins/bitcode/src/libc.zig +++ b/crates/compiler/builtins/bitcode/src/libc.zig @@ -1,6 +1,8 @@ const std = @import("std"); const builtin = @import("builtin"); const arch = builtin.cpu.arch; +const musl_memcpy = @import("libc/musl/memcpy.zig"); +const cpuid = @import("libc/cpuid.zig"); comptime { @export(memcpy, .{ .name = "roc_memcpy", .linkage = .Weak }); @@ -10,14 +12,15 @@ comptime { const Memcpy = fn (noalias [*]u8, noalias [*]const u8, len: usize) callconv(.C) [*]u8; pub var memcpy_target: Memcpy = switch (arch) { - .x86_64 => memcpy, + // TODO(): Switch to dispatch_memcpy once the surgical linker can support it. 
+ // .x86_64 => dispatch_memcpy, + .x86_64 => musl_memcpy.musl_memcpy, else => unreachable, }; pub fn memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8 { switch (arch) { .x86_64 => { - @memcpy(dest, src, len); return memcpy_target(dest, src, len); }, .i386 => { @@ -39,3 +42,17 @@ pub fn memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callcon else => @compileError("Unsupported architecture for memcpy"), } } + +fn dispatch_memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8 { + switch (arch) { + .x86_64 => { + if (cpuid.supports_avx2()) { + memcpy_target = musl_memcpy.musl_memcpy; + } else { + memcpy_target = musl_memcpy.musl_memcpy; + } + }, + else => unreachable, + } + return memcpy_target(dest, src, len); +} diff --git a/crates/compiler/builtins/bitcode/src/libc/cpuid.zig b/crates/compiler/builtins/bitcode/src/libc/cpuid.zig new file mode 100644 index 0000000000..c339aeb7f8 --- /dev/null +++ b/crates/compiler/builtins/bitcode/src/libc/cpuid.zig @@ -0,0 +1,43 @@ +const builtin = @import("builtin"); +const arch = builtin.cpu.arch; + +// I couldn't manage to define this in a PIE friendly way with inline assembly. +// Instead, I am defining it as a global assembly function. +comptime { + switch (arch) { + .x86_64 => { + asm ( + \\ # Check if AVX2 is supported. + \\ # Returns 1 if AVX2 is supported, 0 otherwise. + \\ .global supports_avx2; + \\ supports_avx2: + \\ # Save the EBX register. + \\ push %rbx + \\ + \\ # Call the CPUID instruction with the EAX register set to 7 and ECX set to 0. + \\ # This will get the CPUID information for the current CPU. + \\ mov $7, %eax + \\ mov $0, %ecx + \\ cpuid + \\ + \\ # Check if the AVX2 feature flag is set. + \\ # The AVX2 feature flag is located in the EBX register at bit 5. + \\ bt $5, %ebx + \\ jc .AVX2Supported + \\ + \\ # AVX2 is not supported. 
+ \\ pop %rbx + \\ mov $0, %eax + \\ ret + \\ + \\ .AVX2Supported: + \\ pop %rbx + \\ mov $1, %eax + \\ ret + ); + }, + else => unreachable, + } +} + +pub extern fn supports_avx2() bool; diff --git a/crates/compiler/builtins/bitcode/src/libc/musl/COPYRIGHT b/crates/compiler/builtins/bitcode/src/libc/musl/COPYRIGHT new file mode 100644 index 0000000000..c1628e9ac8 --- /dev/null +++ b/crates/compiler/builtins/bitcode/src/libc/musl/COPYRIGHT @@ -0,0 +1,193 @@ +musl as a whole is licensed under the following standard MIT license: + +---------------------------------------------------------------------- +Copyright © 2005-2020 Rich Felker, et al. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +---------------------------------------------------------------------- + +Authors/contributors include: + +A. Wilcox +Ada Worcester +Alex Dowad +Alex Suykov +Alexander Monakov +Andre McCurdy +Andrew Kelley +Anthony G. 
Basile +Aric Belsito +Arvid Picciani +Bartosz Brachaczek +Benjamin Peterson +Bobby Bingham +Boris Brezillon +Brent Cook +Chris Spiegel +Clément Vasseur +Daniel Micay +Daniel Sabogal +Daurnimator +David Carlier +David Edelsohn +Denys Vlasenko +Dmitry Ivanov +Dmitry V. Levin +Drew DeVault +Emil Renner Berthing +Fangrui Song +Felix Fietkau +Felix Janda +Gianluca Anzolin +Hauke Mehrtens +He X +Hiltjo Posthuma +Isaac Dunham +Jaydeep Patil +Jens Gustedt +Jeremy Huntwork +Jo-Philipp Wich +Joakim Sindholt +John Spencer +Julien Ramseier +Justin Cormack +Kaarle Ritvanen +Khem Raj +Kylie McClain +Leah Neukirchen +Luca Barbato +Luka Perkov +M Farkas-Dyck (Strake) +Mahesh Bodapati +Markus Wichmann +Masanori Ogino +Michael Clark +Michael Forney +Mikhail Kremnyov +Natanael Copa +Nicholas J. Kain +orc +Pascal Cuoq +Patrick Oppenlander +Petr Hosek +Petr Skocik +Pierre Carrier +Reini Urban +Rich Felker +Richard Pennington +Ryan Fairfax +Samuel Holland +Segev Finer +Shiz +sin +Solar Designer +Stefan Kristiansson +Stefan O'Rear +Szabolcs Nagy +Timo Teräs +Trutz Behn +Valentin Ochs +Will Dietz +William Haddon +William Pitcock + +Portions of this software are derived from third-party works licensed +under terms compatible with the above MIT license: + +The TRE regular expression implementation (src/regex/reg* and +src/regex/tre*) is Copyright © 2001-2008 Ville Laurikari and licensed +under a 2-clause BSD license (license text in the source files). The +included version has been heavily modified by Rich Felker in 2012, in +the interests of size, simplicity, and namespace cleanliness. + +Much of the math library code (src/math/* and src/complex/*) is +Copyright © 1993,2004 Sun Microsystems or +Copyright © 2003-2011 David Schultz or +Copyright © 2003-2009 Steven G. Kargl or +Copyright © 2003-2009 Bruce D. Evans or +Copyright © 2008 Stephen L. Moshier or +Copyright © 2017-2018 Arm Limited +and labelled as such in comments in the individual source files. 
All +have been licensed under extremely permissive terms. + +The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008 +The Android Open Source Project and is licensed under a two-clause BSD +license. It was taken from Bionic libc, used on Android. + +The AArch64 memcpy and memset code (src/string/aarch64/*) are +Copyright © 1999-2019, Arm Limited. + +The implementation of DES for crypt (src/crypt/crypt_des.c) is +Copyright © 1994 David Burren. It is licensed under a BSD license. + +The implementation of blowfish crypt (src/crypt/crypt_blowfish.c) was +originally written by Solar Designer and placed into the public +domain. The code also comes with a fallback permissive license for use +in jurisdictions that may not recognize the public domain. + +The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011 +Valentin Ochs and is licensed under an MIT-style license. + +The x86_64 port was written by Nicholas J. Kain and is licensed under +the standard MIT terms. + +The mips and microblaze ports were originally written by Richard +Pennington for use in the ellcc project. The original code was adapted +by Rich Felker for build system and code conventions during upstream +integration. It is licensed under the standard MIT terms. + +The mips64 port was contributed by Imagination Technologies and is +licensed under the standard MIT terms. + +The powerpc port was also originally written by Richard Pennington, +and later supplemented and integrated by John Spencer. It is licensed +under the standard MIT terms. + +All other files which have no copyright comments are original works +produced specifically for use as part of this library, written either +by Rich Felker, the main author of the library, or by one or more +contibutors listed above. Details on authorship of individual files +can be found in the git version control history of the project. The +omission of copyright and license comments in each file is in the +interest of source tree size. 
+ +In addition, permission is hereby granted for all public header files +(include/* and arch/*/bits/*) and crt files intended to be linked into +applications (crt/*, ldso/dlstart.c, and arch/*/crt_arch.h) to omit +the copyright notice and permission notice otherwise required by the +license, and to use these files without any requirement of +attribution. These files include substantial contributions from: + +Bobby Bingham +John Spencer +Nicholas J. Kain +Rich Felker +Richard Pennington +Stefan Kristiansson +Szabolcs Nagy + +all of whom have explicitly granted such permission. + +This file previously contained text expressing a belief that most of +the files covered by the above exception were sufficiently trivial not +to be subject to copyright, resulting in confusion over whether it +negated the permissions granted in the license. In the spirit of +permissive licensing, and of not having licensing issues being an +obstacle to adoption, that text has been removed. diff --git a/crates/compiler/builtins/bitcode/src/libc/musl/README.md b/crates/compiler/builtins/bitcode/src/libc/musl/README.md new file mode 100644 index 0000000000..d2d8d90c89 --- /dev/null +++ b/crates/compiler/builtins/bitcode/src/libc/musl/README.md @@ -0,0 +1,2 @@ +These files all come from [musl libc](https://musl.libc.org/). +Roc uses a few of them directly instead of depending on all of musl libc. 
diff --git a/crates/compiler/builtins/bitcode/src/libc/musl/memcpy.zig b/crates/compiler/builtins/bitcode/src/libc/musl/memcpy.zig new file mode 100644 index 0000000000..5df34d56d2 --- /dev/null +++ b/crates/compiler/builtins/bitcode/src/libc/musl/memcpy.zig @@ -0,0 +1,35 @@ +const builtin = @import("builtin"); +const arch = builtin.cpu.arch; + +comptime { + switch (arch) { + .x86_64 => { + asm ( + \\ .global musl_memcpy; + \\ musl_memcpy: + \\ mov %rdi,%rax + \\ cmp $8,%rdx + \\ jc 1f + \\ test $7,%edi + \\ jz 1f + \\ 2: movsb + \\ dec %rdx + \\ test $7,%edi + \\ jnz 2b + \\ 1: mov %rdx,%rcx + \\ shr $3,%rcx + \\ rep + \\ movsq + \\ and $7,%edx + \\ jz 1f + \\ 2: movsb + \\ dec %edx + \\ jnz 2b + \\ 1: ret + ); + }, + else => unreachable, + } +} + +pub extern fn musl_memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8;