better extract musl lib

This commit is contained in:
Brendan Hansknecht 2023-06-01 21:09:28 -07:00
parent 159f95aad1
commit 77624f627b
No known key found for this signature in database
GPG key ID: 0EA784685083E75B
5 changed files with 65 additions and 31 deletions

View file

@ -1,7 +1,7 @@
const std = @import("std");
const builtin = @import("builtin");
const arch = builtin.cpu.arch;
const musl_memcpy = @import("libc/musl/memcpy.zig");
const musl = @import("libc/musl.zig");
const cpuid = @import("libc/cpuid.zig");
comptime {
@ -14,7 +14,7 @@ const Memcpy = fn (noalias [*]u8, noalias [*]const u8, len: usize) callconv(.C)
pub var memcpy_target: Memcpy = switch (arch) {
// TODO(): Switch to dispatch_memcpy once the surgical linker can support it.
// .x86_64 => dispatch_memcpy,
.x86_64 => musl_memcpy.musl_memcpy,
.x86_64 => musl.memcpy,
else => unreachable,
};
@ -47,9 +47,13 @@ fn dispatch_memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) ca
switch (arch) {
.x86_64 => {
if (cpuid.supports_avx2()) {
memcpy_target = musl_memcpy.musl_memcpy;
if (cpuid.supports_prefetchw()) {
memcpy_target = musl.memcpy;
} else {
memcpy_target = musl.memcpy;
}
} else {
memcpy_target = musl_memcpy.musl_memcpy;
memcpy_target = musl.memcpy;
}
},
else => unreachable,

View file

@ -2,7 +2,7 @@ const builtin = @import("builtin");
const arch = builtin.cpu.arch;
// I couldn't manage to define this in a PIE friendly way with inline assembly.
// Instead, I am defining it as a global assembly function.
// Instead, I am defining it as global assembly functions.
comptime {
switch (arch) {
.x86_64 => {
@ -20,17 +20,44 @@ comptime {
\\ mov $0, %ecx
\\ cpuid
\\
\\ # Check if the AVX2 feature flag is set.
\\ # The AVX2 feature flag is located in the EBX register at bit 5.
\\ bt $5, %ebx
\\ jc .AVX2Supported
\\ jc .avx2_supported
\\
\\ # AVX2 is not supported.
\\ pop %rbx
\\ mov $0, %eax
\\ ret
\\
\\ .AVX2Supported:
\\ .avx2_supported:
\\ pop %rbx
\\ mov $1, %eax
\\ ret
);
asm (
\\ # Check if prefetchw is supported.
\\ # Returns 1 if the prefetchw instruction is supported, 0 otherwise.
\\ .global supports_prefetchw;
\\ supports_prefetchw:
\\ # Save the EBX register.
\\ push %rbx
\\
\\ # Call the CPUID instruction with the EAX register set to 0x80000001 and ECX set to 0.
\\ # This will get the CPUID information for the current CPU.
\\ mov $0x80000001, %eax
\\ mov $0, %ecx
\\ cpuid
\\
\\ # The prefetchw feature flag is located in the ECX register at bit 8.
\\ bt $8, %ecx
\\ jc .prefetchw_supported
\\
\\ # prefetchw is not supported.
\\ pop %rbx
\\ mov $0, %eax
\\ ret
\\
\\ .prefetchw_supported:
\\ pop %rbx
\\ mov $1, %eax
\\ ret
@ -41,3 +68,4 @@ comptime {
}
/// Reports whether AVX2 is available, based on the CPUID check (EBX bit 5)
/// performed by the global assembly in this file.
pub extern fn supports_avx2() bool;
/// Reports whether the prefetchw instruction is available, based on the CPUID
/// check (leaf 0x80000001, ECX bit 8) performed by the global assembly in this file.
pub extern fn supports_prefetchw() bool;

View file

@ -0,0 +1 @@
/// Re-exports `musl_memcpy` from `musl/memcpy.zig` under the shorter name `memcpy`.
pub const memcpy = @import("musl/memcpy.zig").musl_memcpy;

View file

@ -0,0 +1,23 @@
# x86_64 memcpy routine exported under the symbol `musl_memcpy`.
# Uses rdi as the destination pointer, rsi as the source pointer and rdx as
# the byte count, and leaves the original destination pointer in rax.
# NOTE(review): this register assignment matches the System V AMD64 calling
# convention — confirm for any target that uses a different convention.
.global musl_memcpy
# Windows does not support the type directive.
# .type memcpy,@function
musl_memcpy:
# Save the original destination in rax before the string instructions advance rdi.
mov %rdi,%rax
# Fewer than 8 bytes to copy (unsigned compare): skip the alignment loop.
cmp $8,%rdx
jc 1f
# Low three bits of the destination are already zero: skip the alignment loop.
test $7,%edi
jz 1f
# Copy single bytes until the low three bits of the destination are zero.
2: movsb
dec %rdx
test $7,%edi
jnz 2b
# Copy (remaining count / 8) quadwords with rep movsq.
1: mov %rdx,%rcx
shr $3,%rcx
rep
movsq
# Copy the leftover (count & 7) bytes one at a time, if there are any.
and $7,%edx
jz 1f
2: movsb
dec %edx
jnz 2b
1: ret

View file

@ -4,29 +4,7 @@ const arch = builtin.cpu.arch;
comptime {
switch (arch) {
.x86_64 => {
asm (
\\ .global musl_memcpy;
\\ musl_memcpy:
\\ mov %rdi,%rax
\\ cmp $8,%rdx
\\ jc 1f
\\ test $7,%edi
\\ jz 1f
\\ 2: movsb
\\ dec %rdx
\\ test $7,%edi
\\ jnz 2b
\\ 1: mov %rdx,%rcx
\\ shr $3,%rcx
\\ rep
\\ movsq
\\ and $7,%edx
\\ jz 1f
\\ 2: movsb
\\ dec %edx
\\ jnz 2b
\\ 1: ret
);
asm (@embedFile("memcpy-x86_64.S"));
},
else => unreachable,
}