add more impls and memcpy

This commit is contained in:
Brendan Hansknecht 2023-06-01 22:50:12 -07:00
parent 0e2c3e4723
commit ebc6bd3f45
No known key found for this signature in database
GPG key ID: 0EA784685083E75B
5 changed files with 222 additions and 19 deletions

View file

@ -21,26 +21,13 @@ pub var memcpy_target: Memcpy = switch (arch) {
pub fn memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8 {
switch (arch) {
// x86_64 has a special optimized memcpy that can use avx2.
.x86_64 => {
return memcpy_target(dest, src, len);
},
.i386 => {
@memcpy(dest, src, len);
return dest;
else => {
return musl.memcpy(dest, src, len);
},
.aarch64 => {
@memcpy(dest, src, len);
return dest;
},
.arm => {
@memcpy(dest, src, len);
return dest;
},
.wasm32 => {
@memcpy(dest, src, len);
return dest;
},
else => @compileError("Unsupported architecture for memcpy"),
}
}

View file

@ -1 +1 @@
pub const memcpy = @import("musl/memcpy.zig").musl_memcpy;
pub const memcpy = @import("musl/memcpy.zig").memcpy;

View file

@ -0,0 +1,30 @@
# i386 memcpy, exported as musl_memcpy.
# Arguments are read from the stack (offsets are relative to %esp after the
# two pushes below): 12(%esp) = dest, 16(%esp) = src, 20(%esp) = len.
# The original dest pointer is returned in %eax.
# NOTE(review): the string instructions assume the direction flag is clear on
# entry - confirm this holds for every caller.
.global musl_memcpy
# Windows does not support the type directive.
# .type memcpy,@function
musl_memcpy:
# Save %esi/%edi; the movs* instructions below use them as src/dest cursors.
push %esi
push %edi
# Load dest -> %edi, src -> %esi, len -> %ecx.
mov 12(%esp),%edi
mov 16(%esp),%esi
mov 20(%esp),%ecx
# Return value: the unmodified dest pointer.
mov %edi,%eax
# Fewer than 4 bytes (unsigned compare): skip the alignment prologue.
cmp $4,%ecx
jc 1f
# dest already 4-byte aligned: skip the alignment prologue.
test $3,%edi
jz 1f
# Copy single bytes until dest (%edi) is 4-byte aligned.
2: movsb
dec %ecx
test $3,%edi
jnz 2b
# Bulk copy: %edx keeps the remaining byte count, %ecx becomes the dword count.
1: mov %ecx,%edx
shr $2,%ecx
rep
movsl
# Tail: copy the leftover (count & 3) bytes one at a time, if any.
and $3,%edx
jz 1f
2: movsb
dec %edx
jnz 2b
# Restore the saved registers and return.
1: pop %edi
pop %esi
ret

View file

@ -1,6 +1,6 @@
.global musl_memcpy
# Windows does not support the type directive.
# .type memcpy,@function
# .type musl_memcpy,@function
musl_memcpy:
mov %rdi,%rax
cmp $8,%rdx

View file

@ -6,8 +6,194 @@ comptime {
.x86_64 => {
asm (@embedFile("memcpy-x86_64.S"));
},
else => unreachable,
.i386 => {
asm (@embedFile("memcpy-i386.S"));
},
// TODO: add assembly implementations for other platforms.
else => {},
}
}
/// The memcpy implementation chosen for the target architecture at compile time.
/// x86_64 and i386 resolve to the external `musl_memcpy` symbol; every other
/// architecture resolves to `fallback_memcpy`.
pub const memcpy = switch (arch) {
    .x86_64 => musl_memcpy,
    .i386 => musl_memcpy,
    else => fallback_memcpy,
};
/// External C-ABI memcpy symbol. The `musl_memcpy` label is defined by the
/// assembly sources that this file embeds for x86_64 and i386; on other
/// architectures no definition is embedded here, so it must not be referenced.
/// Takes (dest, src, len) and returns a `[*]u8`.
pub extern fn musl_memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8;
/// Portable memcpy for targets that have no assembly implementation.
///
/// Copies `len` bytes from `src` to `dest` and returns `dest`. Both pointers are
/// `noalias`, so the two regions must not overlap.
///
/// Strategy: copy single bytes until `src` is 4-byte aligned, then move 32-bit
/// words. If `dest` is not word aligned at that point, each destination word is
/// assembled from two neighbouring source words with shifts.
///
/// Note: this is written to only support little endian targets.
/// To support big endian, `<<` and `>>` would need to be swapped.
pub fn fallback_memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8 {
    var d = dest;
    var s = src;
    var n = len;

    // Head: byte copies until the source is 4-byte aligned or nothing is left.
    while (@ptrToInt(s) % 4 != 0 and n != 0) : (n -= 1) {
        d[0] = s[0];
        d += 1;
        s += 1;
    }

    // The head loop may stop because `n` hit zero while `s` is still misaligned.
    // Return here so the safety-checked `@alignCast(4, s)` below is only ever
    // reached with an aligned source (n != 0 implies s % 4 == 0).
    if (n == 0) return dest;

    if (@ptrToInt(d) % 4 == 0) {
        // Source and destination are both word aligned: plain word copies.
        var d4 = @alignCast(4, d);
        var s4 = @alignCast(4, s);
        while (n >= 16) : (n -= 16) {
            const d_u32 = @ptrCast([*]u32, d4);
            const s_u32 = @ptrCast([*]const u32, s4);
            d_u32[0] = s_u32[0];
            d_u32[1] = s_u32[1];
            d_u32[2] = s_u32[2];
            d_u32[3] = s_u32[3];
            d4 += 16;
            s4 += 16;
        }
        // Fewer than 16 bytes remain; the low bits of `n` select the tail pieces.
        if (n & 8 != 0) {
            const d_u32 = @ptrCast([*]u32, d4);
            const s_u32 = @ptrCast([*]const u32, s4);
            d_u32[0] = s_u32[0];
            d_u32[1] = s_u32[1];
            d4 += 8;
            s4 += 8;
        }
        if (n & 4 != 0) {
            const d_u32 = @ptrCast([*]u32, d4);
            const s_u32 = @ptrCast([*]const u32, s4);
            d_u32[0] = s_u32[0];
            d4 += 4;
            s4 += 4;
        }
        d = d4;
        s = s4;
        if (n & 2 != 0) {
            d[0] = s[0];
            d += 1;
            s += 1;
            d[0] = s[0];
            d += 1;
            s += 1;
        }
        if (n & 1 != 0) {
            d[0] = s[0];
        }
        return dest;
    }

    // Source is aligned, destination is not. For large copies, first byte-copy
    // enough to align the destination, then build every destination word from
    // the tail of the previous source word (`w`/`x`) and the head of the next.
    if (n >= 32) {
        switch (@ptrToInt(d) % 4) {
            1 => {
                var w = @ptrCast([*]const u32, @alignCast(4, s))[0];
                d[0] = s[0];
                d += 1;
                s += 1;
                d[0] = s[0];
                d += 1;
                s += 1;
                d[0] = s[0];
                d += 1;
                s += 1;
                n -= 3;
                // Each iteration reads source bytes s[1..17], hence the `>= 17` bound.
                while (n >= 17) : (n -= 16) {
                    const d_u32 = @ptrCast([*]u32, @alignCast(4, d));
                    const s_u32 = @ptrCast([*]const u32, @alignCast(4, s + 1));
                    var x = s_u32[0];
                    d_u32[0] = (w >> 24) | (x << 8);
                    w = s_u32[1];
                    d_u32[1] = (x >> 24) | (w << 8);
                    x = s_u32[2];
                    d_u32[2] = (w >> 24) | (x << 8);
                    w = s_u32[3];
                    d_u32[3] = (x >> 24) | (w << 8);
                    d += 16;
                    s += 16;
                }
            },
            2 => {
                var w = @ptrCast([*]const u32, @alignCast(4, s))[0];
                d[0] = s[0];
                d += 1;
                s += 1;
                d[0] = s[0];
                d += 1;
                s += 1;
                n -= 2;
                // Each iteration reads source bytes s[2..18], hence the `>= 18` bound.
                while (n >= 18) : (n -= 16) {
                    const d_u32 = @ptrCast([*]u32, @alignCast(4, d));
                    const s_u32 = @ptrCast([*]const u32, @alignCast(4, s + 2));
                    var x = s_u32[0];
                    d_u32[0] = (w >> 16) | (x << 16);
                    w = s_u32[1];
                    d_u32[1] = (x >> 16) | (w << 16);
                    x = s_u32[2];
                    d_u32[2] = (w >> 16) | (x << 16);
                    w = s_u32[3];
                    d_u32[3] = (x >> 16) | (w << 16);
                    d += 16;
                    s += 16;
                }
            },
            3 => {
                var w = @ptrCast([*]const u32, @alignCast(4, s))[0];
                d[0] = s[0];
                d += 1;
                s += 1;
                n -= 1;
                // Each iteration reads source bytes s[3..19], hence the `>= 19` bound.
                while (n >= 19) : (n -= 16) {
                    const d_u32 = @ptrCast([*]u32, @alignCast(4, d));
                    const s_u32 = @ptrCast([*]const u32, @alignCast(4, s + 3));
                    var x = s_u32[0];
                    d_u32[0] = (w >> 8) | (x << 24);
                    w = s_u32[1];
                    d_u32[1] = (x >> 8) | (w << 24);
                    x = s_u32[2];
                    d_u32[2] = (w >> 8) | (x << 24);
                    w = s_u32[3];
                    d_u32[3] = (x >> 8) | (w << 24);
                    d += 16;
                    s += 16;
                }
            },
            // d % 4 == 0 was handled by the aligned branch above.
            else => unreachable,
        }
    }

    // Tail for the unaligned-destination path: at most 31 bytes remain, copied
    // bytewise in chunks selected by the bits of `n`.
    if (n & 16 != 0) {
        comptime var i = 0;
        inline while (i < 16) : (i += 1) {
            d[0] = s[0];
            d += 1;
            s += 1;
        }
    }
    if (n & 8 != 0) {
        comptime var i = 0;
        inline while (i < 8) : (i += 1) {
            d[0] = s[0];
            d += 1;
            s += 1;
        }
    }
    if (n & 4 != 0) {
        comptime var i = 0;
        inline while (i < 4) : (i += 1) {
            d[0] = s[0];
            d += 1;
            s += 1;
        }
    }
    if (n & 2 != 0) {
        d[0] = s[0];
        d += 1;
        s += 1;
        d[0] = s[0];
        d += 1;
        s += 1;
    }
    if (n & 1 != 0) {
        d[0] = s[0];
    }
    return dest;
}