mirror of
https://github.com/roc-lang/roc.git
synced 2025-07-23 06:25:10 +00:00
add more impls and memcpy
This commit is contained in:
parent
0e2c3e4723
commit
ebc6bd3f45
5 changed files with 222 additions and 19 deletions
|
@ -21,26 +21,13 @@ pub var memcpy_target: Memcpy = switch (arch) {
|
|||
|
||||
pub fn memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8 {
|
||||
switch (arch) {
|
||||
// x86_64 has a special optimized memcpy that can use avx2.
|
||||
.x86_64 => {
|
||||
return memcpy_target(dest, src, len);
|
||||
},
|
||||
.i386 => {
|
||||
@memcpy(dest, src, len);
|
||||
return dest;
|
||||
else => {
|
||||
return musl.memcpy(dest, src, len);
|
||||
},
|
||||
.aarch64 => {
|
||||
@memcpy(dest, src, len);
|
||||
return dest;
|
||||
},
|
||||
.arm => {
|
||||
@memcpy(dest, src, len);
|
||||
return dest;
|
||||
},
|
||||
.wasm32 => {
|
||||
@memcpy(dest, src, len);
|
||||
return dest;
|
||||
},
|
||||
else => @compileError("Unsupported architecture for memcpy"),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1 +1 @@
|
|||
pub const memcpy = @import("musl/memcpy.zig").musl_memcpy;
|
||||
pub const memcpy = @import("musl/memcpy.zig").memcpy;
|
||||
|
|
30
crates/compiler/builtins/bitcode/src/libc/musl/memcpy-i386.S
Normal file
30
crates/compiler/builtins/bitcode/src/libc/musl/memcpy-i386.S
Normal file
|
@ -0,0 +1,30 @@
|
|||
.global musl_memcpy
|
||||
# Windows does not support the type directive.
|
||||
# .type memcpy,@function
|
||||
musl_memcpy:
|
||||
push %esi
|
||||
push %edi
|
||||
mov 12(%esp),%edi
|
||||
mov 16(%esp),%esi
|
||||
mov 20(%esp),%ecx
|
||||
mov %edi,%eax
|
||||
cmp $4,%ecx
|
||||
jc 1f
|
||||
test $3,%edi
|
||||
jz 1f
|
||||
2: movsb
|
||||
dec %ecx
|
||||
test $3,%edi
|
||||
jnz 2b
|
||||
1: mov %ecx,%edx
|
||||
shr $2,%ecx
|
||||
rep
|
||||
movsl
|
||||
and $3,%edx
|
||||
jz 1f
|
||||
2: movsb
|
||||
dec %edx
|
||||
jnz 2b
|
||||
1: pop %edi
|
||||
pop %esi
|
||||
ret
|
|
@ -1,6 +1,6 @@
|
|||
.global musl_memcpy
|
||||
# Windows does not support the type directive.
|
||||
# .type memcpy,@function
|
||||
# .type musl_memcpy,@function
|
||||
musl_memcpy:
|
||||
mov %rdi,%rax
|
||||
cmp $8,%rdx
|
||||
|
|
|
@ -6,8 +6,194 @@ comptime {
|
|||
.x86_64 => {
|
||||
asm (@embedFile("memcpy-x86_64.S"));
|
||||
},
|
||||
else => unreachable,
|
||||
.i386 => {
|
||||
asm (@embedFile("memcpy-i386.S"));
|
||||
},
|
||||
// TODO: add assembly implementations for other platforms.
|
||||
else => {},
|
||||
}
|
||||
}
|
||||
|
||||
pub const memcpy =
|
||||
switch (arch) {
|
||||
.x86_64, .i386 => musl_memcpy,
|
||||
else => fallback_memcpy,
|
||||
};
|
||||
|
||||
pub extern fn musl_memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8;
|
||||
|
||||
// Note: this is written to only support little endian targets.
|
||||
// To support big endian, `<<` and `>>` wold need to be swapped.
|
||||
pub fn fallback_memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8 {
|
||||
var d = dest;
|
||||
var s = src;
|
||||
var n = len;
|
||||
while (@ptrToInt(s) % 4 != 0 and n != 0) : (n -= 1) {
|
||||
d[0] = s[0];
|
||||
d += 1;
|
||||
s += 1;
|
||||
}
|
||||
|
||||
if (@ptrToInt(d) % 4 == 0) {
|
||||
var d4 = @alignCast(4, d);
|
||||
var s4 = @alignCast(4, s);
|
||||
while (n >= 16) : (n -= 16) {
|
||||
var d_u32 = @ptrCast([*]u32, d4);
|
||||
var s_u32 = @ptrCast([*]const u32, s4);
|
||||
d_u32[0] = s_u32[0];
|
||||
d_u32[1] = s_u32[1];
|
||||
d_u32[2] = s_u32[2];
|
||||
d_u32[3] = s_u32[3];
|
||||
|
||||
d4 += 16;
|
||||
s4 += 16;
|
||||
}
|
||||
if (n & 8 != 0) {
|
||||
var d_u32 = @ptrCast([*]u32, d4);
|
||||
var s_u32 = @ptrCast([*]const u32, s4);
|
||||
d_u32[0] = s_u32[0];
|
||||
d_u32[1] = s_u32[1];
|
||||
|
||||
d4 += 8;
|
||||
s4 += 8;
|
||||
}
|
||||
if (n & 4 != 0) {
|
||||
var d_u32 = @ptrCast([*]u32, d4);
|
||||
var s_u32 = @ptrCast([*]const u32, s4);
|
||||
d_u32[0] = s_u32[0];
|
||||
|
||||
d4 += 4;
|
||||
s4 += 4;
|
||||
}
|
||||
d = d4;
|
||||
s = s4;
|
||||
if (n & 2 != 0) {
|
||||
d[0] = s[0];
|
||||
d += 1;
|
||||
s += 1;
|
||||
d[0] = s[0];
|
||||
d += 1;
|
||||
s += 1;
|
||||
}
|
||||
if (n & 1 != 0) {
|
||||
d[0] = s[0];
|
||||
}
|
||||
return dest;
|
||||
}
|
||||
if (n >= 32) {
|
||||
switch (@ptrToInt(d) % 4) {
|
||||
1 => {
|
||||
var w = @ptrCast([*]const u32, @alignCast(4, s))[0];
|
||||
d[0] = s[0];
|
||||
d += 1;
|
||||
s += 1;
|
||||
d[0] = s[0];
|
||||
d += 1;
|
||||
s += 1;
|
||||
d[0] = s[0];
|
||||
d += 1;
|
||||
s += 1;
|
||||
n -= 3;
|
||||
while (n >= 17) : (n -= 16) {
|
||||
var d_u32 = @ptrCast([*]u32, @alignCast(4, d));
|
||||
var s_u32 = @ptrCast([*]const u32, @alignCast(4, s + 1));
|
||||
var x = s_u32[0];
|
||||
d_u32[0] = (w >> 24) | (x << 8);
|
||||
w = s_u32[1];
|
||||
d_u32[1] = (x >> 24) | (w << 8);
|
||||
x = s_u32[2];
|
||||
d_u32[2] = (w >> 24) | (x << 8);
|
||||
w = s_u32[3];
|
||||
d_u32[3] = (x >> 24) | (w << 8);
|
||||
|
||||
d += 16;
|
||||
s += 16;
|
||||
}
|
||||
},
|
||||
2 => {
|
||||
var w = @ptrCast([*]const u32, @alignCast(4, s))[0];
|
||||
d[0] = s[0];
|
||||
d += 1;
|
||||
s += 1;
|
||||
d[0] = s[0];
|
||||
d += 1;
|
||||
s += 1;
|
||||
n -= 2;
|
||||
while (n >= 18) : (n -= 16) {
|
||||
var d_u32 = @ptrCast([*]u32, @alignCast(4, d));
|
||||
var s_u32 = @ptrCast([*]const u32, @alignCast(4, s + 2));
|
||||
var x = s_u32[0];
|
||||
d_u32[0] = (w >> 16) | (x << 16);
|
||||
w = s_u32[1];
|
||||
d_u32[1] = (x >> 16) | (w << 16);
|
||||
x = s_u32[2];
|
||||
d_u32[2] = (w >> 16) | (x << 16);
|
||||
w = s_u32[3];
|
||||
d_u32[3] = (x >> 16) | (w << 16);
|
||||
|
||||
d += 16;
|
||||
s += 16;
|
||||
}
|
||||
},
|
||||
3 => {
|
||||
var w = @ptrCast([*]const u32, @alignCast(4, s))[0];
|
||||
d[0] = s[0];
|
||||
d += 1;
|
||||
s += 1;
|
||||
n -= 1;
|
||||
while (n >= 19) : (n -= 16) {
|
||||
var d_u32 = @ptrCast([*]u32, @alignCast(4, d));
|
||||
var s_u32 = @ptrCast([*]const u32, @alignCast(4, s + 3));
|
||||
var x = s_u32[0];
|
||||
d_u32[0] = (w >> 8) | (x << 24);
|
||||
w = s_u32[1];
|
||||
d_u32[1] = (x >> 8) | (w << 24);
|
||||
x = s_u32[2];
|
||||
d_u32[2] = (w >> 8) | (x << 24);
|
||||
w = s_u32[3];
|
||||
d_u32[3] = (x >> 8) | (w << 24);
|
||||
|
||||
d += 16;
|
||||
s += 16;
|
||||
}
|
||||
},
|
||||
else => unreachable,
|
||||
}
|
||||
}
|
||||
if (n & 16 != 0) {
|
||||
comptime var i = 0;
|
||||
inline while (i < 16) : (i += 1) {
|
||||
d[0] = s[0];
|
||||
d += 1;
|
||||
s += 1;
|
||||
}
|
||||
}
|
||||
if (n & 8 != 0) {
|
||||
comptime var i = 0;
|
||||
inline while (i < 8) : (i += 1) {
|
||||
d[0] = s[0];
|
||||
d += 1;
|
||||
s += 1;
|
||||
}
|
||||
}
|
||||
if (n & 4 != 0) {
|
||||
comptime var i = 0;
|
||||
inline while (i < 4) : (i += 1) {
|
||||
d[0] = s[0];
|
||||
d += 1;
|
||||
s += 1;
|
||||
}
|
||||
}
|
||||
if (n & 2 != 0) {
|
||||
d[0] = s[0];
|
||||
d += 1;
|
||||
s += 1;
|
||||
d[0] = s[0];
|
||||
d += 1;
|
||||
s += 1;
|
||||
}
|
||||
if (n & 1 != 0) {
|
||||
d[0] = s[0];
|
||||
}
|
||||
return dest;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue