Merge pull request #4437 from roc-lang/windows-effect-loop-scratchpad

windows: working cli benchmark tests
This commit is contained in:
Folkert de Vries 2022-10-31 22:33:10 +01:00 committed by GitHub
commit 55ea3bbea2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 80 additions and 226 deletions

3
.gitignore vendored
View file

@ -6,6 +6,9 @@ zig-cache
*.rs.bk
*.o
*.obj
*.dll
*.lib
*.def
*.tmp
*.wasm
*.exe

View file

@ -43,7 +43,7 @@ mod cli_run {
#[cfg(not(all(target_os = "linux", target_arch = "x86_64")))]
const TEST_LEGACY_LINKER: bool = false;
#[cfg(not(target_os = "macos"))]
#[cfg(all(unix, not(target_os = "macos")))]
const ALLOW_VALGRIND: bool = true;
// Disallow valgrind on macOS by default, because it reports a ton
@ -52,6 +52,9 @@ mod cli_run {
#[cfg(target_os = "macos")]
const ALLOW_VALGRIND: bool = false;
#[cfg(windows)]
const ALLOW_VALGRIND: bool = false;
#[derive(Debug, PartialEq, Eq)]
enum Arg<'a> {
ExamplePath(&'a str),
@ -110,6 +113,11 @@ mod cli_run {
let ignorable = "🔨 Rebuilding platform...\n";
let stderr = compile_out.stderr.replacen(ignorable, "", 1);
// for some reason, llvm prints out this warning when targeting windows
let ignorable = "warning: ignoring debug info with an invalid version (0) in app\r\n";
let stderr = stderr.replacen(ignorable, "", 1);
let is_reporting_runtime = stderr.starts_with("runtime: ") && stderr.ends_with("ms\n");
if !(stderr.is_empty() || is_reporting_runtime) {
panic!("`roc` command had unexpected stderr: {}", stderr);
@ -149,7 +157,10 @@ mod cli_run {
let flags = {
let mut vec = flags.to_vec();
vec.push("--max-threads=1");
// max-threads segfaults on windows right now
if !cfg!(windows) {
vec.push("--max-threads=1");
}
vec.into_iter()
};
@ -397,6 +408,11 @@ mod cli_run {
)
}
#[cfg(windows)]
const LINE_ENDING: &str = "\r\n";
#[cfg(not(windows))]
const LINE_ENDING: &str = "\n";
#[test]
// uses C platform
fn platform_switching_main() {
@ -404,7 +420,7 @@ mod cli_run {
"examples/platform-switching",
"main.roc",
"rocLovesPlatforms",
"Which platform am I running on now?\n",
&("Which platform am I running on now?".to_string() + LINE_ENDING),
true,
)
}

View file

@ -71,26 +71,21 @@ pub export fn main() i32 {
const stdout = std.io.getStdOut().writer();
const stderr = std.io.getStdErr().writer();
// start time
var ts1: std.os.timespec = undefined;
std.os.clock_gettime(std.os.CLOCK.REALTIME, &ts1) catch unreachable;
var timer = std.time.Timer.start() catch unreachable;
// actually call roc to populate the callresult
var callresult = RocStr.empty();
roc__mainForHost_1_exposed_generic(&callresult);
// end time
var ts2: std.os.timespec = undefined;
std.os.clock_gettime(std.os.CLOCK.REALTIME, &ts2) catch unreachable;
const nanos = timer.read();
const seconds = (@intToFloat(f64, nanos) / 1_000_000_000.0);
// stdout the result
stdout.print("{s}\n", .{callresult.asSlice()}) catch unreachable;
callresult.deinit();
const delta = to_seconds(ts2) - to_seconds(ts1);
stderr.print("runtime: {d:.3}ms\n", .{delta * 1000}) catch unreachable;
stderr.print("runtime: {d:.3}ms\n", .{seconds * 1000}) catch unreachable;
return 0;
}

View file

@ -70,26 +70,21 @@ pub export fn main() i32 {
const stdout = std.io.getStdOut().writer();
const stderr = std.io.getStdErr().writer();
// start time
var ts1: std.os.timespec = undefined;
std.os.clock_gettime(std.os.CLOCK.REALTIME, &ts1) catch unreachable;
var timer = std.time.Timer.start() catch unreachable;
// actually call roc to populate the callresult
var callresult = RocStr.empty();
roc__mainForHost_1_exposed_generic(&callresult);
// end time
var ts2: std.os.timespec = undefined;
std.os.clock_gettime(std.os.CLOCK.REALTIME, &ts2) catch unreachable;
const nanos = timer.read();
const seconds = (@intToFloat(f64, nanos) / 1_000_000_000.0);
// stdout the result
stdout.print("{s}\n", .{callresult.asSlice()}) catch unreachable;
callresult.deinit();
const delta = to_seconds(ts2) - to_seconds(ts1);
stderr.print("runtime: {d:.3}ms\n", .{delta * 1000}) catch unreachable;
stderr.print("runtime: {d:.3}ms\n", .{seconds * 1000}) catch unreachable;
return 0;
}

View file

@ -83,23 +83,18 @@ export fn roc_memset(dst: [*]u8, value: i32, size: usize) callconv(.C) void {
pub export fn main() u8 {
const stdout = std.io.getStdOut().writer();
const stderr = std.io.getStdErr().writer();
// start time
var ts1: std.os.timespec = undefined;
std.os.clock_gettime(std.os.CLOCK.REALTIME, &ts1) catch unreachable;
var timer = std.time.Timer.start() catch unreachable;
const result = roc__mainForHost_1_exposed(10);
// end time
var ts2: std.os.timespec = undefined;
std.os.clock_gettime(std.os.CLOCK.REALTIME, &ts2) catch unreachable;
const nanos = timer.read();
const seconds = (@intToFloat(f64, nanos) / 1_000_000_000.0);
stdout.print("{d}\n", .{result}) catch unreachable;
const delta = to_seconds(ts2) - to_seconds(ts1);
stderr.print("runtime: {d:.3}ms\n", .{delta * 1000}) catch unreachable;
const stderr = std.io.getStdErr().writer();
stderr.print("runtime: {d:.3}ms\n", .{seconds * 1000}) catch unreachable;
return 0;
}

View file

@ -103,9 +103,7 @@ pub export fn main() u8 {
var roc_list = RocList{ .elements = numbers, .length = NUM_NUMS, .capacity = NUM_NUMS };
// start time
var ts1: std.os.timespec = undefined;
std.os.clock_gettime(std.os.CLOCK.REALTIME, &ts1) catch unreachable;
var timer = std.time.Timer.start() catch unreachable;
// actually call roc to populate the callresult
const callresult: RocList = roc__mainForHost_1_exposed(roc_list);
@ -114,9 +112,8 @@ pub export fn main() u8 {
const length = std.math.min(20, callresult.length);
var result = callresult.elements[0..length];
// end time
var ts2: std.os.timespec = undefined;
std.os.clock_gettime(std.os.CLOCK.REALTIME, &ts2) catch unreachable;
const nanos = timer.read();
const seconds = (@intToFloat(f64, nanos) / 1_000_000_000.0);
for (result) |x, i| {
if (i == 0) {
@ -128,9 +125,8 @@ pub export fn main() u8 {
}
}
// TODO apparently the typestamps are still (partially) undefined?
// const delta = to_seconds(ts2) - to_seconds(ts1);
// stderr.print("runtime: {d:.3}ms\n", .{delta * 1000}) catch unreachable;
const stderr = std.io.getStdErr().writer();
stderr.print("runtime: {d:.3}ms\n", .{seconds * 1000}) catch unreachable;
return 0;
}

View file

@ -1,5 +1,6 @@
const std = @import("std");
const str = @import("str");
const builtin = @import("builtin");
const RocStr = str.RocStr;
const testing = std.testing;
const expectEqual = testing.expectEqual;
@ -15,7 +16,6 @@ comptime {
// -fcompiler-rt in link.rs instead of doing this. Note that this
// workaround is present in many host.zig files, so make sure to undo
// it everywhere!
const builtin = @import("builtin");
if (builtin.os.tag == .macos) {
_ = @import("compiler_rt");
}
@ -210,7 +210,8 @@ fn roc_fx_getInt_help() !i64 {
const stdin = std.io.getStdIn().reader();
var buf: [40]u8 = undefined;
const line: []u8 = (try stdin.readUntilDelimiterOrEof(&buf, '\n')) orelse "";
const raw_line: []u8 = (try stdin.readUntilDelimiterOrEof(&buf, '\n')) orelse "";
const line = std.mem.trimRight(u8, raw_line, &std.ascii.spaces);
return std.fmt.parseInt(i64, line, 10);
}

View file

@ -74,7 +74,7 @@ where
}
pub fn path_to_roc_binary() -> PathBuf {
path_to_binary("roc")
path_to_binary(if cfg!(windows) { "roc.exe" } else { "roc" })
}
pub fn path_to_binary(binary_name: &str) -> PathBuf {

View file

@ -387,9 +387,30 @@ pub(crate) fn surgery_pe(executable_path: &Path, metadata_path: &Path, roc_app_b
destination - section_virtual_address as i64 - *offset_in_section as i64
+ relocation.addend();
executable[offset + *offset_in_section as usize..][..4]
.copy_from_slice(&(delta as i32).to_le_bytes());
} else if name == "___chkstk_ms" {
// this is a stack probe that is inserted when a function uses more than 2
// pages of stack space. The source of this function is not linked in, so we
// have to get a bit creative: we just jump to a `ret` instruction, so this
// function call becomes a no-op.
// the last byte of the section should be a `ret` instruction
assert_eq!(slice.last(), Some(0xc3).as_ref());
let delta =
(slice.len() - 1) as i64 - *offset_in_section as i64 + relocation.addend();
executable[offset + *offset_in_section as usize..][..4]
.copy_from_slice(&(delta as i32).to_le_bytes());
} else {
if *address == 0 && !name.starts_with("roc") {
eprintln!(
"I don't know the address of the {} function! this may cause segfaults",
name
);
}
match relocation.kind() {
object::RelocationKind::Relative => {
// we implicitly only do 32-bit relocations
@ -1228,176 +1249,7 @@ fn write_section_header(
data[section_header_start..][..header_array.len()].copy_from_slice(&header_array);
}
struct BaseRelocations {
new_size: u32,
delta: u32,
}
fn write_image_base_relocation(
mmap: &mut [u8],
reloc_section_start: usize,
new_block_va: u32,
relocations: &[u16],
) -> BaseRelocations {
// first, collect the blocks of relocations (each relocation block covers a 4K page)
// we will be moving stuff around, and have to work from the back to the front. However, the
// relocations are encoded in such a way that we can only decode them from front to back.
let mut next_block_start = reloc_section_start as u32;
let mut blocks = vec![];
let mut block_has_relocation = false;
loop {
let header =
load_struct_inplace_mut::<ImageBaseRelocation>(mmap, next_block_start as usize);
if header.virtual_address.get(LE) == 0 {
break;
}
if new_block_va == header.virtual_address.get(LE) {
block_has_relocation = true;
}
blocks.push((next_block_start, *header));
next_block_start += header.size_of_block.get(LE);
}
let old_size = next_block_start - reloc_section_start as u32;
// this is 8 in practice
const HEADER_WIDTH: usize = std::mem::size_of::<ImageBaseRelocation>();
const ENTRY_WIDTH: usize = std::mem::size_of::<u16>();
// extra space that we'll use for the new relocations
let shift_amount = relocations.len() * ENTRY_WIDTH;
if block_has_relocation {
// now, starting from the back, shift sections that need to be shifted and add the
// new relocations to the right block
while let Some((block_start, header)) = blocks.pop() {
let header_va = header.virtual_address.get(LE);
let header_size = header.size_of_block.get(LE);
let block_start = block_start as usize;
match header_va.cmp(&new_block_va) {
std::cmp::Ordering::Greater => {
// shift this block
mmap.copy_within(
block_start..block_start + header_size as usize,
block_start + shift_amount,
);
}
std::cmp::Ordering::Equal => {
// extend this block
let header = load_struct_inplace_mut::<ImageBaseRelocation>(mmap, block_start);
let new_size = header.size_of_block.get(LE) + shift_amount as u32;
header.size_of_block.set(LE, new_size);
let number_of_entries = (new_size as usize - HEADER_WIDTH) / ENTRY_WIDTH;
let entries = load_structs_inplace_mut::<u16>(
mmap,
block_start + HEADER_WIDTH,
number_of_entries,
);
entries[number_of_entries - relocations.len()..].copy_from_slice(relocations);
// sort by VA. Upper 4 bits store the relocation type
entries.sort_unstable_by_key(|x| x & 0b0000_1111_1111_1111);
}
std::cmp::Ordering::Less => {
// done
break;
}
}
}
BaseRelocations {
new_size: old_size + shift_amount as u32,
delta: shift_amount as u32,
}
} else {
let header =
load_struct_inplace_mut::<ImageBaseRelocation>(mmap, next_block_start as usize);
let size_of_block = HEADER_WIDTH + relocations.len() * ENTRY_WIDTH;
header.virtual_address.set(LE, new_block_va);
header.size_of_block.set(LE, size_of_block as u32);
let number_of_entries = relocations.len();
let entries = load_structs_inplace_mut::<u16>(
mmap,
next_block_start as usize + HEADER_WIDTH,
number_of_entries,
);
entries[number_of_entries - relocations.len()..].copy_from_slice(relocations);
// sort by VA. Upper 4 bits store the relocation type
entries.sort_unstable_by_key(|x| x & 0b0000_1111_1111_1111);
BaseRelocations {
new_size: old_size + size_of_block as u32,
delta: size_of_block as u32,
}
}
}
/// the roc app functions are called from the host with an indirect call: the code will look
/// in a table to find the actual address of the app function. This table must be relocated,
/// because it contains absolute addresses to jump to.
fn relocate_dummy_dll_entries(executable: &mut [u8], md: &PeMetadata) {
let thunks_start_va =
md.dummy_dll_thunk_section_virtual_address + md.thunks_start_offset_in_section as u32;
// relocations are defined per 4kb page
const BLOCK_SIZE: u32 = 4096;
let thunks_offset_in_block = thunks_start_va % BLOCK_SIZE;
let thunks_relocation_block_va = thunks_start_va - thunks_offset_in_block;
let relocations: Vec<_> = (0..md.dynamic_relocations.name_by_virtual_address.len())
.map(|i| (thunks_offset_in_block as usize + 2 * i) as u16)
.collect();
let base_relocations = write_image_base_relocation(
executable,
md.reloc_offset_in_file,
thunks_relocation_block_va,
&relocations,
);
// the reloc section got bigger, and we need to update the header with the new size
let reloc_section_header_start = md.dynamic_relocations.section_headers_offset_in_file as usize
+ md.reloc_section_index * std::mem::size_of::<ImageSectionHeader>();
let next_section = load_struct_inplace::<ImageSectionHeader>(
executable,
reloc_section_header_start + std::mem::size_of::<ImageSectionHeader>(),
);
let next_section_pointer_to_raw_data = next_section.pointer_to_raw_data.get(LE);
let reloc_section =
load_struct_inplace_mut::<ImageSectionHeader>(executable, reloc_section_header_start);
let old_section_size = reloc_section.virtual_size.get(LE);
let new_virtual_size = old_section_size + base_relocations.delta;
let new_virtual_size = next_multiple_of(new_virtual_size as usize, 8) as u32;
reloc_section.virtual_size.set(LE, new_virtual_size);
assert!(
reloc_section.pointer_to_raw_data.get(LE)
+ reloc_section.virtual_size.get(LE)
+ base_relocations.delta
< next_section_pointer_to_raw_data,
"new .reloc section is too big, and runs into the next section!",
);
// in the data directories, update the length of the base relocations
let dir = load_struct_inplace_mut::<pe::ImageDataDirectory>(
executable,
@ -1406,9 +1258,11 @@ fn relocate_dummy_dll_entries(executable: &mut [u8], md: &PeMetadata) {
* std::mem::size_of::<pe::ImageDataDirectory>(),
);
// it is crucial that the directory size is rounded up to a multiple of 8!
// the value is already rounded, so to be correct we can't rely on `dir.size.get(LE)`
let new_reloc_directory_size = next_multiple_of(base_relocations.new_size as usize, 8);
// for unclear reasons, we must bump the image directory size here.
// we also need some zeroed-out memory at the end, so if the directory
// ends at a multiple of `file_alignment`, pick the next one.
let new_reloc_directory_size =
next_multiple_of(dir.size.get(LE) as usize + 4, md.file_alignment as usize);
dir.size.set(LE, new_reloc_directory_size as u32);
}

View file

@ -1,5 +1,6 @@
const std = @import("std");
const str = @import("str");
const builtin = @import("builtin");
const RocStr = str.RocStr;
const testing = std.testing;
const expectEqual = testing.expectEqual;
@ -15,7 +16,6 @@ comptime {
// -fcompiler-rt in link.rs instead of doing this. Note that this
// workaround is present in many host.zig files, so make sure to undo
// it everywhere!
const builtin = @import("builtin");
if (builtin.os.tag == .macos) {
_ = @import("compiler_rt");
}
@ -198,7 +198,8 @@ fn roc_fx_getInt_help() !i64 {
const stdin = std.io.getStdIn().reader();
var buf: [40]u8 = undefined;
const line: []u8 = (try stdin.readUntilDelimiterOrEof(&buf, '\n')) orelse "";
const raw_line: []u8 = (try stdin.readUntilDelimiterOrEof(&buf, '\n')) orelse "";
const line = std.mem.trimRight(u8, raw_line, &std.ascii.spaces);
return std.fmt.parseInt(i64, line, 10);
}

View file

@ -1,5 +1,6 @@
const std = @import("std");
const str = @import("str");
const builtin = @import("builtin");
const RocStr = str.RocStr;
const testing = std.testing;
const expectEqual = testing.expectEqual;
@ -15,7 +16,6 @@ comptime {
// -fcompiler-rt in link.rs instead of doing this. Note that this
// workaround is present in many host.zig files, so make sure to undo
// it everywhere!
const builtin = @import("builtin");
if (builtin.os.tag == .macos) {
_ = @import("compiler_rt");
}
@ -147,20 +147,17 @@ export fn roc_memset(dst: [*]u8, value: i32, size: usize) callconv(.C) void {
const Unit = extern struct {};
pub export fn main() callconv(.C) u8 {
var ts1: std.os.timespec = undefined;
std.os.clock_gettime(std.os.CLOCK.REALTIME, &ts1) catch unreachable;
var timer = std.time.Timer.start() catch unreachable;
const program = roc__mainForHost_1_exposed();
call_the_closure(program);
var ts2: std.os.timespec = undefined;
std.os.clock_gettime(std.os.CLOCK.REALTIME, &ts2) catch unreachable;
const delta = to_seconds(ts2) - to_seconds(ts1);
const nanos = timer.read();
const seconds = (@intToFloat(f64, nanos) / 1_000_000_000.0);
const stderr = std.io.getStdErr().writer();
stderr.print("runtime: {d:.3}ms\n", .{delta * 1000}) catch unreachable;
stderr.print("runtime: {d:.3}ms\n", .{seconds * 1000}) catch unreachable;
return 0;
}
@ -267,7 +264,8 @@ fn roc_fx_getInt_help() !i64 {
const stdin = std.io.getStdIn().reader();
var buf: [40]u8 = undefined;
const line: []u8 = (try stdin.readUntilDelimiterOrEof(&buf, '\n')) orelse "";
const raw_line: []u8 = (try stdin.readUntilDelimiterOrEof(&buf, '\n')) orelse "";
const line = std.mem.trimRight(u8, raw_line, &std.ascii.spaces);
return std.fmt.parseInt(i64, line, 10);
}