diff --git a/crates/linker/dynhost_benchmarks_windows.exe b/crates/linker/dynhost_benchmarks_windows.exe new file mode 100755 index 0000000000..5c3a208653 Binary files /dev/null and b/crates/linker/dynhost_benchmarks_windows.exe differ diff --git a/crates/linker/src/lib.rs b/crates/linker/src/lib.rs index dad95db9c8..8921c3d102 100644 --- a/crates/linker/src/lib.rs +++ b/crates/linker/src/lib.rs @@ -24,6 +24,7 @@ use target_lexicon::Triple; mod generate_dylib; mod metadata; +mod pe; use metadata::VirtualOffset; const MIN_SECTION_ALIGNMENT: usize = 0x40; diff --git a/crates/linker/src/pe.rs b/crates/linker/src/pe.rs new file mode 100644 index 0000000000..5820e91f41 --- /dev/null +++ b/crates/linker/src/pe.rs @@ -0,0 +1,357 @@ +use std::{path::Path, time::Instant}; + +use memmap2::Mmap; +use object::{ + pe::{ImageImportDescriptor, ImageNtHeaders64, ImageThunkData64}, + read::pe::ImportTable, +}; +use roc_collections::MutMap; +use roc_error_macros::internal_error; + +#[allow(dead_code)] +pub(crate) fn preprocess_windows( + exec_filename: &str, + _metadata_filename: &str, + _out_filename: &str, + _shared_lib: &Path, + _verbose: bool, + _time: bool, +) -> object::read::Result<()> { + let total_start = Instant::now(); + let exec_parsing_start = total_start; + let exec_file = std::fs::File::open(exec_filename).unwrap_or_else(|e| internal_error!("{}", e)); + let exec_mmap = unsafe { Mmap::map(&exec_file).unwrap_or_else(|e| internal_error!("{}", e)) }; + let exec_data = &*exec_mmap; + let _exec_obj = match object::read::pe::PeFile64::parse(exec_data) { + Ok(obj) => obj, + Err(err) => { + internal_error!("Failed to parse executable file: {}", err); + } + }; + + let _exec_parsing_duration = exec_parsing_start.elapsed(); + + let dynamic_relocations = DynamicRelocationsPe::new(exec_data); + + println!("{:#x?}", dynamic_relocations); + + Ok(()) +} + +#[derive(Debug)] +struct DynamicRelocationsPe { + name_by_virtual_address: MutMap, + + /// Map symbol name to the virtual address and file offset + /// where the actual address of this symbol is stored + address_and_offset: MutMap, + + /// Virtual offset of the first thunk + section_virtual_address: u32, + + /// Offset in the file of the first thunk + section_offset_in_file: u32, + + /// Offset in the file of the imports directory + #[allow(dead_code)] + imports_offset_in_file: u32, + + /// The dummy .dll is the `dummy_import_index`th import of the host .exe + #[allow(dead_code)] + dummy_import_index: u32, +} + +impl DynamicRelocationsPe { + fn new(data: &[u8]) -> Self { + Self::new_help(data).unwrap() + } + + fn find_roc_dummy_dll( + import_table: &ImportTable, + ) -> object::read::Result> { + use object::LittleEndian as LE; + + let mut index = 0; + let mut it = import_table.descriptors()?; + while let Some(descriptor) = it.next()? { + let name = import_table.name(descriptor.name.get(LE))?; + // dbg!(String::from_utf8_lossy(name)); + + if name == b"roc-cheaty-lib.dll" { + return Ok(Some((*descriptor, index))); + } + + index += 1; + } + + Ok(None) + } + + /// Appends the functions (e.g. mainForHost) that the host needs from the app + fn append_roc_imports( + &mut self, + import_table: &ImportTable, + descriptor: &ImageImportDescriptor, + ) -> object::read::Result<()> { + use object::LittleEndian as LE; + + // offset of first thunk from the start of the section + let thunk_start_offset = + descriptor.original_first_thunk.get(LE) - self.section_virtual_address; + let mut thunk_offset = 0; + + let mut thunks = import_table.thunks(descriptor.original_first_thunk.get(LE))?; + while let Some(thunk_data) = thunks.next::()? { + use object::read::pe::ImageThunkData; + + let temporary_address = thunk_data.address(); + let (_, name) = import_table.hint_name(temporary_address as _)?; + let name = String::from_utf8_lossy(name).to_string(); + + let offset_in_file = self.section_offset_in_file + thunk_start_offset + thunk_offset; + let virtual_address = descriptor.original_first_thunk.get(LE) + thunk_offset; + + self.name_by_virtual_address + .insert(virtual_address, name.clone()); + self.address_and_offset + .insert(name, (virtual_address, offset_in_file)); + + thunk_offset += std::mem::size_of::() as u32; + } + + Ok(()) + } + + fn new_help(data: &[u8]) -> object::read::Result { + use object::pe; + use object::read::pe::ImageNtHeaders; + use object::LittleEndian as LE; + + let dos_header = pe::ImageDosHeader::parse(data)?; + let mut offset = dos_header.nt_headers_offset().into(); + let (nt_headers, data_directories) = ImageNtHeaders64::parse(data, &mut offset)?; + let sections = nt_headers.sections(data, offset)?; + + let data_dir = match data_directories.get(pe::IMAGE_DIRECTORY_ENTRY_IMPORT) { + Some(data_dir) => data_dir, + None => internal_error!("No dynamic imports, probably a bug"), + }; + + let import_va = data_dir.virtual_address.get(LE); + + let (section_data, section_va, offset_in_file) = sections + .iter() + .find_map(|section| { + section + .pe_data_containing(data, import_va) + .map(|(section_data, section_va)| { + ( + section_data, + section_va, + section.pointer_to_raw_data.get(LE), + ) + }) + }) + .expect("Invalid import data dir virtual address"); + + let import_table = ImportTable::new(section_data, section_va, import_va); + + let imports_offset_in_section = import_va.wrapping_sub(section_va); + let imports_offset_in_file = offset_in_file + imports_offset_in_section; + + let (descriptor, dummy_import_index) = Self::find_roc_dummy_dll(&import_table)?.unwrap(); + + let mut this = Self { + name_by_virtual_address: Default::default(), + address_and_offset: Default::default(), + section_virtual_address: section_va, + section_offset_in_file: offset_in_file, + imports_offset_in_file, + dummy_import_index, + }; + + this.append_roc_imports(&import_table, &descriptor)?; + + Ok(this) + } +} + +#[allow(dead_code)] +fn remove_dummy_dll_import_table( + data: &mut [u8], + imports_offset_in_file: u32, + dummy_import_index: u32, +) { + const W: usize = std::mem::size_of::(); + + let start = imports_offset_in_file as usize + W * dummy_import_index as usize; + for b in data[start..][..W].iter_mut() { + *b = 0; + } +} + +#[cfg(test)] +mod test { + const PE_DYNHOST: &[u8] = include_bytes!("../dynhost_benchmarks_windows.exe") as &[_]; + + use object::read::pe::PeFile64; + use object::Object; + + use super::*; + + #[test] + fn dynamic_relocations() { + use object::LittleEndian as LE; + + // find the location in the virtual address space and the on-disk file where dynamic + // relocations are stored. For the example dynhost, objdump shows + + // Sections: + // Idx Name Size VMA LMA File off Algn + // 0 .text 0007bb26 0000000140001000 0000000140001000 00000400 2**4 + // CONTENTS, ALLOC, LOAD, READONLY, CODE + // 1 .rdata 000369d4 000000014007d000 000000014007d000 0007c000 2**4 + // + + // we parse that into this structure. Note how the VMA and File off correspond to the + // values that we find. Also note how the delta between virtual address and offset is the + // same for values of `address_and_offset` and the section values that we find + // + // DynamicRelocationsPe { + // name_by_virtual_address: { + // 0xaf4e0: "roc__mainForHost_size", + // 0xaf4c8: "roc__mainForHost_1__Fx_caller", + // 0xaf4d0: "roc__mainForHost_1__Fx_result_size", + // 0xaf4d8: "roc__mainForHost_1_exposed_generic", + // }, + // address_and_offset: { + // "roc__mainForHost_1__Fx_result_size": ( + // 0xaf4d0, + // 0xae4d0, + // ), + // "roc__mainForHost_1__Fx_caller": ( + // 0xaf4c8, + // 0xae4c8, + // ), + // "roc__mainForHost_1_exposed_generic": ( + // 0xaf4d8, + // 0xae4d8, + // ), + // "roc__mainForHost_size": ( + // 0xaf4e0, + // 0xae4e0, + // ), + // }, + // section_virtual_address: 0x7d000, + // section_offset_in_file: 0x7c000, + // } + + let dynamic_relocations = DynamicRelocationsPe::new(PE_DYNHOST); + // println!("{:#x?}", dynamic_relocations); + + let file = PeFile64::parse(PE_DYNHOST).unwrap(); + let import_table = file.import_table().unwrap().unwrap(); + + // let mut name_by_virtual_address: Vec<_> = dynamic_relocations + // .name_by_virtual_address + // .into_iter() + // .collect(); + // + // + // name_by_virtual_address.sort_unstable(); + + let mut address_and_offset: Vec<_> = + dynamic_relocations.address_and_offset.into_iter().collect(); + + address_and_offset.sort_unstable(); + + // get the relocations through the API + let addresses_api = { + let (descriptor, _) = DynamicRelocationsPe::find_roc_dummy_dll(&import_table) + .unwrap() + .unwrap(); + + let mut addresses = Vec::new(); + + let mut thunks = import_table + .thunks(descriptor.original_first_thunk.get(LE)) + .unwrap(); + while let Some(thunk_data) = thunks.next::().unwrap() { + use object::read::pe::ImageThunkData; + + let temporary_address = thunk_data.address(); + + let (_, name) = import_table.hint_name(temporary_address as _).unwrap(); + let name = String::from_utf8_lossy(name).to_string(); + + addresses.push((name, temporary_address)); + } + + addresses.sort_unstable(); + + addresses + }; + + // get the relocations through using our offsets into the file + let addresses_file: Vec<_> = address_and_offset + .iter() + .map(|(name, (_, offset))| { + ( + name.to_string(), + u32::from_le_bytes(PE_DYNHOST[*offset as usize..][..4].try_into().unwrap()), + ) + }) + .collect(); + + // we want our file offset approach to equal the API + assert_eq!(addresses_api, addresses_file); + } + + #[test] + fn collect_undefined_symbols_pe() { + let object = object::File::parse(PE_DYNHOST).unwrap(); + + let imports: Vec<_> = object + .imports() + .unwrap() + .iter() + .filter(|import| import.library() == b"roc-cheaty-lib.dll") + .map(|import| String::from_utf8_lossy(import.name())) + .collect(); + + assert_eq!( + [ + "roc__mainForHost_1__Fx_caller", + "roc__mainForHost_1__Fx_result_size", + "roc__mainForHost_1_exposed_generic", + "roc__mainForHost_size" + ], + imports.as_slice(), + ) + } + + #[test] + fn remove_dummy_dll_import() { + let dynamic_relocations = DynamicRelocationsPe::new(PE_DYNHOST); + let mut data = PE_DYNHOST.to_vec(); + + remove_dummy_dll_import_table( + &mut data, + dynamic_relocations.imports_offset_in_file, + dynamic_relocations.dummy_import_index, + ); + + // parse and check that it's really gone + let object = object::File::parse(data.as_slice()).unwrap(); + + assert_eq!( + object + .imports() + .unwrap() + .iter() + .filter(|import| import.library() == b"roc-cheaty-lib.dll") + .count(), + 0 + ); + } +}