From 97997acffa68db2a780e9ec4aa6c3efcbfb0bb15 Mon Sep 17 00:00:00 2001 From: Brendan Hansknecht Date: Thu, 19 Aug 2021 22:54:14 -0700 Subject: [PATCH] Extract application function and plt data from platform --- Cargo.lock | 1 + linker/Cargo.toml | 1 + linker/src/lib.rs | 117 ++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 105 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c4b6eb587d..ffbca5c639 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3323,6 +3323,7 @@ dependencies = [ "clap 3.0.0-beta.1", "memmap2 0.3.1", "object 0.26.0", + "roc_collections", ] [[package]] diff --git a/linker/Cargo.toml b/linker/Cargo.toml index 4608e87f67..621e712b74 100644 --- a/linker/Cargo.toml +++ b/linker/Cargo.toml @@ -18,6 +18,7 @@ test = false bench = false [dependencies] +roc_collections = { path = "../compiler/collections" } bumpalo = { version = "3.6.1", features = ["collections"] } # TODO switch to clap 3.0.0 once it's out. Tried adding clap = "~3.0.0-beta.1" and cargo wouldn't accept it clap = { git = "https://github.com/rtfeldman/clap", branch = "master" } diff --git a/linker/src/lib.rs b/linker/src/lib.rs index 2321738773..59eb21b612 100644 --- a/linker/src/lib.rs +++ b/linker/src/lib.rs @@ -1,11 +1,17 @@ use clap::{App, AppSettings, Arg, ArgMatches}; use memmap2::Mmap; -use object::Object; +use object::{ + Architecture, BinaryFormat, Object, ObjectSection, ObjectSymbol, Relocation, RelocationKind, + RelocationTarget, Symbol, +}; +use roc_collections::all::MutMap; use std::fs; use std::io; pub const CMD_PREPROCESS: &str = "preprocess"; pub const CMD_SURGERY: &str = "surgery"; +pub const FLAG_VERBOSE: &str = "verbose"; + pub const EXEC: &str = "EXEC"; pub const SHARED_LIB: &str = "SHARED_LIB"; @@ -25,6 +31,13 @@ pub fn build_app<'a>() -> App<'a> { Arg::with_name(SHARED_LIB) .help("The dummy shared library representing the Roc application") .required(true), + ) + .arg( + Arg::with_name(FLAG_VERBOSE) + .long(FLAG_VERBOSE) + .short('v') + 
.help("enable verbose printing") + .required(false), ), ) .subcommand( @@ -33,22 +46,98 @@ pub fn build_app<'a>() -> App<'a> { } pub fn preprocess(matches: &ArgMatches) -> io::Result { - let _app_functions = application_functions(&matches.value_of(SHARED_LIB).unwrap())?; + let verbose = matches.is_present(FLAG_VERBOSE); + + let app_functions = application_functions(&matches.value_of(SHARED_LIB).unwrap())?; + if verbose { + println!("Found app functions: {:?}", app_functions); + } let exec_file = fs::File::open(&matches.value_of(EXEC).unwrap())?; let exec_mmap = unsafe { Mmap::map(&exec_file)? }; - let exec_obj = object::File::parse(&*exec_mmap).map_err(|err| { - io::Error::new( - io::ErrorKind::InvalidData, - format!("Failed to parse executable file: {}", err), - ) - })?; + let exec_obj = match object::File::parse(&*exec_mmap) { + Ok(obj) => obj, + Err(err) => { + println!("Failed to parse executable file: {}", err); + return Ok(-1); + } + }; - // TODO: Extract PLT related information for these functions. - // The information need is really the address of each plt version of each application function. - // To find this, first get the dynmaic symbols for the app functions. - // Then reference them on the dynamic relocation table to figure out their plt function number. - // Then with the plt base address and that function number(or scanning the code), it should be possible to find the address. + // TODO: Deal with other file formats and architectures. + let format = exec_obj.format(); + if format != BinaryFormat::Elf { + println!("File Format, {:?}, not supported", format); + return Ok(-1); + } + let arch = exec_obj.architecture(); + if arch != Architecture::X86_64 { + println!("Architecture, {:?}, not supported", arch); + return Ok(-1); + } + + // Extract PLT related information for app functions. 
+ let plt_address = match exec_obj.sections().find(|sec| sec.name() == Ok(".plt")) { + Some(section) => section.address(), + None => { + println!("Failed to find PLT section. Probably an malformed executable."); + return Ok(-1); + } + }; + if verbose { + println!("PLT Address: {:x}", plt_address); + } + + let plt_relocs: Vec = (match exec_obj.dynamic_relocations() { + Some(relocs) => relocs, + None => { + println!("Executable never calls any application functions."); + println!("No work to do. Probably an invalid input."); + return Ok(-1); + } + }) + .map(|(_, reloc)| reloc) + .filter(|reloc| reloc.kind() == RelocationKind::Elf(7)) + .collect(); + if verbose { + println!(); + println!("PLT relocations"); + for reloc in plt_relocs.iter() { + println!("{:x?}", reloc); + } + } + + let app_syms: Vec = exec_obj + .dynamic_symbols() + .filter(|sym| { + let name = sym.name(); + name.is_ok() && app_functions.contains(&name.unwrap().to_string()) + }) + .collect(); + if verbose { + println!(); + println!("PLT Symbols for App Functions"); + for symbol in app_syms.iter() { + println!("{}: {:x?}", symbol.index().0, symbol); + } + } + + const PLT_ADDRESS_OFFSET: u64 = 0x10; + + let mut app_func_addresses: MutMap = MutMap::default(); + for (i, reloc) in plt_relocs.into_iter().enumerate() { + for symbol in app_syms.iter() { + if reloc.target() == RelocationTarget::Symbol(symbol.index()) { + let func_address = (i as u64 + 1) * PLT_ADDRESS_OFFSET + plt_address; + app_func_addresses.insert(func_address, symbol.name().unwrap()); + break; + } + } + } + + if verbose { + println!(); + println!("App Function Address Map: {:x?}", app_func_addresses); + } // TODO: For all text sections check for function calls to app functions. // This should just be disassembly and then scanning for jmp and call style ops that jump to the plt offsets we care about. @@ -57,7 +146,7 @@ pub fn preprocess(matches: &ArgMatches) -> io::Result { // TODO: Store all this data in a nice format. 
- // TODO: Potentially create a version of the executable with certain dynamic and PLT information deleted. + // TODO: Potentially create a version of the executable with certain dynamic and PLT information deleted (changing offsets may break things, so be careful). // Remove shared library dependencies. // Delete extra plt entries, dynamic symbols, and dynamic relocations (might require updating other plt entries, may not worth it). // Add regular symbols pointing to 0 for the app functions (maybe not needed if it is just link metadata).