Add the base for loading and cloning the executable for surgery

This commit is contained in:
Brendan Hansknecht 2021-08-21 14:55:39 -07:00
parent b84d958a9a
commit b0c3aa3d1c
2 changed files with 341 additions and 34 deletions

View file

@ -1,17 +1,20 @@
use bincode::{deserialize_from, serialize_into}; use bincode::{deserialize_from, serialize_into};
use clap::{App, AppSettings, Arg, ArgMatches}; use clap::{App, AppSettings, Arg, ArgMatches};
use iced_x86::{Decoder, DecoderOptions, Instruction, OpCodeOperandKind, OpKind}; use iced_x86::{Decoder, DecoderOptions, Instruction, OpCodeOperandKind, OpKind};
use memmap2::Mmap; use memmap2::{Mmap, MmapMut};
use object::{elf, endian};
use object::{ use object::{
Architecture, BinaryFormat, CompressedFileRange, CompressionFormat, Object, ObjectSection, Architecture, BinaryFormat, CompressedFileRange, CompressionFormat, LittleEndian, NativeEndian,
ObjectSymbol, Relocation, RelocationKind, RelocationTarget, Section, Symbol, Object, ObjectSection, ObjectSymbol, Relocation, RelocationKind, RelocationTarget, Section,
Symbol,
}; };
use roc_collections::all::MutMap; use roc_collections::all::MutMap;
use std::convert::TryFrom; use std::convert::TryFrom;
use std::ffi::CStr; use std::ffi::CStr;
use std::fs; use std::fs;
use std::io; use std::io;
use std::io::BufWriter; use std::io::{BufReader, BufWriter};
use std::mem;
use std::os::raw::c_char; use std::os::raw::c_char;
use std::path::Path; use std::path::Path;
use std::time::{Duration, SystemTime}; use std::time::{Duration, SystemTime};
@ -25,7 +28,8 @@ pub const FLAG_VERBOSE: &str = "verbose";
pub const EXEC: &str = "EXEC"; pub const EXEC: &str = "EXEC";
pub const METADATA: &str = "METADATA"; pub const METADATA: &str = "METADATA";
pub const SHARED_LIB: &str = "SHARED_LIB"; pub const SHARED_LIB: &str = "SHARED_LIB";
pub const OBJ: &str = "OBJ"; pub const APP: &str = "APP";
pub const OUT: &str = "OUT";
fn report_timing(label: &str, duration: Duration) { fn report_timing(label: &str, duration: Duration) {
&println!("\t{:9.3} ms {}", duration.as_secs_f64() * 1000.0, label,); &println!("\t{:9.3} ms {}", duration.as_secs_f64() * 1000.0, label,);
@ -57,7 +61,7 @@ pub fn build_app<'a>() -> App<'a> {
Arg::with_name(FLAG_VERBOSE) Arg::with_name(FLAG_VERBOSE)
.long(FLAG_VERBOSE) .long(FLAG_VERBOSE)
.short('v') .short('v')
.help("enable verbose printing") .help("Enable verbose printing")
.required(false), .required(false),
), ),
) )
@ -75,9 +79,17 @@ pub fn build_app<'a>() -> App<'a> {
.required(true), .required(true),
) )
.arg( .arg(
Arg::with_name(OBJ) Arg::with_name(APP)
.help("the object file waiting to be linked") .help("The Roc application object file waiting to be linked")
.required(true), .required(true),
)
.arg(Arg::with_name(OUT).help("The output file").required(true))
.arg(
Arg::with_name(FLAG_VERBOSE)
.long(FLAG_VERBOSE)
.short('v')
.help("Enable verbose printing")
.required(false),
), ),
) )
} }
@ -459,30 +471,6 @@ pub fn preprocess(matches: &ArgMatches) -> io::Result<i32> {
} }
let scanning_dynamic_deps_duration = scanning_dynamic_deps_start.elapsed().unwrap(); let scanning_dynamic_deps_duration = scanning_dynamic_deps_start.elapsed().unwrap();
let elf64 = file_data[4] == 2;
let litte_endian = file_data[5] == 1;
if !elf64 || !litte_endian {
println!("Only 64bit little endian elf currently supported for preprocessing");
return Ok(-1);
}
let ph_offset = u64::from_le_bytes(<[u8; 8]>::try_from(&file_data[32..40]).unwrap());
let sh_offset = &u64::from_le_bytes(<[u8; 8]>::try_from(&file_data[40..48]).unwrap());
let ph_ent_size = &u16::from_le_bytes(<[u8; 2]>::try_from(&file_data[54..56]).unwrap());
let ph_num = &u16::from_le_bytes(<[u8; 2]>::try_from(&file_data[56..58]).unwrap());
let sh_ent_size = &u16::from_le_bytes(<[u8; 2]>::try_from(&file_data[58..60]).unwrap());
let sh_num = &u16::from_le_bytes(<[u8; 2]>::try_from(&file_data[60..62]).unwrap());
if verbose {
println!();
println!("Is Elf64: {}", elf64);
println!("Is Little Endian: {}", litte_endian);
println!("PH Offset: {:x}", ph_offset);
println!("PH Entry Size: {}", ph_ent_size);
println!("PH Entry Count: {}", ph_num);
println!("SH Offset: {:x}", sh_offset);
println!("SH Entry Size: {}", sh_ent_size);
println!("SH Entry Count: {}", sh_num);
}
if verbose { if verbose {
println!(); println!();
println!("{:?}", md); println!("{:?}", md);
@ -535,6 +523,322 @@ pub fn preprocess(matches: &ArgMatches) -> io::Result<i32> {
Ok(0) Ok(0)
} }
/// Runs the `surgery` subcommand: clones the preprocessed executable into the
/// output file while making room for one extra program header entry.
///
/// Steps, in order:
/// 1. Deserialize the metadata file named by the `METADATA` argument.
/// 2. Memory-map the executable named by `EXEC` and read its 64-bit
///    little-endian ELF file header.
/// 3. Memory-map and parse the application object file named by `APP`.
/// 4. Create the file named by `OUT`, sized to hold both inputs, and copy the
///    executable into it with everything after the program header table
///    shifted forward by one program header entry. Program headers, dynamic
///    table entries and section headers are patched to account for the shift.
/// 5. Print timing information.
///
/// # Returns
///
/// `Ok(0)` on success. `Ok(-1)` after printing a message when an input fails
/// validation (bad metadata, unsupported ELF flavour, unparsable application,
/// missing metadata fields, not enough padding in the first load segment).
///
/// # Errors
///
/// Returns the underlying `io::Error` when a file cannot be opened, mapped or
/// resized.
///
/// # Panics
///
/// Panics if a required argument is absent from `matches`, if the system clock
/// goes backwards while timing, or if offsets read from the executable point
/// outside of the mapped data.
pub fn surgery(matches: &ArgMatches) -> io::Result<i32> {
    let verbose = matches.is_present(FLAG_VERBOSE);

    let total_start = SystemTime::now();
    let loading_metadata_start = SystemTime::now();
    let input = fs::File::open(&matches.value_of(METADATA).unwrap())?;
    let input = BufReader::new(input);
    let md: metadata::Metadata = match deserialize_from(input) {
        Ok(data) => data,
        Err(err) => {
            println!("Failed to deserialize metadata: {}", err);
            return Ok(-1);
        }
    };
    let loading_metadata_duration = loading_metadata_start.elapsed().unwrap();

    let exec_parsing_start = SystemTime::now();
    let exec_file = fs::File::open(&matches.value_of(EXEC).unwrap())?;
    // SAFETY: NOTE(review): this assumes nothing else truncates or rewrites the
    // executable while it is mapped — confirm against the memmap2 contract.
    let exec_mmap = unsafe { Mmap::map(&exec_file)? };
    let exec_data = &*exec_mmap;

    // Reject files that cannot even hold the ELF file header, instead of
    // panicking on the index and struct loads below.
    if exec_data.len() < mem::size_of::<elf::FileHeader64<LittleEndian>>() {
        println!("Executable is too small to contain a 64bit elf header");
        return Ok(-1);
    }

    // Byte 4 of the ELF identification is the class (2 == 64-bit) and byte 5
    // is the data encoding (1 == little endian).
    let elf64 = exec_data[4] == 2;
    let little_endian = exec_data[5] == 1;
    if !elf64 || !little_endian {
        println!("Only 64bit little endian elf currently supported for surgery");
        return Ok(-1);
    }
    let exec_header = load_struct_inplace::<elf::FileHeader64<LittleEndian>>(exec_data, 0);

    // NOTE(review): the fields are declared `LittleEndian` but read with
    // `NativeEndian`; presumably this only type-checks on little-endian hosts.
    let ph_offset = exec_header.e_phoff.get(NativeEndian);
    let ph_ent_size = exec_header.e_phentsize.get(NativeEndian);
    let ph_num = exec_header.e_phnum.get(NativeEndian);
    let sh_offset = exec_header.e_shoff.get(NativeEndian);
    let sh_ent_size = exec_header.e_shentsize.get(NativeEndian);
    let sh_num = exec_header.e_shnum.get(NativeEndian);
    if verbose {
        println!();
        println!("Is Elf64: {}", elf64);
        println!("Is Little Endian: {}", little_endian);
        println!("PH Offset: {:x}", ph_offset);
        println!("PH Entry Size: {}", ph_ent_size);
        println!("PH Entry Count: {}", ph_num);
        println!("SH Offset: {:x}", sh_offset);
        println!("SH Entry Size: {}", sh_ent_size);
        println!("SH Entry Count: {}", sh_num);
    }
    let exec_parsing_duration = exec_parsing_start.elapsed().unwrap();

    let app_parsing_start = SystemTime::now();
    let app_file = fs::File::open(&matches.value_of(APP).unwrap())?;
    // SAFETY: NOTE(review): same assumption as for the executable mapping.
    let app_mmap = unsafe { Mmap::map(&app_file)? };
    // Parse the application mapping (not the executable mapping).
    let app_data = &*app_mmap;
    let app_obj = match object::File::parse(app_data) {
        Ok(obj) => obj,
        Err(err) => {
            println!("Failed to parse application file: {}", err);
            return Ok(-1);
        }
    };
    let app_parsing_duration = app_parsing_start.elapsed().unwrap();

    let out_gen_start = SystemTime::now();
    // Upper bound for the output: everything from both inputs.
    let max_out_len = exec_data.len() + app_data.len();
    let out_file = fs::OpenOptions::new()
        .read(true)
        .write(true)
        .create(true)
        .truncate(true)
        .open(&matches.value_of(OUT).unwrap())?;
    out_file.set_len(max_out_len as u64)?;
    // SAFETY: NOTE(review): assumes the freshly created output file is not
    // modified by anything else while mapped.
    let mut out_mmap = unsafe { MmapMut::map_mut(&out_file)? };

    // Write a modified elf header with an extra program header entry.
    let added_data = ph_ent_size as u64;
    let ph_end = ph_offset as usize + ph_num as usize * ph_ent_size as usize;
    out_mmap[..ph_end].copy_from_slice(&exec_data[..ph_end]);
    let file_header = load_struct_inplace_mut::<elf::FileHeader64<LittleEndian>>(&mut out_mmap, 0);
    // file_header.e_phnum = endian::U16::new(LittleEndian, ph_num + 1);
    file_header.e_shoff = endian::U64::new(LittleEndian, sh_offset + added_data);
    // file_header.e_shnum = endian::U16::new(LittleEndian, 0);
    // file_header.e_shstrndx = endian::U16::new(LittleEndian, elf::SHN_UNDEF);

    // The table is loaded with one entry more than the executable has; the
    // extra entry lies in the not-yet-written part of the output file.
    let program_headers = load_structs_inplace_mut::<elf::ProgramHeader64<LittleEndian>>(
        &mut out_mmap,
        ph_offset as usize,
        ph_num as usize + 1,
    );

    // Steal the extra bytes we need from the first loaded sections.
    // Generally this section has empty space due to alignment.
    // TODO: I am not sure if these can be out of order. If they can be, we need to change this.
    let mut first_load_start = None;
    let mut first_load_end = None;
    for ph in program_headers {
        let p_type = ph.p_type.get(NativeEndian);
        let p_vaddr = ph.p_vaddr.get(NativeEndian);
        if first_load_end.is_none() && p_type == elf::PT_LOAD && ph.p_offset.get(NativeEndian) == 0
        {
            // First load segment: it must absorb the extra entry without
            // crossing into the next alignment unit.
            let p_filesz = ph.p_filesz.get(NativeEndian);
            let p_align = ph.p_align.get(NativeEndian);
            let p_memsz = ph.p_memsz.get(NativeEndian);
            if p_filesz / p_align != (p_filesz + added_data) / p_align {
                println!("Not enough extra space in the executable for alignment");
                println!("This makes linking a lot harder and is not supported yet");
                return Ok(-1);
            }
            ph.p_filesz = endian::U64::new(LittleEndian, p_filesz + added_data);
            ph.p_memsz = endian::U64::new(LittleEndian, p_memsz + added_data);
            first_load_start = Some(p_vaddr + ph_end as u64);
            first_load_end = Some(p_vaddr + p_memsz);
        } else if p_type == elf::PT_PHDR {
            // The program header table itself grows by one entry.
            ph.p_filesz =
                endian::U64::new(LittleEndian, ph.p_filesz.get(NativeEndian) + added_data);
            ph.p_memsz = endian::U64::new(LittleEndian, ph.p_memsz.get(NativeEndian) + added_data);
        } else if first_load_end.is_none() {
            // Entries seen before the first load segment are shifted entirely.
            ph.p_offset =
                endian::U64::new(LittleEndian, ph.p_offset.get(NativeEndian) + added_data);
            ph.p_vaddr = endian::U64::new(LittleEndian, p_vaddr + added_data);
            ph.p_paddr = endian::U64::new(LittleEndian, ph.p_paddr.get(NativeEndian) + added_data);
        } else if first_load_start.unwrap() <= p_vaddr && p_vaddr <= first_load_end.unwrap() {
            // Addresses inside the first load segment move with its contents.
            ph.p_vaddr = endian::U64::new(LittleEndian, p_vaddr + added_data);
        } else if p_type != elf::PT_GNU_STACK && p_type != elf::PT_NULL {
            // Everything else only moves within the file.
            ph.p_offset =
                endian::U64::new(LittleEndian, ph.p_offset.get(NativeEndian) + added_data);
        }
    }
    if first_load_start.is_none() || first_load_end.is_none() {
        println!("Executable does not load any data");
        println!("Probably input the wrong file as the executable");
        return Ok(-1);
    }
    if verbose {
        println!(
            "First Byte loaded after Program Headers: {:x}",
            first_load_start.unwrap()
        );
        println!(
            "Last Byte loaded in first load: {:x}",
            first_load_end.unwrap()
        );
    }

    // Copy to program header, but add an extra item for the new data at the end of the file.
    out_mmap[ph_end + added_data as usize..sh_offset as usize + added_data as usize]
        .copy_from_slice(&exec_data[ph_end..sh_offset as usize]);

    // Update dynamic table entry for shift of extra ProgramHeader.
    let dyn_offset = match md.dynamic_section_offset {
        Some(offset) => offset as usize,
        None => {
            println!("Metadata missing dynamic section offset");
            return Ok(-1);
        }
    };
    let dyn_lib_count = match md.dynamic_lib_count {
        Some(count) => count as usize,
        None => {
            println!("Metadata missing dynamic library count");
            return Ok(-1);
        }
    };
    let shared_index = match md.shared_lib_index {
        Some(index) => index as usize,
        None => {
            println!("Metadata missing shared library index");
            return Ok(-1);
        }
    };

    let dyns = load_structs_inplace_mut::<elf::Dyn64<LittleEndian>>(
        &mut out_mmap,
        dyn_offset + added_data as usize,
        dyn_lib_count,
    );
    for d in dyns {
        match d.d_tag.get(NativeEndian) as u32 {
            // I believe this is the list of symbols that need to be updated if addresses change.
            // I am less sure about the symbols from GNU_HASH down.
            elf::DT_INIT
            | elf::DT_FINI
            | elf::DT_PLTGOT
            | elf::DT_HASH
            | elf::DT_STRTAB
            | elf::DT_SYMTAB
            | elf::DT_RELA
            | elf::DT_REL
            | elf::DT_DEBUG
            | elf::DT_JMPREL
            | elf::DT_INIT_ARRAY
            | elf::DT_FINI_ARRAY
            | elf::DT_PREINIT_ARRAY
            | elf::DT_SYMTAB_SHNDX
            | elf::DT_GNU_HASH
            | elf::DT_TLSDESC_PLT
            | elf::DT_TLSDESC_GOT
            | elf::DT_GNU_CONFLICT
            | elf::DT_GNU_LIBLIST
            | elf::DT_CONFIG
            | elf::DT_DEPAUDIT
            | elf::DT_AUDIT
            | elf::DT_PLTPAD
            | elf::DT_MOVETAB
            | elf::DT_SYMINFO
            | elf::DT_VERSYM
            | elf::DT_VERDEF
            | elf::DT_VERNEED => {
                // Only addresses that fall inside the shifted first load
                // segment need to move.
                let d_addr = d.d_val.get(NativeEndian);
                if first_load_start.unwrap() <= d_addr && d_addr <= first_load_end.unwrap() {
                    println!("Updating {:x?}", d);
                    d.d_val = endian::U64::new(LittleEndian, d_addr + added_data);
                }
            }
            _ => {}
        }
    }

    // Delete shared library from the dynamic table.
    // let out_ptr = out_mmap.as_mut_ptr();
    // unsafe {
    //     std::ptr::copy(
    //         out_ptr.offset((dyn_offset + added_data as usize + 16 * (shared_index + 1)) as isize),
    //         out_ptr.offset((dyn_offset + added_data as usize + 16 * shared_index) as isize),
    //         16 * (dyn_lib_count - shared_index),
    //     );
    // }

    let offset = sh_offset as usize;
    // Copy sections and resolve their relocations.
    // let text_sections: Vec<Section> = app_obj
    //     .sections()
    //     .filter(|sec| {
    //         let name = sec.name();
    //         name.is_ok() && name.unwrap().starts_with(".text")
    //     })
    //     .collect();
    // if text_sections.is_empty() {
    //     println!("No text sections found. This application has no code.");
    //     return Ok(-1);
    // }

    // let mut new_headers: Vec<SectionHeader64<LittleEndian>> = vec![SectionHeader64::<LittleEndian> {
    //     sh_name: endian::U32::new(LittleEndian, 1);
    //     sh_type: endian::U32::new(LittleEndian, elf::SHT_PROGBITS);
    // }];

    // Copy the section header table to its shifted location, then patch every
    // header whose file offset or address moved.
    let sh_size = sh_ent_size as usize * sh_num as usize;
    out_mmap[offset + added_data as usize..offset + added_data as usize + sh_size]
        .copy_from_slice(&exec_data[offset..offset + sh_size]);

    let section_headers = load_structs_inplace_mut::<elf::SectionHeader64<LittleEndian>>(
        &mut out_mmap,
        offset + added_data as usize,
        sh_num as usize,
    );
    for sh in section_headers {
        let sec_offset = sh.sh_offset.get(NativeEndian);
        if sec_offset >= ph_end as u64 {
            sh.sh_offset = endian::U64::new(LittleEndian, sec_offset + added_data);
        }
        let addr = sh.sh_addr.get(NativeEndian);
        if first_load_start.unwrap() <= addr && addr <= first_load_end.unwrap() {
            sh.sh_addr = endian::U64::new(LittleEndian, addr + added_data);
        }
    }

    let out_gen_duration = out_gen_start.elapsed().unwrap();

    let total_duration = total_start.elapsed().unwrap();

    println!();
    println!("Timings");
    report_timing("Loading Metadata", loading_metadata_duration);
    report_timing("Executable Parsing", exec_parsing_duration);
    report_timing("Application Parsing", app_parsing_duration);
    report_timing("Output Generation", out_gen_duration);
    report_timing(
        "Other",
        total_duration
            - loading_metadata_duration
            - exec_parsing_duration
            - app_parsing_duration
            - out_gen_duration,
    );
    report_timing("Total", total_duration);
    Ok(0)
}
/// Reinterprets the bytes starting at `offset` as a single `T` and returns a
/// shared reference to it, without copying.
///
/// This is the one-element form of `load_structs_inplace` and panics under the
/// same conditions (range out of bounds, or data not aligned for `T`).
fn load_struct_inplace<'a, T>(bytes: &'a [u8], offset: usize) -> &'a T {
    let single = load_structs_inplace::<T>(bytes, offset, 1);
    &single[0]
}
/// Reinterprets the bytes starting at `offset` as a single `T` and returns a
/// mutable reference to it, so writes go straight into `bytes`.
///
/// This is the one-element form of `load_structs_inplace_mut` and panics under
/// the same conditions (range out of bounds, or data not aligned for `T`).
fn load_struct_inplace_mut<'a, T>(bytes: &'a mut [u8], offset: usize) -> &'a mut T {
    let single = load_structs_inplace_mut::<T>(bytes, offset, 1);
    &mut single[0]
}
/// Reinterprets `count` consecutive `T` values starting at byte `offset` of
/// `bytes` as a slice, without copying.
///
/// # Panics
///
/// Panics if the requested byte range lies outside `bytes`, or if the range is
/// not suitably aligned for `T` (so that it does not split into exactly
/// `count` whole values with nothing left over on either side).
fn load_structs_inplace<'a, T>(bytes: &'a [u8], offset: usize, count: usize) -> &'a [T] {
    let byte_len = count * mem::size_of::<T>();
    let window = &bytes[offset..offset + byte_len];
    // SAFETY: `align_to` reinterprets raw bytes as `T`; callers must only use
    // this with plain-data types for which every bit pattern is a valid value.
    let (prefix, structs, suffix) = unsafe { window.align_to::<T>() };
    assert!(prefix.is_empty(), "Data was not aligned");
    assert_eq!(count, structs.len(), "Failed to load all structs");
    assert!(suffix.is_empty(), "End of data was not aligned");
    structs
}
/// Reinterprets `count` consecutive `T` values starting at byte `offset` of
/// `bytes` as a mutable slice, so writes go straight into `bytes`.
///
/// # Panics
///
/// Panics if the requested byte range lies outside `bytes`, or if the range is
/// not suitably aligned for `T` (so that it does not split into exactly
/// `count` whole values with nothing left over on either side).
fn load_structs_inplace_mut<'a, T>(
    bytes: &'a mut [u8],
    offset: usize,
    count: usize,
) -> &'a mut [T] {
    let byte_len = count * mem::size_of::<T>();
    let window = &mut bytes[offset..offset + byte_len];
    // SAFETY: `align_to_mut` reinterprets raw bytes as `T`; callers must only
    // use this with plain-data types for which every bit pattern is valid.
    let (prefix, structs, suffix) = unsafe { window.align_to_mut::<T>() };
    assert!(prefix.is_empty(), "Data was not aligned");
    assert_eq!(count, structs.len(), "Failed to load all structs");
    assert!(suffix.is_empty(), "End of data was not aligned");
    structs
}
fn application_functions(shared_lib_name: &str) -> io::Result<Vec<String>> { fn application_functions(shared_lib_name: &str) -> io::Result<Vec<String>> {
let shared_file = fs::File::open(&shared_lib_name)?; let shared_file = fs::File::open(&shared_lib_name)?;
let shared_mmap = unsafe { Mmap::map(&shared_file)? }; let shared_mmap = unsafe { Mmap::map(&shared_file)? };

View file

@ -1,4 +1,4 @@
use roc_linker::{build_app, preprocess, CMD_PREPROCESS, CMD_SURGERY}; use roc_linker::{build_app, preprocess, surgery, CMD_PREPROCESS, CMD_SURGERY};
use std::io; use std::io;
fn main() -> io::Result<()> { fn main() -> io::Result<()> {
@ -10,7 +10,10 @@ fn main() -> io::Result<()> {
let sub_matches = matches.subcommand_matches(CMD_PREPROCESS).unwrap(); let sub_matches = matches.subcommand_matches(CMD_PREPROCESS).unwrap();
preprocess(sub_matches) preprocess(sub_matches)
} }
Some(CMD_SURGERY) => Ok(0), Some(CMD_SURGERY) => {
let sub_matches = matches.subcommand_matches(CMD_SURGERY).unwrap();
surgery(sub_matches)
}
_ => unreachable!(), _ => unreachable!(),
}?; }?;
std::process::exit(exit_code); std::process::exit(exit_code);