diff --git a/Cargo.toml b/Cargo.toml index 5f90fbf..8e4f84e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,8 @@ bincode = "1.3.3" flate2 = "1.0.31" serde = { version = "1.0", features = ["derive"] } num_enum = "0.7.3" +num-traits = "0.2.19" +zstd = "0.13.3" [features] default = [] diff --git a/src/id0.rs b/src/id0.rs index 10ab02f..7440c0c 100644 --- a/src/id0.rs +++ b/src/id0.rs @@ -2,12 +2,15 @@ use std::collections::HashMap; use std::num::NonZeroU32; use std::ops::Range; -use crate::ida_reader::{IdaGenericUnpack, IdaUnpack, IdaUnpacker}; -use crate::{til, IDBHeader, IDBSectionCompression}; +use crate::ida_reader::{IdbBufRead, IdbRead, IdbReadKind}; +use crate::{til, IDAKind, IDAUsize}; use anyhow::{anyhow, ensure, Result}; +pub mod flag; + mod segment; +use num_traits::CheckedAdd; pub use segment::*; mod root_info; pub use root_info::*; @@ -17,76 +20,68 @@ mod address_info; pub use address_info::*; mod dirtree; pub use dirtree::*; +mod file_region; +pub use file_region::*; +mod patch; +pub use patch::*; +mod db; +pub use db::*; #[derive(Clone, Debug)] -pub struct IDBFileRegions { - pub start: u64, - pub end: u64, - pub eva: u64, +pub struct IDBFunction { + pub address: Range, + pub flags: u16, + pub extra: IDBFunctionExtra, } -impl IDBFileRegions { - fn read( - _key: &[u8], - data: &[u8], - version: u16, - is_64: bool, - ) -> Result { - let mut input = IdaUnpacker::new(data, is_64); - // TODO detect versions with more accuracy - let (start, end, eva) = match version { - ..=699 => { - let start = input.read_word()?; - let end = input.read_word()?; - let rva: u32 = bincode::deserialize_from(&mut input)?; - (start, end, rva.into()) - } - 700.. => { - let start = input.unpack_usize()?; - let end = start.checked_add(input.unpack_usize()?).ok_or_else( - || anyhow!("Overflow address in File Regions"), - )?; - let rva = input.unpack_usize()?; - // TODO some may include an extra 0 byte at the end? - if let Ok(_unknown) = input.unpack_usize() { - ensure!(_unknown == 0); - } - (start, end, rva) - } - }; - ensure!(input.inner().is_empty()); - Ok(Self { start, end, eva }) - } +#[derive(Clone, Debug)] +pub enum IDBFunctionExtra { + NonTail { + frame: K::Usize, + }, + Tail { + /// function owner of the function start + owner: K::Usize, + refqty: K::Usize, + _unknown1: u16, + _unknown2: K::Usize, + }, } #[derive(Clone, Debug)] -pub enum FunctionsAndComments<'a> { +pub enum FunctionsAndComments<'a, K: IDAKind> { // It's just the name "$ funcs" Name, - Function(IDBFunction), - Comment { address: u64, comment: Comments<'a> }, - Unknown { key: &'a [u8], value: &'a [u8] }, + Function(IDBFunction), + Comment { + address: K::Usize, + comment: Comments<'a>, + }, + Unknown { + key: &'a [u8], + value: &'a [u8], + }, } -impl<'a> FunctionsAndComments<'a> { - fn read(key: &'a [u8], value: &'a [u8], is_64: bool) -> Result { +impl<'a, K: IDAKind> FunctionsAndComments<'a, K> { + fn read(key: &'a [u8], value: &'a [u8]) -> Result { let [key_type, sub_key @ ..] 
= key else { return Err(anyhow!("invalid Funcs subkey")); }; match *key_type { - b'N' => { + flag::netnode::nn_res::NAME_TAG => { ensure!(parse_maybe_cstr(value) == Some(&b"$ funcs"[..])); Ok(Self::Name) } - b'S' => { - IDBFunction::read(sub_key, value, is_64).map(Self::Function) + flag::netnode::nn_res::ARRAY_SUP_TAG => { + IDBFunction::read(sub_key, value).map(Self::Function) } // some kind of style setting, maybe setting font and background color b'R' | b'C' if value.starts_with(&[4, 3, 2, 1]) => { Ok(Self::Unknown { key, value }) } b'C' => { - let address = parse_number(sub_key, true, is_64) + let address = K::Usize::from_be_bytes(sub_key) .ok_or_else(|| anyhow!("Invalid Comment address"))?; parse_maybe_cstr(value) .map(|value| Self::Comment { @@ -97,7 +92,7 @@ impl<'a> FunctionsAndComments<'a> { } b'R' => { let address = - parse_number(sub_key, true, is_64).ok_or_else(|| { + K::Usize::from_be_bytes(sub_key).ok_or_else(|| { anyhow!("Invalid Repetable Comment address") })?; parse_maybe_cstr(value) @@ -113,42 +108,28 @@ impl<'a> FunctionsAndComments<'a> { } } -#[derive(Clone, Debug)] -pub struct IDBFunction { - pub address: Range, - pub flags: u16, - pub extra: Option, -} - -#[derive(Clone, Debug)] -pub enum IDBFunctionExtra { - NonTail { - frame: u64, - }, - Tail { - /// function owner of the function start - owner: u64, - refqty: u64, - }, -} - -impl IDBFunction { +impl IDBFunction { + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x37dd30 // InnerRef 5c1b89aa-5277-4c98-98f6-cec08e1946ec 0x28f810 - fn read(_key: &[u8], value: &[u8], is_64: bool) -> Result { - let mut input = IdaUnpacker::new(value, is_64); - let address = input.unpack_address_range()?; + fn read(_key: &[u8], value: &[u8]) -> Result { + let mut input = value; + let address = IdbReadKind::::unpack_address_range(&mut input)?; let flags = input.unpack_dw()?; // CONST migrate this to mod flags const FUNC_TAIL: u16 = 0x8000; - let extra = if flags & FUNC_TAIL != 0 { - Self::read_extra_tail(input, address.start).ok() + let extra = if flags & FUNC_TAIL == 0 { + Self::read_extra_tail(&mut input, address.start)? } else { - Self::read_extra_regular(input).ok() + Self::read_extra_regular(&mut input)? }; - // TODO Undertand the InnerRef 5c1b89aa-5277-4c98-98f6-cec08e1946ec 0x28f9d8 data + + if !input.is_empty() { + let _value = input.unpack_dq()?; + } + // TODO Undestand the InnerRef 5c1b89aa-5277-4c98-98f6-cec08e1946ec 0x28f9d8 data // TODO make sure all the data is parsed - //ensure!(input.position() == u64::try_from(data.len()).unwrap()); + //ensure!(input.inner_ref().empty()); Ok(Self { address, flags, @@ -157,10 +138,10 @@ impl IDBFunction { } fn read_extra_regular( - mut input: impl IdaUnpack, - ) -> Result { + input: &mut impl IdbReadKind, + ) -> Result> { // TODO Undertand the sub operation at InnerRef 5c1b89aa-5277-4c98-98f6-cec08e1946ec 0x28f98f - let frame = input.unpack_usize_ext_max()?; + let frame = input.unpack_usize()?; let _unknown4 = input.unpack_dw()?; if _unknown4 == 0 { let _unknown5 = input.unpack_dd()?; @@ -168,38 +149,48 @@ impl IDBFunction { Ok(IDBFunctionExtra::NonTail { frame }) } - fn read_extra_tail( - mut input: impl IdaUnpack, - address_start: u64, - ) -> Result { + fn read_extra_tail( + input: &mut R, + address_start: K::Usize, + ) -> Result> + where + R: IdbBufRead + IdbReadKind, + { // offset of the function owner in relation to the function start - let owner_offset = input.unpack_usize()? 
as i64; - let owner = match address_start.checked_add_signed(owner_offset) { - Some(0xFFFF_FFFF) => u64::MAX, - Some(value) => value, - None => return Err(anyhow!("Owner Function offset is invalid")), - }; - let refqty = input.unpack_usize_ext_max()?; + let owner_offset = input.unpack_usize()?; + let owner = address_start + .checked_add(&owner_offset) + .ok_or_else(|| anyhow!("Owner Function offset is invalid"))?; + let refqty = input.unpack_usize()?; let _unknown1 = input.unpack_dw()?; - let _unknown2 = input.unpack_usize_ext_max()?; + let _unknown2 = input.unpack_usize()?; + if input.peek_u8()?.is_some() { + input.consume(1); + } // TODO make data depending on variables that I don't understant // InnerRef 5c1b89aa-5277-4c98-98f6-cec08e1946ec 0x28fa93 - Ok(IDBFunctionExtra::Tail { owner, refqty }) + Ok(IDBFunctionExtra::Tail { + owner, + refqty, + _unknown1, + _unknown2, + }) } } #[derive(Clone, Debug)] -pub enum EntryPointRaw<'a> { +pub enum EntryPointRaw<'a, K: IDAKind> { Name, - Address { key: u64, address: u64 }, - Ordinal { key: u64, ordinal: u64 }, - ForwardedSymbol { key: u64, symbol: &'a str }, - FunctionName { key: u64, name: &'a str }, + Address { key: K::Usize, address: K::Usize }, + Ordinal { key: K::Usize, ordinal: K::Usize }, + ForwardedSymbol { key: K::Usize, symbol: &'a str }, + FunctionName { key: K::Usize, name: &'a str }, Unknown { key: &'a [u8], value: &'a [u8] }, } -impl<'a> EntryPointRaw<'a> { - fn read(key: &'a [u8], value: &'a [u8], is_64: bool) -> Result { +impl<'a, K: IDAKind> EntryPointRaw<'a, K> { + fn read(key: &'a [u8], value: &'a [u8]) -> Result { + let mut value = value; let [key_type, sub_key @ ..] = key else { return Err(anyhow!("invalid Funcs subkey")); }; @@ -207,20 +198,20 @@ impl<'a> EntryPointRaw<'a> { ensure!(parse_maybe_cstr(value) == Some(&b"$ entry points"[..])); return Ok(Self::Name); } - let Some(sub_key) = parse_number(sub_key, true, is_64) else { + let Some(sub_key) = K::Usize::from_be_bytes(sub_key) else { return Ok(Self::Unknown { key, value }); }; match *key_type { // TODO for some reason the address is one byte extra - b'A' => IdaUnpacker::new(value, is_64) - .read_word() - .map(|address| Self::Address { - key: sub_key, - address: address - 1, - }) - .map_err(|_| anyhow!("Invalid Function address")), - b'I' => IdaUnpacker::new(value, is_64) - .read_word() + flag::netnode::nn_res::ARRAY_ALT_TAG => { + IdbReadKind::::read_usize(&mut value) + .map(|address| Self::Address { + key: sub_key, + address: address - K::Usize::from(1u8), + }) + .map_err(|_| anyhow!("Invalid Function address")) + } + b'I' => IdbReadKind::::read_usize(&mut value) .map(|ordinal| Self::Ordinal { key: sub_key, ordinal, @@ -234,7 +225,7 @@ impl<'a> EntryPointRaw<'a> { }) }) .ok_or_else(|| anyhow!("Invalid Forwarded symbol name")), - b'S' => parse_maybe_cstr(value) + flag::netnode::nn_res::ARRAY_SUP_TAG => parse_maybe_cstr(value) .and_then(|name| { Some(Self::FunctionName { key: sub_key, @@ -249,29 +240,13 @@ impl<'a> EntryPointRaw<'a> { } #[derive(Clone, Debug)] -pub struct EntryPoint { +pub struct EntryPoint { pub name: String, - pub address: u64, + pub address: K::Usize, pub forwarded: Option, pub entry_type: Option, } -pub(crate) fn parse_number( - data: &[u8], - big_endian: bool, - is_64: bool, -) -> Option { - Some(match (data.len(), is_64, big_endian) { - (8, true, true) => u64::from_be_bytes(data.try_into().unwrap()), - (8, true, false) => u64::from_le_bytes(data.try_into().unwrap()), - (4, false, true) => u32::from_be_bytes(data.try_into().unwrap()).into(), - (4, 
false, false) => { - u32::from_le_bytes(data.try_into().unwrap()).into() - } - _ => return None, - }) -} - // parse a string that maybe is finalized with \x00 fn parse_maybe_cstr(data: &[u8]) -> Option<&[u8]> { // find the end of the string @@ -282,3 +257,39 @@ fn parse_maybe_cstr(data: &[u8]) -> Option<&[u8]> { } Some(&data[..end_pos]) } + +enum ID0CStr<'a, K: IDAKind> { + CStr(&'a [u8]), + Ref(K::Usize), +} + +// parse a string that maybe is finalized with \x00 +impl<'a, K: IDAKind> ID0CStr<'a, K> { + pub(crate) fn parse_cstr_or_subkey(data: &'a [u8]) -> Option { + // TODO find the InnerRef, so far I found only the + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x4e20c0 + match data { + [b'\x00', rest @ ..] => { + K::Usize::from_be_bytes(rest).map(ID0CStr::Ref) + } + _ => parse_maybe_cstr(data).map(ID0CStr::CStr), + } + } +} + +fn read_addr_from_key( + input: &mut impl IdbReadKind, +) -> Result { + // skip the '.' + ensure!(input.read_u8()? == b'.'); + // read the key + input.read_usize_be() +} + +fn read_addr_and_tag_from_key( + input: &mut impl IdbReadKind, +) -> Result<(K::Usize, u8)> { + let addr = read_addr_from_key::(&mut *input)?; + let tag = input.read_u8()?; + Ok((addr, tag)) +} diff --git a/src/id0/address_info.rs b/src/id0/address_info.rs index 85b0878..3ee1055 100644 --- a/src/id0/address_info.rs +++ b/src/id0/address_info.rs @@ -1,14 +1,21 @@ +use std::borrow::Cow; + use anyhow::{anyhow, Result}; +use num_traits::ToBytes; -use crate::til; +use crate::{til, IDAKind, IDAUsize}; -use super::{parse_maybe_cstr, ID0Entry, IDBFileRegions}; +use super::{ + flag, parse_maybe_cstr, FileRegionIter, FileRegions, ID0CStr, ID0Entry, + ID0Section, +}; #[derive(Clone, Debug)] -pub enum AddressInfo<'a> { +pub enum AddressInfo<'a, K: IDAKind> { Comment(Comments<'a>), - Label(&'a str), + Label(Cow<'a, str>), TilType(til::Type), + DefinedStruct(SubtypeId), Other { key: &'a [u8], value: &'a [u8] }, } @@ -20,6 +27,9 @@ pub enum Comments<'a> { PostComment(&'a [u8]), } +#[derive(Clone, Copy, Debug)] +pub struct SubtypeId(pub(crate) K::Usize); + impl<'a> Comments<'a> { /// The message on the comment, NOTE that IDA don't have a default character encoding pub fn message(&self) -> &'a [u8] { @@ -32,154 +42,192 @@ impl<'a> Comments<'a> { } } -pub(crate) struct SectionAddressInfoIter< - 'a, - I: Iterator>, -> { - all_entries: &'a [ID0Entry], - regions: I, - current_region: AddressInfoIter<'a>, +#[derive(Clone, Copy)] +pub struct SectionAddressInfoByAddressIter<'a, K: IDAKind> { + id0: &'a ID0Section, + regions: FileRegionIter<'a, K>, + current_region: &'a [ID0Entry], } -impl<'a, I: Iterator>> - SectionAddressInfoIter<'a, I> -{ - pub fn new(all_entries: &'a [ID0Entry], regions: I, is_64: bool) -> Self { - Self { - all_entries, +impl<'a, K: IDAKind> SectionAddressInfoByAddressIter<'a, K> { + pub fn new(id0: &'a ID0Section, version: u16) -> Result { + let idx = id0.file_regions_idx()?; + let regions = id0.file_regions(idx, version); + Ok(Self { + id0, regions, - current_region: AddressInfoIter::new(&[], is_64), - } + // dummy values + current_region: &[], + }) + } + + fn advance_region(&mut self) -> Result> { + // get the next region + advance_region(self.id0, &mut self.regions) + .map(|x| x.map(|x| self.current_region = x)) + } + + fn next_inner( + &mut self, + ) -> Result)>> { + // get the next address of the current region, if nothing, next region + let Some(first) = self.current_region.first() else { + if self.advance_region()?.is_none() { + // no more regions, end it + return Ok(None); + } + // 
NOTE regions can be empty, so check if this new region have + // elements by calling this function again + return self.next_inner(); + }; + + let address = super::read_addr_from_key::(&mut &first.key[..])?; + + let end = self + .current_region + .iter() + .position(|e| !e.key.starts_with(address.to_be_bytes().as_ref())) + .unwrap_or(self.current_region.len()); + let (current_addr, rest) = self.current_region.split_at(end); + self.current_region = rest; + Ok(Some(( + address, + AddressInfoIter::new(current_addr, self.id0), + ))) } } -impl<'a, I: Iterator> + 'a> Iterator - for SectionAddressInfoIter<'a, I> -{ - type Item = Result<(u64, AddressInfo<'a>)>; +impl<'a, K: IDAKind> Iterator for SectionAddressInfoByAddressIter<'a, K> { + type Item = Result<(K::Usize, AddressInfoIter<'a, K>)>; fn next(&mut self) -> Option { + self.next_inner().transpose() + } +} + +#[derive(Clone, Copy)] +pub struct SectionAddressInfoIter<'a, K: IDAKind> { + id0: &'a ID0Section, + regions: FileRegionIter<'a, K>, + current_region: AddressInfoIter<'a, K>, +} + +impl<'a, K: IDAKind> SectionAddressInfoIter<'a, K> { + pub fn new(id0: &'a ID0Section, version: u16) -> Result { + let idx = id0.file_regions_idx()?; + let regions = id0.file_regions(idx, version); + Ok(Self { + id0, + regions, + // dummy value + current_region: AddressInfoIter::new(&[], id0), + }) + } + + fn advance_region(&mut self) -> Result> { + // get the next region + advance_region(self.id0, &mut self.regions).map(|x| { + x.map(|x| self.current_region = AddressInfoIter::new(x, self.id0)) + }) + } +} + +impl<'a, K: IDAKind> Iterator for SectionAddressInfoIter<'a, K> { + type Item = Result<(K::Usize, AddressInfo<'a, K>)>; + + fn next(&mut self) -> Option { + // next element in the current region, or next region let Some(next_addr_info) = self.current_region.next() else { - // get the next region - let region = match self.regions.next() { - Some(Ok(region)) => region, - // if no more regions, finish the iter (AKA return None) - None => return None, - // return the error if err - Some(Err(err)) => return Some(Err(err)), + match self.advance_region() { + Ok(Some(_)) => {} + // no more regions, end it + Ok(None) => return None, + Err(e) => return Some(Err(e)), }; - let is_64 = self.current_region.is_64; - let start_key: Vec = - crate::id0::key_from_address(region.start, is_64).collect(); - let end_key: Vec = - crate::id0::key_from_address(region.end, is_64).collect(); - let start = self - .all_entries - .binary_search_by_key(&&start_key[..], |b| &b.key[..]) - .unwrap_or_else(|start| start); - let end = self - .all_entries - .binary_search_by_key(&&end_key[..], |b| &b.key[..]) - .unwrap_or_else(|end| end); - - let entries = &self.all_entries[start..end]; - self.current_region = AddressInfoIter::new(entries, is_64); - // try again using this new region + // NOTE regions can be empty, so check if this new region have + // elements by calling this function again return self.next(); }; Some(next_addr_info) } } -pub(crate) struct AddressInfoIter<'a> { +#[derive(Clone, Copy)] +pub struct AddressInfoIter<'a, K: IDAKind> { + id0: &'a ID0Section, entries: &'a [ID0Entry], - is_64: bool, } -impl<'a> AddressInfoIter<'a> { - pub fn new(entries: &'a [ID0Entry], is_64: bool) -> Self { - Self { entries, is_64 } +impl<'a, K: IDAKind> AddressInfoIter<'a, K> { + pub fn new(entries: &'a [ID0Entry], section: &'a ID0Section) -> Self { + Self { + entries, + id0: section, + } } -} - -impl<'a> Iterator for AddressInfoIter<'a> { - type Item = Result<(u64, AddressInfo<'a>)>; - fn 
next(&mut self) -> Option { - let (current, rest) = self.entries.split_first()?; - self.entries = rest; - let value = ¤t.value[..]; - // 1.. because it starts with '.' - let addr_len = if self.is_64 { 8 } else { 4 }; - let key_start = addr_len + 1; - let address = - super::parse_number(¤t.key[1..key_start], true, self.is_64) - .unwrap(); - let key = ¤t.key[key_start..]; - let Some((sub_type, id_value)) = id_subkey_from_idx(key, self.is_64) - else { - return Some(Err(anyhow!("Missing SubType"))); + fn next_inner(&mut self) -> Result)>> { + let Some((current, rest)) = self.entries.split_first() else { + return Ok(None); }; + self.entries = rest; + let (address, sub_type, subkey) = + addr_id_subkey_from_key::(¤t.key[..]) + .ok_or_else(|| anyhow!("Missing SubType"))?; // Non UTF-8 comment: "C:\\Documents and Settings\\Administrator\\\xb9\xd9\xc5\xc1 \xc8\xad\xb8\xe9\ls" // \xb9\xd9\xc5\xc1 \xc8\xad\xb8\xe9 = "바탕 화면" = "Desktop" in Korean encoded using Extended Unix Code #[allow(clippy::wildcard_in_or_patterns)] - match (sub_type, id_value) { + match (sub_type, subkey.map(>::into)) { // Comments // NOTE // pre comments start at index 1000 // post comments start at index 2000 // if you create more then a 1000 pre/post comments ida start acting strange, BUG? - (b'S', Some(1000..=1999)) => { - let Some(comment) = parse_maybe_cstr(value) else { - return Some(Err(anyhow!("Pre-Comment is not valid CStr"))); - }; - Some(Ok((address, AddressInfo::Comment(Comments::PreComment(comment))))) + (flag::netnode::nn_res::ARRAY_SUP_TAG, Some(1000..=1999)) => { + let comment = parse_maybe_cstr(¤t.value[..]).ok_or_else(|| + anyhow!("Pre-Comment is not valid CStr") + )?; + Ok(Some((address, AddressInfo::Comment(Comments::PreComment(comment))))) }, - (b'S', Some(2000..=2999)) => { - let Some(comment) = parse_maybe_cstr(value) else { - return Some(Err(anyhow!("Post-Comment is not valid CStr"))); - }; - Some(Ok((address, AddressInfo::Comment(Comments::PreComment(comment))))) + (flag::netnode::nn_res::ARRAY_SUP_TAG, Some(2000..=2999)) => { + let comment = parse_maybe_cstr(¤t.value[..]).ok_or_else(|| + anyhow!("Post-Comment is not valid CStr") + )?; + Ok(Some((address, AddressInfo::Comment(Comments::PostComment(comment))))) }, - (b'S', Some(0x0)) => { - let Some(comment) = parse_maybe_cstr(value) else { - return Some(Err(anyhow!("Comment is not valid CStr"))); - }; - Some(Ok((address, AddressInfo::Comment(Comments::PreComment(comment))))) + (flag::netnode::nn_res::ARRAY_SUP_TAG, Some(0x0)) => { + let comment = parse_maybe_cstr(¤t.value[..]).ok_or_else(|| + anyhow!("Comment is not valid CStr") + )?; + Ok(Some((address, AddressInfo::Comment(Comments::Comment(comment))))) }, // Repeatable comment - (b'S', Some(0x1)) => { - let Some(comment) = parse_maybe_cstr(value) else { - return Some(Err(anyhow!("Repeatable Comment is not valid CStr"))); - }; - Some(Ok((address, AddressInfo::Comment(Comments::PreComment(comment))))) + (flag::netnode::nn_res::ARRAY_SUP_TAG, Some(0x1)) => { + let comment = parse_maybe_cstr(¤t.value[..]).ok_or_else(|| + anyhow!("Repeatable Comment is not valid CStr") + )?; + Ok(Some((address, AddressInfo::Comment(Comments::RepeatableComment(comment))))) }, // Type at this address - (b'S', Some(0x3000)) => { + (flag::netnode::nn_res::ARRAY_SUP_TAG, Some(0x3000)) => { // take the field names (optional?) and the continuation (optional!) 
let last = rest.iter().position(|entry| { - let Some((sub_type, id)) = entry.key[key_start..].split_first() else { - return true + let Some((_address, sub_type, Some(id))) = addr_id_subkey_from_key::(&entry.key[..]) else { + return true; }; - let id_value = id_from_key(id, self.is_64); - !matches!((*sub_type, id_value), (b'S', Some(0x3000..=0x3999))) + !matches!((sub_type, >::into(id)), (b'S', 0x3000u64..=0x3999)) }).unwrap_or(rest.len()); self.entries = &rest[last..]; // TODO enforce sequential index for the id? // get the entry for field names and rest of data let (fields, continuation) = match &rest[..last] { - [fields, rest @ ..] if matches!(id_subkey_from_idx(&fields.key[key_start..], self.is_64), Some((b'S', Some(0x3001)))) => { + [fields, rest @ ..] if addr_id_subkey_from_key::(&fields.key[..]) == Some((address, b'S', Some(K::Usize::from(0x3001u16)))) => { // convert the value into fields // usually this string ends with \x00, but bmaybe there is no garanty for that. - let Some(value) = parse_maybe_cstr(&fields.value) else { - // TODO: maybe those fields are continuated by the next entry - return Some(Err(anyhow!("Incomplete Fields for TIL Type"))); - }; - let Some(fields) = crate::ida_reader::split_strings_from_array(value) else { - return Some(Err(anyhow!("Invalid Fields for TIL Type"))); - }; + let value = parse_maybe_cstr(&fields.value).ok_or_else(||anyhow!("Incomplete Fields for TIL Type"))?; + let fields = crate::ida_reader::split_strings_from_array(value).ok_or_else(||anyhow!("Invalid Fields for TIL Type"))?; (fields, rest) } rest => (vec![], rest), @@ -188,60 +236,128 @@ impl<'a> Iterator for AddressInfoIter<'a> { // condensate the data into a single buffer let buf: Vec = current.value.iter().chain(continuation.iter().flat_map(|entry| &entry.value[..])).copied().collect(); // create the raw type - let til = match til::Type::new_from_id0(&buf[..], fields) { - Ok(til) => til, - Err(err) => return Some(Err(err)), - }; - Some(Ok((address, AddressInfo::TilType(til)))) + let til = til::Type::new_from_id0(&buf[..], fields)?; + Ok(Some((address, AddressInfo::TilType(til)))) }, // field names and continuation in from the previous til type [citation needed] - (b'S', Some(0x3001..=0x3999)) => { - Some(Err(anyhow!("ID0 Til type info without a previous TIL type"))) + (flag::netnode::nn_res::ARRAY_SUP_TAG, Some(0x3001..=0x3999)) => { + Err(anyhow!("ID0 Til type info without a previous TIL type")) }, // Name, aka a label to this memory address - (b'N', None) => { - let Some(label_raw) = parse_maybe_cstr(value) else { - return Some(Err(anyhow!("Label is not a valid CStr"))); - }; - let Some(label) = core::str::from_utf8(label_raw).ok() else { - return Some(Err(anyhow!("Label is not valid UTF-8"))) + (flag::netnode::nn_res::NAME_TAG, None) => { + let value = ID0CStr::<'_, K>::parse_cstr_or_subkey(¤t.value) + .ok_or_else(|| anyhow!("Label is not a valid CStr or ID0 Ref"))?; + let label = match value { + ID0CStr::CStr(label_raw) => { + let label = core::str::from_utf8(label_raw).map_err(|_| + anyhow!("Label is not valid UTF-8") + )?; + Cow::Borrowed(label) + }, + ID0CStr::Ref(label_ref) => { + let entries = self.id0.address_info_value(label_ref)?; + let label_raw = entries.iter().flat_map(|x| &x.value[..]).copied().collect(); + let label = String::from_utf8(label_raw).map_err(|_| { + anyhow!("LabelRef is not valid UTF-8") + })?; + Cow::Owned(label) + }, }; - Some(Ok((address, AddressInfo::Label(label)))) + Ok(Some((address, AddressInfo::Label(label)))) }, + // Used to define what struct is 
apply at the address + (flag::nalt::x::NALT_DREF_FROM, Some(_)) if current.value[..] == [0x03] => { + Ok(Some((address, AddressInfo::DefinedStruct(SubtypeId(subkey.unwrap()))))) + } + // Seems related to datatype, maybe cstr, align and stuff like that - (b'A', Some(_)) | + (flag::netnode::nn_res::ARRAY_ALT_TAG, Some(_)) | // Know to happen to data that represent an memory location - (b'S', Some(0x09)) | + (flag::netnode::nn_res::ARRAY_SUP_TAG, Some(0x09)) | // Seem defined on procedures - (b'S', Some(0x1000)) | + (flag::netnode::nn_res::ARRAY_SUP_TAG, Some(0x1000)) | // seems to be a code reference to memory, key is the destination memory - (b'x', Some(_)) | + (flag::nalt::x::NALT_CREF_FROM, Some(_)) | // The oposite of 'x', memory being referenced by an instruction - (b'X', Some(_)) | + (flag::nalt::x::NALT_CREF_TO, Some(_)) | // Seems to represent a XREF, key being the location that points to this address - (b'D', Some(_)) | + (flag::nalt::x::NALT_DREF_TO, Some(_)) | // The oposite of 'D", is a memory location that points to other - (b'd', Some(_)) | + (flag::nalt::x::NALT_DREF_FROM, Some(_)) | // other unknown values - _ => Some(Ok((address, AddressInfo::Other { key, value }))), + _ => Ok(Some((address, AddressInfo::Other { key: ¤t.key, value: ¤t.value }))), } } } -fn id_subkey_from_idx(key: &[u8], is_64: bool) -> Option<(u8, Option)> { - let (sub_type, id) = key.split_first()?; - Some((*sub_type, id_from_key(id, is_64))) +impl<'a, K: IDAKind> Iterator for AddressInfoIter<'a, K> { + type Item = Result<(K::Usize, AddressInfo<'a, K>)>; + + fn next(&mut self) -> Option { + self.next_inner().transpose() + } +} + +#[derive(Clone, Copy)] +pub struct AddressInfoIterAt<'a, K: IDAKind> { + iter: AddressInfoIter<'a, K>, } -fn id_from_key(key: &[u8], is_64: bool) -> Option { - if is_64 { - <[u8; 8]>::try_from(key).ok().map(u64::from_be_bytes) - } else { - <[u8; 4]>::try_from(key) - .ok() - .map(u32::from_be_bytes) - .map(u64::from) +impl<'a, K: IDAKind> AddressInfoIterAt<'a, K> { + pub fn new(iter: AddressInfoIter<'a, K>) -> Self { + Self { iter } } } + +impl<'a, K: IDAKind> Iterator for AddressInfoIterAt<'a, K> { + type Item = Result>; + + fn next(&mut self) -> Option { + // ignore the address, it will always be the same, the one request + self.iter.next().map(|x| x.map(|(_, x)| x)) + } +} + +fn addr_id_subkey_from_key( + mut key: &[u8], +) -> Option<(K::Usize, u8, Option)> { + let (addr, tag) = super::read_addr_and_tag_from_key::(&mut key).ok()?; + let subkey = K::Usize::from_be_bytes(key); + Some((addr, tag, subkey)) +} + +fn advance_region( + id0: &ID0Section, + mut regions: impl Iterator>>, +) -> Result> { + // get the next region + let region = match regions.next() { + Some(Ok(region)) => region, + // if no more regions, finish the iter (AKA return None) + None => return Ok(None), + // return the error if err + Some(Err(err)) => return Err(err), + }; + Ok(Some(get_next_address_region(®ion, id0.all_entries()))) +} + +fn get_next_address_region<'a, K: IDAKind>( + region: &FileRegions, + all_entries: &'a [ID0Entry], +) -> &'a [ID0Entry] { + // get the next region + let start_key: Vec = + crate::id0::key_from_address::(region.start).collect(); + let end_key: Vec = + crate::id0::key_from_address::(region.end).collect(); + let start = all_entries + .binary_search_by_key(&&start_key[..], |b| &b.key[..]) + .unwrap_or_else(|start| start); + let end = all_entries + .binary_search_by_key(&&end_key[..], |b| &b.key[..]) + .unwrap_or_else(|end| end); + + &all_entries[start..end] +} diff --git 
a/src/id0/btree.rs b/src/id0/btree.rs index ba08385..c206b55 100644 --- a/src/id0/btree.rs +++ b/src/id0/btree.rs @@ -1,20 +1,21 @@ -use std::{ffi::CStr, io::Read}; +use std::io::BufRead; use anyhow::Result; +use num_traits::ToBytes; -use crate::ida_reader::{IdaGenericBufUnpack, IdaGenericUnpack}; +use crate::ida_reader::{IdbBufRead, IdbRead}; use super::*; #[derive(Debug, Clone, Copy)] -enum ID0Version { +pub(crate) enum ID0Version { V15, V16, V20, } impl ID0Version { - pub(crate) fn read(input: &mut impl IdaGenericBufUnpack) -> Result { + pub(crate) fn read(input: &mut impl BufRead) -> Result { let value = input.read_c_string_raw()?; match &value[..] { b"B-tree v 1.5 (C) Pol 1990" => Ok(Self::V15), @@ -29,21 +30,21 @@ impl ID0Version { } #[derive(Debug, Clone, Copy)] -struct ID0Header { +pub(crate) struct ID0Header { // TODO handle the next_free_offset being the fist free page - _next_free_offset: Option, - page_size: u16, + pub _next_free_offset: Option, + pub page_size: u16, // assuming None here means there are no entries in this ID0 - root_page: Option, - record_count: u32, - page_count: u32, - //unk12: u8, - version: ID0Version, + pub root_page: Option, + pub record_count: u32, + pub page_count: u32, + //pub unk12: u8, + pub version: ID0Version, } impl ID0Header { pub(crate) fn read( - input: &mut impl IdaGenericUnpack, + input: &mut impl BufRead, buf: &mut Vec, ) -> Result { buf.resize(64, 0); @@ -93,708 +94,7 @@ impl ID0Header { } #[derive(Debug, Clone)] -pub struct ID0Section { - is_64: bool, - pub entries: Vec, -} - -#[derive(Debug, Clone)] -pub struct ID0Entry { - pub key: Vec, - pub value: Vec, -} - -impl ID0Section { - pub(crate) fn read( - input: &mut impl IdaGenericUnpack, - header: &IDBHeader, - compress: IDBSectionCompression, - ) -> Result { - let mut buf = vec![]; - let _len = match compress { - IDBSectionCompression::None => input.read_to_end(&mut buf)?, - IDBSectionCompression::Zlib => { - flate2::read::ZlibDecoder::new(input).read_to_end(&mut buf)? - } - }; - Self::read_inner(&buf, header) - } - - // NOTE this was written this way to validate the data in each file, so it's clear that no - // data is being parsed incorrectly or is left unparsed. There way too many validations - // and non-necessary parsing is done on delete data. 
- fn read_inner(input: &[u8], idb_header: &IDBHeader) -> Result { - let mut reader = input; - - // pages size are usually around that size - let mut buf = Vec::with_capacity(0x2000); - let header = ID0Header::read(&mut reader, &mut buf)?; - - ensure!(input.len() % header.page_size as usize == 0); - let pages_in_section = input.len() / header.page_size as usize; - // +1 for the header, some times there is more space then pages, usually empty pages at the end - ensure!(header.page_count as usize + 1 <= pages_in_section); - - let Some(root_page) = header.root_page else { - ensure!(header.record_count == 0); - // if root is not set, then the DB is empty - return Ok(Self { - is_64: idb_header.magic_version.is_64(), - entries: vec![], - }); - }; - - buf.resize(header.page_size.into(), 0); - let mut pages = - HashMap::with_capacity(header.page_count.try_into().unwrap()); - let mut pending_pages = vec![root_page]; - loop { - if pending_pages.is_empty() { - break; - } - let page_idx = pending_pages.pop().unwrap(); - // if already parsed, ignore - if pages.contains_key(&page_idx) { - continue; - } - // read the full page - ensure!((page_idx.get() as usize) < pages_in_section); - let page_offset = - page_idx.get() as usize * header.page_size as usize; - let page_raw = - &input[page_offset..page_offset + header.page_size as usize]; - let page = ID0Page::read(page_raw, &header)?; - // put in the queue the pages that need parsing, AKA children of this page - match &page { - ID0Page::Index { preceding, entries } => { - pending_pages.extend( - entries - .iter() - .filter_map(|entry| entry.page) - .chain(*preceding), - ); - } - ID0Page::Leaf(_) => {} - } - // insert the parsed page - if let Some(_old) = pages.insert(page_idx, page) { - unreachable!(); - } - } - - // verify that the correct number of pages were consumed and added to the tree - ensure!(pages.len() <= header.page_count.try_into().unwrap()); - - // put it all in order on the vector - let mut entries = - Vec::with_capacity(header.record_count.try_into().unwrap()); - Self::tree_to_vec(root_page, &mut pages, &mut entries); - - // make sure the vector is sorted - ensure!(entries.windows(2).all(|win| { - let [a, b] = win else { unreachable!() }; - a.key < b.key - })); - - // make sure the right number of entries are in the final vector - ensure!(entries.len() == header.record_count.try_into().unwrap()); - - Ok(ID0Section { - is_64: idb_header.magic_version.is_64(), - entries, - }) - } - - fn tree_to_vec( - page_idx: NonZeroU32, - pages: &mut HashMap, - output: &mut Vec, - ) { - match pages.remove(&page_idx).unwrap() { - ID0Page::Index { preceding, entries } => { - if let Some(preceding) = preceding { - // if not root, add the preceding page before this one - Self::tree_to_vec(preceding, pages, &mut *output); - } - for ID0PageIndex { page, key, value } in entries { - output.push(ID0Entry { key, value }); - if let Some(page) = page { - Self::tree_to_vec(page, pages, &mut *output); - } - } - } - ID0Page::Leaf(entries) => output.extend(entries), - } - } - - pub fn all_entries(&self) -> impl Iterator { - self.entries.iter() - } - - pub(crate) fn binary_search( - &self, - key: impl AsRef<[u8]>, - ) -> Result { - let key = key.as_ref(); - self.entries.binary_search_by_key(&key, |b| &b.key[..]) - } - - fn binary_search_end(&self, key: impl AsRef<[u8]>) -> Result { - let key = key.as_ref(); - self.entries.binary_search_by(|b| { - if b.key.starts_with(key) { - std::cmp::Ordering::Less - } else { - b.key.as_slice().cmp(key) - } - }) - } - - pub fn get(&self, 
key: impl AsRef<[u8]>) -> Option<&ID0Entry> { - self.binary_search(key).ok().map(|i| &self.entries[i]) - } - - /// search for entries in this inclusive range - pub fn get_inclusive_range( - &self, - start: impl AsRef<[u8]>, - end: impl AsRef<[u8]>, - ) -> impl Iterator { - let start = self.binary_search(start).unwrap_or_else(|start| start); - let end = self.binary_search_end(end).unwrap_or_else(|end| end); - - self.entries[start..end].iter() - } - - pub fn sub_values( - &self, - key: impl AsRef<[u8]>, - ) -> impl Iterator { - let key = key.as_ref(); - let start = self.binary_search(key).unwrap_or_else(|start| start); - let end = self.binary_search_end(key).unwrap_or_else(|end| end); - - self.entries[start..end].iter() - } - - /// read the `$ segs` entries of the database - pub fn segments( - &self, - ) -> Result> + '_> { - let entry = self - .get("N$ segs") - .ok_or_else(|| anyhow!("Unable to find entry segs"))?; - let key: Vec = b"." - .iter() - .chain(entry.value.iter().rev()) - .chain(b"S") - .copied() - .collect(); - let names = self.segment_strings()?; - Ok(self.sub_values(key).map(move |e| { - Segment::read(&e.value, self.is_64, names.as_ref(), self) - })) - } - - /// read the `$ segstrings` entries of the database - fn segment_strings(&self) -> Result>>> { - let Some(entry) = self.get("N$ segstrings") else { - // no entry means no strings - return Ok(None); - }; - let key: Vec = b"." - .iter() - .chain(entry.value.iter().rev()) - .chain(b"S") - .copied() - .collect(); - let mut entries = HashMap::new(); - for entry in self.sub_values(key) { - let mut value_current = &entry.value[..]; - let start = value_current.unpack_dd()?; - let end = value_current.unpack_dd()?; - ensure!(start > 0); - ensure!(start <= end); - for i in start..end { - let name = value_current.unpack_ds()?; - if let Some(_old) = entries.insert(i.try_into().unwrap(), name) - { - return Err(anyhow!("Duplicated id in segstrings {start}")); - } - } - // TODO always end with '\x0a'? - ensure!( - value_current.is_empty(), - "Unparsed data in SegsString: {}", - value_current.len() - ); - } - Ok(Some(entries)) - } - - pub(crate) fn name_by_index(&self, idx: u64) -> Result<&[u8]> { - // if there is no names, AKA `$ segstrings`, search for the key directly - let key: Vec = b"." - .iter() - .copied() - .chain(if self.is_64 { - (idx | (0xFF << 56)).to_be_bytes().to_vec() - } else { - (u32::try_from(idx).unwrap() | (0xFF << 24)) - .to_be_bytes() - .to_vec() - }) - .chain(b"N".iter().copied()) - .collect(); - let name = self - .get(key) - .ok_or_else(|| anyhow!("Not found name for segment {idx}"))?; - parse_maybe_cstr(&name.value) - .ok_or_else(|| anyhow!("Invalid segment name {idx}")) - } - - /// read the `$ loader name` entries of the database - pub fn loader_name(&self) -> Result>> { - let entry = self - .get("N$ loader name") - .ok_or_else(|| anyhow!("Unable to find entry loader name"))?; - // TODO check that keys are 0 => plugin, or 1 => format - let key: Vec = b"." - .iter() - .chain(entry.value.iter().rev()) - .chain(b"S") - .copied() - .collect(); - Ok(self - .sub_values(key) - .map(|e| Ok(CStr::from_bytes_with_nul(&e.value)?.to_str()?))) - } - - /// read the `Root Node` entries of the database - pub fn root_info( - &self, - ) -> Result>> { - let entry = self - .get("NRoot Node") - .ok_or_else(|| anyhow!("Unable to find entry Root Node"))?; - let key: Vec = b"." 
- .iter() - .chain(entry.value.iter().rev()) - .copied() - .collect(); - let key_len = key.len(); - Ok(self.sub_values(key).map(move |entry| { - let sub_key = &entry.key[key_len..]; - let Some(sub_type) = sub_key.first().copied() else { - return Ok(IDBRootInfo::Unknown(entry)); - }; - match (sub_type, sub_key.len()) { - (b'N', 1) => { - ensure!( - parse_maybe_cstr(&entry.value) - == Some(&b"Root Node"[..]), - "Invalid Root Node Name" - ); - return Ok(IDBRootInfo::RootNodeName); - } - // TODO filenames can be non-utf-8, but are they always CStr? - (b'V', 1) => return Ok(IDBRootInfo::InputFile(&entry.value)), - _ => {} - } - let Some(value) = parse_number(&sub_key[1..], true, self.is_64) - else { - return Ok(IDBRootInfo::Unknown(entry)); - }; - match (sub_type, value as i64) { - (b'A', -6) => parse_number(&entry.value, false, self.is_64) - .ok_or_else(|| anyhow!("Unable to parse imagebase value")) - .map(IDBRootInfo::ImageBase), - (b'A', -5) => parse_number(&entry.value, false, self.is_64) - .ok_or_else(|| anyhow!("Unable to parse crc value")) - .map(IDBRootInfo::Crc), - (b'A', -4) => parse_number(&entry.value, false, self.is_64) - .ok_or_else(|| anyhow!("Unable to parse open_count value")) - .map(IDBRootInfo::OpenCount), - (b'A', -2) => parse_number(&entry.value, false, self.is_64) - .ok_or_else(|| anyhow!("Unable to parse CreatedDate value")) - .map(IDBRootInfo::CreatedDate), - (b'A', -1) => parse_number(&entry.value, false, self.is_64) - .ok_or_else(|| anyhow!("Unable to parse Version value")) - .map(IDBRootInfo::Version), - (b'S', 1302) => entry - .value - .as_slice() - .try_into() - .map(IDBRootInfo::Md5) - .map_err(|_| anyhow!("Value Md5 with invalid len")), - (b'S', 1303) => parse_maybe_cstr(&entry.value) - .and_then(|version| core::str::from_utf8(version).ok()) - .ok_or_else(|| { - anyhow!("Unable to parse VersionString string") - }) - .map(IDBRootInfo::VersionString), - (b'S', 1349) => entry - .value - .as_slice() - .try_into() - .map(IDBRootInfo::Sha256) - .map_err(|_| anyhow!("Value Sha256 with invalid len")), - (b'S', 0x41b994) => IDBParam::read(&entry.value, self.is_64) - .map(Box::new) - .map(IDBRootInfo::IDAInfo), - _ => Ok(IDBRootInfo::Unknown(entry)), - } - })) - } - - /// read the `Root Node` ida_info entry of the database - pub fn ida_info(&self) -> Result { - // TODO Root Node is always the last one? - let entry = self - .get("NRoot Node") - .ok_or_else(|| anyhow!("Unable to find entry Root Node"))?; - let sub_key = if self.is_64 { - 0x41B994u64.to_be_bytes().to_vec() - } else { - 0x41B994u32.to_be_bytes().to_vec() - }; - let key: Vec = b"." - .iter() - .chain(entry.value.iter().rev()) - .chain(b"S") - .chain(sub_key.iter()) - .copied() - .collect(); - let description = self.sub_values(key).next().ok_or_else(|| { - anyhow!("Unable to find id_params inside Root Node") - })?; - IDBParam::read(&description.value, self.is_64) - } - - /// read the `$ fileregions` entries of the database - pub fn file_regions( - &self, - version: u16, - ) -> Result> + '_> { - let entry = self - .get("N$ fileregions") - .ok_or_else(|| anyhow!("Unable to find fileregions"))?; - let key: Vec = b"." 
- .iter() - .chain(entry.value.iter().rev()) - .chain(b"S") - .copied() - .collect(); - let key_len = key.len(); - // TODO find the meaning of "$ fileregions" b'V' entries - Ok(self.sub_values(key).map(move |e| { - let key = &e.key[key_len..]; - IDBFileRegions::read(key, &e.value, version, self.is_64) - })) - } - - /// read the `$ funcs` entries of the database - pub fn functions_and_comments( - &self, - ) -> Result>> { - let entry = self - .get("N$ funcs") - .ok_or_else(|| anyhow!("Unable to find functions"))?; - let key: Vec = b"." - .iter() - .chain(entry.value.iter().rev()) - .copied() - .collect(); - let key_len = key.len(); - Ok(self.sub_values(key).map(move |e| { - let key = &e.key[key_len..]; - FunctionsAndComments::read(key, &e.value, self.is_64) - })) - } - - // TODO implement $ fixups - // TODO implement $ imports - // TODO implement $ scriptsnippets - // TODO implement $ enums - // TODO implement $ structs - - // TODO implement $ hidden_ranges - // TODO the address_info for 0xff00_00XX (or 0xff00_0000__0000_00XX for 64bits) seesm to be reserved, what happens if there is data at that page? - - fn entry_points_raw( - &self, - ) -> Result>> { - let entry = self - .get("N$ entry points") - .ok_or_else(|| anyhow!("Unable to find functions"))?; - let key: Vec = b"." - .iter() - .chain(entry.value.iter().rev()) - .copied() - .collect(); - let key_len = key.len(); - Ok(self.sub_values(key).map(move |e| { - let key = &e.key[key_len..]; - EntryPointRaw::read(key, &e.value, self.is_64) - })) - } - - /// read the `$ entry points` entries of the database - pub fn entry_points(&self) -> Result> { - type RawEntryPoint<'a> = - HashMap, Option<&'a str>, Option<&'a str>)>; - let mut entry_points: RawEntryPoint = HashMap::new(); - for entry_point in self.entry_points_raw()? { - match entry_point? { - EntryPointRaw::Unknown { .. } - | EntryPointRaw::Name - | EntryPointRaw::Ordinal { .. } => {} - EntryPointRaw::Address { key, address } => { - if let Some(_old) = - entry_points.entry(key).or_default().0.replace(address) - { - return Err(anyhow!( - "Duplicated function address for {key}" - )); - } - } - EntryPointRaw::ForwardedSymbol { key, symbol } => { - if let Some(_old) = - entry_points.entry(key).or_default().1.replace(symbol) - { - return Err(anyhow!( - "Duplicated function symbol for {key}" - )); - } - } - EntryPointRaw::FunctionName { key, name } => { - if let Some(_old) = - entry_points.entry(key).or_default().2.replace(name) - { - return Err(anyhow!( - "Duplicated function name for {key}" - )); - } - } - } - } - let mut result: Vec<_> = entry_points - .into_iter() - .filter_map(|(key, (address, symbol, name))| { - match (address, symbol, name) { - // Function without name or address is possible, this is - // probably some label that got deleted - (Some(_), _, None) - | (None, _, Some(_)) - | (None, _, None) => None, - (Some(address), forwarded, Some(name)) => { - let entry = - match self.find_entry_point_type(key, address) { - Ok(entry) => entry, - Err(error) => return Some(Err(error)), - }; - Some(Ok(EntryPoint { - name: name.to_owned(), - address, - forwarded: forwarded.map(str::to_string), - entry_type: entry, - })) - } - } - }) - .collect::>()?; - result.sort_by_key(|entry| entry.address); - Ok(result) - } - - fn find_entry_point_type( - &self, - key: u64, - address: u64, - ) -> Result> { - if let Some(key_entry) = - self.find_entry_point_type_value(key, 0x3000)? - { - return Ok(Some(key_entry)); - } - // TODO some times it uses the address as key, it's based on the version? 
- if let Some(key_entry) = - self.find_entry_point_type_value(address, 0x3000)? - { - return Ok(Some(key_entry)); - } - Ok(None) - } - - fn find_entry_point_type_value( - &self, - value: u64, - key_find: u64, - ) -> Result> { - let key: Vec = b"." - .iter() - .copied() - .chain(if self.is_64 { - value.to_be_bytes().to_vec() - } else { - u32::try_from(value).unwrap().to_be_bytes().to_vec() - }) - .chain([b'S']) - .collect(); - let key_len = key.len(); - for entry in self.sub_values(key) { - let key = &entry.key[key_len..]; - let key = parse_number(key, true, self.is_64).unwrap(); - // TODO handle other values for the key - if key == key_find { - return til::Type::new_from_id0(&entry.value, vec![]) - .map(Option::Some); - } - } - Ok(None) - } - - /// read the address information for all addresses from `$ fileregions` - pub fn address_info( - &self, - version: u16, - ) -> Result>> { - let regions = self.file_regions(version)?; - Ok(SectionAddressInfoIter::new( - &self.entries[..], - regions, - self.is_64, - )) - } - - /// read the address information for the address - pub fn address_info_at( - &self, - address: impl Id0AddressKey, - ) -> Result>> { - let address = address.as_u64(); - let key: Vec = key_from_address(address, self.is_64).collect(); - let start = self.binary_search(&key).unwrap_or_else(|start| start); - let end = self.binary_search_end(&key).unwrap_or_else(|end| end); - - let entries = &self.entries[start..end]; - // ignore the address, it will always be the same, the one request - let iter = AddressInfoIter::new(entries, self.is_64) - .map(|value| value.map(|(_addr, value)| value)); - Ok(iter) - } - - /// read the label set at address, if any - pub fn label_at( - &self, - id0_addr: impl Id0AddressKey, - ) -> Result> { - let key: Vec = key_from_address(id0_addr.as_u64(), self.is_64) - .chain(Some(b'N')) - .collect(); - let Ok(start) = self.binary_search(&key) else { - return Ok(None); - }; - - let entry = &self.entries[start]; - let key_len = key.len(); - let key = &entry.key[key_len..]; - ensure!(key.is_empty(), "Label ID0 entry with key"); - let label = parse_maybe_cstr(&entry.value) - .ok_or_else(|| anyhow!("Label is not valid CStr"))?; - Ok(Some(label)) - } - - pub(crate) fn dirtree_from_name( - &self, - name: impl AsRef<[u8]>, - ) -> Result> { - let Ok(index) = self.binary_search(name) else { - // if the entry is missin, it's probably just don't have entries - return Ok(DirTreeRoot { entries: vec![] }); - }; - let key: Vec = b"." 
- .iter() - .chain(self.entries[index].value.iter().rev()) - .chain(b"S") - .copied() - .collect(); - let key_len = key.len(); - let mut sub_values = self.sub_values(key).map(|entry| { - let raw_idx = parse_number(&entry.key[key_len..], true, self.is_64) - .ok_or_else(|| anyhow!("invalid dirtree entry key"))?; - let idx = raw_idx >> 16; - let sub_idx = (raw_idx & 0xFFFF) as u16; - Ok((idx, sub_idx, &entry.value[..])) - }); - let dirs = dirtree::parse_dirtree(&mut sub_values, self.is_64)?; - ensure!(sub_values.next().is_none(), "unparsed diretree entries"); - Ok(dirs) - } - - // https://hex-rays.com/products/ida/support/idapython_docs/ida_dirtree.html - - /// read the `$ dirtree/tinfos` entries of the database - pub fn dirtree_tinfos(&self) -> Result> { - self.dirtree_from_name("N$ dirtree/tinfos") - } - - // TODO remove the u64 and make it a TILOrdIndex type - /// read the `$ dirtree/structs` entries of the database - pub fn dirtree_structs(&self) -> Result> { - self.dirtree_from_name("N$ dirtree/structs") - } - - // TODO remove the u64 and make it a TILOrdIndex type - /// read the `$ dirtree/enums` entries of the database - pub fn dirtree_enums(&self) -> Result> { - self.dirtree_from_name("N$ dirtree/enums") - } - - // TODO remove the u64 and make it a FuncAddress type - /// read the `$ dirtree/funcs` entries of the database - pub fn dirtree_function_address(&self) -> Result> { - self.dirtree_from_name("N$ dirtree/funcs") - } - - /// read the `$ dirtree/names` entries of the database - pub fn dirtree_names(&self) -> Result> { - self.dirtree_from_name("N$ dirtree/names") - } - - // TODO remove the u64 and make it a ImportIDX type - /// read the `$ dirtree/imports` entries of the database - pub fn dirtree_imports(&self) -> Result> { - self.dirtree_from_name("N$ dirtree/imports") - } - - // TODO remove the u64 and make it a BptsIDX type - /// read the `$ dirtree/bpts` entries of the database - pub fn dirtree_bpts(&self) -> Result> { - self.dirtree_from_name("N$ dirtree/bpts") - } - - // TODO remove the u64 and make it a &str type - /// read the `$ dirtree/bookmarks_idaplace_t` entries of the database - pub fn dirtree_bookmarks_idaplace(&self) -> Result> { - self.dirtree_from_name("N$ dirtree/bookmarks_idaplace_t") - } - - // TODO remove the u64 and make it a &str type - /// read the `$ dirtree/bookmarks_structplace_t` entries of the database - pub fn dirtree_bookmarks_structplace(&self) -> Result> { - self.dirtree_from_name("N$ dirtree/bookmarks_structplace_t") - } - - // TODO remove the u64 and make it a &str type - /// read the `$ dirtree/bookmarks_tiplace_t` entries of the database - pub fn dirtree_bookmarks_tiplace(&self) -> Result> { - self.dirtree_from_name("N$ dirtree/bookmarks_tiplace_t") - } -} - -#[derive(Debug, Clone)] -enum ID0Page { +pub(crate) enum ID0Page { Index { preceding: Option, entries: Vec, @@ -803,7 +103,7 @@ enum ID0Page { } #[derive(Debug, Clone)] -struct ID0PageIndex { +pub(crate) struct ID0PageIndex { page: Option, key: Vec, value: Vec, @@ -1025,17 +325,171 @@ impl ID0Page { } } -pub(crate) fn key_from_address( - address: u64, - is_64: bool, +// TODO improve this function, maybe make a one liner +pub(crate) fn key_from_address( + address: K::Usize, ) -> impl Iterator { - b".".iter().copied().chain(if is_64 { - address.to_be_bytes().to_vec() - } else { - u32::try_from(address).unwrap().to_be_bytes().to_vec() - }) + b".".iter() + .copied() + .chain(address.to_be_bytes().as_ref().to_vec()) +} + +pub trait Id0AddressKey { + // TODO fix this name + fn as_u64(&self) -> 
K; +} + +impl Id0AddressKey for K { + fn as_u64(&self) -> K { + *self + } +} + +pub(crate) struct ID0BTree { + pub header: ID0Header, + pub pages: Option, +} + +pub(crate) struct ID0BTreePages { + pub root: NonZeroU32, + pub pages: HashMap, +} + +impl ID0BTree { + // NOTE this was written this way to validate the data in each file, so it's clear that no + // data is being parsed incorrectly or is left unparsed. There way too many validations + // and non-necessary parsing is done on delete data. + pub(crate) fn read_inner(input: &[u8]) -> Result { + let mut reader = input; + + // pages size are usually around that size + let mut buf = Vec::with_capacity(0x2000); + let header = ID0Header::read(&mut reader, &mut buf)?; + + let page_count: usize = header.page_count.try_into().unwrap(); + let page_size: usize = header.page_size.into(); + // in compressed sectors extra data can be present + //ensure!(input.len() % page_size == 0); + let pages_in_section = input.len() / page_size; + // +1 for the header, some times there is more space then pages, usually empty pages at the end + ensure!(page_count + 1 <= pages_in_section); + + let Some(root) = header.root_page else { + ensure!(header.record_count == 0); + // if root is not set, then the DB is empty + return Ok(Self { + header, + pages: None, + }); + }; + + buf.resize(page_size, 0); + let mut pages = + HashMap::with_capacity(header.page_count.try_into().unwrap()); + let mut pending_pages = vec![root]; + loop { + if pending_pages.is_empty() { + break; + } + let page_idx = pending_pages.pop().unwrap(); + // if already parsed, ignore + if pages.contains_key(&page_idx) { + continue; + } + // read the full page + ensure!((page_idx.get() as usize) < pages_in_section); + let page_offset = + page_idx.get() as usize * header.page_size as usize; + let page_raw = + &input[page_offset..page_offset + header.page_size as usize]; + let page = ID0Page::read(page_raw, &header)?; + // put in the queue the pages that need parsing, AKA children of this page + match &page { + ID0Page::Index { preceding, entries } => { + pending_pages.extend( + entries + .iter() + .filter_map(|entry| entry.page) + .chain(*preceding), + ); + } + ID0Page::Leaf(_) => {} + } + // insert the parsed page + if let Some(_old) = pages.insert(page_idx, page) { + unreachable!(); + } + } + + // verify that the correct number of pages were consumed and added to the tree + ensure!(pages.len() <= header.page_count.try_into().unwrap()); + + // TODO verify why this is not true + // verify that we read the correct number of entries + //#[cfg(feature = "restrictive")] + //{ + // fn page_entry_num( + // pages: &HashMap, + // page: &ID0Page, + // ) -> usize { + // match page { + // ID0Page::Index { preceding, entries } => { + // let preceding = preceding + // .and_then(|preceding| pages.get(&preceding)); + // let entries = entries + // .iter() + // .filter_map(|x| x.page) + // .filter_map(|page_idx| pages.get(&page_idx)); + // preceding + // .into_iter() + // .chain(entries) + // .map(|entries| page_entry_num(pages, entries)) + // .sum() + // } + // ID0Page::Leaf(items) => items.len(), + // } + // } + // let entry_num = page_entry_num(&pages, pages.get(&root).unwrap()); + // //ensure!(entry_num == usize::try_from(header.record_count).unwrap()); + //} + + Ok(Self { + header, + pages: Some(ID0BTreePages { root, pages }), + }) + } + + pub(crate) fn into_vec(mut self) -> Vec { + let mut output = + Vec::with_capacity(self.header.record_count.try_into().unwrap()); + let Some(pages) = &mut self.pages else { + 
return vec![]; + }; + pages.inner_into_vec(pages.root, &mut output); + output + } } -pub trait Id0AddressKey { - fn as_u64(&self) -> u64; +impl ID0BTreePages { + fn inner_into_vec( + &mut self, + page_idx: NonZeroU32, + output: &mut Vec, + ) { + match self.pages.remove(&page_idx).unwrap() { + ID0Page::Index { preceding, entries } => { + if let Some(preceding) = preceding { + // if not root, add the preceding page before this one + self.inner_into_vec(preceding, &mut *output); + } + for ID0PageIndex { page, key, value } in entries { + output.push(ID0Entry { key, value }); + if let Some(page) = page { + self.inner_into_vec(page, &mut *output); + } + } + } + ID0Page::Leaf(entries) => output.extend(entries), + } + } } diff --git a/src/id0/db.rs b/src/id0/db.rs new file mode 100644 index 0000000..b1b0dc7 --- /dev/null +++ b/src/id0/db.rs @@ -0,0 +1,684 @@ +use std::borrow::Cow; +use std::ffi::CStr; + +use anyhow::Result; +use num_traits::{AsPrimitive, PrimInt, ToBytes}; + +use crate::{ + ida_reader::IdbReadKind, IDAUsize, IDAVariants, SectionReader, IDA32, IDA64, +}; + +use super::*; + +pub type ID0SectionVariants = IDAVariants, ID0Section>; + +#[derive(Debug, Clone)] +pub struct ID0Section { + // the data itself don't have a kind, but it's required to handle the data + _kind: std::marker::PhantomData, + pub entries: Vec, +} + +#[derive(Debug, Clone)] +pub struct ID0Entry { + pub key: Vec, + pub value: Vec, +} + +impl SectionReader for ID0Section { + type Result = ID0Section; + + fn read_section + IdbBufRead>( + input: &mut I, + ) -> Result { + let mut output = vec![]; + input.read_to_end(&mut output)?; + ID0BTree::read_inner(&output[..]) + .map(ID0BTree::into_vec) + .map(|entries| Self { + _kind: std::marker::PhantomData, + entries, + }) + } + + fn size_from_v910(header: &crate::IDBHeaderV910) -> u64 { + header.id0.unwrap().size.get() + } +} + +impl ID0Section { + pub fn all_entries(&self) -> &[ID0Entry] { + &self.entries + } + + pub(crate) fn binary_search( + &self, + key: impl AsRef<[u8]>, + ) -> Result { + let key = key.as_ref(); + self.entries.binary_search_by_key(&key, |b| &b.key[..]) + } + + fn binary_search_end(&self, key: impl AsRef<[u8]>) -> Result { + let key = key.as_ref(); + self.entries.binary_search_by(|b| { + if b.key.starts_with(key) { + std::cmp::Ordering::Less + } else { + b.key.as_slice().cmp(key) + } + }) + } + + pub fn get(&self, key: impl AsRef<[u8]>) -> Option<&ID0Entry> { + self.binary_search(key).ok().map(|i| &self.entries[i]) + } + + /// search for entries in this inclusive range + pub fn get_inclusive_range( + &self, + start: impl AsRef<[u8]>, + end: impl AsRef<[u8]>, + ) -> &[ID0Entry] { + let start = self.binary_search(start).unwrap_or_else(|start| start); + let end = self.binary_search_end(end).unwrap_or_else(|end| end); + + &self.entries[start..end] + } + + pub fn sub_values(&self, key: impl AsRef<[u8]>) -> &[ID0Entry] { + let key = key.as_ref(); + let start = self.binary_search(key).unwrap_or_else(|start| start); + let end = self.binary_search_end(key).unwrap_or_else(|end| end); + + &self.entries[start..end] + } + + pub(crate) fn address_info_value( + &self, + label_ref: K::Usize, + ) -> Result<&[ID0Entry]> { + // NOTE for some reasong the key is only 7 bytes, + // there is also a subindex, in case the value is very big + #[cfg(feature = "restrictive")] + { + let max_ref_value = + ::max_value() >> 8; + ensure!( + label_ref <= max_ref_value, + "Ivanlid Address Info value Ref" + ); + } + let label_ref = (label_ref << 8).to_be_bytes(); + let key: Vec = 
key_from_address_and_subtype::( + K::Usize::from(0xFFu8).swap_bytes(), + b'S', + ) + .chain( + label_ref.as_ref()[0..label_ref.as_ref().len() - 1] + .iter() + .copied(), + ) + .collect(); + Ok(self.sub_values(key)) + } + + /// read the `$ segs` entries of the database + pub fn segments(&self) -> Result> { + let entry = self + .get("N$ segs") + .ok_or_else(|| anyhow!("Unable to find entry segs"))?; + let key: Vec = b"." + .iter() + .chain(entry.value.iter().rev()) + .chain(b"S") + .copied() + .collect(); + Ok(SegmentIter { + _kind: std::marker::PhantomData, + segments: self.sub_values(key), + }) + } + + /// find the `$ segstrings` + pub fn segment_strings_idx(&self) -> Option { + self.get("N$ segstrings") + .map(|x| SegmentStringsIdx(&x.value)) + } + + /// read all the `$ segstrings` entries of the database + pub fn segment_strings(&self, idx: SegmentStringsIdx) -> SegmentStringIter { + let key: Vec = b"." + .iter() + .chain(idx.0.iter().rev()) + .chain(b"S") + .copied() + .collect(); + SegmentStringIter::new(self.sub_values(key)) + } + + /// find the `$ patches` + pub fn segment_patches_idx(&self) -> Option { + self.get("N$ patches").map(|x| SegmentPatchIdx(&x.value)) + } + + /// read all the original values from `$ patches` entries of the database + pub fn segment_patches_original_value( + &self, + idx: SegmentPatchIdx, + ) -> SegmentPatchOriginalValueIter { + let key: Vec = b"." + .iter() + .chain(idx.0.iter().rev()) + .chain(b"A") + .copied() + .collect(); + let key_len = key.len(); + let entries = self.sub_values(key); + SegmentPatchOriginalValueIter::new(entries, key_len) + } + + // TODO there is also a "P" entry in patches, it seems to only contains + // the value 0x01 for each equivalent "A" entry + + pub fn segment_name(&self, idx: SegmentNameIdx) -> Result<&[u8]> { + let seg_idx = self.segment_strings_idx(); + // TODO I think this is dependent on the version, and not on availability + if let Some(seg_idx) = seg_idx { + for seg in self.segment_strings(seg_idx) { + let (seg_idx, seg_value) = seg?; + if seg_idx == idx { + return Ok(seg_value); + } + } + Err(anyhow!("Unable to find ID0 Segment Name")) + } else { + // if there is no names, AKA `$ segstrings`, search for the key directly + self.name_by_index(idx) + } + } + + pub(crate) fn name_by_index(&self, idx: SegmentNameIdx) -> Result<&[u8]> { + // if there is no names, AKA `$ segstrings`, search for the key directly + let key: Vec = key_from_address_and_subtype::( + K::Usize::from(0xFFu8).swap_bytes(), + b'N', + ) + .collect(); + let name = self + .get(key) + .ok_or_else(|| anyhow!("Not found name for segment {}", idx.0))?; + parse_maybe_cstr(&name.value) + .ok_or_else(|| anyhow!("Invalid segment name {}", idx.0)) + } + + /// read the `$ loader name` entries of the database + pub fn loader_name(&self) -> Result>> { + let entry = self + .get("N$ loader name") + .ok_or_else(|| anyhow!("Unable to find entry loader name"))?; + // TODO check that keys are 0 => plugin, or 1 => format + let key: Vec = b"." 
+ .iter() + .chain(entry.value.iter().rev()) + .chain(b"S") + .copied() + .collect(); + Ok(self + .sub_values(key) + .iter() + .map(|e| Ok(CStr::from_bytes_with_nul(&e.value)?.to_str()?))) + } + + pub fn root_info_node(&self) -> Result> { + let entry = self + .get("NRoot Node") + .ok_or_else(|| anyhow!("Unable to find entry Root Node"))?; + let node_idx = K::Usize::from_le_bytes(&entry.value[..]) + .ok_or_else(|| anyhow!("Invalid Root Node key value"))?; + Ok(NodeIdx(node_idx)) + } + + /// read the `Root Node` entries of the database + pub fn root_info( + &self, + idx: NodeIdx, + ) -> Result>>> { + let key: Vec = key_from_address::(idx.0).collect(); + let key_len = key.len(); + Ok(self.sub_values(key).iter().map(move |entry| { + let sub_key = &entry.key[key_len..]; + let Some(sub_type) = sub_key.first().copied() else { + return Ok(IDBRootInfo::Unknown(entry)); + }; + match (sub_type, sub_key.len()) { + (b'N', 1) => { + ensure!( + parse_maybe_cstr(&entry.value) + == Some(&b"Root Node"[..]), + "Invalid Root Node Name" + ); + return Ok(IDBRootInfo::RootNodeName); + } + // TODO filenames can be non-utf-8, but are they always CStr? + (b'V', 1) => return Ok(IDBRootInfo::InputFile(&entry.value)), + _ => {} + } + let Some(value) = K::Usize::from_be_bytes(&sub_key[1..]) else { + return Ok(IDBRootInfo::Unknown(entry)); + }; + match (sub_type, value.into_i64()) { + (b'A', -6) => K::Usize::from_le_bytes(&entry.value[..]) + .ok_or_else(|| anyhow!("Unable to parse imagebase value")) + .map(ImageBase) + .map(IDBRootInfo::ImageBase), + (b'A', -5) => K::Usize::from_le_bytes(&entry.value[..]) + .ok_or_else(|| anyhow!("Unable to parse crc value")) + .map(IDBRootInfo::Crc), + (b'A', -4) => K::Usize::from_le_bytes(&entry.value[..]) + .ok_or_else(|| anyhow!("Unable to parse open_count value")) + .map(IDBRootInfo::OpenCount), + (b'A', -2) => K::Usize::from_le_bytes(&entry.value[..]) + .ok_or_else(|| anyhow!("Unable to parse CreatedDate value")) + .map(IDBRootInfo::CreatedDate), + (b'A', -1) => K::Usize::from_le_bytes(&entry.value[..]) + .ok_or_else(|| anyhow!("Unable to parse Version value")) + .map(IDBRootInfo::Version), + (b'S', 1302) => entry + .value + .as_slice() + .try_into() + .map(IDBRootInfo::Md5) + .map_err(|_| anyhow!("Value Md5 with invalid len")), + (b'S', 1303) => parse_maybe_cstr(&entry.value) + .and_then(|version| core::str::from_utf8(version).ok()) + .ok_or_else(|| { + anyhow!("Unable to parse VersionString string") + }) + .map(IDBRootInfo::VersionString), + (b'S', 1349) => entry + .value + .as_slice() + .try_into() + .map(IDBRootInfo::Sha256) + .map_err(|_| anyhow!("Value Sha256 with invalid len")), + (b'S', 0x41b994) => IDBParam::::read(&entry.value) + .map(Box::new) + .map(IDBRootInfo::IDAInfo), + _ => Ok(IDBRootInfo::Unknown(entry)), + } + })) + } + + /// read the `Root Node` ida_info entry of the database + pub fn ida_info(&self) -> Result> { + // TODO Root Node is always the last one? 
+ let entry = self.root_info_node()?; + let sub_key = K::Usize::from(0x41B994u32); + let key: Vec = key_from_address_and_subtype::(entry.0, b'S') + .chain(sub_key.to_be_bytes().as_ref().iter().copied()) + .collect(); + let description = + self.sub_values(key).iter().next().ok_or_else(|| { + anyhow!("Unable to find id_params inside Root Node") + })?; + IDBParam::::read(&description.value) + } + + /// read the `$ fileregions` entries of the database + pub fn file_regions_idx(&self) -> Result> { + let entry = self + .get("N$ fileregions") + .ok_or_else(|| anyhow!("Unable to find entry fileregions"))?; + let node_idx = K::Usize::from_le_bytes(&entry.value[..]) + .ok_or_else(|| anyhow!("Invalid fileregions key value"))?; + Ok(FileRegionIdx(NodeIdx(node_idx))) + } + + /// read the `$ fileregions` entries of the database + pub fn file_regions( + &self, + idx: FileRegionIdx, + version: u16, + ) -> FileRegionIter { + let key: Vec = + key_from_address_and_subtype::(idx.0 .0, b'S').collect(); + let key_len = key.len(); + // TODO find the meaning of "$ fileregions" b'V' entries + let segments = self.sub_values(key); + FileRegionIter { + _kind: std::marker::PhantomData, + segments, + key_len, + version, + } + } + + /// read the `$ funcs` entries of the database + pub fn functions_and_comments( + &self, + ) -> Result>>> { + let entry = self + .get("N$ funcs") + .ok_or_else(|| anyhow!("Unable to find functions"))?; + let key: Vec = b"." + .iter() + .chain(entry.value.iter().rev()) + .copied() + .collect(); + let key_len = key.len(); + Ok(self.sub_values(key).iter().map(move |e| { + let key = &e.key[key_len..]; + FunctionsAndComments::read(key, &e.value) + })) + } + + // TODO implement $ fixups + // TODO implement $ imports + // TODO implement $ scriptsnippets + // TODO implement $ enums + // TODO implement $ structs + + // TODO implement $ hidden_ranges + // TODO the address_info for 0xff00_00XX (or 0xff00_0000__0000_00XX for 64bits) seesm to be reserved, what happens if there is data at that page? + + fn entry_points_raw( + &self, + ) -> Result>>> { + let entry = self + .get("N$ entry points") + .ok_or_else(|| anyhow!("Unable to find functions"))?; + let key: Vec = b"." + .iter() + .chain(entry.value.iter().rev()) + .copied() + .collect(); + let key_len = key.len(); + Ok(self.sub_values(key).iter().map(move |e| { + let key = &e.key[key_len..]; + EntryPointRaw::read(key, &e.value) + })) + } + + /// read the `$ entry points` entries of the database + pub fn entry_points(&self) -> Result>> { + type RawEntryPoint<'a, K> = + HashMap, Option<&'a str>, Option<&'a str>)>; + let mut entry_points: RawEntryPoint<'_, K::Usize> = HashMap::new(); + for entry_point in self.entry_points_raw()? { + match entry_point? { + EntryPointRaw::Unknown { .. } + | EntryPointRaw::Name + | EntryPointRaw::Ordinal { .. 
} => {} + EntryPointRaw::Address { key, address } => { + if let Some(_old) = + entry_points.entry(key).or_default().0.replace(address) + { + return Err(anyhow!( + "Duplicated function address for {key}" + )); + } + } + EntryPointRaw::ForwardedSymbol { key, symbol } => { + if let Some(_old) = + entry_points.entry(key).or_default().1.replace(symbol) + { + return Err(anyhow!( + "Duplicated function symbol for {key}" + )); + } + } + EntryPointRaw::FunctionName { key, name } => { + if let Some(_old) = + entry_points.entry(key).or_default().2.replace(name) + { + return Err(anyhow!( + "Duplicated function name for {key}" + )); + } + } + } + } + let mut result: Vec<_> = entry_points + .into_iter() + .filter_map(|(key, (address, symbol, name))| { + match (address, symbol, name) { + // Function without name or address is possible, this is + // probably some label that got deleted + (Some(_), _, None) + | (None, _, Some(_)) + | (None, _, None) => None, + (Some(address), forwarded, Some(name)) => { + let entry = + match self.find_entry_point_type(key, address) { + Ok(entry) => entry, + Err(error) => return Some(Err(error)), + }; + Some(Ok(EntryPoint { + name: name.to_owned(), + address, + forwarded: forwarded.map(str::to_string), + entry_type: entry, + })) + } + } + }) + .collect::>()?; + result.sort_by_key(|entry| entry.address); + Ok(result) + } + + fn find_entry_point_type( + &self, + key: K::Usize, + address: K::Usize, + ) -> Result> { + if let Some(key_entry) = + self.find_entry_point_type_value(key, K::Usize::from(0x3000u16))? + { + return Ok(Some(key_entry)); + } + // TODO some times it uses the address as key, it's based on the version? + if let Some(key_entry) = self + .find_entry_point_type_value(address, K::Usize::from(0x3000u16))? + { + return Ok(Some(key_entry)); + } + Ok(None) + } + + fn find_entry_point_type_value( + &self, + value: K::Usize, + key_find: K::Usize, + ) -> Result> { + let key: Vec = + key_from_address_and_subtype::(value, b'S').collect(); + let key_len = key.len(); + for entry in self.sub_values(key) { + let key = &entry.key[key_len..]; + let key = K::Usize::from_be_bytes(key).unwrap(); + // TODO handle other values for the key + if key == key_find { + return til::Type::new_from_id0(&entry.value, vec![]) + .map(Option::Some); + } + } + Ok(None) + } + + /// read the address information for all addresses from `$ fileregions` + pub fn address_info( + &self, + version: u16, + ) -> Result> { + SectionAddressInfoIter::new(self, version) + } + + /// read the address information for all addresses from `$ fileregions` + pub fn address_info_by_address( + &self, + version: u16, + ) -> Result> { + SectionAddressInfoByAddressIter::new(self, version) + } + + /// read the address information for the address + pub fn address_info_at( + &self, + address: impl Id0AddressKey, + ) -> Result> { + let address = address.as_u64(); + let key: Vec = key_from_address::(address).collect(); + let start = self.binary_search(&key).unwrap_or_else(|start| start); + let end = self.binary_search_end(&key).unwrap_or_else(|end| end); + + let entries = &self.entries[start..end]; + Ok(AddressInfoIterAt::new(AddressInfoIter::new(entries, self))) + } + + /// read the label set at address, if any + pub fn label_at( + &self, + id0_addr: impl Id0AddressKey, + ) -> Result>> { + let key: Vec = key_from_address::(id0_addr.as_u64()) + .chain(Some(b'N')) + .collect(); + let Ok(start) = self.binary_search(&key) else { + return Ok(None); + }; + + let entry = &self.entries[start]; + let key_len = key.len(); + let key = 
&entry.key[key_len..]; + ensure!(key.is_empty(), "Label ID0 entry with key"); + let label = ID0CStr::<'_, K>::parse_cstr_or_subkey(&entry.value) + .ok_or_else(|| anyhow!("Label is not valid CStr"))?; + match label { + ID0CStr::CStr(label) => Ok(Some(Cow::Borrowed(label))), + ID0CStr::Ref(label_ref) => { + let entries = self.address_info_value(label_ref)?; + Ok(Some(Cow::Owned( + entries + .iter() + .flat_map(|x| &x.value[..]) + .copied() + .collect(), + ))) + } + } + } + + pub fn struct_at(&self, idx: SubtypeId) -> Result<&[u8]> { + let key: Vec = + key_from_address_and_subtype::(idx.0, b'N').collect(); + let start = self.binary_search(&key).map_err(|_| { + anyhow!("Unable to locate struct type for id0 entry") + })?; + + let entry = &self.entries[start]; + // older versions don't have this prefix + let value = + entry.value.strip_prefix(b"$$ ").unwrap_or(&entry.value[..]); + Ok(value) + } + + pub(crate) fn dirtree_from_name>( + &self, + name: impl AsRef<[u8]>, + ) -> Result> { + let Ok(index) = self.binary_search(name) else { + // if the entry is missing, it probably just doesn't have any entries + return Ok(DirTreeRoot { entries: vec![] }); + }; + let key: Vec = b"." + .iter() + .chain(self.entries[index].value.iter().rev()) + .chain(b"S") + .copied() + .collect(); + let key_len = key.len(); + let mut sub_values = self.sub_values(key).iter().map(|entry| { + let raw_idx = K::Usize::from_be_bytes(&entry.key[key_len..]) + .ok_or_else(|| anyhow!("invalid dirtree entry key"))?; + let idx = raw_idx >> 16; + let sub_idx: u16 = (raw_idx & K::Usize::from(0xFFFFu16)).as_(); + Ok((idx, sub_idx, &entry.value[..])) + }); + let dirs = dirtree::parse_dirtree::<'_, _, _, K>(&mut sub_values)?; + ensure!(sub_values.next().is_none(), "unparsed dirtree entries"); + Ok(dirs) + } + + // https://hex-rays.com/products/ida/support/idapython_docs/ida_dirtree.html + + /// read the `$ dirtree/tinfos` entries of the database + pub fn dirtree_tinfos(&self) -> Result> { + self.dirtree_from_name("N$ dirtree/tinfos") + } + + // TODO remove the u64 and make it a TILOrdIndex type + /// read the `$ dirtree/structs` entries of the database + pub fn dirtree_structs(&self) -> Result> { + self.dirtree_from_name("N$ dirtree/structs") + } + + // TODO remove the u64 and make it a TILOrdIndex type + /// read the `$ dirtree/enums` entries of the database + pub fn dirtree_enums(&self) -> Result> { + self.dirtree_from_name("N$ dirtree/enums") + } + + // TODO remove the u64 and make it a FuncAddress type + /// read the `$ dirtree/funcs` entries of the database + pub fn dirtree_function_address( + &self, + ) -> Result>> { + self.dirtree_from_name("N$ dirtree/funcs") + } + + /// read the `$ dirtree/names` entries of the database + pub fn dirtree_names(&self) -> Result>> { + self.dirtree_from_name("N$ dirtree/names") + } + + // TODO remove the u64 and make it an ImportIDX type + /// read the `$ dirtree/imports` entries of the database + pub fn dirtree_imports(&self) -> Result> { + self.dirtree_from_name("N$ dirtree/imports") + } + + // TODO remove the u64 and make it a BptsIDX type + /// read the `$ dirtree/bpts` entries of the database + pub fn dirtree_bpts(&self) -> Result> { + self.dirtree_from_name("N$ dirtree/bpts") + } + + // TODO remove the u64 and make it a &str type + /// read the `$ dirtree/bookmarks_idaplace_t` entries of the database + pub fn dirtree_bookmarks_idaplace(&self) -> Result> { + self.dirtree_from_name("N$ dirtree/bookmarks_idaplace_t") + } + + // TODO remove the u64 and make it a &str type + /// read the `$
dirtree/bookmarks_structplace_t` entries of the database + pub fn dirtree_bookmarks_structplace( + &self, + ) -> Result> { + self.dirtree_from_name("N$ dirtree/bookmarks_structplace_t") + } + + // TODO remove the u64 and make it a &str type + /// read the `$ dirtree/bookmarks_tiplace_t` entries of the database + pub fn dirtree_bookmarks_tiplace(&self) -> Result> { + self.dirtree_from_name("N$ dirtree/bookmarks_tiplace_t") + } +} + +fn key_from_address_and_subtype( + address: K::Usize, + subtype: u8, +) -> impl Iterator { + key_from_address::(address).chain([subtype]) +} diff --git a/src/id0/dirtree.rs b/src/id0/dirtree.rs index 45f06bf..0633ecd 100644 --- a/src/id0/dirtree.rs +++ b/src/id0/dirtree.rs @@ -1,8 +1,10 @@ use std::collections::HashMap; use anyhow::{anyhow, ensure, Result}; +use num_traits::WrappingAdd; -use crate::ida_reader::{IdaGenericBufUnpack, IdaUnpack, IdaUnpacker}; +use crate::ida_reader::{IdbBufRead, IdbReadKind}; +use crate::{IDAKind, IDAUsize}; use super::Id0AddressKey; @@ -40,42 +42,40 @@ pub enum DirTreeEntry { }, } -pub(crate) trait FromDirTreeNumber { - fn new(value: u64) -> Self; +pub(crate) trait FromDirTreeNumber { + fn new(value: K) -> Self; } -impl FromDirTreeNumber for u64 { - #[inline] - fn new(value: u64) -> u64 { +impl FromDirTreeNumber for K { + fn new(value: K) -> K { value } } -#[derive(Clone, Copy, Debug)] -pub struct Id0Address { - address: u64, +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Id0Address { + address: K::Usize, } -impl FromDirTreeNumber for Id0Address { - #[inline] - fn new(address: u64) -> Self { +impl FromDirTreeNumber for Id0Address { + fn new(address: K::Usize) -> Self { Self { address } } } -impl Id0AddressKey for Id0Address { - fn as_u64(&self) -> u64 { +impl Id0AddressKey for Id0Address { + fn as_u64(&self) -> K::Usize { self.address } } +// TODO this can't be right #[derive(Clone, Copy, Debug)] pub struct Id0TilOrd { // TODO remove this pub pub ord: u64, } -impl FromDirTreeNumber for Id0TilOrd { - #[inline] - fn new(ord: u64) -> Self { - Self { ord } +impl FromDirTreeNumber for Id0TilOrd { + fn new(ord: K) -> Self { + Self { ord: ord.into() } } } @@ -95,22 +95,21 @@ impl FromDirTreeNumber for Id0TilOrd { /// "\x2e\xff\x00\x00\x31\x53\x00\x02\x00\x00":"\x01\x62\x00\x00\x00\x0d\x90\x20\x80\x88\x08\x10\x80\xe9\x04\x80\xe7\x82\x36\x06\xff\xff\xff\xfc\xd0\xff\xff\xff\xff\x60\x50\x83\x0a\x00\x0d" /// ... /// "N$ dirtree/funcs":"\x31\x00\x00\xff" -pub(crate) fn parse_dirtree<'a, T, I>( +pub(crate) fn parse_dirtree<'a, T, I, K: IDAKind>( entries_iter: I, - is_64: bool, ) -> Result> where - T: FromDirTreeNumber, - I: IntoIterator>, + T: FromDirTreeNumber, + I: IntoIterator>, { // parse all the raw entries let mut entries_raw = HashMap::new(); // This is assuming the first entry is the root, because this is more general that assume it's always 0 - let mut reader = DirtreeEntryRead { + let mut reader = DirtreeEntryRead::<'_, _, K> { iter: entries_iter.into_iter(), // dummy value so next_entry() will get the first one state: DirtreeEntryState::Reading { - idx: 0, + idx: K::Usize::from(0u8), sub_idx: 0, entry: &[], }, @@ -120,13 +119,9 @@ where let Some(idx) = reader.next_entry()? 
else { break; }; - let mut reader = IdaUnpacker::new(&mut reader, is_64); root_idx.get_or_insert(idx); - let entry = DirTreeEntryRaw::from_raw(&mut reader)?; - ensure!( - !reader.inner().have_data_left(), - "Entry have data after dirtree" - ); + let entry = DirTreeEntryRaw::::from_raw(&mut reader)?; + ensure!(!reader.have_data_left(), "Entry have data after dirtree"); if let Some(_old) = entries_raw.insert(idx, Some(entry)) { return Err(anyhow!("Duplicated dirtree index entry")); }; @@ -141,16 +136,23 @@ where .unwrap(); let name = root.name; ensure!(name.is_empty(), "DirTree With a named root"); - ensure!(root.parent == 0, "Dirtree Root with parent"); - let dirs = dirtree_directory_from_raw(&mut entries_raw, 0, root.entries)?; + ensure!( + root.parent == K::Usize::from(0u8), + "Dirtree Root with parent" + ); + let dirs = dirtree_directory_from_raw( + &mut entries_raw, + K::Usize::from(0u8), + root.entries, + )?; Ok(DirTreeRoot { entries: dirs }) } -fn dirtree_directory_from_raw( - raw: &mut HashMap>, - parent_idx: u64, - entries: Vec, +fn dirtree_directory_from_raw, K: IDAKind>( + raw: &mut HashMap>>, + parent_idx: K::Usize, + entries: Vec>, ) -> Result>> { let sub_dirs = entries .into_iter() @@ -186,16 +188,14 @@ fn dirtree_directory_from_raw( } #[derive(Clone, Debug)] -struct DirTreeEntryRaw { +struct DirTreeEntryRaw { name: Vec, - parent: u64, - entries: Vec, + parent: K::Usize, + entries: Vec>, } -impl DirTreeEntryRaw { - fn from_raw( - data: &mut I, - ) -> Result { +impl DirTreeEntryRaw { + fn from_raw>(data: &mut I) -> Result { // TODO It's unclear if this value is a version, it seems so match data.read_u8()? { 0 => Self::from_raw_v0(data), @@ -204,7 +204,7 @@ impl DirTreeEntryRaw { } } - fn from_raw_v0( + fn from_raw_v0>( data: &mut I, ) -> Result { // part 1: header @@ -269,7 +269,7 @@ impl DirTreeEntryRaw { /// | entries folder | \x00 | 0..0 are folders | /// | entries values | \x0c | from 0..12 are values | /// - fn from_raw_v1( + fn from_raw_v1>( data: &mut I, ) -> Result { // part 1: header @@ -337,34 +337,34 @@ impl DirTreeEntryRaw { } #[derive(Clone, Copy, Debug)] -struct DirTreeEntryChildRaw { - number: u64, +struct DirTreeEntryChildRaw { + number: K::Usize, is_value: bool, } -struct DirtreeEntryRead<'a, I> { +struct DirtreeEntryRead<'a, I, K: IDAKind> { iter: I, - state: DirtreeEntryState<'a>, + state: DirtreeEntryState<'a, K>, } -enum DirtreeEntryState<'a> { +enum DirtreeEntryState<'a, K: IDAKind> { Reading { - idx: u64, + idx: K::Usize, sub_idx: u16, entry: &'a [u8], }, Next { - idx: u64, + idx: K::Usize, entry: &'a [u8], }, } -impl<'a, I> DirtreeEntryRead<'a, I> +impl<'a, I, K: IDAKind> DirtreeEntryRead<'a, I, K> where - I: Iterator>, + I: Iterator>, { // get the next entry on the database - fn next_entry(&mut self) -> Result> { + fn next_entry(&mut self) -> Result> { let (idx, sub_idx, entry) = match self.state { DirtreeEntryState::Reading { entry: &[], .. 
} => { let Some(next_entry) = self.iter.next() else { @@ -459,9 +459,9 @@ where } } -impl<'a, I> std::io::Read for DirtreeEntryRead<'a, I> +impl<'a, I, K: IDAKind> std::io::Read for DirtreeEntryRead<'a, I, K> where - I: Iterator>, + I: Iterator>, { fn read(&mut self, buf: &mut [u8]) -> std::io::Result { let is_empty = match self.state { @@ -484,9 +484,9 @@ where } } -impl<'a, I> std::io::BufRead for DirtreeEntryRead<'a, I> +impl<'a, I, K: IDAKind> std::io::BufRead for DirtreeEntryRead<'a, I, K> where - I: Iterator>, + I: Iterator>, { fn fill_buf(&mut self) -> std::io::Result<&[u8]> { match self.state { @@ -511,28 +511,20 @@ where } } -fn parse_entries( +fn parse_entries>( data: &mut I, - entries: &mut Vec, + entries: &mut Vec>, entries_len: u32, default_is_value: bool, ) -> Result<()> { - let mut last_value: Option = None; + let mut last_value: Option = None; for _ in 0..entries_len { let rel_value = data.unpack_usize()?; let value = match last_value { // first value is absolute None => rel_value, // other are relative from the previous - Some(last_value_old) => { - let mut value = - last_value_old.wrapping_add_signed(rel_value as i64); - // NOTE that in 32bits it wrapps using the u32 limit - if !data.is_64() { - value &= u32::MAX as u64; - } - value - } + Some(last_value_old) => last_value_old.wrapping_add(&rel_value), }; last_value = Some(value); entries.push(DirTreeEntryChildRaw { diff --git a/src/id0/file_region.rs b/src/id0/file_region.rs new file mode 100644 index 0000000..aabd66f --- /dev/null +++ b/src/id0/file_region.rs @@ -0,0 +1,75 @@ +use anyhow::{anyhow, Result}; +use num_traits::CheckedAdd; + +use crate::ida_reader::IdbReadKind; +use crate::IDAKind; + +use super::{ID0Entry, NodeIdx}; + +#[derive(Clone, Debug)] +pub struct FileRegions { + pub start: K::Usize, + pub end: K::Usize, + pub eva: K::Usize, +} + +impl FileRegions { + fn read(_key: &[u8], data: &[u8], version: u16) -> Result { + let mut cursor = data; + let result = Self::innner_read(&mut cursor, version)?; + match (version, cursor) { + (..=699, &[]) => {} + // TODO some may include an extra 0 byte at the end? + (700.., &[] | &[0]) => {} + _ => return Err(anyhow!("Unknown data after the ID0 FileRegions")), + } + Ok(result) + } + + fn innner_read( + cursor: &mut impl IdbReadKind, + version: u16, + ) -> Result { + // TODO detect versions with more accuracy + let (start, end, eva) = match version { + ..=699 => { + let start = cursor.read_usize()?; + let end = cursor.read_usize()?; + let rva = cursor.read_u32()?; + // TODO avoid this into and make it a enum? + (start, end, rva.into()) + } + 700.. 
=> { + let start = cursor.unpack_usize()?; + let len = cursor.unpack_usize()?; + let end = start.checked_add(&len).ok_or_else(|| { + anyhow!("Overflow address in File Regions") + })?; + let rva = cursor.unpack_usize()?; + (start, end, rva) + } + }; + Ok(Self { start, end, eva }) + } +} + +pub struct FileRegionIdx(pub(crate) NodeIdx); + +#[derive(Clone, Copy)] +pub struct FileRegionIter<'a, K: IDAKind> { + pub(crate) _kind: std::marker::PhantomData, + pub(crate) segments: &'a [ID0Entry], + pub(crate) key_len: usize, + pub(crate) version: u16, +} + +impl Iterator for FileRegionIter<'_, K> { + type Item = Result>; + + fn next(&mut self) -> Option { + let (current, rest) = self.segments.split_first()?; + self.segments = rest; + let key = ¤t.key[self.key_len..]; + Some(FileRegions::read(key, ¤t.value, self.version)) + } +} diff --git a/src/id0/flag.rs b/src/id0/flag.rs new file mode 100644 index 0000000..dd27375 --- /dev/null +++ b/src/id0/flag.rs @@ -0,0 +1,672 @@ +pub mod netnode { + /// Maximum length of a netnode name. WILL BE REMOVED IN THE FUTURE + pub const MAXNAMESIZE: u32 = 512; + + /// Maximum length of a name. We permit names up to 32KB-1 bytes. + pub const MAX_NODENAME_SIZE: u32 = 32 * 1024; + + /// Maximum length of strings or objects stored in a supval array element + pub const MAXSPECSIZE: u32 = 1024; + + /// A number to represent a bad netnode reference + pub const BADNODE: u64 = u64::MAX; + + /// Reserved netnode tags + /// Tags internally used in netnodes. You should not use them + /// for your tagged alt/sup/char/hash arrays. + pub mod nn_res { + /// Array of altvals + pub const ARRAY_ALT_TAG: u8 = b'A'; + /// Array of supvals + pub const ARRAY_SUP_TAG: u8 = b'S'; + /// Array of hashvals + pub const HASH_TAG: u8 = b'H'; + /// Value of netnode + pub const VALUE_TAG: u8 = b'V'; + /// Name of netnode + pub const NAME_TAG: u8 = b'N'; + /// Links between netnodes + pub const LINK_TAG: u8 = b'L'; + } + + // Internal bit used to request ea2node() mapping of alt and sup indexes + pub const NETMAP_IDX: u32 = 0x100; + // Internal bit used to request ea2node() mapping of alt values. + // Such values are stored after being incremented by one. + pub const NETMAP_VAL: u32 = 0x200; + // Internal bit used to make sure a string obtained with getblob() is + // null-terminated. + pub const NETMAP_STR: u32 = 0x400; + // Internal bit: use 8-bit indexes. + pub const NETMAP_X8: u32 = 0x800; + // Internal bit: use 8-bit values. + pub const NETMAP_V8: u32 = 0x1000; + // Internal bit: value is a netnode index + pub const NETMAP_VAL_NDX: u32 = 0x2000; +} + +/// Structure of altvals array +/// Structure of altvals array of netnode(ea). +/// altvals is a virtual array of 32-bit longs attached to a netnode. +/// the size of this array is unlimited. Unused indexes are not kept in the +/// database. We use only first several indexes to this array. 
+pub mod nalt { + ///// reserved for enums + //const NALT_ENUM: i32 = -2; + ///// 16-bit byte value + //const NALT_WIDE: i32 = -1; + + /// switch idiom address (used at jump targets) + pub const NALT_SWITCH: u8 = 1; + ///// offset base 2 + //const NALT_OBASE1: u8 = 2; + /// struct id + pub const NALT_STRUCT: u8 = 3; + ///// 'seen' flag (used in structures) + //const NALT_SEENF: u8 = 4; + ///// outer offset base 1 + //const NALT_OOBASE0: u8 = 5; + ///// outer offset base 2 + //const NALT_OOBASE1: u8 = 6; + ///// saved xref address in the xrefs window + //const NALT_XREFPOS: u8 = 7; + /// additional flags for an item + pub const NALT_AFLAGS: u8 = 8; + /// source line number + pub const NALT_LINNUM: u8 = 9; + /// absolute segment location + pub const NALT_ABSBASE: u8 = 10; + /// enum id for the first operand + pub const NALT_ENUM0: u8 = 11; + /// enum id for the second operand + pub const NALT_ENUM1: u8 = 12; + ///// struct offset, struct id for the first operand + //const NALT_STROFF0: u8 = 13; + ///// struct offset, struct id for the second operand + //const NALT_STROFF1: u8 = 14; + /// number of bytes purged from the stack when a function is called indirectly + pub const NALT_PURGE: u8 = 15; + /// type of string item + pub const NALT_STRTYPE: u8 = 16; + /// alignment value if the item is #FF_ALIGN + /// (should by equal to power of 2) + pub const NALT_ALIGN: u8 = 17; + + ///// linear address of byte referenced by + ///// high 16 bits of an offset (FF_0HIGH) + //const NALT_HIGH0: u8 = 18; + ///// linear address of byte referenced by + ///// high 16 bits of an offset (FF_1HIGH) + //const NALT_HIGH1: u8 = 19; + /// instruction/data background color + pub const NALT_COLOR: u8 = 20; + + /// Netnode xref tags + /// Tag values to store xrefs + pub mod x { + /// code xref to, idx: target address + pub const NALT_CREF_TO: u8 = b'X'; + /// code xref from, idx: source address + pub const NALT_CREF_FROM: u8 = b'x'; + /// data xref to, idx: target address + pub const NALT_DREF_TO: u8 = b'D'; + /// data xref from, idx: source address + pub const NALT_DREF_FROM: u8 = b'd'; + } +} + +/// Structure of supvals array +/// Structure of supvals array of netnode(ea). 
+/// Supvals is a virtual array of objects of arbitrary length attached +/// to a netnode (length of one element is limited by #MAXSPECSIZE, though) +/// We use first several indexes to this array: +pub mod nsup { + /// regular comment + pub const NSUP_CMT: u8 = 0; + /// repeatable comment + pub const NSUP_REPCMT: u8 = 1; + /// forced operand 1 + pub const NSUP_FOP1: u8 = 2; + /// forced operand 2 + pub const NSUP_FOP2: u8 = 3; + /// jump table info + pub const NSUP_JINFO: u8 = 4; + /// array parameters + pub const NSUP_ARRAY: u8 = 5; + /// OMF: group of segments (not used anymore) + pub const NSUP_OMFGRP: u8 = 6; + /// forced operand 3 + pub const NSUP_FOP3: u8 = 7; + /// switch information + pub const NSUP_SWITCH: u8 = 8; + /// complex reference information for operand 1 + pub const NSUP_REF0: u8 = 9; + /// complex reference information for operand 2 + pub const NSUP_REF1: u8 = 10; + /// complex reference information for operand 3 + pub const NSUP_REF2: u8 = 11; + /// outer complex reference information for operand 1 + pub const NSUP_OREF0: u8 = 12; + /// outer complex reference information for operand 2 + pub const NSUP_OREF1: u8 = 13; + /// outer complex reference information for operand 3 + pub const NSUP_OREF2: u8 = 14; + /// stroff: struct path for the first operand + pub const NSUP_STROFF0: u8 = 15; + /// stroff: struct path for the second operand + pub const NSUP_STROFF1: u8 = 16; + /// segment translations + pub const NSUP_SEGTRANS: u8 = 17; + /// forced operand 4 + pub const NSUP_FOP4: u8 = 18; + /// forced operand 5 + pub const NSUP_FOP5: u8 = 19; + /// forced operand 6 + pub const NSUP_FOP6: u8 = 20; + /// complex reference information for operand 4 + pub const NSUP_REF3: u8 = 21; + /// complex reference information for operand 5 + pub const NSUP_REF4: u8 = 22; + /// complex reference information for operand 6 + pub const NSUP_REF5: u8 = 23; + /// outer complex reference information for operand 4 + pub const NSUP_OREF3: u8 = 24; + /// outer complex reference information for operand 5 + pub const NSUP_OREF4: u8 = 25; + /// outer complex reference information for operand 6 + pub const NSUP_OREF5: u8 = 26; + /// saved xref address and type in the xrefs window + pub const NSUP_XREFPOS: u8 = 27; + /// custom data type id + pub const NSUP_CUSTDT: u8 = 28; + /// SEG_GRP: pack_dd encoded list of selectors + pub const NSUP_GROUPS: u8 = 29; + /// instructions that initialize call arguments + pub const NSUP_ARGEAS: u8 = 30; + /// forced operand 7 + pub const NSUP_FOP7: u8 = 31; + /// forced operand 8 + pub const NSUP_FOP8: u8 = 32; + /// complex reference information for operand 7 + pub const NSUP_REF6: u8 = 33; + /// complex reference information for operand 8 + pub const NSUP_REF7: u8 = 34; + /// outer complex reference information for operand 7 + pub const NSUP_OREF6: u8 = 35; + /// outer complex reference information for operand 8 + pub const NSUP_OREF7: u8 = 36; + /// Extended flags + pub const NSUP_EX_FLAGS: u8 = 37; + + // values E_PREV..E_NEXT+1000 are reserved (1000..2000..3000 decimal) + + /// SP change points blob (see funcs.cpp). + /// values NSUP_POINTS..NSUP_POINTS+0x1000 are reserved + pub const NSUP_POINTS: u32 = 0x1000; + + /// manual instruction. + /// values NSUP_MANUAL..NSUP_MANUAL+0x1000 are reserved + pub const NSUP_MANUAL: u32 = 0x2000; + + /// type information. + /// values NSUP_TYPEINFO..NSUP_TYPEINFO+0x1000 are reserved + pub const NSUP_TYPEINFO: u32 = 0x3000; + + /// register variables. 
+ /// values NSUP_REGVAR..NSUP_REGVAR+0x1000 are reserved + pub const NSUP_REGVAR: u32 = 0x4000; + + /// local labels. + /// values NSUP_LLABEL..NSUP_LLABEL+0x1000 are reserved + pub const NSUP_LLABEL: u32 = 0x5000; + + /// register argument type/name descriptions + /// values NSUP_REGARG..NSUP_REGARG+0x1000 are reserved + pub const NSUP_REGARG: u32 = 0x6000; + + /// function tails or tail referers + /// values NSUP_FTAILS..NSUP_FTAILS+0x1000 are reserved + pub const NSUP_FTAILS: u32 = 0x7000; + + /// graph group information + /// values NSUP_GROUP..NSUP_GROUP+0x1000 are reserved + pub const NSUP_GROUP: u32 = 0x8000; + + /// operand type information. + /// values NSUP_OPTYPES..NSUP_OPTYPES+0x100000 are reserved + pub const NSUP_OPTYPES: u32 = 0x9000; + + /// function metadata before lumina information was applied + /// values NSUP_ORIGFMD..NSUP_ORIGFMD+0x1000 are reserved + pub const NSUP_ORIGFMD: u32 = 0x109000; + + /// function frame type + /// values NSUP_FRAME..NSUP_FRAME+0x10000 are reserved + pub const NSUP_FRAME: u32 = 0x10A000; + + /// Netnode graph tags + /// Tag values to store graph info + pub mod gt { + /// group node info: color, ea, text + pub const NSUP_GR_INFO: u8 = b'g'; + /// group layout ptrs, hash: md5 of 'belongs' + pub const NALT_GR_LAYX: u8 = b'p'; + /// group layouts, idx: layout pointer + pub const NSUP_GR_LAYT: u8 = b'l'; + } +} + +/// Patch netnode tag +pub const PATCH_TAG: u8 = b'P'; + +pub mod indxs { + // UI desktops + /// hash indexed by desktop name with dekstop netnode + pub const IDB_DESKTOPS_NODE_NAME: &str = "$ desktops"; + /// tag to store desktop blob & timestamp + pub const IDB_DESKTOPS_TAG: u8 = b'S'; + /// desktop timestamp index + pub const IDB_DESKTOPS_TIMESTAMP: i32 = -1; + + /// node containing address of .got section + pub const GOTEA_NODE_NAME: &str = "$ got"; + pub const GOTEA_NODE_IDX: u8 = 0; +} + +/// Additional flags for the location +/// All 32-bits of the main flags are used up. +/// Additional flags keep more information about addresses. +/// AFLNOTE: DO NOT use these flags directly unless there is absolutely no way. +/// They are too low level and may corrupt the database. +pub mod afl { + /// has line number info + pub const AFL_LINNUM: u32 = 0x00000001; + /// user-defined SP value + pub const AFL_USERSP: u32 = 0x00000002; + /// name is public (inter-file linkage) + pub const AFL_PUBNAM: u32 = 0x00000004; + /// name is weak + pub const AFL_WEAKNAM: u32 = 0x00000008; + /// the item is hidden completely + pub const AFL_HIDDEN: u32 = 0x00000010; + /// the instruction/data is specified by the user + pub const AFL_MANUAL: u32 = 0x00000020; + /// the code/data border is hidden + pub const AFL_NOBRD: u32 = 0x00000040; + /// display struct field name at 0 offset when displaying an offset. + /// example: + /// \v{offset somestruct.field_0} + /// if this flag is clear, then + /// \v{offset somestruct} + pub const AFL_ZSTROFF: u32 = 0x00000080; + /// the 1st operand is bitwise negated + pub const AFL_BNOT0: u32 = 0x00000100; + /// the 2nd operand is bitwise negated + pub const AFL_BNOT1: u32 = 0x00000200; + /// item from the standard library. + /// low level flag, is used to set + /// #FUNC_LIB of ::func_t + pub const AFL_LIB: u32 = 0x00000400; + /// has typeinfo? (#NSUP_TYPEINFO); used only for addresses, not for member_t + pub const AFL_TI: u32 = 0x00000800; + /// has typeinfo for operand 0? (#NSUP_OPTYPES) + pub const AFL_TI0: u32 = 0x00001000; + /// has typeinfo for operand 1? 
(#NSUP_OPTYPES+1) + pub const AFL_TI1: u32 = 0x00002000; + /// has local name too (#FF_NAME should be set) + pub const AFL_LNAME: u32 = 0x00004000; + /// has type comment? (such a comment may be changed by IDA) + pub const AFL_TILCMT: u32 = 0x00008000; + /// toggle leading zeroes for the 1st operand + pub const AFL_LZERO0: u32 = 0x00010000; + /// toggle leading zeroes for the 2nd operand + pub const AFL_LZERO1: u32 = 0x00020000; + /// has user defined instruction color? + pub const AFL_COLORED: u32 = 0x00040000; + /// terse structure variable display? + pub const AFL_TERSESTR: u32 = 0x00080000; + /// code: toggle sign of the 1st operand + pub const AFL_SIGN0: u32 = 0x00100000; + /// code: toggle sign of the 2nd operand + pub const AFL_SIGN1: u32 = 0x00200000; + /// for imported function pointers: doesn't return. + /// this flag can also be used for any instruction + /// which halts or finishes the program execution + pub const AFL_NORET: u32 = 0x00400000; + /// sp delta value is fixed by analysis. + /// should not be modified by modules + pub const AFL_FIXEDSPD: u32 = 0x00800000; + /// the previous insn was created for alignment purposes only + pub const AFL_ALIGNFLOW: u32 = 0x01000000; + /// the type information is definitive. + /// (comes from the user or type library) + /// if not set see #AFL_TYPE_GUESSED + pub const AFL_USERTI: u32 = 0x02000000; + /// function returns a floating point value + pub const AFL_RETFP: u32 = 0x04000000; + /// insn modifes SP and uses the modified value; + /// example: pop [rsp+N] + pub const AFL_USEMODSP: u32 = 0x08000000; + /// autoanalysis should not create code here + pub const AFL_NOTCODE: u32 = 0x10000000; + /// autoanalysis should not create proc here + pub const AFL_NOTPROC: u32 = 0x20000000; + /// who guessed the type information? + pub const AFL_TYPE_GUESSED: u32 = 0xC2000000; + /// the type is guessed by IDA + pub const AFL_IDA_GUESSED: u32 = 0x00000000; + /// the function type is guessed by the decompiler + pub const AFL_HR_GUESSED_FUNC: u32 = 0x40000000; + /// the data type is guessed by the decompiler + pub const AFL_HR_GUESSED_DATA: u32 = 0x80000000; + /// the type is definitely guessed by the decompiler + pub const AFL_HR_DETERMINED: u32 = 0xC0000000; +} + +pub mod array { + /// use 'dup' construct + pub const AP_ALLOWDUPS: u32 = 0x00000001; + /// treats numbers as signed + pub const AP_SIGNED: u32 = 0x00000002; + /// display array element indexes as comments + pub const AP_INDEX: u32 = 0x00000004; + /// create as array (this flag is not stored in database) + pub const AP_ARRAY: u32 = 0x00000008; + /// mask for number base of the indexes + pub const AP_IDXBASEMASK: u32 = 0x000000F0; + /// display indexes in decimal + pub const AP_IDXDEC: u32 = 0x00000000; + /// display indexes in hex + pub const AP_IDXHEX: u32 = 0x00000010; + /// display indexes in octal + pub const AP_IDXOCT: u32 = 0x00000020; + /// display indexes in binary + pub const AP_IDXBIN: u32 = 0x00000030; +} + +/// Switch info flags +pub mod swi { + /// sparse switch (value table present), + /// otherwise lowcase present + pub const SWI_SPARSE: u32 = 0x00000001; + /// 32-bit values in table + pub const SWI_V32: u32 = 0x00000002; + /// 32-bit jump offsets + pub const SWI_J32: u32 = 0x00000004; + /// value table is split (only for 32-bit values) + pub const SWI_VSPLIT: u32 = 0x00000008; + /// user specified switch (starting from version 2) + pub const SWI_USER: u32 = 0x00000010; + /// default case is an entry in the jump table. 
+ /// This flag is applicable in 2 cases: + /// + /// * The sparse indirect switch (i.e. a switch with a values table) + /// + /// {jump table size} == {value table size} + 1. + /// The default case entry is the last one in the table + /// (or the first one in the case of an inversed jump table). + /// + /// * The switch with insns in the jump table. + /// + /// The default case entry is before the first entry of the table. + /// See also the find_defjump_from_table() helper function. + pub const SWI_DEF_IN_TBL: u32 = 0x00000020; + /// jumptable is inversed. (last entry is for first entry in values table) + pub const SWI_JMP_INV: u32 = 0x00000040; + /// use formula (element<(pub(crate) &'a [u8]); + +pub struct Patch { + pub address: K::Usize, + pub original_byte: u8, +} + +#[derive(Clone, Copy)] +pub struct SegmentPatchOriginalValueIter<'a, K: IDAKind> { + _kind: std::marker::PhantomData, + pub(crate) entries: &'a [ID0Entry], + pub(crate) key_len: usize, + //pub(crate) segment_strings: SegmentStringsIter<'a>, +} +impl<'a, K: IDAKind> SegmentPatchOriginalValueIter<'a, K> { + pub(crate) fn new(entries: &'a [ID0Entry], key_len: usize) -> Self { + Self { + _kind: std::marker::PhantomData, + entries, + key_len, + } + } + + fn patch_from_entry(&self, entry: &ID0Entry) -> Result> { + // TODO find the InnerRef for this + let addr_raw = &entry.key[self.key_len..]; + + let address = K::Usize::from_be_bytes(addr_raw) + .ok_or_else(|| anyhow!("Invalid id1 entry address"))?; + + let original_value = K::Usize::from_le_bytes(&entry.value[..]) + .ok_or_else(|| anyhow!("Invalid id1 entry original value"))?; + let original_byte = AsPrimitive::::as_(original_value); + + // TODO the rest of the value is unknown, it's not the id1 flag... + let _rest_byte = original_value >> 8; + Ok(Patch { + address, + original_byte, + }) + } +} + +impl Iterator for SegmentPatchOriginalValueIter<'_, K> { + type Item = Result>; + + fn next(&mut self) -> Option { + let (first, rest) = self.entries.split_first()?; + self.entries = rest; + + Some(self.patch_from_entry(first)) + } + + fn size_hint(&self) -> (usize, Option) { + (self.entries.len(), Some(self.entries.len())) + } +} + +impl ExactSizeIterator for SegmentPatchOriginalValueIter<'_, K> {} diff --git a/src/id0/root_info.rs b/src/id0/root_info.rs index 9fcba13..f597f6b 100644 --- a/src/id0/root_info.rs +++ b/src/id0/root_info.rs @@ -1,56 +1,94 @@ use std::io::Read; use anyhow::Result; +use num_enum::{IntoPrimitive, TryFromPrimitive}; +use num_traits::{CheckedAdd, CheckedSub, WrappingAdd, WrappingSub}; -use crate::ida_reader::IdaUnpack; +use crate::{ida_reader::IdbReadKind, IDAKind, IDAUsize}; use super::*; #[derive(Clone, Debug)] -pub enum IDBRootInfo<'a> { +pub enum IDBRootInfo<'a, K: IDAKind> { /// it's just the "Root Node" String RootNodeName, InputFile(&'a [u8]), - Crc(u64), - ImageBase(u64), - OpenCount(u64), - CreatedDate(u64), - Version(u64), + Crc(K::Usize), + ImageBase(ImageBase), + OpenCount(K::Usize), + CreatedDate(K::Usize), + Version(K::Usize), Md5(&'a [u8; 16]), VersionString(&'a str), Sha256(&'a [u8; 32]), - IDAInfo(Box), + IDAInfo(Box>), Unknown(&'a ID0Entry), } +#[derive(Copy, Clone, Debug)] +pub struct ImageBase(pub(crate) K::Usize); +impl ImageBase { + // TODO create a nodeidx_t type + pub fn ea2node(&self, ea: K::Usize) -> Result> { + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x1db9c0 + if ea.is_max() { + return Ok(NodeIdx(ea)); + } + if cfg!(feature = "restrictive") { + ea.checked_add(&self.0) + .map(NodeIdx) + .ok_or_else(|| anyhow!("Invalid 
address on ea2node")) + } else { + Ok(NodeIdx(ea.wrapping_add(&self.0))) + } + } + pub fn node2ea(&self, node: NodeIdx) -> Result { + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x1dba10 + if cfg!(feature = "restrictive") { + node.0 + .checked_sub(&self.0) + .ok_or_else(|| anyhow!("Invalid address on node2ea")) + } else { + Ok(node.0.wrapping_sub(&self.0)) + } + } +} + +#[derive(Copy, Clone, Debug)] +pub struct NodeIdx(pub(crate) K::Usize); + +pub trait AsNodeIdx { + fn as_node_idx(&self) -> NodeIdx; +} + #[derive(Clone, Debug)] -pub enum IDBParam { - V1(IDBParam1), - V2(IDBParam2), +pub enum IDBParam { + V1(IDBParam1), + V2(IDBParam2), } #[derive(Clone, Debug)] -pub struct IDBParam1 { +pub struct IDBParam1 { pub version: u16, pub cpu: Vec, pub lflags: u8, pub demnames: u8, pub filetype: u16, - pub fcoresize: u64, - pub corestart: u64, + pub fcoresize: K::Usize, + pub corestart: K::Usize, pub ostype: u16, pub apptype: u16, - pub startsp: u64, + pub startsp: K::Usize, pub af: u16, - pub startip: u64, - pub startea: u64, - pub minea: u64, - pub maxea: u64, - pub ominea: u64, - pub omaxea: u64, - pub lowoff: u64, - pub highoff: u64, - pub maxref: u64, + pub startip: K::Usize, + pub startea: K::Usize, + pub minea: K::Usize, + pub maxea: K::Usize, + pub ominea: K::Usize, + pub omaxea: K::Usize, + pub lowoff: K::Usize, + pub highoff: K::Usize, + pub maxref: K::Usize, pub ascii_break: u8, pub wide_high_byte_first: u8, pub indent: u8, @@ -67,7 +105,7 @@ pub struct IDBParam1 { pub showpref: u8, pub prefseg: u8, pub asmtype: u8, - pub baseaddr: u64, + pub baseaddr: K::Usize, pub xrefs: u8, pub binpref: u16, pub cmtflag: u8, @@ -78,7 +116,7 @@ pub struct IDBParam1 { pub asciiflags: u8, pub listnames: u8, pub asciiprefs: [u8; 16], - pub asciisernum: u64, + pub asciisernum: K::Usize, pub asciizeroes: u8, pub tribyte_order: u8, pub mf: u8, @@ -86,13 +124,13 @@ pub struct IDBParam1 { pub assume: u8, pub checkarg: u8, // offset 131 - pub start_ss: u64, - pub start_cs: u64, - pub main: u64, - pub short_dn: u64, - pub long_dn: u64, - pub datatypes: u64, - pub strtype: u64, + pub start_ss: K::Usize, + pub start_cs: K::Usize, + pub main: K::Usize, + pub short_dn: K::Usize, + pub long_dn: K::Usize, + pub datatypes: K::Usize, + pub strtype: K::Usize, pub af2: u16, pub namelen: u16, pub margin: u16, @@ -116,7 +154,7 @@ pub struct IDBParam1 { } #[derive(Clone, Debug)] -pub struct IDBParam2 { +pub struct IDBParam2 { pub version: u16, pub cpu: Vec, pub genflags: Inffl, @@ -128,23 +166,23 @@ pub struct IDBParam2 { pub asmtype: u8, pub specsegs: u8, pub af: Af, - pub baseaddr: u64, - pub start_ss: u64, - pub start_cs: u64, - pub start_ip: u64, - pub start_ea: u64, - pub start_sp: u64, - pub main: u64, - pub min_ea: u64, - pub max_ea: u64, - pub omin_ea: u64, - pub omax_ea: u64, - pub lowoff: u64, - pub highoff: u64, - pub maxref: u64, - pub privrange_start_ea: u64, - pub privrange_end_ea: u64, - pub netdelta: u64, + pub baseaddr: K::Usize, + pub start_ss: K::Usize, + pub start_cs: K::Usize, + pub start_ip: K::Usize, + pub start_ea: K::Usize, + pub start_sp: K::Usize, + pub main: K::Usize, + pub min_ea: K::Usize, + pub max_ea: K::Usize, + pub omin_ea: K::Usize, + pub omax_ea: K::Usize, + pub lowoff: K::Usize, + pub highoff: K::Usize, + pub maxref: K::Usize, + pub privrange_start_ea: K::Usize, + pub privrange_end_ea: K::Usize, + pub netdelta: K::Usize, pub xrefnum: u8, pub type_xrefnum: u8, pub refcmtnum: u8, @@ -169,9 +207,10 @@ pub struct IDBParam2 { pub strlit_zeroes: u8, pub strtype: u32, pub strlit_pref: 
String, - pub strlit_sernum: u64, - pub datatypes: u64, + pub strlit_sernum: K::Usize, + pub datatypes: K::Usize, pub cc_id: Compiler, + pub cc_guessed: bool, pub cc_cm: u8, pub cc_size_i: u8, pub cc_size_b: u8, @@ -185,9 +224,9 @@ pub struct IDBParam2 { pub appcall_options: u32, } -impl IDBParam { - pub(crate) fn read(data: &[u8], is_64: bool) -> Result { - let mut input = IdaUnpacker::new(data, is_64); +impl IDBParam { + pub(crate) fn read(data: &[u8]) -> Result { + let mut input = data; let magic: [u8; 3] = bincode::deserialize_from(&mut input)?; let magic_old = match &magic[..] { b"ida" => { @@ -223,102 +262,99 @@ impl IDBParam { match version { // TODO old version may contain extra data at the end with unknown purpose ..=699 => {} - 700.. => ensure!( - input.inner().is_empty(), - "Data left after the IDBParam", - ), + 700.. => ensure!(input.is_empty(), "Data left after the IDBParam",), } Ok(param) } pub(crate) fn read_v1( - mut input: &mut impl IdaUnpack, + mut input: &mut impl IdbReadKind, version: u16, cpu: Vec, ) -> Result { - let lflags: u8 = bincode::deserialize_from(&mut input)?; - let demnames: u8 = bincode::deserialize_from(&mut input)?; - let filetype: u16 = bincode::deserialize_from(&mut input)?; - let fcoresize: u64 = input.read_word()?; - let corestart: u64 = input.read_word()?; - let ostype: u16 = bincode::deserialize_from(&mut input)?; - let apptype: u16 = bincode::deserialize_from(&mut input)?; - let startsp: u64 = input.read_word()?; - let af: u16 = bincode::deserialize_from(&mut input)?; - let startip: u64 = input.read_word()?; - let startea: u64 = input.read_word()?; - let minea: u64 = input.read_word()?; - let maxea: u64 = input.read_word()?; - let ominea: u64 = input.read_word()?; - let omaxea: u64 = input.read_word()?; - let lowoff: u64 = input.read_word()?; - let highoff: u64 = input.read_word()?; - let maxref: u64 = input.read_word()?; - let ascii_break: u8 = bincode::deserialize_from(&mut input)?; - let wide_high_byte_first: u8 = bincode::deserialize_from(&mut input)?; - let indent: u8 = bincode::deserialize_from(&mut input)?; - let comment: u8 = bincode::deserialize_from(&mut input)?; - let xrefnum: u8 = bincode::deserialize_from(&mut input)?; - let entab: u8 = bincode::deserialize_from(&mut input)?; - let specsegs: u8 = bincode::deserialize_from(&mut input)?; - let voids: u8 = bincode::deserialize_from(&mut input)?; - let _unkownw: u8 = bincode::deserialize_from(&mut input)?; - let showauto: u8 = bincode::deserialize_from(&mut input)?; - let auto: u8 = bincode::deserialize_from(&mut input)?; - let border: u8 = bincode::deserialize_from(&mut input)?; - let null: u8 = bincode::deserialize_from(&mut input)?; - let genflags: u8 = bincode::deserialize_from(&mut input)?; - let showpref: u8 = bincode::deserialize_from(&mut input)?; - let prefseg: u8 = bincode::deserialize_from(&mut input)?; - let asmtype: u8 = bincode::deserialize_from(&mut input)?; - let baseaddr: u64 = input.read_word()?; - let xrefs: u8 = bincode::deserialize_from(&mut input)?; - let binpref: u16 = bincode::deserialize_from(&mut input)?; - let cmtflag: u8 = bincode::deserialize_from(&mut input)?; - let nametype: u8 = bincode::deserialize_from(&mut input)?; - let showbads: u8 = bincode::deserialize_from(&mut input)?; - let prefflag: u8 = bincode::deserialize_from(&mut input)?; - let packbase: u8 = bincode::deserialize_from(&mut input)?; - let asciiflags: u8 = bincode::deserialize_from(&mut input)?; - let listnames: u8 = bincode::deserialize_from(&mut input)?; + let lflags = input.read_u8()?; + let 
demnames = input.read_u8()?; + let filetype = input.read_u16()?; + let fcoresize = input.read_usize()?; + let corestart = input.read_usize()?; + let ostype = input.read_u16()?; + let apptype = input.read_u16()?; + let startsp = input.read_usize()?; + let af = input.read_u16()?; + let startip = input.read_usize()?; + let startea = input.read_usize()?; + let minea = input.read_usize()?; + let maxea = input.read_usize()?; + let ominea = input.read_usize()?; + let omaxea = input.read_usize()?; + let lowoff = input.read_usize()?; + let highoff = input.read_usize()?; + let maxref = input.read_usize()?; + let ascii_break = input.read_u8()?; + let wide_high_byte_first = input.read_u8()?; + let indent = input.read_u8()?; + let comment = input.read_u8()?; + let xrefnum = input.read_u8()?; + let entab = input.read_u8()?; + let specsegs = input.read_u8()?; + let voids = input.read_u8()?; + let _unkownw = input.read_u8()?; + let showauto = input.read_u8()?; + let auto = input.read_u8()?; + let border = input.read_u8()?; + let null = input.read_u8()?; + let genflags = input.read_u8()?; + let showpref = input.read_u8()?; + let prefseg = input.read_u8()?; + let asmtype = input.read_u8()?; + let baseaddr = input.read_usize()?; + let xrefs = input.read_u8()?; + let binpref = input.read_u16()?; + let cmtflag = input.read_u8()?; + let nametype = input.read_u8()?; + let showbads = input.read_u8()?; + let prefflag = input.read_u8()?; + let packbase = input.read_u8()?; + let asciiflags = input.read_u8()?; + let listnames = input.read_u8()?; let asciiprefs: [u8; 16] = bincode::deserialize_from(&mut input)?; - let asciisernum: u64 = input.read_word()?; - let asciizeroes: u8 = bincode::deserialize_from(&mut input)?; - let _unknown2: u16 = bincode::deserialize_from(&mut input)?; - let tribyte_order: u8 = bincode::deserialize_from(&mut input)?; - let mf: u8 = bincode::deserialize_from(&mut input)?; - let org: u8 = bincode::deserialize_from(&mut input)?; - let assume: u8 = bincode::deserialize_from(&mut input)?; - let checkarg: u8 = bincode::deserialize_from(&mut input)?; + let asciisernum = input.read_usize()?; + let asciizeroes = input.read_u8()?; + let _unknown2 = input.read_u16()?; + let tribyte_order = input.read_u8()?; + let mf = input.read_u8()?; + let org = input.read_u8()?; + let assume = input.read_u8()?; + let checkarg = input.read_u8()?; // offset 131 - let start_ss: u64 = input.read_word()?; - let start_cs: u64 = input.read_word()?; - let main: u64 = input.read_word()?; - let short_dn: u64 = input.read_word()?; - let long_dn: u64 = input.read_word()?; - let datatypes: u64 = input.read_word()?; - let strtype: u64 = input.read_word()?; - let af2: u16 = bincode::deserialize_from(&mut input)?; - let namelen: u16 = bincode::deserialize_from(&mut input)?; - let margin: u16 = bincode::deserialize_from(&mut input)?; - let lenxref: u16 = bincode::deserialize_from(&mut input)?; + let start_ss = input.read_usize()?; + let start_cs = input.read_usize()?; + let main = input.read_usize()?; + let short_dn = input.read_usize()?; + let long_dn = input.read_usize()?; + let datatypes = input.read_usize()?; + let strtype = input.read_usize()?; + let af2 = input.read_u16()?; + let namelen = input.read_u16()?; + let margin = input.read_u16()?; + let lenxref = input.read_u16()?; let lprefix: [u8; 16] = bincode::deserialize_from(&mut input)?; - let lprefixlen: u8 = bincode::deserialize_from(&mut input)?; - let compiler: u8 = bincode::deserialize_from(&mut input)?; - let model: u8 = bincode::deserialize_from(&mut input)?; - let 
sizeof_int: u8 = bincode::deserialize_from(&mut input)?; - let sizeof_bool: u8 = bincode::deserialize_from(&mut input)?; - let sizeof_enum: u8 = bincode::deserialize_from(&mut input)?; - let sizeof_algn: u8 = bincode::deserialize_from(&mut input)?; - let sizeof_short: u8 = bincode::deserialize_from(&mut input)?; - let sizeof_long: u8 = bincode::deserialize_from(&mut input)?; - let sizeof_llong: u8 = bincode::deserialize_from(&mut input)?; - let change_counter: u32 = bincode::deserialize_from(&mut input)?; - let sizeof_ldbl: u8 = bincode::deserialize_from(&mut input)?; - let _unknown_3: u32 = bincode::deserialize_from(&mut input)?; + let lprefixlen = input.read_u8()?; + let compiler = input.read_u8()?; + let model = input.read_u8()?; + let sizeof_int = input.read_u8()?; + let sizeof_bool = input.read_u8()?; + let sizeof_enum = input.read_u8()?; + let sizeof_algn = input.read_u8()?; + let sizeof_short = input.read_u8()?; + let sizeof_long = input.read_u8()?; + let sizeof_llong = input.read_u8()?; + let change_counter = input.read_u32()?; + let sizeof_ldbl = input.read_u8()?; + let _unknown_3 = input.read_u32()?; let abiname: [u8; 16] = bincode::deserialize_from(&mut input)?; - let abibits: u32 = bincode::deserialize_from(&mut input)?; - let refcmts: u8 = bincode::deserialize_from(&mut input)?; + let abibits = input.read_u32()?; + let refcmts = input.read_u8()?; Ok(IDBParam::V1(IDBParam1 { version, @@ -406,7 +442,7 @@ impl IDBParam { } pub(crate) fn read_v2( - mut input: &mut impl IdaUnpack, + mut input: &mut impl IdbReadKind, magic_old: bool, version: u16, cpu: Vec, @@ -481,7 +517,15 @@ impl IDBParam { let strlit_sernum = input.unpack_usize()?; let datatypes = input.unpack_usize()?; - let cc_id = Compiler::from_value(input.read_u8()?); + let cc_id_raw = input.read_u8()?; + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x1a15e8 + let cc_guessed = cc_id_raw & 0x80 != 0; + #[cfg(feature = "restrictive")] + let cc_id = Compiler::try_from(cc_id_raw & 0x7F) + .map_err(|_| anyhow!("Invalid compiler id: {cc_id_raw}"))?; + #[cfg(not(feature = "restrictive"))] + let cc_id = + Compiler::try_from(cc_id_raw & 0x7F).unwrap_or(Compiler::Unknown); let cc_cm = input.read_u8()?; let cc_size_i = input.read_u8()?; let cc_size_b = input.read_u8()?; @@ -550,6 +594,7 @@ impl IDBParam { strlit_sernum, datatypes, cc_id, + cc_guessed, cc_cm, cc_size_i, cc_size_b, @@ -1231,32 +1276,19 @@ impl FileType { } } +use crate::til::flag::cm::comp::*; // InnerRef fb47a09e-b8d8-42f7-aa80-2435c4d1e049 0x7e6cc0 -#[derive(Debug, Clone, Copy)] +// InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x3a03c0 +#[derive(Debug, Clone, Copy, TryFromPrimitive, IntoPrimitive)] +#[repr(u8)] pub enum Compiler { - Unknown, - VisualStudio, - Borland, - Watcom, - Gnu, - VisualAge, - Delphi, - - // IDA LIB pring compiler_name allow any value here, printing it as "?" 
- Other, -} + Unknown = COMP_UNK, + VisualStudio = COMP_MS, + Borland = COMP_BC, + Watcom = COMP_WATCOM, + Gnu = COMP_GNU, + VisualAge = COMP_VISAGE, + Delphi = COMP_BP, -impl Compiler { - pub fn from_value(value: u8) -> Self { - match value { - 0x0 => Self::Unknown, - 0x1 => Self::VisualStudio, - 0x2 => Self::Borland, - 0x3 => Self::Watcom, - 0x6 => Self::Gnu, - 0x7 => Self::VisualAge, - 0x8 => Self::Delphi, - _ => Self::Other, - } - } + Unsure = COMP_UNSURE, } diff --git a/src/id0/segment.rs b/src/id0/segment.rs index edcd823..c7cc87c 100644 --- a/src/id0/segment.rs +++ b/src/id0/segment.rs @@ -1,20 +1,25 @@ use anyhow::Result; +use num_enum::{IntoPrimitive, TryFromPrimitive}; -use std::collections::HashMap; use std::num::{NonZeroU32, NonZeroU8}; use std::ops::Range; +use crate::ida_reader::IdbReadKind; + use super::*; +#[derive(Clone, Copy, Debug)] +pub struct SegmentStringsIdx<'a>(pub(crate) &'a [u8]); + #[derive(Clone, Debug)] -pub struct Segment { - pub address: Range, - pub name: Option>, +pub struct Segment { + pub address: Range, + pub name: Option, // TODO class String - _class_id: u64, + _class_id: K::Usize, /// This field is IDP dependent. /// You may keep your information about the segment here - pub orgbase: u64, + pub orgbase: K::Usize, /// See more at [flags](https://hex-rays.com//products/ida/support/sdkdoc/group___s_f_l__.html) pub flags: SegmentFlag, /// [Segment alignment codes](https://hex-rays.com//products/ida/support/sdkdoc/group__sa__.html) @@ -33,57 +38,56 @@ pub struct Segment { /// Exception: 16bit OMF files may have several segments with the same selector, /// but this is not good (no way to denote a segment exactly) so it should be fixed in /// the future. - pub selector: u64, + pub selector: K::Usize, /// Default segment register values. 
/// First element of this array keeps information about value of [processor_t::reg_first_sreg](https://hex-rays.com//products/ida/support/sdkdoc/structprocessor__t.html#a4206e35bf99d211c18d53bd1035eb2e3) - pub defsr: [u64; 16], + pub defsr: [K::Usize; 16], /// the segment color pub color: u32, } -impl Segment { - pub(crate) fn read( - value: &[u8], - is_64: bool, - names: Option<&HashMap>>, - id0: &ID0Section, - ) -> Result { - let mut cursor = IdaUnpacker::new(value, is_64); +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct SegmentNameIdx(pub(crate) NonZeroU32); + +impl Segment { + pub(crate) fn read(value: &[u8]) -> Result { + let mut cursor = value; + let result = Self::inner_read(&mut cursor)?; + ensure!(cursor.is_empty()); + Ok(result) + } + + pub(crate) fn inner_read(cursor: &mut impl IdbReadKind) -> Result { // InnerRef 5c1b89aa-5277-4c98-98f6-cec08e1946ec 0x330684 let startea = cursor.unpack_usize()?; let size = cursor.unpack_usize()?; let name_id = cursor.unpack_usize()?; - let name_id = NonZeroU32::new(u32::try_from(name_id).unwrap()); - // TODO: I'm assuming name_id == 0 means no name, but maybe I'm wrong - let name = name_id - .map(|name_id| { - // TODO I think this is dependent on the version, and not on availability - if let Some(names) = names { - names.get(&name_id).map(Vec::to_owned).ok_or_else(|| { - anyhow!("Not found name for segment {name_id}") - }) - } else { - // if there is no names, AKA `$ segstrings`, search for the key directly - id0.name_by_index(name_id.get().into()).map(<[u8]>::to_vec) - } - }) - .transpose(); - let name = name?; + let name_id = >::try_into(name_id) + .map(NonZeroU32::new) + .map_err(|_| anyhow!("Invalid ID0 Segment NameId value"))?; + let name = name_id.map(SegmentNameIdx); // TODO AKA [sclass](https://hex-rays.com//products/ida/support/sdkdoc/classsegment__t.html) // I don't know what is this value or what it represents let _class_id = cursor.unpack_usize()?; let orgbase = cursor.unpack_usize()?; let flags = SegmentFlag::from_raw(cursor.unpack_dd()?) .ok_or_else(|| anyhow!("Invalid Segment Flag value"))?; - let align = SegmentAlignment::from_raw(cursor.unpack_dd()?) + let align = u8::try_from(cursor.unpack_dd()?) + .ok() + .and_then(|x| SegmentAlignment::try_from_primitive(x).ok()) .ok_or_else(|| anyhow!("Invalid Segment Alignment value"))?; - let comb = SegmentCombination::from_raw(cursor.unpack_dd()?) + let comb_raw = cursor.unpack_dd()?; + let comb = u8::try_from(comb_raw) + .ok() + .and_then(|x| SegmentCombination::try_from_primitive(x).ok()) .ok_or_else(|| anyhow!("Invalid Segment Combination value"))?; let perm = SegmentPermission::from_raw(cursor.unpack_dd()?) .ok_or_else(|| anyhow!("Invalid Segment Permission value"))?; let bitness = SegmentBitness::from_raw(cursor.unpack_dd()?) .ok_or_else(|| anyhow!("Invalid Segment Bitness value"))?; - let seg_type = SegmentType::from_raw(cursor.unpack_dd()?) + let seg_type = u8::try_from(cursor.unpack_dd()?) 
+ .ok() + .and_then(|x| SegmentType::try_from(x).ok()) .ok_or_else(|| anyhow!("Invalid Segment Type value"))?; let selector = cursor.unpack_usize()?; let defsr: [_; 16] = (0..16) @@ -94,7 +98,6 @@ impl Segment { let color = cursor.unpack_dd()?; // TODO maybe new versions include extra information and thid check fails - ensure!(cursor.inner().is_empty()); Ok(Segment { address: startea..startea + size, name, @@ -125,33 +128,33 @@ impl SegmentFlag { /// IDP dependent field (IBM PC: if set, ORG directive is not commented out) pub fn is_comorg(&self) -> bool { - self.0 & 0x01 != 0 + self.0 & flag::segs::sfl::SFL_COMORG != 0 } /// Orgbase is present? (IDP dependent field) pub fn is_orgbase_present(&self) -> bool { - self.0 & 0x02 != 0 + self.0 & flag::segs::sfl::SFL_OBOK != 0 } /// Is the segment hidden? pub fn is_hidden(&self) -> bool { - self.0 & 0x04 != 0 + self.0 & flag::segs::sfl::SFL_HIDDEN != 0 } /// Is the segment created for the debugger?. /// /// Such segments are temporary and do not have permanent flags. pub fn is_debug(&self) -> bool { - self.0 & 0x08 != 0 + self.0 & flag::segs::sfl::SFL_DEBUG != 0 } /// Is the segment created by the loader? pub fn is_created_by_loader(&self) -> bool { - self.0 & 0x10 != 0 + self.0 & flag::segs::sfl::SFL_LOADER != 0 } /// Hide segment type (do not print it in the listing) pub fn is_hide_type(&self) -> bool { - self.0 & 0x20 != 0 + self.0 & flag::segs::sfl::SFL_HIDETYPE != 0 } /// Header segment (do not create offsets to it in the disassembly) pub fn is_header(&self) -> bool { - self.0 & 0x40 != 0 + self.0 & flag::segs::sfl::SFL_HEADER != 0 } } @@ -174,102 +177,66 @@ impl core::fmt::Debug for SegmentFlag { } } -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, TryFromPrimitive, IntoPrimitive)] +#[repr(u8)] pub enum SegmentAlignment { /// Absolute segment. - Abs, + Abs = flag::segs::sa::SA_ABS, /// Relocatable, byte aligned. - RelByte, + RelByte = flag::segs::sa::SA_REL_BYTE, /// Relocatable, word (2-byte) aligned. - RelWord, + RelWord = flag::segs::sa::SA_REL_WORD, /// Relocatable, paragraph (16-byte) aligned. - RelPara, + RelPara = flag::segs::sa::SA_REL_PARA, /// Relocatable, aligned on 256-byte boundary. - RelPage, + RelPage = flag::segs::sa::SA_REL_PAGE, /// Relocatable, aligned on a double word (4-byte) boundary. - RelDble, + RelDble = flag::segs::sa::SA_REL_DBLE, /// This value is used by the PharLap OMF for page (4K) alignment. /// /// It is not supported by LINK. - Rel4K, + Rel4K = flag::segs::sa::SA_REL4_K, /// Segment group. 
- Group, + Group = flag::segs::sa::SA_GROUP, /// 32 bytes - Rel32Bytes, + Rel32Bytes = flag::segs::sa::SA_REL32_BYTES, /// 64 bytes - Rel64Bytes, + Rel64Bytes = flag::segs::sa::SA_REL64_BYTES, /// 8 bytes - RelQword, + RelQword = flag::segs::sa::SA_REL_QWORD, /// 128 bytes - Rel128Bytes, + Rel128Bytes = flag::segs::sa::SA_REL128_BYTES, /// 512 bytes - Rel512Bytes, + Rel512Bytes = flag::segs::sa::SA_REL512_BYTES, /// 1024 bytes - Rel1024Bytes, + Rel1024Bytes = flag::segs::sa::SA_REL1024_BYTES, /// 2048 bytes - Rel2048Bytes, -} - -impl SegmentAlignment { - fn from_raw(value: u32) -> Option { - match value { - 0 => Some(Self::Abs), - 1 => Some(Self::RelByte), - 2 => Some(Self::RelWord), - 3 => Some(Self::RelPara), - 4 => Some(Self::RelPage), - 5 => Some(Self::RelDble), - 6 => Some(Self::Rel4K), - 7 => Some(Self::Group), - 8 => Some(Self::Rel32Bytes), - 9 => Some(Self::Rel64Bytes), - 10 => Some(Self::RelQword), - 11 => Some(Self::Rel128Bytes), - 12 => Some(Self::Rel512Bytes), - 13 => Some(Self::Rel1024Bytes), - 14 => Some(Self::Rel2048Bytes), - _ => None, - } - } + Rel2048Bytes = flag::segs::sa::SA_REL2048_BYTES, } -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, TryFromPrimitive, IntoPrimitive)] +#[repr(u8)] pub enum SegmentCombination { /// Private. /// /// Do not combine with any other program segment. - Priv, + Priv = super::flag::segs::sc::SC_PRIV, /// Segment group. - Group, + Group = super::flag::segs::sc::SC_GROUP, /// Public. /// /// Combine by appending at an offset that meets the alignment requirement. - Pub, + Pub = super::flag::segs::sc::SC_PUB, /// As defined by Microsoft, same as C=2 (public). - Pub2, + Pub2 = super::flag::segs::sc::SC_PUB2, /// Stack. - Stack, + Stack = super::flag::segs::sc::SC_STACK, /// Common. Combine by overlay using maximum size. /// /// Combine as for C=2. This combine type forces byte alignment. - Common, + Common = super::flag::segs::sc::SC_COMMON, /// As defined by Microsoft, same as C=2 (public). - Pub3, -} - -impl SegmentCombination { - fn from_raw(value: u32) -> Option { - match value { - 0 => Some(Self::Priv), - 1 => Some(Self::Group), - 2 => Some(Self::Pub), - 4 => Some(Self::Pub2), - 5 => Some(Self::Stack), - 6 => Some(Self::Common), - 7 => Some(Self::Pub3), - _ => None, - } - } + Pub3 = super::flag::segs::sc::SC_PUB3, } #[derive(Clone, Copy)] @@ -330,52 +297,139 @@ impl SegmentBitness { } } -#[derive(Clone, Copy, Debug)] +// Has segment a special type?. (#SEG_XTRN, #SEG_GRP, #SEG_ABSSYM, #SEG_COMM) +// Does the address belong to a segment with a special type?.(#SEG_XTRN, #SEG_GRP, #SEG_ABSSYM, #SEG_COMM) +#[derive(Clone, Copy, Debug, TryFromPrimitive, IntoPrimitive)] +#[repr(u8)] pub enum SegmentType { /// unknown type, no assumptions - Norm, + Norm = flag::segs::ty::SEG_NORM, /// segment with 'extern' definitions. 
/// /// no instructions are allowed - Xtrn, + Xtrn = flag::segs::ty::SEG_XTRN, /// code segment - Code, + Code = flag::segs::ty::SEG_CODE, /// data segment - Data, + Data = flag::segs::ty::SEG_DATA, /// java: implementation segment - Imp, + Imp = flag::segs::ty::SEG_IMP, /// group of segments - Grp, + Grp = flag::segs::ty::SEG_GRP, /// zero-length segment - Null, + Null = flag::segs::ty::SEG_NULL, /// undefined segment type (not used) - Undf, + Undf = flag::segs::ty::SEG_UNDF, /// uninitialized segment - Bss, + Bss = flag::segs::ty::SEG_BSS, /// segment with definitions of absolute symbols - Abssym, + Abssym = flag::segs::ty::SEG_ABSSYM, /// segment with communal definitions - Comm, + Comm = flag::segs::ty::SEG_COMM, /// internal processor memory & sfr (8051) - Imem, + Imem = flag::segs::ty::SEG_IMEM, } -impl SegmentType { - fn from_raw(value: u32) -> Option { - match value { - 0 => Some(Self::Norm), - 1 => Some(Self::Xtrn), - 2 => Some(Self::Code), - 3 => Some(Self::Data), - 4 => Some(Self::Imp), - 6 => Some(Self::Grp), - 7 => Some(Self::Null), - 8 => Some(Self::Undf), - 9 => Some(Self::Bss), - 10 => Some(Self::Abssym), - 11 => Some(Self::Comm), - 12 => Some(Self::Imem), - _ => None, +pub struct SegmentIter<'a, K: IDAKind> { + pub(crate) _kind: std::marker::PhantomData, + pub(crate) segments: &'a [ID0Entry], +} + +impl Iterator for SegmentIter<'_, K> { + type Item = Result>; + + fn next(&mut self) -> Option { + let (current, rest) = self.segments.split_first()?; + self.segments = rest; + Some(Segment::read(¤t.value)) + } +} + +#[derive(Clone, Copy)] +pub struct SegmentStringIter<'a> { + pub(crate) segments: &'a [ID0Entry], + pub(crate) segment_strings: SegmentStringsIter<'a>, +} + +impl<'a> SegmentStringIter<'a> { + pub(crate) fn new(segments: &'a [ID0Entry]) -> Self { + // dummy value + let segment_strings = SegmentStringsIter { + start: 0, + end: 0, + value: &[], + }; + Self { + segments, + segment_strings, + } + } + fn inner_next(&mut self) -> Result> { + // get the next segment string + if let Some(value) = self.segment_strings.next() { + return Some(value).transpose(); + } + // no strings in this segment, next segment + let Some((current, rest)) = self.segments.split_first() else { + return Ok(None); + }; + self.segments = rest; + + let mut current_value = ¤t.value[..]; + let start = current_value.unpack_dd()?; + let end = current_value.unpack_dd()?; + ensure!(start > 0, "Invalid ID0 Segment String idx start"); + ensure!(start <= end, "Invalid ID0 Segment String idx end"); + self.segment_strings = SegmentStringsIter { + start, + end, + value: current_value, + }; + self.inner_next() + } +} + +impl<'a> Iterator for SegmentStringIter<'a> { + type Item = Result<(SegmentNameIdx, &'a [u8])>; + + fn next(&mut self) -> Option { + self.inner_next().transpose() + } +} + +#[derive(Clone, Copy)] +pub(crate) struct SegmentStringsIter<'a> { + pub(crate) start: u32, + pub(crate) end: u32, + pub(crate) value: &'a [u8], +} + +impl<'a> SegmentStringsIter<'a> { + fn inner_next(&mut self) -> Result> { + if self.start == self.end { + ensure!( + self.value.is_empty(), + "Unparsed data in ID0 Segment String: {}", + self.value.len() + ); + return Ok(None); } + let len = self.value.unpack_dd()?; + let (value, rest) = self + .value + .split_at_checked(len.try_into().unwrap()) + .ok_or_else(|| anyhow!("Invalid ID0 Segment String len"))?; + self.value = rest; + let idx = self.start; + self.start += 1; + Ok(Some((SegmentNameIdx(idx.try_into().unwrap()), value))) + } +} + +impl<'a> Iterator for 
SegmentStringsIter<'a> { + type Item = Result<(SegmentNameIdx, &'a [u8])>; + + fn next(&mut self) -> Option { + self.inner_next().transpose() } } diff --git a/src/id1.rs b/src/id1.rs index e1071ec..0a6aff8 100644 --- a/src/id1.rs +++ b/src/id1.rs @@ -1,142 +1,38 @@ use anyhow::{anyhow, ensure, Result}; -use std::ops::Range; +pub mod flag; -use crate::ida_reader::IdaGenericUnpack; -use crate::{IDBHeader, IDBSectionCompression, VaVersion}; +use std::ops::{Div, Range, Rem}; + +use crate::ida_reader::{IdbRead, IdbReadKind}; +use crate::{IDAKind, SectionReader, VaVersion}; #[derive(Clone, Debug)] pub struct ID1Section { pub seglist: Vec, } -#[derive(Clone, Debug)] -pub struct SegInfo { - pub offset: u64, - pub data: Vec, - // TODO find a way to decode this data - _flags: Vec, -} +impl SectionReader for ID1Section { + type Result = Self; -impl ID1Section { - pub(crate) fn read( - input: &mut impl IdaGenericUnpack, - header: &IDBHeader, - compress: IDBSectionCompression, - ) -> Result { - match compress { - IDBSectionCompression::None => Self::read_inner(input, header), - IDBSectionCompression::Zlib => { - let mut input = flate2::read::ZlibDecoder::new(input); - Self::read_inner(&mut input, header) - } - } + fn read_section>(input: &mut I) -> Result { + Self::read_inner::(input) + } + + fn size_from_v910(header: &crate::IDBHeaderV910) -> u64 { + header.id1.unwrap().size.get() } +} - fn read_inner( - input: &mut impl IdaGenericUnpack, - header: &IDBHeader, - ) -> Result { +impl ID1Section { + fn read_inner(input: &mut impl std::io::Read) -> Result { // TODO pages are always 0x2000? const PAGE_SIZE: usize = 0x2000; let mut buf = vec![0; PAGE_SIZE]; input.read_exact(&mut buf[..])?; - let mut header_page = &buf[..]; - let version = VaVersion::read(&mut header_page)?; - let (npages, seglist_raw) = match version { - VaVersion::Va0 - | VaVersion::Va1 - | VaVersion::Va2 - | VaVersion::Va3 - | VaVersion::Va4 => { - let nsegments: u16 = - bincode::deserialize_from(&mut header_page)?; - let npages: u16 = bincode::deserialize_from(&mut header_page)?; - ensure!( - npages > 0, - "Invalid number of pages, net at least one for the header" - ); - // TODO section_size / npages == 0x2000 - - // TODO the reference code uses the magic version, should it use - // the version itself instead? - let seglist: Vec = if header - .magic_version - .is_64() - { - (0..nsegments) - .map(|_| { - let start: u64 = - bincode::deserialize_from(&mut header_page)?; - let end: u64 = - bincode::deserialize_from(&mut header_page)?; - ensure!(start <= end); - let offset: u64 = - bincode::deserialize_from(&mut header_page)?; - Ok(SegInfoVaNRaw { - address: start..end, - offset, - }) - }) - .collect::>()? - } else { - (0..nsegments) - .map(|_| { - let start: u32 = - bincode::deserialize_from(&mut header_page)?; - let end: u32 = - bincode::deserialize_from(&mut header_page)?; - ensure!(start <= end); - let offset: u32 = - bincode::deserialize_from(&mut header_page)?; - Ok(SegInfoVaNRaw { - address: start.into()..end.into(), - offset: offset.into(), - }) - }) - .collect::>()? 
- }; - (u32::from(npages), SegInfoRaw::VaN(seglist)) - } - VaVersion::VaX => { - let unknown_always3: u32 = - bincode::deserialize_from(&mut header_page)?; - ensure!(unknown_always3 == 3); - let nsegments: u32 = - bincode::deserialize_from(&mut header_page)?; - let unknown_always2048: u32 = - bincode::deserialize_from(&mut header_page)?; - ensure!(unknown_always2048 == 2048); - let npages: u32 = bincode::deserialize_from(&mut header_page)?; - - let seglist: Vec> = (0..nsegments) - // TODO the reference code uses the magic version, should it use - // the version itself instead? - .map(|_| { - let (start, end) = match header.magic_version { - crate::IDBMagic::IDA0 | crate::IDBMagic::IDA1 => { - let startea: u32 = bincode::deserialize_from( - &mut header_page, - )?; - let endea: u32 = bincode::deserialize_from( - &mut header_page, - )?; - (startea.into(), endea.into()) - } - crate::IDBMagic::IDA2 => ( - bincode::deserialize_from(&mut header_page)?, - bincode::deserialize_from(&mut header_page)?, - ), - }; - ensure!(start <= end); - Ok(start..end) - }) - .collect::>()?; - (npages, SegInfoRaw::VaX(seglist)) - } - }; + let (npages, seglist_raw) = Self::read_header::(&mut &buf[..])?; // make sure the unused values a all zero - ensure!(header_page.iter().all(|b| *b == 0)); + ensure!(buf.iter().all(|b| *b == 0)); // sort segments by address let mut overlay_check = match &seglist_raw { @@ -155,20 +51,26 @@ impl ID1Section { ensure!(!overlap); // make sure the data fits the available pages - let required_size: u64 = - overlay_check.iter().map(|s| (s.end - s.start) * 4).sum(); - let required_pages = - required_size.div_ceil(u64::try_from(PAGE_SIZE).unwrap()); + let required_size: K::Usize = overlay_check + .iter() + .map(|s| (s.end - s.start) * K::Usize::from(4u8)) + .sum(); + let round_up = required_size + .rem(K::Usize::try_from(PAGE_SIZE).unwrap()) + != K::Usize::from(0u8); + let required_pages = required_size + .div(K::Usize::try_from(PAGE_SIZE).unwrap()) + + K::Usize::from(round_up as u8); // TODO if the extra data at the end of the section is identified, review replacing <= with == // -1 because the first page is always the header - ensure!(required_pages <= u64::from(npages - 1)); + ensure!(required_pages <= K::Usize::from(npages - 1)); // populated the seglist data using the pages let seglist = match seglist_raw { SegInfoRaw::VaN(mut segs) => { // sort it by disk offset, so we can read one after the other segs.sort_unstable_by_key(|s| s.offset); - let mut current_offset = u64::try_from(PAGE_SIZE).unwrap(); + let mut current_offset = K::Usize::try_from(PAGE_SIZE).unwrap(); segs.into_iter() .map(|seg| { // skip any gaps @@ -182,7 +84,7 @@ impl ID1Section { ensure_all_bytes_are_zero( std::io::Read::take( &mut *input, - seg.offset - current_offset, + (seg.offset - current_offset).into(), ), &mut buf, )?; @@ -191,13 +93,11 @@ impl ID1Section { std::cmp::Ordering::Equal => {} } let len = seg.address.end - seg.address.start; - let (data, _flags) = - split_flags_data(&mut *input, len)?; - current_offset += len * 4; + let data = read_data::(&mut *input, len)?; + current_offset += len * K::Usize::from(4u8); Ok(SegInfo { - offset: seg.address.start, + offset: (seg.address.start).into(), data, - _flags, }) }) .collect::>()? 
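As a minimal standalone sketch of the page arithmetic used in the hunk above (assuming, as the code does, a fixed 0x2000-byte ID1 page and 4 bytes of packed value+flags per address; the `id1_required_pages` helper name is illustrative and not part of the crate):

fn id1_required_pages(segments: &[std::ops::Range<u64>]) -> u64 {
    const PAGE_SIZE: u64 = 0x2000;
    // each address stores a packed u32: low byte = value, upper bits = flags
    let required_size: u64 =
        segments.iter().map(|s| (s.end - s.start) * 4).sum();
    // round up to whole pages, mirroring the div/rem pair in the hunk above
    required_size.div_ceil(PAGE_SIZE)
}

fn main() {
    // one segment covering 0x1000 addresses needs 0x4000 bytes -> 2 pages
    assert_eq!(id1_required_pages(&[0x40_0000..0x40_1000]), 2);
}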
@@ -206,14 +106,13 @@ impl ID1Section { // the data for the segments are stored sequentialy in disk segs.into_iter() .map(|address| { - let (data, _flags) = split_flags_data( + let data = read_data::( &mut *input, address.end - address.start, )?; Ok(SegInfo { - offset: address.start, + offset: (address.start).into(), data, - _flags, }) }) .collect::>()? @@ -228,22 +127,406 @@ impl ID1Section { Ok(Self { seglist }) } + + fn read_header( + input: &mut impl IdbReadKind, + ) -> Result<(u32, SegInfoRaw)> { + let version = VaVersion::read(&mut *input)?; + match version { + VaVersion::Va0 + | VaVersion::Va1 + | VaVersion::Va2 + | VaVersion::Va3 + | VaVersion::Va4 => { + let nsegments = input.read_u16()?; + let npages = input.read_u16()?; + ensure!( + npages > 0, + "Invalid number of pages, net at least one for the header" + ); + // TODO section_size / npages == 0x2000 + + // TODO the reference code uses the magic version, should it use + // the version itself instead? + let seglist: Vec> = (0..nsegments) + .map(|_| { + let start = input.read_usize()?; + let end = input.read_usize()?; + ensure!(start <= end); + let offset = input.read_usize()?; + Ok(SegInfoVaNRaw { + address: start..end, + offset, + }) + }) + .collect::>()?; + Ok((u32::from(npages), SegInfoRaw::VaN(seglist))) + } + VaVersion::VaX => { + let unknown_always3 = input.read_u32()?; + ensure!(unknown_always3 == 3); + let nsegments = input.read_u32()?; + let unknown_always2048 = input.read_u32()?; + ensure!(unknown_always2048 == 2048); + let npages = input.read_u32()?; + + let seglist = (0..nsegments) + // TODO the reference code uses the magic version, should it use + // the version itself instead? + .map(|_| { + let start = input.read_usize()?; + let end = input.read_usize()?; + ensure!(start <= end); + Ok(start..end) + }) + .collect::>()?; + Ok((npages, SegInfoRaw::VaX(seglist))) + } + } + } + + pub fn byte_by_address(&self, address: u64) -> Option { + for seg in &self.seglist { + let addr_range = + seg.offset..seg.offset + u64::try_from(seg.data.len()).unwrap(); + if addr_range.contains(&address) { + return Some(ByteInfoRaw( + seg.data[usize::try_from(address - seg.offset).unwrap()], + )); + } + } + None + } + + pub fn all_bytes( + &self, + ) -> impl Iterator + use<'_> { + self.seglist.iter().flat_map(|seg| { + seg.data.iter().enumerate().map(|(i, b)| { + (seg.offset + u64::try_from(i).unwrap(), ByteInfoRaw(*b)) + }) + }) + } +} + +#[derive(Clone, Debug)] +pub struct SegInfo { + pub offset: u64, + // data and flags + data: Vec, +} + +#[derive(Clone, Copy, Debug)] +pub struct ByteInfoRaw(u32); + +impl ByteInfoRaw { + pub fn as_raw(&self) -> u32 { + self.0 + } + + pub fn byte_raw(&self) -> u8 { + (self.0 & flag::byte::MS_VAL) as u8 + } + + pub fn flags_raw(&self) -> u32 { + self.0 & !flag::byte::MS_VAL + } + + pub fn decode(&self) -> Result { + ByteInfo::from_raw(*self) + } + + pub fn byte_value(&self) -> Option { + (self.0 & flag::byte::FF_IVL != 0) + .then_some((self.0 & flag::byte::MS_VAL) as u8) + } + + pub fn byte_type(&self) -> ByteRawType { + use flag::flags::byte_type::*; + match self.0 & MS_CLS { + FF_DATA => ByteRawType::Data, + FF_CODE => ByteRawType::Code, + FF_TAIL => ByteRawType::Tail, + FF_UNK => ByteRawType::Unknown, + _ => unreachable!(), + } + } + + pub fn has_comment(&self) -> bool { + self.0 & flag::flags::byte_info::FF_COMM != 0 + } + pub fn has_reference(&self) -> bool { + self.0 & flag::flags::byte_info::FF_REF != 0 + } + pub fn has_comment_ext(&self) -> bool { + self.0 & flag::flags::byte_info::FF_LINE != 0 + } 
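For readers unfamiliar with IDA's flags layout, a minimal sketch of how the packed word wrapped by `ByteInfoRaw` decodes, using the constant values introduced in src/id1/flag.rs further down in this diff (the `describe` helper is illustrative only and not part of the crate):

fn describe(raw: u32) -> (Option<u8>, &'static str) {
    // low 8 bits hold the byte value; FF_IVL marks it as initialized
    const MS_VAL: u32 = 0x0000_00FF;
    const FF_IVL: u32 = 0x0000_0100;
    // MS_CLS selects the byte typing
    const MS_CLS: u32 = 0x0000_0600;
    const FF_CODE: u32 = 0x0000_0600;
    const FF_DATA: u32 = 0x0000_0400;
    const FF_TAIL: u32 = 0x0000_0200;
    let value = (raw & FF_IVL != 0).then(|| (raw & MS_VAL) as u8);
    let kind = match raw & MS_CLS {
        FF_CODE => "code",
        FF_DATA => "data",
        FF_TAIL => "tail",
        _ => "unknown",
    };
    (value, kind)
}

fn main() {
    // 0x0755 = FF_IVL | FF_CODE | 0x55: an initialized code byte with value 0x55
    assert_eq!(describe(0x0755), (Some(0x55), "code"));
}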
+ pub fn has_name(&self) -> bool { + self.0 & flag::flags::byte_info::FF_NAME != 0 + } + pub fn has_dummy_name(&self) -> bool { + self.0 & flag::flags::byte_info::FF_LABL != 0 + } + pub fn exec_flow_from_prev_inst(&self) -> bool { + self.0 & flag::flags::byte_info::FF_FLOW != 0 + } + pub fn op_invert_sig(&self) -> bool { + self.0 & flag::flags::byte_info::FF_SIGN != 0 + } + pub fn op_bitwise_negation(&self) -> bool { + self.0 & flag::flags::byte_info::FF_BNOT != 0 + } + pub fn is_unused_set(&self) -> bool { + self.0 & flag::flags::byte_info::FF_UNUSED != 0 + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct ByteInfo { + pub byte_value: Option, + pub has_comment: bool, + pub has_reference: bool, + pub has_comment_ext: bool, + pub has_name: bool, + pub has_dummy_name: bool, + pub exec_flow_from_prev_inst: bool, + pub op_invert_sig: bool, + pub op_bitwise_negation: bool, + pub is_unused_set: bool, + pub byte_type: ByteType, +} + +impl ByteInfo { + fn from_raw(value: ByteInfoRaw) -> Result { + let byte_type = ByteType::from_raw(value)?; + Ok(Self { + byte_value: value.byte_value(), + has_comment: value.has_comment(), + has_reference: value.has_reference(), + has_comment_ext: value.has_comment_ext(), + has_name: value.has_name(), + has_dummy_name: value.has_dummy_name(), + exec_flow_from_prev_inst: value.exec_flow_from_prev_inst(), + op_invert_sig: value.op_invert_sig(), + op_bitwise_negation: value.op_bitwise_negation(), + is_unused_set: value.is_unused_set(), + byte_type, + }) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum ByteRawType { + Code, + Data, + Tail, + Unknown, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum ByteType { + Code(CodeData), + Data(ByteData), + Tail, + Unknown, +} + +impl ByteType { + fn from_raw(value: ByteInfoRaw) -> Result { + match value.byte_type() { + // TODO find the InnerRef for this decoding, this is not correct + ByteRawType::Code => { + Ok(ByteType::Code(CodeData::from_raw(value.0.into())?)) + } + ByteRawType::Data => { + Ok(ByteType::Data(ByteData::from_raw(value.0)?)) + } + ByteRawType::Tail => Ok(ByteType::Tail), + ByteRawType::Unknown => Ok(ByteType::Unknown), + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct CodeData { + pub operands: [InstOpInfo; 8], + pub is_func_start: bool, + pub is_reserved_set: bool, + pub is_immediate_value: bool, + pub is_jump_table: bool, +} + +impl CodeData { + fn from_raw(value: u64) -> Result { + use flag::flags::code_info::*; + let is_func_start = value & FF_FUNC as u64 != 0; + let is_reserved_set = value & FF_RESERVED as u64 != 0; + let is_immediate_value = value & FF_IMMD as u64 != 0; + let is_jump_table = value & FF_JUMP as u64 != 0; + #[cfg(feature = "restrictive")] + if value + & (MS_CODE as u64) + & !((FF_FUNC | FF_RESERVED | FF_IMMD | FF_JUMP) as u64) + != 0 + { + return Err(anyhow!("Invalid id1 CodeData flag")); + } + let operands = [ + InstOpInfo::from_raw(value, 7)?, + InstOpInfo::from_raw(value, 6)?, + InstOpInfo::from_raw(value, 5)?, + InstOpInfo::from_raw(value, 4)?, + InstOpInfo::from_raw(value, 3)?, + InstOpInfo::from_raw(value, 2)?, + InstOpInfo::from_raw(value, 1)?, + InstOpInfo::from_raw(value, 0)?, + ]; + Ok(CodeData { + is_func_start, + is_reserved_set, + is_immediate_value, + is_jump_table, + operands, + }) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct ByteData { + pub data_type: ByteDataType, + pub print_info: InstOpInfo, +} + +impl ByteData { + fn from_raw(value: u32) -> Result { + 
Ok(Self { + data_type: ByteDataType::from_raw(value), + print_info: InstOpInfo::from_raw(value.into(), 0)?, + }) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum ByteDataType { + Byte, + Word, + Dword, + Qword, + Oword, + Yword, + Zword, + Tbyte, + Float, + Double, + Packreal, + Strlit, + Struct, + Align, + Reserved, + Custom, +} + +impl ByteDataType { + fn from_raw(value: u32) -> Self { + use flag::flags::data_info::*; + match value & DT_TYPE { + FF_BYTE => ByteDataType::Byte, + FF_WORD => ByteDataType::Word, + FF_DWORD => ByteDataType::Dword, + FF_QWORD => ByteDataType::Qword, + FF_TBYTE => ByteDataType::Tbyte, + FF_STRLIT => ByteDataType::Strlit, + FF_STRUCT => ByteDataType::Struct, + FF_OWORD => ByteDataType::Oword, + FF_FLOAT => ByteDataType::Float, + FF_DOUBLE => ByteDataType::Double, + FF_PACKREAL => ByteDataType::Packreal, + FF_ALIGN => ByteDataType::Align, + FF_RESERVED => ByteDataType::Reserved, + FF_CUSTOM => ByteDataType::Custom, + FF_YWORD => ByteDataType::Yword, + FF_ZWORD => ByteDataType::Zword, + _ => unreachable!(), + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum InstOpInfo { + /// Void (unknown) + Void, + /// Hexadecimal number + Hex, + /// Decimal number + Dec, + /// Char + Char, + /// Segment + Seg, + /// Offset + Off, + /// Binary number + Bin, + /// Octal number + Oct, + /// Enumeration + Enum, + /// Forced operand + Fop, + /// Struct offset + StrOff, + /// Stack variable + StackVar, + /// Floating point number + Float, + /// Custom representation + Custom, +} + +impl InstOpInfo { + fn from_raw(value: u64, n: u32) -> Result { + use flag::flags::inst_info::*; + Ok( + match ((value >> get_operand_type_shift(n)) as u8) & MS_N_TYPE { + FF_N_VOID => Self::Void, + FF_N_NUMH => Self::Hex, + FF_N_NUMD => Self::Dec, + FF_N_CHAR => Self::Char, + FF_N_SEG => Self::Seg, + FF_N_OFF => Self::Off, + FF_N_NUMB => Self::Bin, + FF_N_NUMO => Self::Oct, + FF_N_ENUM => Self::Enum, + FF_N_FOP => Self::Fop, + FF_N_STRO => Self::StrOff, + FF_N_STK => Self::StackVar, + FF_N_FLT => Self::Float, + FF_N_CUST => Self::Custom, + // TODO reserved values? 
+ #[cfg(not(feature = "restrictive"))] + 0xE | 0xF => Self::Custom, + #[cfg(feature = "restrictive")] + 0xE | 0xF => return Err(anyhow!("Invalid ID1 operand value")), + _ => unreachable!(), + }, + ) + } } #[derive(Clone, Debug)] -enum SegInfoRaw { - VaN(Vec), - VaX(Vec>), +enum SegInfoRaw { + VaN(Vec>), + VaX(Vec>), } #[derive(Clone, Debug)] -struct SegInfoVaNRaw { - address: Range, - offset: u64, +struct SegInfoVaNRaw { + address: Range, + offset: K::Usize, } fn ensure_all_bytes_are_zero( - mut input: impl IdaGenericUnpack, + mut input: impl IdbRead, buf: &mut [u8], ) -> Result<()> { loop { @@ -261,10 +544,7 @@ fn ensure_all_bytes_are_zero( Ok(()) } -fn ignore_bytes( - mut input: impl IdaGenericUnpack, - buf: &mut [u8], -) -> Result<()> { +fn ignore_bytes(mut input: impl IdbRead, buf: &mut [u8]) -> Result<()> { loop { match input.read(buf) { // found EoF @@ -280,17 +560,20 @@ fn ignore_bytes( Ok(()) } -fn split_flags_data( - mut input: impl IdaGenericUnpack, - len: u64, -) -> Result<(Vec, Vec)> { - let len = usize::try_from(len).unwrap(); - let mut flags = Vec::with_capacity(len); - let mut data = Vec::with_capacity(len); - for _i in 0..len { - let bytes = input.read_u32()?; - data.push((bytes & 0xFF) as u8); - flags.push(bytes >> 8); - } - Ok((data, flags)) +fn read_data( + mut input: impl IdbRead, + len: K::Usize, +) -> Result> { + let len = >::try_into(len).unwrap(); + let mut data = vec![0u8; len * 4]; + input.read_exact(&mut data)?; + Ok(data + .chunks(4) + .map(|b| u32::from_le_bytes(b.try_into().unwrap())) + .collect()) +} + +const fn get_operand_type_shift(n: u32) -> u32 { + let n_mod = (n > 1) as u32; + 20 + (4 * (n + n_mod)) } diff --git a/src/id1/flag.rs b/src/id1/flag.rs new file mode 100644 index 0000000..9a23f84 --- /dev/null +++ b/src/id1/flag.rs @@ -0,0 +1,145 @@ +/// Flags structure +/// Here we define the organization of ::flags64_t values. +/// Low 8 bits contain value of corresponding byte of the program. +/// The next bit is set if the byte is initialized. +pub mod byte { + /// Mask for byte value + pub const MS_VAL: u32 = 0x000000FF; + /// Byte has value ? + pub const FF_IVL: u32 = 0x00000100; +} + +pub mod flags { + pub mod byte_type { + /// Mask for typing + pub const MS_CLS: u32 = 0x00000600; + /// Code ? + pub const FF_CODE: u32 = 0x00000600; + /// Data ? + pub const FF_DATA: u32 = 0x00000400; + /// Tail ? + pub const FF_TAIL: u32 = 0x00000200; + /// Unknown ? + pub const FF_UNK: u32 = 0x00000000; + } + + /// specific state information + /// Flags keep information common to all four states of bytes. + /// This information will not be automatically discarded during + /// transitions between different states. + pub mod byte_info { + /// Mask of common bits + pub const MS_COMM: u32 = 0x000FF800; + /// Has comment ? + pub const FF_COMM: u32 = 0x00000800; + /// has references + pub const FF_REF: u32 = 0x00001000; + /// Has next or prev lines ? + pub const FF_LINE: u32 = 0x00002000; + /// Has name ? + pub const FF_NAME: u32 = 0x00004000; + /// Has dummy name? + pub const FF_LABL: u32 = 0x00008000; + /// Exec flow from prev instruction + pub const FF_FLOW: u32 = 0x00010000; + /// Inverted sign of operands + pub const FF_SIGN: u32 = 0x00020000; + /// Bitwise negation of operands + pub const FF_BNOT: u32 = 0x00040000; + /// unused bit (was used for variable bytes) + pub const FF_UNUSED: u32 = 0x00080000; + } + + /// Instruction/Data operands + /// Represent instruction/data operands. 
+ /// + /// IDA keeps bitmask representations for a maximum of 8 operands: + /// + /// For data bytes, only the first bitmask is used (i.e. all elements of + /// an array have the same type). + pub mod inst_info { + /// Mask for nth arg (a 64-bit constant) + pub const MS_N_TYPE: u8 = 0xf; + /// Void (unknown)? + pub const FF_N_VOID: u8 = 0x0; + /// Hexadecimal number? + pub const FF_N_NUMH: u8 = 0x1; + /// Decimal number? + pub const FF_N_NUMD: u8 = 0x2; + /// Char ('x')? + pub const FF_N_CHAR: u8 = 0x3; + /// Segment? + pub const FF_N_SEG: u8 = 0x4; + /// Offset? + pub const FF_N_OFF: u8 = 0x5; + /// Binary number? + pub const FF_N_NUMB: u8 = 0x6; + /// Octal number? + pub const FF_N_NUMO: u8 = 0x7; + /// Enumeration? + pub const FF_N_ENUM: u8 = 0x8; + /// Forced operand? + pub const FF_N_FOP: u8 = 0x9; + /// Struct offset? + pub const FF_N_STRO: u8 = 0xA; + /// Stack variable? + pub const FF_N_STK: u8 = 0xB; + /// Floating point number? + pub const FF_N_FLT: u8 = 0xC; + /// Custom representation? + pub const FF_N_CUST: u8 = 0xD; + } + + /// data bytes + pub mod data_info { + /// Mask for DATA typing + pub const DT_TYPE: u32 = 0xF0000000; + + /// byte + pub const FF_BYTE: u32 = 0x00000000; + /// word + pub const FF_WORD: u32 = 0x10000000; + /// double word + pub const FF_DWORD: u32 = 0x20000000; + /// quadro word + pub const FF_QWORD: u32 = 0x30000000; + /// tbyte + pub const FF_TBYTE: u32 = 0x40000000; + /// string literal + pub const FF_STRLIT: u32 = 0x50000000; + /// struct variable + pub const FF_STRUCT: u32 = 0x60000000; + /// octaword/xmm word (16 bytes/128 bits) + pub const FF_OWORD: u32 = 0x70000000; + /// float + pub const FF_FLOAT: u32 = 0x80000000; + /// double + pub const FF_DOUBLE: u32 = 0x90000000; + /// packed decimal real + pub const FF_PACKREAL: u32 = 0xA0000000; + /// alignment directive + pub const FF_ALIGN: u32 = 0xB0000000; + /// reserved + pub const FF_RESERVED: u32 = 0xC0000000; + /// custom data type + pub const FF_CUSTOM: u32 = 0xD0000000; + /// ymm word (32 bytes/256 bits) + pub const FF_YWORD: u32 = 0xE0000000; + /// zmm word (64 bytes/512 bits) + pub const FF_ZWORD: u32 = 0xF0000000; + } + + /// code bytes + pub mod code_info { + /// Mask for code bits + pub const MS_CODE: u32 = 0xF0000000; + /// function start? + pub const FF_FUNC: u32 = 0x10000000; + /// not used + pub const FF_RESERVED: u32 = 0x20000000; + /// Has Immediate value ? + pub const FF_IMMD: u32 = 0x40000000; + /// Has jump table or switch_info? + pub const FF_JUMP: u32 = 0x80000000; + } +} diff --git a/src/ida_reader.rs b/src/ida_reader.rs index 83f2462..c22930d 100644 --- a/src/ida_reader.rs +++ b/src/ida_reader.rs @@ -1,339 +1,12 @@ use anyhow::{anyhow, ensure, Result}; -use std::io::{BufRead, ErrorKind, Read, Seek}; +use std::io::{BufRead, ErrorKind, Read}; use std::ops::Range; use crate::til::{TypeAttribute, TypeAttributeExt}; +use crate::{IDAKind, IDAUsize}; -pub trait IdbReader: Seek + IdaGenericBufUnpack {} -impl IdbReader for R {} - -pub trait IdaUnpack: IdaGenericUnpack { - fn is_64(&self) -> bool; - - // TODO rename to deserialize_usize - fn read_word(&mut self) -> Result { - if self.is_64() { - Ok(bincode::deserialize_from(self)?) - } else { - Ok(bincode::deserialize_from::<_, u32>(self).map(u64::from)?) 
- } - } - - fn unpack_usize(&mut self) -> Result { - if self.is_64() { - self.unpack_dq() - } else { - self.unpack_dd().map(u64::from) - } - } - - // TODO unpack_address_ext - /// unpack an address and extend to max address if 32bits and u32::MAX - fn unpack_usize_ext_max(&mut self) -> Result { - if self.is_64() { - self.unpack_dq() - } else { - self.unpack_dd_ext_max().map(u64::from) - } - } - - // InnerRef fb47a09e-b8d8-42f7-aa80-2435c4d1e049 0x28f8cc - fn unpack_address_range(&mut self) -> Result> { - if self.is_64() { - let start = self.unpack_dq()?; - let len = self.unpack_dq()?; - #[cfg(feature = "restrictive")] - let end = start - .checked_add(len) - .ok_or_else(|| anyhow!("Function range overflows"))?; - #[cfg(not(feature = "restrictive"))] - let end = start.saturating_add(len); - Ok(start..end) - } else { - let start = self.unpack_dd_ext_max()?; - let len = self.unpack_dd()?; - // NOTE may not look right, but that's how ida does it - let end = match start.checked_add(len.into()) { - Some(0xFFFF_FFFF) => u64::MAX, - Some(value) => value, - #[cfg(feature = "restrictive")] - None => return Err(anyhow!("Function range overflows")), - #[cfg(not(feature = "restrictive"))] - None => u64::MAX, - }; - Ok(start..end) - } - } -} - -pub struct IdaUnpacker { - input: I, - is_64: bool, -} - -impl IdaUnpacker { - pub fn new(input: I, is_64: bool) -> Self { - Self { input, is_64 } - } - - pub fn inner(self) -> I { - self.input - } -} - -impl IdaUnpack for IdaUnpacker { - fn is_64(&self) -> bool { - self.is_64 - } -} - -impl Read for IdaUnpacker { - fn read(&mut self, buf: &mut [u8]) -> std::io::Result { - self.input.read(buf) - } - - fn read_vectored( - &mut self, - bufs: &mut [std::io::IoSliceMut<'_>], - ) -> std::io::Result { - self.input.read_vectored(bufs) - } - - fn read_to_end(&mut self, buf: &mut Vec) -> std::io::Result { - self.input.read_to_end(buf) - } - - fn read_to_string(&mut self, buf: &mut String) -> std::io::Result { - self.input.read_to_string(buf) - } - - fn read_exact(&mut self, buf: &mut [u8]) -> std::io::Result<()> { - self.input.read_exact(buf) - } -} - -impl BufRead for IdaUnpacker { - fn fill_buf(&mut self) -> std::io::Result<&[u8]> { - self.input.fill_buf() - } - - fn consume(&mut self, amt: usize) { - self.input.consume(amt); - } - - fn read_until( - &mut self, - byte: u8, - buf: &mut Vec, - ) -> std::io::Result { - self.input.read_until(byte, buf) - } - - fn read_line(&mut self, buf: &mut String) -> std::io::Result { - self.input.read_line(buf) - } -} - -pub trait IdaGenericBufUnpack: IdaGenericUnpack + BufRead { - // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x42ad36 - fn read_raw_til_type(&mut self, format: u32) -> Result> { - let flags = self.read_u32()?; - if flags == 0x7fff_fffe { - let len = self.read_u32()?; - let mut data = vec![0; 8 + len as usize]; - data[0..4].copy_from_slice(&flags.to_le_bytes()); - data[4..8].copy_from_slice(&len.to_le_bytes()); - self.read_exact(&mut data[8..])?; - Ok(data) - } else { - let mut data = flags.to_le_bytes().to_vec(); - // skip name - // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x42ad58 - self.read_until(b'\x00', &mut data)?; - - // skip the ordinal number - match (format, (flags >> 31) != 0) { - // formats below 0x12 doesn't have 64 bits ord - (0..=0x11, _) | (_, false) => { - data.extend(self.read_u32()?.to_le_bytes()) - } - (_, true) => data.extend(self.read_u64()?.to_le_bytes()), - } - - // skip the type itself - self.read_until(b'\x00', &mut data)?; - // skip the info field - self.read_until(b'\x00', &mut data)?; 
- // skip the cmt field - self.read_until(b'\x00', &mut data)?; - // skip the fieldcmts field - self.read_until(b'\x00', &mut data)?; - // skip the sclass - data.push(self.read_u8()?); - Ok(data) - } - } - - /// Reads 1 to 9 bytes. - /// ValueRange: 0-0x7FFFFFFF, 0-0xFFFFFFFF - /// Usage: Arrays - fn read_da(&mut self) -> Result<(u8, u8)> { - // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x478620 - let mut a = 0; - let mut b = 0; - let mut da = 0; - let mut base = 0; - let mut nelem = 0; - // TODO check no more then 9 bytes are read - loop { - let Some(typ) = self.peek_u8()? else { - #[cfg(feature = "restrictive")] - return Err(anyhow!(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "Unexpected EoF on DA" - ))); - #[cfg(not(feature = "restrictive"))] - return Ok((nelem, base)); - }; - if typ & 0x80 == 0 { - break; - } - self.consume(1); - - da = (da << 7) | typ & 0x7F; - b += 1; - if b >= 4 { - let z: u8 = self.read_u8()?; - if z != 0 { - base = (da << 4) | z & 0xF - } - nelem = (z >> 4) & 7; - loop { - let Some(y) = self.peek_u8()? else { - #[cfg(feature = "restrictive")] - return Err(anyhow!(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "Unexpected EoF on DA" - ))); - #[cfg(not(feature = "restrictive"))] - return Ok((nelem, base)); - }; - if (y & 0x80) == 0 { - break; - } - self.consume(1); - nelem = (nelem << 7) | y & 0x7F; - a += 1; - if a >= 4 { - return Ok((nelem, base)); - } - } - } - } - Ok((nelem, base)) - } - - // TODO rename this - fn read_c_string_raw(&mut self) -> Result> { - let mut buf = vec![]; - self.read_until(b'\x00', &mut buf)?; - // last char need to be \x00 or we found a EoF - if let Some(b'\x00') = buf.last() { - let _ = buf.pop(); // remove the \x00 from the end - } else { - // found EOF, aka could not find the \x00 for the string end - #[cfg(feature = "restrictive")] - return Err(anyhow!("Unexpected EoF on CStr")); - } - Ok(buf) - } - - // TODO rename this - fn read_c_string_vec(&mut self) -> Result>> { - let buf = self.read_c_string_raw()?; - split_strings_from_array(&buf) - .ok_or_else(|| anyhow!("Invalid len on Vec of CStr {buf:02x?}")) - } - - fn peek_u8(&mut self) -> Result> { - Ok(self.fill_buf()?.first().copied()) - } - - // InnerRef b47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x46b690 unpack_dd - // NOTE the orignal implementation never fails, if input hit EoF it a partial result or 0 - /// Reads 1 to 5 bytes. - fn unpack_dd_or_eof(&mut self) -> Result> { - let Some(b1) = self.peek_u8()? else { - return Ok(None); - }; - self.consume(1); - self.unpack_dd_from_byte(b1).map(Option::Some) - } - - // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x48ce40 - fn read_ext_att(&mut self) -> Result { - // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x48cec0 - let start_value = match self.read_dt()? { - 0x400 => return Ok(-1i64 as u64), - 0x200 => return Ok(-1i32 as u64), - other => other, - }; - - // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x48ce6f - let mut acc = 0; - for bit in 0..8 { - let byte = bit * 8; - if (start_value >> bit) & 1 != 0 { - let value = self.read_u8()?; - // TODO is this an error or expect possible value? 
- #[cfg(feature = "restrictive")] - ensure!(value != 0); - acc |= (value as u64) << byte; - } - } - - if start_value & 0x100 != 0 { - acc = !acc; - } - Ok(acc) - } - - fn read_tah(&mut self) -> Result> { - // TODO TAH in each type have a especial meaning, verify those - // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x477080 - // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x452830 - let Some(tah) = self.peek_u8()? else { - return Err(anyhow!(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "Unexpected EoF on DA" - ))); - }; - if tah == 0xFE { - Ok(Some(self.read_type_attribute()?)) - } else { - Ok(None) - } - } - - fn read_sdacl(&mut self) -> Result> { - let Some(sdacl) = self.peek_u8()? else { - return Err(anyhow!(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "Unexpected EoF on SDACL" - ))); - }; - - // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x477eff - //NOTE: original op ((sdacl as u8 & 0xcf) ^ 0xC0) as i32 <= 0x01 - matches!(sdacl, 0xC0..=0xC1 | 0xD0..=0xD1 | 0xE0..=0xE1 | 0xF0..=0xF1) - .then(|| self.read_type_attribute()) - .transpose() - } -} -impl IdaGenericBufUnpack for R {} - -pub trait IdaGenericUnpack: Read { +pub trait IdbRead: Read { fn read_u8(&mut self) -> Result { let mut data = [0; 1]; self.read_exact(&mut data)?; @@ -476,13 +149,13 @@ pub trait IdaGenericUnpack: Read { } } - /// unpack 32bits, extending the max value if equal to u32::MAX - fn unpack_dd_ext_max(&mut self) -> Result { - match self.unpack_dd()? { - u32::MAX => Ok(u64::MAX), - value => Ok(u64::from(value)), - } - } + ///// unpack 32bits, extending the max value if equal to u32::MAX + //fn unpack_dd_ext_max(&mut self) -> Result { + // match self.unpack_dd()? { + // u32::MAX => Ok(u64::MAX), + // value => Ok(u64::from(value)), + // } + //} // InnerRef b47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x46b7b0 unpack_dq // NOTE the orignal implementation never fails, if input hit EoF it a partial result or 0 @@ -545,6 +218,7 @@ pub trait IdaGenericUnpack: Read { /// Usage: 16bit numbers fn read_dt(&mut self) -> Result { // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x48cd60 + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x4514b let value = match self.read_u8()? { #[cfg(feature = "restrictive")] 0 => return Err(anyhow!("DT can't have 0 value")), @@ -584,18 +258,20 @@ pub trait IdaGenericUnpack: Read { /// Reads 2 to 7 bytes. /// Value Range: Nothing or 0-0xFFFF_FFFF /// Usage: some kind of size - fn read_dt_de(&mut self) -> Result> { + fn read_dt_de(&mut self) -> Result> { // TODO the return is always NonZero? // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x48cf20 + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x451670 match self.read_dt()? 
{ 0 => Ok(None), - 0x7FFE => self.read_de().map(Some), - n => Ok(Some(n.into())), + 0x7FFE => self.read_de().map(|x| Some((x, x >> 3 == 0))), + n => Ok(Some((n.into(), false))), } } - // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x452830 fn read_type_attribute(&mut self) -> Result { + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x452830 + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x2fbf90 use crate::til::flag::tattr_ext::*; #[cfg(feature = "restrictive")] let byte0 = self.read_u8()?; @@ -679,7 +355,249 @@ pub trait IdaGenericUnpack: Read { } } -impl IdaGenericUnpack for R {} +impl IdbRead for R {} + +pub trait IdbBufRead: IdbRead + BufRead { + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x42ad36 + fn read_raw_til_type(&mut self, format: u32) -> Result> { + let flags = self.read_u32()?; + if flags == 0x7fff_fffe { + // TODO find the type that have this flag + let len = self.read_u32()?; + let mut data = vec![0; 8 + len as usize]; + data[0..4].copy_from_slice(&flags.to_le_bytes()); + data[4..8].copy_from_slice(&len.to_le_bytes()); + self.read_exact(&mut data[8..])?; + Ok(data) + } else { + let mut data = flags.to_le_bytes().to_vec(); + // skip name + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x42ad58 + self.read_until(b'\x00', &mut data)?; + + // skip the ordinal number + match (format, (flags >> 31) != 0) { + // formats below 0x12 doesn't have 64 bits ord + (0..=0x11, _) | (_, false) => { + data.extend(self.read_u32()?.to_le_bytes()) + } + (_, true) => data.extend(self.read_u64()?.to_le_bytes()), + } + + // skip the type itself + self.read_until(b'\x00', &mut data)?; + // skip the info field + self.read_until(b'\x00', &mut data)?; + // skip the cmt field + self.read_until(b'\x00', &mut data)?; + // skip the fieldcmts field + self.read_until(b'\x00', &mut data)?; + // skip the sclass + data.push(self.read_u8()?); + Ok(data) + } + } + + /// Reads 1 to 9 bytes. + /// ValueRange: 0-0x7FFFFFFF, 0-0xFFFFFFFF + /// Usage: Arrays + fn read_da(&mut self) -> Result<(u8, u8)> { + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x478620 + let mut a = 0; + let mut b = 0; + let mut da = 0; + let mut base = 0; + let mut nelem = 0; + // TODO check no more then 9 bytes are read + loop { + let Some(typ) = self.peek_u8()? else { + #[cfg(feature = "restrictive")] + return Err(anyhow!(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Unexpected EoF on DA" + ))); + #[cfg(not(feature = "restrictive"))] + return Ok((nelem, base)); + }; + if typ & 0x80 == 0 { + break; + } + self.consume(1); + + da = (da << 7) | typ & 0x7F; + b += 1; + if b >= 4 { + let z: u8 = self.read_u8()?; + if z != 0 { + base = (da << 4) | z & 0xF + } + nelem = (z >> 4) & 7; + loop { + let Some(y) = self.peek_u8()? 
else { + #[cfg(feature = "restrictive")] + return Err(anyhow!(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Unexpected EoF on DA" + ))); + #[cfg(not(feature = "restrictive"))] + return Ok((nelem, base)); + }; + if (y & 0x80) == 0 { + break; + } + self.consume(1); + nelem = (nelem << 7) | y & 0x7F; + a += 1; + if a >= 4 { + return Ok((nelem, base)); + } + } + } + } + Ok((nelem, base)) + } + + // TODO rename this + fn read_c_string_raw(&mut self) -> Result> { + let mut buf = vec![]; + self.read_until(b'\x00', &mut buf)?; + // last char need to be \x00 or we found a EoF + if let Some(b'\x00') = buf.last() { + let _ = buf.pop(); // remove the \x00 from the end + } else { + // found EOF, aka could not find the \x00 for the string end + #[cfg(feature = "restrictive")] + return Err(anyhow!("Unexpected EoF on CStr")); + } + Ok(buf) + } + + // TODO rename this + fn read_c_string_vec(&mut self) -> Result>> { + let buf = self.read_c_string_raw()?; + split_strings_from_array(&buf) + .ok_or_else(|| anyhow!("Invalid len on Vec of CStr {buf:02x?}")) + } + + fn peek_u8(&mut self) -> Result> { + Ok(self.fill_buf()?.first().copied()) + } + + // InnerRef b47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x46b690 unpack_dd + // NOTE the orignal implementation never fails, if input hit EoF it a partial result or 0 + /// Reads 1 to 5 bytes. + fn unpack_dd_or_eof(&mut self) -> Result> { + let Some(b1) = self.peek_u8()? else { + return Ok(None); + }; + self.consume(1); + self.unpack_dd_from_byte(b1).map(Option::Some) + } + + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x48ce40 + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x451590 + fn read_ext_att(&mut self) -> Result { + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x48cec0 + // TODO this can't be found at InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x451590 + let start_value = match self.read_dt()? { + 0x400 => return Ok(-1i64 as u64), + 0x200 => return Ok(-1i32 as u64), + other => other, + }; + + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x48ce6f + let mut acc = 0; + for bit in 0..8 { + let byte = bit * 8; + if (start_value >> bit) & 1 != 0 { + let value = self.read_u8()?; + // TODO is this an error or expect possible value? + #[cfg(feature = "restrictive")] + ensure!(value != 0); + acc |= (value as u64) << byte; + } + } + + if start_value & 0x100 != 0 { + acc = !acc; + } + Ok(acc) + } + + fn read_tah(&mut self) -> Result> { + // TODO TAH in each type have a especial meaning, verify those + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x477080 + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x452830 + let Some(tah) = self.peek_u8()? else { + return Err(anyhow!(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Unexpected EoF on DA" + ))); + }; + if tah == 0xFE { + Ok(Some(self.read_type_attribute()?)) + } else { + Ok(None) + } + } + + fn read_sdacl(&mut self) -> Result> { + let Some(sdacl) = self.peek_u8()? 
else { + return Err(anyhow!(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Unexpected EoF on SDACL" + ))); + }; + + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x477eff + //NOTE: original op ((sdacl as u8 & 0xcf) ^ 0xC0) as i32 <= 0x01 + matches!(sdacl, 0xC0..=0xC1 | 0xD0..=0xD1 | 0xE0..=0xE1 | 0xF0..=0xF1) + .then(|| self.read_type_attribute()) + .transpose() + } +} + +impl IdbBufRead for R {} + +pub trait IdbReadKind: IdbRead { + fn read_usize(&mut self) -> Result + where + Self: Sized, + { + ::from_le_reader(self) + } + + fn read_usize_be(&mut self) -> Result + where + Self: Sized, + { + ::from_be_reader(self) + } + + fn unpack_usize(&mut self) -> Result + where + Self: Sized, + { + ::unpack_from_reader(self) + } + fn unpack_address_range(&mut self) -> Result> + where + Self: Sized, + { + // InnerRef fb47a09e-b8d8-42f7-aa80-2435c4d1e049 0x28f8cc + let start = self.unpack_usize()?; + let len = self.unpack_usize()?; + // NOTE may not look right, but that's how ida does it + #[cfg(feature = "restrictive")] + let end = num_traits::CheckedAdd::checked_add(&start, &len) + .ok_or_else(|| anyhow!("Function range overflows"))?; + #[cfg(not(feature = "restrictive"))] + let end = num_traits::Saturating::saturating_add(start, len); + Ok(start..end) + } +} + +impl IdbReadKind for R {} pub fn split_strings_from_array(buf: &[u8]) -> Option>> { if buf.is_empty() { diff --git a/src/lib.rs b/src/lib.rs index 3395572..6957ae2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,14 +6,12 @@ pub mod nam; pub mod til; use std::borrow::Cow; -use std::fmt::Debug; use std::fmt::Write; -use std::io::SeekFrom; +use std::io::{BufRead, BufReader, Read, Seek, SeekFrom}; use std::num::NonZeroU64; -use id0::ID0Section; -use ida_reader::IdaGenericUnpack; -use ida_reader::IdbReader; +use id0::{ID0Section, ID0SectionVariants}; +use ida_reader::{IdbBufRead, IdbRead, IdbReadKind}; use serde::Deserialize; use crate::id1::ID1Section; @@ -21,10 +19,21 @@ use crate::nam::NamSection; use crate::til::section::TILSection; use anyhow::{anyhow, ensure, Result}; -#[derive(Debug, Clone, Copy)] -pub struct IDBParser { - input: I, +use num_enum::{IntoPrimitive, TryFromPrimitive}; + +#[derive(Debug, Clone)] +enum IDBParserInput { + File(I), + // TODO find a better way to handle Zstd data, + // this could be problematic with big files + Buffer(Vec), +} + +#[derive(Debug, Clone)] +pub struct IDBParser { + input: IDBParserInput, header: IDBHeader, + _kind: std::marker::PhantomData, } trait Sealed {} @@ -38,83 +47,207 @@ macro_rules! impl_idb_offset { impl Sealed for $name {} impl IDBOffset for $name { fn idb_offset(&self) -> u64 { - self.0.get() + self.0 } } }; } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct ID0Offset(NonZeroU64); +pub struct ID0Offset(u64); impl_idb_offset!(ID0Offset); #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct ID1Offset(NonZeroU64); +pub struct ID1Offset(u64); impl_idb_offset!(ID1Offset); #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct NamOffset(NonZeroU64); +pub struct NamOffset(u64); impl_idb_offset!(NamOffset); #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct TILOffset(NonZeroU64); +pub struct TILOffset(u64); impl_idb_offset!(TILOffset); -impl IDBParser { +macro_rules! 
call_parser_discrimiant { + ($slf:ident, $name:ident, $call:tt) => { + match $slf { + Self::IDA32($name) => $call, + Self::IDA64($name) => $call, + } + }; +} + +pub type IDBParserVariants = + IDAVariants, IDBParser>; + +impl IDBParserVariants { pub fn new(mut input: I) -> Result { let header = IDBHeader::read(&mut input)?; - Ok(Self { input, header }) + let input = match &header.version { + IDBHeaderVersion::V910(h) + if h.compression != IDBSectionCompression::None => + { + let mut output = vec![]; + input.seek(SeekFrom::Start(h.data_start.get()))?; + match h.compression { + IDBSectionCompression::None => unreachable!(), + IDBSectionCompression::Zlib => { + flate2::read::ZlibDecoder::new(input) + .read_to_end(&mut output)?; + } + IDBSectionCompression::Zstd => { + zstd::Decoder::new(input)?.read_to_end(&mut output)?; + } + } + IDBParserInput::Buffer(output) + } + _ => IDBParserInput::File(input), + }; + if header.magic_version.is_64() { + Ok(Self::IDA64(IDBParser { + input, + header, + _kind: std::marker::PhantomData, + })) + } else { + Ok(Self::IDA32(IDBParser { + input, + header, + _kind: std::marker::PhantomData, + })) + } + } + pub fn id0_section_offset(&self) -> Option { + call_parser_discrimiant!(self, x, { x.id0_section_offset() }) + } + + pub fn id1_section_offset(&self) -> Option { + call_parser_discrimiant!(self, x, { x.id1_section_offset() }) + } + + pub fn nam_section_offset(&self) -> Option { + call_parser_discrimiant!(self, x, { x.nam_section_offset() }) + } + + pub fn til_section_offset(&self) -> Option { + call_parser_discrimiant!(self, x, { x.til_section_offset() }) + } + + pub fn read_id0_section( + &mut self, + id0: ID0Offset, + ) -> Result { + match self { + Self::IDA32(parser) => { + parser.read_id0_section(id0).map(IDAVariants::IDA32) + } + Self::IDA64(parser) => { + parser.read_id0_section(id0).map(IDAVariants::IDA64) + } + } + } + + pub fn read_id1_section(&mut self, id1: ID1Offset) -> Result { + call_parser_discrimiant!(self, x, { x.read_id1_section(id1) }) + } + + pub fn read_nam_section(&mut self, nam: NamOffset) -> Result { + call_parser_discrimiant!(self, x, { x.read_nam_section(nam) }) + } + + pub fn read_til_section(&mut self, til: TILOffset) -> Result { + call_parser_discrimiant!(self, x, { x.read_til_section(til) }) } + pub fn decompress_til_section( + &mut self, + til: TILOffset, + output: &mut impl std::io::Write, + ) -> Result<()> { + call_parser_discrimiant!(self, x, { + x.decompress_til_section(til, output) + }) + } +} + +impl IDBParser { pub fn id0_section_offset(&self) -> Option { - self.header.id0_offset.map(ID0Offset) + match self.header.version { + IDBHeaderVersion::V1(v) => v.id0_offset.map(NonZeroU64::get), + IDBHeaderVersion::V4(v) => v.id0_offset.map(NonZeroU64::get), + IDBHeaderVersion::V5(v) => v.id0_offset.map(NonZeroU64::get), + IDBHeaderVersion::V6(v) => v.id0_offset.map(NonZeroU64::get), + IDBHeaderVersion::V910(v) => v.id0.map(|x| x.offset), + } + .map(ID0Offset) } pub fn id1_section_offset(&self) -> Option { - self.header.id1_offset.map(ID1Offset) + match self.header.version { + IDBHeaderVersion::V1(v) => v.id1_offset.map(NonZeroU64::get), + IDBHeaderVersion::V4(v) => v.id1_offset.map(NonZeroU64::get), + IDBHeaderVersion::V5(v) => v.id1_offset.map(NonZeroU64::get), + IDBHeaderVersion::V6(v) => v.id1_offset.map(NonZeroU64::get), + IDBHeaderVersion::V910(v) => v.id1.map(|x| x.offset), + } + .map(ID1Offset) } pub fn nam_section_offset(&self) -> Option { - self.header.nam_offset.map(NamOffset) + match self.header.version { + 
IDBHeaderVersion::V1(v) => v.nam_offset.map(NonZeroU64::get), + IDBHeaderVersion::V4(v) => v.nam_offset.map(NonZeroU64::get), + IDBHeaderVersion::V5(v) => v.nam_offset.map(NonZeroU64::get), + IDBHeaderVersion::V6(v) => v.nam_offset.map(NonZeroU64::get), + IDBHeaderVersion::V910(v) => v.nam.map(|x| x.offset), + } + .map(NamOffset) } pub fn til_section_offset(&self) -> Option { - self.header.til_offset.map(TILOffset) + match self.header.version { + IDBHeaderVersion::V1(v) => v.til_offset.map(NonZeroU64::get), + IDBHeaderVersion::V4(v) => v.til_offset.map(NonZeroU64::get), + IDBHeaderVersion::V5(v) => v.til_offset.map(NonZeroU64::get), + IDBHeaderVersion::V6(v) => v.til_offset.map(NonZeroU64::get), + IDBHeaderVersion::V910(v) => v.til.map(|x| x.offset), + } + .map(TILOffset) } - pub fn read_id0_section(&mut self, id0: ID0Offset) -> Result { - read_section( + pub fn read_id0_section( + &mut self, + id0: ID0Offset, + ) -> Result> { + read_section_from_main_header::, I, K>( &mut self.input, + id0.0, &self.header, - id0.0.get(), - ID0Section::read, ) } pub fn read_id1_section(&mut self, id1: ID1Offset) -> Result { - read_section( + read_section_from_main_header::( &mut self.input, + id1.0, &self.header, - id1.0.get(), - ID1Section::read, ) } pub fn read_nam_section(&mut self, nam: NamOffset) -> Result { - read_section( + read_section_from_main_header::( &mut self.input, + nam.0, &self.header, - nam.0.get(), - NamSection::read, ) } pub fn read_til_section(&mut self, til: TILOffset) -> Result { - read_section( + read_section_from_main_header::( &mut self.input, + til.0, &self.header, - til.0.get(), - |input, _header, compressed| TILSection::read(input, compressed), ) } @@ -123,22 +256,45 @@ impl IDBParser { offset: impl IDBOffset, output: &mut impl std::io::Write, ) -> Result<()> { - self.input.seek(SeekFrom::Start(offset.idb_offset()))?; - let section_header = - IDBSectionHeader::read(&self.header, &mut self.input)?; - // makes sure the reader doesn't go out-of-bounds - let mut input = - std::io::Read::take(&mut self.input, section_header.len); - match section_header.compress { - IDBSectionCompression::Zlib => { - let mut input = flate2::bufread::ZlibDecoder::new(input); - let _ = std::io::copy(&mut input, output)?; + match &mut self.input { + IDBParserInput::Buffer(buf) => { + let offset = usize::try_from(offset.idb_offset()).unwrap(); + let IDBHeaderVersion::V910(h) = &self.header.version else { + unreachable!(); + }; + let size = usize::try_from(h.til.unwrap().size.get()).unwrap(); + output + .write_all(&buf[offset..offset + size]) + .map_err(anyhow::Error::from) } - IDBSectionCompression::None => { - let _ = std::io::copy(&mut input, output)?; + IDBParserInput::File(input) => { + input.seek(SeekFrom::Start(offset.idb_offset()))?; + let section_header = + IDBSectionHeader::read(&self.header, &mut *input)?; + // makes sure the reader doesn't go out-of-bounds + match section_header.compress { + IDBSectionCompression::Zlib => { + let input = + std::io::Read::take(input, section_header.len); + let mut input = + flate2::bufread::ZlibDecoder::new(input); + let _ = std::io::copy(&mut input, output)?; + } + IDBSectionCompression::None => { + let mut input = + std::io::Read::take(input, section_header.len); + let _ = std::io::copy(&mut input, output)?; + } + IDBSectionCompression::Zstd => { + let input = zstd::Decoder::new(input)?; + let mut input = + std::io::Read::take(input, section_header.len); + let _ = std::io::copy(&mut input, output)?; + } + } + Ok(()) } } - Ok(()) } pub fn 
decompress_til_section( @@ -146,47 +302,147 @@ impl IDBParser { til: TILOffset, output: &mut impl std::io::Write, ) -> Result<()> { - self.input.seek(SeekFrom::Start(til.0.get()))?; - let section_header = - IDBSectionHeader::read(&self.header, &mut self.input)?; - // makes sure the reader doesn't go out-of-bounds - let mut input = - std::io::Read::take(&mut self.input, section_header.len); - TILSection::decompress(&mut input, output, section_header.compress) + let offset = til.0; + match &mut self.input { + IDBParserInput::Buffer(buf) => { + let offset = usize::try_from(offset).unwrap(); + let IDBHeaderVersion::V910(h) = &self.header.version else { + unreachable!(); + }; + let size = usize::try_from(h.til.unwrap().size.get()).unwrap(); + output + .write_all(&buf[offset..offset + size]) + .map_err(anyhow::Error::from) + } + IDBParserInput::File(input) => { + input.seek(SeekFrom::Start(offset))?; + let section_header = + IDBSectionHeader::read(&self.header, &mut *input)?; + // makes sure the reader doesn't go out-of-bounds + let mut input = std::io::Read::take(input, section_header.len); + TILSection::decompress( + &mut input, + output, + section_header.compress, + ) + } + } } } -fn read_section<'a, I, T, F>( - input: &'a mut I, - header: &IDBHeader, +trait SectionReader { + type Result; + fn read_section + IdbBufRead>( + reader: &mut I, + ) -> Result; + fn size_from_v910(header: &IDBHeaderV910) -> u64; +} + +// decided, based on the version, where the size compress data is stored +fn read_section_from_main_header( + input: &mut IDBParserInput, offset: u64, - mut process: F, -) -> Result + header: &IDBHeader, +) -> Result where - I: IdbReader, - F: FnMut( - &mut std::io::Take<&'a mut I>, - &IDBHeader, - IDBSectionCompression, - ) -> Result, + I: IdbBufRead + Seek, + K: IDAKind, + F: SectionReader, +{ + match input { + IDBParserInput::Buffer(buf) => { + let offset = usize::try_from(offset).unwrap(); + let IDBHeaderVersion::V910(h) = &header.version else { + unreachable!(); + }; + let size = usize::try_from(F::size_from_v910(h)).unwrap(); + read_section::( + &mut &buf[offset..offset + size], + IDBSectionCompression::None, + F::size_from_v910(h), + ) + } + IDBParserInput::File(input) => { + input.seek(SeekFrom::Start(offset))?; + match &header.version { + IDBHeaderVersion::V910(h) => read_section::( + input, + h.compression, + F::size_from_v910(h), + ), + IDBHeaderVersion::V1(_) + | IDBHeaderVersion::V4(_) + | IDBHeaderVersion::V5(_) + | IDBHeaderVersion::V6(_) => { + read_section_from_header::(input, header) + } + } + } + } +} + +// read the header from the disk +fn read_section_from_header( + input: &mut I, + header: &IDBHeader, +) -> Result +where + I: IdbBufRead, + K: IDAKind, + F: SectionReader, { - input.seek(SeekFrom::Start(offset))?; let section_header = IDBSectionHeader::read(header, &mut *input)?; - // makes sure the reader doesn't go out-of-bounds - let mut input = std::io::Read::take(input, section_header.len); - let result = process(&mut input, header, section_header.compress)?; - - // TODO seems its normal to have a few extra bytes at the end of the sector, maybe - // because of the compressions stuff, anyway verify that - ensure!( - if matches!(section_header.compress, IDBSectionCompression::None) { - input.limit() == 0 - } else { - input.limit() <= 16 - }, - "Sector have more data then expected, left {} bytes", - input.limit() - ); + read_section::(input, section_header.compress, section_header.len) +} + +fn read_section( + input: &mut I, + compress: IDBSectionCompression, + 
len: u64, +) -> Result +where + I: IdbBufRead, + K: IDAKind, + F: SectionReader, +{ + let result = match compress { + IDBSectionCompression::None => { + let mut input = std::io::Read::take(input, len); + let result = F::read_section(&mut input)?; + ensure!( + input.limit() == 0, + "Sector have more data then expected, left {} bytes", + input.limit() + ); + result + } + IDBSectionCompression::Zlib => { + // TODO seems its normal to have a few extra bytes at the end of the sector, maybe + // because of the compressions stuff, anyway verify that + let input = std::io::Read::take(input, len); + let mut flate_reader = + BufReader::new(flate2::read::ZlibDecoder::new(input)); + let result = F::read_section(&mut flate_reader)?; + let limit = flate_reader.into_inner().into_inner().limit(); + ensure!( + limit <= 16, + "Compressed Zlib Sector have more data then expected, left {limit} bytes", + ); + result + } + IDBSectionCompression::Zstd => { + let zstd_reader = BufReader::new(zstd::Decoder::new(input)?); + let mut input = std::io::Read::take(zstd_reader, len); + let result = F::read_section(&mut input)?; + let limit = input.limit(); + ensure!( + limit <= 16, + "Compressed Zlib Sector have more data then expected, left {limit} bytes", + ); + result + } + }; + Ok(result) } @@ -197,14 +453,14 @@ enum IDBMagic { IDA2, } -impl TryFrom<[u8; 4]> for IDBMagic { +impl TryFrom<[u8; 5]> for IDBMagic { type Error = anyhow::Error; - fn try_from(value: [u8; 4]) -> Result { + fn try_from(value: [u8; 5]) -> Result { match &value { - b"IDA0" => Ok(IDBMagic::IDA0), - b"IDA1" => Ok(IDBMagic::IDA1), - b"IDA2" => Ok(IDBMagic::IDA2), + b"IDA0\x00" => Ok(IDBMagic::IDA0), + b"IDA1\x00" => Ok(IDBMagic::IDA1), + b"IDA2\x00" => Ok(IDBMagic::IDA2), _ => Err(anyhow!("Invalid IDB Magic number")), } } @@ -219,76 +475,114 @@ impl IDBMagic { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive( + Debug, Clone, Copy, PartialEq, Eq, Hash, IntoPrimitive, TryFromPrimitive, +)] +#[repr(u16)] enum IDBVersion { // TODO add other versions - V1, - V4, - V5, - V6, + V1 = 1, + V4 = 4, + V5 = 5, + V6 = 6, + V910 = 910, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] struct IDBHeader { magic_version: IDBMagic, - version: IDBVersion, - id0_offset: Option, - id1_offset: Option, - nam_offset: Option, - til_offset: Option, - checksums: [u32; 3], - unk0_checksum: u32, - data: IDBHeaderVersion, + version: IDBHeaderVersion, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] enum IDBHeaderVersion { - V1 { - seg_offset: Option, - }, - V4 { - seg_offset: Option, - }, - V5 { - unk16: u32, - unk1_checksum: u32, - }, - V6 { - unk16: u32, - id2_offset: Option, - unk1_checksum: u32, - }, + V1(IDBHeaderV1), + V4(IDBHeaderV4), + V5(IDBHeaderV5), + V6(IDBHeaderV6), + V910(IDBHeaderV910), +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +struct IDBHeaderV1 { + pub id0_offset: Option, + pub id1_offset: Option, + pub nam_offset: Option, + pub seg_offset: Option, + pub til_offset: Option, + pub checksums: [u32; 5], +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +struct IDBHeaderV4 { + pub id0_offset: Option, + pub id1_offset: Option, + pub nam_offset: Option, + pub seg_offset: Option, + pub til_offset: Option, + pub checksums: [u32; 5], } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +struct IDBHeaderV5 { + pub id0_offset: Option, + pub id1_offset: Option, + pub nam_offset: Option, + pub til_offset: Option, + pub checksums: [u32; 5], +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +struct IDBHeaderV6 { + pub 
id0_offset: Option, + pub id1_offset: Option, + pub id2_offset: Option>, + pub nam_offset: Option, + pub til_offset: Option, + pub checksums: [u32; 5], +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +struct IDBHeaderV910 { + pub compression: IDBSectionCompression, + pub data_start: NonZeroU64, + pub id0: Option, + pub id1: Option, + pub id2: Option, + pub nam: Option, + pub til: Option, + pub seg: Option, + pub md5: [u8; 16], +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +struct IDBHeaderV910Sector { + pub offset: u64, + pub size: NonZeroU64, +} + +// NOTE V910 ditched the SectionHeader #[derive(Debug, Clone, Copy)] struct IDBSectionHeader { compress: IDBSectionCompression, len: u64, } -#[derive(Debug, Clone, Copy)] +#[derive( + Debug, Clone, Copy, PartialEq, Eq, Hash, IntoPrimitive, TryFromPrimitive, +)] #[repr(u8)] pub enum IDBSectionCompression { None = 0, Zlib = 2, -} - -impl TryFrom for IDBSectionCompression { - type Error = (); - - fn try_from(value: u8) -> std::result::Result { - match value { - 0 => Ok(Self::None), - 2 => Ok(Self::Zlib), - _ => Err(()), - } - } + /// Introduced in version 9.1 + Zstd = 3, } #[derive(Debug, Deserialize)] struct IDBHeaderRaw { - magic: [u8; 4], - _padding_0: u16, + magic: [u8; 5], + _padding_0: u8, offsets: [u32; 5], signature: u32, version: u16, @@ -296,7 +590,11 @@ struct IDBHeaderRaw { } impl IDBHeader { - pub fn read(mut input: impl IdaGenericUnpack) -> Result { + pub fn read(mut input: impl Read) -> Result { + // InnerRef fa53bd30-ebf1-4641-80ef-4ddc73db66cd 0x77eef0 + // InnerRef expects the file to be at least 112 bytes, + // always read 109 bytes at the start + // read 32 bytes let header_raw: IDBHeaderRaw = bincode::deserialize_from(&mut input)?; let magic = IDBMagic::try_from(header_raw.magic)?; ensure!( @@ -305,62 +603,68 @@ impl IDBHeader { header_raw.signature ); // TODO associate header.version and magic? - match header_raw.version { - 1 => Self::read_v1(&header_raw, magic, input), - 4 => Self::read_v4(&header_raw, magic, input), - 5 => Self::read_v5(&header_raw, magic, input), - 6 => Self::read_v6(&header_raw, magic, input), - v => Err(anyhow!("Unable to parse version `{v}`")), - } + let version = match IDBVersion::try_from_primitive(header_raw.version) + .map_err(|_| { + anyhow!("Unable to parse version `{}`", header_raw.version) + })? { + IDBVersion::V1 => { + IDBHeaderVersion::V1(Self::read_v1(&header_raw, input)?) + } + IDBVersion::V4 => { + IDBHeaderVersion::V4(Self::read_v4(&header_raw, input)?) + } + IDBVersion::V5 => { + IDBHeaderVersion::V5(Self::read_v5(&header_raw, input)?) + } + IDBVersion::V6 => { + IDBHeaderVersion::V6(Self::read_v6(&header_raw, input)?) + } + IDBVersion::V910 => { + IDBHeaderVersion::V910(Self::read_v910(&header_raw, input)?) 
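// Illustrative sketch, not crate code: the magic is now read as 5 bytes
// ("IDA0"/"IDA1"/"IDA2" plus the trailing NUL), which is why _padding_0 in
// IDBHeaderRaw shrank from u16 to u8 - the 6-byte header prefix is unchanged.
// A minimal check mirroring the TryFrom<[u8; 5]> impl above:
fn is_valid_magic(magic: &[u8; 5]) -> bool {
    matches!(magic, b"IDA0\x00" | b"IDA1\x00" | b"IDA2\x00")
}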
+ } + }; + Ok(Self { + magic_version: magic, + version, + }) } fn read_v1( header_raw: &IDBHeaderRaw, - magic: IDBMagic, - input: impl IdaGenericUnpack, - ) -> Result { + input: impl Read, + ) -> Result { #[derive(Debug, Deserialize)] struct V1Raw { _id2_offset: u32, - checksums: [u32; 3], - _unk30_zeroed: u32, - unk33_checksum: u32, + checksums: [u32; 5], } let v1_raw: V1Raw = bincode::deserialize_from(input)?; + + // TODO ensure all offsets point to after the header #[cfg(feature = "restrictive")] { - ensure!(v1_raw._unk30_zeroed == 0, "unk30 not zeroed"); ensure!(v1_raw._id2_offset == 0, "id2 in V1 is not zeroed"); } - // TODO ensure all offsets point to after the header - Ok(Self { - magic_version: magic, - version: IDBVersion::V1, + Ok(IDBHeaderV1 { id0_offset: NonZeroU64::new(header_raw.offsets[0].into()), id1_offset: NonZeroU64::new(header_raw.offsets[1].into()), nam_offset: NonZeroU64::new(header_raw.offsets[2].into()), + seg_offset: NonZeroU64::new(header_raw.offsets[3].into()), til_offset: NonZeroU64::new(header_raw.offsets[4].into()), checksums: v1_raw.checksums, - unk0_checksum: v1_raw.unk33_checksum, - data: IDBHeaderVersion::V1 { - seg_offset: NonZeroU64::new(header_raw.offsets[3].into()), - }, }) } fn read_v4( header_raw: &IDBHeaderRaw, - magic: IDBMagic, - input: impl IdaGenericUnpack, - ) -> Result { + input: impl Read, + ) -> Result { #[derive(Debug, Deserialize)] struct V4Raw { _id2_offset: u32, - checksums: [u32; 3], - _unk30_zeroed: u32, - unk33_checksum: u32, + checksums: [u32; 5], _unk38_zeroed: [u8; 8], _unk40_v5c: u32, } @@ -369,43 +673,34 @@ impl IDBHeader { #[cfg(feature = "restrictive")] { - ensure!(v4_raw._unk30_zeroed == 0, "unk30 not zeroed"); ensure!(v4_raw._id2_offset == 0, "id2 in V4 is not zeroed"); ensure!(v4_raw._unk38_zeroed == [0; 8], "unk38 is not zeroed"); ensure!(v4_raw._unk40_v5c == 0x5c, "unk40 is not 0x5C"); } // TODO ensure all offsets point to after the header - Ok(Self { - magic_version: magic, - version: IDBVersion::V4, + Ok(IDBHeaderV4 { id0_offset: NonZeroU64::new(header_raw.offsets[0].into()), id1_offset: NonZeroU64::new(header_raw.offsets[1].into()), nam_offset: NonZeroU64::new(header_raw.offsets[2].into()), + seg_offset: NonZeroU64::new(header_raw.offsets[3].into()), til_offset: NonZeroU64::new(header_raw.offsets[4].into()), checksums: v4_raw.checksums, - unk0_checksum: v4_raw.unk33_checksum, - data: IDBHeaderVersion::V4 { - seg_offset: NonZeroU64::new(header_raw.offsets[3].into()), - }, }) } fn read_v5( header_raw: &IDBHeaderRaw, - magic: IDBMagic, - input: impl IdaGenericUnpack, - ) -> Result { + input: impl Read, + ) -> Result { #[derive(Debug, Deserialize)] struct V5Raw { nam_offset: u64, _seg_offset_zeroed: u64, til_offset: u64, - initial_checksums: [u32; 3], - _unk4_zeroed: u32, - unk_checksum: u32, + checksums: [u32; 5], _id2_offset_zeroed: u64, - final_checksum: u32, + _final_checksum: u32, _unk0_v7c: u32, } let v5_raw: V5Raw = bincode::deserialize_from(input)?; @@ -421,44 +716,33 @@ impl IDBHeader { // TODO Final checksum is always zero on v5? 
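// Illustrative sketch, not crate code: the raw headers store 0 for "section
// not present", so every offset is parsed into Option<NonZeroU64>, where None
// means the section is absent. The hypothetical helper below shows the
// conversion applied to each header_raw.offsets entry:
use std::num::NonZeroU64;

fn offset_from_raw(raw: u32) -> Option<NonZeroU64> {
    NonZeroU64::new(u64::from(raw))
}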
#[cfg(feature = "restrictive")] { - ensure!(v5_raw._unk4_zeroed == 0, "unk4 not zeroed"); ensure!(v5_raw._id2_offset_zeroed == 0, "id2 in V5 is not zeroed"); ensure!(v5_raw._seg_offset_zeroed == 0, "seg in V5 is not zeroed"); ensure!(v5_raw._unk0_v7c == 0x7C, "unk0 not 0x7C"); } // TODO ensure all offsets point to after the header - Ok(Self { - magic_version: magic, - version: IDBVersion::V5, + Ok(IDBHeaderV5 { id0_offset: NonZeroU64::new(id0_offset), id1_offset: NonZeroU64::new(id1_offset), nam_offset: NonZeroU64::new(v5_raw.nam_offset), til_offset: NonZeroU64::new(v5_raw.til_offset), - checksums: v5_raw.initial_checksums, - unk0_checksum: v5_raw.unk_checksum, - data: IDBHeaderVersion::V5 { - unk16: header_raw.offsets[4], - unk1_checksum: v5_raw.final_checksum, - }, + checksums: v5_raw.checksums, }) } fn read_v6( header_raw: &IDBHeaderRaw, - magic: IDBMagic, - input: impl IdaGenericUnpack, - ) -> Result { + input: impl Read, + ) -> Result { #[derive(Debug, Deserialize)] struct V6Raw { nam_offset: u64, _seg_offset_zeroed: u64, til_offset: u64, - initial_checksums: [u32; 3], - _unk4_zeroed: [u8; 4], - unk5_checksum: u32, + checksums: [u32; 5], id2_offset: u64, - final_checksum: u32, + _final_checksum: u32, _unk0_v7c: u32, } let v6_raw: V6Raw = bincode::deserialize_from(input)?; @@ -473,37 +757,103 @@ impl IDBHeader { #[cfg(feature = "restrictive")] { - ensure!(v6_raw._unk4_zeroed == [0; 4], "unk4 not zeroed"); ensure!(v6_raw._seg_offset_zeroed == 0, "seg in V6 is not zeroed"); ensure!(v6_raw._unk0_v7c == 0x7C, "unk0 not 0x7C"); } // TODO ensure all offsets point to after the header - Ok(Self { - magic_version: magic, - version: IDBVersion::V6, + Ok(IDBHeaderV6 { id0_offset: NonZeroU64::new(id0_offset), id1_offset: NonZeroU64::new(id1_offset), + id2_offset: NonZeroU64::new(v6_raw.id2_offset), nam_offset: NonZeroU64::new(v6_raw.nam_offset), til_offset: NonZeroU64::new(v6_raw.til_offset), - checksums: v6_raw.initial_checksums, - unk0_checksum: v6_raw.unk5_checksum, - data: IDBHeaderVersion::V6 { - unk16: header_raw.offsets[4], - id2_offset: NonZeroU64::new(v6_raw.id2_offset), - unk1_checksum: v6_raw.final_checksum, - }, + checksums: v6_raw.checksums, + }) + } + + fn read_v910( + header_raw: &IDBHeaderRaw, + input: impl Read, + ) -> Result { + #[derive(Debug, Deserialize)] + struct V91Raw { + compression: u8, + sectors: [u64; 6], + _unk1: u64, + _unk2: u32, + md5: [u8; 16], + } + let raw: V91Raw = bincode::deserialize_from(input)?; + let header_size = u64::from_le( + u64::from(header_raw.offsets[1]) << 32 + | u64::from(header_raw.offsets[0]), + ); + let data_start = u64::from_le( + u64::from(header_raw.offsets[3]) << 32 + | u64::from(header_raw.offsets[2]), + ); + // TODO find meanings, seeing value 0 and 2 + let _unk3 = header_raw.offsets[4]; + #[cfg(feature = "restrictive")] + { + ensure!(raw._unk1 == 0); + ensure!(raw._unk2 == 0); + } + + ensure!(header_size != 0); + // TODO ensure other header data is empty based on the header_size + + let data_start = NonZeroU64::new(data_start) + .ok_or_else(|| anyhow!("Invalid Header data start offset"))?; + + // InnerRef fa53bd30-ebf1-4641-80ef-4ddc73db66cd 0x077f669 read + // InnerRef fa53bd30-ebf1-4641-80ef-4ddc73db66cd 0x077ebf9 unpack + let mut current_offset = + if raw.compression != IDBSectionCompression::None.into() { + 0 + } else { + data_start.get() + }; + let sectors: [Option; 6] = raw + .sectors + .iter() + .copied() + .map(|size| { + let sector = + NonZeroU64::new(size).map(|size| IDBHeaderV910Sector { + offset: current_offset, + size, + }); 
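// Illustrative sketch, not crate API: the loop here assigns each non-zero
// sector size an (offset, size) pair, with the running offset starting at
// data_start (or 0 when the database is compressed) and advancing by every
// size, zero or not. The tuple return type is made up for illustration.
fn sector_layout(data_start: u64, sizes: [u64; 6]) -> [Option<(u64, u64)>; 6] {
    let mut current = data_start;
    sizes.map(|size| {
        let sector = (size != 0).then_some((current, size));
        current += size;
        sector
    })
}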
+ current_offset += size; + Ok(sector) + }) + .collect::>>()? + .try_into() + .unwrap(); + + let compression = + IDBSectionCompression::try_from_primitive(raw.compression) + .map_err(|_| anyhow!("Invalid V910 header compression"))?; + + Ok(IDBHeaderV910 { + compression, + data_start, + id0: sectors[0], + id1: sectors[1], + nam: sectors[2], + id2: sectors[3], + til: sectors[4], + seg: sectors[5], + md5: raw.md5, }) } } impl IDBSectionHeader { - pub fn read( - header: &IDBHeader, - input: impl IdaGenericUnpack, - ) -> Result { + fn read(header: &IDBHeader, input: impl Read) -> Result { match header.version { - IDBVersion::V1 | IDBVersion::V4 => { + IDBHeaderVersion::V1(_) | IDBHeaderVersion::V4(_) => { #[derive(Debug, Deserialize)] struct Section32Raw { compress: u8, @@ -518,7 +868,7 @@ impl IDBSectionHeader { len: header.len.into(), }) } - IDBVersion::V5 | IDBVersion::V6 => { + IDBHeaderVersion::V5(_) | IDBHeaderVersion::V6(_) => { #[derive(Debug, Deserialize)] struct Section64Raw { compress: u8, @@ -533,6 +883,9 @@ impl IDBSectionHeader { len: header.len, }) } + IDBHeaderVersion::V910(_) => { + unreachable!() + } } } } @@ -548,7 +901,7 @@ enum VaVersion { } impl VaVersion { - fn read(mut input: impl IdaGenericUnpack) -> Result { + fn read(mut input: impl Read) -> Result { let mut magic: [u8; 4] = [0; 4]; input.read_exact(&mut magic)?; match &magic[..] { @@ -810,10 +1163,40 @@ mod test { let _til = til::Type::new_from_id0(&function, vec![]).unwrap(); } + #[test] + fn parse_struct_with_fixed() { + let function = [ + 0x0d, // stuct type + 0x31, // n = 0x30, mem_cnt = 6, packalig = 0 + 0xf1, 0x80, 0x08, // struct att + 0x32, // member 0 => char + 0x01, // member 0 fixed_ext_att + 0x03, // member 1 => int16 + 0x02, 0x10, // member 1 fixed_ext_att + 0x07, // member 2 => int + 0x02, 0x10, // member 2 fixed_ext_att + 0x3d, 0x03, 0x23, 0x48, // member 3 => typeref(8) + 0x02, 0x20, // member 3 fixed_ext_att + 0x08, // member 4 => bool + 0x02, 0x40, // member 4 fixed_ext_att + 0x1b, // member 5 array + 0x01, // member 5 nelem = 0 + 0x32, // member 5 inner_type = char + 0x02, 0x08, // member 5 fixed_ext_att + 0x02, 0x13, // struct stuff + 0x00, //end + ]; + let til = til::Type::new_from_id0(&function, vec![]).unwrap(); + let til::TypeVariant::Struct(til_struct) = til.type_variant else { + unreachable!() + }; + assert!(til_struct.extra_padding == Some(19)); + } + #[test] fn parse_idb_param() { let param = b"IDA\xbc\x02\x06metapc#\x8a\x03\x03\x02\x00\x00\x00\x00\xff_\xff\xff\xf7\x03\x00\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00\x0d\x00\x0d \x0d\x10\xff\xff\x00\x00\x00\xc0\x80\x00\x00\x00\x02\x02\x01\x0f\x0f\x06\xce\xa3\xbeg\xc6@\x00\x07\x00\x07\x10(FP\x87t\x09\x03\x00\x01\x13\x0a\x00\x00\x01a\x00\x07\x00\x13\x04\x04\x04\x00\x02\x04\x08\x00\x00\x00"; - let _parsed = id0::IDBParam::read(param, false).unwrap(); + let _parsed = id0::IDBParam::::read(param).unwrap(); } #[test] @@ -832,7 +1215,18 @@ mod test { let filename = filename.as_ref(); println!("{}", filename.to_str().unwrap()); let file = BufReader::new(File::open(&filename).unwrap()); - let mut parser = IDBParser::new(file).unwrap(); + let parser = IDAVariants::new(file).unwrap(); + match parser { + IDAVariants::IDA32(idbparser) => parse_idb_inner(idbparser), + IDAVariants::IDA64(idbparser) => parse_idb_inner(idbparser), + } + } + + fn parse_idb_inner(mut parser: IDBParser) + where + I: BufRead + Seek, + K: IDAKind, + { // parse sectors let id0 = parser 
.read_id0_section(parser.id0_section_offset().unwrap()) @@ -857,12 +1251,17 @@ mod test { let _: Vec<_> = id0.segments().unwrap().map(Result::unwrap).collect(); let _: Vec<_> = id0.loader_name().unwrap().map(Result::unwrap).collect(); - let _: Vec<_> = id0.root_info().unwrap().map(Result::unwrap).collect(); + let root_info_idx = id0.root_info_node().unwrap(); let _: Vec<_> = id0 - .file_regions(version) + .root_info(root_info_idx) .unwrap() .map(Result::unwrap) .collect(); + let file_regions_idx = id0.file_regions_idx().unwrap(); + let _: Vec<_> = id0 + .file_regions(file_regions_idx, version) + .map(Result::unwrap) + .collect(); let _: Vec<_> = id0 .functions_and_comments() .unwrap() @@ -906,7 +1305,7 @@ mod test { // makes sure it don't read out-of-bounds let mut input = BufReader::new(File::open(file)?); // TODO make a SmartReader - TILSection::read(&mut input, IDBSectionCompression::None).and_then(|_til| { + TILSection::read(&mut input).and_then(|_til| { let current = input.seek(SeekFrom::Current(0))?; let end = input.seek(SeekFrom::End(0))?; ensure!( @@ -953,3 +1352,116 @@ mod test { Ok(result) } } + +pub enum IDAVariants { + IDA32(I32), + IDA64(I64), +} + +pub trait IDAKind: std::fmt::Debug + Clone + Copy { + type Usize: IDAUsize; +} + +pub trait IDAUsize: + Sized + + Sync + + Send + + 'static + + Copy + + Clone + + std::fmt::Debug + + std::fmt::Display + + std::fmt::LowerHex + + std::fmt::UpperHex + + PartialEq + + Eq + + PartialOrd + + Ord + + core::hash::Hash + + core::iter::Sum + + num_traits::PrimInt + + num_traits::NumAssign + + num_traits::WrappingAdd + + num_traits::WrappingSub + + num_traits::FromBytes + + num_traits::ToBytes + + num_traits::ToBytes + + num_traits::AsPrimitive + + num_traits::AsPrimitive + + num_traits::AsPrimitive + + num_traits::AsPrimitive + + num_traits::AsPrimitive + + TryInto + + Into + + TryInto + + TryInto + + TryInto + + From + + From + + From + + TryFrom + + TryFrom + + Into +{ + type Isize: num_traits::Signed + Into + Copy; + const BYTES: u8; + + /// helper fo call into u64 + fn into_u64(self) -> u64 { + self.into() + } + /// cast the inner type as a signed version of itself, then call into i64 + fn into_i64(self) -> i64 { + let signed: Self::Isize = self.as_(); + signed.into() + } + fn is_max(self) -> bool { + self == Self::max_value() + } + // parse the bytes and only return Some if data is the exact size of type + fn from_le_bytes(data: &[u8]) -> Option; + fn from_be_bytes(data: &[u8]) -> Option; + // read the type from a reader + fn from_le_reader(data: &mut impl std::io::Read) -> Result; + fn from_be_reader(data: &mut impl std::io::Read) -> Result; + fn unpack_from_reader(read: &mut impl std::io::Read) -> Result; +} + +macro_rules! 
declare_idb_kind { + ($bytes:literal, $utype:ident, $itype:ident, $name:ident, $unapack_fun:ident) => { + #[derive(Debug, Clone, Copy)] + pub struct $name; + impl IDAKind for $name { + type Usize = $utype; + } + impl IDAUsize for $utype { + type Isize = $itype; + const BYTES: u8 = $bytes; + + fn from_le_bytes(data: &[u8]) -> Option { + Some(Self::from_le_bytes(data.try_into().ok()?)) + } + fn from_be_bytes(data: &[u8]) -> Option { + Some(Self::from_be_bytes(data.try_into().ok()?)) + } + fn from_le_reader(read: &mut impl std::io::Read) -> Result { + let mut data = [0; $bytes]; + read.read_exact(&mut data)?; + Ok(Self::from_le_bytes(data)) + } + fn from_be_reader(read: &mut impl std::io::Read) -> Result { + let mut data = [0; $bytes]; + read.read_exact(&mut data)?; + Ok(Self::from_be_bytes(data)) + } + fn unpack_from_reader( + read: &mut impl std::io::Read, + ) -> Result { + read.$unapack_fun() + } + } + }; +} + +declare_idb_kind!(4, u32, i32, IDA32, unpack_dd); +declare_idb_kind!(8, u64, i64, IDA64, unpack_dq); diff --git a/src/nam.rs b/src/nam.rs index 6f11574..1d839b4 100644 --- a/src/nam.rs +++ b/src/nam.rs @@ -1,114 +1,53 @@ use anyhow::{ensure, Result}; -use crate::ida_reader::IdaGenericUnpack; -use crate::{IDBHeader, IDBSectionCompression, VaVersion}; +use crate::ida_reader::{IdbRead, IdbReadKind}; +use crate::{IDAKind, IDAUsize, SectionReader, VaVersion}; #[derive(Debug, Clone)] pub struct NamSection { pub names: Vec, } -impl NamSection { - pub(crate) fn read( - input: &mut impl IdaGenericUnpack, - header: &IDBHeader, - compress: IDBSectionCompression, - ) -> Result { - match compress { - IDBSectionCompression::None => Self::read_inner(input, header), - IDBSectionCompression::Zlib => { - let mut input = flate2::read::ZlibDecoder::new(input); - Self::read_inner(&mut input, header) - } - } +impl SectionReader for NamSection { + type Result = Self; + + fn read_section>(input: &mut I) -> Result { + Self::read_inner::(input) + } + + fn size_from_v910(header: &crate::IDBHeaderV910) -> u64 { + header.nam.unwrap().size.get() } - pub(crate) fn read_inner( - input: &mut impl IdaGenericUnpack, - header: &IDBHeader, +} + +impl NamSection { + pub(crate) fn read_inner( + input: &mut impl IdbRead, ) -> Result { // NOTE 64 should be enougth for all version, if a new version is implemented // review this value const MAX_HEADER_LEN: usize = 64; - const DEFAULT_PAGE_SIZE: usize = 0x2000; - //assert!(MAX_HEADER_LEN < DEFAULT_PAGE_SIZE); let mut buf = vec![0; MAX_HEADER_LEN]; input.read_exact(&mut buf[..])?; - let mut header_page = &buf[..]; - let version = VaVersion::read(&mut header_page)?; - - let (npages, nnames, pagesize) = match version { - VaVersion::Va0 - | VaVersion::Va1 - | VaVersion::Va2 - | VaVersion::Va3 - | VaVersion::Va4 => { - let always1: u16 = bincode::deserialize_from(&mut header_page)?; - ensure!(always1 == 1); - let npages: u64 = if header.magic_version.is_64() { - bincode::deserialize_from(&mut header_page)? - } else { - bincode::deserialize_from::<_, u32>(&mut header_page)? - .into() - }; - let always0: u16 = bincode::deserialize_from(&mut header_page)?; - ensure!(always0 == 0); - let nnames: u64 = if header.magic_version.is_64() { - // TODO nnames / 2? Why? - bincode::deserialize_from::<_, u64>(&mut header_page)? / 2 - } else { - bincode::deserialize_from::<_, u32>(&mut header_page)? 
- .into() - }; - let pagesize: u32 = - bincode::deserialize_from(&mut header_page)?; - ensure!(pagesize >= 64); - (npages, nnames, pagesize) - } - VaVersion::VaX => { - let always3: u32 = bincode::deserialize_from(&mut header_page)?; - ensure!(always3 == 3); - let one_or_zero: u32 = - bincode::deserialize_from(&mut header_page)?; - ensure!([0, 1].contains(&one_or_zero)); - // TODO always2048 have some relation to pagesize? - let always2048: u32 = - bincode::deserialize_from(&mut header_page)?; - ensure!(always2048 == 2048); - let npages: u64 = if header.magic_version.is_64() { - bincode::deserialize_from(&mut header_page)? - } else { - bincode::deserialize_from::<_, u32>(&mut header_page)? - .into() - }; - let always0: u32 = bincode::deserialize_from(&mut header_page)?; - ensure!(always0 == 0); - let nnames: u64 = if header.magic_version.is_64() { - // TODO nnames / 2? Why? - bincode::deserialize_from::<_, u64>(&mut header_page)? / 2 - } else { - bincode::deserialize_from::<_, u32>(&mut header_page)? - .into() - }; - (npages, nnames, DEFAULT_PAGE_SIZE.try_into().unwrap()) - } - }; + let (npages, nnames, pagesize) = Self::read_header::(&mut &buf[..])?; ensure!( - npages >= 1, + npages >= K::Usize::from(1u8), "Invalid number of pages, need at least one page for the header" ); // read the rest of the header page and ensure it's all zeros buf.resize(pagesize.try_into().unwrap(), 0); - input.read_exact(&mut buf[64..])?; - ensure!(buf[64..].iter().all(|b| *b == 0)); + input.read_exact(&mut buf[MAX_HEADER_LEN..])?; + ensure!(buf[MAX_HEADER_LEN..].iter().all(|b| *b == 0)); - let name_len = if header.magic_version.is_64() { 8 } else { 4 }; + let name_len: u32 = K::Usize::BYTES.into(); // ensure pages dont break a name ensure!(pagesize % name_len == 0); // names fit inside the pages - let size_required = nnames * u64::from(name_len); - let available_data = (npages - 1) * u64::from(pagesize); + let size_required = nnames * K::Usize::from(name_len); + let available_data = + (npages - K::Usize::from(1u8)) * K::Usize::from(pagesize); ensure!( size_required <= available_data, "there is no enough size required {size_required} <= {available_data}" @@ -116,30 +55,72 @@ impl NamSection { let mut names = Vec::with_capacity(nnames.try_into().unwrap()); let mut current_nnames = nnames; - for _page in 1..npages { + for _page in 1u64..npages.into() { input.read_exact(&mut buf)?; let mut input = &buf[..]; loop { - if current_nnames == 0 { + if current_nnames == K::Usize::from(0u8) { break; }; - let name = if header.magic_version.is_64() { - bincode::deserialize_from::<_, u64>(&mut input) - } else { - bincode::deserialize_from::<_, u32>(&mut input) - .map(u64::from) - }; + let name = K::Usize::from_le_reader(&mut input); let Ok(name) = name else { break; }; - names.push(name); - current_nnames -= 1; + names.push(name.into()); + current_nnames -= K::Usize::from(1u8); } // if anything is left, make sure it's all zeros ensure!(input.iter().all(|b| *b == 0)); } - assert!(current_nnames == 0); + assert!(current_nnames == K::Usize::from(0u8)); Ok(Self { names }) } + + fn read_header( + input: &mut impl IdbReadKind, + ) -> Result<(K::Usize, K::Usize, u32)> { + const DEFAULT_PAGE_SIZE: usize = 0x2000; + //assert!(MAX_HEADER_LEN < DEFAULT_PAGE_SIZE); + match VaVersion::read(&mut *input)? 
{ + VaVersion::Va0 + | VaVersion::Va1 + | VaVersion::Va2 + | VaVersion::Va3 + | VaVersion::Va4 => { + let always1 = input.read_u16()?; + ensure!(always1 == 1); + let npages = input.read_usize()?; + let always0 = input.read_u16()?; + ensure!(always0 == 0); + let mut nnames = input.read_usize()?; + if K::Usize::BYTES == 8 { + // TODO nnames / 2? Why? + nnames /= K::Usize::from(2u8); + } + let pagesize = input.read_u32()?; + ensure!(pagesize >= 64); + Ok((npages, nnames, pagesize)) + } + VaVersion::VaX => { + let always3 = input.read_u32()?; + ensure!(always3 == 3); + let one_or_zero = input.read_u32()?; + ensure!([0, 1].contains(&one_or_zero)); + // TODO always2048 have some relation to pagesize? + let always2048 = input.read_u32()?; + ensure!(always2048 == 2048); + let npages = input.read_usize()?; + let always0 = input.read_u32()?; + ensure!(always0 == 0); + let mut nnames = input.read_usize()?; + // TODO remove this HACK to find if the Type is u64 + if K::Usize::BYTES == 8 { + // TODO nnames / 2? Why? + nnames /= K::Usize::from(2u8); + } + Ok((npages, nnames, DEFAULT_PAGE_SIZE.try_into().unwrap())) + } + } + } } diff --git a/src/til.rs b/src/til.rs index 3cd80d1..d485c91 100644 --- a/src/til.rs +++ b/src/til.rs @@ -19,7 +19,7 @@ use std::num::NonZeroU8; use anyhow::{anyhow, ensure, Context, Result}; -use crate::ida_reader::{IdaGenericBufUnpack, IdaGenericUnpack}; +use crate::ida_reader::{IdbBufRead, IdbRead}; use crate::til::array::{Array, ArrayRaw}; use crate::til::bitfield::Bitfield; @@ -135,7 +135,7 @@ pub(crate) struct TILTypeInfoRaw { impl TILTypeInfoRaw { pub(crate) fn read( - input: &mut impl IdaGenericBufUnpack, + input: &mut impl IdbBufRead, til: &TILSectionHeader, is_last: bool, ) -> Result { @@ -359,7 +359,7 @@ pub(crate) enum TypeVariantRaw { impl TypeRaw { pub fn read( - input: &mut impl IdaGenericBufUnpack, + input: &mut impl IdbBufRead, til: &TILSectionHeader, ) -> Result { let metadata: u8 = input.read_u8()?; @@ -448,7 +448,7 @@ impl TypeRaw { } pub fn read_ref( - input: &mut impl IdaGenericUnpack, + input: &mut impl IdbRead, header: &TILSectionHeader, ) -> Result { let mut bytes = input.unpack_dt_bytes()?; @@ -617,10 +617,11 @@ pub enum TypedefRaw { } impl TypedefRaw { - fn read(input: &mut impl IdaGenericUnpack) -> Result { + fn read(input: &mut impl IdbRead) -> Result { let buf = input.unpack_dt_bytes()?; match &buf[..] { [b'#', data @ ..] => { + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x2fbf90 let mut tmp = data; let de = tmp.read_de()?; if !tmp.is_empty() { @@ -754,7 +755,7 @@ pub enum TILMacroValue { } impl TILMacro { - fn read(input: &mut impl IdaGenericBufUnpack) -> Result { + fn read(input: &mut impl IdbBufRead) -> Result { let name = input.read_c_string_raw()?; // TODO find what this is let flag: u16 = input.read_u16()?; @@ -875,6 +876,7 @@ pub fn ephemeral_til_header() -> TILSectionHeader { size_long_double: None, extended_sizeof_info: None, cc: None, + compiler_guessed: false, cn: None, type_ordinal_alias: None, is_universal: true, @@ -895,11 +897,11 @@ impl CommentType { } Ok(Some(match field[0] { 5 if *field.last().unwrap() == b'.' => { - Self::Unknown5(u32::from_str_radix( - std::str::from_utf8(&field[1..field.len() - 1])?, - // TODO 10 or 16? - 10, - )?) + Self::Unknown5( + // TODO base 10 or 16? + std::str::from_utf8(&field[1..field.len() - 1])? 
+ .parse::()?, + ) } cmt_type @ 0..=0x1F => { return Err(anyhow!("Unknown comment type {cmt_type:#X}")) diff --git a/src/til/array.rs b/src/til/array.rs index ee9853a..a2657e6 100644 --- a/src/til/array.rs +++ b/src/til/array.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use std::num::{NonZeroU16, NonZeroU8}; -use crate::ida_reader::IdaGenericBufUnpack; +use crate::ida_reader::IdbBufRead; use crate::til::{Type, TypeAttribute, TypeRaw}; use crate::IDBString; @@ -50,7 +50,7 @@ pub(crate) struct ArrayRaw { impl ArrayRaw { pub(crate) fn read( - input: &mut impl IdaGenericBufUnpack, + input: &mut impl IdbBufRead, header: &TILSectionHeader, metadata: u8, ) -> anyhow::Result { diff --git a/src/til/bitfield.rs b/src/til/bitfield.rs index 4507d08..faa49c8 100644 --- a/src/til/bitfield.rs +++ b/src/til/bitfield.rs @@ -2,7 +2,7 @@ use std::num::NonZeroU8; use anyhow::Result; -use crate::ida_reader::IdaGenericBufUnpack; +use crate::ida_reader::IdbBufRead; use super::TypeAttribute; @@ -28,7 +28,7 @@ pub struct Bitfield { impl Bitfield { pub(crate) fn read( - input: &mut impl IdaGenericBufUnpack, + input: &mut impl IdbBufRead, metadata: u8, ) -> Result { // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x472f3c print_til_type diff --git a/src/til/enum.rs b/src/til/enum.rs index 4333197..3f986ce 100644 --- a/src/til/enum.rs +++ b/src/til/enum.rs @@ -1,6 +1,6 @@ use std::num::NonZeroU8; -use crate::ida_reader::IdaGenericBufUnpack; +use crate::ida_reader::IdbBufRead; use crate::til::{flag, TypeAttribute, TypeRaw, TypeVariantRaw}; use crate::IDBString; use anyhow::{anyhow, ensure}; @@ -69,13 +69,15 @@ pub(crate) struct EnumRaw { impl EnumRaw { // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x473a08 pub(crate) fn read( - input: &mut impl IdaGenericBufUnpack, + input: &mut impl IdbBufRead, header: &TILSectionHeader, ) -> anyhow::Result { use flag::tattr_enum::*; use flag::tf_enum::*; - let Some(member_num) = input.read_dt_de()? else { + // TODO n == 0 && n_cond == false? + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x325f87 + let Some((member_num, _)) = input.read_dt_de()? 
else { // is ref // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x4803b4 let ref_type = TypeRaw::read_ref(&mut *input, header)?; @@ -99,11 +101,26 @@ impl EnumRaw { is_64 = tattr & TAENUM_64BIT != 0; is_signed = tattr & TAENUM_SIGNED != 0; is_unsigned = tattr & TAENUM_UNSIGNED != 0; + // TODO handle those flags + let _is_oct = tattr & TAENUM_OCT != 0; + let _is_bin = tattr & TAENUM_BIN != 0; + let _is_numsign = tattr & TAENUM_NUMSIGN != 0; + let _is_lzero = tattr & TAENUM_LZERO != 0; + #[cfg(feature = "restrictive")] - ensure!( - tattr & !(TAENUM_64BIT | TAENUM_SIGNED | TAENUM_UNSIGNED) == 0, - "Invalid Enum taenum_bits {tattr:x}" - ); + { + const ALL_FLAGS: crate::til::flag::TattrT = TAENUM_64BIT + | TAENUM_SIGNED + | TAENUM_UNSIGNED + | TAENUM_OCT + | TAENUM_BIN + | TAENUM_NUMSIGN + | TAENUM_LZERO; + ensure!( + tattr & !ALL_FLAGS == 0, + "Invalid Enum taenum_bits {tattr:x}" + ); + } #[cfg(feature = "restrictive")] ensure!( !(is_signed && is_unsigned), diff --git a/src/til/flag.rs b/src/til/flag.rs index 05405e0..12db4d5 100644 --- a/src/til/flag.rs +++ b/src/til/flag.rs @@ -1,12 +1,12 @@ /// byte sequence used to describe a type in IDA -type TypeT = u8; +pub type TypeT = u8; /// Enum type flags -type BteT = u8; +pub type BteT = u8; /// Til Type flags -type TilT = u16; +pub type TilT = u16; /// TypeAtt Type flags -type TattrT = u16; -type CmT = u8; +pub type TattrT = u16; +pub type CmT = u8; /// multi-use pub const RESERVED_BYTE: TypeT = 0xFF; @@ -347,7 +347,7 @@ pub mod tf_enum { /// - 'de' mask (has name) /// - 'dt' cnt /// - cnt records of 'de' values - /// (cnt CAN be 0) + /// (cnt CAN be 0) /// /// NOTE: delta for ALL subsegment is ONE pub const BTE_BITFIELD: BteT = 0x10; @@ -497,6 +497,9 @@ pub mod tattr_udt { pub const TAUDT_CPPOBJ: TattrT = 0x0080; /// struct: is virtual function table pub const TAUDT_VFTABLE: TattrT = 0x0100; + /// struct: fixed field offsets, stored in serialized form, + /// cannot be set for unions + pub const TAUDT_FIXED: TattrT = 0x0400; } /// Type attributes for udt fields @@ -512,6 +515,16 @@ pub mod tattr_field { pub const TAFLD_VFTABLE: TattrT = 0x0100; /// denotes a udt member function pub const TAFLD_METHOD: TattrT = 0x0200; + /// gap member (displayed as padding in type details) + pub const TAFLD_GAP: TattrT = 0x0400; + /// the comment is regular (if not set, it is repeatable) + pub const TAFLD_REGCMT: TattrT = 0x0800; + /// function return address frame slot + pub const TAFLD_FRAME_R: TattrT = 0x1000; + /// function saved registers frame slot + pub const TAFLD_FRAME_S: TattrT = 0x2000; + /// was the member created due to the type system + pub const TAFLD_BYTIL: TattrT = 0x4000; } /// Type attributes for pointers @@ -530,12 +543,22 @@ pub mod tattr_ptr { /// Type attributes for enums pub mod tattr_enum { use super::TattrT; - /// enum: store 64-bit values + /// store 64-bit values pub const TAENUM_64BIT: TattrT = 0x0020; - /// enum: unsigned + /// unsigned pub const TAENUM_UNSIGNED: TattrT = 0x0040; - /// enum: signed + /// signed pub const TAENUM_SIGNED: TattrT = 0x0080; + /// octal representation, if BTE_HEX + pub const TAENUM_OCT: TattrT = 0x0100; + /// binary representation, if BTE_HEX + /// only one of OCT/BIN bits can be set. they + /// are meaningful only if BTE_HEX is used. 
+ pub const TAENUM_BIN: TattrT = 0x0200; + /// signed representation, if BTE_HEX + pub const TAENUM_NUMSIGN: TattrT = 0x0400; + /// print numbers with leading zeroes (only for HEX/OCT/BIN) + pub const TAENUM_LZERO: TattrT = 0x0800; } /// Type info library property bits @@ -650,4 +673,292 @@ pub mod cm { pub const C_PC_HUGE: CmT = CM_N16_F32 | CM_M_FF; pub const C_PC_FLAT: CmT = CM_N32_F48 | CM_M_NN; } + + pub mod comp { + pub const COMP_MASK: u8 = 0x0F; + /// Unknown + pub const COMP_UNK: u8 = 0x00; + /// Visual C++ + pub const COMP_MS: u8 = 0x01; + /// Borland C++ + pub const COMP_BC: u8 = 0x02; + /// Watcom C++ + pub const COMP_WATCOM: u8 = 0x03; + /// GNU C++ + pub const COMP_GNU: u8 = 0x06; + /// Visual Age C++ + pub const COMP_VISAGE: u8 = 0x07; + /// Delphi + pub const COMP_BP: u8 = 0x08; + /// uncertain compiler id + pub const COMP_UNSURE: u8 = 0x80; + } + + pub mod sc { + /// unknown + pub const SC_UNK: u8 = 0; + /// typedef + pub const SC_TYPE: u8 = 1; + /// extern + pub const SC_EXT: u8 = 2; + /// static + pub const SC_STAT: u8 = 3; + /// register + pub const SC_REG: u8 = 4; + /// auto + pub const SC_AUTO: u8 = 5; + /// friend + pub const SC_FRIEND: u8 = 6; + /// virtual + pub const SC_VIRT: u8 = 7; + } + + /// Format/Parse/Print type information + pub mod hti { + /// C++ mode (not implemented) + pub const HTI_CPP: u32 = 0x00000001; + /// debug: print internal representation of types + pub const HTI_INT: u32 = 0x00000002; + /// debug: print external representation of types + pub const HTI_EXT: u32 = 0x00000004; + /// debug: print tokens + pub const HTI_LEX: u32 = 0x00000008; + /// debug: check the result by unpacking it + pub const HTI_UNP: u32 = 0x00000010; + /// test mode: discard the result + pub const HTI_TST: u32 = 0x00000020; + /// "input" is file name, + /// otherwise "input" contains a C declaration + pub const HTI_FIL: u32 = 0x00000040; + + /// define macros from the base tils + pub const HTI_MAC: u32 = 0x00000080; + /// no warning messages + pub const HTI_NWR: u32 = 0x00000100; + /// ignore all errors but display them + pub const HTI_NER: u32 = 0x00000200; + /// don't complain about redeclarations + pub const HTI_DCL: u32 = 0x00000400; + /// don't decorate names + pub const HTI_NDC: u32 = 0x00000800; + /// explicit structure pack value (#pragma pack) + pub const HTI_PAK: u32 = 0x00007000; + + /// shift for #HTI_PAK. This field should + /// be used if you want to remember an explicit + /// pack value for each structure/union type. + /// See #HTI_PAK... 
definitions + pub const HTI_PAK_SHIFT: u32 = 12; + + /// default pack value + pub const HTI_PAKDEF: u32 = 0x00000000; + /// #pragma pack(1) + pub const HTI_PAK1: u32 = 0x00001000; + /// #pragma pack(2) + pub const HTI_PAK2: u32 = 0x00002000; + /// #pragma pack(4) + pub const HTI_PAK4: u32 = 0x00003000; + /// #pragma pack(8) + pub const HTI_PAK8: u32 = 0x00004000; + /// #pragma pack(16) + pub const HTI_PAK16: u32 = 0x00005000; + /// assume high level prototypes + pub const HTI_HIGH: u32 = 0x00008000; + + /// (with hidden args, etc) + /// lower the function prototypes + pub const HTI_LOWER: u32 = 0x00010000; + /// leave argument names unchanged (do not remove underscores) + pub const HTI_RAWARGS: u32 = 0x00020000; + /// accept references to unknown namespaces + pub const HTI_RELAXED: u32 = 0x00080000; + /// do not inspect base tils + pub const HTI_NOBASE: u32 = 0x00100000; + } + + pub mod pt { + /// silent, no messages + pub const PT_SIL: u32 = 0x0001; + /// don't decorate names + pub const PT_NDC: u32 = 0x0002; + /// return declared type information + pub const PT_TYP: u32 = 0x0004; + /// return declared object information + pub const PT_VAR: u32 = 0x0008; + /// mask for pack alignment values + pub const PT_PACKMASK: u32 = 0x0070; + /// assume high level prototypes + /// (with hidden args, etc) + pub const PT_HIGH: u32 = 0x0080; + /// lower the function prototypes + pub const PT_LOWER: u32 = 0x0100; + /// replace the old type (used in idc) + pub const PT_REPLACE: u32 = 0x0200; + /// leave argument names unchanged (do not remove underscores) + pub const PT_RAWARGS: u32 = 0x0400; + /// accept references to unknown namespaces + pub const PT_RELAXED: u32 = 0x1000; + /// accept empty decl + pub const PT_EMPTY: u32 = 0x2000; + } + + pub mod prtype { + /// print to one line + pub const PRTYPE_1LINE: u32 = 0x00000; + /// print to many lines + pub const PRTYPE_MULTI: u32 = 0x00001; + /// print type declaration (not variable declaration) + pub const PRTYPE_TYPE: u32 = 0x00002; + /// print pragmas for alignment + pub const PRTYPE_PRAGMA: u32 = 0x00004; + /// append ; to the end + pub const PRTYPE_SEMI: u32 = 0x00008; + /// use c++ name (only for print_type()) + pub const PRTYPE_CPP: u32 = 0x00010; + /// tinfo_t: print definition, if available + pub const PRTYPE_DEF: u32 = 0x00020; + /// tinfo_t: do not print function argument names + pub const PRTYPE_NOARGS: u32 = 0x00040; + /// tinfo_t: print arguments with #FAI_ARRAY as pointers + pub const PRTYPE_NOARRS: u32 = 0x00080; + /// tinfo_t: never resolve types (meaningful with PRTYPE_DEF) + pub const PRTYPE_NORES: u32 = 0x00100; + /// tinfo_t: print restored types for #FAI_ARRAY and #FAI_STRUCT + pub const PRTYPE_RESTORE: u32 = 0x00200; + /// do not apply regular expressions to beautify name + pub const PRTYPE_NOREGEX: u32 = 0x00400; + /// add color tag COLOR_SYMBOL for any parentheses, commas and colons + pub const PRTYPE_COLORED: u32 = 0x00800; + /// tinfo_t: print udt methods + pub const PRTYPE_METHODS: u32 = 0x01000; + /// print comments even in the one line mode + pub const PRTYPE_1LINCMT: u32 = 0x02000; + /// print only type header (only for definitions) + pub const PRTYPE_HEADER: u32 = 0x04000; + /// print udt member offsets + pub const PRTYPE_OFFSETS: u32 = 0x08000; + /// limit the output length to 1024 bytes (the output may be slightly longer) + pub const PRTYPE_MAXSTR: u32 = 0x10000; + /// print only the definition tail (only for definitions, exclusive with PRTYPE_HEADER) + pub const PRTYPE_TAIL: u32 = 0x20000; + /// print function arglocs (not 
only for usercall) + pub const PRTYPE_ARGLOCS: u32 = 0x40000; + } + + pub mod ntf { + /// type name + pub const NTF_TYPE: u32 = 0x0001; + /// symbol, name is unmangled ('func') + pub const NTF_SYMU: u32 = 0x0008; + /// symbol, name is mangled ('_func'); + /// only one of #NTF_TYPE and #NTF_SYMU, #NTF_SYMM can be used + pub const NTF_SYMM: u32 = 0x0000; + /// don't inspect base tils (for get_named_type) + pub const NTF_NOBASE: u32 = 0x0002; + /// replace original type (for set_named_type) + pub const NTF_REPLACE: u32 = 0x0004; + /// name is unmangled (don't use this flag) + pub const NTF_UMANGLED: u32 = 0x0008; + /// don't inspect current til file (for get_named_type) + pub const NTF_NOCUR: u32 = 0x0020; + /// value is 64bit + pub const NTF_64BIT: u32 = 0x0040; + /// force-validate the name of the type when setting (set_named_type, set_numbered_type only) + pub const NTF_FIXNAME: u32 = 0x0080; + /// the name is given in the IDB encoding; + /// non-ASCII bytes will be decoded accordingly + /// (set_named_type, set_numbered_type only) + pub const NTF_IDBENC: u32 = 0x0100; + /// check that synchronization to IDB passed OK + /// (set_numbered_type, set_named_type) + pub const NTF_CHKSYNC: u32 = 0x0200; + /// do not validate type name (set_numbered_type, set_named_type) + pub const NTF_NO_NAMECHK: u32 = 0x0400; + /// save a new type definition, not a typeref + /// (tinfo_t::set_numbered_type, tinfo_t::set_named_type) + pub const NTF_COPY: u32 = 0x1000; + } + + /// Function type information (see tinfo_t::get_func_details()) + pub mod fti { + /// information about spoiled registers is present + pub const FTI_SPOILED: u32 = 0x0001; + /// noreturn + pub const FTI_NORET: u32 = 0x0002; + /// __pure + pub const FTI_PURE: u32 = 0x0004; + /// high level prototype (with possibly hidden args) + pub const FTI_HIGH: u32 = 0x0008; + /// static + pub const FTI_STATIC: u32 = 0x0010; + /// virtual + pub const FTI_VIRTUAL: u32 = 0x0020; + + /// mask for FTI_*CALL + pub const FTI_CALLTYPE: u32 = 0x00C0; + /// default call + pub const FTI_DEFCALL: u32 = 0x0000; + /// near call + pub const FTI_NEARCALL: u32 = 0x0040; + /// far call + pub const FTI_FARCALL: u32 = 0x0080; + + /// interrupt call + pub const FTI_INTCALL: u32 = 0x00C0; + /// info about argument locations has been calculated (stkargs and retloc too) + pub const FTI_ARGLOCS: u32 = 0x0100; + /// all arglocs are specified explicitly + pub const FTI_EXPLOCS: u32 = 0x0200; + /// const member function + pub const FTI_CONST: u32 = 0x0400; + /// constructor + pub const FTI_CTOR: u32 = 0x0800; + /// destructor + pub const FTI_DTOR: u32 = 0x1000; + + /// all defined bits + pub const FTI_ALL: u32 = 0x1FFF; + } + + /// Visual representation of a member of a complex type (struct/union/enum) + pub mod frb { + /// Mask for the value type (* means requires additional info) + pub const FRB_MASK: u32 = 0xF; + /// Unknown + pub const FRB_UNK: u32 = 0x0; + /// Binary number + pub const FRB_NUMB: u32 = 0x1; + /// Octal number + pub const FRB_NUMO: u32 = 0x2; + /// Hexadecimal number + pub const FRB_NUMH: u32 = 0x3; + /// Decimal number + pub const FRB_NUMD: u32 = 0x4; + /// Floating point number (for interpreting an integer type as a floating value) + pub const FRB_FLOAT: u32 = 0x5; + /// Char + pub const FRB_CHAR: u32 = 0x6; + /// Segment + pub const FRB_SEG: u32 = 0x7; + /// *Enumeration + pub const FRB_ENUM: u32 = 0x8; + /// *Offset + pub const FRB_OFFSET: u32 = 0x9; + /// *String literal (used for arrays) + pub const FRB_STRLIT: u32 = 0xA; + /// *Struct offset + pub const 
FRB_STROFF: u32 = 0xB; + /// *Custom data type + pub const FRB_CUSTOM: u32 = 0xC; + /// Invert sign (0x01 is represented as -0xFF) + pub const FRB_INVSIGN: u32 = 0x0100; + /// Invert bits (0x01 is represented as ~0xFE) + pub const FRB_INVBITS: u32 = 0x0200; + /// Force signed representation + pub const FRB_SIGNED: u32 = 0x0400; + /// Toggle leading zeroes (used for integers) + pub const FRB_LZERO: u32 = 0x0800; + /// has additional tabular + pub const FRB_TABFORM: u32 = 0x1000; + } } diff --git a/src/til/function.rs b/src/til/function.rs index ca9047f..996c2e8 100644 --- a/src/til/function.rs +++ b/src/til/function.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use std::num::NonZeroU8; -use crate::ida_reader::{IdaGenericBufUnpack, IdaGenericUnpack}; +use crate::ida_reader::{IdbBufRead, IdbRead}; use crate::til::{Basic, Type, TypeRaw}; use crate::IDBString; use anyhow::{anyhow, ensure, Context, Result}; @@ -184,7 +184,7 @@ impl FunctionRaw { // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x473190 print_til_type // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x47c8f0 pub(crate) fn read( - input: &mut impl IdaGenericBufUnpack, + input: &mut impl IdbBufRead, header: &TILSectionHeader, metadata: u8, ) -> Result { @@ -198,7 +198,11 @@ impl FunctionRaw { }; // TODO InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x473bf1 print_til_type - let (cc, flags, _spoiled) = read_cc(&mut *input)?; + let FunctionCC { + cc, + flags, + _spoiled, + } = read_cc(&mut *input)?; let cc = CallingConvention::from_cm_raw(cc)?; // TODO investigate why this don't hold true @@ -302,7 +306,7 @@ impl FunctionRaw { } impl ArgLoc { - fn read(input: &mut impl IdaGenericUnpack) -> Result { + fn read(input: &mut impl IdbRead) -> Result { let t: u8 = input.read_u8()?; if t != 0xFF { let b = t & 0x7F; @@ -544,15 +548,23 @@ pub enum CallMethod { Int, } +struct FunctionCC { + cc: u8, + flags: u16, + _spoiled: Vec<(u16, u8)>, +} + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x476e60 /// [BT_FUNC](https://hex-rays.com/products/ida/support/sdkdoc/group__tf__func.html#ga7b7fee21f21237beb6d91e854410e0fa) -fn read_cc( - input: &mut impl IdaGenericBufUnpack, -) -> Result<(u8, u16, Vec<(u16, u8)>)> { +fn read_cc(input: &mut impl IdbBufRead) -> Result { let mut cc = input.read_u8()?; // TODO find the flag for that if cc & 0xF0 != 0xA0 { - return Ok((cc, 0, vec![])); + return Ok(FunctionCC { + cc, + flags: 0, + _spoiled: vec![], + }); } // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x46de7c let pbyte2 = input.peek_u8()?; @@ -574,7 +586,11 @@ fn read_cc( cc = input.read_u8()?; if cc & 0xF0 != 0xA0 { - return Ok((cc, flags.into(), spoiled)); + return Ok(FunctionCC { + cc, + flags: flags.into(), + _spoiled: spoiled, + }); } } } else { @@ -588,12 +604,16 @@ fn read_cc( } let cc = input.read_u8()?; // TODO is this `&` realy necessary? Should we allow invalid flags? 
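// Illustrative helper, not crate API: read_cc keeps consuming bytes while the
// high nibble of the calling-convention byte is 0xA, which marks an extended
// prefix (flags and spoiled registers) rather than the final cc value.
fn has_extended_cc_prefix(cc: u8) -> bool {
    cc & 0xF0 == 0xA0
}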
- Ok((cc, (flag & 0x1E3F) as u16, spoiled)) + Ok(FunctionCC { + cc, + flags: (flag & 0x1E3F) as u16, + _spoiled: spoiled, + }) } } fn read_cc_spoiled( - input: &mut impl IdaGenericBufUnpack, + input: &mut impl IdbBufRead, nspoiled: u16, spoiled: &mut Vec<(u16, u8)>, ) -> Result<()> { diff --git a/src/til/pointer.rs b/src/til/pointer.rs index 2b77562..51965ef 100644 --- a/src/til/pointer.rs +++ b/src/til/pointer.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use anyhow::Result; -use crate::ida_reader::IdaGenericBufUnpack; +use crate::ida_reader::IdbBufRead; use crate::til::{Type, TypeAttribute, TypeRaw}; use crate::IDBString; @@ -130,7 +130,7 @@ pub(crate) struct PointerRaw { impl PointerRaw { pub(crate) fn read( - input: &mut impl IdaGenericBufUnpack, + input: &mut impl IdbBufRead, header: &TILSectionHeader, metadata: u8, ) -> Result { @@ -217,7 +217,7 @@ pub(crate) enum PointerTypeRaw { impl PointerTypeRaw { fn read( - input: &mut impl IdaGenericBufUnpack, + input: &mut impl IdbBufRead, header: &TILSectionHeader, ) -> Result { let closure_type = input.read_u8()?; diff --git a/src/til/section.rs b/src/til/section.rs index b312ffc..829c524 100644 --- a/src/til/section.rs +++ b/src/til/section.rs @@ -1,7 +1,7 @@ use crate::id0::{Compiler, Id0TilOrd}; -use crate::ida_reader::{IdaGenericBufUnpack, IdaGenericUnpack}; +use crate::ida_reader::{IdbBufRead, IdbRead, IdbReadKind}; use crate::til::{flag, TILMacro, TILTypeInfo, TILTypeInfoRaw}; -use crate::{IDBSectionCompression, IDBString}; +use crate::{IDAKind, IDBSectionCompression, IDBString, SectionReader}; use anyhow::{anyhow, ensure, Result}; use serde::{Deserialize, Serialize}; @@ -22,6 +22,20 @@ pub struct TILSection { pub macros: Option>, } +impl SectionReader for TILSection { + type Result = Self; + + fn read_section + IdbBufRead>( + input: &mut I, + ) -> Result { + Self::read(input) + } + + fn size_from_v910(header: &crate::IDBHeaderV910) -> u64 { + header.til.unwrap().size.get() + } +} + #[derive(Debug, Clone)] pub(crate) struct TILSectionRaw { pub header: TILSectionHeader, @@ -41,6 +55,8 @@ pub struct TILSectionHeader { pub dependencies: Vec, /// the compiler used to generated types pub compiler_id: Compiler, + /// if the the compiler is just a guess + pub compiler_guessed: bool, /// default calling convention pub cc: Option, /// default calling ptr size @@ -79,17 +95,61 @@ pub struct TILSectionHeaderRaw { pub size_int: NonZeroU8, pub size_bool: NonZeroU8, pub def_align: Option, + // defaults to 2, 4, 8 pub extended_sizeof_info: Option, pub size_long_double: Option, } -#[derive(Debug, Clone, Copy, Deserialize, Serialize)] +#[derive(Debug, Clone, Copy)] pub struct TILSectionHeader1 { pub signature: [u8; 6], pub format: u32, pub flags: TILSectionFlags, } +impl TILSectionHeader1 { + pub(crate) fn deserialize(input: &mut impl IdbRead) -> Result { + let signature: [u8; 6] = bincode::deserialize_from(&mut *input)?; + ensure!(signature == *TIL_SECTION_MAGIC, "Invalid TIL Signature"); + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x431eb5 + let (format, flags) = match input.read_u32()? { + format @ 0x13.. 
=> { + return Err(anyhow!("Invalid TIL format {format}")) + } + // read the flag after the format + format @ 0x10..=0x12 => { + let flags = TILSectionFlags::new(input.read_u32()?)?; + (format, flags) + } + // format and flag are the same + value @ ..=0xf => (value, TILSectionFlags::new(value)?), + }; + Ok(Self { + signature, + format, + flags, + }) + } + + pub(crate) fn serialize( + self, + output: &mut impl Write, + ) -> std::io::Result<()> { + output.write_all(&self.signature)?; + output.write_all(&u32::to_le_bytes(self.format))?; + match self.format { + 0x13.. => unreachable!(), + // read the flag after the format + 0x10..=0x12 => { + output.write_all(&u32::to_le_bytes(self.flags.0.into()))? + } + // format and flag are the same + ..=0xf => {} + }; + Ok(()) + } +} + #[derive(Debug, Clone, Copy, Deserialize, Serialize)] pub struct TILSectionHeader2 { pub compiler_id: u8, @@ -101,21 +161,7 @@ pub struct TILSectionHeader2 { } impl TILSectionRaw { - pub(crate) fn read( - input: &mut impl IdaGenericBufUnpack, - compress: IDBSectionCompression, - ) -> Result { - match compress { - IDBSectionCompression::None => Self::read_inner(input), - IDBSectionCompression::Zlib => { - let mut input = - BufReader::new(flate2::bufread::ZlibDecoder::new(input)); - Self::read_inner(&mut input) - } - } - } - - fn read_inner(input: &mut impl IdaGenericBufUnpack) -> Result { + fn read(input: &mut impl IdbBufRead) -> Result { let header_raw = Self::read_header(&mut *input)?; // TODO verify that is always false? @@ -139,12 +185,17 @@ impl TILSectionRaw { } else { vec![] }; + let cc_id_raw = header_raw.compiler_id; + let compiler_guessed = cc_id_raw & 0x80 != 0; + let compiler_id = Compiler::try_from(cc_id_raw & 0x7F) + .map_err(|_| anyhow!("Invalid compiler id: {cc_id_raw}"))?; let mut header = TILSectionHeader { format: header_raw.format, description: IDBString::new(header_raw.description), flags: header_raw.flags, dependencies, - compiler_id: Compiler::from_value(header_raw.compiler_id), + compiler_id, + compiler_guessed, cc, cn, cm, @@ -172,6 +223,8 @@ impl TILSectionRaw { .then(|| Self::read_macros(&mut *input, &header)) .transpose()?; + // TODO streams + Ok(Self { symbols, types, @@ -182,20 +235,21 @@ impl TILSectionRaw { #[allow(clippy::type_complexity)] fn read_next_ordinal_and_alias( - input: &mut impl IdaGenericUnpack, + input: &mut impl IdbRead, header: &TILSectionHeader, ) -> Result<(Option, Option>)> { // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x42e292 - if !header.flags.has_ordinal() { - return Ok((None, None)); - } - let next_ord = input.read_u32()?; + let next_ord = header + .flags + .has_ordinal() + .then(|| input.read_u32()) + .transpose()?; - match (header.flags.has_type_aliases(), next_ord) { + let next_ord = match (header.flags.has_type_aliases(), next_ord) { // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x42e2a8 - (false, _) | (_, 0) => return Ok((Some(next_ord), None)), + (false, _) | (_, Some(0) | None) => return Ok((next_ord, None)), // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x42e29c - (true, 1..) 
=> {} + (true, Some(next_ord @ 1..)) => next_ord, }; // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x42e2b1 @@ -232,30 +286,9 @@ impl TILSectionRaw { Ok((Some(next_ord), Some(ordinals))) } - fn read_header( - input: &mut impl IdaGenericUnpack, - ) -> Result { + fn read_header(input: &mut impl IdbRead) -> Result { // TODO this break a few files - let signature: [u8; 6] = bincode::deserialize_from(&mut *input)?; - ensure!(signature == *TIL_SECTION_MAGIC, "Invalid TIL Signature"); - // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x431eb5 - let (format, flags) = match input.read_u32()? { - format @ 0x13.. => { - return Err(anyhow!("Invalid TIL format {format}")) - } - // read the flag after the format - format @ 0x10..=0x12 => { - let flags = TILSectionFlags::new(input.read_u32()?)?; - (format, flags) - } - // format and flag are the same - value @ ..=0xf => (value, TILSectionFlags::new(value)?), - }; - let header1 = TILSectionHeader1 { - signature, - format, - flags, - }; + let header1 = TILSectionHeader1::deserialize(&mut *input)?; let description = input.read_bytes_len_u8()?; let mut dependencies = input.read_bytes_len_u8()?; @@ -327,9 +360,7 @@ impl TILSectionRaw { }) } - fn read_bucket_header( - input: &mut impl IdaGenericUnpack, - ) -> Result<(u32, u32)> { + fn read_bucket_header(input: &mut impl IdbRead) -> Result<(u32, u32)> { let ndefs = bincode::deserialize_from(&mut *input)?; // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x42e3e0 //ensure!(ndefs < 0x55555555); @@ -338,7 +369,7 @@ impl TILSectionRaw { } fn read_bucket_zip_header( - input: &mut impl IdaGenericUnpack, + input: &mut impl IdbRead, ) -> Result<(u32, u32, u32)> { let (ndefs, len) = Self::read_bucket_header(&mut *input)?; let compressed_len = bincode::deserialize_from(&mut *input)?; @@ -346,7 +377,7 @@ impl TILSectionRaw { } fn read_bucket( - input: &mut impl IdaGenericBufUnpack, + input: &mut impl IdbBufRead, header: &TILSectionHeader, next_ordinal: Option, ) -> Result> { @@ -358,7 +389,7 @@ impl TILSectionRaw { } fn read_bucket_normal( - input: &mut impl IdaGenericBufUnpack, + input: &mut impl IdbBufRead, header: &TILSectionHeader, next_ordinal: Option, ) -> Result> { @@ -367,7 +398,7 @@ impl TILSectionRaw { } fn read_bucket_zip( - input: &mut impl IdaGenericBufUnpack, + input: &mut impl IdbBufRead, header: &TILSectionHeader, next_ordinal: Option, ) -> Result> { @@ -395,7 +426,7 @@ impl TILSectionRaw { } fn read_bucket_inner( - input: &mut impl IdaGenericBufUnpack, + input: &mut impl IdbBufRead, header: &TILSectionHeader, ndefs: u32, len: u32, @@ -425,7 +456,7 @@ impl TILSectionRaw { } fn read_macros( - input: &mut impl IdaGenericBufUnpack, + input: &mut impl IdbBufRead, header: &TILSectionHeader, ) -> Result> { if header.flags.is_zip() { @@ -436,7 +467,7 @@ impl TILSectionRaw { } fn read_macros_normal( - input: &mut impl IdaGenericBufUnpack, + input: &mut impl IdbBufRead, ) -> Result> { let (ndefs, len) = Self::read_bucket_header(&mut *input)?; let mut input = input.take(len.into()); @@ -451,9 +482,7 @@ impl TILSectionRaw { Ok(type_info) } - fn read_macros_zip( - input: &mut impl IdaGenericBufUnpack, - ) -> Result> { + fn read_macros_zip(input: &mut impl IdbBufRead) -> Result> { let (ndefs, len, compressed_len) = Self::read_bucket_zip_header(&mut *input)?; // make sure the decompressor don't read out-of-bounds @@ -484,7 +513,7 @@ impl TILSectionRaw { impl TILSection { pub fn decompress( - input: &mut impl IdaGenericBufUnpack, + input: &mut impl IdbBufRead, output: &mut impl Write, compress: IDBSectionCompression, 
) -> Result<()> { @@ -497,11 +526,16 @@ impl TILSection { IDBSectionCompression::None => { Self::decompress_inner(input, output) } + IDBSectionCompression::Zstd => { + let mut input = + BufReader::new(zstd::Decoder::with_buffer(input)?); + Self::decompress_inner(&mut input, output) + } } } fn decompress_inner( - input: &mut impl IdaGenericBufUnpack, + input: &mut impl IdbBufRead, output: &mut impl Write, ) -> Result<()> { let mut header = TILSectionRaw::read_header(&mut *input)?; @@ -532,26 +566,29 @@ impl TILSection { size_enum: header.size_enum.map(NonZeroU8::get).unwrap_or(0), def_align, }; - bincode::serialize_into(&mut *output, &header1)?; + header1.serialize(&mut *output)?; crate::write_string_len_u8(&mut *output, &header.description)?; crate::write_string_len_u8(&mut *output, &header.dependencies)?; bincode::serialize_into(&mut *output, &header2)?; if header.flags.have_extended_sizeof_info() { let sizes = header.extended_sizeof_info.unwrap(); + bincode::serialize_into(&mut *output, &sizes.size_short.get())?; + bincode::serialize_into(&mut *output, &sizes.size_long.get())?; + bincode::serialize_into(&mut *output, &sizes.size_long_long.get())?; + } + + if header.flags.has_size_long_double() { bincode::serialize_into( &mut *output, - &( - sizes.size_short.get(), - sizes.size_long.get(), - sizes.size_long_long.get(), - ), + &header.size_long_double.unwrap().get(), )?; } - if header.flags.has_size_long_double() { + if let Some(def_align) = header.def_align { + let value = def_align.trailing_zeros() + 1; bincode::serialize_into( &mut *output, - &header.size_long_double.unwrap().get(), + &u8::try_from(value).unwrap(), )?; } @@ -587,7 +624,7 @@ impl TILSection { #[allow(dead_code)] fn decompress_bucket( - input: &mut impl IdaGenericBufUnpack, + input: &mut impl IdbBufRead, output: &mut impl std::io::Write, ) -> Result<()> { let (ndefs, len, compressed_len) = @@ -676,11 +713,8 @@ impl TILSection { } impl TILSection { - pub fn read( - input: &mut impl IdaGenericBufUnpack, - compress: IDBSectionCompression, - ) -> Result { - let type_info_raw = TILSectionRaw::read(input, compress)?; + pub fn read(input: &mut impl IdbBufRead) -> Result { + let type_info_raw = TILSectionRaw::read(input)?; // TODO check for dups? let type_by_name = type_info_raw .types diff --git a/src/til/struct.rs b/src/til/struct.rs index 38bb5e0..e8f33a5 100644 --- a/src/til/struct.rs +++ b/src/til/struct.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use std::num::NonZeroU8; -use crate::ida_reader::IdaGenericBufUnpack; +use crate::ida_reader::IdbBufRead; use crate::til::{Type, TypeRaw}; use crate::IDBString; use anyhow::{anyhow, ensure, Context, Result}; @@ -14,6 +14,8 @@ use super::{CommentType, TypeAttribute, TypeVariantRaw}; pub struct Struct { pub effective_alignment: Option, pub members: Vec, + pub extra_padding: Option, + /// Unaligned struct pub is_unaligned: bool, /// Gcc msstruct attribute @@ -53,6 +55,7 @@ impl Struct { Ok(Struct { effective_alignment: value.effective_alignment, members, + extra_padding: value.extra_padding, is_unaligned: value.is_unaligned, is_msstruct: value.is_msstruct, is_cppobj: value.is_cppobj, @@ -74,6 +77,7 @@ impl Struct { pub(crate) struct StructRaw { effective_alignment: Option, members: Vec, + extra_padding: Option, /// Unaligned struct is_unaligned: bool, @@ -91,11 +95,14 @@ pub(crate) struct StructRaw { impl StructRaw { pub fn read( - input: &mut impl IdaGenericBufUnpack, + input: &mut impl IdbBufRead, header: &TILSectionHeader, ) -> Result { + // TODO n == 0 && n_cond == false? 
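// Sketch under an assumption: the count returned by read_dt_de just below is
// split into a member count and a 3-bit packing alignment, matching the
// "n = 0x30, mem_cnt = 6, packalig = 0" comment in the parse_struct_with_fixed
// test; the helper and its exact split are illustrative, not crate API.
fn split_struct_count(n: u32) -> (u32, u8) {
    (n >> 3, (n & 0x7) as u8)
}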
+ // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x325f87 + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x303393 // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x459883 - let Some(n) = input.read_dt_de()? else { + let Some((n, _)) = input.read_dt_de()? else { // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x4803b4 // simple reference let ref_type = TypeRaw::read_ref(&mut *input, header)?; @@ -119,8 +126,10 @@ impl StructRaw { let mut is_unaligned = false; let mut is_cppobj = false; let mut is_vft = false; + let mut is_fixed = false; let mut is_method = false; let mut is_bitset2 = false; + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x30379a if let Some(TypeAttribute { tattr, extended: _extended, @@ -141,6 +150,7 @@ impl StructRaw { is_unaligned = tattr & TAUDT_UNALIGNED != 0; is_cppobj = tattr & TAUDT_CPPOBJ != 0; is_vft = tattr & TAUDT_VFTABLE != 0; + is_fixed = tattr & TAUDT_FIXED != 0; // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x478203 // TODO using a field flag on the struct seems out-of-place is_method = tattr & TAFLD_METHOD != 0; @@ -153,6 +163,7 @@ impl StructRaw { | TAUDT_UNALIGNED | TAUDT_CPPOBJ | TAUDT_VFTABLE + | TAUDT_FIXED | TAFLD_METHOD; #[cfg(feature = "restrictive")] ensure!( @@ -172,15 +183,21 @@ impl StructRaw { &mut *input, header, is_method, + is_fixed, is_bitset2, ) .with_context(|| format!("Member {i}")) }) .collect::>()?; + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x3269ca + let extra_padding = + is_fixed.then(|| input.read_ext_att()).transpose()?; + Ok(TypeVariantRaw::Struct(Self { effective_alignment, members, + extra_padding, is_unaligned, is_msstruct, is_cppobj, @@ -253,15 +270,17 @@ pub(crate) struct StructMemberRaw { impl StructMemberRaw { fn read( - input: &mut impl IdaGenericBufUnpack, + input: &mut impl IdbBufRead, header: &TILSectionHeader, - is_bit_set: bool, + is_method: bool, + is_fixed: bool, is_bit_set2: bool, ) -> Result { + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x326610 let ty = TypeRaw::read(&mut *input, header)?; // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x478256 - let att = is_bit_set + let att = is_method .then(|| Self::read_member_att_1(input, header)) .transpose()?; @@ -273,7 +292,7 @@ impl StructMemberRaw { let mut is_unknown_8 = false; // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x47825d - if !is_bit_set || att.is_some() { + if !is_method || att.is_some() { // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x47825d if let Some(TypeAttribute { tattr, @@ -298,11 +317,22 @@ impl StructMemberRaw { is_vft = tattr & TAFLD_VFTABLE != 0; // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x478203 is_method = tattr & TAFLD_METHOD != 0; + // TODO handle those flags + let _is_gap = tattr & TAFLD_GAP != 0; + let _is_regcmt = tattr & TAFLD_REGCMT != 0; + let _is_frame_r = tattr & TAFLD_FRAME_R != 0; + let _is_frame_s = tattr & TAFLD_FRAME_S != 0; + let _is_bytil = tattr & TAFLD_BYTIL != 0; const _ALL_FLAGS: u16 = MAX_DECL_ALIGN | TAFLD_BASECLASS | TAFLD_UNALIGNED | TAFLD_VFTABLE - | TAFLD_METHOD; + | TAFLD_METHOD + | TAFLD_GAP + | TAFLD_REGCMT + | TAFLD_FRAME_R + | TAFLD_FRAME_S + | TAFLD_BYTIL; #[cfg(feature = "restrictive")] ensure!( tattr & !_ALL_FLAGS == 0, @@ -315,6 +345,12 @@ impl StructMemberRaw { ); } + if is_fixed && !is_method { + // TODO unknown meaning + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x326820 + let _value = input.read_ext_att()?; + } + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x47822d if is_bit_set2 && !is_method { // TODO there is more to 
this impl? @@ -337,7 +373,7 @@ impl StructMemberRaw { // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x486cd0 fn read_member_att_1( - input: &mut impl IdaGenericBufUnpack, + input: &mut impl IdbBufRead, _header: &TILSectionHeader, ) -> Result { let att = input.read_ext_att()?; @@ -377,7 +413,7 @@ impl StructMemberRaw { } fn basic_att( - input: &mut impl IdaGenericBufUnpack, + input: &mut impl IdbBufRead, att: u64, ) -> Result { if (att >> 8) & 0x10 != 0 { diff --git a/src/til/union.rs b/src/til/union.rs index 3c15bb9..15905f2 100644 --- a/src/til/union.rs +++ b/src/til/union.rs @@ -3,7 +3,7 @@ use anyhow::{anyhow, Context, Result}; use std::collections::HashMap; use std::num::{NonZeroU16, NonZeroU8}; -use crate::ida_reader::IdaGenericBufUnpack; +use crate::ida_reader::IdbBufRead; use crate::til::{Type, TypeRaw}; use crate::IDBString; @@ -76,10 +76,12 @@ pub(crate) struct UnionRaw { impl UnionRaw { pub fn read( - input: &mut impl IdaGenericBufUnpack, + input: &mut impl IdbBufRead, header: &TILSectionHeader, ) -> Result { - let Some(n) = input.read_dt_de()? else { + // TODO n == 0 && n_cond == false? + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x325f87 + let Some((n, _)) = input.read_dt_de()? else { // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x4803b4 // is ref let ref_type = TypeRaw::read_ref(&mut *input, header)?; diff --git a/src/tools/decompress_til.rs b/src/tools/decompress_til.rs index 355c16b..7a4fe31 100644 --- a/src/tools/decompress_til.rs +++ b/src/tools/decompress_til.rs @@ -2,8 +2,7 @@ use std::fs::File; use std::io::BufReader; use anyhow::{anyhow, Result}; -use idb_rs::til::section::TILSection; -use idb_rs::IDBParser; +use idb_rs::{til::section::TILSection, IDAVariants}; use crate::{Args, DecompressTilArgs, FileType}; @@ -13,7 +12,7 @@ pub fn decompress_til(args: &Args, til_args: &DecompressTilArgs) -> Result<()> { match args.input_type() { FileType::Idb => { let input = BufReader::new(File::open(&args.input)?); - let mut parser = IDBParser::new(input)?; + let mut parser = IDAVariants::new(input)?; let til_offset = parser.til_section_offset().ok_or_else(|| { anyhow!("IDB file don't contains a TIL sector") })?; diff --git a/src/tools/dump_addr_info.rs b/src/tools/dump_addr_info.rs index 5905709..3e3f8ca 100644 --- a/src/tools/dump_addr_info.rs +++ b/src/tools/dump_addr_info.rs @@ -2,10 +2,18 @@ use crate::{get_id0_section, Args}; use anyhow::Result; +use idb_rs::id0::ID0Section; +use idb_rs::{IDAKind, IDAVariants}; + pub fn dump_addr_info(args: &Args) -> Result<()> { // parse the id0 sector/file - let id0 = get_id0_section(args)?; + match get_id0_section(args)? { + IDAVariants::IDA32(id0) => dump(id0), + IDAVariants::IDA64(id0) => dump(id0), + } +} +fn dump(id0: ID0Section) -> Result<()> { // TODO create a function for that in ida_info let version = match id0.ida_info()? { idb_rs::id0::IDBParam::V1(idb_rs::id0::IDBParam1 { diff --git a/src/tools/dump_dirtree_bookmarks_idaplace.rs b/src/tools/dump_dirtree_bookmarks_idaplace.rs index 4e2c839..1dad51e 100644 --- a/src/tools/dump_dirtree_bookmarks_idaplace.rs +++ b/src/tools/dump_dirtree_bookmarks_idaplace.rs @@ -2,10 +2,18 @@ use crate::{get_id0_section, Args}; use anyhow::Result; +use idb_rs::id0::ID0Section; +use idb_rs::{IDAKind, IDAVariants}; + pub fn dump_dirtree_bookmarks_idaplace(args: &Args) -> Result<()> { // parse the id0 sector/file - let id0 = get_id0_section(args)?; + match get_id0_section(args)? 
{ + IDAVariants::IDA32(id0) => dump(id0), + IDAVariants::IDA64(id0) => dump(id0), + } +} +fn dump(id0: ID0Section) -> Result<()> { let dirtree = id0.dirtree_bookmarks_idaplace()?; println!("{:?}", dirtree); diff --git a/src/tools/dump_dirtree_bookmarks_structplace.rs b/src/tools/dump_dirtree_bookmarks_structplace.rs index 68a3f33..5007803 100644 --- a/src/tools/dump_dirtree_bookmarks_structplace.rs +++ b/src/tools/dump_dirtree_bookmarks_structplace.rs @@ -2,10 +2,18 @@ use crate::{get_id0_section, Args}; use anyhow::Result; +use idb_rs::id0::ID0Section; +use idb_rs::{IDAKind, IDAVariants}; + pub fn dump_dirtree_bookmarks_structplace(args: &Args) -> Result<()> { // parse the id0 sector/file - let id0 = get_id0_section(args)?; + match get_id0_section(args)? { + IDAVariants::IDA32(id0) => dump(id0), + IDAVariants::IDA64(id0) => dump(id0), + } +} +fn dump(id0: ID0Section) -> Result<()> { let dirtree = id0.dirtree_bookmarks_structplace()?; println!("{:?}", dirtree); diff --git a/src/tools/dump_dirtree_bookmarks_tiplace.rs b/src/tools/dump_dirtree_bookmarks_tiplace.rs index 561c787..b8153b3 100644 --- a/src/tools/dump_dirtree_bookmarks_tiplace.rs +++ b/src/tools/dump_dirtree_bookmarks_tiplace.rs @@ -2,10 +2,18 @@ use crate::{get_id0_section, Args}; use anyhow::Result; +use idb_rs::id0::ID0Section; +use idb_rs::{IDAKind, IDAVariants}; + pub fn dump_dirtree_bookmarks_tiplace(args: &Args) -> Result<()> { // parse the id0 sector/file - let id0 = get_id0_section(args)?; + match get_id0_section(args)? { + IDAVariants::IDA32(id0) => dump(id0), + IDAVariants::IDA64(id0) => dump(id0), + } +} +fn dump(id0: ID0Section) -> Result<()> { let dirtree = id0.dirtree_bookmarks_tiplace()?; println!("{:?}", dirtree); diff --git a/src/tools/dump_dirtree_bpts.rs b/src/tools/dump_dirtree_bpts.rs index 7071698..3265337 100644 --- a/src/tools/dump_dirtree_bpts.rs +++ b/src/tools/dump_dirtree_bpts.rs @@ -2,10 +2,18 @@ use crate::{get_id0_section, Args}; use anyhow::Result; +use idb_rs::id0::ID0Section; +use idb_rs::{IDAKind, IDAVariants}; + pub fn dump_dirtree_bpts(args: &Args) -> Result<()> { // parse the id0 sector/file - let id0 = get_id0_section(args)?; + match get_id0_section(args)? { + IDAVariants::IDA32(id0) => dump(id0), + IDAVariants::IDA64(id0) => dump(id0), + } +} +fn dump(id0: ID0Section) -> Result<()> { let dirtree = id0.dirtree_imports()?; println!("{:?}", dirtree); diff --git a/src/tools/dump_dirtree_enums.rs b/src/tools/dump_dirtree_enums.rs index f9deee5..4af2cd6 100644 --- a/src/tools/dump_dirtree_enums.rs +++ b/src/tools/dump_dirtree_enums.rs @@ -2,10 +2,18 @@ use crate::{get_id0_section, Args}; use anyhow::Result; +use idb_rs::id0::ID0Section; +use idb_rs::{IDAKind, IDAVariants}; + pub fn dump_dirtree_enums(args: &Args) -> Result<()> { // parse the id0 sector/file - let id0 = get_id0_section(args)?; + match get_id0_section(args)? 
{ + IDAVariants::IDA32(id0) => dump(id0), + IDAVariants::IDA64(id0) => dump(id0), + } +} +fn dump(id0: ID0Section) -> Result<()> { let dirtree = id0.dirtree_enums()?; println!("{:?}", dirtree); diff --git a/src/tools/dump_dirtree_funcs.rs b/src/tools/dump_dirtree_funcs.rs index 1c53df0..2ee581d 100644 --- a/src/tools/dump_dirtree_funcs.rs +++ b/src/tools/dump_dirtree_funcs.rs @@ -2,24 +2,34 @@ use crate::{dump_dirtree::print_dirtree, get_id0_section, Args}; use anyhow::{ensure, Result}; use idb_rs::id0::{ID0Section, Id0Address, Id0AddressKey}; +use idb_rs::{IDAKind, IDAVariants}; pub fn dump_dirtree_funcs(args: &Args) -> Result<()> { - // parse the id0 sector/file - let id0 = get_id0_section(args)?; + // parse the id0 sector/file match get_id0_section(args)? { + match get_id0_section(args)? { + IDAVariants::IDA32(id0) => dump(id0), + IDAVariants::IDA64(id0) => dump(id0), + } +} +fn dump(id0: ID0Section) -> Result<()> { let dirtree = id0.dirtree_function_address()?; print_dirtree(|entry| print_function(&id0, *entry).unwrap(), &dirtree); Ok(()) } -pub fn print_function(id0: &ID0Section, address: Id0Address) -> Result<()> { +pub fn print_function( + id0: &ID0Section, + address: Id0Address, +) -> Result<()> { let infos = id0.address_info_at(address)?; let mut name = None; let mut ty = None; for info in infos { match info? { idb_rs::id0::AddressInfo::Comment(_) + | idb_rs::id0::AddressInfo::DefinedStruct(_) | idb_rs::id0::AddressInfo::Other { .. } => {} idb_rs::id0::AddressInfo::Label(label) => { if let Some(_old) = name.replace(label) { @@ -47,5 +57,6 @@ pub fn print_function(id0: &ID0Section, address: Id0Address) -> Result<()> { (None, Some(ty)) => print!("UNAMED:{ty:?}"), (Some(name), None) => print!("\"{name}\""), } + println!(); Ok(()) } diff --git a/src/tools/dump_dirtree_imports.rs b/src/tools/dump_dirtree_imports.rs index 649a62e..d703caa 100644 --- a/src/tools/dump_dirtree_imports.rs +++ b/src/tools/dump_dirtree_imports.rs @@ -2,9 +2,19 @@ use crate::{get_id0_section, Args}; use anyhow::Result; +use idb_rs::id0::ID0Section; +use idb_rs::{IDAKind, IDAVariants}; + pub fn dump_dirtree_imports(args: &Args) -> Result<()> { // parse the id0 sector/file - let id0 = get_id0_section(args)?; + match get_id0_section(args)? { + IDAVariants::IDA32(id0) => dump(id0), + IDAVariants::IDA64(id0) => dump(id0), + } +} + +fn dump(id0: ID0Section) -> Result<()> { + println!("Loader Name AKA `$ loader name`: "); let dirtree = id0.dirtree_bpts()?; println!("{:?}", dirtree); diff --git a/src/tools/dump_dirtree_names.rs b/src/tools/dump_dirtree_names.rs index bd219bc..a2ec3a5 100644 --- a/src/tools/dump_dirtree_names.rs +++ b/src/tools/dump_dirtree_names.rs @@ -1,19 +1,26 @@ use crate::{dump_dirtree::print_dirtree, get_id0_section, Args}; use anyhow::Result; -use idb_rs::id0::Id0AddressKey; + +use idb_rs::id0::{ID0Section, Id0AddressKey}; +use idb_rs::{IDAKind, IDAVariants}; pub fn dump_dirtree_names(args: &Args) -> Result<()> { // parse the id0 sector/file - let id0 = get_id0_section(args)?; + match get_id0_section(args)? 
{ + IDAVariants::IDA32(id0) => dump(id0), + IDAVariants::IDA64(id0) => dump(id0), + } +} +fn dump(id0: ID0Section) -> Result<()> { let dirtree = id0.dirtree_names()?; print_dirtree( |address| { print!("{:#x}:", address.as_u64()); let label = id0.label_at(*address); if let Some(name) = label.unwrap() { - print!("{}", String::from_utf8_lossy(name)); + print!("{}", String::from_utf8_lossy(&name)); } else { print!("[Label Not Found]"); } diff --git a/src/tools/dump_dirtree_structs.rs b/src/tools/dump_dirtree_structs.rs index f231a61..a863d16 100644 --- a/src/tools/dump_dirtree_structs.rs +++ b/src/tools/dump_dirtree_structs.rs @@ -2,10 +2,18 @@ use crate::{get_id0_section, Args}; use anyhow::Result; +use idb_rs::id0::ID0Section; +use idb_rs::{IDAKind, IDAVariants}; + pub fn dump_dirtree_structs(args: &Args) -> Result<()> { // parse the id0 sector/file - let id0 = get_id0_section(args)?; + match get_id0_section(args)? { + IDAVariants::IDA32(id0) => dump(id0), + IDAVariants::IDA64(id0) => dump(id0), + } +} +fn dump(id0: ID0Section) -> Result<()> { let dirtree = id0.dirtree_structs()?; println!("{:?}", dirtree); diff --git a/src/tools/dump_dirtree_types.rs b/src/tools/dump_dirtree_types.rs index 497140a..147cb00 100644 --- a/src/tools/dump_dirtree_types.rs +++ b/src/tools/dump_dirtree_types.rs @@ -4,29 +4,35 @@ use std::io::BufReader; use crate::{dump_dirtree::print_dirtree, Args, FileType}; use anyhow::{anyhow, Result}; -use idb_rs::{id0::Id0TilOrd, IDBParser}; + +use idb_rs::id0::{ID0Section, Id0TilOrd}; +use idb_rs::til::section::TILSection; +use idb_rs::{IDAKind, IDAVariants}; pub fn dump_dirtree_types(args: &Args) -> Result<()> { // parse the id0 sector/file - let (id0, til) = match args.input_type() { - FileType::Til => { - return Err(anyhow!("TIL don't contains any ID0 data")) - } + match args.input_type() { + FileType::Til => Err(anyhow!("TIL don't contains any ID0 data")), FileType::Idb => { let input = BufReader::new(File::open(&args.input)?); - let mut parser = IDBParser::new(input)?; + let mut parser = IDAVariants::new(input)?; let id0_offset = parser.id0_section_offset().ok_or_else(|| { anyhow!("IDB file don't contains a ID0 sector") })?; - let id0 = parser.read_id0_section(id0_offset)?; let til_offset = parser.til_section_offset().ok_or_else(|| { anyhow!("IDB file don't contains a TIL sector") })?; + let id0 = parser.read_id0_section(id0_offset)?; let til = parser.read_til_section(til_offset)?; - (id0, til) + match id0 { + IDAVariants::IDA32(id0) => dump(&id0, &til), + IDAVariants::IDA64(id0) => dump(&id0, &til), + } } - }; + } +} +fn dump(id0: &ID0Section, til: &TILSection) -> Result<()> { let dirtree = id0.dirtree_tinfos()?; let print_til = |id0ord: &Id0TilOrd| { if let Some(til) = til.get_ord(*id0ord) { diff --git a/src/tools/dump_functions.rs b/src/tools/dump_functions.rs index 72988fc..2623b1e 100644 --- a/src/tools/dump_functions.rs +++ b/src/tools/dump_functions.rs @@ -1,12 +1,19 @@ use crate::{dump_dirtree_funcs::print_function, get_id0_section, Args}; use anyhow::Result; -use idb_rs::id0::{Comments, EntryPoint, Id0AddressKey}; + +use idb_rs::id0::{Comments, EntryPoint, ID0Section, Id0AddressKey}; +use idb_rs::{IDAKind, IDAVariants}; pub fn dump_functions(args: &Args) -> Result<()> { // parse the id0 sector/file - let id0 = get_id0_section(args)?; + match get_id0_section(args)? 
{ + IDAVariants::IDA32(id0) => dump(id0), + IDAVariants::IDA64(id0) => dump(id0), + } +} +fn dump(id0: ID0Section) -> Result<()> { println!("Function and Comments AKA `$ funcs`: "); for entry in id0.functions_and_comments()? { match entry? { diff --git a/src/tools/dump_id0.rs b/src/tools/dump_id0.rs index 4f40417..fe98790 100644 --- a/src/tools/dump_id0.rs +++ b/src/tools/dump_id0.rs @@ -4,10 +4,18 @@ use crate::{get_id0_section, Args}; use anyhow::Result; +use idb_rs::id0::ID0Section; +use idb_rs::{IDAKind, IDAVariants}; + pub fn dump_id0(args: &Args) -> Result<()> { // parse the id0 sector/file - let id0 = get_id0_section(args)?; + match get_id0_section(args)? { + IDAVariants::IDA32(id0) => dump(id0), + IDAVariants::IDA64(id0) => dump(id0), + } +} +fn dump(id0: ID0Section) -> Result<()> { for entry in id0.all_entries() { let key = id0_to_str(&entry.key); let value = id0_to_str(&entry.value); diff --git a/src/tools/dump_id1.rs b/src/tools/dump_id1.rs new file mode 100644 index 0000000..539eca0 --- /dev/null +++ b/src/tools/dump_id1.rs @@ -0,0 +1,58 @@ +use crate::{get_id1_section, Args}; + +use idb_rs::id1::ByteInfo; +use idb_rs::id1::ByteType; + +use anyhow::Result; + +macro_rules! print_char_if_bool { + ($cond:expr, $value:literal) => { + print!("{} ", if $cond { $value } else { ' ' }); + }; +} + +pub fn dump_id1(args: &Args) -> Result<()> { + // parse the id1 sector/file + let id1 = get_id1_section(args)?; + + for (address, byte_info) in id1.all_bytes() { + print!("{address:08X}: {:#010X} ", byte_info.as_raw()); + + let ByteInfo { + byte_value: _, + has_comment, + has_reference, + has_comment_ext, + has_name, + has_dummy_name, + exec_flow_from_prev_inst, + op_invert_sig, + op_bitwise_negation, + is_unused_set, + byte_type, + } = byte_info.decode().unwrap(); + print_char_if_bool!(has_comment, 'C'); + print_char_if_bool!(has_comment_ext, 'Ĉ'); + print_char_if_bool!(has_reference, 'R'); + print_char_if_bool!(has_name, 'N'); + print_char_if_bool!(has_dummy_name, 'Ñ'); + print_char_if_bool!(exec_flow_from_prev_inst, 'X'); + print_char_if_bool!(op_invert_sig, 'S'); + print_char_if_bool!(op_bitwise_negation, 'B'); + print_char_if_bool!(is_unused_set, 'U'); + + print!("| "); + match byte_type { + ByteType::Data(data) => { + print!("D "); + print!("{data:?} "); + } + ByteType::Code(_code) => print!("C "), + ByteType::Tail => print!("T "), + ByteType::Unknown => print!("U "), + } + + println!(); + } + Ok(()) +} diff --git a/src/tools/dump_loader_name.rs b/src/tools/dump_loader_name.rs index fa4d6c3..a5a969b 100644 --- a/src/tools/dump_loader_name.rs +++ b/src/tools/dump_loader_name.rs @@ -2,10 +2,18 @@ use crate::{get_id0_section, Args}; use anyhow::Result; +use idb_rs::id0::ID0Section; +use idb_rs::{IDAKind, IDAVariants}; + pub fn dump_loader_name(args: &Args) -> Result<()> { // parse the id0 sector/file - let id0 = get_id0_section(args)?; + match get_id0_section(args)? { + IDAVariants::IDA32(id0) => dump(id0), + IDAVariants::IDA64(id0) => dump(id0), + } +} +fn dump(id0: ID0Section) -> Result<()> { println!("Loader Name AKA `$ loader name`: "); for name in id0.loader_name()? 
{ println!(" {}", name?); diff --git a/src/tools/dump_root_info.rs b/src/tools/dump_root_info.rs index 61e75c8..d64e267 100644 --- a/src/tools/dump_root_info.rs +++ b/src/tools/dump_root_info.rs @@ -2,12 +2,21 @@ use crate::{get_id0_section, Args}; use anyhow::Result; +use idb_rs::id0::ID0Section; +use idb_rs::{IDAKind, IDAVariants}; + pub fn dump_root_info(args: &Args) -> Result<()> { // parse the id0 sector/file - let id0 = get_id0_section(args)?; + match get_id0_section(args)? { + IDAVariants::IDA32(id0) => dump(id0), + IDAVariants::IDA64(id0) => dump(id0), + } +} +fn dump(id0: ID0Section) -> Result<()> { println!("Segments AKA `Root Node`: "); - for entry in id0.root_info()? { + let root_node = id0.root_info_node()?; + for entry in id0.root_info(root_node)? { println!(" {:x?}", entry?); } diff --git a/src/tools/dump_segments.rs b/src/tools/dump_segments.rs index f85ac2d..5bf056f 100644 --- a/src/tools/dump_segments.rs +++ b/src/tools/dump_segments.rs @@ -2,10 +2,18 @@ use crate::{get_id0_section, Args}; use anyhow::Result; +use idb_rs::id0::ID0Section; +use idb_rs::{IDAKind, IDAVariants}; + pub fn dump_segments(args: &Args) -> Result<()> { // parse the id0 sector/file - let id0 = get_id0_section(args)?; + match get_id0_section(args)? { + IDAVariants::IDA32(id0) => dump(id0), + IDAVariants::IDA64(id0) => dump(id0), + } +} +fn dump(id0: ID0Section) -> Result<()> { println!("Segments AKA `$ segs`: "); for entry in id0.segments()? { println!(" {:x?}", entry?); @@ -20,10 +28,12 @@ pub fn dump_segments(args: &Args) -> Result<()> { version, .. }) => version, }; - println!(); - println!("Segments AKA `$ fileregions`: "); - for entry in id0.file_regions(version)? { - println!(" {:x?}", entry?); + if let Ok(idx) = id0.file_regions_idx() { + println!(); + println!("Segments AKA `$ fileregions`: "); + for entry in id0.file_regions(idx, version) { + println!(" {:x?}", entry?); + } } Ok(()) } diff --git a/src/tools/dump_til.rs b/src/tools/dump_til.rs index 9e06748..61e5b54 100644 --- a/src/tools/dump_til.rs +++ b/src/tools/dump_til.rs @@ -2,9 +2,10 @@ use std::fs::File; use std::io::BufReader; use anyhow::{anyhow, Result}; + use idb_rs::til::section::{TILSection, TILSectionExtendedSizeofInfo}; use idb_rs::til::TILMacro; -use idb_rs::IDBParser; +use idb_rs::IDAVariants; use crate::{Args, FileType}; @@ -13,7 +14,7 @@ pub fn dump_til(args: &Args) -> Result<()> { let til = match args.input_type() { FileType::Idb => { let input = BufReader::new(File::open(&args.input)?); - let mut parser = IDBParser::new(input)?; + let mut parser = IDAVariants::new(input)?; let til_offset = parser.til_section_offset().ok_or_else(|| { anyhow!("IDB file don't contains a TIL sector") })?; @@ -21,10 +22,7 @@ pub fn dump_til(args: &Args) -> Result<()> { } FileType::Til => { let mut input = BufReader::new(File::open(&args.input)?); - idb_rs::til::section::TILSection::read( - &mut input, - idb_rs::IDBSectionCompression::None, - )? + idb_rs::til::section::TILSection::read(&mut input)? 
} }; @@ -41,6 +39,7 @@ pub fn dump_til(args: &Args) -> Result<()> { dependencies, compiler_id, cc, + compiler_guessed, cn, cm, def_align, @@ -60,7 +59,8 @@ pub fn dump_til(args: &Args) -> Result<()> { println!("dependency-{i}: {}", dependency.as_utf8_lossy()); } println!("id: {compiler_id:?}"); - println!("cc: {cc:?}"); + let cc_guessed = if *compiler_guessed { " (guessed)" } else { "" }; + println!("cc: {cc:?}{cc_guessed}"); println!("cm: {cm:?}"); println!("cn: {cn:?}"); println!("def_align: {}", def_align.map(|x| x.get()).unwrap_or(0)); diff --git a/src/tools/produce_idc.rs b/src/tools/produce_idc.rs new file mode 100644 index 0000000..da36210 --- /dev/null +++ b/src/tools/produce_idc.rs @@ -0,0 +1,922 @@ +use std::borrow::{Borrow, Cow}; +use std::io::BufReader; +use std::iter::Peekable; +use std::{fs::File, io::Write}; + +use anyhow::{anyhow, ensure, Context, Result}; + +use idb_rs::id0::{AddressInfo, Comments, FunctionsAndComments, ID0Section}; +use idb_rs::id1::{ + ByteData, ByteDataType, ByteInfoRaw, ByteRawType, ByteType, ID1Section, + InstOpInfo, +}; +use idb_rs::til::section::TILSection; +use idb_rs::til::TILTypeInfo; +use idb_rs::{IDAKind, IDAVariants}; + +use crate::{Args, FileType, ProduceIdcArgs}; + +// InnerRef fb47a09e-b8d8-42f7-aa80-2435c4d1e049 0xb6e80 +pub fn produce_idc(args: &Args, idc_args: &ProduceIdcArgs) -> Result<()> { + let input = BufReader::new(File::open(&args.input)?); + match args.input_type() { + FileType::Til => { + return Err(anyhow!( + "Produce IDC file from til file is not implemented yet" + )); + } + FileType::Idb => { + let mut parser = IDAVariants::new(input)?; + let id0_offset = parser.id0_section_offset().ok_or_else(|| { + anyhow!("IDB file don't contains a ID0 sector") + })?; + let id1_offset = parser.id1_section_offset().ok_or_else(|| { + anyhow!("IDB file don't contains a ID1 sector") + })?; + let til_offset = parser.til_section_offset().ok_or_else(|| { + anyhow!("IDB file don't contains a TIL sector") + })?; + let id0 = parser.read_id0_section(id0_offset)?; + let id1 = parser.read_id1_section(id1_offset)?; + let til = parser.read_til_section(til_offset)?; + match id0 { + IDAVariants::IDA32(id0) => { + produce_idc_inner( + &mut std::io::stdout(), + idc_args, + &id0, + &id1, + &til, + )?; + } + IDAVariants::IDA64(id0) => { + produce_idc_inner( + &mut std::io::stdout(), + idc_args, + &id0, + &id1, + &til, + )?; + } + } + } + } + Ok(()) +} + +fn produce_idc_inner( + fmt: &mut impl Write, + args: &ProduceIdcArgs, + id0: &ID0Section, + id1: &ID1Section, + til: &TILSection, +) -> Result<()> { + if !args.banner.is_empty() { + writeln!(fmt, "//\n// +-------------------------------------------------------------------------+")?; + for line in &args.banner { + writeln!(fmt, "// |{line:^73}|")?; + } + writeln!(fmt, "// +-------------------------------------------------------------------------+\n//")?; + } + // InnerRef fb47a09e-b8d8-42f7-aa80-2435c4d1e049 0xb6e80 + let _unknown_value1 = true; // all database, or just range? + let _unknown_value2 = true; // export user types? + match (_unknown_value1, _unknown_value2) { + (false, false) => { + // InnerRef fb47a09e-b8d8-42f7-aa80-2435c4d1e049 0xb919a + // TODO implement range dump + //writeln!(fmt)?; + //writeln!(fmt, "// DUMP OF RANGE {start}..{end}")?; + todo!(); + } + (false, true) => { + // TODO also implement user type definitions + // InnerRef fb47a09e-b8d8-42f7-aa80-2435c4d1e049 0xb6fd4 + writeln!(fmt, "//")?; + writeln!( + fmt, + "// This file contains the user-defined type definitions." 
+ )?; + writeln!(fmt, "// To use it press F2 in IDA and enter the name of this file.")?; + writeln!(fmt, "//")?; + } + (true, _) => { + // InnerRef fb47a09e-b8d8-42f7-aa80-2435c4d1e049 0xb71a9 + writeln!(fmt, "//")?; + writeln!( + fmt, + "// This file should be used in the following way:" + )?; + writeln!( + fmt, + "// - reload executable into IDA with using switch -c" + )?; + writeln!( + fmt, + "// - use File, Load IDC file and load this file." + )?; + writeln!(fmt, "//")?; + writeln!(fmt, "// NOTE: This file doesn't contain all information from the database.")?; + writeln!(fmt, "//")?; + } + } + writeln!(fmt)?; + + writeln!(fmt, "#define UNLOADED_FILE 1")?; + writeln!(fmt, "#include ")?; + writeln!(fmt)?; + writeln!(fmt, "extern ltf; // load_type flags")?; + writeln!(fmt)?; + + produce_main(fmt, _unknown_value1, _unknown_value2)?; + + if _unknown_value1 { + writeln!(fmt)?; + produce_gen_info(fmt, id0, til)?; + writeln!(fmt)?; + produce_segments(fmt, id0)?; + } + + if _unknown_value2 { + writeln!(fmt)?; + produce_types(fmt, til)?; + } + + produce_patches(fmt, id0, id1)?; + + writeln!(fmt)?; + produce_bytes_info(fmt, id0, id1, til)?; + + produce_functions(fmt, id0, til)?; + + writeln!(fmt)?; + produce_seg_regs(fmt, id0, til)?; + + writeln!(fmt)?; + produce_all_patches(fmt, id0, til)?; + + writeln!(fmt)?; + produce_bytes(fmt, id0, til)?; + + writeln!(fmt)?; + writeln!(fmt, "// End of file.")?; + Ok(()) +} + +fn produce_main( + fmt: &mut impl Write, + _unknown_value1: bool, + _unknown_value2: bool, +) -> Result<()> { + writeln!(fmt, "static main(void)")?; + writeln!(fmt, "{{")?; + writeln!(fmt, " ltf = ARGV.count > 1 ? ARGV[1] : LOADTYPE_DEFAULT;")?; + + match (_unknown_value1, _unknown_value2) { + (false, false) => { + // InnerRef fb47a09e-b8d8-42f7-aa80-2435c4d1e049 0xb91bc + writeln!(fmt, " Patches(); // info about patches bytes")?; + writeln!(fmt, " SegRegs(); // segment register values")?; + writeln!(fmt, " Bytes(); // individual bytes (code,data)")?; + writeln!(fmt, " Functions(); // function definitions")?; + } + (false, true) => { + // InnerRef fb47a09e-b8d8-42f7-aa80-2435c4d1e049 0xb6ff6 + writeln!(fmt, " LocalTypes();")?; + } + (true, _) => { + // InnerRef fb47a09e-b8d8-42f7-aa80-2435c4d1e049 0xb71cb + writeln!(fmt, " // set \'loading idc file\' mode")?; + writeln!(fmt, " set_inf_attr(INF_GENFLAGS, INFFL_LOADIDC|get_inf_attr(INF_GENFLAGS));")?; + writeln!(fmt, " GenInfo(); // various settings")?; + writeln!(fmt, " Segments(); // segmentation")?; + writeln!(fmt, " LocalTypes(); // local types")?; + writeln!(fmt, " Patches(); // manual patches")?; + writeln!(fmt, " SegRegs(); // segment register values")?; + writeln!(fmt, " Bytes(); // individual bytes (code,data)")?; + writeln!(fmt, " Functions(); // function definitions")?; + writeln!(fmt, " // clear 'loading idc file' mode")?; + writeln!(fmt, " set_inf_attr(INF_GENFLAGS, ~INFFL_LOADIDC&get_inf_attr(INF_GENFLAGS));")?; + } + } + writeln!(fmt, "}}")?; + Ok(()) +} + +fn produce_gen_info( + fmt: &mut impl Write, + id0: &ID0Section, + til: &TILSection, +) -> Result<()> { + let info = id0.ida_info()?; + writeln!(fmt, "//------------------------------------------------------------------------")?; + writeln!(fmt, "// General information")?; + writeln!(fmt)?; + writeln!(fmt, "static GenInfo(void)")?; + writeln!(fmt, "{{")?; + writeln!(fmt, " delete_all_segments(); // purge database")?; + let cpu = match &info { + idb_rs::id0::IDBParam::V1(x) => &x.cpu, + idb_rs::id0::IDBParam::V2(x) => &x.cpu, + }; + writeln!( + fmt, + " 
set_processor_type(\"{}\", SETPROC_USER);", + String::from_utf8_lossy(cpu) + )?; + let compiler = match &info { + idb_rs::id0::IDBParam::V1(x) => x.compiler, + idb_rs::id0::IDBParam::V2(x) => x.cc_id.into(), + }; + writeln!(fmt, " set_inf_attr(INF_COMPILER, {compiler});")?; + let strlit_break = match &info { + idb_rs::id0::IDBParam::V1(x) => x.ascii_break, + idb_rs::id0::IDBParam::V2(x) => x.strlit_break, + }; + writeln!(fmt, " set_inf_attr(INF_STRLIT_BREAK, {strlit_break:#X});",)?; + let scf_allcmt = match &info { + idb_rs::id0::IDBParam::V1(_x) => { + // TODO todo!("flag from V1 x.cmtflag.is_allcmt()") + false as u8 + } + idb_rs::id0::IDBParam::V2(x) => x.cmtflg.is_allcmt() as u8, + }; + writeln!(fmt, " set_flag(INF_CMTFLG, SCF_ALLCMT, {scf_allcmt});")?; + let oflg_show_void = match &info { + idb_rs::id0::IDBParam::V1(_x) => { + // TODO todo!("flag from V1 x.outflags.show_void()") + false as u8 + } + idb_rs::id0::IDBParam::V2(x) => x.outflags.show_void() as u8, + }; + writeln!( + fmt, + " set_flag(INF_OUTFLAGS, OFLG_SHOW_VOID, {oflg_show_void});" + )?; + let xrefnum = match &info { + idb_rs::id0::IDBParam::V1(x) => x.xrefnum, + idb_rs::id0::IDBParam::V2(x) => x.xrefnum, + }; + writeln!(fmt, " set_inf_attr(INF_XREFNUM, {xrefnum});")?; + let oflg_show_auto = match &info { + idb_rs::id0::IDBParam::V1(_x) => { + // TODO todo!("flag from V1 x.outflags.show_auto()") + false as u8 + } + idb_rs::id0::IDBParam::V2(x) => x.outflags.show_auto() as u8, + }; + writeln!( + fmt, + " set_flag(INF_OUTFLAGS, OFLG_SHOW_AUTO, {oflg_show_auto});", + )?; + let indent = match &info { + idb_rs::id0::IDBParam::V1(x) => x.indent, + idb_rs::id0::IDBParam::V2(x) => x.indent, + }; + writeln!(fmt, " set_inf_attr(INF_INDENT, {indent});")?; + let cmd_indent = match &info { + idb_rs::id0::IDBParam::V1(_x) => { + // TODO todo!("value from V1.cmd_indent") + 0 + } + idb_rs::id0::IDBParam::V2(x) => x.cmt_ident, + }; + writeln!(fmt, " set_inf_attr(INF_CMT_INDENT, {cmd_indent});")?; + let max_ref = match &info { + idb_rs::id0::IDBParam::V1(x) => x.maxref, + idb_rs::id0::IDBParam::V2(x) => x.maxref, + }; + writeln!(fmt, " set_inf_attr(INF_MAXREF, {max_ref:#X});")?; + for dep in &til.header.dependencies { + writeln!(fmt, " add_default_til(\"{}\");", dep.as_utf8_lossy())?; + } + writeln!(fmt, "}}")?; + + Ok(()) +} + +fn produce_segments( + fmt: &mut impl Write, + id0: &ID0Section, +) -> Result<()> { + writeln!(fmt, "//------------------------------------------------------------------------")?; + writeln!(fmt, "// Information about segmentation")?; + writeln!(fmt)?; + writeln!(fmt, "static Segments(void)")?; + writeln!(fmt, "{{")?; + // InnerRef fb47a09e-b8d8-42f7-aa80-2435c4d1e049 0xb7480 + // https://docs.hex-rays.com/developer-guide/idc/idc-api-reference/alphabetical-list-of-idc-functions/292 + let segs: Vec<_> = id0.segments()?.collect::>()?; + let mut segs_sorted: Vec<&_> = segs.iter().collect(); + segs_sorted.sort_unstable_by_key(|seg| seg.selector); + for seg in segs_sorted { + let sel = seg.selector; + let val = seg.orgbase; + writeln!(fmt, " set_selector({sel:#X}, {val:#X});")?; + } + writeln!(fmt)?; + + // InnerRef fb47a09e-b8d8-42f7-aa80-2435c4d1e049 0xb74b7 + for seg in segs { + let startea = seg.address.start; + let endea = seg.address.end; + let base = seg.selector; + let use32 = match seg.bitness { + idb_rs::id0::SegmentBitness::S16Bits => 0, + idb_rs::id0::SegmentBitness::S32Bits => 1, + idb_rs::id0::SegmentBitness::S64Bits => 2, + }; + let align: u8 = seg.align.into(); + // TODO InnerRef 
fb47a09e-b8d8-42f7-aa80-2435c4d1e049 0xb754f + let comb = 2; + // TODO InnerRef fb47a09e-b8d8-42f7-aa80-2435c4d1e049 0xb7544 + let flags = if false { "|ADDSEG_SPARSE" } else { "" }; + // InnerRef fb47a09e-b8d8-42f7-aa80-2435c4d1e049 0xb75f4 + // https://docs.hex-rays.com/developer-guide/idc/idc-api-reference/alphabetical-list-of-idc-functions/299 + writeln!( + fmt, + " add_segm_ex({startea:#X}, {endea:#X}, {base:#X}, {use32}, {align}, {comb}, ADDSEG_NOSREG{flags});", + )?; + + // InnerRef fb47a09e-b8d8-42f7-aa80-2435c4d1e049 0xb7666 + let name = seg + .name + .as_ref() + .map(|x| id0.segment_name(*x).map(|x| String::from_utf8_lossy(x))) + .transpose()?; + writeln!( + fmt, + " set_segm_name({startea:#X}, \"{}\");", + name.as_ref().unwrap_or(&Cow::Borrowed("")) + )?; + + let seg_class_name = match seg.seg_type { + idb_rs::id0::SegmentType::Norm if name.is_some() => { + name.as_ref().unwrap().borrow() + } + idb_rs::id0::SegmentType::Norm => "NORM", + idb_rs::id0::SegmentType::Xtrn => "XTRN", + idb_rs::id0::SegmentType::Code => "CODE", + idb_rs::id0::SegmentType::Data => "DATA", + idb_rs::id0::SegmentType::Imp => "IMP", + idb_rs::id0::SegmentType::Grp => "GRP", + idb_rs::id0::SegmentType::Null => "NULL", + idb_rs::id0::SegmentType::Undf => "UNDF", + idb_rs::id0::SegmentType::Bss => "BSS", + idb_rs::id0::SegmentType::Abssym => "ABSSYM", + idb_rs::id0::SegmentType::Comm => "COMM", + idb_rs::id0::SegmentType::Imem => "IMEM", + }; + // InnerRef fb47a09e-b8d8-42f7-aa80-2435c4d1e049 0xb7699 + writeln!(fmt, " set_segm_class({startea:#X}, \"{seg_class_name}\");")?; + + //// TODO InnerRef fb47a09e-b8d8-42f7-aa80-2435c4d1e049 0xb76ac + //for _def_ref in seg.defsr.iter().filter(|x| **x != 0) { + // writeln!(fmt, "SegDefReg({startea:#X}, \"{seg_class_raw}\", {:X});")?; + // todo!(); + //} + + // InnerRef fb47a09e-b8d8-42f7-aa80-2435c4d1e049 0xb74e1 + // https://docs.hex-rays.com/developer-guide/idc/idc-api-reference/alphabetical-list-of-idc-functions/310 + let seg_class_raw: u8 = seg.seg_type.into(); + if seg_class_raw != 0 { + writeln!(fmt, " set_segm_type({startea:#X}, {seg_class_raw});")?; + } + } + + // InnerRef fb47a09e-b8d8-42f7-aa80-2435c4d1e049 0xb8c35 + let ida_info = id0.ida_info()?; + let low_off = match &ida_info { + idb_rs::id0::IDBParam::V1(x) => x.lowoff, + idb_rs::id0::IDBParam::V2(x) => x.lowoff, + }; + writeln!(fmt, " set_inf_attr(INF_LOW_OFF, {low_off:#X});")?; + let high_off = match &ida_info { + idb_rs::id0::IDBParam::V1(x) => x.highoff, + idb_rs::id0::IDBParam::V2(x) => x.highoff, + }; + writeln!(fmt, " set_inf_attr(INF_HIGH_OFF, {high_off:#X});")?; + + writeln!(fmt, "}}")?; + Ok(()) +} + +fn produce_types(fmt: &mut impl Write, til: &TILSection) -> Result<()> { + // TODO types is 0, symbols is 1, etc, til files are 2..? 
+ // InnerRef fb47a09e-b8d8-42f7-aa80-2435c4d1e049 0xb8ca9 + writeln!(fmt, "static LocalTypes_0() ")?; + writeln!(fmt, "{{")?; + writeln!(fmt, " auto p_type, p_fields, p_cmt, p_fldcmts;")?; + writeln!(fmt)?; + for ty in &til.types { + produce_type_load(fmt, til, ty)?; + } + writeln!(fmt, "}}")?; + writeln!(fmt)?; + writeln!(fmt, "//------------------------------------------------------------------------")?; + writeln!(fmt, "// Information about local types")?; + writeln!(fmt)?; + writeln!(fmt, "static LocalTypes()")?; + writeln!(fmt, "{{")?; + writeln!(fmt, " LocalTypes_0();")?; + writeln!(fmt, "}}")?; + Ok(()) +} + +fn produce_patches( + fmt: &mut impl Write, + id0: &ID0Section, + id1: &ID1Section, +) -> Result<()> { + let Some(patches_idx) = id0.segment_patches_idx() else { + return Ok(()); + }; + let patches = id0.segment_patches_original_value(patches_idx); + if patches.len() == 0 { + return Ok(()); + } + + writeln!(fmt)?; + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x1b170e + writeln!(fmt, "//------------------------------------------------------------------------")?; + writeln!(fmt, "// Information about patches")?; + writeln!(fmt)?; + writeln!(fmt, "static Patches_0(void)")?; + writeln!(fmt, "{{")?; + writeln!(fmt, " auto x;")?; + writeln!(fmt, "#define id x")?; + writeln!(fmt)?; + for patch in patches { + let patch = patch?; + let address = patch.address; + let value = id1 + .byte_by_address(patch.address.into()) + .map(|x| x.byte_raw()) + .unwrap_or(0); + writeln!(fmt, " patch_byte({address:#X}, {value:#X});")?; + } + writeln!(fmt, "}}")?; + Ok(()) +} + +fn produce_type_load( + fmt: &mut impl Write, + _til: &TILSection, + ty: &TILTypeInfo, +) -> Result<()> { + // TODO serialize the til will take a lot of time, better use a read_raw API + // although it could be a good test for code quality + writeln!(fmt, " p_type = \"TODO\";")?; + let have_fields = false; + let have_fldcmts = false; + if have_fields { + writeln!(fmt, " p_fields = \"TODO\";")?; + } + if let Some(cmt) = &ty.tinfo.comment { + writeln!(fmt, " p_cmt = \"{}\";", cmt.as_utf8_lossy())?; + } + if have_fldcmts { + writeln!(fmt, " p_fldcmts = \"TODO\";")?; + } + let ord = ty.ordinal; + let name = ty.name.as_utf8_lossy(); + write!(fmt, " load_type(ltf, {ord}, \"{name}\", p_type")?; + if have_fields { + write!(fmt, ", p_fields")?; + } + if ty.tinfo.comment.is_some() { + write!(fmt, ", p_cmt")?; + } + if have_fldcmts { + write!(fmt, ", p_fldcmts")?; + } + writeln!(fmt, ");")?; + Ok(()) +} + +fn produce_bytes_info( + fmt: &mut impl Write, + id0: &ID0Section, + id1: &ID1Section, + _til: &TILSection, +) -> Result<()> { + // InnerRef fb47a09e-b8d8-42f7-aa80-2435c4d1e049 0xb70ce + writeln!(fmt, "//------------------------------------------------------------------------")?; + writeln!(fmt, "// Information about bytes")?; + writeln!(fmt)?; + writeln!(fmt, "static Bytes_0(void)")?; + writeln!(fmt, "{{")?; + writeln!(fmt, " auto x;")?; + writeln!(fmt, "#define id x")?; + writeln!(fmt)?; + + let mut all_bytes = id1.all_bytes().peekable(); + loop { + let Some((address, byte_info_raw)) = all_bytes.next() else { + break; + }; + + let byte_info = byte_info_raw.decode().unwrap(); + let addr_info = + id0.address_info_at(K::Usize::try_from(address).unwrap())?; + // print comments + // TODO byte_info.has_comment() ignored? + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x1b1822 + for addr_info in addr_info { + if let AddressInfo::Comment(Comments::Comment(cmt)) = addr_info? 
{ + writeln!( + fmt, + " set_cmt({address:#X}, \"{}\", 0);", + String::from_utf8_lossy(cmt) + )?; + } + } + + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x1b1ddd + if byte_info.has_comment_ext { + let pre_cmts = addr_info.filter_map(|x| match x { + Ok(AddressInfo::Comment(Comments::PreComment(cmt))) => { + Some(Ok(cmt)) + } + Ok(_x) => None, + Err(e) => Some(Err(e)), + }); + for (i, cmt) in pre_cmts.enumerate() { + writeln!( + fmt, + " update_extra_cmt({address:#X}, E_PREV + {i:>3}, \"{}\");", + String::from_utf8_lossy(cmt?) + )?; + } + + let post_cmts = addr_info.filter_map(|x| match x { + Ok(AddressInfo::Comment(Comments::PostComment(cmt))) => { + Some(Ok(cmt)) + } + Ok(_x) => None, + Err(e) => Some(Err(e)), + }); + for (i, cmt) in post_cmts.enumerate() { + writeln!( + fmt, + " update_extra_cmt({address:#X}, E_NEXT + {i:>3}, \"{}\");", + String::from_utf8_lossy(cmt?) + )?; + } + } + + // TODO InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x1b1dee + // if matches!(byte_type, (ByteType::Code | ByteType::Data)) { + // is_manual(byte_type, 0xf) || + // (!is_off(byte_type, 0xf) && !is_seg(byte_type, 0xf) && + // !is_char(byte_type, 0xf) && !is_enum(byte_type, 0xf) && + // !is_stroff(byte_type, 0xf) && !is_stkvar(byte_type, 0xf) && + // !is_numop(byte_type, 0xf)) + // "x=\"\"" | "" + // } + let set_x = match byte_info.byte_type { + ByteType::Data(ByteData { + print_info: + InstOpInfo::Hex + | InstOpInfo::Dec + | InstOpInfo::Bin + | InstOpInfo::Oct, + data_type: _, + }) => "x=", + _ => "", + }; + + match byte_info.byte_type { + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x1b1dee + ByteType::Code(code) => { + let _len = count_tails(&mut all_bytes) + 1; + if !byte_info.exec_flow_from_prev_inst || code.is_func_start + // || TODO: byte_info.is_manual() + { + writeln!(fmt, " create_insn({set_x}{address:#X});")?; + } + } + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x1b1e37 + ByteType::Data(data) => { + match data.data_type { + ByteDataType::Strlit => { + let len = count_tails(&mut all_bytes); + writeln!( + fmt, + " create_strlit({set_x}{address:#X}, {len:#X});" + )? + } + ByteDataType::Dword => { + let len = count_element(&mut all_bytes, 4)?; + writeln!(fmt, " create_dword({set_x}{address:#X});")?; + if len > 1 { + writeln!( + fmt, + " make_array({address:#X}, {len:#X});" + )? + } + } + ByteDataType::Byte => { + let len = count_tails(&mut all_bytes); + writeln!(fmt, " create_byte({set_x}{address:#X});")?; + if len > 1 { + writeln!( + fmt, + " make_array({address:#X}, {len:#X});" + )? + } + } + ByteDataType::Word => { + let len = count_element(&mut all_bytes, 2)?; + writeln!(fmt, " create_word({set_x}{address:#X});")?; + if len > 1 { + writeln!( + fmt, + " make_array({address:#X}, {len:#X});" + )? + } + } + ByteDataType::Qword => { + let len = count_element(&mut all_bytes, 8)?; + writeln!(fmt, " create_qword({set_x}{address:#X});")?; + if len > 1 { + writeln!( + fmt, + " make_array({address:#X}, {len:#X});" + )? + } + } + ByteDataType::Tbyte => { + let _len = count_tails(&mut all_bytes); + // TODO make array? + writeln!(fmt, " create_tbyte({set_x}{address:#X});")? + } + ByteDataType::Float => { + let _len = count_tails(&mut all_bytes); + // TODO make array? + writeln!(fmt, " create_float({set_x}{address:#X});")? + } + ByteDataType::Packreal => writeln!( + fmt, + " create_pack_real({set_x}{address:#X});" + )?, + ByteDataType::Yword => { + let _len = count_tails(&mut all_bytes); + // TODO make array? + writeln!(fmt, " create_yword({set_x}{address:#X});")? 
+ } + ByteDataType::Double => { + let _len = count_tails(&mut all_bytes); + // TODO make array? + writeln!(fmt, " create_double({set_x}{address:#X});")? + } + ByteDataType::Oword => { + let _len = count_tails(&mut all_bytes); + // TODO make array? + writeln!(fmt, " create_oword({set_x}{address:#X});")? + } + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x1b2690 + ByteDataType::Struct => { + let _len = count_tails(&mut all_bytes); + // TODO ensure struct have the same len that _len + // TODO make a struct_def_at + // TODO make array? + let struct_id = id0 + .address_info_at( + K::Usize::try_from(address).unwrap(), + ) + .unwrap() + .find_map(|e| match e { + Ok(AddressInfo::DefinedStruct(s)) => { + Some(Ok(s)) + } + Err(e) => Some(Err(e)), + Ok(_) => None, + }); + let struct_name = struct_id + .map(|idx| { + id0.struct_at(idx.unwrap()) + .with_context(|| { + format!("ID1 addr {address:#X}") + }) + .unwrap() + }) + .unwrap_or(b"BAD_STRUCT"); + writeln!( + fmt, + " create_struct({address:#X}, -1, \"{}\");", + core::str::from_utf8(struct_name).unwrap() + )?; + } + ByteDataType::Align => { + let len = count_tails(&mut all_bytes); + if len > 1 { + writeln!( + fmt, + " make_array({address:#X}, {len:#X});" + )? + } + } + ByteDataType::Zword | ByteDataType::Custom => { + let _len = count_tails(&mut all_bytes); + //TODO + } + ByteDataType::Reserved => { + todo!(); + } + } + match data.print_info { + InstOpInfo::Hex => writeln!(fmt, " op_hex(x, 0);")?, + InstOpInfo::Dec => writeln!(fmt, " op_dec(x, 0);")?, + InstOpInfo::Bin => writeln!(fmt, " op_bin(x, 0);")?, + InstOpInfo::Oct => writeln!(fmt, " op_oct(x, 0);")?, + InstOpInfo::Void + | InstOpInfo::Char + | InstOpInfo::Seg + | InstOpInfo::Off + | InstOpInfo::Enum + | InstOpInfo::Fop + | InstOpInfo::StrOff + | InstOpInfo::StackVar + | InstOpInfo::Float + | InstOpInfo::Custom => {} + } + // TODO get_data_elsize + // TODO make_array + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x1b2622 + } + ByteType::Tail => { + return Err(anyhow!("Unexpected ID1 Tail entry: {address:#X}")) + } + ByteType::Unknown => {} + } + + // TODO InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x1b1e5e + // for bit in 0..8 { + // if id1.is_invsign(address, byte_info, bit) { + // todo!(); + // } + // if id1.is_bnot(address, byte_info, bit) { + // todo!(); + // } + // if id1.is_defarg(address, byte_info, bit) { + // break; + // } + // todo!(); + //} + + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x1b2160 + if byte_info.has_name { + for addr_info in addr_info { + if let AddressInfo::Label(name) = addr_info? { + writeln!( + fmt, + " set_name({address:#X}, \"{}\");", + String::from_utf8_lossy(name.as_bytes()) + )?; + } + } + } + } + + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x1b28ea + // TODO add_func and other getn_func related functions + + // InnerRef 66961e377716596c17e2330a28c01eb3600be518 0x1b2fee + // TODO getn_fchunk related stuff + + writeln!(fmt, "}}")?; + Ok(()) +} + +fn produce_functions( + fmt: &mut impl Write, + id0: &ID0Section, + _til: &TILSection, +) -> Result<()> { + use idb_rs::id0::FunctionsAndComments::*; + use idb_rs::id0::IDBFunctionExtra::*; + + // TODO find the InnerRef for this, maybe it's just `$ dirtree/funcs` + let id0_funcs = id0.functions_and_comments()?; + let funcs: Vec<_> = id0_funcs + .filter_map(|fun| match fun { + Err(e) => Some(Err(e)), + Ok(FunctionsAndComments::Function(fun)) => Some(Ok(fun)), + Ok( + Name + | FunctionsAndComments::Comment { .. } + | FunctionsAndComments::Unknown { .. 
}, + ) => None, + }) + .collect::>()?; + + if funcs.is_empty() { + return Ok(()); + } + + // TODO find the number of functions + writeln!(fmt)?; + writeln!(fmt, "static Functions_0(void)")?; + writeln!(fmt, "{{")?; + for fun in funcs { + let addr = fun.address.start; + writeln!(fmt, " add_func({addr:#X}, {:#X});", fun.address.end)?; + writeln!(fmt, " set_func_flags({addr:#X}, {:#x});", fun.flags)?; + writeln!(fmt, " apply_type({addr:#X}, \"TODO\");")?; + match &fun.extra { + NonTail { frame } => { + writeln!(fmt, " set_frame_size({addr:#X}, {frame:#X?});")?; + } + Tail { + owner: _, + refqty, + _unknown1, + _unknown2, + } if *refqty != K::Usize::from(0u8) => { + writeln!( + fmt, + " set_frame_size({addr:#X}, {refqty:#X}, {_unknown1}, {_unknown2:#X});" + )?; + } + Tail { .. } => {} + } + } + writeln!(fmt, "}}")?; + writeln!(fmt)?; + writeln!(fmt, "//------------------------------------------------------------------------")?; + writeln!(fmt, "// Information about functions")?; + writeln!(fmt)?; + writeln!(fmt, "static Functions(void)")?; + writeln!(fmt, "{{")?; + writeln!(fmt, " Functions_0();")?; + writeln!(fmt, "}}")?; + Ok(()) +} + +fn produce_seg_regs( + fmt: &mut impl Write, + _id0: &ID0Section, + _til: &TILSection, +) -> Result<()> { + writeln!(fmt, "//------------------------------------------------------------------------")?; + writeln!(fmt, "// Information about segment registers")?; + writeln!(fmt)?; + writeln!(fmt, "static SegRegs(void)")?; + writeln!(fmt, "{{")?; + writeln!(fmt, " TODO();")?; + writeln!(fmt, "}}")?; + Ok(()) +} + +fn produce_all_patches( + fmt: &mut impl Write, + _id0: &ID0Section, + _til: &TILSection, +) -> Result<()> { + writeln!(fmt, "//------------------------------------------------------------------------")?; + writeln!(fmt, "// Information about all patched bytes:")?; + writeln!(fmt)?; + writeln!(fmt, "static Patches(void)")?; + writeln!(fmt, "{{")?; + writeln!(fmt, " TODO();")?; + writeln!(fmt, "}}")?; + Ok(()) +} + +fn produce_bytes( + fmt: &mut impl Write, + _id0: &ID0Section, + _til: &TILSection, +) -> Result<()> { + writeln!(fmt, "//------------------------------------------------------------------------")?; + writeln!(fmt, "// Call all byte feature functions:")?; + writeln!(fmt)?; + writeln!(fmt, "static Bytes(void)")?; + writeln!(fmt, "{{")?; + writeln!(fmt, " Bytes_0();")?; + writeln!(fmt, "}}")?; + Ok(()) +} + +fn count_tails(bytes: &mut Peekable) -> usize +where + I: Iterator, +{ + bytes + .take_while(|(_a, b)| b.byte_type() == ByteRawType::Tail) + .count() +} + +fn count_element(bytes: &mut Peekable, ele_len: usize) -> Result +where + I: Iterator, +{ + let len = count_tails(bytes) + 1; + ensure!(len >= ele_len, "Expected more ID1 Tail entries"); + ensure!( + len % ele_len == 0, + "More ID1 Tails that expects or invalid array len" + ); + Ok(len / ele_len) +} diff --git a/src/tools/tilib.rs b/src/tools/tilib.rs index 9e23c35..fe5536e 100644 --- a/src/tools/tilib.rs +++ b/src/tools/tilib.rs @@ -11,7 +11,7 @@ use idb_rs::til::{ Basic, SClass, TILTypeInfo, TILTypeSizeSolver, Type, TypeVariant, Typeref, TyperefType, TyperefValue, }; -use idb_rs::{IDBParser, IDBSectionCompression, IDBString}; +use idb_rs::{IDAVariants, IDBString}; use std::fs::File; use std::io::{BufReader, Result, Write}; @@ -33,12 +33,11 @@ pub fn tilib_print( let mut input = BufReader::new(File::open(&args.input)?); match args.input_type() { FileType::Til => { - let section = - TILSection::read(&mut input, IDBSectionCompression::None)?; + let section = TILSection::read(&mut input)?; 
             print_til_section(std::io::stdout(), &section, tilib_args)?;
         }
         FileType::Idb => {
-            let mut parser = IDBParser::new(input)?;
+            let mut parser = IDAVariants::new(input)?;
             let til_offset = parser.til_section_offset().ok_or_else(|| {
                 anyhow::anyhow!("IDB file don't contains a TIL sector")
             })?;
@@ -224,7 +223,7 @@ fn compiler_id_to_str(compiler: Compiler) -> &'static str {
         Compiler::Gnu => "GNU C++",
         Compiler::VisualAge => "Visual Age C++",
         Compiler::Delphi => "Delphi",
-        Compiler::Other => "?",
+        Compiler::Unsure => "?",
     }
 }
 
@@ -1295,12 +1294,13 @@ fn print_til_struct_member_att(
             }
             _ => {}
         },
-        TypeVariant::Array(array) => match &array.elem_type.type_variant {
-            TypeVariant::Basic(Basic::Char) => {
+        TypeVariant::Array(array) => {
+            if let TypeVariant::Basic(Basic::Char) =
+                &array.elem_type.type_variant
+            {
                 print_til_struct_member_string_att(fmt, att)?
             }
-            _ => {}
-        },
+        }
         _ => {}
     }
     Ok(())
diff --git a/src/tools/tools.rs b/src/tools/tools.rs
index b1c7754..97ec255 100644
--- a/src/tools/tools.rs
+++ b/src/tools/tools.rs
@@ -3,6 +3,8 @@ mod dump_til;
 use dump_til::dump_til;
 mod dump_id0;
 use dump_id0::dump_id0;
+mod dump_id1;
+use dump_id1::dump_id1;
 //mod split_idb;
 //use split_idb::split_idb;
 mod decompress_til;
@@ -39,8 +41,11 @@ mod dump_dirtree_bookmarks_tiplace;
 use dump_dirtree_bookmarks_tiplace::dump_dirtree_bookmarks_tiplace;
 mod tilib;
 use tilib::tilib_print;
+mod produce_idc;
+use produce_idc::produce_idc;
 
-use idb_rs::{id0::ID0Section, IDBParser};
+use idb_rs::{id0::ID0Section, id1::ID1Section};
+use idb_rs::{IDAVariants, IDA32, IDA64};
 
 use std::fs::File;
 use std::io::BufReader;
@@ -82,6 +87,8 @@ enum Operation {
     DumpTil,
     /// Dump all entries of the ID0 database
     DumpID0,
+    /// Dump all entries of the ID1 database
+    DumpID1,
     //SplitIDB(SplitIDBArgs),
     /// Decompress the TIL Section and buckets
     DecompressTil(DecompressTilArgs),
@@ -110,6 +117,8 @@ enum Operation {
     DumpDirtreeBookmarksTiplace,
     /// Print all til types from file and it's information
     PrintTilib(PrintTilibArgs),
+    /// Print a IDC file from the IDB database
+    ProduceIdc(ProduceIdcArgs),
 }
 
 ///// Split the IDB file into it's decompressed sectors. Allow IDB and I64 files.
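Every dump_* tool touched by this patch now shares the same 32/64-bit dispatch: get_id0_section() returns an IDAVariants wrapping either kind of ID0Section, the tool matches once, and a generic helper does the real work. A condensed sketch of that pattern, assuming only names that appear in this patch (Args and get_id0_section are the ones defined in src/tools/tools.rs; the segments() call is the one used by dump_segments); run() and dump() here are illustrative helpers, not part of the patch:

use anyhow::Result;
use idb_rs::id0::ID0Section;
use idb_rs::{IDAKind, IDAVariants};

fn run(args: &Args) -> Result<()> {
    // get_id0_section() hides the 32/64-bit detection; each arm
    // monomorphizes dump::<IDA32> or dump::<IDA64>.
    match get_id0_section(args)? {
        IDAVariants::IDA32(id0) => dump(id0),
        IDAVariants::IDA64(id0) => dump(id0),
    }
}

fn dump<K: IDAKind>(id0: ID0Section<K>) -> Result<()> {
    // Same shape as dump_segments: iterate a fallible entry iterator.
    for entry in id0.segments()? {
        println!("  {:x?}", entry?);
    }
    Ok(())
}
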
@@ -134,6 +143,12 @@ struct PrintTilibArgs {
     dump_struct_layout: Option<bool>,
 }
 
+/// Produce idc from an IDB database
+#[derive(Clone, Debug, Parser)]
+struct ProduceIdcArgs {
+    banner: Vec<String>,
+}
+
 impl Args {
     pub fn input_type(&self) -> FileType {
         if let Some(input_type) = self.force_type {
@@ -148,12 +163,14 @@
     }
 }
 
-fn get_id0_section(args: &Args) -> Result<ID0Section> {
+fn get_id0_section(
+    args: &Args,
+) -> Result<IDAVariants<ID0Section<IDA32>, ID0Section<IDA64>>> {
     match args.input_type() {
         FileType::Til => Err(anyhow!("TIL don't contains any ID0 data")),
         FileType::Idb => {
             let input = BufReader::new(File::open(&args.input)?);
-            let mut parser = IDBParser::new(input)?;
+            let mut parser = IDAVariants::new(input)?;
             let id0_offset = parser.id0_section_offset().ok_or_else(|| {
                 anyhow!("IDB file don't contains a TIL sector")
             })?;
@@ -162,12 +179,27 @@ fn get_id0_section(args: &Args) -> Result<ID0Section> {
     }
 }
 
+fn get_id1_section(args: &Args) -> Result<ID1Section> {
+    match args.input_type() {
+        FileType::Til => Err(anyhow!("TIL don't contains any ID1 data")),
+        FileType::Idb => {
+            let input = BufReader::new(File::open(&args.input)?);
+            let mut parser = IDAVariants::new(input)?;
+            let id1_offset = parser.id1_section_offset().ok_or_else(|| {
+                anyhow!("IDB file don't contains a ID1 sector")
+            })?;
+            parser.read_id1_section(id1_offset)
+        }
+    }
+}
+
 fn main() -> Result<()> {
     let args = Args::parse();
     match &args.operation {
         Operation::DumpTil => dump_til(&args),
         Operation::DumpID0 => dump_id0(&args),
+        Operation::DumpID1 => dump_id1(&args),
         //Operation::SplitIDB(split_idbargs) => split_idb(&args, split_idbargs),
         Operation::DecompressTil(decompress_til_args) => {
             decompress_til(&args, decompress_til_args)
@@ -194,5 +226,6 @@ fn main() -> Result<()> {
             dump_dirtree_bookmarks_tiplace(&args)
         }
         Operation::PrintTilib(tilib_args) => tilib_print(&args, tilib_args),
+        Operation::ProduceIdc(idc_args) => produce_idc(&args, idc_args),
     }
 }
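The TIL entry points also change shape in this patch: TILSection::read no longer takes an IDBSectionCompression argument, and IDAVariants::new replaces IDBParser::new when going through an IDB container. A minimal sketch of both read paths, assuming the call signatures exactly as they appear in dump_til.rs and tilib.rs above; the helper names and error wording below are illustrative only:

use std::fs::File;
use std::io::BufReader;

use anyhow::{anyhow, Result};
use idb_rs::til::section::TILSection;
use idb_rs::IDAVariants;

// Standalone .til file: read the section straight from the reader.
fn til_from_til_file(path: &str) -> Result<TILSection> {
    let mut input = BufReader::new(File::open(path)?);
    TILSection::read(&mut input)
}

// .idb/.i64 container: locate the TIL section offset first, then read it.
fn til_from_idb(path: &str) -> Result<TILSection> {
    let input = BufReader::new(File::open(path)?);
    let mut parser = IDAVariants::new(input)?;
    let til_offset = parser
        .til_section_offset()
        .ok_or_else(|| anyhow!("no TIL section in this IDB file"))?;
    parser.read_til_section(til_offset)
}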