From 19d4441ff396b1c2f6c1ab8094e4ff2f89dff61a Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Sat, 6 Dec 2025 16:45:44 -0500 Subject: [PATCH 01/19] [Rust] Fix leaking list in `Function::guided_source_blocks` Forgot to call `BNFreeArchitectureAndAddressList`, also use `Location` instead of `ArchAndAddr`. --- rust/src/function.rs | 46 +++++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/rust/src/function.rs b/rust/src/function.rs index 80de8e6d6..5e4cf08a3 100644 --- a/rust/src/function.rs +++ b/rust/src/function.rs @@ -94,6 +94,12 @@ impl From for Location { } } +impl From<&BNArchitectureAndAddress> for Location { + fn from(value: &BNArchitectureAndAddress) -> Self { + Self::from_raw(value.address, value.arch) + } +} + impl From for BNArchitectureAndAddress { fn from(value: Location) -> Self { Self { @@ -103,6 +109,29 @@ impl From for BNArchitectureAndAddress { } } +impl From<&Location> for BNArchitectureAndAddress { + fn from(value: &Location) -> Self { + Self::from(*value) + } +} + +impl CoreArrayProvider for Location { + type Raw = BNArchitectureAndAddress; + type Context = (); + type Wrapped<'a> = Self; +} + +unsafe impl CoreArrayProviderInner for Location { + unsafe fn free(raw: *mut Self::Raw, _count: usize, _context: &Self::Context) { + // NOTE: Does not use _count because freeing does not require iterating the list. + BNFreeArchitectureAndAddressList(raw) + } + + unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped<'a> { + Location::from(*raw) + } +} + pub struct NativeBlockIter { arch: CoreArchitecture, bv: Ref, @@ -2594,19 +2623,14 @@ impl Function { unsafe { BNFunctionRemoveMetadata(self.handle, key.as_ptr()) }; } - pub fn guided_source_blocks(&self) -> HashSet { + /// The current list of guided source block start [`Location`]s for this function. + /// + /// These blocks have their direct outgoing branch targets analyzed. 
+ pub fn guided_source_blocks(&self) -> HashSet { let mut count = 0; let raw = unsafe { BNGetGuidedSourceBlocks(self.handle, &mut count) }; - if raw.is_null() || count == 0 { - return HashSet::new(); - } - - (0..count) - .map(|i| { - let raw = unsafe { std::ptr::read(raw.add(i)) }; - ArchAndAddr::from(raw) - }) - .collect::>() + let array: Array = unsafe { Array::new(raw, count, ()) }; + array.into_iter().collect() } } From ab6e75eabb1c084d76200552fdbb1b0e8b179669 Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Sat, 6 Dec 2025 17:06:16 -0500 Subject: [PATCH 02/19] [Rust] Remove redundant `ArchAndAddr` type Use `Location` instead, `arch` can be a nullptr --- rust/src/architecture.rs | 58 +++++++++++++++++++++------------------- rust/src/binary_view.rs | 30 +++++++++------------ rust/src/function.rs | 42 +++++------------------------ 3 files changed, 49 insertions(+), 81 deletions(-) diff --git a/rust/src/architecture.rs b/rust/src/architecture.rs index 3ea215141..2f0490f23 100644 --- a/rust/src/architecture.rs +++ b/rust/src/architecture.rs @@ -23,7 +23,7 @@ use crate::{ calling_convention::CoreCallingConvention, data_buffer::DataBuffer, disassembly::InstructionTextToken, - function::{ArchAndAddr, Function, NativeBlock}, + function::{Function, NativeBlock}, platform::Platform, rc::*, relocation::CoreRelocationHandler, @@ -47,6 +47,7 @@ use crate::relocation::{CustomRelocationHandlerHandle, RelocationHandler}; use crate::variable::IndirectBranchInfo; use crate::confidence::Conf; +use crate::function::Location; use crate::low_level_il::expression::ValueExpr; use crate::low_level_il::lifting::{ get_default_flag_cond_llil, get_default_flag_write_llil, LowLevelILFlagWriteOp, @@ -1959,7 +1960,7 @@ pub struct BasicBlockAnalysisContext { // In pub indirect_branches: Vec, - pub indirect_no_return_calls: HashSet, + pub indirect_no_return_calls: HashSet, pub analysis_skip_override: BNFunctionAnalysisSkipOverride, pub guided_analysis_mode: bool, pub 
trigger_guided_on_invalid_instruction: bool, @@ -1969,13 +1970,13 @@ pub struct BasicBlockAnalysisContext { // In/Out pub max_size_reached: bool, - contextual_returns: HashMap, + contextual_returns: HashMap, // Out - direct_code_references: HashMap, - direct_no_return_calls: HashSet, - halted_disassembly_addresses: HashSet, - inlined_unresolved_indirect_branches: HashSet, + direct_code_references: HashMap, + direct_no_return_calls: HashSet, + halted_disassembly_addresses: HashSet, + inlined_unresolved_indirect_branches: HashSet, } impl BasicBlockAnalysisContext { @@ -1995,7 +1996,7 @@ impl BasicBlockAnalysisContext { let indirect_no_return_calls = (0..ctx_ref.indirectNoReturnCallsCount) .map(|i| { let raw = unsafe { std::ptr::read(ctx_ref.indirectNoReturnCalls.add(i)) }; - ArchAndAddr::from(raw) + Location::from(raw) }) .collect::>(); @@ -2003,7 +2004,7 @@ impl BasicBlockAnalysisContext { .map(|i| { let loc = unsafe { let raw = std::ptr::read(ctx_ref.contextualFunctionReturnLocations.add(i)); - ArchAndAddr::from(raw) + Location::from(raw) }; let val = unsafe { *ctx_ref.contextualFunctionReturnValues.add(i) }; (loc, val) @@ -2014,7 +2015,7 @@ impl BasicBlockAnalysisContext { .map(|i| { let src = unsafe { let raw = std::ptr::read(ctx_ref.directRefSources.add(i)); - ArchAndAddr::from(raw) + Location::from(raw) }; let tgt = unsafe { *ctx_ref.directRefTargets.add(i) }; (tgt, src) @@ -2024,14 +2025,14 @@ impl BasicBlockAnalysisContext { let direct_no_return_calls = (0..ctx_ref.directNoReturnCallsCount) .map(|i| { let raw = unsafe { std::ptr::read(ctx_ref.directNoReturnCalls.add(i)) }; - ArchAndAddr::from(raw) + Location::from(raw) }) .collect::>(); let halted_disassembly_addresses = (0..ctx_ref.haltedDisassemblyAddressesCount) .map(|i| { let raw = unsafe { std::ptr::read(ctx_ref.haltedDisassemblyAddresses.add(i)) }; - ArchAndAddr::from(raw) + Location::from(raw) }) .collect::>(); @@ -2040,7 +2041,7 @@ impl BasicBlockAnalysisContext { .map(|i| { let raw = unsafe { 
std::ptr::read(ctx_ref.inlinedUnresolvedIndirectBranches.add(i)) }; - ArchAndAddr::from(raw) + Location::from(raw) }) .collect::>(); @@ -2064,7 +2065,8 @@ impl BasicBlockAnalysisContext { } } - pub fn add_contextual_return(&mut self, loc: ArchAndAddr, value: bool) { + pub fn add_contextual_return(&mut self, loc: impl Into, value: bool) { + let loc = loc.into(); if !self.contextual_returns.contains_key(&loc) { self.contextual_returns_dirty = true; } @@ -2072,20 +2074,22 @@ impl BasicBlockAnalysisContext { self.contextual_returns.insert(loc, value); } - pub fn add_direct_code_reference(&mut self, target: u64, src: ArchAndAddr) { - self.direct_code_references.entry(target).or_insert(src); + pub fn add_direct_code_reference(&mut self, target: u64, src: impl Into) { + self.direct_code_references + .entry(target) + .or_insert(src.into()); } - pub fn add_direct_no_return_call(&mut self, loc: ArchAndAddr) { - self.direct_no_return_calls.insert(loc); + pub fn add_direct_no_return_call(&mut self, loc: impl Into) { + self.direct_no_return_calls.insert(loc.into()); } - pub fn add_halted_disassembly_address(&mut self, loc: ArchAndAddr) { - self.halted_disassembly_addresses.insert(loc); + pub fn add_halted_disassembly_address(&mut self, loc: impl Into) { + self.halted_disassembly_addresses.insert(loc.into()); } - pub fn add_inlined_unresolved_indirect_branch(&mut self, loc: ArchAndAddr) { - self.inlined_unresolved_indirect_branches.insert(loc); + pub fn add_inlined_unresolved_indirect_branch(&mut self, loc: impl Into) { + self.inlined_unresolved_indirect_branches.insert(loc.into()); } pub fn create_basic_block( @@ -2121,7 +2125,7 @@ impl BasicBlockAnalysisContext { let mut sources: Vec = Vec::with_capacity(total); let mut targets: Vec = Vec::with_capacity(total); for (target, src) in &self.direct_code_references { - sources.push(src.into_raw()); + sources.push(BNArchitectureAndAddress::from(src)); targets.push(*target); } unsafe { @@ -2138,7 +2142,7 @@ impl 
BasicBlockAnalysisContext { let total = self.direct_no_return_calls.len(); let mut locations: Vec = Vec::with_capacity(total); for loc in &self.direct_no_return_calls { - locations.push(loc.into_raw()); + locations.push(BNArchitectureAndAddress::from(loc)); } unsafe { BNAnalyzeBasicBlocksContextSetDirectNoReturnCalls( @@ -2153,7 +2157,7 @@ impl BasicBlockAnalysisContext { let total = self.halted_disassembly_addresses.len(); let mut locations: Vec = Vec::with_capacity(total); for loc in &self.halted_disassembly_addresses { - locations.push(loc.into_raw()); + locations.push(BNArchitectureAndAddress::from(loc)); } unsafe { BNAnalyzeBasicBlocksContextSetHaltedDisassemblyAddresses( @@ -2168,7 +2172,7 @@ impl BasicBlockAnalysisContext { let total = self.inlined_unresolved_indirect_branches.len(); let mut locations: Vec = Vec::with_capacity(total); for loc in &self.inlined_unresolved_indirect_branches { - locations.push(loc.into_raw()); + locations.push(BNArchitectureAndAddress::from(loc)); } unsafe { BNAnalyzeBasicBlocksContextSetInlinedUnresolvedIndirectBranches( @@ -2188,7 +2192,7 @@ impl BasicBlockAnalysisContext { let mut locations: Vec = Vec::with_capacity(total); let mut values: Vec = Vec::with_capacity(total); for (loc, value) in &self.contextual_returns { - locations.push(loc.into_raw()); + locations.push(BNArchitectureAndAddress::from(loc)); values.push(*value); } unsafe { diff --git a/rust/src/binary_view.rs b/rust/src/binary_view.rs index 4b55e321e..e1b7a5c00 100644 --- a/rust/src/binary_view.rs +++ b/rust/src/binary_view.rs @@ -38,7 +38,7 @@ use crate::external_library::{ExternalLibrary, ExternalLocation}; use crate::file_accessor::{Accessor, FileAccessor}; use crate::file_metadata::FileMetadata; use crate::flowgraph::FlowGraph; -use crate::function::{ArchAndAddr, Function, FunctionViewType, NativeBlock}; +use crate::function::{Function, FunctionViewType, Location, NativeBlock}; use crate::linear_view::{LinearDisassemblyLine, LinearViewCursor}; use 
crate::metadata::Metadata; use crate::platform::Platform; @@ -1445,29 +1445,23 @@ pub trait BinaryViewExt: BinaryViewBase { } } + // TODO: Should this instead be implemented on [`Function`] considering `src_func`? `Location` is local to the source function. fn should_skip_target_analysis( &self, - source: &ArchAndAddr, - srcfunc: &Function, - srcend: u64, - target: &ArchAndAddr, + src_loc: impl Into, + src_func: &Function, + src_end: u64, + target: impl Into, ) -> bool { - let mut srccopy = BNArchitectureAndAddress { - arch: source.arch.handle, - address: source.addr, - }; - let mut targetcopy = BNArchitectureAndAddress { - arch: target.arch.handle, - address: target.addr, - }; - + let src_loc = src_loc.into(); + let target = target.into(); unsafe { BNShouldSkipTargetAnalysis( self.as_ref().handle, - &mut srccopy, - srcfunc.handle, - srcend, - &mut targetcopy, + &mut src_loc.into(), + src_func.handle, + src_end, + &mut target.into(), ) } } diff --git a/rust/src/function.rs b/rust/src/function.rs index 5e4cf08a3..1ef0a377e 100644 --- a/rust/src/function.rs +++ b/rust/src/function.rs @@ -71,20 +71,21 @@ impl Location { arch: Some(unsafe { CoreArchitecture::from_raw(arch) }), } } + + pub fn new(arch: Option, addr: u64) -> Self { + Self { arch, addr } + } } impl From for Location { fn from(addr: u64) -> Self { - Location { arch: None, addr } + Location::new(None, addr) } } impl From<(CoreArchitecture, u64)> for Location { fn from(loc: (CoreArchitecture, u64)) -> Self { - Location { - arch: Some(loc.0), - addr: loc.1, - } + Location::new(Some(loc.0), loc.1) } } @@ -2998,34 +2999,3 @@ unsafe impl CoreArrayProviderInner for Comment { } } } - -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] -pub struct ArchAndAddr { - pub arch: CoreArchitecture, - pub addr: u64, -} - -impl ArchAndAddr { - pub fn new(arch: CoreArchitecture, addr: u64) -> Self { - Self { arch, addr } - } -} - -impl From for ArchAndAddr { - fn from(raw: BNArchitectureAndAddress) -> Self { - unsafe { - let 
arch = CoreArchitecture::from_raw(raw.arch); - let addr = raw.address; - ArchAndAddr { arch, addr } - } - } -} - -impl ArchAndAddr { - pub fn into_raw(self) -> BNArchitectureAndAddress { - BNArchitectureAndAddress { - arch: self.arch.handle, - address: self.addr, - } - } -} From c74211a688e189be2a475e3b7954f86da6adcfd8 Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Sun, 7 Dec 2025 15:03:44 -0500 Subject: [PATCH 03/19] [Rust] Move architecture module code into more reasonable files To keep backwards compatibility for commonly referenced code we re-export them within the architecture module. Also does some light refactoring of some newly added APIs to keep them more consistent with other parts of the codebase. --- arch/riscv/src/lib.rs | 8 +- plugins/dwarf/dwarfdump/src/lib.rs | 3 +- rust/examples/flowgraph.rs | 3 +- rust/src/architecture.rs | 1484 +------------------------- rust/src/architecture/basic_block.rs | 261 +++++ rust/src/architecture/branches.rs | 153 +++ rust/src/architecture/flag.rs | 447 ++++++++ rust/src/architecture/instruction.rs | 114 ++ rust/src/architecture/intrinsic.rs | 150 +++ rust/src/architecture/register.rs | 333 ++++++ rust/src/basic_block.rs | 100 +- rust/src/binary_view.rs | 6 +- rust/src/flowgraph.rs | 1 - rust/src/flowgraph/edge.rs | 6 +- rust/src/flowgraph/node.rs | 3 +- rust/src/function.rs | 4 +- rust/src/lib.rs | 5 +- rust/src/llvm.rs | 100 +- rust/src/variable.rs | 55 - view/minidump/src/command.rs | 2 +- view/minidump/src/view.rs | 4 +- 21 files changed, 1671 insertions(+), 1571 deletions(-) create mode 100644 rust/src/architecture/basic_block.rs create mode 100644 rust/src/architecture/branches.rs create mode 100644 rust/src/architecture/flag.rs create mode 100644 rust/src/architecture/instruction.rs create mode 100644 rust/src/architecture/intrinsic.rs create mode 100644 rust/src/architecture/register.rs diff --git a/arch/riscv/src/lib.rs b/arch/riscv/src/lib.rs index ef9e12e16..6ce254063 100644 --- a/arch/riscv/src/lib.rs +++ 
b/arch/riscv/src/lib.rs @@ -9,10 +9,9 @@ use binaryninja::relocation::{Relocation, RelocationHandlerExt}; use binaryninja::{ add_optional_plugin_dependency, architecture, architecture::{ - llvm_assemble, Architecture, ArchitectureExt, CoreArchitecture, CustomArchitectureHandle, - ImplicitRegisterExtend, InstructionInfo, LlvmServicesCodeModel, LlvmServicesDialect, - LlvmServicesRelocMode, Register as Reg, RegisterInfo, UnusedFlag, UnusedRegisterStack, - UnusedRegisterStackInfo, + Architecture, ArchitectureExt, CoreArchitecture, CustomArchitectureHandle, + ImplicitRegisterExtend, InstructionInfo, Register as Reg, RegisterInfo, UnusedFlag, + UnusedRegisterStack, UnusedRegisterStackInfo, }, binary_view::{BinaryView, BinaryViewExt}, calling_convention::{register_calling_convention, CallingConvention, ConventionBuilder}, @@ -20,6 +19,7 @@ use binaryninja::{ disassembly::{InstructionTextToken, InstructionTextTokenKind}, function::Function, function_recognizer::FunctionRecognizer, + llvm::{llvm_assemble, LlvmServicesCodeModel, LlvmServicesDialect, LlvmServicesRelocMode}, rc::Ref, relocation::{ CoreRelocationHandler, CustomRelocationHandlerHandle, RelocationHandler, RelocationInfo, diff --git a/plugins/dwarf/dwarfdump/src/lib.rs b/plugins/dwarf/dwarfdump/src/lib.rs index 1d0343f1b..7610c3f23 100644 --- a/plugins/dwarf/dwarfdump/src/lib.rs +++ b/plugins/dwarf/dwarfdump/src/lib.rs @@ -13,10 +13,11 @@ // limitations under the License. 
use binaryninja::{ + architecture::BranchType, binary_view::{BinaryView, BinaryViewExt}, command::{register_command, Command}, disassembly::{DisassemblyTextLine, InstructionTextToken, InstructionTextTokenKind}, - flowgraph::{BranchType, EdgeStyle, FlowGraph, FlowGraphNode, FlowGraphOption}, + flowgraph::{EdgeStyle, FlowGraph, FlowGraphNode, FlowGraphOption}, }; use dwarfreader::is_valid; diff --git a/rust/examples/flowgraph.rs b/rust/examples/flowgraph.rs index ede7ce84a..75c854a76 100644 --- a/rust/examples/flowgraph.rs +++ b/rust/examples/flowgraph.rs @@ -7,9 +7,10 @@ use binaryninja::interaction::handler::{ }; use binaryninja::interaction::{MessageBoxButtonResult, MessageBoxButtonSet, MessageBoxIcon}; use binaryninja::{ + architecture::BranchType, binary_view::{BinaryView, BinaryViewExt}, disassembly::{DisassemblyTextLine, InstructionTextToken, InstructionTextTokenKind}, - flowgraph::{BranchType, EdgePenStyle, FlowGraph, ThemeColor}, + flowgraph::{EdgePenStyle, FlowGraph, ThemeColor}, }; pub struct GraphPrinter; diff --git a/rust/src/architecture.rs b/rust/src/architecture.rs index 2f0490f23..becf7639c 100644 --- a/rust/src/architecture.rs +++ b/rust/src/architecture.rs @@ -23,44 +23,52 @@ use crate::{ calling_convention::CoreCallingConvention, data_buffer::DataBuffer, disassembly::InstructionTextToken, - function::{Function, NativeBlock}, + function::Function, platform::Platform, rc::*, relocation::CoreRelocationHandler, string::{IntoCStr, *}, types::{NameAndType, Type}, - BranchType, Endianness, + Endianness, }; use std::ops::Deref; use std::{ - borrow::{Borrow, Cow}, - collections::HashMap, - ffi::{c_char, c_int, c_void, CStr, CString}, - fmt::Display, + borrow::Borrow, + ffi::{c_char, c_void, CString}, hash::Hash, mem::MaybeUninit, }; -use crate::basic_block::BasicBlock; use crate::function_recognizer::FunctionRecognizer; use crate::relocation::{CustomRelocationHandlerHandle, RelocationHandler}; -use crate::variable::IndirectBranchInfo; use 
crate::confidence::Conf; -use crate::function::Location; use crate::low_level_il::expression::ValueExpr; use crate::low_level_il::lifting::{ get_default_flag_cond_llil, get_default_flag_write_llil, LowLevelILFlagWriteOp, }; use crate::low_level_il::{LowLevelILMutableExpression, LowLevelILMutableFunction}; -pub use binaryninjacore_sys::BNFlagRole as FlagRole; -pub use binaryninjacore_sys::BNImplicitRegisterExtend as ImplicitRegisterExtend; -pub use binaryninjacore_sys::BNLowLevelILFlagCondition as FlagCondition; -use std::collections::HashSet; -macro_rules! newtype { +pub mod basic_block; +pub mod branches; +pub mod flag; +pub mod instruction; +pub mod intrinsic; +pub mod register; + +// Re-export all the submodules to keep from breaking everyone's code. +// We split these out just to clarify each part, not necessarily to enforce an extra namespace. +pub use basic_block::*; +pub use branches::*; +pub use flag::*; +pub use instruction::*; +pub use intrinsic::*; +pub use register::*; + +#[macro_export] +macro_rules! new_id_type { ($name:ident, $inner_type:ty) => { - #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] + #[derive(std::fmt::Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct $name(pub $inner_type); impl From<$inner_type> for $name { @@ -75,377 +83,14 @@ macro_rules! newtype { } } - impl Display for $name { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + impl std::fmt::Display for $name { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.0) } } }; } -newtype!(RegisterId, u32); - -impl RegisterId { - pub fn is_temporary(&self) -> bool { - self.0 & 0x8000_0000 != 0 - } -} - -newtype!(RegisterStackId, u32); -newtype!(FlagId, u32); -// TODO: Make this NonZero? 
-newtype!(FlagWriteId, u32); -newtype!(FlagClassId, u32); -newtype!(FlagGroupId, u32); -newtype!(IntrinsicId, u32); - -#[derive(Default, Copy, Clone, PartialEq, Eq, Hash, Debug)] -pub enum BranchKind { - #[default] - Unresolved, - Unconditional(u64), - False(u64), - True(u64), - Call(u64), - FunctionReturn, - SystemCall, - Indirect, - Exception, - UserDefined, -} - -#[derive(Default, Copy, Clone, PartialEq, Eq, Hash, Debug)] -pub struct BranchInfo { - /// If `None` the target architecture is the same as the branch instruction. - pub arch: Option, - pub kind: BranchKind, -} - -impl BranchInfo { - /// Branches to an instruction with the current architecture. - pub fn new(kind: BranchKind) -> Self { - Self { arch: None, kind } - } - - /// Branches to an instruction with an explicit architecture. - /// - /// Use this if your architecture can transition to another architecture with a branch. - pub fn new_with_arch(kind: BranchKind, arch: CoreArchitecture) -> Self { - Self { - arch: Some(arch), - kind, - } - } - - pub fn target(&self) -> Option { - match self.kind { - BranchKind::Unconditional(target) => Some(target), - BranchKind::False(target) => Some(target), - BranchKind::True(target) => Some(target), - BranchKind::Call(target) => Some(target), - _ => None, - } - } -} - -impl From for BNBranchType { - fn from(value: BranchInfo) -> Self { - match value.kind { - BranchKind::Unresolved => BNBranchType::UnresolvedBranch, - BranchKind::Unconditional(_) => BNBranchType::UnconditionalBranch, - BranchKind::False(_) => BNBranchType::FalseBranch, - BranchKind::True(_) => BNBranchType::TrueBranch, - BranchKind::Call(_) => BNBranchType::CallDestination, - BranchKind::FunctionReturn => BNBranchType::FunctionReturn, - BranchKind::SystemCall => BNBranchType::SystemCall, - BranchKind::Indirect => BNBranchType::IndirectBranch, - BranchKind::Exception => BNBranchType::ExceptionBranch, - BranchKind::UserDefined => BNBranchType::UserDefinedBranch, - } - } -} - -impl From for BranchInfo 
{ - fn from(value: BranchKind) -> Self { - Self { - arch: None, - kind: value, - } - } -} - -impl From for BranchType { - fn from(value: BranchKind) -> Self { - match value { - BranchKind::Unresolved => BranchType::UnresolvedBranch, - BranchKind::Unconditional(_) => BranchType::UnconditionalBranch, - BranchKind::True(_) => BranchType::TrueBranch, - BranchKind::False(_) => BranchType::FalseBranch, - BranchKind::Call(_) => BranchType::CallDestination, - BranchKind::FunctionReturn => BranchType::FunctionReturn, - BranchKind::SystemCall => BranchType::SystemCall, - BranchKind::Indirect => BranchType::IndirectBranch, - BranchKind::Exception => BranchType::ExceptionBranch, - BranchKind::UserDefined => BranchType::UserDefinedBranch, - } - } -} - -/// This is the number of branches that can be specified in an [`InstructionInfo`]. -pub const NUM_BRANCH_INFO: usize = 3; - -#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] -pub struct InstructionInfo { - pub length: usize, - // TODO: This field name is really long... - pub arch_transition_by_target_addr: bool, - pub delay_slots: u8, - pub branches: [Option; NUM_BRANCH_INFO], -} - -impl InstructionInfo { - // TODO: `new_with_delay_slot`? - pub fn new(length: usize, delay_slots: u8) -> Self { - Self { - length, - arch_transition_by_target_addr: false, - delay_slots, - branches: Default::default(), - } - } - - pub fn add_branch(&mut self, branch_info: impl Into) { - // Will go through each slot and attempt to add the branch info. - // TODO: Return a result with BranchInfoSlotsFilled error. - for branch in &mut self.branches { - if branch.is_none() { - *branch = Some(branch_info.into()); - return; - } - } - } -} - -impl From for InstructionInfo { - fn from(value: BNInstructionInfo) -> Self { - // TODO: This is quite ugly, but we destructure the branch info so this will have to do. 
- let mut branch_info = [None; NUM_BRANCH_INFO]; - #[allow(clippy::needless_range_loop)] - for i in 0..value.branchCount.min(NUM_BRANCH_INFO) { - let branch_target = value.branchTarget[i]; - branch_info[i] = Some(BranchInfo { - kind: match value.branchType[i] { - BNBranchType::UnconditionalBranch => BranchKind::Unconditional(branch_target), - BNBranchType::FalseBranch => BranchKind::False(branch_target), - BNBranchType::TrueBranch => BranchKind::True(branch_target), - BNBranchType::CallDestination => BranchKind::Call(branch_target), - BNBranchType::FunctionReturn => BranchKind::FunctionReturn, - BNBranchType::SystemCall => BranchKind::SystemCall, - BNBranchType::IndirectBranch => BranchKind::Indirect, - BNBranchType::ExceptionBranch => BranchKind::Exception, - BNBranchType::UnresolvedBranch => BranchKind::Unresolved, - BNBranchType::UserDefinedBranch => BranchKind::UserDefined, - }, - arch: if value.branchArch[i].is_null() { - None - } else { - Some(unsafe { CoreArchitecture::from_raw(value.branchArch[i]) }) - }, - }); - } - Self { - length: value.length, - arch_transition_by_target_addr: value.archTransitionByTargetAddr, - delay_slots: value.delaySlots, - branches: branch_info, - } - } -} - -impl From for BNInstructionInfo { - fn from(value: InstructionInfo) -> Self { - let branch_count = value.branches.into_iter().filter(Option::is_some).count(); - // TODO: This is quite ugly, but we destructure the branch info so this will have to do. 
- let branch_info_0 = value.branches[0].unwrap_or_default(); - let branch_info_1 = value.branches[1].unwrap_or_default(); - let branch_info_2 = value.branches[2].unwrap_or_default(); - Self { - length: value.length, - branchCount: branch_count, - archTransitionByTargetAddr: value.arch_transition_by_target_addr, - delaySlots: value.delay_slots, - branchType: [ - branch_info_0.into(), - branch_info_1.into(), - branch_info_2.into(), - ], - branchTarget: [ - branch_info_0.target().unwrap_or_default(), - branch_info_1.target().unwrap_or_default(), - branch_info_2.target().unwrap_or_default(), - ], - branchArch: [ - branch_info_0 - .arch - .map(|a| a.handle) - .unwrap_or(std::ptr::null_mut()), - branch_info_1 - .arch - .map(|a| a.handle) - .unwrap_or(std::ptr::null_mut()), - branch_info_2 - .arch - .map(|a| a.handle) - .unwrap_or(std::ptr::null_mut()), - ], - } - } -} - -pub trait RegisterInfo: Sized { - type RegType: Register; - - fn parent(&self) -> Option; - fn size(&self) -> usize; - fn offset(&self) -> usize; - fn implicit_extend(&self) -> ImplicitRegisterExtend; -} - -pub trait Register: Debug + Sized + Clone + Copy + Hash + Eq { - type InfoType: RegisterInfo; - - fn name(&self) -> Cow<'_, str>; - fn info(&self) -> Self::InfoType; - - /// Unique identifier for this `Register`. 
- /// - /// *MUST* be in the range [0, 0x7fff_ffff] - fn id(&self) -> RegisterId; -} - -pub trait RegisterStackInfo: Sized { - type RegStackType: RegisterStack; - type RegType: Register; - type RegInfoType: RegisterInfo; - - fn storage_regs(&self) -> (Self::RegType, usize); - fn top_relative_regs(&self) -> Option<(Self::RegType, usize)>; - fn stack_top_reg(&self) -> Self::RegType; -} - -pub trait RegisterStack: Debug + Sized + Clone + Copy { - type InfoType: RegisterStackInfo< - RegType = Self::RegType, - RegInfoType = Self::RegInfoType, - RegStackType = Self, - >; - type RegType: Register; - type RegInfoType: RegisterInfo; - - fn name(&self) -> Cow<'_, str>; - fn info(&self) -> Self::InfoType; - - /// Unique identifier for this `RegisterStack`. - /// - /// *MUST* be in the range [0, 0x7fff_ffff] - fn id(&self) -> RegisterStackId; -} - -pub trait Flag: Debug + Sized + Clone + Copy + Hash + Eq { - type FlagClass: FlagClass; - - fn name(&self) -> Cow<'_, str>; - fn role(&self, class: Option) -> FlagRole; - - /// Unique identifier for this `Flag`. - /// - /// *MUST* be in the range [0, 0x7fff_ffff] - fn id(&self) -> FlagId; -} - -pub trait FlagWrite: Sized + Clone + Copy { - type FlagType: Flag; - type FlagClass: FlagClass; - - fn name(&self) -> Cow<'_, str>; - fn class(&self) -> Option; - - /// Unique identifier for this `FlagWrite`. - /// - /// *MUST NOT* be 0. - /// *MUST* be in the range [1, 0x7fff_ffff] - fn id(&self) -> FlagWriteId; - - fn flags_written(&self) -> Vec; -} - -pub trait FlagClass: Sized + Clone + Copy + Hash + Eq { - fn name(&self) -> Cow<'_, str>; - - /// Unique identifier for this `FlagClass`. - /// - /// *MUST NOT* be 0. - /// *MUST* be in the range [1, 0x7fff_ffff] - fn id(&self) -> FlagClassId; -} - -pub trait FlagGroup: Debug + Sized + Clone + Copy { - type FlagType: Flag; - type FlagClass: FlagClass; - - fn name(&self) -> Cow<'_, str>; - - /// Unique identifier for this `FlagGroup`. 
- /// - /// *MUST* be in the range [0, 0x7fff_ffff] - fn id(&self) -> FlagGroupId; - - /// Returns the list of flags that need to be resolved in order - /// to take the clean flag resolution path -- at time of writing, - /// all required flags must have been set by the same instruction, - /// and the 'querying' instruction must be reachable from *one* - /// instruction that sets all of these flags. - fn flags_required(&self) -> Vec; - - /// Returns the mapping of Semantic Flag Classes to Flag Conditions, - /// in the context of this Flag Group. - /// - /// Example: - /// - /// If we have a group representing `cr1_lt` (as in PowerPC), we would - /// have multiple Semantic Flag Classes used by the different Flag Write - /// Types to represent the different comparisons, so for `cr1_lt` we - /// would return a mapping along the lines of: - /// - /// ```text - /// cr1_signed -> LLFC_SLT, - /// cr1_unsigned -> LLFC_ULT, - /// ``` - /// - /// This allows the core to recover the semantics of the comparison and - /// inline it into conditional branches when appropriate. - fn flag_conditions(&self) -> HashMap; -} - -pub trait Intrinsic: Debug + Sized + Clone + Copy { - fn name(&self) -> Cow<'_, str>; - - /// Unique identifier for this `Intrinsic`. - fn id(&self) -> IntrinsicId; - - /// The intrinsic class for this `Intrinsic`. - fn class(&self) -> BNIntrinsicClass { - BNIntrinsicClass::GeneralIntrinsicClass - } - - // TODO: Maybe just return `(String, Conf>)`? - /// List of the input names and types for this intrinsic. - fn inputs(&self) -> Vec; - - /// List of the output types for this intrinsic. - fn outputs(&self) -> Vec>>; -} - pub trait Architecture: 'static + Sized + AsRef { type Handle: Borrow + Clone; @@ -673,742 +318,6 @@ pub trait Architecture: 'static + Sized + AsRef { fn handle(&self) -> Self::Handle; } -/// Type for architrectures that do not use register stacks. Will panic if accessed as a register stack. 
-#[derive(Clone, Copy, PartialEq, Eq, Hash)] -pub struct UnusedRegisterStackInfo { - _reg: std::marker::PhantomData, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct UnusedRegisterStack { - _reg: std::marker::PhantomData, -} - -impl RegisterStackInfo for UnusedRegisterStackInfo { - type RegStackType = UnusedRegisterStack; - type RegType = R; - type RegInfoType = R::InfoType; - - fn storage_regs(&self) -> (Self::RegType, usize) { - unreachable!() - } - fn top_relative_regs(&self) -> Option<(Self::RegType, usize)> { - unreachable!() - } - fn stack_top_reg(&self) -> Self::RegType { - unreachable!() - } -} - -impl RegisterStack for UnusedRegisterStack { - type InfoType = UnusedRegisterStackInfo; - type RegType = R; - type RegInfoType = R::InfoType; - - fn name(&self) -> Cow<'_, str> { - unreachable!() - } - fn id(&self) -> RegisterStackId { - unreachable!() - } - fn info(&self) -> Self::InfoType { - unreachable!() - } -} - -/// Type for architrectures that do not use flags. Will panic if accessed as a flag. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct UnusedFlag; - -impl Flag for UnusedFlag { - type FlagClass = Self; - fn name(&self) -> Cow<'_, str> { - unreachable!() - } - fn role(&self, _class: Option) -> FlagRole { - unreachable!() - } - fn id(&self) -> FlagId { - unreachable!() - } -} - -impl FlagWrite for UnusedFlag { - type FlagType = Self; - type FlagClass = Self; - fn name(&self) -> Cow<'_, str> { - unreachable!() - } - fn class(&self) -> Option { - unreachable!() - } - fn id(&self) -> FlagWriteId { - unreachable!() - } - fn flags_written(&self) -> Vec { - unreachable!() - } -} - -impl FlagClass for UnusedFlag { - fn name(&self) -> Cow<'_, str> { - unreachable!() - } - fn id(&self) -> FlagClassId { - unreachable!() - } -} - -impl FlagGroup for UnusedFlag { - type FlagType = Self; - type FlagClass = Self; - fn name(&self) -> Cow<'_, str> { - unreachable!() - } - fn id(&self) -> FlagGroupId { - unreachable!() - } - fn flags_required(&self) -> Vec { - unreachable!() - } - fn flag_conditions(&self) -> HashMap { - unreachable!() - } -} - -/// Type for architrectures that do not use intrinsics. Will panic if accessed as an intrinsic. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct UnusedIntrinsic; - -impl Intrinsic for UnusedIntrinsic { - fn name(&self) -> Cow<'_, str> { - unreachable!() - } - fn id(&self) -> IntrinsicId { - unreachable!() - } - fn inputs(&self) -> Vec { - unreachable!() - } - fn outputs(&self) -> Vec>> { - unreachable!() - } -} - -#[derive(Debug, Copy, Clone)] -pub struct CoreRegisterInfo { - arch: CoreArchitecture, - id: RegisterId, - info: BNRegisterInfo, -} - -impl CoreRegisterInfo { - pub fn new(arch: CoreArchitecture, id: RegisterId, info: BNRegisterInfo) -> Self { - Self { arch, id, info } - } -} - -impl RegisterInfo for CoreRegisterInfo { - type RegType = CoreRegister; - - fn parent(&self) -> Option { - if self.id != RegisterId::from(self.info.fullWidthRegister) { - Some(CoreRegister::new( - self.arch, - RegisterId::from(self.info.fullWidthRegister), - )?) - } else { - None - } - } - - fn size(&self) -> usize { - self.info.size - } - - fn offset(&self) -> usize { - self.info.offset - } - - fn implicit_extend(&self) -> ImplicitRegisterExtend { - self.info.extend - } -} - -#[derive(Copy, Clone, Eq, PartialEq, Hash)] -pub struct CoreRegister { - arch: CoreArchitecture, - id: RegisterId, -} - -impl CoreRegister { - pub fn new(arch: CoreArchitecture, id: RegisterId) -> Option { - let register = Self { arch, id }; - register.is_valid().then_some(register) - } - - fn is_valid(&self) -> bool { - // We check the name to see if the register is actually valid. 
- let name = unsafe { BNGetArchitectureRegisterName(self.arch.handle, self.id.into()) }; - match name.is_null() { - true => false, - false => { - unsafe { BNFreeString(name) }; - true - } - } - } -} - -impl Register for CoreRegister { - type InfoType = CoreRegisterInfo; - - fn name(&self) -> Cow<'_, str> { - unsafe { - let name = BNGetArchitectureRegisterName(self.arch.handle, self.id.into()); - - // We need to guarantee ownership, as if we're still - // a Borrowed variant we're about to free the underlying - // memory. - let res = CStr::from_ptr(name); - let res = res.to_string_lossy().into_owned().into(); - - BNFreeString(name); - - res - } - } - - fn info(&self) -> CoreRegisterInfo { - CoreRegisterInfo::new(self.arch, self.id, unsafe { - BNGetArchitectureRegisterInfo(self.arch.handle, self.id.into()) - }) - } - - fn id(&self) -> RegisterId { - self.id - } -} - -impl Debug for CoreRegister { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("CoreRegister") - .field("id", &self.id) - .field("name", &self.name()) - .finish() - } -} - -impl CoreArrayProvider for CoreRegister { - type Raw = u32; - type Context = CoreArchitecture; - type Wrapped<'a> = Self; -} - -unsafe impl CoreArrayProviderInner for CoreRegister { - unsafe fn free(raw: *mut Self::Raw, _count: usize, _context: &Self::Context) { - BNFreeRegisterList(raw) - } - - unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, context: &'a Self::Context) -> Self::Wrapped<'a> { - Self::new(*context, RegisterId::from(*raw)).expect("Register list contains valid registers") - } -} - -#[derive(Debug, Copy, Clone)] -pub struct CoreRegisterStackInfo { - arch: CoreArchitecture, - // TODO: Wrap BNRegisterStackInfo - info: BNRegisterStackInfo, -} - -impl CoreRegisterStackInfo { - pub fn new(arch: CoreArchitecture, info: BNRegisterStackInfo) -> Self { - Self { arch, info } - } -} - -impl RegisterStackInfo for CoreRegisterStackInfo { - type RegStackType = CoreRegisterStack; - type RegType = CoreRegister; - 
type RegInfoType = CoreRegisterInfo; - - fn storage_regs(&self) -> (Self::RegType, usize) { - ( - CoreRegister::new(self.arch, RegisterId::from(self.info.firstStorageReg)) - .expect("Storage register is valid"), - self.info.storageCount as usize, - ) - } - - fn top_relative_regs(&self) -> Option<(Self::RegType, usize)> { - if self.info.topRelativeCount == 0 { - None - } else { - Some(( - CoreRegister::new(self.arch, RegisterId::from(self.info.firstTopRelativeReg)) - .expect("Top relative register is valid"), - self.info.topRelativeCount as usize, - )) - } - } - - fn stack_top_reg(&self) -> Self::RegType { - CoreRegister::new(self.arch, RegisterId::from(self.info.stackTopReg)) - .expect("Stack top register is valid") - } -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub struct CoreRegisterStack { - arch: CoreArchitecture, - id: RegisterStackId, -} - -impl CoreRegisterStack { - pub fn new(arch: CoreArchitecture, id: RegisterStackId) -> Option { - let register_stack = Self { arch, id }; - register_stack.is_valid().then_some(register_stack) - } - - fn is_valid(&self) -> bool { - // We check the name to see if the stack register is actually valid. - let name = unsafe { BNGetArchitectureRegisterStackName(self.arch.handle, self.id.into()) }; - match name.is_null() { - true => false, - false => { - unsafe { BNFreeString(name) }; - true - } - } - } -} - -impl RegisterStack for CoreRegisterStack { - type InfoType = CoreRegisterStackInfo; - type RegType = CoreRegister; - type RegInfoType = CoreRegisterInfo; - - fn name(&self) -> Cow<'_, str> { - unsafe { - let name = BNGetArchitectureRegisterStackName(self.arch.handle, self.id.into()); - - // We need to guarantee ownership, as if we're still - // a Borrowed variant we're about to free the underlying - // memory. 
- let res = CStr::from_ptr(name); - let res = res.to_string_lossy().into_owned().into(); - - BNFreeString(name); - - res - } - } - - fn info(&self) -> CoreRegisterStackInfo { - CoreRegisterStackInfo::new(self.arch, unsafe { - BNGetArchitectureRegisterStackInfo(self.arch.handle, self.id.into()) - }) - } - - fn id(&self) -> RegisterStackId { - self.id - } -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub struct CoreFlag { - arch: CoreArchitecture, - id: FlagId, -} - -impl CoreFlag { - pub fn new(arch: CoreArchitecture, id: FlagId) -> Option { - let flag = Self { arch, id }; - flag.is_valid().then_some(flag) - } - - fn is_valid(&self) -> bool { - // We check the name to see if the flag is actually valid. - let name = unsafe { BNGetArchitectureFlagName(self.arch.handle, self.id.into()) }; - match name.is_null() { - true => false, - false => { - unsafe { BNFreeString(name) }; - true - } - } - } -} - -impl Flag for CoreFlag { - type FlagClass = CoreFlagClass; - - fn name(&self) -> Cow<'_, str> { - unsafe { - let name = BNGetArchitectureFlagName(self.arch.handle, self.id.into()); - - // We need to guarantee ownership, as if we're still - // a Borrowed variant we're about to free the underlying - // memory. - let res = CStr::from_ptr(name); - let res = res.to_string_lossy().into_owned().into(); - - BNFreeString(name); - - res - } - } - - fn role(&self, class: Option) -> FlagRole { - unsafe { - BNGetArchitectureFlagRole( - self.arch.handle, - self.id.into(), - class.map(|c| c.id.0).unwrap_or(0), - ) - } - } - - fn id(&self) -> FlagId { - self.id - } -} - -#[derive(Copy, Clone, Eq, PartialEq, Hash)] -pub struct CoreFlagWrite { - arch: CoreArchitecture, - id: FlagWriteId, -} - -impl CoreFlagWrite { - pub fn new(arch: CoreArchitecture, id: FlagWriteId) -> Option { - let flag_write = Self { arch, id }; - flag_write.is_valid().then_some(flag_write) - } - - fn is_valid(&self) -> bool { - // We check the name to see if the flag write is actually valid. 
- let name = unsafe { BNGetArchitectureFlagWriteTypeName(self.arch.handle, self.id.into()) }; - match name.is_null() { - true => false, - false => { - unsafe { BNFreeString(name) }; - true - } - } - } -} - -impl FlagWrite for CoreFlagWrite { - type FlagType = CoreFlag; - type FlagClass = CoreFlagClass; - - fn name(&self) -> Cow<'_, str> { - unsafe { - let name = BNGetArchitectureFlagWriteTypeName(self.arch.handle, self.id.into()); - - // We need to guarantee ownership, as if we're still - // a Borrowed variant we're about to free the underlying - // memory. - let res = CStr::from_ptr(name); - let res = res.to_string_lossy().into_owned().into(); - - BNFreeString(name); - - res - } - } - - fn class(&self) -> Option { - let class = unsafe { - BNGetArchitectureSemanticClassForFlagWriteType(self.arch.handle, self.id.into()) - }; - - match class { - 0 => None, - class_id => Some(CoreFlagClass::new(self.arch, class_id.into())?), - } - } - - fn id(&self) -> FlagWriteId { - self.id - } - - fn flags_written(&self) -> Vec { - let mut count: usize = 0; - let regs: *mut u32 = unsafe { - BNGetArchitectureFlagsWrittenByFlagWriteType( - self.arch.handle, - self.id.into(), - &mut count, - ) - }; - - let ret = unsafe { - std::slice::from_raw_parts(regs, count) - .iter() - .map(|id| FlagId::from(*id)) - .filter_map(|reg| CoreFlag::new(self.arch, reg)) - .collect() - }; - - unsafe { - BNFreeRegisterList(regs); - } - - ret - } -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub struct CoreFlagClass { - arch: CoreArchitecture, - id: FlagClassId, -} - -impl CoreFlagClass { - pub fn new(arch: CoreArchitecture, id: FlagClassId) -> Option { - let flag = Self { arch, id }; - flag.is_valid().then_some(flag) - } - - fn is_valid(&self) -> bool { - // We check the name to see if the flag is actually valid. 
- let name = - unsafe { BNGetArchitectureSemanticFlagClassName(self.arch.handle, self.id.into()) }; - match name.is_null() { - true => false, - false => { - unsafe { BNFreeString(name) }; - true - } - } - } -} - -impl FlagClass for CoreFlagClass { - fn name(&self) -> Cow<'_, str> { - unsafe { - let name = BNGetArchitectureSemanticFlagClassName(self.arch.handle, self.id.into()); - - // We need to guarantee ownership, as if we're still - // a Borrowed variant we're about to free the underlying - // memory. - let res = CStr::from_ptr(name); - let res = res.to_string_lossy().into_owned().into(); - - BNFreeString(name); - - res - } - } - - fn id(&self) -> FlagClassId { - self.id - } -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct CoreFlagGroup { - arch: CoreArchitecture, - id: FlagGroupId, -} - -impl CoreFlagGroup { - pub fn new(arch: CoreArchitecture, id: FlagGroupId) -> Option { - let flag_group = Self { arch, id }; - flag_group.is_valid().then_some(flag_group) - } - - fn is_valid(&self) -> bool { - // We check the name to see if the flag group is actually valid. - let name = - unsafe { BNGetArchitectureSemanticFlagGroupName(self.arch.handle, self.id.into()) }; - match name.is_null() { - true => false, - false => { - unsafe { BNFreeString(name) }; - true - } - } - } -} - -impl FlagGroup for CoreFlagGroup { - type FlagType = CoreFlag; - type FlagClass = CoreFlagClass; - - fn name(&self) -> Cow<'_, str> { - unsafe { - let name = BNGetArchitectureSemanticFlagGroupName(self.arch.handle, self.id.into()); - - // We need to guarantee ownership, as if we're still - // a Borrowed variant we're about to free the underlying - // memory. 
- let res = CStr::from_ptr(name); - let res = res.to_string_lossy().into_owned().into(); - - BNFreeString(name); - - res - } - } - - fn id(&self) -> FlagGroupId { - self.id - } - - fn flags_required(&self) -> Vec { - let mut count: usize = 0; - let regs: *mut u32 = unsafe { - BNGetArchitectureFlagsRequiredForSemanticFlagGroup( - self.arch.handle, - self.id.into(), - &mut count, - ) - }; - - let ret = unsafe { - std::slice::from_raw_parts(regs, count) - .iter() - .map(|id| FlagId::from(*id)) - .filter_map(|reg| CoreFlag::new(self.arch, reg)) - .collect() - }; - - unsafe { - BNFreeRegisterList(regs); - } - - ret - } - - fn flag_conditions(&self) -> HashMap { - let mut count: usize = 0; - - unsafe { - let flag_conds = BNGetArchitectureFlagConditionsForSemanticFlagGroup( - self.arch.handle, - self.id.into(), - &mut count, - ); - - let ret = std::slice::from_raw_parts_mut(flag_conds, count) - .iter() - .filter_map(|class_cond| { - Some(( - CoreFlagClass::new(self.arch, class_cond.semanticClass.into())?, - class_cond.condition, - )) - }) - .collect(); - - BNFreeFlagConditionsForSemanticFlagGroup(flag_conds); - - ret - } - } -} - -#[derive(Copy, Clone, Eq, PartialEq)] -pub struct CoreIntrinsic { - pub arch: CoreArchitecture, - pub id: IntrinsicId, -} - -impl CoreIntrinsic { - pub fn new(arch: CoreArchitecture, id: IntrinsicId) -> Option { - let intrinsic = Self { arch, id }; - intrinsic.is_valid().then_some(intrinsic) - } - - fn is_valid(&self) -> bool { - // We check the name to see if the intrinsic is actually valid. 
- let name = unsafe { BNGetArchitectureIntrinsicName(self.arch.handle, self.id.into()) }; - match name.is_null() { - true => false, - false => { - unsafe { BNFreeString(name) }; - true - } - } - } -} - -impl Intrinsic for CoreIntrinsic { - fn name(&self) -> Cow<'_, str> { - unsafe { - let name = BNGetArchitectureIntrinsicName(self.arch.handle, self.id.into()); - - // We need to guarantee ownership, as if we're still - // a Borrowed variant we're about to free the underlying - // memory. - // TODO: ^ the above assertion nullifies any benefit to passing back Cow tho? - let res = CStr::from_ptr(name); - let res = res.to_string_lossy().into_owned().into(); - - BNFreeString(name); - - res - } - } - - fn id(&self) -> IntrinsicId { - self.id - } - - fn class(&self) -> BNIntrinsicClass { - unsafe { BNGetArchitectureIntrinsicClass(self.arch.handle, self.id.into()) } - } - - fn inputs(&self) -> Vec { - let mut count: usize = 0; - unsafe { - let inputs = - BNGetArchitectureIntrinsicInputs(self.arch.handle, self.id.into(), &mut count); - - let ret = std::slice::from_raw_parts_mut(inputs, count) - .iter() - .map(NameAndType::from_raw) - .collect(); - - BNFreeNameAndTypeList(inputs, count); - - ret - } - } - - fn outputs(&self) -> Vec>> { - let mut count: usize = 0; - unsafe { - let inputs = - BNGetArchitectureIntrinsicOutputs(self.arch.handle, self.id.into(), &mut count); - - let ret = std::slice::from_raw_parts_mut(inputs, count) - .iter() - .map(Conf::>::from_raw) - .collect(); - - BNFreeOutputTypeList(inputs, count); - - ret - } - } -} - -impl Debug for CoreIntrinsic { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("CoreIntrinsic") - .field("id", &self.id) - .field("name", &self.name()) - .field("class", &self.class()) - .field("inputs", &self.inputs()) - .field("outputs", &self.outputs()) - .finish() - } -} - // TODO: WTF?!?!?!? 
pub struct CoreArchitectureList(*mut *mut BNArchitecture, usize); @@ -1954,261 +863,6 @@ impl Architecture for CoreArchitecture { } } -pub struct BasicBlockAnalysisContext { - pub(crate) handle: *mut BNBasicBlockAnalysisContext, - contextual_returns_dirty: bool, - - // In - pub indirect_branches: Vec, - pub indirect_no_return_calls: HashSet, - pub analysis_skip_override: BNFunctionAnalysisSkipOverride, - pub guided_analysis_mode: bool, - pub trigger_guided_on_invalid_instruction: bool, - pub translate_tail_calls: bool, - pub disallow_branch_to_string: bool, - pub max_function_size: u64, - - // In/Out - pub max_size_reached: bool, - contextual_returns: HashMap, - - // Out - direct_code_references: HashMap, - direct_no_return_calls: HashSet, - halted_disassembly_addresses: HashSet, - inlined_unresolved_indirect_branches: HashSet, -} - -impl BasicBlockAnalysisContext { - pub unsafe fn from_raw(handle: *mut BNBasicBlockAnalysisContext) -> Self { - debug_assert!(!handle.is_null()); - - let ctx_ref = &*handle; - - let indirect_branches = (0..ctx_ref.indirectBranchesCount) - .map(|i| { - let raw: BNIndirectBranchInfo = - unsafe { std::ptr::read(ctx_ref.indirectBranches.add(i)) }; - IndirectBranchInfo::from(raw) - }) - .collect::>(); - - let indirect_no_return_calls = (0..ctx_ref.indirectNoReturnCallsCount) - .map(|i| { - let raw = unsafe { std::ptr::read(ctx_ref.indirectNoReturnCalls.add(i)) }; - Location::from(raw) - }) - .collect::>(); - - let contextual_returns = (0..ctx_ref.contextualFunctionReturnCount) - .map(|i| { - let loc = unsafe { - let raw = std::ptr::read(ctx_ref.contextualFunctionReturnLocations.add(i)); - Location::from(raw) - }; - let val = unsafe { *ctx_ref.contextualFunctionReturnValues.add(i) }; - (loc, val) - }) - .collect::>(); - - let direct_code_references = (0..ctx_ref.directRefCount) - .map(|i| { - let src = unsafe { - let raw = std::ptr::read(ctx_ref.directRefSources.add(i)); - Location::from(raw) - }; - let tgt = unsafe { 
*ctx_ref.directRefTargets.add(i) }; - (tgt, src) - }) - .collect::>(); - - let direct_no_return_calls = (0..ctx_ref.directNoReturnCallsCount) - .map(|i| { - let raw = unsafe { std::ptr::read(ctx_ref.directNoReturnCalls.add(i)) }; - Location::from(raw) - }) - .collect::>(); - - let halted_disassembly_addresses = (0..ctx_ref.haltedDisassemblyAddressesCount) - .map(|i| { - let raw = unsafe { std::ptr::read(ctx_ref.haltedDisassemblyAddresses.add(i)) }; - Location::from(raw) - }) - .collect::>(); - - let inlined_unresolved_indirect_branches = (0..ctx_ref - .inlinedUnresolvedIndirectBranchCount) - .map(|i| { - let raw = - unsafe { std::ptr::read(ctx_ref.inlinedUnresolvedIndirectBranches.add(i)) }; - Location::from(raw) - }) - .collect::>(); - - BasicBlockAnalysisContext { - handle, - contextual_returns_dirty: false, - indirect_branches, - indirect_no_return_calls, - analysis_skip_override: ctx_ref.analysisSkipOverride, - guided_analysis_mode: ctx_ref.guidedAnalysisMode, - trigger_guided_on_invalid_instruction: ctx_ref.triggerGuidedOnInvalidInstruction, - translate_tail_calls: ctx_ref.translateTailCalls, - disallow_branch_to_string: ctx_ref.disallowBranchToString, - max_function_size: ctx_ref.maxFunctionSize, - max_size_reached: ctx_ref.maxSizeReached, - contextual_returns, - direct_code_references, - direct_no_return_calls, - halted_disassembly_addresses, - inlined_unresolved_indirect_branches, - } - } - - pub fn add_contextual_return(&mut self, loc: impl Into, value: bool) { - let loc = loc.into(); - if !self.contextual_returns.contains_key(&loc) { - self.contextual_returns_dirty = true; - } - - self.contextual_returns.insert(loc, value); - } - - pub fn add_direct_code_reference(&mut self, target: u64, src: impl Into) { - self.direct_code_references - .entry(target) - .or_insert(src.into()); - } - - pub fn add_direct_no_return_call(&mut self, loc: impl Into) { - self.direct_no_return_calls.insert(loc.into()); - } - - pub fn add_halted_disassembly_address(&mut self, loc: 
impl Into) { - self.halted_disassembly_addresses.insert(loc.into()); - } - - pub fn add_inlined_unresolved_indirect_branch(&mut self, loc: impl Into) { - self.inlined_unresolved_indirect_branches.insert(loc.into()); - } - - pub fn create_basic_block( - &self, - arch: CoreArchitecture, - start: u64, - ) -> Option>> { - let raw_block = - unsafe { BNAnalyzeBasicBlocksContextCreateBasicBlock(self.handle, arch.handle, start) }; - - if raw_block.is_null() { - return None; - } - - unsafe { Some(BasicBlock::ref_from_raw(raw_block, NativeBlock::new())) } - } - - pub fn add_basic_block(&self, block: Ref>) { - unsafe { - BNAnalyzeBasicBlocksContextAddBasicBlockToFunction(self.handle, block.handle); - } - } - - pub fn add_temp_outgoing_reference(&self, target: &Function) { - unsafe { - BNAnalyzeBasicBlocksContextAddTempReference(self.handle, target.handle); - } - } - - pub fn finalize(&mut self) { - if !self.direct_code_references.is_empty() { - let total = self.direct_code_references.len(); - let mut sources: Vec = Vec::with_capacity(total); - let mut targets: Vec = Vec::with_capacity(total); - for (target, src) in &self.direct_code_references { - sources.push(BNArchitectureAndAddress::from(src)); - targets.push(*target); - } - unsafe { - BNAnalyzeBasicBlocksContextSetDirectCodeReferences( - self.handle, - sources.as_mut_ptr(), - targets.as_mut_ptr(), - total, - ); - } - } - - if !self.direct_no_return_calls.is_empty() { - let total = self.direct_no_return_calls.len(); - let mut locations: Vec = Vec::with_capacity(total); - for loc in &self.direct_no_return_calls { - locations.push(BNArchitectureAndAddress::from(loc)); - } - unsafe { - BNAnalyzeBasicBlocksContextSetDirectNoReturnCalls( - self.handle, - locations.as_mut_ptr(), - total, - ); - } - } - - if !self.halted_disassembly_addresses.is_empty() { - let total = self.halted_disassembly_addresses.len(); - let mut locations: Vec = Vec::with_capacity(total); - for loc in &self.halted_disassembly_addresses { - 
locations.push(BNArchitectureAndAddress::from(loc)); - } - unsafe { - BNAnalyzeBasicBlocksContextSetHaltedDisassemblyAddresses( - self.handle, - locations.as_mut_ptr(), - total, - ); - } - } - - if !self.inlined_unresolved_indirect_branches.is_empty() { - let total = self.inlined_unresolved_indirect_branches.len(); - let mut locations: Vec = Vec::with_capacity(total); - for loc in &self.inlined_unresolved_indirect_branches { - locations.push(BNArchitectureAndAddress::from(loc)); - } - unsafe { - BNAnalyzeBasicBlocksContextSetInlinedUnresolvedIndirectBranches( - self.handle, - locations.as_mut_ptr(), - total, - ); - } - } - - unsafe { - (*self.handle).maxSizeReached = self.max_size_reached; - } - - if self.contextual_returns_dirty { - let total = self.contextual_returns.len(); - let mut locations: Vec = Vec::with_capacity(total); - let mut values: Vec = Vec::with_capacity(total); - for (loc, value) in &self.contextual_returns { - locations.push(BNArchitectureAndAddress::from(loc)); - values.push(*value); - } - unsafe { - BNAnalyzeBasicBlocksContextSetContextualFunctionReturns( - self.handle, - locations.as_mut_ptr(), - values.as_mut_ptr(), - total, - ); - } - } - - unsafe { BNAnalyzeBasicBlocksContextFinalize(self.handle) }; - } -} - impl Debug for CoreArchitecture { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.debug_struct("CoreArchitecture") @@ -3590,91 +2244,3 @@ where unsafe { &*self.handle } } } - -#[repr(i32)] -pub enum LlvmServicesDialect { - Unspecified = 0, - Att = 1, - Intel = 2, -} - -#[repr(i32)] -pub enum LlvmServicesCodeModel { - Default = 0, - Small = 1, - Kernel = 2, - Medium = 3, - Large = 4, -} - -#[repr(i32)] -pub enum LlvmServicesRelocMode { - Static = 0, - PIC = 1, - DynamicNoPIC = 2, -} - -pub fn llvm_assemble( - code: &str, - dialect: LlvmServicesDialect, - arch_triple: &str, - code_model: LlvmServicesCodeModel, - reloc_mode: LlvmServicesRelocMode, -) -> Result, String> { - let code = CString::new(code).map_err(|_| "Invalid 
encoding in code string".to_string())?; - let arch_triple = CString::new(arch_triple) - .map_err(|_| "Invalid encoding in architecture triple string".to_string())?; - let mut out_bytes: *mut c_char = std::ptr::null_mut(); - let mut out_bytes_len: c_int = 0; - let mut err_bytes: *mut c_char = std::ptr::null_mut(); - let mut err_len: c_int = 0; - - unsafe { - BNLlvmServicesInit(); - } - - let result = unsafe { - BNLlvmServicesAssemble( - code.as_ptr(), - dialect as i32, - arch_triple.as_ptr(), - code_model as i32, - reloc_mode as i32, - &mut out_bytes as *mut *mut c_char, - &mut out_bytes_len as *mut c_int, - &mut err_bytes as *mut *mut c_char, - &mut err_len as *mut c_int, - ) - }; - - let out = if out_bytes_len == 0 { - Vec::new() - } else { - unsafe { - std::slice::from_raw_parts( - out_bytes as *const c_char as *const u8, - out_bytes_len as usize, - ) - } - .to_vec() - }; - - let errors = if err_len == 0 { - "".into() - } else { - String::from_utf8_lossy(unsafe { - std::slice::from_raw_parts(err_bytes as *const c_char as *const u8, err_len as usize) - }) - .into_owned() - }; - - unsafe { - BNLlvmServicesAssembleFree(out_bytes, err_bytes); - } - - if result == 0 { - Ok(out) - } else { - Err(errors) - } -} diff --git a/rust/src/architecture/basic_block.rs b/rust/src/architecture/basic_block.rs new file mode 100644 index 000000000..1867f50a7 --- /dev/null +++ b/rust/src/architecture/basic_block.rs @@ -0,0 +1,261 @@ +use crate::architecture::{CoreArchitecture, IndirectBranchInfo}; +use crate::basic_block::BasicBlock; +use crate::function::{Function, Location, NativeBlock}; +use crate::rc::Ref; +use binaryninjacore_sys::*; +use std::collections::{HashMap, HashSet}; + +pub struct BasicBlockAnalysisContext { + pub(crate) handle: *mut BNBasicBlockAnalysisContext, + contextual_returns_dirty: bool, + + // In + pub indirect_branches: Vec, + pub indirect_no_return_calls: HashSet, + pub analysis_skip_override: BNFunctionAnalysisSkipOverride, + pub guided_analysis_mode: bool, 
+ pub trigger_guided_on_invalid_instruction: bool, + pub translate_tail_calls: bool, + pub disallow_branch_to_string: bool, + pub max_function_size: u64, + + // In/Out + pub max_size_reached: bool, + contextual_returns: HashMap, + + // Out + direct_code_references: HashMap, + direct_no_return_calls: HashSet, + halted_disassembly_addresses: HashSet, + inlined_unresolved_indirect_branches: HashSet, +} + +impl BasicBlockAnalysisContext { + pub unsafe fn from_raw(handle: *mut BNBasicBlockAnalysisContext) -> Self { + debug_assert!(!handle.is_null()); + + let ctx_ref = &*handle; + + let indirect_branches = (0..ctx_ref.indirectBranchesCount) + .map(|i| { + let raw: BNIndirectBranchInfo = + unsafe { std::ptr::read(ctx_ref.indirectBranches.add(i)) }; + IndirectBranchInfo::from(raw) + }) + .collect::>(); + + let indirect_no_return_calls = (0..ctx_ref.indirectNoReturnCallsCount) + .map(|i| { + let raw = unsafe { std::ptr::read(ctx_ref.indirectNoReturnCalls.add(i)) }; + Location::from(raw) + }) + .collect::>(); + + let contextual_returns = (0..ctx_ref.contextualFunctionReturnCount) + .map(|i| { + let loc = unsafe { + let raw = std::ptr::read(ctx_ref.contextualFunctionReturnLocations.add(i)); + Location::from(raw) + }; + let val = unsafe { *ctx_ref.contextualFunctionReturnValues.add(i) }; + (loc, val) + }) + .collect::>(); + + let direct_code_references = (0..ctx_ref.directRefCount) + .map(|i| { + let src = unsafe { + let raw = std::ptr::read(ctx_ref.directRefSources.add(i)); + Location::from(raw) + }; + let tgt = unsafe { *ctx_ref.directRefTargets.add(i) }; + (tgt, src) + }) + .collect::>(); + + let direct_no_return_calls = (0..ctx_ref.directNoReturnCallsCount) + .map(|i| { + let raw = unsafe { std::ptr::read(ctx_ref.directNoReturnCalls.add(i)) }; + Location::from(raw) + }) + .collect::>(); + + let halted_disassembly_addresses = (0..ctx_ref.haltedDisassemblyAddressesCount) + .map(|i| { + let raw = unsafe { std::ptr::read(ctx_ref.haltedDisassemblyAddresses.add(i)) }; + 
Location::from(raw) + }) + .collect::>(); + + let inlined_unresolved_indirect_branches = (0..ctx_ref + .inlinedUnresolvedIndirectBranchCount) + .map(|i| { + let raw = + unsafe { std::ptr::read(ctx_ref.inlinedUnresolvedIndirectBranches.add(i)) }; + Location::from(raw) + }) + .collect::>(); + + BasicBlockAnalysisContext { + handle, + contextual_returns_dirty: false, + indirect_branches, + indirect_no_return_calls, + analysis_skip_override: ctx_ref.analysisSkipOverride, + guided_analysis_mode: ctx_ref.guidedAnalysisMode, + trigger_guided_on_invalid_instruction: ctx_ref.triggerGuidedOnInvalidInstruction, + translate_tail_calls: ctx_ref.translateTailCalls, + disallow_branch_to_string: ctx_ref.disallowBranchToString, + max_function_size: ctx_ref.maxFunctionSize, + max_size_reached: ctx_ref.maxSizeReached, + contextual_returns, + direct_code_references, + direct_no_return_calls, + halted_disassembly_addresses, + inlined_unresolved_indirect_branches, + } + } + + pub fn add_contextual_return(&mut self, loc: impl Into, value: bool) { + let loc = loc.into(); + if !self.contextual_returns.contains_key(&loc) { + self.contextual_returns_dirty = true; + } + + self.contextual_returns.insert(loc, value); + } + + pub fn add_direct_code_reference(&mut self, target: u64, src: impl Into) { + self.direct_code_references + .entry(target) + .or_insert(src.into()); + } + + pub fn add_direct_no_return_call(&mut self, loc: impl Into) { + self.direct_no_return_calls.insert(loc.into()); + } + + pub fn add_halted_disassembly_address(&mut self, loc: impl Into) { + self.halted_disassembly_addresses.insert(loc.into()); + } + + pub fn add_inlined_unresolved_indirect_branch(&mut self, loc: impl Into) { + self.inlined_unresolved_indirect_branches.insert(loc.into()); + } + + pub fn create_basic_block( + &self, + arch: CoreArchitecture, + start: u64, + ) -> Option>> { + let raw_block = + unsafe { BNAnalyzeBasicBlocksContextCreateBasicBlock(self.handle, arch.handle, start) }; + + if raw_block.is_null() 
{ + return None; + } + + unsafe { Some(BasicBlock::ref_from_raw(raw_block, NativeBlock::new())) } + } + + pub fn add_basic_block(&self, block: Ref>) { + unsafe { + BNAnalyzeBasicBlocksContextAddBasicBlockToFunction(self.handle, block.handle); + } + } + + pub fn add_temp_outgoing_reference(&self, target: &Function) { + unsafe { + BNAnalyzeBasicBlocksContextAddTempReference(self.handle, target.handle); + } + } + + pub fn finalize(&mut self) { + if !self.direct_code_references.is_empty() { + let total = self.direct_code_references.len(); + let mut sources: Vec = Vec::with_capacity(total); + let mut targets: Vec = Vec::with_capacity(total); + for (target, src) in &self.direct_code_references { + sources.push(BNArchitectureAndAddress::from(src)); + targets.push(*target); + } + unsafe { + BNAnalyzeBasicBlocksContextSetDirectCodeReferences( + self.handle, + sources.as_mut_ptr(), + targets.as_mut_ptr(), + total, + ); + } + } + + if !self.direct_no_return_calls.is_empty() { + let total = self.direct_no_return_calls.len(); + let mut locations: Vec = Vec::with_capacity(total); + for loc in &self.direct_no_return_calls { + locations.push(BNArchitectureAndAddress::from(loc)); + } + unsafe { + BNAnalyzeBasicBlocksContextSetDirectNoReturnCalls( + self.handle, + locations.as_mut_ptr(), + total, + ); + } + } + + if !self.halted_disassembly_addresses.is_empty() { + let total = self.halted_disassembly_addresses.len(); + let mut locations: Vec = Vec::with_capacity(total); + for loc in &self.halted_disassembly_addresses { + locations.push(BNArchitectureAndAddress::from(loc)); + } + unsafe { + BNAnalyzeBasicBlocksContextSetHaltedDisassemblyAddresses( + self.handle, + locations.as_mut_ptr(), + total, + ); + } + } + + if !self.inlined_unresolved_indirect_branches.is_empty() { + let total = self.inlined_unresolved_indirect_branches.len(); + let mut locations: Vec = Vec::with_capacity(total); + for loc in &self.inlined_unresolved_indirect_branches { + 
locations.push(BNArchitectureAndAddress::from(loc)); + } + unsafe { + BNAnalyzeBasicBlocksContextSetInlinedUnresolvedIndirectBranches( + self.handle, + locations.as_mut_ptr(), + total, + ); + } + } + + unsafe { + (*self.handle).maxSizeReached = self.max_size_reached; + } + + if self.contextual_returns_dirty { + let total = self.contextual_returns.len(); + let mut locations: Vec = Vec::with_capacity(total); + let mut values: Vec = Vec::with_capacity(total); + for (loc, value) in &self.contextual_returns { + locations.push(BNArchitectureAndAddress::from(loc)); + values.push(*value); + } + unsafe { + BNAnalyzeBasicBlocksContextSetContextualFunctionReturns( + self.handle, + locations.as_mut_ptr(), + values.as_mut_ptr(), + total, + ); + } + } + + unsafe { BNAnalyzeBasicBlocksContextFinalize(self.handle) }; + } +} diff --git a/rust/src/architecture/branches.rs b/rust/src/architecture/branches.rs new file mode 100644 index 000000000..ff0ab5162 --- /dev/null +++ b/rust/src/architecture/branches.rs @@ -0,0 +1,153 @@ +use crate::architecture::CoreArchitecture; +use crate::function::Location; +use crate::rc::{CoreArrayProvider, CoreArrayProviderInner}; +use binaryninjacore_sys::*; + +pub use binaryninjacore_sys::BNBranchType as BranchType; + +#[derive(Default, Copy, Clone, PartialEq, Eq, Hash, Debug)] +pub enum BranchKind { + #[default] + Unresolved, + Unconditional(u64), + False(u64), + True(u64), + Call(u64), + FunctionReturn, + SystemCall, + Indirect, + Exception, + UserDefined, +} + +#[derive(Default, Copy, Clone, PartialEq, Eq, Hash, Debug)] +pub struct BranchInfo { + /// If `None` the target architecture is the same as the branch instruction. + pub arch: Option, + pub kind: BranchKind, +} + +impl BranchInfo { + /// Branches to an instruction with the current architecture. + pub fn new(kind: BranchKind) -> Self { + Self { arch: None, kind } + } + + /// Branches to an instruction with an explicit architecture. 
+ /// + /// Use this if your architecture can transition to another architecture with a branch. + pub fn new_with_arch(kind: BranchKind, arch: CoreArchitecture) -> Self { + Self { + arch: Some(arch), + kind, + } + } + + pub fn target(&self) -> Option { + match self.kind { + BranchKind::Unconditional(target) => Some(target), + BranchKind::False(target) => Some(target), + BranchKind::True(target) => Some(target), + BranchKind::Call(target) => Some(target), + _ => None, + } + } +} + +impl From for BNBranchType { + fn from(value: BranchInfo) -> Self { + match value.kind { + BranchKind::Unresolved => BNBranchType::UnresolvedBranch, + BranchKind::Unconditional(_) => BNBranchType::UnconditionalBranch, + BranchKind::False(_) => BNBranchType::FalseBranch, + BranchKind::True(_) => BNBranchType::TrueBranch, + BranchKind::Call(_) => BNBranchType::CallDestination, + BranchKind::FunctionReturn => BNBranchType::FunctionReturn, + BranchKind::SystemCall => BNBranchType::SystemCall, + BranchKind::Indirect => BNBranchType::IndirectBranch, + BranchKind::Exception => BNBranchType::ExceptionBranch, + BranchKind::UserDefined => BNBranchType::UserDefinedBranch, + } + } +} + +impl From for BranchInfo { + fn from(value: BranchKind) -> Self { + Self { + arch: None, + kind: value, + } + } +} + +impl From for BranchType { + fn from(value: BranchKind) -> Self { + match value { + BranchKind::Unresolved => BranchType::UnresolvedBranch, + BranchKind::Unconditional(_) => BranchType::UnconditionalBranch, + BranchKind::True(_) => BranchType::TrueBranch, + BranchKind::False(_) => BranchType::FalseBranch, + BranchKind::Call(_) => BranchType::CallDestination, + BranchKind::FunctionReturn => BranchType::FunctionReturn, + BranchKind::SystemCall => BranchType::SystemCall, + BranchKind::Indirect => BranchType::IndirectBranch, + BranchKind::Exception => BranchType::ExceptionBranch, + BranchKind::UserDefined => BranchType::UserDefinedBranch, + } + } +} + +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] 
+/// Safe counterpart of [`BNIndirectBranchInfo`]: a branch from `source` to `dest`,
+/// each of which carries its own (optional) architecture.
+pub struct IndirectBranchInfo {
+    pub source: Location,
+    pub dest: Location,
+    pub auto_defined: bool,
+}
+
+impl From<BNIndirectBranchInfo> for IndirectBranchInfo {
+    /// Builds the wrapper from the raw core struct, pairing each address with its architecture.
+    fn from(value: BNIndirectBranchInfo) -> Self {
+        Self {
+            source: Location::from_raw(value.sourceAddr, value.sourceArch),
+            dest: Location::from_raw(value.destAddr, value.destArch),
+            auto_defined: value.autoDefined,
+        }
+    }
+}
+
+impl From<IndirectBranchInfo> for BNIndirectBranchInfo {
+    /// Inverse of `From<BNIndirectBranchInfo>`; each architecture handle is taken from
+    /// its own location (`destArch` from `dest`, not `source`) so the round trip is lossless.
+    fn from(value: IndirectBranchInfo) -> Self {
+        // A location without an architecture is handed to the core as a null handle.
+        let raw_arch = |loc: Location| loc.arch.map_or(std::ptr::null_mut(), |a| a.handle);
+        Self {
+            sourceArch: raw_arch(value.source),
+            sourceAddr: value.source.addr,
+            destArch: raw_arch(value.dest),
+            destAddr: value.dest.addr,
+            autoDefined: value.auto_defined,
+        }
+    }
+}
+
+/// Raw array elements are `BNIndirectBranchInfo`; no extra context is needed to wrap them.
+impl CoreArrayProvider for IndirectBranchInfo {
+    type Raw = BNIndirectBranchInfo;
+    type Context = ();
+    type Wrapped<'a> = Self;
+}
+
+unsafe impl CoreArrayProviderInner for IndirectBranchInfo {
+    unsafe fn free(raw: *mut Self::Raw, _count: usize, _context: &Self::Context) {
+        // Only the list pointer is passed to the core; `_count` is unused.
+        BNFreeIndirectBranchList(raw)
+    }
+
+    unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped<'a> {
+        // Copies the raw element out and converts it, so nothing borrows from `raw`.
+        Self::from(*raw)
+    }
+}
diff --git a/rust/src/architecture/flag.rs b/rust/src/architecture/flag.rs
new file mode 100644
index 000000000..7c0f5deb1
--- /dev/null
+++ b/rust/src/architecture/flag.rs
@@ -0,0 +1,447 @@
+use crate::architecture::CoreArchitecture;
+use binaryninjacore_sys::*;
+use std::borrow::Cow;
+use std::collections::HashMap;
+use std::ffi::CStr;
+use std::fmt::Debug;
+use std::hash::Hash;
+
+pub use binaryninjacore_sys::BNFlagRole as FlagRole;
+pub use binaryninjacore_sys::BNLowLevelILFlagCondition as FlagCondition;
+
+crate::new_id_type!(FlagId, u32);
+// TODO: Make this NonZero?
+crate::new_id_type!(FlagWriteId, u32); +crate::new_id_type!(FlagClassId, u32); +crate::new_id_type!(FlagGroupId, u32); + +pub trait Flag: Debug + Sized + Clone + Copy + Hash + Eq { + type FlagClass: FlagClass; + + fn name(&self) -> Cow<'_, str>; + fn role(&self, class: Option) -> FlagRole; + + /// Unique identifier for this `Flag`. + /// + /// *MUST* be in the range [0, 0x7fff_ffff] + fn id(&self) -> FlagId; +} + +pub trait FlagWrite: Sized + Clone + Copy { + type FlagType: Flag; + type FlagClass: FlagClass; + + fn name(&self) -> Cow<'_, str>; + fn class(&self) -> Option; + + /// Unique identifier for this `FlagWrite`. + /// + /// *MUST NOT* be 0. + /// *MUST* be in the range [1, 0x7fff_ffff] + fn id(&self) -> FlagWriteId; + + fn flags_written(&self) -> Vec; +} + +pub trait FlagClass: Sized + Clone + Copy + Hash + Eq { + fn name(&self) -> Cow<'_, str>; + + /// Unique identifier for this `FlagClass`. + /// + /// *MUST NOT* be 0. + /// *MUST* be in the range [1, 0x7fff_ffff] + fn id(&self) -> FlagClassId; +} + +pub trait FlagGroup: Debug + Sized + Clone + Copy { + type FlagType: Flag; + type FlagClass: FlagClass; + + fn name(&self) -> Cow<'_, str>; + + /// Unique identifier for this `FlagGroup`. + /// + /// *MUST* be in the range [0, 0x7fff_ffff] + fn id(&self) -> FlagGroupId; + + /// Returns the list of flags that need to be resolved in order + /// to take the clean flag resolution path -- at time of writing, + /// all required flags must have been set by the same instruction, + /// and the 'querying' instruction must be reachable from *one* + /// instruction that sets all of these flags. + fn flags_required(&self) -> Vec; + + /// Returns the mapping of Semantic Flag Classes to Flag Conditions, + /// in the context of this Flag Group. 
+ /// + /// Example: + /// + /// If we have a group representing `cr1_lt` (as in PowerPC), we would + /// have multiple Semantic Flag Classes used by the different Flag Write + /// Types to represent the different comparisons, so for `cr1_lt` we + /// would return a mapping along the lines of: + /// + /// ```text + /// cr1_signed -> LLFC_SLT, + /// cr1_unsigned -> LLFC_ULT, + /// ``` + /// + /// This allows the core to recover the semantics of the comparison and + /// inline it into conditional branches when appropriate. + fn flag_conditions(&self) -> HashMap; +} + +/// Type for architectures that do not use flags. Will panic if accessed as a flag. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct UnusedFlag; + +impl Flag for UnusedFlag { + type FlagClass = Self; + fn name(&self) -> Cow<'_, str> { + unreachable!() + } + fn role(&self, _class: Option) -> FlagRole { + unreachable!() + } + fn id(&self) -> FlagId { + unreachable!() + } +} + +impl FlagWrite for UnusedFlag { + type FlagType = Self; + type FlagClass = Self; + fn name(&self) -> Cow<'_, str> { + unreachable!() + } + fn class(&self) -> Option { + unreachable!() + } + fn id(&self) -> FlagWriteId { + unreachable!() + } + fn flags_written(&self) -> Vec { + unreachable!() + } +} + +impl FlagClass for UnusedFlag { + fn name(&self) -> Cow<'_, str> { + unreachable!() + } + fn id(&self) -> FlagClassId { + unreachable!() + } +} + +impl FlagGroup for UnusedFlag { + type FlagType = Self; + type FlagClass = Self; + fn name(&self) -> Cow<'_, str> { + unreachable!() + } + fn id(&self) -> FlagGroupId { + unreachable!() + } + fn flags_required(&self) -> Vec { + unreachable!() + } + fn flag_conditions(&self) -> HashMap { + unreachable!() + } +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub struct CoreFlag { + arch: CoreArchitecture, + id: FlagId, +} + +impl CoreFlag { + pub fn new(arch: CoreArchitecture, id: FlagId) -> Option { + let flag = Self { arch, id }; + flag.is_valid().then_some(flag) + } + 
+ fn is_valid(&self) -> bool { + // We check the name to see if the flag is actually valid. + let name = unsafe { BNGetArchitectureFlagName(self.arch.handle, self.id.into()) }; + match name.is_null() { + true => false, + false => { + unsafe { BNFreeString(name) }; + true + } + } + } +} + +impl Flag for CoreFlag { + type FlagClass = CoreFlagClass; + + fn name(&self) -> Cow<'_, str> { + unsafe { + let name = BNGetArchitectureFlagName(self.arch.handle, self.id.into()); + + // We need to guarantee ownership, as if we're still + // a Borrowed variant we're about to free the underlying + // memory. + let res = CStr::from_ptr(name); + let res = res.to_string_lossy().into_owned().into(); + + BNFreeString(name); + + res + } + } + + fn role(&self, class: Option) -> FlagRole { + unsafe { + BNGetArchitectureFlagRole( + self.arch.handle, + self.id.into(), + class.map(|c| c.id.0).unwrap_or(0), + ) + } + } + + fn id(&self) -> FlagId { + self.id + } +} + +#[derive(Copy, Clone, Eq, PartialEq, Hash)] +pub struct CoreFlagWrite { + arch: CoreArchitecture, + id: FlagWriteId, +} + +impl CoreFlagWrite { + pub fn new(arch: CoreArchitecture, id: FlagWriteId) -> Option { + let flag_write = Self { arch, id }; + flag_write.is_valid().then_some(flag_write) + } + + fn is_valid(&self) -> bool { + // We check the name to see if the flag write is actually valid. + let name = unsafe { BNGetArchitectureFlagWriteTypeName(self.arch.handle, self.id.into()) }; + match name.is_null() { + true => false, + false => { + unsafe { BNFreeString(name) }; + true + } + } + } +} + +impl FlagWrite for CoreFlagWrite { + type FlagType = CoreFlag; + type FlagClass = CoreFlagClass; + + fn name(&self) -> Cow<'_, str> { + unsafe { + let name = BNGetArchitectureFlagWriteTypeName(self.arch.handle, self.id.into()); + + // We need to guarantee ownership, as if we're still + // a Borrowed variant we're about to free the underlying + // memory. 
+ let res = CStr::from_ptr(name); + let res = res.to_string_lossy().into_owned().into(); + + BNFreeString(name); + + res + } + } + + fn class(&self) -> Option { + let class = unsafe { + BNGetArchitectureSemanticClassForFlagWriteType(self.arch.handle, self.id.into()) + }; + + match class { + 0 => None, + class_id => Some(CoreFlagClass::new(self.arch, class_id.into())?), + } + } + + fn id(&self) -> FlagWriteId { + self.id + } + + fn flags_written(&self) -> Vec { + let mut count: usize = 0; + let regs: *mut u32 = unsafe { + BNGetArchitectureFlagsWrittenByFlagWriteType( + self.arch.handle, + self.id.into(), + &mut count, + ) + }; + + let ret = unsafe { + std::slice::from_raw_parts(regs, count) + .iter() + .map(|id| FlagId::from(*id)) + .filter_map(|reg| CoreFlag::new(self.arch, reg)) + .collect() + }; + + unsafe { + BNFreeRegisterList(regs); + } + + ret + } +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub struct CoreFlagClass { + arch: CoreArchitecture, + id: FlagClassId, +} + +impl CoreFlagClass { + pub fn new(arch: CoreArchitecture, id: FlagClassId) -> Option { + let flag = Self { arch, id }; + flag.is_valid().then_some(flag) + } + + fn is_valid(&self) -> bool { + // We check the name to see if the flag is actually valid. + let name = + unsafe { BNGetArchitectureSemanticFlagClassName(self.arch.handle, self.id.into()) }; + match name.is_null() { + true => false, + false => { + unsafe { BNFreeString(name) }; + true + } + } + } +} + +impl FlagClass for CoreFlagClass { + fn name(&self) -> Cow<'_, str> { + unsafe { + let name = BNGetArchitectureSemanticFlagClassName(self.arch.handle, self.id.into()); + + // We need to guarantee ownership, as if we're still + // a Borrowed variant we're about to free the underlying + // memory. 
+ let res = CStr::from_ptr(name); + let res = res.to_string_lossy().into_owned().into(); + + BNFreeString(name); + + res + } + } + + fn id(&self) -> FlagClassId { + self.id + } +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub struct CoreFlagGroup { + arch: CoreArchitecture, + id: FlagGroupId, +} + +impl CoreFlagGroup { + pub fn new(arch: CoreArchitecture, id: FlagGroupId) -> Option { + let flag_group = Self { arch, id }; + flag_group.is_valid().then_some(flag_group) + } + + fn is_valid(&self) -> bool { + // We check the name to see if the flag group is actually valid. + let name = + unsafe { BNGetArchitectureSemanticFlagGroupName(self.arch.handle, self.id.into()) }; + match name.is_null() { + true => false, + false => { + unsafe { BNFreeString(name) }; + true + } + } + } +} + +impl FlagGroup for CoreFlagGroup { + type FlagType = CoreFlag; + type FlagClass = CoreFlagClass; + + fn name(&self) -> Cow<'_, str> { + unsafe { + let name = BNGetArchitectureSemanticFlagGroupName(self.arch.handle, self.id.into()); + + // We need to guarantee ownership, as if we're still + // a Borrowed variant we're about to free the underlying + // memory. 
+ let res = CStr::from_ptr(name); + let res = res.to_string_lossy().into_owned().into(); + + BNFreeString(name); + + res + } + } + + fn id(&self) -> FlagGroupId { + self.id + } + + fn flags_required(&self) -> Vec { + let mut count: usize = 0; + let regs: *mut u32 = unsafe { + BNGetArchitectureFlagsRequiredForSemanticFlagGroup( + self.arch.handle, + self.id.into(), + &mut count, + ) + }; + + let ret = unsafe { + std::slice::from_raw_parts(regs, count) + .iter() + .map(|id| FlagId::from(*id)) + .filter_map(|reg| CoreFlag::new(self.arch, reg)) + .collect() + }; + + unsafe { + BNFreeRegisterList(regs); + } + + ret + } + + fn flag_conditions(&self) -> HashMap { + let mut count: usize = 0; + + unsafe { + let flag_conds = BNGetArchitectureFlagConditionsForSemanticFlagGroup( + self.arch.handle, + self.id.into(), + &mut count, + ); + + let ret = std::slice::from_raw_parts_mut(flag_conds, count) + .iter() + .filter_map(|class_cond| { + Some(( + CoreFlagClass::new(self.arch, class_cond.semanticClass.into())?, + class_cond.condition, + )) + }) + .collect(); + + BNFreeFlagConditionsForSemanticFlagGroup(flag_conds); + + ret + } + } +} diff --git a/rust/src/architecture/instruction.rs b/rust/src/architecture/instruction.rs new file mode 100644 index 000000000..94cb788b6 --- /dev/null +++ b/rust/src/architecture/instruction.rs @@ -0,0 +1,114 @@ +use crate::architecture::{BranchInfo, BranchKind, CoreArchitecture}; +use binaryninjacore_sys::*; + +/// This is the number of branches that can be specified in an [`InstructionInfo`]. +pub const NUM_BRANCH_INFO: usize = 3; + +#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] +pub struct InstructionInfo { + pub length: usize, + // TODO: This field name is really long... + pub arch_transition_by_target_addr: bool, + pub delay_slots: u8, + pub branches: [Option; NUM_BRANCH_INFO], +} + +impl InstructionInfo { + // TODO: `new_with_delay_slot`? 
+ pub fn new(length: usize, delay_slots: u8) -> Self { + Self { + length, + arch_transition_by_target_addr: false, + delay_slots, + branches: Default::default(), + } + } + + /// Add a branch to this [`InstructionInfo`], maximum of 3 branches may be added (as per [`NUM_BRANCH_INFO`]). + pub fn add_branch(&mut self, branch_info: impl Into) { + // Will go through each slot and attempt to add the branch info. + // TODO: Return a result with BranchInfoSlotsFilled error. + for branch in &mut self.branches { + if branch.is_none() { + *branch = Some(branch_info.into()); + return; + } + } + } +} + +impl From for InstructionInfo { + fn from(value: BNInstructionInfo) -> Self { + // TODO: This is quite ugly, but we destructure the branch info so this will have to do. + let mut branch_info = [None; NUM_BRANCH_INFO]; + #[allow(clippy::needless_range_loop)] + for i in 0..value.branchCount.min(NUM_BRANCH_INFO) { + let branch_target = value.branchTarget[i]; + branch_info[i] = Some(BranchInfo { + kind: match value.branchType[i] { + BNBranchType::UnconditionalBranch => BranchKind::Unconditional(branch_target), + BNBranchType::FalseBranch => BranchKind::False(branch_target), + BNBranchType::TrueBranch => BranchKind::True(branch_target), + BNBranchType::CallDestination => BranchKind::Call(branch_target), + BNBranchType::FunctionReturn => BranchKind::FunctionReturn, + BNBranchType::SystemCall => BranchKind::SystemCall, + BNBranchType::IndirectBranch => BranchKind::Indirect, + BNBranchType::ExceptionBranch => BranchKind::Exception, + BNBranchType::UnresolvedBranch => BranchKind::Unresolved, + BNBranchType::UserDefinedBranch => BranchKind::UserDefined, + }, + arch: if value.branchArch[i].is_null() { + None + } else { + Some(unsafe { CoreArchitecture::from_raw(value.branchArch[i]) }) + }, + }); + } + Self { + length: value.length, + arch_transition_by_target_addr: value.archTransitionByTargetAddr, + delay_slots: value.delaySlots, + branches: branch_info, + } + } +} + +impl From for 
BNInstructionInfo { + fn from(value: InstructionInfo) -> Self { + let branch_count = value.branches.into_iter().filter(Option::is_some).count(); + // TODO: This is quite ugly, but we destructure the branch info so this will have to do. + let branch_info_0 = value.branches[0].unwrap_or_default(); + let branch_info_1 = value.branches[1].unwrap_or_default(); + let branch_info_2 = value.branches[2].unwrap_or_default(); + Self { + length: value.length, + branchCount: branch_count, + archTransitionByTargetAddr: value.arch_transition_by_target_addr, + delaySlots: value.delay_slots, + branchType: [ + branch_info_0.into(), + branch_info_1.into(), + branch_info_2.into(), + ], + branchTarget: [ + branch_info_0.target().unwrap_or_default(), + branch_info_1.target().unwrap_or_default(), + branch_info_2.target().unwrap_or_default(), + ], + branchArch: [ + branch_info_0 + .arch + .map(|a| a.handle) + .unwrap_or(std::ptr::null_mut()), + branch_info_1 + .arch + .map(|a| a.handle) + .unwrap_or(std::ptr::null_mut()), + branch_info_2 + .arch + .map(|a| a.handle) + .unwrap_or(std::ptr::null_mut()), + ], + } + } +} diff --git a/rust/src/architecture/intrinsic.rs b/rust/src/architecture/intrinsic.rs new file mode 100644 index 000000000..4ad5ee2af --- /dev/null +++ b/rust/src/architecture/intrinsic.rs @@ -0,0 +1,150 @@ +use crate::architecture::CoreArchitecture; +use crate::confidence::Conf; +use crate::rc::Ref; +use crate::types::{NameAndType, Type}; +use binaryninjacore_sys::{ + BNFreeNameAndTypeList, BNFreeOutputTypeList, BNFreeString, BNGetArchitectureIntrinsicClass, + BNGetArchitectureIntrinsicInputs, BNGetArchitectureIntrinsicName, + BNGetArchitectureIntrinsicOutputs, BNIntrinsicClass, +}; +use std::borrow::Cow; +use std::ffi::CStr; +use std::fmt::{Debug, Formatter}; + +crate::new_id_type!(IntrinsicId, u32); + +pub trait Intrinsic: Debug + Sized + Clone + Copy { + fn name(&self) -> Cow<'_, str>; + + /// Unique identifier for this `Intrinsic`. 
+ fn id(&self) -> IntrinsicId; + + /// The intrinsic class for this `Intrinsic`. + fn class(&self) -> BNIntrinsicClass { + BNIntrinsicClass::GeneralIntrinsicClass + } + + // TODO: Maybe just return `(String, Conf>)`? + /// List of the input names and types for this intrinsic. + fn inputs(&self) -> Vec; + + /// List of the output types for this intrinsic. + fn outputs(&self) -> Vec>>; +} + +/// Type for architectures that do not use intrinsics. Will panic if accessed as an intrinsic. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct UnusedIntrinsic; + +impl Intrinsic for UnusedIntrinsic { + fn name(&self) -> Cow<'_, str> { + unreachable!() + } + fn id(&self) -> IntrinsicId { + unreachable!() + } + fn inputs(&self) -> Vec { + unreachable!() + } + fn outputs(&self) -> Vec>> { + unreachable!() + } +} + +#[derive(Copy, Clone, Eq, PartialEq)] +pub struct CoreIntrinsic { + pub arch: CoreArchitecture, + pub id: IntrinsicId, +} + +impl CoreIntrinsic { + pub fn new(arch: CoreArchitecture, id: IntrinsicId) -> Option { + let intrinsic = Self { arch, id }; + intrinsic.is_valid().then_some(intrinsic) + } + + fn is_valid(&self) -> bool { + // We check the name to see if the intrinsic is actually valid. + let name = unsafe { BNGetArchitectureIntrinsicName(self.arch.handle, self.id.into()) }; + match name.is_null() { + true => false, + false => { + unsafe { BNFreeString(name) }; + true + } + } + } +} + +impl Intrinsic for CoreIntrinsic { + fn name(&self) -> Cow<'_, str> { + unsafe { + let name = BNGetArchitectureIntrinsicName(self.arch.handle, self.id.into()); + + // We need to guarantee ownership, as if we're still + // a Borrowed variant we're about to free the underlying + // memory. + // TODO: ^ the above assertion nullifies any benefit to passing back Cow tho?
+ let res = CStr::from_ptr(name); + let res = res.to_string_lossy().into_owned().into(); + + BNFreeString(name); + + res + } + } + + fn id(&self) -> IntrinsicId { + self.id + } + + fn class(&self) -> BNIntrinsicClass { + unsafe { BNGetArchitectureIntrinsicClass(self.arch.handle, self.id.into()) } + } + + fn inputs(&self) -> Vec { + let mut count: usize = 0; + unsafe { + let inputs = + BNGetArchitectureIntrinsicInputs(self.arch.handle, self.id.into(), &mut count); + + let ret = std::slice::from_raw_parts_mut(inputs, count) + .iter() + .map(NameAndType::from_raw) + .collect(); + + BNFreeNameAndTypeList(inputs, count); + + ret + } + } + + fn outputs(&self) -> Vec>> { + let mut count: usize = 0; + unsafe { + let inputs = + BNGetArchitectureIntrinsicOutputs(self.arch.handle, self.id.into(), &mut count); + + let ret = std::slice::from_raw_parts_mut(inputs, count) + .iter() + .map(Conf::>::from_raw) + .collect(); + + BNFreeOutputTypeList(inputs, count); + + ret + } + } +} + +impl Debug for CoreIntrinsic { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CoreIntrinsic") + .field("id", &self.id) + .field("name", &self.name()) + .field("class", &self.class()) + .field("inputs", &self.inputs()) + .field("outputs", &self.outputs()) + .finish() + } +} diff --git a/rust/src/architecture/register.rs b/rust/src/architecture/register.rs new file mode 100644 index 000000000..50bb38ba7 --- /dev/null +++ b/rust/src/architecture/register.rs @@ -0,0 +1,333 @@ +use crate::architecture::CoreArchitecture; +use crate::rc::{CoreArrayProvider, CoreArrayProviderInner}; +use binaryninjacore_sys::*; +use std::borrow::Cow; +use std::ffi::CStr; +use std::fmt::{Debug, Formatter}; +use std::hash::Hash; + +pub use binaryninjacore_sys::BNImplicitRegisterExtend as ImplicitRegisterExtend; + +crate::new_id_type!(RegisterId, u32); + +impl RegisterId { + pub fn is_temporary(&self) -> bool { + self.0 & 0x8000_0000 != 0 + } +} + +crate::new_id_type!(RegisterStackId, u32); + +pub 
trait RegisterInfo: Sized { + type RegType: Register; + + fn parent(&self) -> Option; + fn size(&self) -> usize; + fn offset(&self) -> usize; + fn implicit_extend(&self) -> ImplicitRegisterExtend; +} + +pub trait Register: Debug + Sized + Clone + Copy + Hash + Eq { + type InfoType: RegisterInfo; + + fn name(&self) -> Cow<'_, str>; + fn info(&self) -> Self::InfoType; + + /// Unique identifier for this `Register`. + /// + /// *MUST* be in the range [0, 0x7fff_ffff] + fn id(&self) -> RegisterId; +} + +pub trait RegisterStackInfo: Sized { + type RegStackType: RegisterStack; + type RegType: Register; + type RegInfoType: RegisterInfo; + + fn storage_regs(&self) -> (Self::RegType, usize); + fn top_relative_regs(&self) -> Option<(Self::RegType, usize)>; + fn stack_top_reg(&self) -> Self::RegType; +} + +/// Type for architectures that do not use register stacks. Will panic if accessed as a register stack. +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub struct UnusedRegisterStackInfo { + _reg: std::marker::PhantomData, +} + +impl RegisterStackInfo for UnusedRegisterStackInfo { + type RegStackType = UnusedRegisterStack; + type RegType = R; + type RegInfoType = R::InfoType; + + fn storage_regs(&self) -> (Self::RegType, usize) { + unreachable!() + } + fn top_relative_regs(&self) -> Option<(Self::RegType, usize)> { + unreachable!() + } + fn stack_top_reg(&self) -> Self::RegType { + unreachable!() + } +} + +pub trait RegisterStack: Debug + Sized + Clone + Copy { + type InfoType: RegisterStackInfo< + RegType = Self::RegType, + RegInfoType = Self::RegInfoType, + RegStackType = Self, + >; + type RegType: Register; + type RegInfoType: RegisterInfo; + + fn name(&self) -> Cow<'_, str>; + fn info(&self) -> Self::InfoType; + + /// Unique identifier for this `RegisterStack`. + /// + /// *MUST* be in the range [0, 0x7fff_ffff] + fn id(&self) -> RegisterStackId; +} + +/// Type for architectures that do not use register stacks. Will panic if accessed as a register stack. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct UnusedRegisterStack { + _reg: std::marker::PhantomData, +} + +impl RegisterStack for UnusedRegisterStack { + type InfoType = UnusedRegisterStackInfo; + type RegType = R; + type RegInfoType = R::InfoType; + + fn name(&self) -> Cow<'_, str> { + unreachable!() + } + fn info(&self) -> Self::InfoType { + unreachable!() + } + fn id(&self) -> RegisterStackId { + unreachable!() + } +} + +#[derive(Debug, Copy, Clone)] +pub struct CoreRegisterInfo { + arch: CoreArchitecture, + id: RegisterId, + info: BNRegisterInfo, +} + +impl CoreRegisterInfo { + pub fn new(arch: CoreArchitecture, id: RegisterId, info: BNRegisterInfo) -> Self { + Self { arch, id, info } + } +} + +impl RegisterInfo for CoreRegisterInfo { + type RegType = CoreRegister; + + fn parent(&self) -> Option { + if self.id != RegisterId::from(self.info.fullWidthRegister) { + Some(CoreRegister::new( + self.arch, + RegisterId::from(self.info.fullWidthRegister), + )?) + } else { + None + } + } + + fn size(&self) -> usize { + self.info.size + } + + fn offset(&self) -> usize { + self.info.offset + } + + fn implicit_extend(&self) -> ImplicitRegisterExtend { + self.info.extend + } +} + +#[derive(Copy, Clone, Eq, PartialEq, Hash)] +pub struct CoreRegister { + arch: CoreArchitecture, + id: RegisterId, +} + +impl CoreRegister { + pub fn new(arch: CoreArchitecture, id: RegisterId) -> Option { + let register = Self { arch, id }; + register.is_valid().then_some(register) + } + + fn is_valid(&self) -> bool { + // We check the name to see if the register is actually valid. 
+ let name = unsafe { BNGetArchitectureRegisterName(self.arch.handle, self.id.into()) }; + match name.is_null() { + true => false, + false => { + unsafe { BNFreeString(name) }; + true + } + } + } +} + +impl Register for CoreRegister { + type InfoType = CoreRegisterInfo; + + fn name(&self) -> Cow<'_, str> { + unsafe { + let name = BNGetArchitectureRegisterName(self.arch.handle, self.id.into()); + + // We need to guarantee ownership, as if we're still + // a Borrowed variant we're about to free the underlying + // memory. + let res = CStr::from_ptr(name); + let res = res.to_string_lossy().into_owned().into(); + + BNFreeString(name); + + res + } + } + + fn info(&self) -> CoreRegisterInfo { + CoreRegisterInfo::new(self.arch, self.id, unsafe { + BNGetArchitectureRegisterInfo(self.arch.handle, self.id.into()) + }) + } + + fn id(&self) -> RegisterId { + self.id + } +} + +impl Debug for CoreRegister { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CoreRegister") + .field("id", &self.id) + .field("name", &self.name()) + .finish() + } +} + +impl CoreArrayProvider for CoreRegister { + type Raw = u32; + type Context = CoreArchitecture; + type Wrapped<'a> = Self; +} + +unsafe impl CoreArrayProviderInner for CoreRegister { + unsafe fn free(raw: *mut Self::Raw, _count: usize, _context: &Self::Context) { + BNFreeRegisterList(raw) + } + + unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, context: &'a Self::Context) -> Self::Wrapped<'a> { + Self::new(*context, RegisterId::from(*raw)).expect("Register list contains valid registers") + } +} + +#[derive(Debug, Copy, Clone)] +pub struct CoreRegisterStackInfo { + arch: CoreArchitecture, + // TODO: Wrap BNRegisterStackInfo + info: BNRegisterStackInfo, +} + +impl CoreRegisterStackInfo { + pub fn new(arch: CoreArchitecture, info: BNRegisterStackInfo) -> Self { + Self { arch, info } + } +} + +impl RegisterStackInfo for CoreRegisterStackInfo { + type RegStackType = CoreRegisterStack; + type RegType = CoreRegister; + 
type RegInfoType = CoreRegisterInfo; + + fn storage_regs(&self) -> (Self::RegType, usize) { + ( + CoreRegister::new(self.arch, RegisterId::from(self.info.firstStorageReg)) + .expect("Storage register is valid"), + self.info.storageCount as usize, + ) + } + + fn top_relative_regs(&self) -> Option<(Self::RegType, usize)> { + if self.info.topRelativeCount == 0 { + None + } else { + Some(( + CoreRegister::new(self.arch, RegisterId::from(self.info.firstTopRelativeReg)) + .expect("Top relative register is valid"), + self.info.topRelativeCount as usize, + )) + } + } + + fn stack_top_reg(&self) -> Self::RegType { + CoreRegister::new(self.arch, RegisterId::from(self.info.stackTopReg)) + .expect("Stack top register is valid") + } +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub struct CoreRegisterStack { + arch: CoreArchitecture, + id: RegisterStackId, +} + +impl CoreRegisterStack { + pub fn new(arch: CoreArchitecture, id: RegisterStackId) -> Option { + let register_stack = Self { arch, id }; + register_stack.is_valid().then_some(register_stack) + } + + fn is_valid(&self) -> bool { + // We check the name to see if the stack register is actually valid. + let name = unsafe { BNGetArchitectureRegisterStackName(self.arch.handle, self.id.into()) }; + match name.is_null() { + true => false, + false => { + unsafe { BNFreeString(name) }; + true + } + } + } +} + +impl RegisterStack for CoreRegisterStack { + type InfoType = CoreRegisterStackInfo; + type RegType = CoreRegister; + type RegInfoType = CoreRegisterInfo; + + fn name(&self) -> Cow<'_, str> { + unsafe { + let name = BNGetArchitectureRegisterStackName(self.arch.handle, self.id.into()); + + // We need to guarantee ownership, as if we're still + // a Borrowed variant we're about to free the underlying + // memory. 
+ let res = CStr::from_ptr(name); + let res = res.to_string_lossy().into_owned().into(); + + BNFreeString(name); + + res + } + } + + fn info(&self) -> CoreRegisterStackInfo { + CoreRegisterStackInfo::new(self.arch, unsafe { + BNGetArchitectureRegisterStackInfo(self.arch.handle, self.id.into()) + }) + } + + fn id(&self) -> RegisterStackId { + self.id + } +} diff --git a/rust/src/basic_block.rs b/rust/src/basic_block.rs index 90c93ad4e..0bc2d95ed 100644 --- a/rust/src/basic_block.rs +++ b/rust/src/basic_block.rs @@ -12,10 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::architecture::CoreArchitecture; +use crate::architecture::{BranchType, CoreArchitecture}; use crate::function::Function; use crate::rc::*; -use crate::BranchType; use binaryninjacore_sys::*; use std::fmt; use std::fmt::Debug; @@ -26,13 +25,6 @@ enum EdgeDirection { Outgoing, } -pub struct PendingBasicBlockEdge { - pub branch_type: BranchType, - pub target: u64, - pub arch: CoreArchitecture, - pub fallthrough: bool, -} - pub struct Edge<'a, C: 'a + BlockContext> { pub branch: BranchType, pub back_edge: bool, @@ -40,7 +32,7 @@ pub struct Edge<'a, C: 'a + BlockContext> { pub target: Guard<'a, BasicBlock>, } -impl<'a, C: 'a + fmt::Debug + BlockContext> fmt::Debug for Edge<'a, C> { +impl<'a, C: 'a + Debug + BlockContext> Debug for Edge<'a, C> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( f, @@ -96,6 +88,60 @@ unsafe impl<'a, C: 'a + BlockContext> CoreArrayProviderInner for Edge<'a, C> { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct PendingBasicBlockEdge { + pub branch_type: BranchType, + pub target: u64, + pub arch: CoreArchitecture, + pub fallthrough: bool, +} + +impl PendingBasicBlockEdge { + pub fn new( + branch_type: BranchType, + target: u64, + arch: CoreArchitecture, + fallthrough: bool, + ) -> Self { + Self { + branch_type, + target, + arch, + fallthrough, + } + } +} + +impl From for 
PendingBasicBlockEdge { + fn from(edge: BNPendingBasicBlockEdge) -> Self { + Self { + branch_type: edge.type_, + target: edge.target, + arch: unsafe { CoreArchitecture::from_raw(edge.arch) }, + fallthrough: edge.fallThrough, + } + } +} + +impl CoreArrayProvider for PendingBasicBlockEdge { + type Raw = BNPendingBasicBlockEdge; + type Context = (); + type Wrapped<'a> + = PendingBasicBlockEdge + where + Self: 'a; +} + +unsafe impl CoreArrayProviderInner for PendingBasicBlockEdge { + unsafe fn free(raw: *mut Self::Raw, _count: usize, _context: &Self::Context) { + BNFreePendingBasicBlockEdgeList(raw); + } + + unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped<'a> { + PendingBasicBlockEdge::from(*raw) + } +} + pub trait BlockContext: Clone + Sync + Send + Sized { type Instruction; type InstructionIndex: Debug + From; @@ -243,36 +289,24 @@ impl BasicBlock { } } - pub fn pending_outgoing_edges(&self) -> Vec { + /// Pending outgoing edges for the basic block. These are edges that have not yet been resolved. 
+ pub fn pending_outgoing_edges(&self) -> Array { unsafe { let mut count = 0; let edges_ptr = BNGetBasicBlockPendingOutgoingEdges(self.handle, &mut count); - let edges = std::slice::from_raw_parts(edges_ptr, count); - - let mut result = Vec::with_capacity(count); - for edge in edges { - result.push(PendingBasicBlockEdge { - branch_type: edge.type_, - target: edge.target, - arch: CoreArchitecture::from_raw(edge.arch), - fallthrough: edge.fallThrough, - }); - } - - BNFreePendingBasicBlockEdgeList(edges_ptr); - result + Array::new(edges_ptr, count, ()) } } - pub fn add_pending_outgoing_edge( - &self, - typ: BranchType, - addr: u64, - arch: CoreArchitecture, - fallthrough: bool, - ) { + pub fn add_pending_outgoing_edge(&self, edge: &PendingBasicBlockEdge) { unsafe { - BNBasicBlockAddPendingOutgoingEdge(self.handle, typ, addr, arch.handle, fallthrough); + BNBasicBlockAddPendingOutgoingEdge( + self.handle, + edge.branch_type, + edge.target, + edge.arch.handle, + edge.fallthrough, + ); } } diff --git a/rust/src/binary_view.rs b/rust/src/binary_view.rs index e1b7a5c00..31bdeb894 100644 --- a/rust/src/binary_view.rs +++ b/rust/src/binary_view.rs @@ -1466,12 +1466,12 @@ pub trait BinaryViewExt: BinaryViewBase { } } - fn read_buffer(&self, offset: u64, len: usize) -> Result { + fn read_buffer(&self, offset: u64, len: usize) -> Option { let read_buffer = unsafe { BNReadViewBuffer(self.as_ref().handle, offset, len) }; if read_buffer.is_null() { - Err(()) + None } else { - Ok(DataBuffer::from_raw(read_buffer)) + Some(DataBuffer::from_raw(read_buffer)) } } diff --git a/rust/src/flowgraph.rs b/rust/src/flowgraph.rs index 4489d9509..02a79fa31 100644 --- a/rust/src/flowgraph.rs +++ b/rust/src/flowgraph.rs @@ -32,7 +32,6 @@ pub use edge::EdgeStyle; pub use edge::FlowGraphEdge; pub use node::FlowGraphNode; -pub type BranchType = BNBranchType; pub type EdgePenStyle = BNEdgePenStyle; pub type ThemeColor = BNThemeColor; pub type FlowGraphOption = BNFlowGraphOption; diff --git 
a/rust/src/flowgraph/edge.rs b/rust/src/flowgraph/edge.rs index 7862f1565..38e0ae9ee 100644 --- a/rust/src/flowgraph/edge.rs +++ b/rust/src/flowgraph/edge.rs @@ -1,8 +1,8 @@ -use binaryninjacore_sys::*; - +use crate::architecture::BranchType; use crate::flowgraph::node::FlowGraphNode; -use crate::flowgraph::{BranchType, EdgePenStyle, ThemeColor}; +use crate::flowgraph::{EdgePenStyle, ThemeColor}; use crate::rc::{CoreArrayProvider, CoreArrayProviderInner, Ref}; +use binaryninjacore_sys::*; #[derive(Clone, Debug, PartialEq)] pub struct FlowGraphEdge { diff --git a/rust/src/flowgraph/node.rs b/rust/src/flowgraph/node.rs index 5c2b9248d..282336709 100644 --- a/rust/src/flowgraph/node.rs +++ b/rust/src/flowgraph/node.rs @@ -1,7 +1,8 @@ +use crate::architecture::BranchType; use crate::basic_block::{BasicBlock, BlockContext}; use crate::disassembly::DisassemblyTextLine; use crate::flowgraph::edge::{EdgeStyle, FlowGraphEdge}; -use crate::flowgraph::{BranchType, FlowGraph}; +use crate::flowgraph::FlowGraph; use crate::function::HighlightColor; use crate::rc::{Array, CoreArrayProvider, CoreArrayProviderInner, Guard, Ref, RefCountable}; use binaryninjacore_sys::*; diff --git a/rust/src/function.rs b/rust/src/function.rs index 1ef0a377e..29d4fc3c9 100644 --- a/rust/src/function.rs +++ b/rust/src/function.rs @@ -37,7 +37,7 @@ pub use binaryninjacore_sys::BNFunctionAnalysisSkipOverride as FunctionAnalysisS pub use binaryninjacore_sys::BNFunctionUpdateType as FunctionUpdateType; pub use binaryninjacore_sys::BNHighlightStandardColor as HighlightStandardColor; -use crate::architecture::RegisterId; +use crate::architecture::{IndirectBranchInfo, RegisterId}; use crate::binary_view::AddressRange; use crate::confidence::Conf; use crate::high_level_il::HighLevelILFunction; @@ -46,7 +46,7 @@ use crate::low_level_il::LowLevelILRegularFunction; use crate::medium_level_il::MediumLevelILFunction; use crate::metadata::Metadata; use crate::variable::{ - IndirectBranchInfo, MergedVariable, 
NamedVariableWithType, RegisterValue, RegisterValueType, + MergedVariable, NamedVariableWithType, RegisterValue, RegisterValueType, StackVariableReference, Variable, }; use crate::workflow::Workflow; diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 476cd04c5..50913ed83 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -94,6 +94,8 @@ pub mod workflow; use crate::file_metadata::FileMetadata; use crate::function::Function; +use crate::progress::{NoProgressCallback, ProgressCallback}; +use crate::string::raw_to_string; use binary_view::BinaryView; use binaryninjacore_sys::*; use metadata::Metadata; @@ -107,9 +109,6 @@ use string::BnString; use string::IntoCStr; use string::IntoJson; -use crate::progress::{NoProgressCallback, ProgressCallback}; -use crate::string::raw_to_string; -pub use binaryninjacore_sys::BNBranchType as BranchType; pub use binaryninjacore_sys::BNDataFlowQueryOption as DataFlowQueryOption; pub use binaryninjacore_sys::BNEndianness as Endianness; pub use binaryninjacore_sys::BNILBranchDependence as ILBranchDependence; diff --git a/rust/src/llvm.rs b/rust/src/llvm.rs index 1296db651..d2a0292aa 100644 --- a/rust/src/llvm.rs +++ b/rust/src/llvm.rs @@ -1,8 +1,102 @@ -use binaryninjacore_sys::BNLlvmServicesDisasmInstruction; +use binaryninjacore_sys::{ + BNLlvmServicesAssemble, BNLlvmServicesAssembleFree, BNLlvmServicesDisasmInstruction, + BNLlvmServicesInit, +}; use std::ffi::{CStr, CString}; use std::os::raw::{c_char, c_int}; -pub fn disas_instruction(triplet: &str, data: &[u8], address64: u64) -> Option<(usize, String)> { +#[repr(i32)] +pub enum LlvmServicesDialect { + Unspecified = 0, + Att = 1, + Intel = 2, +} + +#[repr(i32)] +pub enum LlvmServicesCodeModel { + Default = 0, + Small = 1, + Kernel = 2, + Medium = 3, + Large = 4, +} + +#[repr(i32)] +pub enum LlvmServicesRelocMode { + Static = 0, + PIC = 1, + DynamicNoPIC = 2, +} + +pub fn llvm_assemble( + code: &str, + dialect: LlvmServicesDialect, + triplet: &str, + code_model: 
LlvmServicesCodeModel, + reloc_mode: LlvmServicesRelocMode, +) -> Result, String> { + let code = CString::new(code).map_err(|_| "Invalid encoding in code string".to_string())?; + let arch_triple = CString::new(triplet) + .map_err(|_| "Invalid encoding in architecture triple string".to_string())?; + let mut out_bytes: *mut std::ffi::c_char = std::ptr::null_mut(); + let mut out_bytes_len: std::ffi::c_int = 0; + let mut err_bytes: *mut std::ffi::c_char = std::ptr::null_mut(); + let mut err_len: std::ffi::c_int = 0; + + unsafe { + BNLlvmServicesInit(); + } + + let result = unsafe { + BNLlvmServicesAssemble( + code.as_ptr(), + dialect as i32, + arch_triple.as_ptr(), + code_model as i32, + reloc_mode as i32, + &mut out_bytes as *mut *mut std::ffi::c_char, + &mut out_bytes_len as *mut std::ffi::c_int, + &mut err_bytes as *mut *mut std::ffi::c_char, + &mut err_len as *mut std::ffi::c_int, + ) + }; + + let out = if out_bytes_len == 0 { + Vec::new() + } else { + unsafe { + std::slice::from_raw_parts( + out_bytes as *const std::ffi::c_char as *const u8, + out_bytes_len as usize, + ) + } + .to_vec() + }; + + let errors = if err_len == 0 { + "".into() + } else { + String::from_utf8_lossy(unsafe { + std::slice::from_raw_parts( + err_bytes as *const std::ffi::c_char as *const u8, + err_len as usize, + ) + }) + .into_owned() + }; + + unsafe { + BNLlvmServicesAssembleFree(out_bytes, err_bytes); + } + + if result == 0 { + Ok(out) + } else { + Err(errors) + } +} + +pub fn llvm_disassemble(triplet: &str, data: &[u8], address: u64) -> Option<(usize, String)> { unsafe { let triplet = CString::new(triplet).ok()?; let mut src = data.to_vec(); @@ -11,7 +105,7 @@ pub fn disas_instruction(triplet: &str, data: &[u8], address64: u64) -> Option<( triplet.as_ptr(), src.as_mut_ptr(), src.len() as c_int, - address64, + address, buf.as_mut_ptr() as *mut c_char, buf.len(), ); diff --git a/rust/src/variable.rs b/rust/src/variable.rs index fc6febfb6..d224a45b2 100644 --- a/rust/src/variable.rs +++ 
b/rust/src/variable.rs @@ -911,58 +911,3 @@ impl PossibleValueSet { } } } - -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub struct IndirectBranchInfo { - pub source: Location, - pub dest: Location, - pub auto_defined: bool, -} - -impl From for IndirectBranchInfo { - fn from(value: BNIndirectBranchInfo) -> Self { - Self { - source: Location::from_raw(value.sourceAddr, value.sourceArch), - dest: Location::from_raw(value.destAddr, value.destArch), - auto_defined: value.autoDefined, - } - } -} - -impl From for BNIndirectBranchInfo { - fn from(value: IndirectBranchInfo) -> Self { - let source_arch = value - .source - .arch - .map(|a| a.handle) - .unwrap_or(std::ptr::null_mut()); - let dest_arch = value - .source - .arch - .map(|a| a.handle) - .unwrap_or(std::ptr::null_mut()); - Self { - sourceArch: source_arch, - sourceAddr: value.source.addr, - destArch: dest_arch, - destAddr: value.dest.addr, - autoDefined: value.auto_defined, - } - } -} - -impl CoreArrayProvider for IndirectBranchInfo { - type Raw = BNIndirectBranchInfo; - type Context = (); - type Wrapped<'a> = Self; -} - -unsafe impl CoreArrayProviderInner for IndirectBranchInfo { - unsafe fn free(raw: *mut Self::Raw, _count: usize, _context: &Self::Context) { - BNFreeIndirectBranchList(raw) - } - - unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped<'a> { - Self::from(*raw) - } -} diff --git a/view/minidump/src/command.rs b/view/minidump/src/command.rs index 818b1c599..81629f556 100644 --- a/view/minidump/src/command.rs +++ b/view/minidump/src/command.rs @@ -8,7 +8,7 @@ use binaryninja::binary_view::{BinaryView, BinaryViewBase, BinaryViewExt}; pub fn print_memory_information(bv: &BinaryView) { debug!("Printing memory information"); if let Some(minidump_bv) = bv.parent_view() { - if let Ok(read_buffer) = minidump_bv.read_buffer(0, minidump_bv.len() as usize) { + if let Some(read_buffer) = minidump_bv.read_buffer(0, minidump_bv.len() as usize) { if let Ok(minidump_obj) = 
Minidump::read(read_buffer.get_data()) { if let Ok(memory_info_list) = minidump_obj.get_stream::() { let mut memory_info_list_writer = Vec::new(); diff --git a/view/minidump/src/view.rs b/view/minidump/src/view.rs index ff85fe076..413fafa15 100644 --- a/view/minidump/src/view.rs +++ b/view/minidump/src/view.rs @@ -117,7 +117,9 @@ impl MinidumpBinaryView { fn init(&self) -> BinaryViewResult<()> { let parent_view = self.parent_view().ok_or(())?; - let read_buffer = parent_view.read_buffer(0, parent_view.len() as usize)?; + let read_buffer = parent_view + .read_buffer(0, parent_view.len() as usize) + .ok_or(())?; if let Ok(minidump_obj) = Minidump::read(read_buffer.get_data()) { // Architecture, platform information From 4da1c464635bdc87b2c630b9a70524a9df7f6957 Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Sun, 7 Dec 2025 15:04:02 -0500 Subject: [PATCH 04/19] [Rust] Impl `Display` for `VersionInfo` --- rust/src/lib.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 50913ed83..ad1d910d4 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -104,6 +104,7 @@ use rc::Ref; use std::cmp; use std::collections::HashMap; use std::ffi::{c_char, c_void, CStr}; +use std::fmt::{Display, Formatter}; use std::path::{Path, PathBuf}; use string::BnString; use string::IntoCStr; @@ -513,6 +514,20 @@ impl Ord for VersionInfo { } } +impl Display for VersionInfo { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if self.channel.is_empty() { + write!(f, "{}.{}.{}", self.major, self.minor, self.build) + } else { + write!( + f, + "{}.{}.{}-{}", + self.major, self.minor, self.build, self.channel + ) + } + } +} + pub fn version_info() -> VersionInfo { let info_raw = unsafe { BNGetVersionInfo() }; VersionInfo::from_owned_raw(info_raw) From 2372ab811e8a51d19d046acb1405f79f42ba6a75 Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Sun, 7 Dec 2025 17:29:16 -0500 Subject: [PATCH 05/19] [Rust] Misc cleanup for ABB --- 
rust/src/architecture/basic_block.rs | 331 +++++++++++++++++---------- rust/src/architecture/branches.rs | 6 + 2 files changed, 212 insertions(+), 125 deletions(-) diff --git a/rust/src/architecture/basic_block.rs b/rust/src/architecture/basic_block.rs index 1867f50a7..f092e9272 100644 --- a/rust/src/architecture/basic_block.rs +++ b/rust/src/architecture/basic_block.rs @@ -4,6 +4,7 @@ use crate::function::{Function, Location, NativeBlock}; use crate::rc::Ref; use binaryninjacore_sys::*; use std::collections::{HashMap, HashSet}; +use std::fmt::Debug; pub struct BasicBlockAnalysisContext { pub(crate) handle: *mut BNBasicBlockAnalysisContext, @@ -36,65 +37,79 @@ impl BasicBlockAnalysisContext { let ctx_ref = &*handle; - let indirect_branches = (0..ctx_ref.indirectBranchesCount) - .map(|i| { - let raw: BNIndirectBranchInfo = - unsafe { std::ptr::read(ctx_ref.indirectBranches.add(i)) }; - IndirectBranchInfo::from(raw) - }) - .collect::>(); - - let indirect_no_return_calls = (0..ctx_ref.indirectNoReturnCallsCount) - .map(|i| { - let raw = unsafe { std::ptr::read(ctx_ref.indirectNoReturnCalls.add(i)) }; - Location::from(raw) - }) - .collect::>(); - - let contextual_returns = (0..ctx_ref.contextualFunctionReturnCount) - .map(|i| { - let loc = unsafe { - let raw = std::ptr::read(ctx_ref.contextualFunctionReturnLocations.add(i)); - Location::from(raw) - }; - let val = unsafe { *ctx_ref.contextualFunctionReturnValues.add(i) }; - (loc, val) - }) - .collect::>(); - - let direct_code_references = (0..ctx_ref.directRefCount) - .map(|i| { - let src = unsafe { - let raw = std::ptr::read(ctx_ref.directRefSources.add(i)); - Location::from(raw) - }; - let tgt = unsafe { *ctx_ref.directRefTargets.add(i) }; - (tgt, src) - }) - .collect::>(); - - let direct_no_return_calls = (0..ctx_ref.directNoReturnCallsCount) - .map(|i| { - let raw = unsafe { std::ptr::read(ctx_ref.directNoReturnCalls.add(i)) }; - Location::from(raw) - }) - .collect::>(); - - let halted_disassembly_addresses = 
(0..ctx_ref.haltedDisassemblyAddressesCount) - .map(|i| { - let raw = unsafe { std::ptr::read(ctx_ref.haltedDisassemblyAddresses.add(i)) }; - Location::from(raw) - }) - .collect::>(); - - let inlined_unresolved_indirect_branches = (0..ctx_ref - .inlinedUnresolvedIndirectBranchCount) - .map(|i| { - let raw = - unsafe { std::ptr::read(ctx_ref.inlinedUnresolvedIndirectBranches.add(i)) }; - Location::from(raw) - }) - .collect::>(); + let raw_indirect_branches: &[BNIndirectBranchInfo] = + std::slice::from_raw_parts(ctx_ref.indirectBranches, ctx_ref.indirectBranchesCount); + let indirect_branches: Vec = raw_indirect_branches + .iter() + .map(IndirectBranchInfo::from) + .collect(); + + let raw_indirect_no_return_calls: &[BNArchitectureAndAddress] = std::slice::from_raw_parts( + ctx_ref.indirectNoReturnCalls, + ctx_ref.indirectNoReturnCallsCount, + ); + let indirect_no_return_calls: HashSet = raw_indirect_no_return_calls + .iter() + .map(Location::from) + .collect(); + + let raw_contextual_return_locs: &[BNArchitectureAndAddress] = unsafe { + std::slice::from_raw_parts( + ctx_ref.contextualFunctionReturnLocations, + ctx_ref.contextualFunctionReturnCount, + ) + }; + let raw_contextual_return_vals: &[bool] = unsafe { + std::slice::from_raw_parts( + ctx_ref.contextualFunctionReturnValues, + ctx_ref.contextualFunctionReturnCount, + ) + }; + let contextual_returns: HashMap = raw_contextual_return_locs + .iter() + .map(Location::from) + .zip(raw_contextual_return_vals.iter().copied()) + .collect(); + + let raw_direct_ref_sources: &[BNArchitectureAndAddress] = + unsafe { std::slice::from_raw_parts(ctx_ref.directRefSources, ctx_ref.directRefCount) }; + let raw_direct_ref_targets: &[u64] = + unsafe { std::slice::from_raw_parts(ctx_ref.directRefTargets, ctx_ref.directRefCount) }; + let direct_code_references: HashMap = raw_direct_ref_targets + .iter() + .copied() + .zip(raw_direct_ref_sources.iter().map(Location::from)) + .collect(); + + let raw_direct_no_return_calls: 
&[BNArchitectureAndAddress] = std::slice::from_raw_parts( + ctx_ref.directNoReturnCalls, + ctx_ref.directNoReturnCallsCount, + ); + let direct_no_return_calls: HashSet = raw_direct_no_return_calls + .iter() + .map(Location::from) + .collect(); + + let raw_halted_disassembly_address: &[BNArchitectureAndAddress] = + std::slice::from_raw_parts( + ctx_ref.haltedDisassemblyAddresses, + ctx_ref.haltedDisassemblyAddressesCount, + ); + let halted_disassembly_addresses: HashSet = raw_halted_disassembly_address + .iter() + .map(Location::from) + .collect(); + + let raw_inlined_unresolved_indirect_branches: &[BNArchitectureAndAddress] = + std::slice::from_raw_parts( + ctx_ref.inlinedUnresolvedIndirectBranches, + ctx_ref.inlinedUnresolvedIndirectBranchCount, + ); + let inlined_unresolved_indirect_branches: HashSet = + raw_inlined_unresolved_indirect_branches + .iter() + .map(Location::from) + .collect(); BasicBlockAnalysisContext { handle, @@ -116,6 +131,7 @@ impl BasicBlockAnalysisContext { } } + /// Adds a contextual function return location and its value to the current function. pub fn add_contextual_return(&mut self, loc: impl Into, value: bool) { let loc = loc.into(); if !self.contextual_returns.contains_key(&loc) { @@ -125,16 +141,19 @@ impl BasicBlockAnalysisContext { self.contextual_returns.insert(loc, value); } + /// Adds a direct code reference to the current function. pub fn add_direct_code_reference(&mut self, target: u64, src: impl Into) { self.direct_code_references .entry(target) .or_insert(src.into()); } + /// Adds a direct no-return call location to the current function. pub fn add_direct_no_return_call(&mut self, loc: impl Into) { self.direct_no_return_calls.insert(loc.into()); } + /// Adds an address to the set of halted disassembly addresses. 
pub fn add_halted_disassembly_address(&mut self, loc: impl Into) { self.halted_disassembly_addresses.insert(loc.into()); } @@ -143,6 +162,9 @@ impl BasicBlockAnalysisContext { self.inlined_unresolved_indirect_branches.insert(loc.into()); } + /// Creates a new [`BasicBlock`] at the specified address for the given [`CoreArchitecture`]. + /// + /// After creating, you can add using [`BasicBlockAnalysisContext::add_basic_block`]. pub fn create_basic_block( &self, arch: CoreArchitecture, @@ -158,80 +180,127 @@ impl BasicBlockAnalysisContext { unsafe { Some(BasicBlock::ref_from_raw(raw_block, NativeBlock::new())) } } + /// Adds a [`BasicBlock`] to the current function. + /// + /// You can create a [`BasicBlock`] via [`BasicBlockAnalysisContext::create_basic_block`]. pub fn add_basic_block(&self, block: Ref>) { unsafe { BNAnalyzeBasicBlocksContextAddBasicBlockToFunction(self.handle, block.handle); } } + /// Adds a temporary outgoing reference to the specified function. pub fn add_temp_outgoing_reference(&self, target: &Function) { unsafe { BNAnalyzeBasicBlocksContextAddTempReference(self.handle, target.handle); } } + /// To be called before finalizing the basic block analysis. + fn update_direct_code_references(&mut self) { + let total = self.direct_code_references.len(); + let mut sources: Vec = Vec::with_capacity(total); + let mut targets: Vec = Vec::with_capacity(total); + for (target, src) in &self.direct_code_references { + sources.push(src.into()); + targets.push(*target); + } + unsafe { + BNAnalyzeBasicBlocksContextSetDirectCodeReferences( + self.handle, + sources.as_mut_ptr(), + targets.as_mut_ptr(), + total, + ); + } + } + + /// To be called before finalizing the basic block analysis. 
+ fn update_direct_no_return_calls(&mut self) { + let total = self.direct_no_return_calls.len(); + let mut raw_locations: Vec<_> = self + .direct_no_return_calls + .iter() + .map(BNArchitectureAndAddress::from) + .collect(); + unsafe { + BNAnalyzeBasicBlocksContextSetDirectNoReturnCalls( + self.handle, + raw_locations.as_mut_ptr(), + total, + ); + } + } + + /// To be called before finalizing the basic block analysis. + fn update_inlined_unresolved_indirect_branches(&mut self) { + let total = self.inlined_unresolved_indirect_branches.len(); + let mut raw_locations: Vec<_> = self + .inlined_unresolved_indirect_branches + .iter() + .map(BNArchitectureAndAddress::from) + .collect(); + unsafe { + BNAnalyzeBasicBlocksContextSetInlinedUnresolvedIndirectBranches( + self.handle, + raw_locations.as_mut_ptr(), + total, + ); + } + } + + /// To be called before finalizing the basic block analysis. + fn update_halted_disassembly_addresses(&mut self) { + let total = self.halted_disassembly_addresses.len(); + let mut raw_locations: Vec<_> = self + .halted_disassembly_addresses + .iter() + .map(BNArchitectureAndAddress::from) + .collect(); + unsafe { + BNAnalyzeBasicBlocksContextSetHaltedDisassemblyAddresses( + self.handle, + raw_locations.as_mut_ptr(), + total, + ); + } + } + + /// To be called before finalizing the basic block analysis. + fn update_contextual_returns(&mut self) { + let total = self.contextual_returns.len(); + let mut locations: Vec = Vec::with_capacity(total); + let mut values: Vec = Vec::with_capacity(total); + for (loc, value) in &self.contextual_returns { + locations.push(loc.into()); + values.push(*value); + } + unsafe { + BNAnalyzeBasicBlocksContextSetContextualFunctionReturns( + self.handle, + locations.as_mut_ptr(), + values.as_mut_ptr(), + total, + ); + } + } + + /// Finalizes the function's basic block analysis. 
pub fn finalize(&mut self) { if !self.direct_code_references.is_empty() { - let total = self.direct_code_references.len(); - let mut sources: Vec = Vec::with_capacity(total); - let mut targets: Vec = Vec::with_capacity(total); - for (target, src) in &self.direct_code_references { - sources.push(BNArchitectureAndAddress::from(src)); - targets.push(*target); - } - unsafe { - BNAnalyzeBasicBlocksContextSetDirectCodeReferences( - self.handle, - sources.as_mut_ptr(), - targets.as_mut_ptr(), - total, - ); - } + self.update_direct_code_references(); } if !self.direct_no_return_calls.is_empty() { - let total = self.direct_no_return_calls.len(); - let mut locations: Vec = Vec::with_capacity(total); - for loc in &self.direct_no_return_calls { - locations.push(BNArchitectureAndAddress::from(loc)); - } - unsafe { - BNAnalyzeBasicBlocksContextSetDirectNoReturnCalls( - self.handle, - locations.as_mut_ptr(), - total, - ); - } + self.update_direct_no_return_calls(); } if !self.halted_disassembly_addresses.is_empty() { - let total = self.halted_disassembly_addresses.len(); - let mut locations: Vec = Vec::with_capacity(total); - for loc in &self.halted_disassembly_addresses { - locations.push(BNArchitectureAndAddress::from(loc)); - } - unsafe { - BNAnalyzeBasicBlocksContextSetHaltedDisassemblyAddresses( - self.handle, - locations.as_mut_ptr(), - total, - ); - } + self.update_halted_disassembly_addresses(); } if !self.inlined_unresolved_indirect_branches.is_empty() { - let total = self.inlined_unresolved_indirect_branches.len(); - let mut locations: Vec = Vec::with_capacity(total); - for loc in &self.inlined_unresolved_indirect_branches { - locations.push(BNArchitectureAndAddress::from(loc)); - } - unsafe { - BNAnalyzeBasicBlocksContextSetInlinedUnresolvedIndirectBranches( - self.handle, - locations.as_mut_ptr(), - total, - ); - } + self.update_inlined_unresolved_indirect_branches(); } unsafe { @@ -239,23 +308,35 @@ impl BasicBlockAnalysisContext { } if self.contextual_returns_dirty 
{ - let total = self.contextual_returns.len(); - let mut locations: Vec = Vec::with_capacity(total); - let mut values: Vec = Vec::with_capacity(total); - for (loc, value) in &self.contextual_returns { - locations.push(BNArchitectureAndAddress::from(loc)); - values.push(*value); - } - unsafe { - BNAnalyzeBasicBlocksContextSetContextualFunctionReturns( - self.handle, - locations.as_mut_ptr(), - values.as_mut_ptr(), - total, - ); - } + self.update_contextual_returns(); } unsafe { BNAnalyzeBasicBlocksContextFinalize(self.handle) }; } } + +impl Debug for BasicBlockAnalysisContext { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("BasicBlockAnalysisContext") + .field("indirect_branches", &self.indirect_branches) + .field("indirect_no_return_calls", &self.indirect_no_return_calls) + .field("analysis_skip_override", &self.analysis_skip_override) + .field("translate_tail_calls", &self.translate_tail_calls) + .field("disallow_branch_to_string", &self.disallow_branch_to_string) + .field("max_function_size", &self.max_function_size) + .field("guided_analysis_mode", &self.guided_analysis_mode) + .field( + "trigger_guided_on_invalid_instruction", + &self.trigger_guided_on_invalid_instruction, + ) + .field("max_size_reached", &self.max_size_reached) + .field("contextual_returns", &self.contextual_returns) + .field("direct_code_references", &self.direct_code_references) + .field("direct_no_return_calls", &self.direct_no_return_calls) + .field( + "halted_disassembly_addresses", + &self.halted_disassembly_addresses, + ) + .finish() + } +} diff --git a/rust/src/architecture/branches.rs b/rust/src/architecture/branches.rs index ff0ab5162..80baa1b65 100644 --- a/rust/src/architecture/branches.rs +++ b/rust/src/architecture/branches.rs @@ -136,6 +136,12 @@ impl From for BNIndirectBranchInfo { } } +impl From<&BNIndirectBranchInfo> for IndirectBranchInfo { + fn from(value: &BNIndirectBranchInfo) -> Self { + Self::from(*value) + } +} + impl 
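CoreArrayProvider for IndirectBranchInfo { type Raw = BNIndirectBranchInfo; type Context = (); From 587a7fdc48290f6217c06d72f700ed7214fa1c1a Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Sun, 7 Dec 2025 17:29:41 -0500 Subject: [PATCH 06/19] [Rust] Add misc documentation --- rust/src/architecture.rs | 30 +++++++++++++++++++++++++----- rust/src/binary_view.rs | 11 +++++++++++ rust/src/lib.rs | 5 +++-- rust/src/llvm.rs | 4 ++++ 4 files changed, 43 insertions(+), 7 deletions(-) diff --git a/rust/src/architecture.rs b/rust/src/architecture.rs index becf7639c..e893213b8 100644 --- a/rust/src/architecture.rs +++ b/rust/src/architecture.rs @@ -123,12 +123,23 @@ pub trait Architecture: 'static + Sized + AsRef { fn associated_arch_by_addr(&self, addr: u64) -> CoreArchitecture; + /// Returns the [`InstructionInfo`] at the given virtual address with `data`. + /// + /// The [`InstructionInfo`] object should always fill the proper length and branches; if not, the + /// next instruction will likely be incorrect. fn instruction_info(&self, data: &[u8], addr: u64) -> Option; + fn instruction_text( &self, data: &[u8], addr: u64, ) -> Option<(usize, Vec)>; + + // TODO: Why do we need to return a boolean here? Does `None` not represent the same thing? + /// Appends arbitrary low-level il instructions to `il`. + /// + /// If `None` is returned, no instructions were appended and the data is invalid. If `Some` is returned, + /// the consumed instruction length is returned (necessary for variable length instruction decoding). fn instruction_llil( &self, data: &[u8], @@ -136,6 +147,10 @@ pub trait Architecture: 'static + Sized + AsRef { il: &LowLevelILMutableFunction, ) -> Option<(usize, bool)>; + /// Performs basic block recovery and commits the results to the function analysis. + /// + /// NOTE: Only implement this method if function-level analysis is required. Otherwise, do not + /// implement to let default basic block analysis take place. fn analyze_basic_blocks( &self, function: &mut Function, @@ -147,7 +162,7 @@ pub trait Architecture: 'static + Sized + AsRef { } /// Fallback flag value calculation path. This method is invoked when the core is unable to - /// recover flag use semantics, and resorts to emitting instructions that explicitly set each + /// recover the flag use semantics and resorts to emitting instructions that explicitly set each /// observed flag to the value of an expression returned by this function. /// /// This function *MUST NOT* append instructions that have side effects. @@ -166,11 +181,10 @@ pub trait Architecture: 'static + Sized + AsRef { Some(get_default_flag_write_llil(self, role, op, il)) } - /// Determines what flags need to be examined in order to attempt automatic recovery of the - /// semantics of this flag use. + /// Determines what flags need to be examined to attempt automatic recovery of the flag use semantics. /// - /// If automatic recovery is not possible, the `flag_cond_llil` method will be invoked to give - /// this `Architecture` implementation arbitrary control over the expression to be evaluated. + /// If automatic recovery is not possible, the [`Architecture::flag_cond_llil`] method will be invoked + /// to give this [`Architecture`] implementation arbitrary control over the expression to be evaluated. fn flags_required_for_flag_condition( &self, _condition: FlagCondition, @@ -485,6 +499,12 @@ impl Architecture for CoreArchitecture { } } + /// Performs basic block recovery and commits the results to the function analysis. + /// + /// NOTE: Only implement this method if function-level analysis is required. Otherwise, do not + /// implement to let default basic block analysis take place.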
fn analyze_basic_blocks( &self, function: &mut Function, @@ -147,7 +162,7 @@ pub trait Architecture: 'static + Sized + AsRef { } /// Fallback flag value calculation path. This method is invoked when the core is unable to - /// recover flag use semantics, and resorts to emitting instructions that explicitly set each + /// recover the flag using semantics and resorts to emitting instructions that explicitly set each /// observed flag to the value of an expression returned by this function. /// /// This function *MUST NOT* append instructions that have side effects. @@ -166,11 +181,10 @@ pub trait Architecture: 'static + Sized + AsRef { Some(get_default_flag_write_llil(self, role, op, il)) } - /// Determines what flags need to be examined in order to attempt automatic recovery of the - /// semantics of this flag use. + /// Determines what flags need to be examined to attempt automatic recovery of the flag uses semantics. /// - /// If automatic recovery is not possible, the `flag_cond_llil` method will be invoked to give - /// this `Architecture` implementation arbitrary control over the expression to be evaluated. + /// If automatic recovery is not possible, the [`Architecture::flag_cond_llil`] method will be invoked + /// to give this [`Architecture`] implementation arbitrary control over the expression to be evaluated. fn flags_required_for_flag_condition( &self, _condition: FlagCondition, @@ -485,6 +499,12 @@ impl Architecture for CoreArchitecture { } } + /// Performs basic block recovery and commits the results to the function analysis. + /// + /// NOTE: Only implement this method if function-level analysis is required. Otherwise, do not + /// implement to let default basic block analysis take place. 
+ /// + /// NOTE: The default implementation exists in C++ here: fn analyze_basic_blocks( &self, function: &mut Function, diff --git a/rust/src/binary_view.rs b/rust/src/binary_view.rs index 31bdeb894..9952cf286 100644 --- a/rust/src/binary_view.rs +++ b/rust/src/binary_view.rs @@ -1322,6 +1322,11 @@ pub trait BinaryViewExt: BinaryViewBase { } } + /// This list contains the analysis entry function, and functions like init_array, fini_array, + /// and TLS callbacks etc. + /// + /// We see `entry_functions` as good starting points for analysis, these functions normally don't + /// have internal references. Exported functions in a dll/so file are not included. fn entry_point_functions(&self) -> Array { unsafe { let mut count = 0; @@ -1706,6 +1711,9 @@ pub trait BinaryViewExt: BinaryViewBase { } } + /// Retrieve the metadata as the type `T`. + /// + /// Fails if the metadata does not exist, or if the metadata failed to coerce to type `T`. fn get_metadata(&self, key: &str) -> Option> where T: for<'a> TryFrom<&'a Metadata>, @@ -2235,6 +2243,9 @@ pub trait BinaryViewExt: BinaryViewBase { } } + /// Retrieve the string that falls on a given virtual address. + /// + /// NOTE: This returns discovered strings and is therefore governed by `analysis.limits.minStringLength` and other settings. fn string_at(&self, addr: u64) -> Option { let mut str_ref = BNStringReference::default(); let success = unsafe { BNGetStringAtAddress(self.as_ref().handle, addr, &mut str_ref) }; diff --git a/rust/src/lib.rs b/rust/src/lib.rs index ad1d910d4..deaeb097f 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -555,7 +555,7 @@ pub fn license_count() -> i32 { /// Set the license that will be used once the core initializes. You can reset the license by passing `None`. /// -/// If not set the normal license retrieval will occur: +/// If not set, the normal license retrieval will occur: /// 1. Check the BN_LICENSE environment variable /// 2. 
Check the Binary Ninja user directory for license.dat #[cfg(not(feature = "demo"))] @@ -623,7 +623,7 @@ pub fn add_optional_plugin_dependency(name: &str) { unsafe { BNAddOptionalPluginDependency(raw_name.as_ptr()) }; } -// Provide ABI version automatically so that the core can verify binary compatibility +/// Exported function to tell the core what core ABI version this plugin was compiled against. #[cfg(not(feature = "no_exports"))] #[no_mangle] #[allow(non_snake_case)] @@ -631,6 +631,7 @@ pub extern "C" fn CorePluginABIVersion() -> u32 { plugin_abi_version() } +/// Exported function to tell the core what UI ABI version this plugin was compiled against. #[cfg(not(feature = "no_exports"))] #[no_mangle] pub extern "C" fn UIPluginABIVersion() -> u32 { diff --git a/rust/src/llvm.rs b/rust/src/llvm.rs index d2a0292aa..7f3a57bbf 100644 --- a/rust/src/llvm.rs +++ b/rust/src/llvm.rs @@ -1,3 +1,7 @@ +//! LLVM functionality exposed by the core. +//! +//! Also see [`crate::demangle::demangle_llvm`]. 
+ use binaryninjacore_sys::{ BNLlvmServicesAssemble, BNLlvmServicesAssembleFree, BNLlvmServicesDisasmInstruction, BNLlvmServicesInit, From 6dded1dc1901cf6e0fb4978ee7f36e11582aa93f Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Mon, 8 Dec 2025 19:18:12 -0500 Subject: [PATCH 07/19] [Rust] Remove `UnusedRegisterStackInfo` and update architecture documentation --- arch/msp430/src/architecture.rs | 4 +- arch/riscv/src/lib.rs | 4 +- rust/src/architecture.rs | 284 ++++++++++++++++++++++++----- rust/src/architecture/branches.rs | 2 +- rust/src/architecture/intrinsic.rs | 2 +- rust/src/architecture/register.rs | 142 ++++++++++++--- 6 files changed, 358 insertions(+), 80 deletions(-) diff --git a/arch/msp430/src/architecture.rs b/arch/msp430/src/architecture.rs index 91826193a..ef4c503a8 100644 --- a/arch/msp430/src/architecture.rs +++ b/arch/msp430/src/architecture.rs @@ -5,7 +5,7 @@ use crate::register::Register; use binaryninja::{ architecture::{ Architecture, CoreArchitecture, CustomArchitectureHandle, FlagCondition, InstructionInfo, - UnusedIntrinsic, UnusedRegisterStack, UnusedRegisterStackInfo, + UnusedIntrinsic, UnusedRegisterStack, }, disassembly::{InstructionTextToken, InstructionTextTokenKind}, Endianness, @@ -42,7 +42,7 @@ impl Msp430 { impl Architecture for Msp430 { type Handle = CustomArchitectureHandle; - type RegisterStackInfo = UnusedRegisterStackInfo; + type RegisterStackInfo = UnusedRegisterStack; type RegisterStack = UnusedRegisterStack; type Register = Register; type RegisterInfo = Register; diff --git a/arch/riscv/src/lib.rs b/arch/riscv/src/lib.rs index 6ce254063..22ecdd6db 100644 --- a/arch/riscv/src/lib.rs +++ b/arch/riscv/src/lib.rs @@ -11,7 +11,7 @@ use binaryninja::{ architecture::{ Architecture, ArchitectureExt, CoreArchitecture, CustomArchitectureHandle, ImplicitRegisterExtend, InstructionInfo, Register as Reg, RegisterInfo, UnusedFlag, - UnusedRegisterStack, UnusedRegisterStackInfo, + UnusedRegisterStack, }, binary_view::{BinaryView, 
BinaryViewExt}, calling_convention::{register_calling_convention, CallingConvention, ConventionBuilder}, @@ -644,7 +644,7 @@ impl Architecture for RiscVArch { type RegisterInfo = Register; type Register = Register; - type RegisterStackInfo = UnusedRegisterStackInfo; + type RegisterStackInfo = UnusedRegisterStack; type RegisterStack = UnusedRegisterStack; type Flag = UnusedFlag; diff --git a/rust/src/architecture.rs b/rust/src/architecture.rs index e893213b8..d85faacae 100644 --- a/rust/src/architecture.rs +++ b/rust/src/architecture.rs @@ -94,22 +94,62 @@ macro_rules! new_id_type { pub trait Architecture: 'static + Sized + AsRef { type Handle: Borrow + Clone; + /// The [`RegisterInfo`] associated with this architecture. type RegisterInfo: RegisterInfo; + + /// The [`Register`] associated with this architecture. type Register: Register; + + /// The [`RegisterStackInfo`] associated with this architecture. + /// + /// You may only set this to [`UnusedRegisterStack`] if [`Self::RegisterStack`] is as well. type RegisterStackInfo: RegisterStackInfo< RegType = Self::Register, RegInfoType = Self::RegisterInfo, RegStackType = Self::RegisterStack, >; + + /// The [`RegisterStack`] associated with this architecture. + /// + /// If you do not override [`Architecture::register_stack_from_id`] and [`Architecture::register_stacks`], + /// you may set this to [`UnusedRegisterStack`]. type RegisterStack: RegisterStack< InfoType = Self::RegisterStackInfo, RegType = Self::Register, RegInfoType = Self::RegisterInfo, >; + /// The [`Flag`] associated with this architecture. + /// + /// If you do not override [`Architecture::flag_from_id`] and [`Architecture::flags`], you may + /// set this to [`UnusedFlag`]. type Flag: Flag; + + /// The [`FlagWrite`] associated with this architecture. + /// + /// Can only be set to [`UnusedFlag`] if [`Self::Flag`] is as well. 
Otherwise, it is expected that + /// this points to a custom [`FlagWrite`] with the following functions defined: + /// + /// - [`Architecture::flag_write_types`] + /// - [`Architecture::flag_write_from_id`] type FlagWrite: FlagWrite; + + /// The [`FlagClass`] associated with this architecture. + /// + /// Can only be set to [`UnusedFlag`] if [`Self::Flag`] is as well. Otherwise, it is expected that + /// this points to a custom [`FlagClass`] with the following functions defined: + /// + /// - [`Architecture::flag_classes`] + /// - [`Architecture::flag_class_from_id`] type FlagClass: FlagClass; + + /// The [`FlagGroup`] associated with this architecture. + /// + /// Can only be set to [`UnusedFlag`] if [`Self::Flag`] is as well. Otherwise, it is expected that + /// this points to a custom [`FlagGroup`] with the following functions defined: + /// + /// - [`Architecture::flag_groups`] + /// - [`Architecture::flag_group_from_id`] type FlagGroup: FlagGroup; type Intrinsic: Intrinsic; @@ -118,10 +158,26 @@ pub trait Architecture: 'static + Sized + AsRef { fn address_size(&self) -> usize; fn default_integer_size(&self) -> usize; fn instruction_alignment(&self) -> usize; + + /// The maximum length of an instruction in bytes. This is used to determine the size of the buffer + /// given to callbacks such as [`Architecture::instruction_info`], [`Architecture::instruction_text`] + /// and [`Architecture::instruction_llil`]. + /// + /// NOTE: The maximum **CANNOT** be greater than 256. fn max_instr_len(&self) -> usize; - fn opcode_display_len(&self) -> usize; - fn associated_arch_by_addr(&self, addr: u64) -> CoreArchitecture; + /// How many bytes to display in the opcode space before displaying a `...`, typically set to + /// the [`Architecture::max_instr_len`], however, can be overridden to display a truncated opcode. 
+ fn opcode_display_len(&self) -> usize { + self.max_instr_len() + } + + /// In binaries with multiple architectures, you may wish to associate a specific architecture + /// with a given virtual address. This can be seen in armv7 where odd addresses are associated + /// with the thumb architecture. + fn associated_arch_by_addr(&self, _addr: u64) -> CoreArchitecture { + *self.as_ref() + } /// Returns the [`InstructionInfo`] at the given virtual address with `data`. /// @@ -129,6 +185,20 @@ pub trait Architecture: 'static + Sized + AsRef { /// next instruction will likely be incorrect. fn instruction_info(&self, data: &[u8], addr: u64) -> Option; + /// Disassembles a raw byte sequence into a human-readable list of text tokens. + /// + /// This function is responsible for the visual representation of assembly instructions. + /// It does *not* define semantics (use [`Architecture::instruction_llil`] for that); + /// it simply tells the UI how to print the instruction. + /// + /// # Returns + /// + /// An `Option` containing a tuple: + /// + /// * `usize`: The size of the decoded instruction in bytes. Is used to advance to the next instruction. + /// * `Vec`: A list of text tokens representing the instruction. + /// + /// Returns `None` if the bytes do not form a valid instruction. fn instruction_text( &self, data: &[u8], @@ -231,61 +301,179 @@ pub trait Architecture: 'static + Sized + AsRef { } fn registers_all(&self) -> Vec; + + fn register_from_id(&self, id: RegisterId) -> Option; + fn registers_full_width(&self) -> Vec; + + // TODO: Document the difference between global and system registers. fn registers_global(&self) -> Vec { Vec::new() } + + // TODO: Document the difference between global and system registers. fn registers_system(&self) -> Vec { Vec::new() } + /// List of concrete register stacks for this architecture. 
+ /// + /// You **must** override the following functions as well: + /// + /// - [`Architecture::register_stack_from_id`] fn register_stacks(&self) -> Vec { Vec::new() } - fn flags(&self) -> Vec { - Vec::new() + /// Get the [`Self::RegisterStack`] associated with the given [`RegisterStackId`]. + /// + /// You **must** override the following functions as well: + /// + /// - [`Architecture::register_stacks`] + fn register_stack_from_id(&self, _id: RegisterStackId) -> Option { + None } - fn flag_write_types(&self) -> Vec { + + /// List of concrete flags for this architecture. + /// + /// You **must** override the following functions as well: + /// + /// - [`Architecture::flag_from_id`] + /// - [`Architecture::flag_write_types`] + /// - [`Architecture::flag_write_from_id`] + /// - [`Architecture::flag_classes`] + /// - [`Architecture::flag_class_from_id`] + /// - [`Architecture::flag_groups`] + /// - [`Architecture::flag_group_from_id`] + fn flags(&self) -> Vec { Vec::new() } - fn flag_classes(&self) -> Vec { - Vec::new() + + /// Get the [`Self::Flag`] associated with the given [`FlagId`]. + /// + /// You **must** override the following functions as well: + /// + /// - [`Architecture::flags`] + /// - [`Architecture::flag_write_types`] + /// - [`Architecture::flag_write_from_id`] + /// - [`Architecture::flag_classes`] + /// - [`Architecture::flag_class_from_id`] + /// - [`Architecture::flag_groups`] + /// - [`Architecture::flag_group_from_id`] + fn flag_from_id(&self, _id: FlagId) -> Option { + None } - fn flag_groups(&self) -> Vec { + + /// List of concrete flag write types for this architecture. 
+ /// + /// You **must** override the following functions as well: + /// + /// - [`Architecture::flags`] + /// - [`Architecture::flag_from_id`] + /// - [`Architecture::flag_write_from_id`] + /// - [`Architecture::flag_classes`] + /// - [`Architecture::flag_class_from_id`] + /// - [`Architecture::flag_groups`] + /// - [`Architecture::flag_group_from_id`] + fn flag_write_types(&self) -> Vec { Vec::new() } - fn stack_pointer_reg(&self) -> Option; - fn link_reg(&self) -> Option { + /// Get the [`Self::FlagWrite`] associated with the given [`FlagWriteId`]. + /// + /// You **must** override the following functions as well: + /// + /// - [`Architecture::flags`] + /// - [`Architecture::flag_from_id`] + /// - [`Architecture::flag_write_types`] + /// - [`Architecture::flag_classes`] + /// - [`Architecture::flag_class_from_id`] + /// - [`Architecture::flag_groups`] + /// - [`Architecture::flag_group_from_id`] + fn flag_write_from_id(&self, _id: FlagWriteId) -> Option { None } - fn register_from_id(&self, id: RegisterId) -> Option; - - fn register_stack_from_id(&self, _id: RegisterStackId) -> Option { - None + /// List of concrete flag classes for this architecture. + /// + /// You **must** override the following functions as well: + /// + /// - [`Architecture::flags`] + /// - [`Architecture::flag_from_id`] + /// - [`Architecture::flag_write_from_id`] + /// - [`Architecture::flag_class_from_id`] + /// - [`Architecture::flag_groups`] + /// - [`Architecture::flag_group_from_id`] + fn flag_classes(&self) -> Vec { + Vec::new() } - fn flag_from_id(&self, _id: FlagId) -> Option { + /// Get the [`Self::FlagClass`] associated with the given [`FlagClassId`]. 
+ /// + /// You **must** override the following functions as well: + /// + /// - [`Architecture::flags`] + /// - [`Architecture::flag_from_id`] + /// - [`Architecture::flag_write_from_id`] + /// - [`Architecture::flag_classes`] + /// - [`Architecture::flag_groups`] + /// - [`Architecture::flag_group_from_id`] + fn flag_class_from_id(&self, _id: FlagClassId) -> Option { None } - fn flag_write_from_id(&self, _id: FlagWriteId) -> Option { - None + + /// List of concrete flag groups for this architecture. + /// + /// You **must** override the following functions as well: + /// + /// - [`Architecture::flags`] + /// - [`Architecture::flag_from_id`] + /// - [`Architecture::flag_write_from_id`] + /// - [`Architecture::flag_classes`] + /// - [`Architecture::flag_class_from_id`] + /// - [`Architecture::flag_group_from_id`] + fn flag_groups(&self) -> Vec { + Vec::new() } - fn flag_class_from_id(&self, _id: FlagClassId) -> Option { + + /// Get the [`Self::FlagGroup`] associated with the given [`FlagGroupId`]. + /// + /// You **must** override the following functions as well: + /// + /// - [`Architecture::flags`] + /// - [`Architecture::flag_from_id`] + /// - [`Architecture::flag_write_from_id`] + /// - [`Architecture::flag_classes`] + /// - [`Architecture::flag_class_from_id`] + /// - [`Architecture::flag_groups`] + fn flag_group_from_id(&self, _id: FlagGroupId) -> Option { None } - fn flag_group_from_id(&self, _id: FlagGroupId) -> Option { + + fn stack_pointer_reg(&self) -> Option; + + fn link_reg(&self) -> Option { None } + /// List of concrete intrinsics for this architecture. + /// + /// You **must** override the following functions as well: + /// + /// - [`Architecture::intrinsic_from_id`] fn intrinsics(&self) -> Vec { Vec::new() } + fn intrinsic_class(&self, _id: IntrinsicId) -> BNIntrinsicClass { BNIntrinsicClass::GeneralIntrinsicClass } + + /// Get the [`Self::Intrinsic`] associated with the given [`IntrinsicId`]. 
+ /// + /// You **must** override the following functions as well: + /// + /// - [`Architecture::intrinsics`] fn intrinsic_from_id(&self, _id: IntrinsicId) -> Option { None } @@ -293,6 +481,7 @@ pub trait Architecture: 'static + Sized + AsRef { fn can_assemble(&self) -> bool { false } + fn assemble(&self, _code: &str, _addr: u64) -> Result, String> { Err("Assemble unsupported".into()) } @@ -300,15 +489,19 @@ pub trait Architecture: 'static + Sized + AsRef { fn is_never_branch_patch_available(&self, _data: &[u8], _addr: u64) -> bool { false } + fn is_always_branch_patch_available(&self, _data: &[u8], _addr: u64) -> bool { false } + fn is_invert_branch_patch_available(&self, _data: &[u8], _addr: u64) -> bool { false } + fn is_skip_and_return_zero_patch_available(&self, _data: &[u8], _addr: u64) -> bool { false } + fn is_skip_and_return_value_patch_available(&self, _data: &[u8], _addr: u64) -> bool { false } @@ -587,6 +780,10 @@ impl Architecture for CoreArchitecture { } } + fn register_from_id(&self, id: RegisterId) -> Option { + CoreRegister::new(*self, id) + } + fn registers_full_width(&self) -> Vec { unsafe { let mut count: usize = 0; @@ -655,6 +852,10 @@ impl Architecture for CoreArchitecture { } } + fn register_stack_from_id(&self, id: RegisterStackId) -> Option { + CoreRegisterStack::new(*self, id) + } + fn flags(&self) -> Vec { unsafe { let mut count: usize = 0; @@ -672,6 +873,10 @@ impl Architecture for CoreArchitecture { } } + fn flag_from_id(&self, id: FlagId) -> Option { + CoreFlag::new(*self, id) + } + fn flag_write_types(&self) -> Vec { unsafe { let mut count: usize = 0; @@ -689,6 +894,10 @@ impl Architecture for CoreArchitecture { } } + fn flag_write_from_id(&self, id: FlagWriteId) -> Option { + CoreFlagWrite::new(*self, id) + } + fn flag_classes(&self) -> Vec { unsafe { let mut count: usize = 0; @@ -706,6 +915,10 @@ impl Architecture for CoreArchitecture { } } + fn flag_class_from_id(&self, id: FlagClassId) -> Option { + CoreFlagClass::new(*self, id) + 
} + fn flag_groups(&self) -> Vec { unsafe { let mut count: usize = 0; @@ -723,6 +936,10 @@ impl Architecture for CoreArchitecture { } } + fn flag_group_from_id(&self, id: FlagGroupId) -> Option { + CoreFlagGroup::new(*self, id) + } + fn stack_pointer_reg(&self) -> Option { match unsafe { BNGetArchitectureStackPointerRegister(self.handle) } { 0xffff_ffff => None, @@ -737,30 +954,6 @@ impl Architecture for CoreArchitecture { } } - fn register_from_id(&self, id: RegisterId) -> Option { - CoreRegister::new(*self, id) - } - - fn register_stack_from_id(&self, id: RegisterStackId) -> Option { - CoreRegisterStack::new(*self, id) - } - - fn flag_from_id(&self, id: FlagId) -> Option { - CoreFlag::new(*self, id) - } - - fn flag_write_from_id(&self, id: FlagWriteId) -> Option { - CoreFlagWrite::new(*self, id) - } - - fn flag_class_from_id(&self, id: FlagClassId) -> Option { - CoreFlagClass::new(*self, id) - } - - fn flag_group_from_id(&self, id: FlagGroupId) -> Option { - CoreFlagGroup::new(*self, id) - } - fn intrinsics(&self) -> Vec { unsafe { let mut count: usize = 0; @@ -889,10 +1082,7 @@ impl Debug for CoreArchitecture { .field("name", &self.name()) .field("endianness", &self.endianness()) .field("address_size", &self.address_size()) - .field("default_integer_size", &self.default_integer_size()) .field("instruction_alignment", &self.instruction_alignment()) - .field("max_instr_len", &self.max_instr_len()) - .field("opcode_display_len", &self.opcode_display_len()) .finish() } } @@ -1717,7 +1907,7 @@ where result.offset = info.offset(); result.size = info.size(); - result.extend = info.implicit_extend(); + result.extend = info.implicit_extend().into(); } } diff --git a/rust/src/architecture/branches.rs b/rust/src/architecture/branches.rs index 80baa1b65..a6810b345 100644 --- a/rust/src/architecture/branches.rs +++ b/rust/src/architecture/branches.rs @@ -22,7 +22,7 @@ pub enum BranchKind { #[derive(Default, Copy, Clone, PartialEq, Eq, Hash, Debug)] pub struct BranchInfo { - 
/// If `None` the target architecture is the same as the branch instruction. + /// If `None`, the target architecture is the same as the branching instruction. pub arch: Option, pub kind: BranchKind, } diff --git a/rust/src/architecture/intrinsic.rs b/rust/src/architecture/intrinsic.rs index 4ad5ee2af..5d9827a13 100644 --- a/rust/src/architecture/intrinsic.rs +++ b/rust/src/architecture/intrinsic.rs @@ -32,7 +32,7 @@ pub trait Intrinsic: Debug + Sized + Clone + Copy { fn outputs(&self) -> Vec>>; } -/// Type for architrectures that do not use intrinsics. Will panic if accessed as an intrinsic. +/// Type for architectures that do not use intrinsics. Will panic if accessed as an intrinsic. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct UnusedIntrinsic; diff --git a/rust/src/architecture/register.rs b/rust/src/architecture/register.rs index 50bb38ba7..1f17c7857 100644 --- a/rust/src/architecture/register.rs +++ b/rust/src/architecture/register.rs @@ -6,8 +6,6 @@ use std::ffi::CStr; use std::fmt::{Debug, Formatter}; use std::hash::Hash; -pub use binaryninjacore_sys::BNImplicitRegisterExtend as ImplicitRegisterExtend; - crate::new_id_type!(RegisterId, u32); impl RegisterId { @@ -18,59 +16,133 @@ impl RegisterId { crate::new_id_type!(RegisterStackId, u32); +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub enum ImplicitRegisterExtend { + /// The upper bits of the parent register are preserved (untouched). + /// + /// # Example (x86-64) + /// + /// Executing `inc al` only modifies the lowest 8 bits of `rax`. The upper 56 bits of `rax` remain + /// completely unchanged. + NoExtend = 0, + /// The upper bits of the parent register are zeroed out. + /// + /// # Example (x86-64) + /// + /// Executing `mov eax, 1` writes `1` to the lower 32 bits of `rax`, but implicitly **clears** the + /// upper 32 bits of `rax` to zero. + ZeroExtendToFullWidth, + /// The upper bits of the parent register are filled with the sign bit (MSB) of the value written. 
+ SignExtendToFullWidth, +} + +impl From for ImplicitRegisterExtend { + fn from(value: BNImplicitRegisterExtend) -> Self { + match value { + BNImplicitRegisterExtend::NoExtend => Self::NoExtend, + BNImplicitRegisterExtend::ZeroExtendToFullWidth => Self::ZeroExtendToFullWidth, + BNImplicitRegisterExtend::SignExtendToFullWidth => Self::SignExtendToFullWidth, + } + } +} + +impl From for BNImplicitRegisterExtend { + fn from(value: ImplicitRegisterExtend) -> Self { + match value { + ImplicitRegisterExtend::NoExtend => Self::NoExtend, + ImplicitRegisterExtend::ZeroExtendToFullWidth => Self::ZeroExtendToFullWidth, + ImplicitRegisterExtend::SignExtendToFullWidth => Self::SignExtendToFullWidth, + } + } +} + +/// Information about a register. pub trait RegisterInfo: Sized { type RegType: Register; + /// The register that this register is an alias of. + /// + /// # Example (x86-64) + /// + /// The register `rax` is a parent of the register `eax`. fn parent(&self) -> Option; + + /// Size of the register in bytes. fn size(&self) -> usize; + + /// Offset of the register in bytes from the start of the containing [`RegisterInfo::parent`]. fn offset(&self) -> usize; + + /// Used when this register aliases a logical register to determine what happens to the upper bits. fn implicit_extend(&self) -> ImplicitRegisterExtend; } pub trait Register: Debug + Sized + Clone + Copy + Hash + Eq { type InfoType: RegisterInfo; + /// The displayed name of the register, such as "eax". fn name(&self) -> Cow<'_, str>; + fn info(&self) -> Self::InfoType; /// Unique identifier for this `Register`. /// - /// *MUST* be in the range [0, 0x7fff_ffff] + /// NOTE: *MUST* be in the range [0, 0x7fff_ffff] fn id(&self) -> RegisterId; } +/// Information about a register stack. pub trait RegisterStackInfo: Sized { type RegStackType: RegisterStack; type RegType: Register; type RegInfoType: RegisterInfo; + // TODO: Return a list of the registers instead? 
+ /// The sequence of physical registers that back this stack. + /// + /// This defines the absolute storage locations in the hardware, ignoring the current stack pointer. + /// + /// Return the start of the "fake" registers defined. The core requires that the id's be contiguous + /// as you only return the **first** storage register and the count. + /// + /// # Example (x87 FPU) + /// + /// [`RegisterStackInfo::top_relative_regs`] with (REG_ST0, 8) and then define here (REG_PHYSICAL_0, 8). fn storage_regs(&self) -> (Self::RegType, usize); - fn top_relative_regs(&self) -> Option<(Self::RegType, usize)>; - fn stack_top_reg(&self) -> Self::RegType; -} - -/// Type for architectures that do not use register stacks. Will panic if accessed as a register stack. -#[derive(Clone, Copy, PartialEq, Eq, Hash)] -pub struct UnusedRegisterStackInfo { - _reg: std::marker::PhantomData, -} -impl RegisterStackInfo for UnusedRegisterStackInfo { - type RegStackType = UnusedRegisterStack; - type RegType = R; - type RegInfoType = R::InfoType; + // TODO: Return a list of the registers instead? + /// The sequence of registers used to access the stack relative to the current top. + /// + /// Return the start of the relative registers defined. The core requires that the id's be contiguous + /// as you only return the **first** relative register and the count. + /// + /// # Example (x87 FPU) + /// + /// Returns (REG_ST0, 8), where the id's of all the later relative registers are contiguous. + fn top_relative_regs(&self) -> Option<(Self::RegType, usize)>; - fn storage_regs(&self) -> (Self::RegType, usize) { - unreachable!() - } - fn top_relative_regs(&self) -> Option<(Self::RegType, usize)> { - unreachable!() - } - fn stack_top_reg(&self) -> Self::RegType { - unreachable!() - } + /// The specific register that holds the index of the current stack top. + /// + /// The value in this register determines which physical `storage_reg` corresponds + /// to the first `top_relative_reg`. 
+ /// + /// # Example (x87 FPU) + /// + /// Returns the `TOP` as a fake register. + /// + /// * If `TOP` == 0: `top_relative_regs[0]` maps to `storage_regs[0]`. + /// * If `TOP` == 1: `top_relative_regs[0]` maps to `storage_regs[1]`. + fn stack_top_reg(&self) -> Self::RegType; } +/// Register stacks are used in architectures where registers are accessed relative to a +/// dynamic stack pointer rather than by fixed names. +/// +/// For more information see [`RegisterStackInfo`]. +/// +/// # Example +/// The **x87 FPU** on x86 uses a register stack (`ST(0)` through `ST(7)`). +/// Pushing a value decrements the stack top pointer; popping increments it. pub trait RegisterStack: Debug + Sized + Clone + Copy { type InfoType: RegisterStackInfo< RegType = Self::RegType, @@ -96,7 +168,7 @@ pub struct UnusedRegisterStack { } impl RegisterStack for UnusedRegisterStack { - type InfoType = UnusedRegisterStackInfo; + type InfoType = Self; type RegType = R; type RegInfoType = R::InfoType; @@ -111,6 +183,22 @@ impl RegisterStack for UnusedRegisterStack { } } +impl RegisterStackInfo for UnusedRegisterStack { + type RegStackType = Self; + type RegType = R; + type RegInfoType = R::InfoType; + + fn storage_regs(&self) -> (Self::RegType, usize) { + unreachable!() + } + fn top_relative_regs(&self) -> Option<(Self::RegType, usize)> { + unreachable!() + } + fn stack_top_reg(&self) -> Self::RegType { + unreachable!() + } +} + #[derive(Debug, Copy, Clone)] pub struct CoreRegisterInfo { arch: CoreArchitecture, @@ -147,7 +235,7 @@ impl RegisterInfo for CoreRegisterInfo { } fn implicit_extend(&self) -> ImplicitRegisterExtend { - self.info.extend + self.info.extend.into() } } From f45f9f0e8e2311bdf1ff347b806c3e095a815e5e Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Mon, 8 Dec 2025 19:19:34 -0500 Subject: [PATCH 08/19] [Rust] Make `InstructionTextToken` field `expr_index` optional There was a TODO there to document that it is optional, we should just wrap it in `Option` instead. 
--- rust/src/disassembly.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/rust/src/disassembly.rs b/rust/src/disassembly.rs index b59800065..138bcd8a8 100644 --- a/rust/src/disassembly.rs +++ b/rust/src/disassembly.rs @@ -19,11 +19,11 @@ use crate::architecture::Architecture; use crate::architecture::CoreArchitecture; use crate::basic_block::BasicBlock; use crate::function::{Location, NativeBlock}; -use crate::high_level_il as hlil; use crate::low_level_il as llil; use crate::medium_level_il as mlil; use crate::string::IntoCStr; use crate::string::{raw_to_string, strings_to_string_list, BnString}; +use crate::{high_level_il as hlil, BN_INVALID_EXPR}; use crate::rc::*; @@ -266,8 +266,7 @@ pub struct InstructionTextToken { pub text: String, pub confidence: u8, pub context: InstructionTextTokenContext, - // TODO: Document that this is not necessary to set and that this is valid in a limited context. - pub expr_index: usize, + pub expr_index: Option, pub kind: InstructionTextTokenKind, } @@ -278,7 +277,10 @@ impl InstructionTextToken { text: raw_to_string(value.text).unwrap(), confidence: value.confidence, context: value.context.into(), - expr_index: value.exprIndex, + expr_index: match value.exprIndex { + BN_INVALID_EXPR => None, + index => Some(index), + }, kind: InstructionTextTokenKind::from_raw(value), } } @@ -305,7 +307,7 @@ impl InstructionTextToken { // NOTE: Expected to be freed with `InstructionTextToken::free_raw`. 
typeNames: strings_to_string_list(&type_names), namesCount: type_names.len(), - exprIndex: value.expr_index, + exprIndex: value.expr_index.unwrap_or(BN_INVALID_EXPR), } } @@ -326,7 +328,7 @@ impl InstructionTextToken { text: text.into(), confidence: MAX_CONFIDENCE, context: InstructionTextTokenContext::Normal, - expr_index: 0, + expr_index: None, kind, } } @@ -341,7 +343,7 @@ impl InstructionTextToken { text: text.into(), confidence: MAX_CONFIDENCE, context: InstructionTextTokenContext::Normal, - expr_index: 0, + expr_index: None, kind, } } From b8580bd78b11a5a3b14b6afd57f076e484d6ba4c Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Mon, 8 Dec 2025 19:21:30 -0500 Subject: [PATCH 09/19] [Rust] Add simple architecture tests Still need to add a custom architecture for unit tests like in python --- rust/tests/architecture.rs | 77 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 rust/tests/architecture.rs diff --git a/rust/tests/architecture.rs b/rust/tests/architecture.rs new file mode 100644 index 000000000..07a333648 --- /dev/null +++ b/rust/tests/architecture.rs @@ -0,0 +1,77 @@ +use binaryninja::architecture::{Architecture, CoreArchitecture}; +use binaryninja::disassembly::{ + InstructionTextToken, InstructionTextTokenContext, InstructionTextTokenKind, +}; +use binaryninja::headless::Session; + +#[test] +fn test_architecture_info() { + let _session = Session::new().expect("Failed to initialize session"); + let arch = CoreArchitecture::by_name("x86_64").expect("Failed to get architecture"); + assert_eq!(arch.name(), "x86_64"); + assert_eq!(arch.endianness(), binaryninja::Endianness::LittleEndian); + assert_eq!(arch.address_size(), 8); +} + +#[test] +fn test_architecture_disassembly() { + let _session = Session::new().expect("Failed to initialize session"); + let arch = CoreArchitecture::by_name("x86_64").expect("Failed to get architecture"); + + // mov rax, 0x10 + let data = b"\x48\xC7\xC0\x10\x00\x00\x00"; + let address = 
0x1000; + + let (instr_len, tokens) = arch + .instruction_text(data, address) + .expect("Failed to disassemble instruction"); + assert_eq!(instr_len, 7); + + let expected_tokens: Vec = vec![ + InstructionTextToken { + address: 0, + text: "mov".to_string(), + confidence: 255, + context: InstructionTextTokenContext::Normal, + expr_index: None, + kind: InstructionTextTokenKind::Instruction, + }, + InstructionTextToken { + address: 0, + text: " ".to_string(), + confidence: 255, + context: InstructionTextTokenContext::Normal, + expr_index: None, + kind: InstructionTextTokenKind::Text, + }, + InstructionTextToken { + address: 0, + text: "rax".to_string(), + confidence: 255, + context: InstructionTextTokenContext::Normal, + expr_index: None, + kind: InstructionTextTokenKind::Register, + }, + InstructionTextToken { + address: 0, + text: ", ".to_string(), + confidence: 255, + context: InstructionTextTokenContext::Normal, + expr_index: None, + kind: InstructionTextTokenKind::OperandSeparator, + }, + InstructionTextToken { + address: 0, + text: "0x10".to_string(), + confidence: 255, + context: InstructionTextTokenContext::Normal, + expr_index: None, + kind: InstructionTextTokenKind::PossibleAddress { + value: 16, + size: Some(8), + }, + }, + ]; + + assert_eq!(tokens, expected_tokens); +} From a389f1490eded84cf5f35b97d3a29a75096875eb Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Mon, 8 Dec 2025 19:54:31 -0500 Subject: [PATCH 10/19] [Rust] More architecture module documentation and misc cleanup --- rust/src/architecture.rs | 98 +++++++++++++++++++++++++++++----------- 1 file changed, 72 insertions(+), 26 deletions(-) diff --git a/rust/src/architecture.rs b/rust/src/architecture.rs index d85faacae..297fabf8e 100644 --- a/rust/src/architecture.rs +++ b/rust/src/architecture.rs @@ -12,9 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 
Architectures provide disassembly, lifting, and associated metadata about a CPU to inform analysis and decompilation. +//! Architectures provide disassembly, lifting, and associated metadata about a CPU to inform +//! analysis and decompilation. +//! +//! For more information see the [`Architecture`] trait and the [`CoreArchitecture`] structure for +//! querying already registered architectures. -// container abstraction to avoid Vec<> (want CoreArchFlagList, CoreArchRegList) // RegisterInfo purge use binaryninjacore_sys::*; use std::fmt::{Debug, Formatter}; @@ -91,6 +94,16 @@ macro_rules! new_id_type { }; } +/// The [`Architecture`] trait is the backbone of Binary Ninja's analysis capabilities. It tells the +/// core how to interpret the machine code into LLIL, a generic intermediate representation for +/// program analysis. +/// +/// To add support for a new Instruction Set Architecture (ISA), you must implement this trait and +/// register it. The core analysis loop relies on your implementation for three critical stages: +/// +/// 1. **Disassembly ([`Architecture::instruction_text`])**: Machine code into human-readable text (e.g., `55` -> `push rbp`). +/// 2. **Control Flow Analysis ([`Architecture::instruction_info`])**: Identifying where execution goes next (e.g., "This is a `call` instruction, it targets address `0x401000`"). +/// 3. **Lifting ([`Architecture::instruction_llil`])**: Translating machine code into **Low Level Intermediate Language (LLIL)**, which enables decompilation and automated analysis. pub trait Architecture: 'static + Sized + AsRef { type Handle: Borrow + Clone; @@ -316,6 +329,12 @@ pub trait Architecture: 'static + Sized + AsRef { Vec::new() } + fn stack_pointer_reg(&self) -> Option; + + fn link_reg(&self) -> Option { + None + } + /// List of concrete register stacks for this architecture. 
/// /// You **must** override the following functions as well: @@ -450,12 +469,6 @@ pub trait Architecture: 'static + Sized + AsRef { None } - fn stack_pointer_reg(&self) -> Option; - - fn link_reg(&self) -> Option { - None - } - /// List of concrete intrinsics for this architecture. /// /// You **must** override the following functions as well: @@ -478,30 +491,51 @@ pub trait Architecture: 'static + Sized + AsRef { None } + /// Let the UI display this patch option. + /// + /// If set to true, you must override [`Architecture::assemble`]. fn can_assemble(&self) -> bool { false } + /// Assemble the code at the specified address and return the machine code in bytes. + /// + /// If overridden, you must set [`Architecture::can_assemble`] to `true`. fn assemble(&self, _code: &str, _addr: u64) -> Result, String> { Err("Assemble unsupported".into()) } - fn is_never_branch_patch_available(&self, _data: &[u8], _addr: u64) -> bool { - false + /// Let the UI display this patch option. + /// + /// If set to true, you must override [`Architecture::invert_branch`]. + fn is_never_branch_patch_available(&self, data: &[u8], addr: u64) -> bool { + self.is_invert_branch_patch_available(data, addr) } + /// Let the UI display this patch option. + /// + /// If set to true, you must override [`Architecture::always_branch`]. fn is_always_branch_patch_available(&self, _data: &[u8], _addr: u64) -> bool { false } + /// Let the UI display this patch option. + /// + /// If set to true, you must override [`Architecture::invert_branch`]. fn is_invert_branch_patch_available(&self, _data: &[u8], _addr: u64) -> bool { false } - fn is_skip_and_return_zero_patch_available(&self, _data: &[u8], _addr: u64) -> bool { - false + /// Let the UI display this patch option. + /// + /// If set to true, you must override [`Architecture::skip_and_return_value`]. 
+ fn is_skip_and_return_zero_patch_available(&self, data: &[u8], addr: u64) -> bool { + self.is_skip_and_return_value_patch_available(data, addr) } + /// Let the UI display this patch option. + /// + /// If set to true, you must override [`Architecture::skip_and_return_value`]. fn is_skip_and_return_value_patch_available(&self, _data: &[u8], _addr: u64) -> bool { false } @@ -510,14 +544,23 @@ pub trait Architecture: 'static + Sized + AsRef { false } + /// Patch the instruction to always branch. + /// + /// If overridden, you must also override [`Architecture::is_always_branch_patch_available`]. fn always_branch(&self, _data: &mut [u8], _addr: u64) -> bool { false } + /// Patch the instruction to invert the branch condition. + /// + /// If overridden, you must also override [`Architecture::is_invert_branch_patch_available`]. fn invert_branch(&self, _data: &mut [u8], _addr: u64) -> bool { false } + /// Patch the instruction to skip and return value. + /// + /// If overridden, you must also override [`Architecture::is_skip_and_return_value_patch_available`]. 
fn skip_and_return_value(&self, _data: &mut [u8], _addr: u64, _value: u64) -> bool { false } @@ -835,6 +878,20 @@ impl Architecture for CoreArchitecture { } } + fn stack_pointer_reg(&self) -> Option { + match unsafe { BNGetArchitectureStackPointerRegister(self.handle) } { + 0xffff_ffff => None, + reg => Some(CoreRegister::new(*self, reg.into())?), + } + } + + fn link_reg(&self) -> Option { + match unsafe { BNGetArchitectureLinkRegister(self.handle) } { + 0xffff_ffff => None, + reg => Some(CoreRegister::new(*self, reg.into())?), + } + } + fn register_stacks(&self) -> Vec { unsafe { let mut count: usize = 0; @@ -940,20 +997,6 @@ impl Architecture for CoreArchitecture { CoreFlagGroup::new(*self, id) } - fn stack_pointer_reg(&self) -> Option { - match unsafe { BNGetArchitectureStackPointerRegister(self.handle) } { - 0xffff_ffff => None, - reg => Some(CoreRegister::new(*self, reg.into())?), - } - } - - fn link_reg(&self) -> Option { - match unsafe { BNGetArchitectureLinkRegister(self.handle) } { - 0xffff_ffff => None, - reg => Some(CoreRegister::new(*self, reg.into())?), - } - } - fn intrinsics(&self) -> Vec { unsafe { let mut count: usize = 0; @@ -1221,6 +1264,9 @@ pub trait ArchitectureExt: Architecture { impl ArchitectureExt for T {} +/// Registers a new architecture with the given name. +/// +/// NOTE: This function should only be called within `CorePluginInit`. 
pub fn register_architecture(name: &str, func: F) -> &'static A where A: 'static + Architecture> + Send + Sync + Sized, From 67e6b384496e51696872bafe2ebe7e2a3586f636 Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Mon, 8 Dec 2025 20:34:56 -0500 Subject: [PATCH 11/19] [Rust] Add string reader helpers and fix `analysis_info` - `analysis_info` was causing a double free with function refs, also did not need to be wrapped in a Result --- rust/src/binary_view.rs | 75 +++++++++++++++++++++++++++++---------- rust/tests/binary_view.rs | 8 ++++- 2 files changed, 63 insertions(+), 20 deletions(-) diff --git a/rust/src/binary_view.rs b/rust/src/binary_view.rs index 9952cf286..0ab2504c4 100644 --- a/rust/src/binary_view.rs +++ b/rust/src/binary_view.rs @@ -59,9 +59,9 @@ use crate::types::{ NamedTypeReference, QualifiedName, QualifiedNameAndType, QualifiedNameTypeAndId, Type, }; use crate::variable::DataVariable; -use crate::Endianness; +use crate::{Endianness, BN_FULL_CONFIDENCE}; use std::collections::HashMap; -use std::ffi::{c_char, c_void}; +use std::ffi::{c_char, c_void, CString}; use std::ops::Range; use std::path::Path; use std::ptr::NonNull; @@ -222,10 +222,8 @@ pub trait BinaryViewExt: BinaryViewBase { /// Reads up to `len` bytes from address `offset` fn read_vec(&self, offset: u64, len: usize) -> Vec { let mut ret = vec![0; len]; - let size = self.read(&mut ret, offset); ret.truncate(size); - ret } @@ -238,6 +236,22 @@ pub trait BinaryViewExt: BinaryViewBase { read_size } + /// Reads up to `len` bytes from the address `offset` returning a `CString` if available. + fn read_c_string_at(&self, offset: u64, len: usize) -> Option { + let mut buf = vec![0; len]; + let size = self.read(&mut buf, offset); + let string = CString::new(buf[..size].to_vec()).ok()?; + Some(string) + } + + /// Reads up to `len` bytes from the address `offset` returning a `String` if available. 
+ fn read_utf8_string_at(&self, offset: u64, len: usize) -> Option { + let mut buf = vec![0; len]; + let size = self.read(&mut buf, offset); + let string = String::from_utf8(buf[..size].to_vec()).ok()?; + Some(string) + } + /// Search the view using the query options. /// /// In the `on_match` callback return `false` to stop searching. @@ -562,17 +576,15 @@ pub trait BinaryViewExt: BinaryViewBase { } } - fn analysis_info(&self) -> Result { - let info_ref = unsafe { BNGetAnalysisInfo(self.as_ref().handle) }; - if info_ref.is_null() { - return Err(()); - } - let info = unsafe { *info_ref }; + fn analysis_info(&self) -> AnalysisInfo { + let info_ptr = unsafe { BNGetAnalysisInfo(self.as_ref().handle) }; + assert!(!info_ptr.is_null()); + let info = unsafe { *info_ptr }; let active_infos = unsafe { slice::from_raw_parts(info.activeInfo, info.count) }; let mut active_info_list = vec![]; for active_info in active_infos { - let func = unsafe { Function::ref_from_raw(active_info.func) }; + let func = unsafe { Function::from_raw(active_info.func).to_owned() }; active_info_list.push(ActiveAnalysisInfo { func, analysis_time: active_info.analysisTime, @@ -584,11 +596,11 @@ pub trait BinaryViewExt: BinaryViewBase { let result = AnalysisInfo { state: info.state, analysis_time: info.analysisTime, - active_info: vec![], + active_info: active_info_list, }; - unsafe { BNFreeAnalysisInfo(info_ref) }; - Ok(result) + unsafe { BNFreeAnalysisInfo(info_ptr) }; + result } fn analysis_progress(&self) -> AnalysisProgress { @@ -772,7 +784,7 @@ pub trait BinaryViewExt: BinaryViewBase { } else { std::ptr::null_mut() }, - confidence: 255, // BN_FULL_CONFIDENCE + confidence: BN_FULL_CONFIDENCE, }; unsafe { @@ -2235,6 +2247,14 @@ pub trait BinaryViewExt: BinaryViewBase { /// NOTE: This returns a list of [`StringReference`] as strings may not be representable /// as a [`String`] or even a [`BnString`]. 
It is the caller's responsibility to read the underlying /// data and convert it to a representable form. + /// + /// Some helpers for reading strings are available: + /// + /// - [`BinaryViewExt::read_c_string_at`] + /// - [`BinaryViewExt::read_utf8_string_at`] + /// + /// NOTE: This returns discovered strings and is therefore governed by `analysis.limits.minStringLength` + /// and other settings. fn strings(&self) -> Array { unsafe { let mut count = 0; @@ -2245,13 +2265,22 @@ pub trait BinaryViewExt: BinaryViewBase { /// Retrieve the string that falls on a given virtual address. /// - /// NOTE: This returns discovered strings and is therefore governed by `analysis.limits.minStringLength` and other settings. - fn string_at(&self, addr: u64) -> Option { + /// NOTE: This returns a [`StringReference`] since strings may not be representable as a Rust + /// [`String`] or even a [`BnString`]. It is the caller's responsibility to read the underlying + /// data and convert it to a representable form. + /// + /// Some helpers for reading strings are available: + /// + /// - [`BinaryViewExt::read_c_string_at`] + /// - [`BinaryViewExt::read_utf8_string_at`] + /// + /// NOTE: This returns discovered strings and is therefore governed by `analysis.limits.minStringLength` + /// and other settings. + fn string_at(&self, addr: u64) -> Option { let mut str_ref = BNStringReference::default(); let success = unsafe { BNGetStringAtAddress(self.as_ref().handle, addr, &mut str_ref) }; - if success { - Some(str_ref) + Some(str_ref.into()) } else { None } @@ -2262,6 +2291,14 @@ pub trait BinaryViewExt: BinaryViewBase { /// NOTE: This returns a list of [`StringReference`] as strings may not be representable /// as a [`String`] or even a [`BnString`]. It is the caller's responsibility to read the underlying /// data and convert it to a representable form. 
+ /// + /// Some helpers for reading strings are available: + /// + /// - [`BinaryViewExt::read_c_string_at`] + /// - [`BinaryViewExt::read_utf8_string_at`] + /// + /// NOTE: This returns discovered strings and is therefore governed by `analysis.limits.minStringLength` + /// and other settings. fn strings_in_range(&self, range: Range) -> Array { unsafe { let mut count = 0; diff --git a/rust/tests/binary_view.rs b/rust/tests/binary_view.rs index a6c7b919c..c2a98da7c 100644 --- a/rust/tests/binary_view.rs +++ b/rust/tests/binary_view.rs @@ -1,5 +1,5 @@ use binaryninja::binary_view::search::SearchQuery; -use binaryninja::binary_view::{AnalysisState, BinaryViewBase, BinaryViewExt}; +use binaryninja::binary_view::{AnalysisState, BinaryViewBase, BinaryViewExt, StringType}; use binaryninja::data_buffer::DataBuffer; use binaryninja::function::{Function, FunctionViewType}; use binaryninja::headless::Session; @@ -99,6 +99,12 @@ fn test_binary_view_strings() { .expect("Failed to find string 'Microsoft (R) Optimizing Compiler'"); assert_eq!(str_15dc.start, image_base + 0x15dc); assert_eq!(str_15dc.length, 33); + assert_eq!(str_15dc.ty, StringType::AsciiString); + + let string = view + .read_c_string_at(str_15dc.start, str_15dc.length) + .expect("Failed to read string"); + assert_eq!(string, c"Microsoft (R) Optimizing Compiler"); } #[test] From 1f0b28fe9621eeb09e06616551c3fe3b9f454767 Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Tue, 9 Dec 2025 13:24:47 -0500 Subject: [PATCH 12/19] [Rust] Impl `Display` for `Symbol` --- rust/README.md | 2 +- rust/examples/simple.rs | 2 +- rust/src/symbol.rs | 8 +++++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/rust/README.md b/rust/README.md index 1edd1d90c..a18d46cd4 100644 --- a/rust/README.md +++ b/rust/README.md @@ -34,7 +34,7 @@ fn main() { println!("Function count: {}", bv.functions().len()); for func in &bv.functions() { - println!("{}:", func.symbol().full_name()); + println!("{}: {}", func.start(), 
func.symbol()); } } ``` diff --git a/rust/examples/simple.rs b/rust/examples/simple.rs index c6065d48e..f01d5267a 100644 --- a/rust/examples/simple.rs +++ b/rust/examples/simple.rs @@ -17,7 +17,7 @@ fn main() { println!("Function count: {}", bv.functions().len()); for func in &bv.functions() { - println!("{:?}:", func.symbol().full_name()); + println!("{}:", func.symbol()); for basic_block in &func.basic_blocks() { // TODO : This is intended to be refactored to be more nice to work with soon(TM) for addr in basic_block.as_ref() { diff --git a/rust/src/symbol.rs b/rust/src/symbol.rs index 094aeba75..fac8cae56 100644 --- a/rust/src/symbol.rs +++ b/rust/src/symbol.rs @@ -15,7 +15,7 @@ //! Interfaces for the various kinds of symbols in a binary. use std::fmt; -use std::fmt::Debug; +use std::fmt::{Debug, Display, Formatter}; use std::hash::{Hash, Hasher}; use std::ptr; @@ -349,3 +349,9 @@ impl PartialEq for Symbol { self.handle == other.handle } } + +impl Display for Symbol { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.write_str(&self.short_name().to_string_lossy()) + } +} From 120afebc4073d9887bfbb5426a5fa2de15842ee7 Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Tue, 9 Dec 2025 13:57:08 -0500 Subject: [PATCH 13/19] [Rust] Impl `Display` for `FileMetadata` --- rust/README.md | 2 +- rust/examples/simple.rs | 2 +- rust/src/file_metadata.rs | 10 ++++++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/rust/README.md b/rust/README.md index a18d46cd4..60c4847c2 100644 --- a/rust/README.md +++ b/rust/README.md @@ -29,7 +29,7 @@ fn main() { .load("/bin/cat") .expect("Couldn't open `/bin/cat`"); - println!("Filename: `{}`", bv.file().filename()); + println!("File: `{}`", bv.file()); println!("File size: `{:#x}`", bv.len()); println!("Function count: {}", bv.functions().len()); diff --git a/rust/examples/simple.rs b/rust/examples/simple.rs index f01d5267a..66dbdfc08 100644 --- a/rust/examples/simple.rs +++ b/rust/examples/simple.rs @@ -12,7 
+12,7 @@ fn main() { .load("/bin/cat") .expect("Couldn't open `/bin/cat`"); - println!("Filename: `{}`", bv.file().filename()); + println!("File: `{}`", bv.file()); println!("File size: `{:#x}`", bv.len()); println!("Function count: {}", bv.functions().len()); diff --git a/rust/src/file_metadata.rs b/rust/src/file_metadata.rs index 86e28a3e2..dc9d36ff8 100644 --- a/rust/src/file_metadata.rs +++ b/rust/src/file_metadata.rs @@ -27,7 +27,7 @@ use binaryninjacore_sys::{ }; use binaryninjacore_sys::{BNCreateDatabaseWithProgress, BNOpenExistingDatabaseWithProgress}; use std::ffi::c_void; -use std::fmt::Debug; +use std::fmt::{Debug, Display, Formatter}; use std::path::Path; use crate::progress::ProgressCallback; @@ -375,7 +375,7 @@ impl FileMetadata { } impl Debug for FileMetadata { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.debug_struct("FileMetadata") .field("filename", &self.filename()) .field("session_id", &self.session_id()) @@ -388,6 +388,12 @@ impl Debug for FileMetadata { } } +impl Display for FileMetadata { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.filename()) + } +} + unsafe impl Send for FileMetadata {} unsafe impl Sync for FileMetadata {} From 6855bd1457acc54059d145c5cf2168589805afc6 Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Tue, 9 Dec 2025 15:04:18 -0500 Subject: [PATCH 14/19] [Rust] Remove deprecated MLIL functions These have been deprecated since last stable, so they will be removed now. I do believe that there might be some external users of this api so we should alert them. 
--- rust/src/medium_level_il/function.rs | 68 ---------------------------- 1 file changed, 68 deletions(-) diff --git a/rust/src/medium_level_il/function.rs b/rust/src/medium_level_il/function.rs index e5f72cc1e..950f07f53 100644 --- a/rust/src/medium_level_il/function.rs +++ b/rust/src/medium_level_il/function.rs @@ -8,12 +8,10 @@ use super::{ }; use crate::architecture::CoreArchitecture; use crate::basic_block::BasicBlock; -use crate::confidence::Conf; use crate::disassembly::DisassemblySettings; use crate::flowgraph::FlowGraph; use crate::function::{Function, Location}; use crate::rc::{Array, CoreArrayProvider, CoreArrayProviderInner, Ref, RefCountable}; -use crate::types::Type; use crate::variable::{PossibleValueSet, RegisterValue, SSAVariable, UserVariableValue, Variable}; // TODO: Does this belong here? @@ -116,44 +114,6 @@ impl MediumLevelILFunction { unsafe { Array::new(blocks, count, context) } } - #[deprecated = "Use `Function::create_user_stack_var` instead"] - pub fn create_user_stack_var<'a, C: Into>>( - &self, - offset: i64, - var_type: C, - name: &str, - ) { - self.function() - .create_user_stack_var(offset, var_type, name) - } - - #[deprecated = "Use `Function::delete_user_stack_var` instead"] - pub fn delete_user_stack_var(&self, offset: i64) { - self.function().delete_user_stack_var(offset) - } - - #[deprecated = "Use `Function::create_user_var` instead"] - pub fn create_user_var<'a, C: Into>>( - &self, - var: &Variable, - var_type: C, - name: &str, - ignore_disjoint_uses: bool, - ) { - self.function() - .create_user_var(var, var_type, name, ignore_disjoint_uses) - } - - #[deprecated = "Use `Function::delete_user_var` instead"] - pub fn delete_user_var(&self, var: &Variable) { - self.function().delete_user_var(var) - } - - #[deprecated = "Use `Function::is_var_user_defined` instead"] - pub fn is_var_user_defined(&self, var: &Variable) -> bool { - self.function().is_var_user_defined(var) - } - /// Allows the user to specify a PossibleValueSet value 
for an MLIL /// variable at its definition site. /// @@ -245,34 +205,6 @@ impl MediumLevelILFunction { Ok(()) } - #[deprecated = "Use `Function::create_auto_stack_var` instead"] - pub fn create_auto_stack_var<'a, T: Into>>( - &self, - offset: i64, - var_type: T, - name: &str, - ) { - self.function() - .create_auto_stack_var(offset, var_type, name) - } - - #[deprecated = "Use `Function::delete_auto_stack_var` instead"] - pub fn delete_auto_stack_var(&self, offset: i64) { - self.function().delete_auto_stack_var(offset) - } - - #[deprecated = "Use `Function::create_auto_var` instead"] - pub fn create_auto_var<'a, C: Into>>( - &self, - var: &Variable, - var_type: C, - name: &str, - ignore_disjoint_uses: bool, - ) { - self.function() - .create_auto_var(var, var_type, name, ignore_disjoint_uses) - } - /// Returns a list of ILReferenceSource objects (IL xrefs or cross-references) /// that reference the given variable. The variable is a local variable that can be either on the stack, /// in a register, or in a flag. 
From 1ca380b1acd512a905053df454d4d9ee82e6d045 Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Tue, 9 Dec 2025 15:05:04 -0500 Subject: [PATCH 15/19] [Rust] Refactor `AnalysisProgress` returned from `BinaryViewExt::analysis_progress` --- rust/src/binary_view.rs | 65 ++++++++++++++++++++++++++++++++------- rust/tests/binary_view.rs | 8 +++-- 2 files changed, 59 insertions(+), 14 deletions(-) diff --git a/rust/src/binary_view.rs b/rust/src/binary_view.rs index 0ab2504c4..d0b22103c 100644 --- a/rust/src/binary_view.rs +++ b/rust/src/binary_view.rs @@ -62,6 +62,7 @@ use crate::variable::DataVariable; use crate::{Endianness, BN_FULL_CONFIDENCE}; use std::collections::HashMap; use std::ffi::{c_char, c_void, CString}; +use std::fmt::{Display, Formatter}; use std::ops::Range; use std::path::Path; use std::ptr::NonNull; @@ -187,11 +188,57 @@ pub struct AnalysisInfo { pub active_info: Vec, } -#[derive(Debug, Clone)] -pub struct AnalysisProgress { - pub state: AnalysisState, - pub count: usize, - pub total: usize, +#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq)] +pub enum AnalysisProgress { + Initial, + Hold, + Idle, + Discovery, + Disassembling(usize, usize), + Analyzing(usize, usize), + ExtendedAnalysis, +} + +impl Display for AnalysisProgress { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + AnalysisProgress::Initial => { + write!(f, "Initial") + } + AnalysisProgress::Hold => { + write!(f, "Hold") + } + AnalysisProgress::Idle => { + write!(f, "Idle") + } + AnalysisProgress::Discovery => { + write!(f, "Discovery") + } + AnalysisProgress::Disassembling(count, total) => { + write!(f, "Disassembling ({count}/{total})") + } + AnalysisProgress::Analyzing(count, total) => { + write!(f, "Analyzing ({count}/{total})") + } + AnalysisProgress::ExtendedAnalysis => { + write!(f, "Extended Analysis") + } + } + } +} + +impl From for AnalysisProgress { + fn from(value: BNAnalysisProgress) -> Self { + match value.state { + BNAnalysisState::InitialState 
=> Self::Initial, + BNAnalysisState::HoldState => Self::Hold, + BNAnalysisState::IdleState => Self::Idle, + BNAnalysisState::DiscoveryState => Self::Discovery, + BNAnalysisState::DisassembleState => Self::Disassembling(value.count, value.total), + BNAnalysisState::AnalyzeState => Self::Analyzing(value.count, value.total), + BNAnalysisState::ExtendedAnalyzeState => Self::ExtendedAnalysis, + } + } } pub trait BinaryViewExt: BinaryViewBase { @@ -604,12 +651,8 @@ pub trait BinaryViewExt: BinaryViewBase { } fn analysis_progress(&self) -> AnalysisProgress { - let progress = unsafe { BNGetAnalysisProgress(self.as_ref().handle) }; - AnalysisProgress { - state: progress.state, - count: progress.count, - total: progress.total, - } + let progress_raw = unsafe { BNGetAnalysisProgress(self.as_ref().handle) }; + AnalysisProgress::from(progress_raw) } fn default_arch(&self) -> Option { diff --git a/rust/tests/binary_view.rs b/rust/tests/binary_view.rs index c2a98da7c..7a0a3299b 100644 --- a/rust/tests/binary_view.rs +++ b/rust/tests/binary_view.rs @@ -1,5 +1,7 @@ use binaryninja::binary_view::search::SearchQuery; -use binaryninja::binary_view::{AnalysisState, BinaryViewBase, BinaryViewExt, StringType}; +use binaryninja::binary_view::{ + AnalysisProgress, AnalysisState, BinaryViewBase, BinaryViewExt, StringType, +}; use binaryninja::data_buffer::DataBuffer; use binaryninja::function::{Function, FunctionViewType}; use binaryninja::headless::Session; @@ -16,7 +18,7 @@ fn test_binary_loading() { let out_dir = env!("OUT_DIR").parse::().unwrap(); let view = binaryninja::load(out_dir.join("atox.obj")).expect("Failed to create view"); assert!(view.has_initial_analysis(), "No initial analysis"); - assert_eq!(view.analysis_progress().state, AnalysisState::IdleState); + assert_eq!(view.analysis_progress(), AnalysisProgress::Idle); assert_eq!(view.file().is_analysis_changed(), false); assert_eq!(view.file().is_database_backed(), false); } @@ -188,7 +190,7 @@ fn test_deterministic_functions() 
{ for file_name in TARGET_FILES { let path = out_dir.join(file_name); let view = session.load(&path).expect("Failed to load view"); - assert_eq!(view.analysis_progress().state, AnalysisState::IdleState); + assert_eq!(view.analysis_progress(), AnalysisProgress::Idle); let functions: BTreeMap = view .functions() .iter() From e0ddafb9f0673fa5f20da8ca591a4be4efaf82ae Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Tue, 9 Dec 2025 15:51:14 -0500 Subject: [PATCH 16/19] [Rust] Add module comment to data renderer --- rust/src/data_renderer.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rust/src/data_renderer.rs b/rust/src/data_renderer.rs index e193b32db..d6b073432 100644 --- a/rust/src/data_renderer.rs +++ b/rust/src/data_renderer.rs @@ -1,3 +1,5 @@ +//! Render data variables using builtin renderers as well as add custom rendering. + use binaryninjacore_sys::*; use core::ffi; use ffi::c_void; From 4afc1a65c84ca486912365767296fdfaea9f568e Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Tue, 9 Dec 2025 16:51:14 -0500 Subject: [PATCH 17/19] [Rust] New type for `TypeArchiveId` Prevents type confusions considering there are at times, three different id types being referred to in the type archive API --- rust/src/type_archive.rs | 43 ++++++++++++++++++++++++-------------- rust/tests/type_archive.rs | 4 ++++ 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/rust/src/type_archive.rs b/rust/src/type_archive.rs index 5f7ffb84a..4a0472050 100644 --- a/rust/src/type_archive.rs +++ b/rust/src/type_archive.rs @@ -14,6 +14,16 @@ use crate::string::{raw_to_string, BnString, IntoCStr}; use crate::type_container::TypeContainer; use crate::types::{QualifiedName, QualifiedNameAndType, QualifiedNameTypeAndId, Type}; +#[repr(transparent)] +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct TypeArchiveId(pub String); + +impl Display for TypeArchiveId { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("{}", 
self.0)) + } +} + #[repr(transparent)] #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct TypeArchiveSnapshotId(pub String); @@ -79,24 +89,24 @@ impl TypeArchive { NonNull::new(handle).map(|handle| unsafe { TypeArchive::ref_from_raw(handle) }) } - /// Create a Type Archive at the given path and id, returning None if it could not be created. + /// Create a Type Archive at the given path and id, returning `None` if it could not be created. /// /// If the file has already been created and is not a valid type archive this will return `None`. pub fn create_with_id( path: impl AsRef, - id: &str, + id: &TypeArchiveId, platform: &Platform, ) -> Option> { let raw_path = path.as_ref().to_cstr(); - let id = id.to_cstr(); + let id = id.0.as_str().to_cstr(); let handle = unsafe { BNCreateTypeArchiveWithId(raw_path.as_ptr(), platform.handle, id.as_ptr()) }; NonNull::new(handle).map(|handle| unsafe { TypeArchive::ref_from_raw(handle) }) } /// Get a reference to the Type Archive with the known id, if one exists. 
- pub fn lookup_by_id(id: &str) -> Option> { - let id = id.to_cstr(); + pub fn lookup_by_id(id: &TypeArchiveId) -> Option> { + let id = id.0.as_str().to_cstr(); let handle = unsafe { BNLookupTypeArchiveById(id.as_ptr()) }; NonNull::new(handle).map(|handle| unsafe { TypeArchive::ref_from_raw(handle) }) } @@ -110,10 +120,11 @@ impl TypeArchive { } /// Get the guid for a Type Archive - pub fn id(&self) -> BnString { + pub fn id(&self) -> TypeArchiveId { let result = unsafe { BNGetTypeArchiveId(self.handle.as_ptr()) }; assert!(!result.is_null()); - unsafe { BnString::from_raw(result) } + let result_str = unsafe { BnString::from_raw(result) }; + TypeArchiveId(result_str.to_string_lossy().to_string()) } /// Get the associated Platform for a Type Archive @@ -711,9 +722,9 @@ impl TypeArchive { /// conflicting type ids pub fn merge_snapshots( &self, - base_snapshot: &str, - first_snapshot: &str, - second_snapshot: &str, + base_snapshot: &TypeArchiveSnapshotId, + first_snapshot: &TypeArchiveSnapshotId, + second_snapshot: &TypeArchiveSnapshotId, merge_conflicts: M, ) -> Result> where @@ -740,9 +751,9 @@ impl TypeArchive { /// conflicting type ids pub fn merge_snapshots_with_progress( &self, - base_snapshot: &str, - first_snapshot: &str, - second_snapshot: &str, + base_snapshot: &TypeArchiveSnapshotId, + first_snapshot: &TypeArchiveSnapshotId, + second_snapshot: &TypeArchiveSnapshotId, merge_conflicts: M, mut progress: PC, ) -> Result> @@ -750,9 +761,9 @@ impl TypeArchive { M: IntoIterator, PC: ProgressCallback, { - let base_snapshot = base_snapshot.to_cstr(); - let first_snapshot = first_snapshot.to_cstr(); - let second_snapshot = second_snapshot.to_cstr(); + let base_snapshot = base_snapshot.0.as_str().to_cstr(); + let first_snapshot = first_snapshot.0.as_str().to_cstr(); + let second_snapshot = second_snapshot.0.as_str().to_cstr(); let (merge_keys, merge_values): (Vec, Vec) = merge_conflicts .into_iter() .map(|(k, v)| (BnString::new(k), BnString::new(v))) diff --git 
a/rust/tests/type_archive.rs b/rust/tests/type_archive.rs index 7f0bd2f0a..12fcec9da 100644 --- a/rust/tests/type_archive.rs +++ b/rust/tests/type_archive.rs @@ -23,4 +23,8 @@ fn test_create_archive() { .expect("Found test type"); assert_eq!(test_type.width(), 7); assert_eq!(test_type.type_class(), TypeClass::IntegerTypeClass); + + let lookup_type_archive = + TypeArchive::lookup_by_id(&type_archive.id()).expect("Failed to lookup type archive"); + assert_eq!(lookup_type_archive, type_archive); } From bd374003d0d3b9b9a8e0a08985958f232b94a1a9 Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Tue, 9 Dec 2025 16:51:41 -0500 Subject: [PATCH 18/19] [Rust] Add APIs to retrieve type archives for a binary view --- rust/src/binary_view.rs | 59 ++++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/rust/src/binary_view.rs b/rust/src/binary_view.rs index d0b22103c..a48d0264b 100644 --- a/rust/src/binary_view.rs +++ b/rust/src/binary_view.rs @@ -64,7 +64,7 @@ use std::collections::HashMap; use std::ffi::{c_char, c_void, CString}; use std::fmt::{Display, Formatter}; use std::ops::Range; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::ptr::NonNull; use std::{result, slice}; // TODO : general reorg of modules related to bv @@ -76,6 +76,7 @@ pub mod writer; use crate::binary_view::search::SearchQuery; use crate::disassembly::DisassemblySettings; +use crate::type_archive::{TypeArchive, TypeArchiveId}; use crate::workflow::Workflow; pub use memory_map::MemoryMap; pub use reader::BinaryReader; @@ -2355,23 +2356,45 @@ pub trait BinaryViewExt: BinaryViewBase { } } - // - // fn type_archives(&self) -> Array { - // let mut ids: *mut *mut c_char = std::ptr::null_mut(); - // let mut paths: *mut *mut c_char = std::ptr::null_mut(); - // let count = unsafe { BNBinaryViewGetTypeArchives(self.as_ref().handle, &mut ids, &mut paths) }; - // let path_list = unsafe { Array::::new(paths, count, ()) }; - // let ids_list = unsafe { 
std::slice::from_raw_parts(ids, count).to_vec() }; - // let archives = ids_list.iter().filter_map(|id| { - // let archive_raw = unsafe { BNBinaryViewGetTypeArchive(self.as_ref().handle, *id) }; - // match archive_raw.is_null() { - // true => None, - // false => Some(archive_raw) - // } - // }).collect(); - // unsafe { BNFreeStringList(ids, count) }; - // Array::new(archives) - // } + /// Retrieve the attached type archives as their [`TypeArchiveId`]. + /// + /// Using the returned id you can retrieve the [`TypeArchive`] with [`BinaryViewExt::type_archive_by_id`]. + fn attached_type_archives(&self) -> Vec { + let mut ids: *mut *mut c_char = std::ptr::null_mut(); + let mut paths: *mut *mut c_char = std::ptr::null_mut(); + let count = + unsafe { BNBinaryViewGetTypeArchives(self.as_ref().handle, &mut ids, &mut paths) }; + // We discard the path here, you can retrieve it later with [`BinaryViewExt::type_archive_path_by_id`], + // this is so we can simplify the return type which will commonly just want to query through to the type + // archive itself. + let _path_list = unsafe { Array::::new(paths, count, ()) }; + let id_list = unsafe { Array::::new(ids, count, ()) }; + id_list + .into_iter() + .map(|id| TypeArchiveId(id.to_string())) + .collect() + } + + /// Look up a connected [`TypeArchive`] by its `id`. + /// + /// NOTE: A [`TypeArchive`] can be attached but not connected, returning `None`. + fn type_archive_by_id(&self, id: &TypeArchiveId) -> Option> { + let id = id.0.as_str().to_cstr(); + let result = unsafe { BNBinaryViewGetTypeArchive(self.as_ref().handle, id.as_ptr()) }; + let result_ptr = NonNull::new(result)?; + Some(unsafe { TypeArchive::ref_from_raw(result_ptr) }) + } + + /// Look up the path for an attached (but not necessarily connected) [`TypeArchive`] by its `id`. 
+ fn type_archive_path_by_id(&self, id: &TypeArchiveId) -> Option { + let id = id.0.as_str().to_cstr(); + let result = unsafe { BNBinaryViewGetTypeArchivePath(self.as_ref().handle, id.as_ptr()) }; + if result.is_null() { + return None; + } + let path_str = unsafe { BnString::into_string(result) }; + Some(PathBuf::from(path_str)) + } } impl BinaryViewExt for T {} From 29a1fb223e93f347e0d91ae9c31798c0c43f947c Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Tue, 9 Dec 2025 17:03:55 -0500 Subject: [PATCH 19/19] [Rust] Fix untyped `expr_idx` within MLIL `ILReferenceSource` --- rust/src/medium_level_il/function.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rust/src/medium_level_il/function.rs b/rust/src/medium_level_il/function.rs index 950f07f53..d437adde6 100644 --- a/rust/src/medium_level_il/function.rs +++ b/rust/src/medium_level_il/function.rs @@ -220,7 +220,7 @@ impl MediumLevelILFunction { /// # use binaryninja::variable::Variable; /// # let mlil_fun: MediumLevelILFunction = todo!(); /// # let mlil_var: Variable = todo!(); - /// let instr_idx = mlil_fun.var_refs(&mlil_var).get(0).expr_idx; + /// let instr_addr = mlil_fun.var_refs(&mlil_var).get(0).addr; /// ``` pub fn var_refs(&self, var: &Variable) -> Array { let mut count = 0; @@ -525,7 +525,7 @@ pub struct ILReferenceSource { pub arch: CoreArchitecture, pub addr: u64, pub graph_type: FunctionGraphType, - pub expr_idx: usize, + pub expr_idx: MediumLevelExpressionIndex, } impl From for ILReferenceSource { @@ -535,7 +535,7 @@ impl From for ILReferenceSource { arch: unsafe { CoreArchitecture::from_raw(value.arch) }, addr: value.addr, graph_type: value.type_, - expr_idx: value.exprId, + expr_idx: MediumLevelExpressionIndex(value.exprId), } } } @@ -547,7 +547,7 @@ impl From<&BNILReferenceSource> for ILReferenceSource { arch: unsafe { CoreArchitecture::from_raw(value.arch) }, addr: value.addr, graph_type: value.type_, - expr_idx: value.exprId, + expr_idx: 
MediumLevelExpressionIndex(value.exprId), } } }