diff --git a/plugins/workflow_objc/src/activities/alloc_init.rs b/plugins/workflow_objc/src/activities/alloc_init.rs new file mode 100644 index 000000000..5c8bf0645 --- /dev/null +++ b/plugins/workflow_objc/src/activities/alloc_init.rs @@ -0,0 +1,73 @@ +use binaryninja::{ + binary_view::{BinaryView, BinaryViewExt as _}, + medium_level_il::MediumLevelILLiftedInstruction, + rc::Ref, + types::Type, + workflow::AnalysisContext, +}; +use bstr::ByteSlice; + +use super::util; +use crate::{error::ILLevel, metadata::GlobalState, workflow::Confidence, Error}; + +// j_ prefixes are for stub functions in the dyld shared cache. +// The prefix is added by Binary Ninja's shared cache workflow. +const ALLOC_INIT_FUNCTIONS: &[&[u8]] = &[ + b"_objc_alloc_init", + b"_objc_alloc_initWithZone", + b"_objc_alloc", + b"_objc_allocWithZone", + b"_objc_opt_new", + b"j__objc_alloc_init", + b"j__objc_alloc_initWithZone", + b"j__objc_alloc", + b"j__objc_allocWithZone", + b"j__objc_opt_new", +]; + +fn return_type_for_alloc_call(call: &util::Call<'_>, view: &BinaryView) -> Option<Ref<Type>> { + if call.call.params.is_empty() { + return None; + } + + let class_addr = + util::match_constant_pointer_or_load_of_constant_pointer(&call.call.params[0])?; + let class_symbol_name = view.symbol_by_address(class_addr)?.full_name(); + let class_name = util::class_name_from_symbol_name(class_symbol_name.to_bytes().as_bstr())?; + + let class_type = view.type_by_name(class_name.to_str_lossy())?; + Some(Type::pointer(&call.target.arch(), &class_type)) +} + +fn process_instruction(instr: &MediumLevelILLiftedInstruction, view: &BinaryView) -> Option<()> { + let call = util::match_call_to_function_named(instr, view, ALLOC_INIT_FUNCTIONS)?; + + util::adjust_return_type_of_call( + &call, + return_type_for_alloc_call(&call, view)?.as_ref(), + Confidence::AllocInit as u8, + ); + + Some(()) +} + +pub fn process(ac: &AnalysisContext) -> Result<(), Error> { + let bv = ac.view(); + if GlobalState::should_ignore_view(&bv) { + return
Ok(()); + } + + let mlil = ac.mlil_function().ok_or(Error::MissingIL { + level: ILLevel::Medium, + func_start: ac.function().start(), + })?; + let mlil_ssa = mlil.ssa_form(); + + for block in &mlil_ssa.basic_blocks() { + for instr in block.iter() { + process_instruction(&instr.lift(), &bv); + } + } + + Ok(()) +} diff --git a/plugins/workflow_objc/src/activities/mod.rs b/plugins/workflow_objc/src/activities/mod.rs index 98c9770f8..b418c1a65 100644 --- a/plugins/workflow_objc/src/activities/mod.rs +++ b/plugins/workflow_objc/src/activities/mod.rs @@ -1,4 +1,6 @@ +pub mod alloc_init; pub mod inline_stubs; pub mod objc_msg_send_calls; pub mod remove_memory_management; pub mod super_init; +pub(crate) mod util; diff --git a/plugins/workflow_objc/src/activities/objc_msg_send_calls.rs b/plugins/workflow_objc/src/activities/objc_msg_send_calls.rs index 3a772e798..157bb19c1 100644 --- a/plugins/workflow_objc/src/activities/objc_msg_send_calls.rs +++ b/plugins/workflow_objc/src/activities/objc_msg_send_calls.rs @@ -107,7 +107,7 @@ fn process_instruction( }; let mut function_changed = false; - if adjust_call_type::process_call(bv, func, insn, &selector, message_send_type).is_ok() { + if adjust_call_type::process_call(bv, func, ssa, insn, &selector, message_send_type).is_ok() { function_changed = true; } @@ -166,7 +166,7 @@ fn selector_from_call( return None; }; - let raw_selector = ssa.get_ssa_register_value(&reg.source_reg())?.value as u64; + let raw_selector = ssa.get_ssa_register_value(reg.source_reg())?.value as u64; if raw_selector == 0 { return None; } diff --git a/plugins/workflow_objc/src/activities/objc_msg_send_calls/adjust_call_type.rs b/plugins/workflow_objc/src/activities/objc_msg_send_calls/adjust_call_type.rs index d9cb13432..c3f9d2f38 100644 --- a/plugins/workflow_objc/src/activities/objc_msg_send_calls/adjust_call_type.rs +++ b/plugins/workflow_objc/src/activities/objc_msg_send_calls/adjust_call_type.rs @@ -1,26 +1,150 @@ use binaryninja::{ +
architecture::CoreRegister, binary_view::{BinaryView, BinaryViewBase as _, BinaryViewExt}, confidence::Conf, function::Function, low_level_il::{ - function::{Mutable, SSA}, - instruction::LowLevelILInstruction, + expression::{ + ExpressionHandler as _, LowLevelILExpression, LowLevelILExpressionKind, ValueExpr, + }, + function::{LowLevelILFunction, Mutable, SSA}, + instruction::{InstructionHandler as _, LowLevelILInstruction, LowLevelILInstructionKind}, + operation::{CallSsa, Operation}, + LowLevelILSSARegisterKind, }, rc::Ref, types::{FunctionParameter, Type}, + variable::PossibleValueSet, }; +use bstr::ByteSlice as _; use super::MessageSendType; -use crate::{metadata::Selector, workflow::Confidence, Error}; +use crate::{activities::util, metadata::Selector, workflow::Confidence, Error}; fn named_type(bv: &BinaryView, name: &str) -> Option> { bv.type_by_name(name) .map(|t| Type::named_type_from_type(name, &t)) } +// j_ prefixes are for stub functions in the dyld shared cache. +const ALLOC_FUNCTIONS: &[&str] = &[ + "_objc_alloc_init", + "_objc_alloc_initWithZone", + "_objc_alloc", + "_objc_allocWithZone", + "_objc_opt_new", + "j__objc_alloc_init", + "j__objc_alloc_initWithZone", + "j__objc_alloc", + "j__objc_allocWithZone", + "j__objc_opt_new", +]; + +/// Extract parameter expressions from a call, handling the SeparateParamListSsa wrapper. +fn call_param_exprs<'a>( + call_op: &'a Operation<'a, Mutable, SSA, CallSsa>, +) -> Option>> { + let LowLevelILExpressionKind::CallParamSsa(params) = &call_op.param_expr().kind() else { + return None; + }; + + let param_exprs = params.param_exprs(); + Some( + if let Some(LowLevelILExpressionKind::SeparateParamListSsa(inner)) = + param_exprs.first().map(|e| e.kind()) + { + inner.param_exprs() + } else { + param_exprs + }, + ) +} + +/// Follow an SSA register back through register-to-register copies to find the +/// instruction that originally defined its value. 
+fn source_def_for_register<'a>( + ssa: &'a LowLevelILFunction, + reg: LowLevelILSSARegisterKind, +) -> Option> { + let mut def = ssa.get_ssa_register_definition(reg)?; + while let LowLevelILInstructionKind::SetRegSsa(set_reg) = def.kind() { + let LowLevelILExpressionKind::RegSsa(src_reg) = set_reg.source_expr().kind() else { + break; + }; + def = ssa.get_ssa_register_definition(src_reg.source_reg())?; + } + Some(def) +} + +/// For init-family selectors on a normal message send, try to determine the return type +/// by tracing the receiver register back to an alloc call and resolving the class. +fn return_type_for_init_receiver( + bv: &BinaryView, + func: &Function, + ssa: &LowLevelILFunction, + insn: &LowLevelILInstruction, + selector: &Selector, + message_send_type: MessageSendType, +) -> Option> { + if message_send_type != MessageSendType::Normal || !selector.name.starts_with("init") { + return None; + } + + let call_op = match insn.kind() { + LowLevelILInstructionKind::CallSsa(op) | LowLevelILInstructionKind::TailCallSsa(op) => op, + _ => return None, + }; + + let param_exprs = call_param_exprs(&call_op)?; + let LowLevelILExpressionKind::RegSsa(receiver_reg) = param_exprs.first()?.kind() else { + return None; + }; + + let def = source_def_for_register(ssa, receiver_reg.source_reg())?; + let def_call_op = match def.kind() { + LowLevelILInstructionKind::CallSsa(op) | LowLevelILInstructionKind::TailCallSsa(op) => op, + _ => return None, + }; + + // Check if the defining call is to an alloc function. + let target_values = def_call_op.target().possible_values(); + let call_target = match target_values { + PossibleValueSet::ConstantValue { value } + | PossibleValueSet::ConstantPointerValue { value } + | PossibleValueSet::ImportedAddressValue { value } => value as u64, + _ => return None, + }; + + let target_name = bv + .symbol_by_address(call_target)? 
+ .raw_name() + .to_string_lossy() + .into_owned(); + if !ALLOC_FUNCTIONS.contains(&target_name.as_str()) { + return None; + } + + // Get the class from the alloc call's first parameter. + let alloc_params = call_param_exprs(&def_call_op)?; + let LowLevelILExpressionKind::RegSsa(class_reg) = alloc_params.first()?.kind() else { + return None; + }; + + let class_addr = ssa.get_ssa_register_value(class_reg.source_reg())?.value as u64; + if class_addr == 0 { + return None; + } + + let class_symbol_name = bv.symbol_by_address(class_addr)?.full_name(); + let class_name = util::class_name_from_symbol_name(class_symbol_name.to_bytes().as_bstr())?; + let class_type = bv.type_by_name(class_name.to_str_lossy())?; + Some(Type::pointer(&func.arch(), &class_type)) +} + pub fn process_call( bv: &BinaryView, func: &Function, + ssa: &LowLevelILFunction, insn: &LowLevelILInstruction, selector: &Selector, message_send_type: MessageSendType, @@ -39,8 +163,9 @@ pub fn process_call( }; let sel = named_type(bv, "SEL").unwrap_or_else(|| Type::pointer(&arch, &Type::char())); - // TODO: Infer return type based on receiver type / selector. 
- let return_type = id.clone(); + let return_type = + return_type_for_init_receiver(bv, func, ssa, insn, selector, message_send_type) + .unwrap_or_else(|| id.clone()); let mut params = vec![ FunctionParameter::new(receiver_type, receiver_name.to_string(), None), diff --git a/plugins/workflow_objc/src/activities/super_init.rs b/plugins/workflow_objc/src/activities/super_init.rs index 68c6d3ee5..81b9a4537 100644 --- a/plugins/workflow_objc/src/activities/super_init.rs +++ b/plugins/workflow_objc/src/activities/super_init.rs @@ -1,21 +1,16 @@ use binaryninja::{ - binary_view::{BinaryView, BinaryViewBase, BinaryViewExt as _}, - confidence::Conf, - function::Function, + binary_view::{BinaryView, BinaryViewBase as _, BinaryViewExt as _}, medium_level_il::{ - operation::{ - Constant, LiftedCallSsa, LiftedLoadSsa, LiftedSetVarSsa, LiftedSetVarSsaField, Var, - VarSsa, - }, - MediumLevelILFunction, MediumLevelILLiftedInstruction, MediumLevelILLiftedInstructionKind, + operation::{Constant, LiftedSetVarSsa, LiftedSetVarSsaField, Var, VarSsa}, + MediumLevelILLiftedInstruction, MediumLevelILLiftedInstructionKind, }, rc::Ref, types::Type, - variable::{RegisterValueType, SSAVariable}, workflow::AnalysisContext, }; -use bstr::{BStr, ByteSlice}; +use bstr::ByteSlice; +use super::util; use crate::{ error::ILLevel, metadata::{GlobalState, Selector}, @@ -32,110 +27,16 @@ const OBJC_MSG_SEND_SUPER_FUNCTIONS: &[&[u8]] = &[ b"j__objc_msgSendSuper", ]; -fn ssa_variable_value_or_load_of_constant_pointer( - function: &MediumLevelILFunction, - var: &SSAVariable, -) -> Option { - let value = function.ssa_variable_value(var); - match value.state { - RegisterValueType::ConstantPointerValue => return Some(value.value as u64), - RegisterValueType::UndeterminedValue => {} - _ => return None, - } - - let def = function.ssa_variable_definition(var)?; - let MediumLevelILLiftedInstructionKind::SetVarSsa(set_var) = def.lift().kind else { - return None; - }; - - let 
MediumLevelILLiftedInstructionKind::LoadSsa(LiftedLoadSsa { src, .. }) = set_var.src.kind - else { - return None; - }; - - match src.kind { - MediumLevelILLiftedInstructionKind::ConstPtr(Constant { constant }) => Some(constant), - _ => None, - } -} - -/// If `instr` is a constant pointer or is a variable whose value is loaded from a constant pointer, -/// return that pointer address. -fn match_constant_pointer_or_load_of_constant_pointer( - instr: &MediumLevelILLiftedInstruction, -) -> Option { - match instr.kind { - MediumLevelILLiftedInstructionKind::ConstPtr(Constant { constant }) => Some(constant), - MediumLevelILLiftedInstructionKind::VarSsa(var) => { - ssa_variable_value_or_load_of_constant_pointer(&instr.function, &var.src) - } - _ => None, - } -} - -#[allow(clippy::struct_field_names)] -struct Call<'a> { - pub instr: &'a MediumLevelILLiftedInstruction, - pub call: &'a LiftedCallSsa, - pub target: Ref, -} - -/// Returns a `Call` if `instr` is a call or tail call to a function whose name appears in `function_names` -fn match_call_to_function_named<'a>( - instr: &'a MediumLevelILLiftedInstruction, - view: &'a BinaryView, - function_names: &'a [&[u8]], -) -> Option> { - let (MediumLevelILLiftedInstructionKind::TailcallSsa(ref call) - | MediumLevelILLiftedInstructionKind::CallSsa(ref call)) = instr.kind - else { - return None; - }; - - let MediumLevelILLiftedInstructionKind::ConstPtr(Constant { - constant: call_target, - }) = call.dest.kind - else { - return None; - }; - - let target_function = view.function_at(&instr.function.function().platform(), call_target)?; - let function_name = target_function.symbol().full_name(); - if !function_names.contains(&function_name.to_bytes()) { - return None; - } - - Some(Call { - instr, - call, - target: target_function, - }) -} - -fn class_name_from_symbol_name(symbol_name: &BStr) -> Option<&BStr> { - // The symbol name for the `objc_class_t` can have different names depending - // on factors such as being local or 
external, and whether the reference - // is from the shared cache or a standalone Mach-O file. - Some(if symbol_name.starts_with(b"cls_") { - &symbol_name[4..] - } else if symbol_name.starts_with(b"clsRef_") { - &symbol_name[7..] - } else if symbol_name.starts_with(b"_OBJC_CLASS_$_") { - &symbol_name[14..] - } else { - return None; - }) -} - /// Detect the return type for a call to `objc_msgSendSuper2` where the selector is in the `init` family. /// Returns `None` if selector is not in the `init` family or the return type cannot be determined. -fn return_type_for_super_init(call: &Call, view: &BinaryView) -> Option> { +fn return_type_for_super_init(call: &util::Call, view: &BinaryView) -> Option> { // Expecting to see at least `objc_super` and a selector. if call.call.params.len() < 2 { return None; } - let selector_addr = match_constant_pointer_or_load_of_constant_pointer(&call.call.params[1])?; + let selector_addr = + util::match_constant_pointer_or_load_of_constant_pointer(&call.call.params[1])?; let selector = Selector::from_address(view, selector_addr).ok()?; // TODO: This will match `initialize` and `initiate` which are not init methods. @@ -238,7 +139,7 @@ fn return_type_for_super_init(call: &Call, view: &BinaryView) -> Option Option, return_type: &Type) { - let function = call.instr.function.function(); - - // We're changing only the return type, so preserve other aspects of any existing call type adjustment. 
- let target_function_type = if let Some(existing_call_type_adjustment) = - function.call_type_adjustment(call.instr.address, None) - { - existing_call_type_adjustment.contents - } else { - call.target.function_type() - }; - - // There's nothing to do if the return type is already correct - if let Some(conf) = target_function_type.return_value() { - if &*conf.contents == return_type { - return; - } - } - - let adjusted_call_type = target_function_type - .to_builder() - .set_child_type(return_type) - .finalize(); - - function.set_auto_call_type_adjustment( - call.instr.address, - Conf::new(&*adjusted_call_type, Confidence::SuperInit as u8), - None, - ); -} - fn process_instruction(instr: &MediumLevelILLiftedInstruction, view: &BinaryView) -> Option<()> { - let call = match_call_to_function_named(instr, view, OBJC_MSG_SEND_SUPER_FUNCTIONS)?; + let call = util::match_call_to_function_named(instr, view, OBJC_MSG_SEND_SUPER_FUNCTIONS)?; - adjust_return_type_of_call(&call, return_type_for_super_init(&call, view)?.as_ref()); + util::adjust_return_type_of_call( + &call, + return_type_for_super_init(&call, view)?.as_ref(), + Confidence::SuperInit as u8, + ); Some(()) } diff --git a/plugins/workflow_objc/src/activities/util.rs b/plugins/workflow_objc/src/activities/util.rs new file mode 100644 index 000000000..3ac7df7c1 --- /dev/null +++ b/plugins/workflow_objc/src/activities/util.rs @@ -0,0 +1,140 @@ +use binaryninja::{ + binary_view::{BinaryView, BinaryViewExt as _}, + confidence::Conf, + function::Function, + medium_level_il::{ + operation::{Constant, LiftedCallSsa, LiftedLoadSsa}, + MediumLevelILFunction, MediumLevelILLiftedInstruction, MediumLevelILLiftedInstructionKind, + }, + rc::Ref, + types::Type, + variable::{RegisterValueType, SSAVariable}, +}; +use bstr::BStr; + +#[allow(clippy::struct_field_names)] +pub struct Call<'a> { + pub instr: &'a MediumLevelILLiftedInstruction, + pub call: &'a LiftedCallSsa, + pub target: Ref, +} + +/// Returns a `Call` if `instr` is a 
call or tail call to a function whose name appears in `function_names` +pub fn match_call_to_function_named<'a>( + instr: &'a MediumLevelILLiftedInstruction, + view: &'a BinaryView, + function_names: &'a [&[u8]], +) -> Option> { + let (MediumLevelILLiftedInstructionKind::TailcallSsa(ref call) + | MediumLevelILLiftedInstructionKind::CallSsa(ref call)) = instr.kind + else { + return None; + }; + + let MediumLevelILLiftedInstructionKind::ConstPtr(Constant { + constant: call_target, + }) = call.dest.kind + else { + return None; + }; + + let target_function = view.function_at(&instr.function.function().platform(), call_target)?; + let function_name = target_function.symbol().full_name(); + if !function_names.contains(&function_name.to_bytes()) { + return None; + } + + Some(Call { + instr, + call, + target: target_function, + }) +} + +fn ssa_variable_value_or_load_of_constant_pointer( + function: &MediumLevelILFunction, + var: &SSAVariable, +) -> Option { + let value = function.ssa_variable_value(var); + match value.state { + RegisterValueType::ConstantPointerValue => return Some(value.value as u64), + RegisterValueType::UndeterminedValue => {} + _ => return None, + } + + let def = function.ssa_variable_definition(var)?; + let MediumLevelILLiftedInstructionKind::SetVarSsa(set_var) = def.lift().kind else { + return None; + }; + + let MediumLevelILLiftedInstructionKind::LoadSsa(LiftedLoadSsa { src, .. }) = set_var.src.kind + else { + return None; + }; + + match src.kind { + MediumLevelILLiftedInstructionKind::ConstPtr(Constant { constant }) => Some(constant), + _ => None, + } +} + +/// If `instr` is a constant pointer or is a variable whose value is loaded from a constant pointer, +/// return that pointer address. 
+pub fn match_constant_pointer_or_load_of_constant_pointer( + instr: &MediumLevelILLiftedInstruction, +) -> Option { + match instr.kind { + MediumLevelILLiftedInstructionKind::ConstPtr(Constant { constant }) => Some(constant), + MediumLevelILLiftedInstructionKind::VarSsa(var) => { + ssa_variable_value_or_load_of_constant_pointer(&instr.function, &var.src) + } + _ => None, + } +} + +pub fn class_name_from_symbol_name(symbol_name: &BStr) -> Option<&BStr> { + // The symbol name for the `objc_class_t` can have different names depending + // on factors such as being local or external, and whether the reference + // is from the shared cache or a standalone Mach-O file. + Some(if symbol_name.starts_with(b"cls_") { + &symbol_name[4..] + } else if symbol_name.starts_with(b"clsRef_") { + &symbol_name[7..] + } else if symbol_name.starts_with(b"_OBJC_CLASS_$_") { + &symbol_name[14..] + } else { + return None; + }) +} + +/// Adjust the return type of the call represented by `call`. +pub fn adjust_return_type_of_call(call: &Call<'_>, return_type: &Type, confidence: u8) { + let function = call.instr.function.function(); + + // We're changing only the return type, so preserve other aspects of any existing call type adjustment. 
+ let target_function_type = if let Some(existing_call_type_adjustment) = + function.call_type_adjustment(call.instr.address, None) + { + existing_call_type_adjustment.contents + } else { + call.target.function_type() + }; + + // There's nothing to do if the return type is already correct + if let Some(conf) = target_function_type.return_value() { + if &*conf.contents == return_type { + return; + } + } + + let adjusted_call_type = target_function_type + .to_builder() + .set_child_type(return_type) + .finalize(); + + function.set_auto_call_type_adjustment( + call.instr.address, + Conf::new(&*adjusted_call_type, confidence), + None, + ); +} diff --git a/plugins/workflow_objc/src/workflow.rs b/plugins/workflow_objc/src/workflow.rs index fdda1d453..8c52e0f48 100644 --- a/plugins/workflow_objc/src/workflow.rs +++ b/plugins/workflow_objc/src/workflow.rs @@ -7,6 +7,7 @@ use crate::{activities, error::WorkflowRegistrationError}; #[repr(u8)] pub enum Confidence { ObjCMsgSend = 96, + AllocInit = 98, SuperInit = 100, } @@ -50,6 +51,19 @@ pub fn register_activities() -> Result<(), WorkflowRegistrationError> { run(activities::inline_stubs::process), ); + let alloc_init_activity = Activity::new_with_action( + activity::Config::action( + "core.function.objectiveC.types.allocInit", + "Obj-C: Adjust return types of objc_alloc_init calls", + "Adjust the return type of calls to objc_alloc / objc_alloc_init when a fixed type is passed as an argument.", + ) + .eligibility( + activity::Eligibility::auto().predicate( + activity::ViewType::in_(["Mach-O", "DSCView"]), + )), + run(activities::alloc_init::process), + ); + let super_init_activity = Activity::new_with_action( activity::Config::action( "core.function.objectiveC.types.superInit", @@ -85,7 +99,8 @@ pub fn register_activities() -> Result<(), WorkflowRegistrationError> { &remove_memory_management_activity, "core.function.generateMediumLevelIL", )? - .activity_after(&super_init_activity, "core.function.generateMediumLevelIL")? 
+ .activity_after(&alloc_init_activity, "core.function.generateMediumLevelIL")? + .activity_after(&super_init_activity, &alloc_init_activity.name())? .register()?; Ok(())