上一篇博客介绍了Mach O文件的格式,以及每个部分的作用,这一部分开始将要介绍的是在上一个基础上如何将Mach O文件加载并运行起来。我们会从点击执行应用开始,到加载dyld,初始化dyld,再到dyld将镜像加载到内存,Rebase/Bind,Runtime初始化,其它的初始化代码,最后运行main函数为止。整个过程如下图所示:

在开始这篇博客内容规划之前,最难的是从哪里开头讲,后面查看了XNU代码后发现个人能hold住的起点应该是load_init_program,当然后面随着技术的提高肯定能够从更深层次给大家讲明白这个问题,都是时间问题,对自己有信心,好了不说太多无关的内容了,直接切入主题:

我们知道XNU内核启动后,启动的第一个进程是launchd。launchd启动之后会启动其他守护进程。我们可以看XNU源码中有如下一句注释:

// Description:	Load the "init" program; in most cases, this will be "launchd"

我们先来看下load_init_program

/*
 * load_init_program (excerpt)
 *
 * Builds a user-space argument vector for the "init" program and performs a
 * fake execve() on behalf of process p. Panics if the exec fails.
 * Declarations of the locals (init_addr, argv, argc, init_exec_args, error,
 * retval) are elided from this excerpt.
 */
void
load_init_program(proc_t p)
{
//......
/*
* Copy out program name.
*/
init_addr = VM_MIN_ADDRESS;
(void) vm_allocate(current_map(), &init_addr, PAGE_SIZE,VM_FLAGS_ANYWHERE);
/* Never use address 0 for the string; bump to the next byte instead. */
if (init_addr == 0)
init_addr++;

/*
 * Copy the init program path (the article identifies it as /sbin/launchd)
 * out to user space at init_addr.
 * NOTE(review): the length is sizeof(init_program_name)+1, one byte more
 * than the object itself -- confirm this is intended.
 */
(void) copyout((caddr_t) init_program_name, CAST_USER_ADDR_T(init_addr),(unsigned) sizeof(init_program_name)+1);

argv[argc++] = (char *) init_addr; /* argv[0] = user address of the program path */
init_addr += sizeof(init_program_name);
init_addr = (vm_offset_t)ROUND_PTR(char, init_addr);

/* Single-user boot: append "-s" as an extra argument. */
if (boothowto & RB_SINGLE) {
const char *init_args = "-s";
/*
 * NOTE(review): strlen() bytes are copied, i.e. without the terminating
 * NUL; presumably this relies on the freshly allocated page being
 * zero-filled -- confirm.
 */
copyout(init_args, CAST_USER_ADDR_T(init_addr),strlen(init_args));
argv[argc++] = (char *)init_addr; /* argv[1] = "-s" */
init_addr += strlen(init_args);
init_addr = (vm_offset_t)ROUND_PTR(char, init_addr);
}
/*
* Null-end the argument list
*/
argv[argc] = NULL;

/*
* Copy out the argument list.
*/
/* The pointer array itself is copied to user space at init_addr. */
(void) copyout((caddr_t) argv, CAST_USER_ADDR_T(init_addr),(unsigned) sizeof(argv));

/*
 * At this point:
 *   argv[0]    = program path
 *   argv[1]    = "-s"   (only when RB_SINGLE is set)
 *   argv[argc] = NULL   (list terminator)
 */

/*
* Set up argument block for fake call to execve.
*/
init_exec_args.fname = CAST_USER_ADDR_T(argv[0]); /* path of the program to exec */
init_exec_args.argp = CAST_USER_ADDR_T((char **)init_addr); /* user copy of argv */
init_exec_args.envp = CAST_USER_ADDR_T(0); /* no environment */

/* Hand the argument block to execve(). */
error = execve(p,&init_exec_args,retval);
if (error)
panic("Process 1 exec of %s failed, errno %d\n",
init_program_name, error);
}

我们接下来看下execve

/*
 * execve
 *
 * Thin wrapper: repackages the caller's execve arguments (path, argument
 * vector, environment) into a __mac_execve_args block with no MAC label
 * and forwards them to __mac_execve(), returning its result unchanged.
 */
int execve(proc_t p, struct execve_args *uap, register_t *retval) {
    struct __mac_execve_args mac_args;

    mac_args.fname = uap->fname;      /* path of the program to execute */
    mac_args.argp = uap->argp;        /* argument list */
    mac_args.envp = uap->envp;        /* environment variables */
    mac_args.mac_p = USER_ADDR_NULL;  /* no MAC label supplied */

    return (__mac_execve(p, &mac_args, retval));
}

在execve主要将execve_args中的程序执行文件路径,参数列表,环境变量通过__mac_execve传入到后续流程:

/*
 * __mac_execve (excerpt)
 *
 * Most of the body is elided. The part shown obtains the current task and
 * calls exec_activate_image() on imgp, returning its error code.
 */
int __mac_execve(proc_t p, struct __mac_execve_args *uap, register_t *retval) {
//.....
task = current_task(); /* the task performing the exec */
//.......
/* Activate the image; the setup of imgp is elided from this excerpt. */
error = exec_activate_image(imgp);
//.......
return(error);
}

__mac_execve 主要是调用exec_activate_image来激活镜像

/*
* exec_activate_image (excerpt)
*
* Walks the table of image activators (execsw) and tries each one on the
* image described by imgp until one of them returns something other than -1
* or the table is exhausted. Returns the resulting error code.
*/
static int exec_activate_image(struct image_params *imgp) {
//.......
again:
/* Permission check on the image (per the article: regular file, executable, readable). */
error = exec_check_permissions(imgp);
//......
encapsulated_binary:
/*
 * Pick the activator by trying each table entry in order. Per the table in
 * this article: Mach-O -> exec_mach_imgact, fat binary -> exec_fat_imgact,
 * interpreter script -> exec_shell_imgact.
 * The loop keeps going only while error is still -1 and stops at the
 * NULL entry that ends the table.
 */
for(i = 0; error == -1 && execsw[i].ex_imgact != NULL; i++) {
/* Try this activator on the image. */
error = (*execsw[i].ex_imgact)(imgp);
//...........
}
//........
return (error);
}

exec_activate_image主要任务是遍历可用镜像激活器,按照可执行文件的格式,执行不同的函数。目前有三种格式,单指令集可执行文件(Mach-o Binary),多指令集可执行文件(Fat Binary),shell 脚本(Interpreter Script)。
exec_activate_image首先会对镜像文件进行一次校验,检查镜像是否是普通文件,是否可执行,是否可读,然后遍历镜像激活函数,寻找可以激活当前镜像的激活器。execsw是当前能够支持的镜像激活表。它是一个结构体数组,以一个全为NULL的表项作为结尾,每项结构体的结构如下所示:

/* One entry of the image activator table. */
struct execsw {
int (*ex_imgact)(struct image_params *); /* function that tries to activate the image */
const char *ex_name; /* human-readable name of the image format */
};

包含激活镜像的方法,以及镜像名。

/*
 * Image activator table. Entries are tried in order by exec_activate_image();
 * the { NULL, NULL } entry marks the end of the table.
 */
struct execsw {
int (*ex_imgact)(struct image_params *); /* function that tries to activate the image */
const char *ex_name; /* human-readable name of the image format */
} execsw[] = {
{ exec_mach_imgact, "Mach-o Binary" },
{ exec_fat_imgact, "Fat Binary" },
#ifdef IMGPF_POWERPC
{ exec_powerpc32_imgact, "PowerPC binary" },
#endif /* IMGPF_POWERPC */
{ exec_shell_imgact, "Interpreter Script" },
{ NULL, NULL}
};

我们这里只以Mach-o Binary的激活器为例子来看下是怎么激活的。

/*
 * exec_mach_imgact (excerpt)
 *
 * Image activator for Mach-O binaries. Validates the Mach-O header found in
 * imgp->ip_vdata, then loads the file via load_machfile(). Declarations,
 * the "bad" label and the return path are elided from this excerpt.
 */
static int exec_mach_imgact(struct image_params *imgp) {

struct mach_header *mach_header = (struct mach_header *)imgp->ip_vdata;
//........
/*
* make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference
* is a reserved field on the end, so for the most part, we can
* treat them as if they were identical.
* (magic number check)
*/
if ((mach_header->magic != MH_MAGIC/* 32-bit */) &&
(mach_header->magic != MH_MAGIC_64/* 64-bit */)) {
error = -1;
goto bad;
}

/*
 * Dynamic libraries and bundles are rejected here with -1.
 * Per the article, these file types are loaded by dyld rather than exec'd
 * directly.
 */
switch (mach_header->filetype) {
case MH_DYLIB:
case MH_BUNDLE:
error = -1;
goto bad;
}

/* Record the image's CPU type/subtype if none has been recorded yet. */
if (!imgp->ip_origcputype) {
imgp->ip_origcputype = mach_header->cputype;
imgp->ip_origcpusubtype = mach_header->cpusubtype;
}

task = current_task();
thread = current_thread();
uthread = get_bsdthread_info(thread);
//.....
/*
* Load the Mach-O file.
*/

/*
* NOTE: An error after this point indicates we have potentially
* destroyed or overwrote some process state while attempting an
* execve() following a vfork(), which is an unrecoverable condition.
*/

/*
* We reset the task to 64-bit (or not) here. It may have picked up
* a new map, and we need that to reflect its true 64-bit nature.
*/

task_set_64bit(task,
((imgp->ip_flags & IMGPF_IS_64BIT) == IMGPF_IS_64BIT));

/*
* Actually load the image file we previously decided to load.
*/
lret = load_machfile(imgp, mach_header, thread, map, &load_result);

//.....
/* load_machfile() maps the vnode */
(void)ubc_map(imgp->ip_vp, PROT_READ | PROT_EXEC);

/*
* Set up the system reserved areas in the new address space.
*/
vm_map_exec(get_task_map(task),
task,
(void *) p->p_fd->fd_rdir,
cpu_type());
//.......
}

exec_mach_imgact 其实会对Mach O文件的header信息校验,确保当前的文件是Mach O文件,并且校验它的cpu类型是否符合当前设备平台。当这些都校验通过后就通过load_machfile加载Mach O文件。

/*
 * load_machfile (excerpt)
 *
 * Entry point for loading a Mach-O file. The only step shown in this
 * excerpt is the call to parse_machfile() with an initial depth of 0;
 * the surrounding setup and the return path are elided.
 */
load_return_t load_machfile(
struct image_params *imgp,
struct mach_header *header,
thread_t thread,
vm_map_t new_map,
load_result_t *result
)
{
//......
/* Parse the Mach-O file; depth starts at 0 for the main image. */
lret = parse_machfile(vp, map, thread, header, file_offset, macho_size,0, result);
//.....
}

load_machfile会加载Mach-O中的各种load command(加载命令)。在其内部会禁止数据段执行防止溢出漏洞攻击,还会设置地址空间布局随机化(ASLR),还有一些映射的调整。load_machfile内部直接调用了parse_machfile方法。

/*
 * parse_machfile (excerpt)
 *
 * Validates a Mach-O header and processes its load commands in two passes:
 * pass 1 handles LC_SEGMENT / LC_SEGMENT_64, pass 2 handles LC_THREAD,
 * LC_UNIXTHREAD, LC_LOAD_DYLINKER and LC_CODE_SIGNATURE. If a dynamic
 * linker command was recorded, load_dylinker() is called at the end, which
 * re-enters this function for dyld with the incremented depth.
 *
 * depth is the nesting level on entry (0 for the main image); it is
 * incremented before the file type check below.
 * Returns LOAD_SUCCESS or a LOAD_* error code.
 */
static load_return_t parse_machfile(
struct vnode *vp,
vm_map_t map,
thread_t thread,
struct mach_header *header,
off_t file_offset,
off_t macho_size,
int depth,
load_result_t *result
)
{
//.....
/* 64-bit images (either byte order) use the larger mach_header_64. */
if (header->magic == MH_MAGIC_64 || header->magic == MH_CIGAM_64) {
mach_header_sz = sizeof(struct mach_header_64);
}
/*
* Break infinite recursion
*/
if (depth > 6) {
return(LOAD_FAILURE);
}
task = (task_t)get_threadtask(thread);

/*
 * depth tracks how deep we are in nested parse_machfile() calls: after the
 * increment it is 1 while parsing the main Mach-O and 2 when re-entered
 * through load_dylinker() to parse dyld.
 */
depth++;

/*
* Check to see if right machine type.
*/
if (((cpu_type_t)(header->cputype & ~CPU_ARCH_MASK) != cpu_type()) ||
!grade_binary(header->cputype,
header->cpusubtype & ~CPU_SUBTYPE_MASK))
return(LOAD_BADARCH);

abi64 = ((header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64);

/*
 * Check that the file type is allowed at this nesting depth. Libraries are
 * refused at depth 1; per the article they are loaded by dyld instead.
 */
switch (header->filetype) {
case MH_OBJECT:
case MH_EXECUTE:
case MH_PRELOAD:
/* Only valid as the top-level image (depth 1). */
if (depth != 1) {
return (LOAD_FAILURE);
}
break;
case MH_FVMLIB:
case MH_DYLIB:
/* Never valid as the top-level image. */
if (depth == 1) {
return (LOAD_FAILURE);
}
break;
case MH_DYLINKER:
/* Only valid on the nested call made for the dynamic linker (depth 2). */
if (depth != 2) {
return (LOAD_FAILURE);
}
break;
default:
return (LOAD_FAILURE);
}
//.....
/*
* Round size of Mach-O commands up to page boundary.
*/
size = round_page(mach_header_sz + header->sizeofcmds);
if (size <= 0)
return(LOAD_BADMACHO);

/*
* Map the load commands into kernel memory.
*/

//.......

/*
* Scan through the commands, processing each one as necessary.
*/
for (pass = 1; pass <= 2; pass++) {
/*
* Loop through each of the load_commands indicated by the
* Mach-O header; if an absurd value is provided, we just
* run off the end of the reserved section by incrementing
* the offset too far, so we are implicitly fail-safe.
*/
offset = mach_header_sz;
/* Number of load commands declared by the header. */
ncmds = header->ncmds;
while (ncmds--) {
/*
* Get a pointer to the command.
*/
lcp = (struct load_command *)(addr + offset);
oldoffset = offset;
offset += lcp->cmdsize;
/*
* Perform prevalidation of the struct load_command
* before we attempt to use its contents. Invalid
* values are ones which result in an overflow, or
* which can not possibly be valid commands, or which
* straddle or exist past the reserved section at the
* start of the image.
*/
if (oldoffset > offset ||
lcp->cmdsize < sizeof(struct load_command) ||
offset > header->sizeofcmds + mach_header_sz) {
ret = LOAD_BADMACHO;
break;
}
/*
* Act on struct load_command's for which kernel
* intervention is required.
*/
switch(lcp->cmd) {
/* 64-bit segment: handled in pass 1 only. */
case LC_SEGMENT_64:
if (pass != 1)
break;
ret = load_segment_64(
(struct segment_command_64 *)lcp,
pager,
file_offset,
macho_size,
ubc_getsize(vp),
map,
result);
break;
/* 32-bit segment: handled in pass 1 only. */
case LC_SEGMENT:
if (pass != 1)
break;
ret = load_segment(
(struct segment_command *) lcp,
pager,
file_offset,
macho_size,
ubc_getsize(vp),
map,
result);
break;
/* Thread state: handled in pass 2 only. */
case LC_THREAD:
if (pass != 2)
break;
ret = load_thread((struct thread_command *)lcp,
thread,
result);
break;
/* Unix thread state: handled in pass 2 only. */
case LC_UNIXTHREAD:
if (pass != 2)
break;
ret = load_unixthread(
(struct thread_command *) lcp,
thread,
result);
break;
/*
 * Dynamic linker request (per the article, carries the path of the dyld
 * the program needs). It is only recorded here, and only for the
 * top-level image and only once; anything else is a failure. The actual
 * load happens after both passes.
 */
case LC_LOAD_DYLINKER:
if (pass != 2)
break;
if ((depth == 1) && (dlp == 0)) {
dlp = (struct dylinker_command *)lcp;
dlarchbits = (header->cputype & CPU_ARCH_MASK);
} else {
ret = LOAD_FAILURE;
}
break;
/* Embedded code signature: handled in pass 2 only. */
case LC_CODE_SIGNATURE:
/* CODE SIGNING */
if (pass != 2)
break;
/* pager -> uip ->
load signatures & store in uip
set VM object "signed_pages"
*/
ret = load_code_signature(
(struct linkedit_data_command *) lcp,
vp,
file_offset,
macho_size,
header->cputype,
(depth == 1) ? result : NULL);
if (ret != LOAD_SUCCESS) {
printf("proc %d: load code signature error %d "
"for file \"%s\"\n",
p->p_pid, ret, vp->v_name);
ret = LOAD_SUCCESS; /* ignore error */
} else {
got_code_signatures = TRUE;
}
break;
default:
/* Other commands are ignored by the kernel */
ret = LOAD_SUCCESS;
break;
}
/* Stop scanning commands on the first failure. */
if (ret != LOAD_SUCCESS)
break;
}
/* A failure in pass 1 skips pass 2 entirely. */
if (ret != LOAD_SUCCESS)
break;
}
/* All load commands were processed successfully. */
if (ret == LOAD_SUCCESS) {
if (! got_code_signatures) {
struct cs_blob *blob;
/* no embedded signatures: look for detached ones */
blob = ubc_cs_blob_get(vp, -1, file_offset);
if (blob != NULL) {
/* get flags to be applied to the process */
result->csflags |= blob->csb_flags;
}
}
/* dlp is the LC_LOAD_DYLINKER command recorded above, if any. */
if (dlp != 0)
/* Load the dynamic linker; this re-enters parse_machfile() for dyld. */
ret = load_dylinker(dlp, dlarchbits, map, thread, depth, result, abi64);
//......
}
//.....
return(ret);
}

在parse_machfile中会将Mach O文件中load command部分的命令加载到内存执行。这些command中最重要的命令是LC_SEGMENT/LC_SEGMENT_64 以及 LC_UNIXTHREAD/LC_MAIN。LC_SEGMENT/LC_SEGMENT_64用于告诉加载器某些可执行的部分代码需要映射到指定的内存区域,LC_UNIXTHREAD/LC_MAIN 告诉加载器在可执行代码加载后的入口点,有了它动态加载器知道在加载结束后跳到哪个位置,这些入口点要么是main方法,要么是在编译时期编译器添加的启动代码。如果没有动态库,这些命令已经足够了,但是如果有依赖动态库,在将所有的segments映射到内存中的时候,加载器还需要处理可执行代码的全部依赖,这些在后面的时候会详细介绍。由于parse_machfile会在后续递归调用,所以这里用了一个depth来控制递归深度。

当depth等于1的时候:

/* File type check from parse_machfile(), annotated for depth == 1. */
switch (header->filetype) {
case MH_OBJECT:
case MH_EXECUTE:
case MH_PRELOAD:
/* depth == 1: accepted */
if (depth != 1) {
return (LOAD_FAILURE);
}
break;
case MH_FVMLIB:
case MH_DYLIB:
/* depth == 1: rejected */
if (depth == 1) {
return (LOAD_FAILURE);
}
break;
case MH_DYLINKER:
/* depth == 1: rejected (only accepted when depth == 2) */
if (depth != 2) {
return (LOAD_FAILURE);
}
break;
default:
return (LOAD_FAILURE);
}

这时候如果Mach O 文件类型为 MH_OBJECT、MH_EXECUTE、MH_PRELOAD 之一,就会走到下面的流程。下面的流程会先(第一遍,pass 1)执行LC_SEGMENT_64,LC_SEGMENT,将某些可执行的代码映射到指定的内存区域。然后再(第二遍,pass 2)执行LC_THREAD,LC_UNIXTHREAD,LC_LOAD_DYLINKER,LC_CODE_SIGNATURE。这里最为关键的命令是LC_LOAD_DYLINKER,它会把该命令(其中包含dyld的路径)记录到dlp中。在最后会调用:

/* Call site at the end of parse_machfile(): */
ret = load_dylinker(dlp, dlarchbits, map, thread, depth, result, abi64);

/*
 * load_dylinker (excerpt)
 *
 * Parses the dynamic linker's Mach-O via parse_machfile(), passing the
 * caller's depth through, and on success publishes dyld's entry point in
 * *result. Locating and opening the dyld file (vp, header, file_offset,
 * macho_size) is elided from this excerpt.
 */
static
load_return_t
load_dylinker(
struct dylinker_command *lcp,
integer_t archbits,
vm_map_t map,
thread_t thread,
int depth,
load_result_t *result,
boolean_t is_64bit
)
{
//..........
/*
* First try to map dyld in directly. This should work most of
* the time since there shouldn't normally be something already
* mapped to its address.
*/
/* Parse dyld itself; results go to a private myresult, not the caller's. */
ret = parse_machfile(vp, map, thread, &header, file_offset, macho_size, depth, &myresult);
/*
* If it turned out something was in the way, then we'll take
* take this longer path to map dyld into a temporary map and
* copy it into destination map at a different address.
*/
/*
 * On success, mark the image as using a dynamic linker and replace the
 * caller's entry point with dyld's.
 */
if (ret == LOAD_SUCCESS) {
result->dynlinker = TRUE;
result->entry_point = myresult.entry_point;
(void)ubc_map(vp, PROT_READ | PROT_EXEC);
}
out:
/* Release the vnode reference on every exit path shown here. */
vnode_put(vp);
return (ret);
}

load_dylinker函数主要负责加载dyld,以及调用parse_machfile函数对dyld解析。这时候header->filetype = MH_DYLINKER 并且 depth = 2.

/* File type check from parse_machfile(), annotated for depth == 2. */
switch (header->filetype) {
case MH_OBJECT:
case MH_EXECUTE:
case MH_PRELOAD:
/* depth == 2: rejected */
if (depth != 1) {
return (LOAD_FAILURE);
}
break;
case MH_FVMLIB:
case MH_DYLIB:
/* depth == 2: this check would pass, but dyld's file type is MH_DYLINKER */
if (depth == 1) {
return (LOAD_FAILURE);
}
break;
case MH_DYLINKER:
/* depth == 2: accepted -- this is the case dyld takes */
if (depth != 2) {
return (LOAD_FAILURE);
}
break;
default:
return (LOAD_FAILURE);
}

下面的流程会先执行LC_SEGMENT_64,LC_SEGMENT将某些可执行的代码映射到指定的内存区域。然后再执行LC_THREAD,LC_UNIXTHREAD,LC_CODE_SIGNATURE。这时候dyld就被加载进来了,在LC_UNIXTHREAD中会设置dyld的entry point。

我们来看下LC_UNIXTHREAD

/* From the load-command switch in parse_machfile(): handled in pass 2 only. */
case LC_UNIXTHREAD:
if (pass != 2)
break;
ret = load_unixthread(
(struct thread_command *) lcp,
thread,
result);
break;
/*
 * load_thread (excerpt)
 *
 * Applies the thread state carried by a thread command. For the first
 * thread (result->thread_count == 0) it also sets up the user stack and
 * the entry point in *result; for later threads it resumes the thread.
 * Returns LOAD_SUCCESS or the first failing helper's error code.
 *
 * NOTE(review): the LC_UNIXTHREAD snippet in this article dispatches to
 * load_unixthread(), while the definition shown here is load_thread() --
 * confirm which one was meant to be quoted.
 */
static
load_return_t
load_thread(
struct thread_command *tcp,
thread_t thread,
load_result_t *result
)
{
//.....
task = get_threadtask(thread);
//.....

/*
 * The payload starts right after the thread_command header and is
 * cmdsize - sizeof(header) bytes long; the same (pointer, size) pair is
 * passed to all three helpers below.
 */
lret = load_threadstate(thread,
(unsigned long *)(((vm_offset_t)tcp) +
sizeof(struct thread_command)),
tcp->cmdsize - sizeof(struct thread_command));
if (lret != LOAD_SUCCESS)
return (lret);

/* First thread only: record the user stack and the entry point. */
if (result->thread_count == 0) {
lret = load_threadstack(thread,
(unsigned long *)(((vm_offset_t)tcp) +
sizeof(struct thread_command)),
tcp->cmdsize - sizeof(struct thread_command),
&result->user_stack,
&customstack);
/* Normalise the helper's flag to 0/1 in the result. */
if (customstack)
result->customstack = 1;
else
result->customstack = 0;

if (lret != LOAD_SUCCESS)
return(lret);

lret = load_threadentry(thread,
(unsigned long *)(((vm_offset_t)tcp) +
sizeof(struct thread_command)),
tcp->cmdsize - sizeof(struct thread_command),
&result->entry_point);
if (lret != LOAD_SUCCESS)
return(lret);
}
else
thread_resume(thread);

result->thread_count++;

return(LOAD_SUCCESS);
}

这里最关键的部分是load_threadentry

/*
 * load_threadentry (excerpt)
 *
 * Walks the thread-state records in a thread command payload and lets
 * thread_entrypoint() derive the entry point from each one.
 *
 * ts          points at the first record; each record is a flavor word, a
 *             size word, then `size` words of state.
 * total_size  size of the payload in bytes.
 * entry_point receives the entry point address.
 *
 * Returns LOAD_BADMACHO if a record overruns the payload, LOAD_FAILURE if
 * thread_entrypoint() fails, LOAD_SUCCESS otherwise. Local declarations
 * are elided from this excerpt.
 */
static
load_return_t
load_threadentry(
thread_t thread,
unsigned long *ts,
unsigned long total_size,
mach_vm_offset_t *entry_point
)
{
//......
/*
* Set the thread state.
*/
*entry_point = MACH_VM_MIN_ADDRESS;
while (total_size > 0) {
flavor = *ts++;
size = *ts++;
/* Record size in bytes: the two header words plus `size` state words. */
entry_size = (size+2)*sizeof(unsigned long);
if (entry_size > total_size)
return(LOAD_BADMACHO);
total_size -= entry_size;
/*
* Third argument is a kernel space pointer; it gets cast
* to the appropriate type in thread_entrypoint() based on
* the value of flavor.
*/
ret = thread_entrypoint(thread, flavor, (thread_state_t)ts, size, entry_point);
if (ret != KERN_SUCCESS) {
return(LOAD_FAILURE);
}
ts += size; /* ts is a (unsigned long *) */
}
return(LOAD_SUCCESS);
}
/*
 * thread_entrypoint
 *
 * Derives the entry point address from a saved x86 thread state.
 *
 * flavor      selects how tstate is interpreted (32-bit or 64-bit x86).
 * tstate      kernel pointer to the saved register state.
 * entry_point in/out: a zero value is first replaced by VM_MIN_ADDRESS,
 *             then overwritten with the saved instruction pointer (or the
 *             minimum address for that flavor when it is zero). Unknown
 *             flavors leave it at its defaulted value.
 *
 * Always returns KERN_SUCCESS. thread and count are unused.
 */
kern_return_t
thread_entrypoint(
__unused thread_t thread,
int flavor,
thread_state_t tstate,
__unused unsigned int count,
mach_vm_offset_t *entry_point
)
{
    /* Make sure the caller never ends up with a zero entry point. */
    if (*entry_point == 0)
        *entry_point = VM_MIN_ADDRESS;

    if (flavor == x86_THREAD_STATE32) {
        /* 32-bit state: the instruction pointer is eip. */
        x86_thread_state32_t *regs32 = (i386_thread_state_t *) tstate;

        *entry_point = regs32->eip ? regs32->eip: VM_MIN_ADDRESS;
    } else if (flavor == x86_THREAD_STATE64) {
        /* 64-bit state: the instruction pointer is rip. */
        x86_thread_state64_t *regs64 = (x86_thread_state64_t *) tstate;

        *entry_point = regs64->rip ? regs64->rip: VM_MIN_ADDRESS64;
    }

    return (KERN_SUCCESS);
}

thread_entrypoint 中会对entry_point进行设置。那么state25->rip又是什么呢?它就是dyld的入口地址 **_dyld_start**。也就是说在dyld加载到内存结束后,会将入口指向 **_dyld_start**。
如果想对dyld有比较直观的了解,可以查看 deviceSupport/xx.x.x/Symbols/usr/lib/dyld。到此为止就是从launchd开始,到dyld加载结束并指定dyld入口地址的全部过程。下篇博客将会对dyld进行详细的介绍。

Contents