可执行文件加载执行过程:

上一节我们说到ELF文件格式,静态库的符号解析和重定位的内容。这一节我们来分析一下可执行文件。

由上一节我们知道可执行文件也是ELF文件,当程序被加载器加载到内存时是按照ELF格式去解析,然后把可执行文件的不同节加载到虚拟地址空间中。我们看一下32位下的进程虚拟地址模型:

可执行文件的信息被加载到虚拟地址空间之后,根据 ELF 头中的信息找到程序的入口地址,就可以开始执行进程了。我们看一下 Linux 3.2 内核代码 fs/exec.c 中加载并执行可执行文件的大致过程。

static int do_execve_common(const char *filename,
struct user_arg_ptr argv,
struct user_arg_ptr envp,
struct pt_regs *regs)
{
struct linux_binprm *bprm;
struct file *file;
struct files_struct *displaced;
bool clear_in_exec;
int retval;
const struct cred *cred = current_cred();
...
...
retval = search_binary_handler(bprm,regs); //搜索可执行文件处理接口
if (retval < )
goto out;
} int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
{
unsigned int depth = bprm->recursion_depth;
int try,retval;
struct linux_binfmt *fmt;
pid_t old_pid; .......
retval = -ENOENT;
for (try=; try<; try++) {
read_lock(&binfmt_lock);
list_for_each_entry(fmt, &formats, lh) {
int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
//找到可执行文件加载入口
if (!fn)
continue;
}
}
} typedef struct elf64_hdr {
unsigned char e_ident[EI_NIDENT]; /* ELF "magic number" */
Elf64_Half e_type;
Elf64_Half e_machine;
Elf64_Word e_version;
Elf64_Addr e_entry; /* Entry point virtual address */
Elf64_Off e_phoff; /* Program header table file offset */
Elf64_Off e_shoff; /* Section header table file offset */
Elf64_Word e_flags;
Elf64_Half e_ehsize;
Elf64_Half e_phentsize;
Elf64_Half e_phnum;
Elf64_Half e_shentsize;
Elf64_Half e_shnum;
Elf64_Half e_shstrndx;
} Elf64_Ehdr; load_binary 可执行目标文件对应 binfmt_elf.c 文件
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
//解析elf文件 做段映射
struct {
struct elfhdr elf_ex;
struct elfhdr interp_elf_ex;
} *loc; loc = kmalloc(sizeof(*loc), GFP_KERNEL);
if (!loc) {
retval = -ENOMEM;
goto out_ret;
} /* Get the exec-header */
loc->elf_ex = *((struct elfhdr *)bprm->buf); retval = -ENOEXEC;
/* First of all, some simple consistency checks */
if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != )
goto out; if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
goto out;
if (!elf_check_arch(&loc->elf_ex))
goto out;
if (!bprm->file->f_op || !bprm->file->f_op->mmap)
goto out; /* Now read in all of the header information */
if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
goto out;
if (loc->elf_ex.e_phnum < ||
loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
goto out;
size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
retval = -ENOMEM;
elf_phdata = kmalloc(size, GFP_KERNEL);
if (!elf_phdata)
goto out; retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
(char *)elf_phdata, size);
if (retval != size) {
if (retval >= )
retval = -EIO;
goto out_free_ph;
} elf_ppnt = elf_phdata;
elf_bss = ;
elf_brk = ; start_code = ~0UL;
end_code = ;
start_data = ;
end_data = ; for (i = ; i < loc->elf_ex.e_phnum; i++) {
if (elf_ppnt->p_type == PT_INTERP) {
/* This is the program interpreter used for
* shared libraries - for now assume that this
* is an a.out format binary
*/
retval = -ENOEXEC;
if (elf_ppnt->p_filesz > PATH_MAX ||
elf_ppnt->p_filesz < )
goto out_free_ph; retval = -ENOMEM;
elf_interpreter = kmalloc(elf_ppnt->p_filesz,
GFP_KERNEL);
if (!elf_interpreter)
goto out_free_ph; retval = kernel_read(bprm->file, elf_ppnt->p_offset,
elf_interpreter,
elf_ppnt->p_filesz);
if (retval != elf_ppnt->p_filesz) {
if (retval >= )
retval = -EIO;
goto out_free_interp;
}
/* make sure path is NULL terminated */
retval = -ENOEXEC;
if (elf_interpreter[elf_ppnt->p_filesz - ] != '\0')
goto out_free_interp; interpreter = open_exec(elf_interpreter);
retval = PTR_ERR(interpreter);
if (IS_ERR(interpreter))
goto out_free_interp; /*
* If the binary is not readable then enforce
* mm->dumpable = 0 regardless of the interpreter's
* permissions.
*/
would_dump(bprm, interpreter); /* Get the exec headers */
retval = kernel_read(interpreter, ,
(void *)&loc->interp_elf_ex,
sizeof(loc->interp_elf_ex));
if (retval != sizeof(loc->interp_elf_ex)) {
if (retval >= )
retval = -EIO;
goto out_free_dentry;
} break;
}
elf_ppnt++;
} elf_ppnt = elf_phdata;
for (i = ; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
if (elf_ppnt->p_type == PT_GNU_STACK) {
if (elf_ppnt->p_flags & PF_X)
executable_stack = EXSTACK_ENABLE_X;
else
executable_stack = EXSTACK_DISABLE_X;
break;
} /* Some simple consistency checks for the interpreter */
if (elf_interpreter) {
retval = -ELIBBAD;
/* Not an ELF interpreter */
if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != )
goto out_free_dentry;
/* Verify the interpreter has a valid arch */
if (!elf_check_arch(&loc->interp_elf_ex))
goto out_free_dentry;
} /* Flush all traces of the currently running executable */
retval = flush_old_exec(bprm);
if (retval)
goto out_free_dentry; /* OK, This is the point of no return */
current->flags &= ~PF_FORKNOEXEC;
current->mm->def_flags = def_flags; /* Do this immediately, since STACK_TOP as used in setup_arg_pages
may depend on the personality. */
SET_PERSONALITY(loc->elf_ex);
if (elf_read_implies_exec(loc->elf_ex, executable_stack))
current->personality |= READ_IMPLIES_EXEC; if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
current->flags |= PF_RANDOMIZE; setup_new_exec(bprm); /* Do this so that we can load the interpreter, if need be. We will
change some of these later */
current->mm->free_area_cache = current->mm->mmap_base;
current->mm->cached_hole_size = ;
retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
executable_stack);
if (retval < ) {
send_sig(SIGKILL, current, );
goto out_free_dentry;
} current->mm->start_stack = bprm->p; /* Now we do a little grungy work by mmapping the ELF image into
the correct location in memory. */
for(i = , elf_ppnt = elf_phdata;
i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
int elf_prot = , elf_flags;
unsigned long k, vaddr;
unsigned long total_size = ; if (elf_ppnt->p_type != PT_LOAD)
continue; if (unlikely (elf_brk > elf_bss)) {
unsigned long nbyte; /* There was a PT_LOAD segment with p_memsz > p_filesz
before this one. Map anonymous pages, if needed,
and clear the area. */
retval = set_brk(elf_bss + load_bias,
elf_brk + load_bias);
if (retval) {
send_sig(SIGKILL, current, );
goto out_free_dentry;
}
nbyte = ELF_PAGEOFFSET(elf_bss);
if (nbyte) {
nbyte = ELF_MIN_ALIGN - nbyte;
if (nbyte > elf_brk - elf_bss)
nbyte = elf_brk - elf_bss;
if (clear_user((void __user *)elf_bss +
load_bias, nbyte)) {
/*
* This bss-zeroing can fail if the ELF
* file specifies odd protections. So
* we don't check the return value
*/
}
}
} if (elf_ppnt->p_flags & PF_R)
elf_prot |= PROT_READ;
if (elf_ppnt->p_flags & PF_W)
elf_prot |= PROT_WRITE;
if (elf_ppnt->p_flags & PF_X)
elf_prot |= PROT_EXEC; elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE; vaddr = elf_ppnt->p_vaddr;
if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
elf_flags |= MAP_FIXED;
} else if (loc->elf_ex.e_type == ET_DYN) {
/* Try and get dynamic programs out of the way of the
* default mmap base, as well as whatever program they
* might try to exec. This is because the brk will
* follow the loader, and is not movable. */
#if defined(CONFIG_X86) || defined(CONFIG_ARM)
/* Memory randomization might have been switched off
* in runtime via sysctl.
* If that is the case, retain the original non-zero
* load_bias value in order to establish proper
* non-randomized mappings.
*/
if (current->flags & PF_RANDOMIZE)
load_bias = ;
else
load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
total_size = total_mapping_size(elf_phdata,
loc->elf_ex.e_phnum);
if (!total_size) {
retval = -EINVAL;
goto out_free_dentry;
}
} error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
elf_prot, elf_flags, total_size);
if (BAD_ADDR(error)) {
send_sig(SIGKILL, current, );
retval = IS_ERR((void *)error) ?
PTR_ERR((void*)error) : -EINVAL;
goto out_free_dentry;
} if (!load_addr_set) {
load_addr_set = ;
load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
if (loc->elf_ex.e_type == ET_DYN) {
load_bias += error -
ELF_PAGESTART(load_bias + vaddr);
load_addr += load_bias;
reloc_func_desc = load_bias;
}
}
k = elf_ppnt->p_vaddr;
if (k < start_code)
start_code = k;
if (start_data < k)
start_data = k; /*
* Check to see if the section's size will overflow the
* allowed task size. Note that p_filesz must always be
* <= p_memsz so it is only necessary to check p_memsz.
*/
if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
elf_ppnt->p_memsz > TASK_SIZE ||
TASK_SIZE - elf_ppnt->p_memsz < k) {
/* set_brk can never work. Avoid overflows. */
send_sig(SIGKILL, current, );
retval = -EINVAL;
goto out_free_dentry;
} k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz; if (k > elf_bss)
elf_bss = k;
if ((elf_ppnt->p_flags & PF_X) && end_code < k)
end_code = k;
if (end_data < k)
end_data = k;
k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
if (k > elf_brk)
elf_brk = k;
} loc->elf_ex.e_entry += load_bias;
elf_bss += load_bias;
elf_brk += load_bias;
start_code += load_bias;
end_code += load_bias;
start_data += load_bias;
end_data += load_bias; /* Calling set_brk effectively mmaps the pages that we need
* for the bss and break sections. We must do this before
* mapping in the interpreter, to make sure it doesn't wind
* up getting placed where the bss needs to go.
*/
retval = set_brk(elf_bss, elf_brk);
if (retval) {
send_sig(SIGKILL, current, );
goto out_free_dentry;
}
if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
send_sig(SIGSEGV, current, );
retval = -EFAULT; /* Nobody gets to see this, but.. */
goto out_free_dentry;
} if (elf_interpreter) {
unsigned long uninitialized_var(interp_map_addr); elf_entry = load_elf_interp(&loc->interp_elf_ex,
interpreter,
&interp_map_addr,
load_bias);
if (!IS_ERR((void *)elf_entry)) {
/*
* load_elf_interp() returns relocation
* adjustment
*/
interp_load_addr = elf_entry;
elf_entry += loc->interp_elf_ex.e_entry;
}
if (BAD_ADDR(elf_entry)) {
force_sig(SIGSEGV, current);
retval = IS_ERR((void *)elf_entry) ?
(int)elf_entry : -EINVAL;
goto out_free_dentry;
}
reloc_func_desc = interp_load_addr; allow_write_access(interpreter);
fput(interpreter);
kfree(elf_interpreter);
} else {
elf_entry = loc->elf_ex.e_entry;
if (BAD_ADDR(elf_entry)) {
force_sig(SIGSEGV, current);
retval = -EINVAL;
goto out_free_dentry;
}
} kfree(elf_phdata); set_binfmt(&elf_format); #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
if (retval < ) {
send_sig(SIGKILL, current, );
goto out;
}
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ install_exec_creds(bprm);
current->flags &= ~PF_FORKNOEXEC;
retval = create_elf_tables(bprm, &loc->elf_ex,
load_addr, interp_load_addr);
if (retval < ) {
send_sig(SIGKILL, current, );
goto out;
}
/* N.B. passed_fileno might not be initialized? */
current->mm->end_code = end_code;
current->mm->start_code = start_code;
current->mm->start_data = start_data;
current->mm->end_data = end_data;
current->mm->start_stack = bprm->p; #ifdef arch_randomize_brk
if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > )) {
current->mm->brk = current->mm->start_brk =
arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
current->brk_randomized = ;
#endif
}
#endif if (current->personality & MMAP_PAGE_ZERO) {
/* Why this, you ask??? Well SVr4 maps page 0 as read-only,
and some applications "depend" upon this behavior.
Since we do not have the power to recompile these, we
emulate the SVr4 behavior. Sigh. */
down_write(&current->mm->mmap_sem);
error = do_mmap(NULL, , PAGE_SIZE, PROT_READ | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE, );
up_write(&current->mm->mmap_sem);
}
//开始执行新进程
start_thread(regs, elf_entry, bprm->p);
retval = ;
out:
kfree(loc);
out_ret:
return retval; /* error cleanup */
out_free_dentry:
allow_write_access(interpreter);
if (interpreter)
fput(interpreter);
out_free_interp:
kfree(elf_interpreter);
out_free_ph:
kfree(elf_phdata);
goto out;
}

我们仔细看一下这里

        /* Allocate a buffer for the program header table (size bytes). */
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        /* Read the executable's program headers from file offset e_phoff. */
        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                /* A short (non-negative) read is reported as -EIO. */
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

readelf 看一下可执行文件的程序头(x86_64)

程序头描述了可执行文件到虚拟地址空间的映射关系。这里还可以看到 INTERP 段,它请求的解释器是 /lib64/ld-linux-x86-64.so.2,该解释器用来加载和定位动态库。这里的可执行文件对标准 C 库是动态链接的,所以会请求这个文件。

for (i = 0; i < loc->elf_ex.e_phnum; i++) {
	/* Check whether the image requests an interpreter to be loaded. */
	if (elf_ppnt->p_type == PT_INTERP) {
		/* This is the program interpreter used for
		 * shared libraries - for now assume that this
		 * is an a.out format binary
		 */
		retval = -ENOEXEC;
		if (elf_ppnt->p_filesz > PATH_MAX ||
		    elf_ppnt->p_filesz < 2)
			goto out_free_ph;

		retval = -ENOMEM;
		elf_interpreter = kmalloc(elf_ppnt->p_filesz,
					  GFP_KERNEL);
		if (!elf_interpreter)
			goto out_free_ph;

		/*
		 * Read the whole PT_INTERP segment (the interpreter path)
		 * into the buffer, using its file offset and size.
		 */
		retval = kernel_read(bprm->file, elf_ppnt->p_offset,
				     elf_interpreter,
				     elf_ppnt->p_filesz);
		if (retval != elf_ppnt->p_filesz) {
			if (retval >= 0)
				retval = -EIO;
			goto out_free_interp;
		}
		/* make sure path is NULL terminated */
		retval = -ENOEXEC;
		if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
			goto out_free_interp;

		/* Open the interpreter file. */
		interpreter = open_exec(elf_interpreter);
		retval = PTR_ERR(interpreter);
		if (IS_ERR(interpreter))
			goto out_free_interp;

		/*
		 * If the binary is not readable then enforce
		 * mm->dumpable = 0 regardless of the interpreter's
		 * permissions.
		 */
		would_dump(bprm, interpreter);

		/* Get the exec headers: read the interpreter's ELF header. */
		retval = kernel_read(interpreter, 0,
				     (void *)&loc->interp_elf_ex,
				     sizeof(loc->interp_elf_ex));
		if (retval != sizeof(loc->interp_elf_ex)) {
			if (retval >= 0)
				retval = -EIO;
			goto out_free_dentry;
		}

		break;
	}
	elf_ppnt++;
}
/* PT_GNU_STACK: decides whether the stack is mapped executable. */
/* ... (elided) ... */

接下来的工作就是检查并确定装入地址,然后开始装入各个段:

/* Now we do a little grungy work by mmapping the ELF image into
the correct location in memory. */
for(i = , elf_ppnt = elf_phdata;
i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
int elf_prot = , elf_flags;
unsigned long k, vaddr;
unsigned long total_size = ; if (elf_ppnt->p_type != PT_LOAD)
continue; if (unlikely (elf_brk > elf_bss)) {
unsigned long nbyte;
。。。。。。。。
error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
elf_prot, elf_flags, total_size);
if (BAD_ADDR(error)) {
send_sig(SIGKILL, current, );
retval = IS_ERR((void *)error) ?
PTR_ERR((void*)error) : -EINVAL;
goto out_free_dentry;
}
//从elf中读取程序入口地址 开始执行
start_thread(regs, elf_entry, bprm->p);
retval = ;
}

执行的过程:

当加载器执行时,就会创建上图所示的虚拟存储器镜像(图为 32 位系统)。在程序头表(段头表)的指导下,加载器将可执行文件的代码段和数据段映射到进程的虚拟地址空间;接下来加载器跳转到 _start 入口处,依次执行:__libc_init_first ------> _init -------> atexit -------> main ------> _exit。即:首先初始化 C 库,调用初始化代码,注册退出函数,然后执行 main 函数,最后执行退出函数。关于这块可以参考: http://blog.chinaunix.net/uid-24774106-id-3450789.html

到这里,可执行文件的执行过程已经简单了解了。整个过程与进程的创建和执行有关,加载、分析 ELF 本身并不复杂;其中还牵涉到动态库的链接和使用,后续会单独看一下动态库的加载过程。

Linux 链接详解(2)的更多相关文章

  1. Linux 链接详解----静态链接实例分析

    由Linux链接详解(1)中我们简单的分析了静态库的引用解析和重定位的内容, 下面我们结合实例来看一下静态链接重定位过程. /* * a.c */ ; void add(int c); int mai ...

  2. Linux 链接详解----动态链接库

    静态库的缺点: 库函数被包含在每一个运行的进程中,会造成主存的浪费. 目标文件的size过大 每次更新一个模块都需要重新编译,更新困难,使用不方便. 动态库: 是一个目标文件,包含代码和数据,它可以在 ...

  3. Linux 链接详解(1)

    可执行文件的生成过程: hello.c ----预处理--->  hello.i ----编译----> hello.s -----汇编-----> hello.o -----链接- ...

  4. Linux命令详解之—pwd命令

    Linux的pwd命令也是一个非常常用的命令,本文为大家介绍下Linux中pwd命令的用法. 更多Linux命令详情请看:Linux命令速查手册 Linux pwd命令用于显示工作目录. 执行pwd指 ...

  5. Linux 系统结构详解

    Linux 系统结构详解 Linux系统一般有4个主要部分: 内核.shell.文件系统和应用程序.内核.shell和文件系统一起形成了基本的操作系统结构,它们使得用户可以运行程序.管理文件并使用系统 ...

  6. Linux权限详解 命令之 chmod:修改权限

    权限简介 Linux系统上对文件的权限有着严格的控制,用于如果相对某个文件执行某种操作,必须具有对应的权限方可执行成功. Linux下文件的权限类型一般包括读,写,执行.对应字母为 r.w.x. Li ...

  7. Linux 目录详解 树状目录结构图

    1.树状目录结构图 2./目录 目录 描述 / 第一层次结构的根.整个文件系统层次结构的根目录. /bin/ 需要在单用户模式可用的必要命令(可执行文件):面向所有用户,例如:cat.ls.cp,和/ ...

  8. [转帖]Linux文件系统详解

    Linux文件系统详解 https://www.cnblogs.com/alantu2018/p/8461749.html 贼复杂.. 从操作系统的角度详解Linux文件系统层次.文件系统分类.文件系 ...

  9. Linux命令详解之—tail命令

    tail命令也是一个非常常用的文件查看类的命令,今天就为大家介绍下Linux tail命令的用法. 更多Linux命令详情请看:Linux命令速查手册 Linux tail命令主要用来从指定点开始将文 ...

随机推荐

  1. 一张表搞懂各种 Docker 监控方案 - 每天5分钟玩转 Docker 容器技术(86)

    前面我们已经介绍了ps/top/stats.Sysdig.Weave Scope.cAdvisor 和 Prometheus 多种容器监控工具和方案,是时候做一个比较了.下面将从五个方面来对比它们之间 ...

  2. java笔记02

    一,编写一个方法,使用以上算法生成指定数目(比如1000个)的随机整数 /** * */ package 课堂2; import java.util.Random; /** * @author 信16 ...

  3. PHP上传大文件配置

    使用PHP上传比较大的文件时,发现程序没反应,那是因为PHP的默认设置限制了上传文件的大小,所以要修改配置文件php.ini中的一些选项:(以下只是我个人的配置,大家可根据自己的实际需要配置) fil ...

  4. LeetCode 404. Sum of Left Leaves (左子叶之和)

    Find the sum of all left leaves in a given binary tree. Example: 3 / \ 9 20 / \ 15 7 There are two l ...

  5. 用source语句引用mysql文件的细节注意

    今天在使用 mysql数据库的时候,创建 数据表的时候出现了很多的小问题,今天一天花费了大量的时间去解决这些问题.首先就是一些小的细节,在文本编辑器上编辑好了SQL语句,然后转移到mysql的命令行中 ...

  6. 数位DP按位枚举模板

    借鉴:http://www.cnblogs.com/xz816111/p/4809913.html // pos = 当前处理的位置(一般从高位到低位) // pre = 上一个位的数字(更高的那一位 ...

  7. Hive安装和部署

    在root的用户下搭建的 构建hive之前必须要先搭建好hadoop才可以. hive定义了一种类似SQL查询语言--HQL 类似SQL ,但是不完全相同 Hive是一个数据仓库,它部署在Hadoop ...

  8. SQL Server分组查询某最大值的整条数据(包含linq写法)

    想实现如下效果,就是分组后时间最大的那一条数据: 1.SQL SELECT * FROM ( SELECT * , ROW_NUMBER() OVER ( PARTITION BY RIP_GUID ...

  9. Linux网络中接收 "二进制" 流的那些事 --- 就recv的返回值和strlen库函数进行对话

    1.    前言 很多朋友在做网络编程开发的时候可能都遇到这样的问题,在进行接收二进制流的数据的时候,使用strlen库函数来得到 二进制数据长度的时候并不准确.为什么呢??首先,使用strlen进行 ...

  10. 一、VueJs 填坑日记之基础概念知识解释

    概述在最开始听说vuejs这个词是在2016年,当时天真的认为自己是个后端开发工程师不需要学习太多的前端知识,不过紧接着在2017年在公司就用到了vuejs.对于初学者(尤其是干后端的初学者)来说,刚 ...