Linux用户空间内存区域的匿名映射
1
在调用mmap系统调用时,可以指定的标志(flag)参数:
1: #define MAP_SHARED 0x01 /* Share changes */
2: #define MAP_PRIVATE 0x02 /* Changes are private */
3: #define MAP_TYPE 0x0f /* Mask for type of mapping */
4: #define MAP_FIXED 0x10 /* Interpret addr exactly */
5: #define MAP_ANONYMOUS 0x20 /* don't use a file */
6: #ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED
7: # define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be uninitialized */
8: #else
9: # define MAP_UNINITIALIZED 0x0 /* Don't support this flag */
10: #endif
MAP_SHARED
用于多个进程共享对一个文件的访问
MAP_PRIVATE
用于创建一个与数据源分离的私有映射,对区域的写入操作不影响数据源文件中的内容
MAP_FIXED
用于在指定的目标线性地址创建一个映射,不允许调整到其他地址
MAP_ANONYMOUS
用于创建与文件无关的映射,或者说没有数据源的映射
do_anonymous_page会调用alloc_zeroed_user_highpage_movable分配一个初始化为全0的内存页。
2
在vm_area_struct数据结构定义中,有一个双链表结点:anon_vma_chain
1: struct vm_area_struct {
2: ......
3: /*
4: * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
5: * list, after a COW of one of the file pages. A MAP_SHARED vma
6: * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
7: * or brk vma (with NULL file) can only be in an anon_vma list.
8: */
9: struct list_head anon_vma_chain; /* Serialized by mmap_sem &
10: * page_table_lock */
11: struct anon_vma *anon_vma; /* Serialized by page_table_lock */
12: ......
13: }
其中,struct anon_vma定义:
1: /*
2: * The anon_vma heads a list of private "related" vmas, to scan if
3: * an anonymous page pointing to this anon_vma needs to be unmapped:
4: * the vmas on the list will be related by forking, or by splitting.
5: *
6: * Since vmas come and go as they are split and merged (particularly
7: * in mprotect), the mapping field of an anonymous page cannot point
8: * directly to a vma: instead it points to an anon_vma, on whose list
9: * the related vmas can be easily linked or unlinked.
10: *
11: * After unlinking the last vma on the list, we must garbage collect
12: * the anon_vma object itself: we're guaranteed no page can be
13: * pointing to this anon_vma once its vma list is empty.
14: */
15: struct anon_vma {
16: struct anon_vma *root; /* Root of this anon_vma tree */
17: struct mutex mutex; /* Serialize access to vma list */
18: /*
19: * The refcount is taken on an anon_vma when there is no
20: * guarantee that the vma of page tables will exist for
21: * the duration of the operation. A caller that takes
22: * the reference is responsible for clearing up the
23: * anon_vma if they are the last user on release
24: */
25: atomic_t refcount;
26:
27: /*
28: * NOTE: the LSB of the head.next is set by
29: * mm_take_all_locks() _after_ taking the above lock. So the
30: * head must only be read/written after taking the above lock
31: * to be sure to see a valid next pointer. The LSB bit itself
32: * is serialized by a system wide lock only visible to
33: * mm_take_all_locks() (mm_all_locks_mutex).
34: */
35: struct list_head head; /* Chain of private "related" vmas */
36: };
3
do_mmap
1: static inline unsigned long do_mmap(struct file *file, unsigned long addr,
2: unsigned long len, unsigned long prot,
3: unsigned long flag, unsigned long offset)
4: {
5: unsigned long ret = -EINVAL;
6: if ((offset + PAGE_ALIGN(len)) < offset)
7: goto out;
8: if (!(offset & ~PAGE_MASK))
9: ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
10: out:
11: return ret;
12: }
if ((offset + PAGE_ALIGN(len)) < offset)
/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
/*
* 'kernel.h' contains some often-used function prototypes etc
*/
#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask))
即
if ((offset + (((len) + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1))) < offset)
表示如果offset加上按页边界对齐(align to page boundary)之后的len发生了无符号整数溢出(回绕后的结果反而小于offset),说明请求映射的范围已经超出了可以表示的范围,没有那么多的线性地址空间可以给它映射,因此返回-EINVAL失败。
if (!(offset & ~PAGE_MASK))
如果offset是位于页的边界处,则继续操作
ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
其中最后一个参数代表了映射区域在文件中的页序号。
1: /*
2: * The caller must hold down_write(&current->mm->mmap_sem).
3: */
4:
5: unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
6: unsigned long len, unsigned long prot,
7: unsigned long flags, unsigned long pgoff)
8: {
9: struct mm_struct * mm = current->mm;
10: struct inode *inode;
11: vm_flags_t vm_flags;
12: int error;
13: unsigned long reqprot = prot;
14:
15: /*
16: * Does the application expect PROT_READ to imply PROT_EXEC?
17: *
18: * (the exception is when the underlying filesystem is noexec
19: * mounted, in which case we dont add PROT_EXEC.)
20: */
21: if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
22: if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
23: prot |= PROT_EXEC;
24:
25: if (!len)
26: return -EINVAL;
27:
28: if (!(flags & MAP_FIXED))
29: addr = round_hint_to_min(addr);
30:
31: /* Careful about overflows.. */
32: len = PAGE_ALIGN(len);
33: if (!len)
34: return -ENOMEM;
35:
36: /* offset overflow? */
37: if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
38: return -EOVERFLOW;
39:
40: /* Too many mappings? */
41: if (mm->map_count > sysctl_max_map_count)
42: return -ENOMEM;
43:
44: /* Obtain the address to map to. we verify (or select) it and ensure
45: * that it represents a valid section of the address space.
46: */
47: addr = get_unmapped_area(file, addr, len, pgoff, flags);
48: if (addr & ~PAGE_MASK)
49: return addr;
50:
51: /* Do simple checking here so the lower-level routines won't have
52: * to. we assume access permissions have been handled by the open
53: * of the memory object, so we don't do any here.
54: */
55: vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
56: mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
57:
58: if (flags & MAP_LOCKED)
59: if (!can_do_mlock())
60: return -EPERM;
61:
62: /* mlock MCL_FUTURE? */
63: if (vm_flags & VM_LOCKED) {
64: unsigned long locked, lock_limit;
65: locked = len >> PAGE_SHIFT;
66: locked += mm->locked_vm;
67: lock_limit = rlimit(RLIMIT_MEMLOCK);
68: lock_limit >>= PAGE_SHIFT;
69: if (locked > lock_limit && !capable(CAP_IPC_LOCK))
70: return -EAGAIN;
71: }
72:
73: inode = file ? file->f_path.dentry->d_inode : NULL;
74:
75: if (file) {
76: switch (flags & MAP_TYPE) {
77: case MAP_SHARED:
78: if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
79: return -EACCES;
80:
81: /*
82: * Make sure we don't allow writing to an append-only
83: * file..
84: */
85: if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
86: return -EACCES;
87:
88: /*
89: * Make sure there are no mandatory locks on the file.
90: */
91: if (locks_verify_locked(inode))
92: return -EAGAIN;
93:
94: vm_flags |= VM_SHARED | VM_MAYSHARE;
95: if (!(file->f_mode & FMODE_WRITE))
96: vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
97:
98: /* fall through */
99: case MAP_PRIVATE:
100: if (!(file->f_mode & FMODE_READ))
101: return -EACCES;
102: if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
103: if (vm_flags & VM_EXEC)
104: return -EPERM;
105: vm_flags &= ~VM_MAYEXEC;
106: }
107:
108: if (!file->f_op || !file->f_op->mmap)
109: return -ENODEV;
110: break;
111:
112: default:
113: return -EINVAL;
114: }
115: } else {
116: switch (flags & MAP_TYPE) {
117: case MAP_SHARED:
118: /*
119: * Ignore pgoff.
120: */
121: pgoff = 0;
122: vm_flags |= VM_SHARED | VM_MAYSHARE;
123: break;
124: case MAP_PRIVATE:
125: /*
126: * Set pgoff according to addr for anon_vma.
127: */
128: pgoff = addr >> PAGE_SHIFT;
129: break;
130: default:
131: return -EINVAL;
132: }
133: }
134:
135: error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
136: if (error)
137: return error;
138:
139: return mmap_region(file, addr, len, flags, vm_flags, pgoff);
140: }
141: EXPORT_SYMBOL(do_mmap_pgoff);
/* Obtain the address to map to. we verify (or select) it and ensure
* that it represents a valid section of the address space.
*/
addr = get_unmapped_area(file, addr, len, pgoff, flags);
if (addr & ~PAGE_MASK)
return addr;
get_unmapped_area函数用于查找一段足以容纳所请求长度的vma的线性地址范围,并返回这段范围的起始地址。查找成功时,返回的起始地址一定是页对齐的;查找失败时,返回的是一个负的错误码(例如-ENOMEM),它被当作unsigned long看待时不是页对齐的。因此,如果addr & ~PAGE_MASK不为0,就代表没有找到符合要求的地址,也就不再往下走了,直接把这个错误码返回。
但是问题是,如果直接返回了,那么调用者会不会不做检查,直接认为内核已经完成了mmap的操作,而尝试去读写这块还没有与文件建立起关联的内存区域呢?这样会发生什么不可知的事?
【根据http://www.cnblogs.com/long123king/p/3502170.html中的思想,当进程真正需要访问页时,会触发Page Fault,那么这一步关键是设置好相应的Page Fault handler以及相应struct的指针成员】
Linux用户空间内存区域的匿名映射的更多相关文章
- linux用户空间和内核空间(内核高端内存)_转
转自:Linux用户空间与内核空间(理解高端内存) 参考: 1. 进程内核栈.用户栈 2. 解惑-Linux内核空间 3. linux kernel学习笔记-5 内存管理 Linux 操作系统和驱 ...
- Linux内核空间内存申请函数kmalloc、kzalloc、vmalloc
我们都知道在用户空间动态申请内存用的函数是 malloc(),这个函数在各种操作系统上的使用是一致的,对应的用户空间内存释放函数是 free(). 注意:动态申请的内存使用完后必须要释放,否则会造成内 ...
- Linux内核空间内存申请函数kmalloc、kzalloc、vmalloc的区别【转】
转自:http://www.th7.cn/system/lin/201606/167750.shtml 我们都知道在用户空间动态申请内存用的函数是 malloc(),这个函数在各种操作系统上的使用是一 ...
- Linux用户空间与内核空间(理解高端内存)
Linux 操作系统和驱动程序运行在内核空间,应用程序运行在用户空间,两者不能简单地使用指针传递数据,因为Linux使用的虚拟内存机制,用户空间的数据可能被换出,当内核空间使用用户空间指针时,对应的数 ...
- linux 用户空间与内核空间——高端内存详解
摘要:Linux 操作系统和驱动程序运行在内核空间,应用程序运行在用户空间,两者不能简单地使用指针传递数据,因为Linux使用的虚拟内存机制,用户空间的数据可能被换出,当内核空间使用用户空间指针时,对 ...
- Linux用户空间与内核空间(理解高端内存)【转】
转自:http://www.cnblogs.com/wuchanming/p/4360277.html Linux 操作系统和驱动程序运行在内核空间,应用程序运行在用户空间,两者不能简单地使用指针传递 ...
- linux 用户空间与内核空间——高端内存了解
Linux 操作系统和驱动程序运行在内核空间,应用程序运行在用户空间,两者不能简单地使用指针传递数据,因为Linux使用的虚拟内存机制,用户空间的数据可能被换出,当内核空间使用用户空间指针时,对应的数 ...
- Linux用户空间与内核空间
源:http://blog.csdn.net/f22jay/article/details/7925531 Linux 操作系统和驱动程序运行在内核空间,应用程序运行在用户空间,两者不能简单地使用指针 ...
- Linux用户空间与内核地址空间
Linux 操作系统和驱动程序运行在内核空间,应用程序运行在用户空间,两者不能简单地使用指针传递数据,因为Linux使用的虚拟内存机制,用户空间的数据可能被换出,当内核空间使用用户空间指针时,对应的数 ...
随机推荐
- USB仪器控制教程
概观 本教程是为出发点使用NI-VISA与USB设备进行通信.它不打算作为一个起点,学习USB构架或USB通讯中使用的各种协议.阅读本教程后,您应该能够安装一个USB设备,并使用NI-VISA与该设备 ...
- linux命令行光标移动技巧
看一个真正的专家操作命令行绝对是一种很好的体验-光标在单词之间来回穿梭,命令行不同的滚动.在这里强烈建立适应GUI节目的开发者尝试一下在提示符下面工作.但是事情也不是那么简单,还是需要知道“如何去做” ...
- Tomcat负载均衡、调优核心应用进阶学习笔记(四):JVM调优
文章目录 概述 **JVM管理的内存段可分为两大类:线程共享内存和线程私有内存** Java Heap SIze Options The memory structure of a JVM proce ...
- vue/cli3引入cesium
vue/cli3引入cesium 一开始用了webpack结合vue引入vue:结果是各种bug,搞了半天.最后问了基友,发现vue脚手架这个·简单高效的方法,只需要几行代码就轻松地搞定啦! 方案一. ...
- Web RTC录视频
<!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8&quo ...
- (1.3)学习笔记之mysql体系结构(C/S整体架构、内存结构、物理存储结构、逻辑结构)
目录 1.学习笔记之mysql体系结构(C/S架构) 2.mysql整体架构 3.存储引擎 4.sql语句处理--SQL层(内存层) 5.服务器内存结构 6.mysql如何使用磁盘空间 7.mysql ...
- Springboot的Mybatis逆向工程
1.pom.xml添加mybatis和逆向插件依赖: <dependency> <groupId>org.mybatis.spring.boot</groupId> ...
- Oracle查询最近执行过的SQL语句
oracle 查询最近执行过的 SQL语句 select sql_text,last_load_time from v$sql order by last_load_time desc; SELECT ...
- 详解 Flexible Box 中的 flex 属性
导读: 弹性盒子是 CSS3 的一种布局模式,一种当页面需要适应不同的屏幕大小以及设备类型时确保元素拥有适当的行为的布局方式.其中 flex 属性用于指定弹性子元素如何分配空间. flex 属性的值 ...
- Java技术专区-虚拟机系列-类加载机制(类的初始化)
类加载的生命周期: 加载 -> 验证 -> 准备 -> 解析 -> 初始化 -> 使用 -> 卸载 加载 -> 验证 -> 准备 -& ...