1: struct per_cpu_pageset __percpu *pageset;

首先,分析函数__free_pages。该函数是Buddy System提供的API接口函数,用于释放曾经分配的一组页(共2^order个页,具体数量视order大小而定)。

   1: void __free_pages(struct page *page, unsigned int order)

   2: {

   3:     if (put_page_testzero(page)) {

   4:         if (order == 0)

   5:             free_hot_cold_page(page, 0);

   6:         else

   7:             __free_pages_ok(page, order);

   8:     }

   9: }

首先,调用put_page_testzero来查看该页是否还有其他引用(struct page结构中的_count)。

即先减去当前的这次引用(减1),然后查看是否引用值已经为0。

   1: /*

   2:  * Drop a ref, return true if the refcount fell to zero (the page has no users)

   3:  */

   4: static inline int put_page_testzero(struct page *page)

   5: {

   6:     VM_BUG_ON(atomic_read(&page->_count) == 0);

   7:     return atomic_dec_and_test(&page->_count);

   8: }

其中,atomic_xxx是内核提供的原子操作实现,有兴趣的话可以进一步深入研究。

然后,如果order为0,代表只有一个内存页(2^0 = 1)需要释放,就调用free_hot_cold_page函数;否则调用__free_pages_ok函数。

   1: /*

   2:  * Free a 0-order page

   3:  * cold == 1 ? free a cold page : free a hot page

   4:  */

   5: void free_hot_cold_page(struct page *page, int cold)

   6: {

   7:     struct zone *zone = page_zone(page);

   8:     struct per_cpu_pages *pcp;

   9:     unsigned long flags;

  10:     int migratetype;

  11:     int wasMlocked = __TestClearPageMlocked(page);

  12:  

  13:     if (!free_pages_prepare(page, 0))

  14:         return;

  15:  

  16:     migratetype = get_pageblock_migratetype(page);

  17:     set_page_private(page, migratetype);

  18:     local_irq_save(flags);

  19:     if (unlikely(wasMlocked))

  20:         free_page_mlock(page);

  21:     __count_vm_event(PGFREE);

  22:  

  23:     /*

  24:      * We only track unmovable, reclaimable and movable on pcp lists.

  25:      * Free ISOLATE pages back to the allocator because they are being

  26:      * offlined but treat RESERVE as movable pages so we can get those

  27:      * areas back if necessary. Otherwise, we may have to free

  28:      * excessively into the page allocator

  29:      */

  30:     if (migratetype >= MIGRATE_PCPTYPES) {

  31:         if (unlikely(migratetype == MIGRATE_ISOLATE)) {

  32:             free_one_page(zone, page, 0, migratetype);

  33:             goto out;

  34:         }

  35:         migratetype = MIGRATE_MOVABLE;

  36:     }

  37:  

  38:     pcp = &this_cpu_ptr(zone->pageset)->pcp;

  39:     if (cold)

  40:         list_add_tail(&page->lru, &pcp->lists[migratetype]);

  41:     else

  42:         list_add(&page->lru, &pcp->lists[migratetype]);

  43:     pcp->count++;

  44:     if (pcp->count >= pcp->high) {

  45:         free_pcppages_bulk(zone, pcp->batch, pcp);

  46:         pcp->count -= pcp->batch;

  47:     }

  48:  

  49: out:

  50:     local_irq_restore(flags);

  51: }

page_zone是根据page找到其所在的zone的函数,具体实现是在page->flags里面有相应的比特位,保存它是从哪个zone上分配的。

那么page->flags是从什么时候开始携带这些信息的呢?

首先,所有的page结构体都保存在pglist_data的成员node_mem_map指向的一片内存里。

   1: /*

   2:  * Initially all pages are reserved - free ones are freed

   3:  * up by free_all_bootmem() once the early boot process is

   4:  * done. Non-atomic initialization, single-pass.

   5:  */

   6: void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,

   7:         unsigned long start_pfn, enum memmap_context context)

   8: {

   9:     struct page *page;

  10:     unsigned long end_pfn = start_pfn + size;

  11:     unsigned long pfn;

  12:     struct zone *z;

  13:  

  14:     if (highest_memmap_pfn < end_pfn - 1)

  15:         highest_memmap_pfn = end_pfn - 1;

  16:  

  17:     z = &NODE_DATA(nid)->node_zones[zone];

  18:     for (pfn = start_pfn; pfn < end_pfn; pfn++) {

  19:         /*

  20:          * There can be holes in boot-time mem_map[]s

  21:          * handed to this function.  They do not

  22:          * exist on hotplugged memory.

  23:          */

  24:         if (context == MEMMAP_EARLY) {

  25:             if (!early_pfn_valid(pfn))

  26:                 continue;

  27:             if (!early_pfn_in_nid(pfn, nid))

  28:                 continue;

  29:         }

  30:         page = pfn_to_page(pfn);

  31:         set_page_links(page, zone, nid, pfn);

  32:         mminit_verify_page_links(page, zone, nid, pfn);

  33:         init_page_count(page);

  34:         reset_page_mapcount(page);

  35:         SetPageReserved(page);

  36:         /*

  37:          * Mark the block movable so that blocks are reserved for

  38:          * movable at startup. This will force kernel allocations

  39:          * to reserve their blocks rather than leaking throughout

  40:          * the address space during boot when many long-lived

  41:          * kernel allocations are made. Later some blocks near

  42:          * the start are marked MIGRATE_RESERVE by

  43:          * setup_zone_migrate_reserve()

  44:          *

  45:          * bitmap is created for zone's valid pfn range. but memmap

  46:          * can be created for invalid pages (for alignment)

  47:          * check here not to call set_pageblock_migratetype() against

  48:          * pfn out of zone.

  49:          */

  50:         if ((z->zone_start_pfn <= pfn)

  51:             && (pfn < z->zone_start_pfn + z->spanned_pages)

  52:             && !(pfn & (pageblock_nr_pages - 1)))

  53:             set_pageblock_migratetype(page, MIGRATE_MOVABLE);

  54:  

  55:         INIT_LIST_HEAD(&page->lru);

  56: #ifdef WANT_PAGE_VIRTUAL

  57:         /* The shift won't overflow because ZONE_NORMAL is below 4G. */

  58:         if (!is_highmem_idx(zone))

  59:             set_page_address(page, __va(pfn << PAGE_SHIFT));

  60: #endif

  61:     }

  62: }

在Buddy System初始化的过程中,会调用memmap_init_zone函数,在该函数中,会将属于该Zone的所有page结构体都遍历处理一遍,都调用一次set_page_links来建立page与zone之间的对应关系。

   1: static inline void set_page_zone(struct page *page, enum zone_type zone)

   2: {

   3:     page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);

   4:     page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT;

   5: }

   6:  

   7: static inline void set_page_node(struct page *page, unsigned long node)

   8: {

   9:     page->flags &= ~(NODES_MASK << NODES_PGSHIFT);

  10:     page->flags |= (node & NODES_MASK) << NODES_PGSHIFT;

  11: }

  12:  

  13: static inline void set_page_links(struct page *page, enum zone_type zone,

  14:     unsigned long node, unsigned long pfn)

  15: {

  16:     set_page_zone(page, zone);

  17:     set_page_node(page, node);

  18: #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)

  19:     set_page_section(page, pfn_to_section_nr(pfn));

  20: #endif

  21: }

启动过程中的内存初始化

   1: void __init setup_arch(char **cmdline_p)

   2: {

   3: ......

   4: /* max_pfn_mapped is updated here */

   5: max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);

   6: max_pfn_mapped = max_low_pfn_mapped;

   7: ......

   8: paging_init();

   9: ......

  10: }

调用init_memory_mapping

   1: /*

   2:  * Setup the direct mapping of the physical memory at PAGE_OFFSET.

   3:  * This runs before bootmem is initialized and gets pages directly from

   4:  * the physical memory. To access them they are temporarily mapped.

   5:  */

   6: unsigned long __init_refok init_memory_mapping(unsigned long start,

   7:                            unsigned long end)

   8: {

   9: ......

  10:  

  11: for (i = 0; i < nr_range; i++)

  12:     ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,

  13:                        mr[i].page_size_mask);

  14:  

  15: ......

  16: }

调用kernel_physical_mapping_init

   1: /*

   2:  * This maps the physical memory to kernel virtual address space, a total

   3:  * of max_low_pfn pages, by creating page tables starting from address

   4:  * PAGE_OFFSET:

   5:  */

   6: unsigned long __init

   7: kernel_physical_mapping_init(unsigned long start,

   8:                  unsigned long end,

   9:                  unsigned long page_size_mask)

  10: {

  11:     int use_pse = page_size_mask == (1<<PG_LEVEL_2M);

  12:     unsigned long last_map_addr = end;

  13:     unsigned long start_pfn, end_pfn;

  14:     pgd_t *pgd_base = swapper_pg_dir;

  15:     int pgd_idx, pmd_idx, pte_ofs;

  16:     unsigned long pfn;

  17:     pgd_t *pgd;

  18:     pmd_t *pmd;

  19:     pte_t *pte;

  20:     unsigned pages_2m, pages_4k;

  21:     int mapping_iter;

  22:  

  23:     start_pfn = start >> PAGE_SHIFT;

  24:     end_pfn = end >> PAGE_SHIFT;

  25:  

  26:     /*

  27:      * First iteration will setup identity mapping using large/small pages

  28:      * based on use_pse, with other attributes same as set by

  29:      * the early code in head_32.S

  30:      *

  31:      * Second iteration will setup the appropriate attributes (NX, GLOBAL..)

  32:      * as desired for the kernel identity mapping.

  33:      *

  34:      * This two pass mechanism conforms to the TLB app note which says:

  35:      *

  36:      *     "Software should not write to a paging-structure entry in a way

  37:      *      that would change, for any linear address, both the page size

  38:      *      and either the page frame or attributes."

  39:      */

  40:     mapping_iter = 1;

  41:  

  42:     if (!cpu_has_pse)

  43:         use_pse = 0;

  44:  

  45: repeat:

  46:     pages_2m = pages_4k = 0;

  47:     pfn = start_pfn;

  48:     pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);

  49:     pgd = pgd_base + pgd_idx;

  50:     for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {

  51:         pmd = one_md_table_init(pgd);

  52:  

  53:         if (pfn >= end_pfn)

  54:             continue;

  55: #ifdef CONFIG_X86_PAE

  56:         pmd_idx = pmd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);

  57:         pmd += pmd_idx;

  58: #else

  59:         pmd_idx = 0;

  60: #endif

  61:         for (; pmd_idx < PTRS_PER_PMD && pfn < end_pfn;

  62:              pmd++, pmd_idx++) {

  63:             unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET;

  64:  

  65:             /*

  66:              * Map with big pages if possible, otherwise

  67:              * create normal page tables:

  68:              */

  69:             if (use_pse) {

  70:                 unsigned int addr2;

  71:                 pgprot_t prot = PAGE_KERNEL_LARGE;

  72:                 /*

  73:                  * first pass will use the same initial

  74:                  * identity mapping attribute + _PAGE_PSE.

  75:                  */

  76:                 pgprot_t init_prot =

  77:                     __pgprot(PTE_IDENT_ATTR |

  78:                          _PAGE_PSE);

  79:  

  80:                 addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +

  81:                     PAGE_OFFSET + PAGE_SIZE-1;

  82:  

  83:                 if (is_kernel_text(addr) ||

  84:                     is_kernel_text(addr2))

  85:                     prot = PAGE_KERNEL_LARGE_EXEC;

  86:  

  87:                 pages_2m++;

  88:                 if (mapping_iter == 1)

  89:                     set_pmd(pmd, pfn_pmd(pfn, init_prot));

  90:                 else

  91:                     set_pmd(pmd, pfn_pmd(pfn, prot));

  92:  

  93:                 pfn += PTRS_PER_PTE;

  94:                 continue;

  95:             }

  96:             pte = one_page_table_init(pmd);

  97:  

  98:             pte_ofs = pte_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);

  99:             pte += pte_ofs;

 100:             for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn;

 101:                  pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {

 102:                 pgprot_t prot = PAGE_KERNEL;

 103:                 /*

 104:                  * first pass will use the same initial

 105:                  * identity mapping attribute.

 106:                  */

 107:                 pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR);

 108:  

 109:                 if (is_kernel_text(addr))

 110:                     prot = PAGE_KERNEL_EXEC;

 111:  

 112:                 pages_4k++;

 113:                 if (mapping_iter == 1) {

 114:                     set_pte(pte, pfn_pte(pfn, init_prot));

 115:                     last_map_addr = (pfn << PAGE_SHIFT) + PAGE_SIZE;

 116:                 } else

 117:                     set_pte(pte, pfn_pte(pfn, prot));

 118:             }

 119:         }

 120:     }

 121:     if (mapping_iter == 1) {

 122:         /*

 123:          * update direct mapping page count only in the first

 124:          * iteration.

 125:          */

 126:         update_page_count(PG_LEVEL_2M, pages_2m);

 127:         update_page_count(PG_LEVEL_4K, pages_4k);

 128:  

 129:         /*

 130:          * local global flush tlb, which will flush the previous

 131:          * mappings present in both small and large page TLB's.

 132:          */

 133:         __flush_tlb_all();

 134:  

 135:         /*

 136:          * Second iteration will set the actual desired PTE attributes.

 137:          */

 138:         mapping_iter = 2;

 139:         goto repeat;

 140:     }

 141:     return last_map_addr;

 142: }

在这里面,将swapper_pg_dir作为pgd_t(Page Directory)的指针,对swapper_pg_dir指向的内存区域作处理,将低端内存(共max_low_pfn个页,从PAGE_OFFSET开始)的直接映射关系建立到该页目录中。

然后在paging_init中,会调用pagetable_init:

   1: static void __init pagetable_init(void)

   2: {

   3:     pgd_t *pgd_base = swapper_pg_dir;

   4:  

   5:     permanent_kmaps_init(pgd_base);

   6: }

内存Zone中的pageset成员分析的更多相关文章

  1. DEBUG模式下, 内存中的变量地址分析

    测试函数的模板实现 /// @file my_template.h /// @brief 测试数据类型用的模板实现 #ifndef MY_TEMPLATE_H_2016_0123_1226 #defi ...

  2. 继承的基本概念: (1)Java不支持多继承,也就是说子类至多只能有一个父类。 (2)子类继承了其父类中不是私有的成员变量和成员方法,作为自己的成员变量和方法。 (3)子类中定义的成员变量和父类中定义的成员变量相同时,则父类中的成员变量不能被继承。 (4)子类中定义的成员方法,并且这个方法的名字返回类型,以及参数个数和类型与父类的某个成员方法完全相同,则父类的成员方法不能被继承。 分析以上程

    继承的基本概念: (1)Java不支持多继承,也就是说子类至多只能有一个父类. (2)子类继承了其父类中不是私有的成员变量和成员方法,作为自己的成员变量和方法.(3)子类中定义的成员变量和父类中定义的 ...

  3. linux-3.2.36内核启动2-setup_arch中的内存初始化1(arm平台 分析高端内存和初始化memblock)【转】

    转自:http://blog.csdn.net/tommy_wxie/article/details/17093307 上一篇微博留下了这几个函数,现在我们来分析它们         sanity_c ...

  4. java内存模型中工作内存并不一定会同步主内存的情况分析

    其实是为了填之前的一个坑  在一个多线程的案例中出现了阻塞的情况. https://www.cnblogs.com/hetutu-5238/p/10477875.html   其中的第二个问题,即多个 ...

  5. 关于 self 和 super 在oc 中 的疑惑 与 分析

    关于 self 和 super 在oc 中 的疑惑 与 分析   面试一定都是很注重 基础的,不管高级还是初级. 虽然基础好跟基础不好都可以写 代码,网上那么多资料.  区分高低也就是研究的深度和广度 ...

  6. (第三章)Java内存模型(中)

    一.volatile的内存语义 1.1 volatile的特性 理解volatile特性的一个好办法是把对volatile变量的单个读/写,看成是使用同一个锁对这些单个读/写操作做了同步.下面通过具体 ...

  7. C++中类的内存空间大小(sizeof)分析

    注意类大小与结构体大小还是有一些不同的,类里面还包含成员函数(特别是虚函数),结构体中一般只有数据成员. 首先明确各数据类型占多大的空间.例如int到底是占2字节还是4字节空间: 在TC里,int是2 ...

  8. C++中几个值得分析的小问题(2)

    下面有3个小问题,作为C++ Beginner你一定要知道错在哪里了. 1.派生类到基类的引用或指针转换一定“完美”存在? 一般情况,你很可能会认为:派生类对象的引用或指针转换为基类对象的引用或指针是 ...

  9. C++中static数据成员详解

        本文和大家分享的主要是c++中static数据成员的相关用法及源码示例,希望能帮助大家更好的学习C++. static(静态存储)数据成员 StaticTest.cpp : 定义控制台应用程序 ...

随机推荐

  1. python中的pow()函数解释

    转载自:https://blog.csdn.net/wuxiaobingandbob/article/details/47776209 Python中pow(),里面可以有两个或三个参数,它们的意义是 ...

  2. NGINX-二级域名

    先给二级域名添加到 DNS 解析再配置 nginx server { #侦听80端口 listen 80; #定义使用 www.nginx.cn访问 server_name ~^(?<subdo ...

  3. jsp中jstl、el使用

    tomcat7.0+JSTL1.1.2(不冲突) EL表达式获取变量 ${表达式} 如:${user.name} 不可以动态取值 ${user[name]}可以动态取值,变量名中含有特殊字符时只能用此 ...

  4. FPGA前仿真后仿真

    前仿真 后仿真 时序(综合后)仿真 时序仿真将时延考虑进去,包括综合后产生的(与.或.非)门时延,还有布局布线产生的时延. 综合(Synthesize),就是将HDL语言设计输入翻译成由与.或.非门和 ...

  5. Push to GitHub:could not resolve host: github.com

    系统:Mac os x 10.11.3 操作:Push to GitHub 错误如下: git push origin ssh: Could not resolve hostname ssh.gith ...

  6. s-cms学校建站重装漏洞

    文件位置 ./install/index.php 影响版本 PHP V5.0 过程 通过获取GET请求判断是安装还是结束安装 安装步骤1 安装步骤2 安装步骤3 安装步骤4 解释 安装步骤1-3都没有 ...

  7. InnoDB与Myisam比较

    InnoDB与Myisam比较                                                                                     ...

  8. jsp页面通过ajax取值/展示数据及分页显示

    jsp页面通过ajax从后台获取数据,在页面展示,并实现分页效果代码: [JavaScript部分代码] 1 <script> function getComposition(pageno ...

  9. quick'n'dirty poc for CVE-2013-1763 SOCK_DIAG bug in kernel 3.3-3.8

    /* * quick'n'dirty poc for CVE-2013-1763 SOCK_DIAG bug in kernel 3.3-3.8 * bug found by Spender * po ...

  10. Web开发进阶

    1.可靠性 可扩展性,服务降级,负载均衡   应用扩展 1.垂直扩展,方式:提升机器硬件,缺点,成本昂贵,扩展能力有限 2.水平扩展,方式:增加节点,优点:升级过程平滑,硬件成本低,理论上无限扩展,确 ...