struct hlist_head {
	struct hlist_node *first;
};

struct hlist_node {
	struct hlist_node *next, **pprev;
};

hlist_head represents the head of one hash chain; it is a single element of the hash array.

Each entry of the hash table (an hlist_head) heads a linked list (an hlist), whose nodes are hlist_node structures.

The hlist_head structure has only one field, first, which points to the first node of the chain.

The hlist_node structure has two fields, next and pprev. The next pointer is easy to understand: it points to the next hlist_node, and it is NULL when this node is the last one in the list.

pprev is a pointer to a pointer: it points at the previous node's next pointer. Why do we need such a pointer? What does it buy us?

Before answering that, consider a different question first: why does the hash table implementation need two distinct structures at all?

The point of a hash table is fast lookup, so the table is usually a fairly large array; otherwise the probability of collisions would be high and the table would lose its purpose. How can we maintain such a large table without consuming too much memory? The only place to economize is the data structure itself: each table entry stores just a single pointer, which keeps the space cost of the array down.
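To make the saving concrete, here is a minimal user-space sketch; the struct definitions mirror the kernel's, but it is compiled outside the kernel purely for illustration:

#include <stdio.h>

struct list_head  { struct list_head *next, *prev; };   /* ordinary doubly linked head: two pointers */
struct hlist_head { struct hlist_node *first; };         /* hash list head: one pointer */
struct hlist_node { struct hlist_node *next, **pprev; };

int main(void)
{
	/* With 1024 buckets on a 64-bit machine this is 16 KiB vs 8 KiB
	 * just for the bucket heads; the gap grows with the table size. */
	printf("list_head  buckets: %zu bytes\n", sizeof(struct list_head) * 1024);
	printf("hlist_head buckets: %zu bytes\n", sizeof(struct hlist_head) * 1024);
	return 0;
}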

Using a single pointer per bucket creates a new problem: the head and the nodes no longer share a type. If hlist_node used conventional next and prev pointers, the first node would have to be handled differently from every other node, because its predecessor is an hlist_head rather than an hlist_node. That is inelegant, and the extra branching also costs some efficiency.

hlist_node solves this neatly by making pprev point at the address of the previous element's next pointer. Since the head's first pointer and a node's next pointer have the same type (struct hlist_node *), pprev can refer to either one, and every node, first or not, is handled uniformly.
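The payoff is easiest to see in the list primitives themselves. The following is lightly simplified from the kernel's include/linux/list.h (debug poison values and the RCU variants are omitted):

/* Unlink n without caring whether its predecessor is a head or a node:
 * *pprev is the predecessor's first/next pointer either way. */
static inline void __hlist_del(struct hlist_node *n)
{
	struct hlist_node *next = n->next;
	struct hlist_node **pprev = n->pprev;

	*pprev = next;
	if (next)
		next->pprev = pprev;
}

/* Insert n at the front: the old first node (if any) simply retargets
 * its pprev at &n->next; empty and non-empty lists need no special case. */
static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
{
	struct hlist_node *first = h->first;

	n->next = first;
	if (first)
		first->pprev = &n->next;
	h->first = n;
	n->pprev = &h->first;
}

Note that deletion never touches the hlist_head type at all, which is exactly what lets one generic function serve every position in the chain.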

As a concrete example, let's look at how the kernel's IP fragmentation code uses hlist. The data structures come first, followed by the inet fragments management functions that operate on them.

/**
 * struct inet_frag_queue - fragment queue
 *
 * @lock: spinlock protecting the queue
 * @timer: queue expiration timer
 * @list: hash bucket list
 * @refcnt: reference count of the queue
 * @fragments: received fragments head
 * @fragments_tail: received fragments tail
 * @stamp: timestamp of the last received fragment
 * @len: total length of the original datagram
 * @meat: length of received fragments so far
 * @flags: fragment queue flags
 * @max_size: maximum received fragment size
 * @net: namespace that this frag belongs to
 * @list_evictor: list of queues to forcefully evict (e.g. due to low memory)
 */
struct inet_frag_queue {
	spinlock_t		lock;
	struct timer_list	timer;
	struct hlist_node	list;		/* node in the hash bucket's chain */
	atomic_t		refcnt;
	struct sk_buff		*fragments;
	struct sk_buff		*fragments_tail;
	ktime_t			stamp;
	int			len;
	int			meat;
	__u8			flags;
	u16			max_size;
	struct netns_frags	*net;
	struct hlist_node	list_evictor;
};

#define INETFRAGS_HASHSZ	1024	/* size of the hash array */

/* averaged:
 * max_depth = default ipfrag_high_thresh / INETFRAGS_HASHSZ /
 *	       rounded up (SKB_TRUELEN(0) + sizeof(struct ipq or
 *	       struct frag_queue))
 */
#define INETFRAGS_MAXDEPTH	128

struct inet_frag_bucket {
	struct hlist_head	chain;		/* head of this bucket's collision chain */
	spinlock_t		chain_lock;
};

struct inet_frags {
	struct inet_frag_bucket	hash[INETFRAGS_HASHSZ];	/* the hash array itself */

	struct work_struct	frags_work;
	unsigned int		next_bucket;
	unsigned long		last_rebuild_jiffies;
	bool			rebuild;

	/* The first call to hashfn is responsible to initialize
	 * rnd. This is best done with net_get_random_once.
	 *
	 * rnd_seqlock is used to let hash insertion detect
	 * when it needs to re-lookup the hash chain to use.
	 */
	u32			rnd;
	seqlock_t		rnd_seqlock;
	int			qsize;

	unsigned int		(*hashfn)(const struct inet_frag_queue *);
	bool			(*match)(const struct inet_frag_queue *q,
					 const void *arg);
	void			(*constructor)(struct inet_frag_queue *q,
					       const void *arg);
	void			(*destructor)(struct inet_frag_queue *);
	void			(*skb_free)(struct sk_buff *);
	void			(*frag_expire)(unsigned long data);
	struct kmem_cache	*frags_cachep;
	const char		*frags_cache_name;
};
/*
 * inet fragments management
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Pavel Emelyanov <xemul@openvz.org>
 *		Started as consolidation of ipv4/ip_fragment.c,
 *		ipv6/reassembly. and ipv6 nf conntrack reassembly
 */

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>

#include <net/sock.h>
#include <net/inet_frag.h>
#include <net/inet_ecn.h>

#define INETFRAGS_EVICT_BUCKETS   128
#define INETFRAGS_EVICT_MAX	  512

/* don't rebuild inetfrag table with new secret more often than this */
#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ)

/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
 * Value : 0xff if frame should be dropped.
 *	   0 or INET_ECN_CE value, to be ORed in to final iph->tos field
 */
const u8 ip_frag_ecn_table[] = {
	/* at least one fragment had CE, and others ECT_0 or ECT_1 */
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0]			= INET_ECN_CE,
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1]			= INET_ECN_CE,
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1]	= INET_ECN_CE,

	/* invalid combinations : drop frame */
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
};
EXPORT_SYMBOL(ip_frag_ecn_table);

/* hash function: map a queue to one of the INETFRAGS_HASHSZ buckets */
static unsigned int
inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q)
{
	return f->hashfn(q) & (INETFRAGS_HASHSZ - 1);
}

static bool inet_frag_may_rebuild(struct inet_frags *f)
{
	return time_after(jiffies,
	       f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL);
}

static void inet_frag_secret_rebuild(struct inet_frags *f)
{
	int i;

	write_seqlock_bh(&f->rnd_seqlock);

	if (!inet_frag_may_rebuild(f))
		goto out;

	get_random_bytes(&f->rnd, sizeof(u32));

	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
		struct inet_frag_bucket *hb;
		struct inet_frag_queue *q;
		struct hlist_node *n;

		hb = &f->hash[i];
		spin_lock(&hb->chain_lock);

		/* _safe variant: nodes may be unlinked from this chain as we walk it */
		hlist_for_each_entry_safe(q, n, &hb->chain, list) {
			unsigned int hval = inet_frag_hashfn(f, q);	/* bucket under the new secret */

			if (hval != i) {
				struct inet_frag_bucket *hb_dest;

				hlist_del(&q->list);

				/* Relink to new hash chain. */
				hb_dest = &f->hash[hval];	/* destination collision chain */

				/* This is the only place where we take
				 * another chain_lock while already holding
				 * one. As this will not run concurrently,
				 * we cannot deadlock on hb_dest lock below, if its
				 * already locked it will be released soon since
				 * other caller cannot be waiting for hb lock
				 * that we've taken above.
				 */
				spin_lock_nested(&hb_dest->chain_lock,
						 SINGLE_DEPTH_NESTING);
				/* add the node's list into the destination chain */
				hlist_add_head(&q->list, &hb_dest->chain);
				spin_unlock(&hb_dest->chain_lock);
			}
		}
		spin_unlock(&hb->chain_lock);
	}

	f->rebuild = false;
	f->last_rebuild_jiffies = jiffies;
out:
	write_sequnlock_bh(&f->rnd_seqlock);
}

static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
{
	return q->net->low_thresh == 0 ||
	       frag_mem_limit(q->net) >= q->net->low_thresh;
}

static unsigned int
inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
{
	struct inet_frag_queue *fq;
	struct hlist_node *n;
	unsigned int evicted = 0;
	HLIST_HEAD(expired);

	spin_lock(&hb->chain_lock);

	hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
		if (!inet_fragq_should_evict(fq))
			continue;

		if (!del_timer(&fq->timer))
			continue;

		hlist_add_head(&fq->list_evictor, &expired);
		++evicted;
	}

	spin_unlock(&hb->chain_lock);

	hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
		f->frag_expire((unsigned long) fq);

	return evicted;
}

static void inet_frag_worker(struct work_struct *work)
{
	unsigned int budget = INETFRAGS_EVICT_BUCKETS;
	unsigned int i, evicted = 0;
	struct inet_frags *f;

	f = container_of(work, struct inet_frags, frags_work);

	BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ);

	local_bh_disable();

	for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) {
		evicted += inet_evict_bucket(f, &f->hash[i]);
		i = (i + 1) & (INETFRAGS_HASHSZ - 1);
		if (evicted > INETFRAGS_EVICT_MAX)
			break;
	}

	f->next_bucket = i;

	local_bh_enable();

	if (f->rebuild && inet_frag_may_rebuild(f))
		inet_frag_secret_rebuild(f);
}

static void inet_frag_schedule_worker(struct inet_frags *f)
{
	if (unlikely(!work_pending(&f->frags_work)))
		schedule_work(&f->frags_work);
}

int inet_frags_init(struct inet_frags *f)
{
	int i;

	INIT_WORK(&f->frags_work, inet_frag_worker);

	/* initialize every element of the hash array */
	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
		struct inet_frag_bucket *hb = &f->hash[i];

		spin_lock_init(&hb->chain_lock);
		INIT_HLIST_HEAD(&hb->chain);	/* an empty collision chain */
	}

	seqlock_init(&f->rnd_seqlock);
	f->last_rebuild_jiffies = 0;
	f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
					    NULL);
	if (!f->frags_cachep)
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL(inet_frags_init);

void inet_frags_init_net(struct netns_frags *nf)
{
	init_frag_mem_limit(nf);
}
EXPORT_SYMBOL(inet_frags_init_net);

void inet_frags_fini(struct inet_frags *f)
{
	cancel_work_sync(&f->frags_work);
	kmem_cache_destroy(f->frags_cachep);
}
EXPORT_SYMBOL(inet_frags_fini);

void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
{
	unsigned int seq;
	int i;

	nf->low_thresh = 0;

evict_again:
	local_bh_disable();
	seq = read_seqbegin(&f->rnd_seqlock);

	for (i = 0; i < INETFRAGS_HASHSZ ; i++)
		inet_evict_bucket(f, &f->hash[i]);

	local_bh_enable();
	cond_resched();

	if (read_seqretry(&f->rnd_seqlock, seq) ||
	    percpu_counter_sum(&nf->mem))
		goto evict_again;

	percpu_counter_destroy(&nf->mem);
}
EXPORT_SYMBOL(inet_frags_exit_net);

static struct inet_frag_bucket *
get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f)
__acquires(hb->chain_lock)
{
	struct inet_frag_bucket *hb;
	unsigned int seq, hash;

restart:
	seq = read_seqbegin(&f->rnd_seqlock);

	hash = inet_frag_hashfn(f, fq);
	hb = &f->hash[hash];

	spin_lock(&hb->chain_lock);
	if (read_seqretry(&f->rnd_seqlock, seq)) {
		spin_unlock(&hb->chain_lock);
		goto restart;
	}

	return hb;
}

static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
{
	struct inet_frag_bucket *hb;

	hb = get_frag_bucket_locked(fq, f);
	hlist_del(&fq->list);
	fq->flags |= INET_FRAG_COMPLETE;
	spin_unlock(&hb->chain_lock);
}

void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
{
	if (del_timer(&fq->timer))
		atomic_dec(&fq->refcnt);

	if (!(fq->flags & INET_FRAG_COMPLETE)) {
		fq_unlink(fq, f);
		atomic_dec(&fq->refcnt);
	}
}
EXPORT_SYMBOL(inet_frag_kill);

static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
				  struct sk_buff *skb)
{
	if (f->skb_free)
		f->skb_free(skb);
	kfree_skb(skb);
}

void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
{
	struct sk_buff *fp;
	struct netns_frags *nf;
	unsigned int sum, sum_truesize = 0;

	WARN_ON(!(q->flags & INET_FRAG_COMPLETE));
	WARN_ON(del_timer(&q->timer) != 0);

	/* Release all fragment data. */
	fp = q->fragments;
	nf = q->net;
	while (fp) {
		struct sk_buff *xp = fp->next;

		sum_truesize += fp->truesize;
		frag_kfree_skb(nf, f, fp);
		fp = xp;
	}
	sum = sum_truesize + f->qsize;

	if (f->destructor)
		f->destructor(q);
	kmem_cache_free(f->frags_cachep, q);

	sub_frag_mem_limit(nf, sum);
}
EXPORT_SYMBOL(inet_frag_destroy);

static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
						struct inet_frag_queue *qp_in,
						struct inet_frags *f,
						void *arg)
{
	struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
	struct inet_frag_queue *qp;

#ifdef CONFIG_SMP
	/* With SMP race we have to recheck hash table, because
	 * such entry could have been created on other cpu before
	 * we acquired hash bucket lock.
	 */
	hlist_for_each_entry(qp, &hb->chain, list) {
		if (qp->net == nf && f->match(qp, arg)) {
			atomic_inc(&qp->refcnt);
			spin_unlock(&hb->chain_lock);
			qp_in->flags |= INET_FRAG_COMPLETE;
			inet_frag_put(qp_in, f);
			return qp;
		}
	}
#endif
	qp = qp_in;
	if (!mod_timer(&qp->timer, jiffies + nf->timeout))
		atomic_inc(&qp->refcnt);

	atomic_inc(&qp->refcnt);
	hlist_add_head(&qp->list, &hb->chain);

	spin_unlock(&hb->chain_lock);

	return qp;
}

static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
					       struct inet_frags *f,
					       void *arg)
{
	struct inet_frag_queue *q;

	if (frag_mem_limit(nf) > nf->high_thresh) {
		inet_frag_schedule_worker(f);
		return NULL;
	}

	q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
	if (!q)
		return NULL;

	q->net = nf;
	f->constructor(q, arg);
	add_frag_mem_limit(nf, f->qsize);

	setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
	spin_lock_init(&q->lock);
	atomic_set(&q->refcnt, 1);

	return q;
}

static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
						struct inet_frags *f,
						void *arg)
{
	struct inet_frag_queue *q;

	q = inet_frag_alloc(nf, f, arg);
	if (!q)
		return NULL;

	return inet_frag_intern(nf, q, f, arg);
}

struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
				       struct inet_frags *f, void *key,
				       unsigned int hash)
{
	struct inet_frag_bucket *hb;
	struct inet_frag_queue *q;
	int depth = 0;

	if (frag_mem_limit(nf) > nf->low_thresh)
		inet_frag_schedule_worker(f);

	hash &= (INETFRAGS_HASHSZ - 1);
	hb = &f->hash[hash];

	spin_lock(&hb->chain_lock);
	hlist_for_each_entry(q, &hb->chain, list) {
		if (q->net == nf && f->match(q, key)) {
			atomic_inc(&q->refcnt);
			spin_unlock(&hb->chain_lock);
			return q;
		}
		depth++;
	}
	spin_unlock(&hb->chain_lock);

	if (depth <= INETFRAGS_MAXDEPTH)
		return inet_frag_create(nf, f, key);

	if (inet_frag_may_rebuild(f)) {
		if (!f->rebuild)
			f->rebuild = true;
		inet_frag_schedule_worker(f);
	}

	return ERR_PTR(-ENOBUFS);
}
EXPORT_SYMBOL(inet_frag_find);

void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
				   const char *prefix)
{
	static const char msg[] = "inet_frag_find: Fragment hash bucket"
		" list length grew over limit " __stringify(INETFRAGS_MAXDEPTH)
		". Dropping fragment.\n";

	if (PTR_ERR(q) == -ENOBUFS)
		net_dbg_ratelimited("%s%s", prefix, msg);
}
EXPORT_SYMBOL(inet_frag_maybe_warn_overflow);
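Stripped of the fragment-specific machinery, the hlist pattern above boils down to very little. Here is a minimal kernel-style sketch of the same bucket idiom; the my_* names are invented for illustration, but the hlist calls are the real <linux/list.h> API used throughout the code above:

#include <linux/list.h>

#define MY_HASHSZ 16	/* power of two, so (key & (MY_HASHSZ - 1)) selects a bucket */

struct my_entry {
	int key;
	struct hlist_node node;	/* links this entry into one bucket's chain */
};

static struct hlist_head my_table[MY_HASHSZ];	/* zero-initialized == all chains empty */

static void my_insert(struct my_entry *e)
{
	hlist_add_head(&e->node, &my_table[e->key & (MY_HASHSZ - 1)]);
}

static struct my_entry *my_find(int key)
{
	struct my_entry *e;

	hlist_for_each_entry(e, &my_table[key & (MY_HASHSZ - 1)], node)
		if (e->key == key)
			return e;
	return NULL;
}

static void my_remove(struct my_entry *e)
{
	hlist_del(&e->node);	/* uniform at any chain position; this is pprev's payoff */
}

inet_frag_find() and fq_unlink() above are exactly this pattern, plus chain locking, reference counting, and the seqlock dance that tolerates secret rebuilds.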
