基础参数初始化

nf_conntrack_init_start函数完成连接跟踪基础参数的初始化,包括了hash,slab,扩展项,GC任务等;

 int nf_conntrack_init_start(void)
{
int max_factor = ;
int ret = -ENOMEM;
int i; /* struct nf_ct_ext uses u8 to store offsets/size */
BUILD_BUG_ON(total_extension_size() > 255u); seqcount_init(&nf_conntrack_generation); for (i = ; i < CONNTRACK_LOCKS; i++)
spin_lock_init(&nf_conntrack_locks[i]); /* 根据内存大小,初始化htable_size */
if (!nf_conntrack_htable_size) {
/* Idea from tcp.c: use 1/16384 of memory.
* On i386: 32MB machine has 512 buckets.
* >= 1GB machines have 16384 buckets.
* >= 4GB machines have 65536 buckets.
*/
nf_conntrack_htable_size
= (((totalram_pages << PAGE_SHIFT) / )
/ sizeof(struct hlist_head));
if (totalram_pages > ( * ( * * / PAGE_SIZE)))
nf_conntrack_htable_size = ;
else if (totalram_pages > ( * * / PAGE_SIZE))
nf_conntrack_htable_size = ;
if (nf_conntrack_htable_size < )
nf_conntrack_htable_size = ; /* Use a max. factor of four by default to get the same max as
* with the old struct list_heads. When a table size is given
* we use the old value of 8 to avoid reducing the max.
* entries. */
max_factor = ;
} /* 分配hash */
nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, );
if (!nf_conntrack_hash)
return -ENOMEM; /* 设置连接跟踪项的最大值 */
nf_conntrack_max = max_factor * nf_conntrack_htable_size; /* 创建nf_conn连接跟踪slab */
nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
sizeof(struct nf_conn),
NFCT_INFOMASK + ,
SLAB_TYPESAFE_BY_RCU | SLAB_HWCACHE_ALIGN, NULL);
if (!nf_conntrack_cachep)
goto err_cachep; printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
nf_conntrack_max); /* 各个扩展项的初始化 */
ret = nf_conntrack_expect_init();
if (ret < )
goto err_expect; ret = nf_conntrack_acct_init();
if (ret < )
goto err_acct; ret = nf_conntrack_tstamp_init();
if (ret < )
goto err_tstamp; ret = nf_conntrack_ecache_init();
if (ret < )
goto err_ecache; ret = nf_conntrack_timeout_init();
if (ret < )
goto err_timeout; ret = nf_conntrack_helper_init();
if (ret < )
goto err_helper; ret = nf_conntrack_labels_init();
if (ret < )
goto err_labels; ret = nf_conntrack_seqadj_init();
if (ret < )
goto err_seqadj; /* 初始化nf_ct_l3protos[]初始化为nf_conntrack_l3proto_generic */
ret = nf_conntrack_proto_init();
if (ret < )
goto err_proto; /* 初始化连接跟踪gc任务 */
conntrack_gc_work_init(&conntrack_gc_work);
queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ); return ; err_proto:
nf_conntrack_seqadj_fini();
err_seqadj:
nf_conntrack_labels_fini();
err_labels:
nf_conntrack_helper_fini();
err_helper:
nf_conntrack_timeout_fini();
err_timeout:
nf_conntrack_ecache_fini();
err_ecache:
nf_conntrack_tstamp_fini();
err_tstamp:
nf_conntrack_acct_fini();
err_acct:
nf_conntrack_expect_fini();
err_expect:
kmem_cache_destroy(nf_conntrack_cachep);
err_cachep:
nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
return ret;
}
协议与tuple操作初始化

nf_conntrack_l3proto_ipv4_init函数完成了协议和tuple操作函数相关的初始化;

 static int __init nf_conntrack_l3proto_ipv4_init(void)
{
int ret = ; need_conntrack(); ret = nf_register_sockopt(&so_getorigdst);
if (ret < ) {
pr_err("Unable to register netfilter socket option\n");
return ret;
} ret = register_pernet_subsys(&ipv4_net_ops);
if (ret < ) {
pr_err("nf_conntrack_ipv4: can't register pernet ops\n");
goto cleanup_sockopt;
} /* nf_conntrack_l4proto相关初始化 */
ret = nf_ct_l4proto_register(builtin_l4proto4,
ARRAY_SIZE(builtin_l4proto4));
if (ret < )
goto cleanup_pernet; /* nf_conntrack_l3proto相关初始化 */
ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4);
if (ret < ) {
pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n");
goto cleanup_l4proto;
} return ret;
cleanup_l4proto:
nf_ct_l4proto_unregister(builtin_l4proto4,
ARRAY_SIZE(builtin_l4proto4));
cleanup_pernet:
unregister_pernet_subsys(&ipv4_net_ops);
cleanup_sockopt:
nf_unregister_sockopt(&so_getorigdst);
return ret;
}
nf_conntrack_l3proto_ipv4

nf_conntrack_l3proto_ipv4 结构成员初始化包括了基础信息,tuple的相关操作,钩子函数的注册,注销等,每个函数的作用如下;

 struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
.l3proto = PF_INET,
.name = "ipv4",
.pkt_to_tuple = ipv4_pkt_to_tuple,
.invert_tuple = ipv4_invert_tuple,
.print_tuple = ipv4_print_tuple,
.get_l4proto = ipv4_get_l4proto,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = ipv4_tuple_to_nlattr,
.nlattr_tuple_size = ipv4_nlattr_tuple_size,
.nlattr_to_tuple = ipv4_nlattr_to_tuple,
.nla_policy = ipv4_nla_policy,
#endif
.net_ns_get = ipv4_hooks_register,
.net_ns_put = ipv4_hooks_unregister,
.me = THIS_MODULE,
};
tuple相关操作
 /* 从ip头中获取源目的地址,存入tuple */
static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_tuple *tuple)
{
const __be32 *ap;
__be32 _addrs[];
ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
sizeof(u_int32_t) * , _addrs);
if (ap == NULL)
return false; tuple->src.u3.ip = ap[];
tuple->dst.u3.ip = ap[]; return true;
} /* 根据原tuple地址设置新tuple,源目的地址均相反 */
static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
tuple->src.u3.ip = orig->dst.u3.ip;
tuple->dst.u3.ip = orig->src.u3.ip; return true;
} /* 打印tuple的源目的地址 */
static void ipv4_print_tuple(struct seq_file *s,
const struct nf_conntrack_tuple *tuple)
{
seq_printf(s, "src=%pI4 dst=%pI4 ",
&tuple->src.u3.ip, &tuple->dst.u3.ip);
} /* 获取ip头中的协议 */
static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum)
{
const struct iphdr *iph;
struct iphdr _iph; iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
if (iph == NULL)
return -NF_ACCEPT; /* Conntrack defragments packets, we might still see fragments
* inside ICMP packets though. */
if (iph->frag_off & htons(IP_OFFSET))
return -NF_ACCEPT; *dataoff = nhoff + (iph->ihl << );
*protonum = iph->protocol; /* Check bogus IP headers */
if (*dataoff > skb->len) {
pr_debug("nf_conntrack_ipv4: bogus IPv4 packet: "
"nhoff %u, ihl %u, skblen %u\n",
nhoff, iph->ihl << , skb->len);
return -NF_ACCEPT;
} return NF_ACCEPT;
}
netlink与tuple之间的操作
 /* 填充tuple的源目的地址到netlink */
static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple)
{
if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) ||
nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip))
goto nla_put_failure;
return ; nla_put_failure:
return -;
} static const struct nla_policy ipv4_nla_policy[CTA_IP_MAX+] = {
[CTA_IP_V4_SRC] = { .type = NLA_U32 },
[CTA_IP_V4_DST] = { .type = NLA_U32 },
}; /* 将netlink中的源目的地址填充到tuple */
static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
struct nf_conntrack_tuple *t)
{
if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST])
return -EINVAL; t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]);
t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]); return ;
} /* 获取netlink中的tuple大小 */
static int ipv4_nlattr_tuple_size(void)
{
return nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + );
}
钩子函数的注册

ipv4_hooks_register钩子函数注册回调,其中分为两步,defrag钩子注册和其他钩子注册;

 static int ipv4_hooks_register(struct net *net)
{
struct conntrack4_net *cnet = net_generic(net, conntrack4_net_id);
int err = ; mutex_lock(&register_ipv4_hooks); cnet->users++;
if (cnet->users > )
goto out_unlock; /* defrag钩子函数注册 */
err = nf_defrag_ipv4_enable(net);
if (err) {
cnet->users = ;
goto out_unlock;
} /* 注册netfilter钩子函数 */
err = nf_register_net_hooks(net, ipv4_conntrack_ops,
ARRAY_SIZE(ipv4_conntrack_ops)); if (err)
cnet->users = ;
out_unlock:
mutex_unlock(&register_ipv4_hooks);
return err;
}

nf_defrag_ipv4_enable为defrag钩子注册流程;

 int nf_defrag_ipv4_enable(struct net *net)
{
int err = ; might_sleep(); if (net->nf.defrag_ipv4)
return ; mutex_lock(&defrag4_mutex);
if (net->nf.defrag_ipv4)
goto out_unlock; err = nf_register_net_hooks(net, ipv4_defrag_ops,
ARRAY_SIZE(ipv4_defrag_ops));
if (err == )
net->nf.defrag_ipv4 = true; out_unlock:
mutex_unlock(&defrag4_mutex);
return err;
}

defrag钩子函数;

 static struct nf_hook_ops ipv4_defrag_ops[] = {
{
.hook = ipv4_conntrack_defrag,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_DEFRAG,
},
{
.hook = ipv4_conntrack_defrag,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_CONNTRACK_DEFRAG,
},
};

conntrack_in,helper,confirm钩子函数;

 static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
{
.hook = ipv4_conntrack_in,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_CONNTRACK,
},
{
.hook = ipv4_conntrack_local,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_CONNTRACK,
},
{
.hook = ipv4_helper,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_HELPER,
},
{
.hook = ipv4_confirm,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
},
{
.hook = ipv4_helper,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_CONNTRACK_HELPER,
},
{
.hook = ipv4_confirm,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
},
};
nf_conntrack_l4proto_tcp4

多种协议会实现自己的nf_conntrack_l4proto,这里只列出tcp的实现,简要看下;

nf_conntrack_l4proto_tcp4结构中的函数与nf_conntrack_l3proto_ipv4 结构类似,其中多数函数是将源目的ip的操作换成了源目的端口的操作,此处不重复分析,另外增加了几个连接相关的操作函数packet,new等,遇到的时候在展开分析;

 struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_TCP,
.name = "tcp",
.pkt_to_tuple = tcp_pkt_to_tuple,
.invert_tuple = tcp_invert_tuple,
.print_tuple = tcp_print_tuple,
.print_conntrack = tcp_print_conntrack,
.packet = tcp_packet,
.get_timeouts = tcp_get_timeouts,
.new = tcp_new,
.error = tcp_error,
.can_early_drop = tcp_can_early_drop,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = tcp_to_nlattr,
.nlattr_size = tcp_nlattr_size,
.from_nlattr = nlattr_to_tcp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
.nlattr_tuple_size = tcp_nlattr_tuple_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
.ctnl_timeout = {
.nlattr_to_obj = tcp_timeout_nlattr_to_obj,
.obj_to_nlattr = tcp_timeout_obj_to_nlattr,
.nlattr_max = CTA_TIMEOUT_TCP_MAX,
.obj_size = sizeof(unsigned int) *
TCP_CONNTRACK_TIMEOUT_MAX,
.nla_policy = tcp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
.init_net = tcp_init_net,
.get_net_proto = tcp_get_net_proto,
};

Netfilter 之 连接跟踪初始化的更多相关文章

  1. Netfilter之连接跟踪实现机制初步分析

    Netfilter之连接跟踪实现机制初步分析 原文: http://blog.chinaunix.net/uid-22227409-id-2656910.html 什么是连接跟踪 连接跟踪(CONNT ...

  2. Netfilter 之 连接跟踪相关数据结构

    Netfilter通过连接跟踪来记录和跟踪连接的状态,为状态防火墙和NAT提供基础支持: 钩子点与钩子函数 下图为钩子点和钩子函数的关系图(点击图片查看原图),其中ipv4_conntrack_def ...

  3. Netfilter 之 连接跟踪的helper

    注册helper nf_conntrack_ftp_init是连接跟踪ftp模块的初始化函数,可以看到其调用了nf_conntrack_helpers_register来注册helper: stati ...

  4. Netfilter 之 连接跟踪钩子函数分析

    ipv4_conntrack_defrag ipv4_conntrack_defrag对输入包进行检查,如果是分片包,则调用nf_ct_ipv4_gather_frags函数进行重组: static ...

  5. Netfilter&iptables:如何理解连接跟踪机制?

    如何理解Netfilter中的连接跟踪机制? 本篇我打算以一个问句开头,因为在知识探索的道路上只有多问然后充分调动起思考的机器才能让自己走得更远.连接跟踪定义很简单:用来记录和跟踪连接的状态. 问:为 ...

  6. linux内核netfilter连接跟踪的hash算法

    linux内核netfilter连接跟踪的hash算法 linux内核中的netfilter是一款强大的基于状态的防火墙,具有连接跟踪(conntrack)的实现.conntrack是netfilte ...

  7. linux nf_conntrack 连接跟踪机制 2

    连接跟踪初始化 基础参数的初始化:nf_conntrack_standalone_init 会调用nf_conntrack_init_start 完成连接跟踪基础参数的初始化, hash slab 扩 ...

  8. linux nf_conntrack 连接跟踪机制 3-hook

    conntrack hook函数分析 enum nf_ip_hook_priorities { NF_IP_PRI_FIRST = INT_MIN, NF_IP_PRI_CONNTRACK_DEFRA ...

  9. [转]nf_conntrack: table full, dropping packet 连接跟踪表已满,开始丢包 的解决办法

      nf_conntrack: table full, dropping packet  连接跟踪表已满,开始丢包 的解决办法 中午业务说机器不能登录,我通过USM管理界面登录单板的时候发现机器没有僵 ...

随机推荐

  1. com.mysql.jdbc.exceptions.jdbc4.MySQLTransactionRollbackException: Lock wait timeout exceeded; try restarting transaction 问题解决

    有两种设置方法 第一种在mysql的配置文件中加入,然后重启mysql innodb_lock_wait_timeout = 500 第二种直接执行如下命令 set global innodb_loc ...

  2. 关于写SQL语句的技巧

    一.SQL总结写法 SQL的写法无非就是几种,关联查询,子查询,分组函数,各种函数的使用 1.首先根据要做的需求,先分析一下,需要用到哪些查询,例如要用到关联查询,就先把用到的表列出来,比如a,b,c ...

  3. 如何对Nginx日志文件进行切割保存

    日积月累下,日志文件会越来越大,日志文件太大严重影响服务器效率,须要定时对日志文件进行切割. 切割的方式有按月切割.按天切割.按小时切割,一般都是按天切割. 那么如何进行切割呢? 思路: 创建日志文件 ...

  4. 10.Spring整合Hibernate_3_HibernateTemplate

    将sessionFactory 注入给 hibernateTemplate,让hibernateTemplate帮我们完成一些模板代码 <!-- 使用HibernateTemplate --&g ...

  5. 16.Listener(监听器)

    /*监听器*/ java的事件监听机制(主要是对一些web元素的监听 (ServletContext(计时器),HttpSession和ServletRequest)) 1.事件监听涉及到三个组件:事 ...

  6. apache thinkphp5 强制https://访问

    根目录下,.htaccess文件 <IfModule mod_rewrite.c> Options +FollowSymlinks -Multiviews RewriteEngine On ...

  7. 【6】Zookeeper脚本及API

    一.客户端脚本 1.1.客户端连接 cd /usr/local/services/zookeeper/zookeeper-3.4.13/bin ##连接本地Zookeeper服务器 sh zkCli. ...

  8. dropbear源码编译安装及AIDE软件监控

    ssh协议的另一个实现:dropbear源码编译安装:• 1.安装开发包组:yum groupinstall “Development tools”• 2.下载 -2017.75.tar.bz2    ...

  9. Django模型层:单表操作,多表操作,常用(非常用)字段和参数,Django-model进阶

    一.web应用 二.模板的导入与继承 三.静态文件相关 四.inclusion_tag:返回html片段 五.模型层 一.web应用 -s包括两个部分:web服务器+application -目前阶段 ...

  10. (初级篇)docker基础应用--01

    命令 获取镜像: docker pull //: ,如:docker pull nginx 查看镜像列表: docker images 查看镜像信息: docker inspect