ipv4_conntrack_defrag

ipv4_conntrack_defrag对输入包进行检查,如果是分片包,则调用nf_ct_ipv4_gather_frags函数进行重组;

 static unsigned int ipv4_conntrack_defrag(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct sock *sk = skb->sk; if (sk && sk_fullsock(sk) && (sk->sk_family == PF_INET) &&
inet_sk(sk)->nodefrag)
return NF_ACCEPT; #if IS_ENABLED(CONFIG_NF_CONNTRACK)
#if !IS_ENABLED(CONFIG_NF_NAT)
/* Previously seen (loopback)? Ignore. Do this before
fragment check. */
if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb)))
return NF_ACCEPT;
#endif
#endif
/* Gather fragments. */
/* 如果是分片的话进行分片重组 */
if (ip_is_fragment(ip_hdr(skb))) {
enum ip_defrag_users user =
nf_ct_defrag_user(state->hook, skb); if (nf_ct_ipv4_gather_frags(state->net, skb, user))
return NF_STOLEN;
}
return NF_ACCEPT;
}

nf_ct_ipv4_gather_frags内部调用了ip_defrag进行重组,ip_defrag相关分析,请移步IP分片重组;

 /*
 * nf_ct_ipv4_gather_frags - run IPv4 reassembly for one fragment.
 * @net:  network namespace passed through to ip_defrag()
 * @skb:  the fragment
 * @user: defrag user id passed through to ip_defrag()
 *
 * Calls ip_defrag() with bottom halves disabled. When ip_defrag() returns 0
 * the skb's ignore_df flag is set. Returns the ip_defrag() result unchanged.
 */
static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb,
				   u_int32_t user)
{
	int err;

	local_bh_disable();
	/* Fragment reassembly. */
	err = ip_defrag(net, skb, user);
	local_bh_enable();

	if (!err)
		skb->ignore_df = 1;

	return err;
}
ipv4_conntrack_in

ipv4_conntrack_in是对nf_conntrack_in的封装,是发往本机或者由本机转发的数据包进入连接跟踪的入口函数;nf_conntrack_in获取l3proto和l4proto,调用resolve_normal_ct检查hash中是否已有对应的tuple节点,没有则创建,并将连接与skb关联,最后调用l4proto->packet函数对连接状态进行处理;

 /*
 * ipv4_conntrack_in - hook wrapper that forwards the packet to
 * nf_conntrack_in() with the IPv4 protocol family.
 * @priv:  hook private data (unused here)
 * @skb:   packet being processed
 * @state: hook state; supplies the network namespace and hook number
 *
 * Returns whatever nf_conntrack_in() returns.
 */
static unsigned int ipv4_conntrack_in(void *priv,
				      struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	struct net *net = state->net;
	unsigned int hooknum = state->hook;

	return nf_conntrack_in(net, PF_INET, hooknum, skb);
}
 /*
 * nf_conntrack_in - connection tracking entry point for a packet.
 * @net:     network namespace
 * @pf:      protocol family used to select the l3proto/l4proto handlers
 * @hooknum: netfilter hook the packet is traversing
 * @skb:     packet being processed
 *
 * Looks up the layer-3 and layer-4 protocol trackers, runs the optional
 * l4proto->error() check, resolves (or creates) the conntrack entry through
 * resolve_normal_ct(), and then lets l4proto->packet() process the
 * connection state.
 *
 * Returns a netfilter verdict. When a protocol handler returns a value <= 0,
 * the negated value is returned as the verdict.
 */
unsigned int
nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
		struct sk_buff *skb)
{
	struct nf_conn *ct, *tmpl;
	enum ip_conntrack_info ctinfo;
	struct nf_conntrack_l3proto *l3proto;
	struct nf_conntrack_l4proto *l4proto;
	unsigned int *timeouts;
	unsigned int dataoff;
	u_int8_t protonum;
	int ret;

	/* Fetch the nf_conn already attached to the skb, if any. */
	tmpl = nf_ct_get(skb, &ctinfo);

	/* Either an nf_conn is attached or the skb is marked untracked. */
	if (tmpl || ctinfo == IP_CT_UNTRACKED) {
		/* Previously seen (loopback or untracked)?  Ignore. */
		if ((tmpl && !nf_ct_is_template(tmpl)) ||
		     ctinfo == IP_CT_UNTRACKED) {
			NF_CT_STAT_INC_ATOMIC(net, ignore);
			return NF_ACCEPT;
		}

		/* Only a template was attached: detach it from the skb. */
		skb->_nfct = 0;
	}

	/* rcu_read_lock()ed by nf_hook_thresh */
	/* Find the l3proto handler for this protocol family. */
	l3proto = __nf_ct_l3proto_find(pf);

	/* Obtain the payload offset and the layer-4 protocol number. */
	ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
				   &dataoff, &protonum);
	if (ret <= 0) {
		pr_debug("not prepared to track yet or error occurred\n");
		NF_CT_STAT_INC_ATOMIC(net, error);
		NF_CT_STAT_INC_ATOMIC(net, invalid);
		ret = -ret;
		goto out;
	}

	/* Find the l4proto handler for this family and protocol number. */
	l4proto = __nf_ct_l4proto_find(pf, protonum);

	/* It may be an special packet, error, unclean...
	 * inverse of the return code tells to the netfilter
	 * core what to do with the packet. */
	/* Run the l4 error check if the protocol provides one. */
	if (l4proto->error != NULL) {
		ret = l4proto->error(net, tmpl, skb, dataoff, pf, hooknum);
		if (ret <= 0) {
			NF_CT_STAT_INC_ATOMIC(net, error);
			NF_CT_STAT_INC_ATOMIC(net, invalid);
			ret = -ret;
			goto out;
		}
		/* ICMP[v6] protocol trackers may assign one conntrack. */
		if (skb->_nfct)
			goto out;
	}
repeat:
	/* Look the tuple up in the hash, creating an entry if it is missing;
	 * this also determines the ctinfo state and attaches the conntrack
	 * to the skb. */
	ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
				l3proto, l4proto);
	if (ret < 0) {
		/* Too stressed to deal. */
		NF_CT_STAT_INC_ATOMIC(net, drop);
		ret = NF_DROP;
		goto out;
	}

	/* Fetch the nf_conn now attached to the skb. */
	ct = nf_ct_get(skb, &ctinfo);
	if (!ct) {
		/* Not valid part of a connection */
		NF_CT_STAT_INC_ATOMIC(net, invalid);
		ret = NF_ACCEPT;
		goto out;
	}

	/* Decide what timeout policy we want to apply to this flow. */
	timeouts = nf_ct_timeout_lookup(net, ct, l4proto);

	/* Let the layer-4 tracker process the connection state
	 * (the original annotation notes this is tcp_packet for TCP). */
	ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts);
	if (ret <= 0) {
		/* Invalid: inverse of the return code tells
		 * the netfilter core what to do */
		pr_debug("nf_conntrack_in: Can't track with proto module\n");
		nf_conntrack_put(&ct->ct_general);
		skb->_nfct = 0;
		NF_CT_STAT_INC_ATOMIC(net, invalid);
		if (ret == -NF_DROP)
			NF_CT_STAT_INC_ATOMIC(net, drop);
		/* Special case: TCP tracker reports an attempt to reopen a
		 * closed/aborted connection. We have to go back and create a
		 * fresh conntrack.
		 */
		if (ret == -NF_REPEAT)
			goto repeat;
		ret = -ret;
		goto out;
	}

	/* First reply seen: test_and_set_bit() returned 0, so the
	 * SEEN_REPLY bit was previously clear and the reply event is cached. */
	if (ctinfo == IP_CT_ESTABLISHED_REPLY &&
	    !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
		nf_conntrack_event_cache(IPCT_REPLY, ct);
out:
	if (tmpl)
		nf_ct_put(tmpl);

	return ret;
}

resolve_normal_ct函数将数据包中的相关字段设置到tuple中,并且检查hash中是否有该tuple,如果没有则新建tuple,而后设置连接状态,并且与skb进行关联;

 /*
 * resolve_normal_ct - find or create the conntrack entry for a packet and
 * attach it to the skb.
 * @net:      network namespace
 * @tmpl:     template conntrack (may be NULL), used for zone lookup and
 *            passed to init_conntrack()
 * @skb:      packet being processed
 * @dataoff:  offset of the layer-4 data
 * @l3num:    layer-3 protocol number
 * @protonum: layer-4 protocol number
 * @l3proto:  layer-3 protocol tracker
 * @l4proto:  layer-4 protocol tracker
 *
 * Returns 0 when the tuple cannot be extracted, when init_conntrack()
 * returns NULL, or after a conntrack has been attached to the skb.
 * Returns PTR_ERR() of the init_conntrack() result when it is an error
 * pointer.
 */
static int
resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
		  struct sk_buff *skb,
		  unsigned int dataoff,
		  u_int16_t l3num,
		  u_int8_t protonum,
		  struct nf_conntrack_l3proto *l3proto,
		  struct nf_conntrack_l4proto *l4proto)
{
	const struct nf_conntrack_zone *zone;
	struct nf_conntrack_tuple tuple;
	struct nf_conntrack_tuple_hash *h;
	enum ip_conntrack_info ctinfo;
	struct nf_conntrack_zone tmp;
	struct nf_conn *ct;
	u32 hash;

	/* Fill the tuple from the packet (addresses, ports, protocol,
	 * direction, per the original annotation). */
	if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
			     dataoff, l3num, protonum, net, &tuple, l3proto,
			     l4proto)) {
		pr_debug("Can't get tuple\n");
		return 0;
	}

	/* look for tuple match */
	zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
	hash = hash_conntrack_raw(&tuple, net);
	h = __nf_conntrack_find_get(net, zone, &tuple, hash);

	/* Tuple not found: create a new entry. */
	if (!h) {
		h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
				   skb, dataoff, hash);
		if (!h)
			return 0;
		if (IS_ERR(h))
			return PTR_ERR(h);
	}

	/* Map the tuple hash entry back to its nf_conn. */
	ct = nf_ct_tuplehash_to_ctrack(h);

	/* It exists; we have (non-exclusive) reference. */
	if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
		/* Reply direction. */
		ctinfo = IP_CT_ESTABLISHED_REPLY;
	} else {
		/* Original direction. */
		/* Once we've had two way comms, always ESTABLISHED. */
		if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
			pr_debug("normal packet for %p\n", ct);
			ctinfo = IP_CT_ESTABLISHED;
		} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
			/* Expected-connection flag set: mark as related. */
			pr_debug("related packet for %p\n", ct);
			ctinfo = IP_CT_RELATED;
		} else {
			/* Anything else is a new connection. */
			pr_debug("new packet for %p\n", ct);
			ctinfo = IP_CT_NEW;
		}
	}

	/* Attach the nf_conn and its ctinfo to the skb. */
	nf_ct_set(skb, ct, ctinfo);
	return 0;
}
ipv4_conntrack_local

ipv4_conntrack_local是由本机发出的数据包连接跟踪的入口,是对nf_conntrack_in函数的封装;

 /*
 * ipv4_conntrack_local - hook wrapper around nf_conntrack_in(), described
 * by the accompanying article as the entry for locally generated packets.
 * @priv:  hook private data (unused here)
 * @skb:   packet being processed
 * @state: hook state; supplies the network namespace and hook number
 *
 * Packets too short to hold an IPv4 header, and fragments, are accepted
 * without tracking. Everything else is handed to nf_conntrack_in().
 */
static unsigned int ipv4_conntrack_local(void *priv,
					 struct sk_buff *skb,
					 const struct nf_hook_state *state)
{
	/* root is playing with raw sockets: total length too short. */
	if (skb->len < sizeof(struct iphdr))
		return NF_ACCEPT;

	/* ... or the header length field is smaller than a basic header. */
	if (ip_hdrlen(skb) < sizeof(struct iphdr))
		return NF_ACCEPT;

	/* Still a fragment here (IP_NODEFRAG setsockopt set): accept as is. */
	if (ip_is_fragment(ip_hdr(skb)))
		return NF_ACCEPT;

	/* Hand over to the common conntrack entry point. */
	return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
}
ipv4_helper

ipv4_helper函数获取连接上的help扩展及其关联的helper,如果helper存在,则调用该helper的help函数;

 /*
 * ipv4_helper - invoke the conntrack helper attached to a connection.
 * @priv:  hook private data (unused here)
 * @skb:   packet being processed
 * @state: hook state (unused here)
 *
 * Returns the helper's ->help() result when the skb has a conntrack whose
 * ctinfo is not IP_CT_RELATED_REPLY and which carries a help extension with
 * a helper. Returns NF_ACCEPT in every other case.
 */
static unsigned int ipv4_helper(void *priv,
				struct sk_buff *skb,
				const struct nf_hook_state *state)
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	const struct nf_conn_help *help;
	const struct nf_conntrack_helper *helper;

	/* This is where we call the helper: as the packet goes out. */
	ct = nf_ct_get(skb, &ctinfo);
	if (ct && ctinfo != IP_CT_RELATED_REPLY) {
		/* Look up the help extension of this conntrack. */
		help = nfct_help(ct);
		if (help) {
			/* rcu_read_lock()ed by nf_hook_thresh */
			helper = rcu_dereference(help->helper);
			if (helper) {
				/* Run the helper on the data after the IP header. */
				return helper->help(skb,
						    skb_network_offset(skb) + ip_hdrlen(skb),
						    ct, ctinfo);
			}
		}
	}

	/* No conntrack, related reply, no extension, or no helper. */
	return NF_ACCEPT;
}
ipv4_confirm

ipv4_confirm相关函数完成对连接的确认,并且将连接按照方向加入到对应的hash表中;

 /*
 * ipv4_confirm - adjust sequence numbers if required, then confirm the
 * connection attached to the packet.
 * @priv:  hook private data (unused here)
 * @skb:   packet being processed
 * @state: hook state (unused here)
 *
 * Returns NF_DROP when nf_ct_seq_adjust() returns 0, otherwise the result
 * of nf_conntrack_confirm().
 */
static unsigned int ipv4_confirm(void *priv,
				 struct sk_buff *skb,
				 const struct nf_hook_state *state)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

	/*
	 * Sequence adjustment is attempted only when a conntrack is attached,
	 * the packet is not a related reply, the SEQ_ADJUST bit is set and
	 * the packet is not loopback (adjust seqs for loopback traffic only
	 * in outgoing direction). Short-circuit evaluation keeps the checks
	 * in that order.
	 */
	if (ct && ctinfo != IP_CT_RELATED_REPLY &&
	    test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
	    !nf_is_loopback_packet(skb) &&
	    !nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
		NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
		return NF_DROP;
	}

	/* We've seen it coming out the other side: confirm it */
	return nf_conntrack_confirm(skb);
}
 /*
 * nf_conntrack_confirm - confirm the conntrack attached to an skb, if any.
 * @skb: packet being processed
 *
 * Without an attached conntrack this returns NF_ACCEPT. Otherwise an
 * unconfirmed conntrack is confirmed through __nf_conntrack_confirm(), and
 * cached events are delivered when the resulting verdict is NF_ACCEPT.
 * Returns that verdict.
 */
static inline int nf_conntrack_confirm(struct sk_buff *skb)
{
	struct nf_conn *ct = (struct nf_conn *)skb_nfct(skb);
	int verdict;

	/* Nothing attached: nothing to confirm. */
	if (!ct)
		return NF_ACCEPT;

	/* Already confirmed entries keep the default accept verdict. */
	verdict = nf_ct_is_confirmed(ct) ? NF_ACCEPT
					 : __nf_conntrack_confirm(skb);

	/* Deliver cached events only for accepted packets. */
	if (likely(verdict == NF_ACCEPT))
		nf_ct_deliver_cached_events(ct);

	return verdict;
}
 /*
 * __nf_conntrack_confirm - confirm a conntrack and insert it into the hash.
 * @skb: packet whose attached conntrack is to be confirmed
 *
 * Returns NF_ACCEPT when the packet is not in the original direction or
 * after the conntrack has been inserted into the hash table. Returns NF_DROP
 * when the conntrack is already dying. When an equal entry already exists in
 * either hash chain, returns the result of nf_ct_resolve_clash().
 */
int
__nf_conntrack_confirm(struct sk_buff *skb)
{
	const struct nf_conntrack_zone *zone;
	unsigned int hash, reply_hash;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;
	struct nf_conn_help *help;
	struct nf_conn_tstamp *tstamp;
	struct hlist_nulls_node *n;
	enum ip_conntrack_info ctinfo;
	struct net *net;
	unsigned int sequence;
	int ret = NF_DROP;

	ct = nf_ct_get(skb, &ctinfo);
	net = nf_ct_net(ct);

	/* ipt_REJECT uses nf_conntrack_attach to attach related
	   ICMP/TCP RST packets in other direction.  Actual packet
	   which created connection will be IP_CT_NEW or for an
	   expected connection, IP_CT_RELATED. */
	/* Only original-direction packets are confirmed here; per the
	 * original annotation, the reply direction was handled already. */
	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
		return NF_ACCEPT;

	zone = nf_ct_zone(ct);
	local_bh_disable();

	/* Compute the original and reply hashes and take both locks,
	 * retrying while nf_conntrack_double_lock() returns non-zero. */
	do {
		sequence = read_seqcount_begin(&nf_conntrack_generation);
		/* reuse the hash saved before */
		hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
		hash = scale_hash(hash);
		reply_hash = hash_conntrack(net,
					   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));

	/* We're not in hash table, and we refuse to set up related
	 * connections for unconfirmed conns.  But packet copies and
	 * REJECT will give spurious warnings here.
	 */
	/* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */

	/* No external references means no one else could have
	 * confirmed us.
	 */
	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
	pr_debug("Confirming conntrack %p\n", ct);
	/* We have to check the DYING flag after unlink to prevent
	 * a race against nf_ct_get_next_corpse() possibly called from
	 * user context, else we insert an already 'dead' hash, blocking
	 * further use of that particular connection -JM.
	 */
	nf_ct_del_from_dying_or_unconfirmed_list(ct);

	if (unlikely(nf_ct_is_dying(ct))) {
		nf_ct_add_to_dying_list(ct);
		goto dying;
	}

	/* See if there's one in the list already, including reverse:
	   NAT could have grabbed it without realizing, since we're
	   not in the hash.  If there is, we lost race. */
	/* A match in either of the two chains below means a clash. */

	/* Walk the original-direction chain looking for an equal entry. */
	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				    zone, net))
			goto out;

	/* Walk the reply-direction chain looking for an equal entry. */
	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
				    zone, net))
			goto out;

	/* Timer relative to confirmation time, not original
	   setting time, otherwise we'd get timer wrap in
	   weird delay cases. */
	ct->timeout += nfct_time_stamp;
	/* Take an additional reference on the conntrack. */
	atomic_inc(&ct->ct_general.use);
	/* Mark the conntrack as confirmed. */
	ct->status |= IPS_CONFIRMED;

	/* set conntrack timestamp, if enabled. */
	tstamp = nf_conn_tstamp_find(ct);
	if (tstamp) {
		/* Stamp the skb first if it carries no timestamp yet. */
		if (skb->tstamp == 0)
			__net_timestamp(skb);

		tstamp->start = ktime_to_ns(skb->tstamp);
	}
	/* Since the lookup is lockless, hash insertion must be done after
	 * starting the timer and setting the CONFIRMED bit. The RCU barriers
	 * guarantee that no other CPU can find the conntrack before the above
	 * stores are visible.
	 */
	/* Insert the original and reply nodes into their hash chains. */
	__nf_conntrack_hash_insert(ct, hash, reply_hash);
	nf_conntrack_double_unlock(hash, reply_hash);
	local_bh_enable();

	/* Event notification. */
	help = nfct_help(ct);
	if (help && help->helper)
		nf_conntrack_event_cache(IPCT_HELPER, ct);

	nf_conntrack_event_cache(master_ct(ct) ?
				 IPCT_RELATED : IPCT_NEW, ct);
	return NF_ACCEPT;

out:
	/* Clash path: put the conntrack on the dying list ... */
	nf_ct_add_to_dying_list(ct);
	/* ... and let nf_ct_resolve_clash() decide the verdict.
	 * NOTE(review): its exact semantics are not visible here. */
	ret = nf_ct_resolve_clash(net, skb, ctinfo, h);
dying:
	nf_conntrack_double_unlock(hash, reply_hash);
	NF_CT_STAT_INC(net, insert_failed);
	local_bh_enable();
	return ret;
}

Netfilter 之 连接跟踪钩子函数分析的更多相关文章

  1. Netfilter之连接跟踪实现机制初步分析

    Netfilter之连接跟踪实现机制初步分析 原文: http://blog.chinaunix.net/uid-22227409-id-2656910.html 什么是连接跟踪 连接跟踪(CONNT ...

  2. Netfilter 之 连接跟踪相关数据结构

    Netfilter通过连接跟踪来记录和跟踪连接的状态,为状态防火墙和NAT提供基础支持: 钩子点与钩子函数 下图为钩子点和钩子函数的关系图(点击图片查看原图),其中ipv4_conntrack_def ...

  3. Netfilter 之 连接跟踪初始化

    基础参数初始化 nf_conntrack_init_start函数完成连接跟踪基础参数的初始化,包括了hash,slab,扩展项,GC任务等: int nf_conntrack_init_start( ...

  4. Netfilter 之 连接跟踪的helper

    注册helper nf_conntrack_ftp_init是连接跟踪ftp模块的初始化函数,可以看到其调用了nf_conntrack_helpers_register来注册helper: stati ...

  5. Netfilter&iptables:如何理解连接跟踪机制?

    如何理解Netfilter中的连接跟踪机制? 本篇我打算以一个问句开头,因为在知识探索的道路上只有多问然后充分调动起思考的机器才能让自己走得更远.连接跟踪定义很简单:用来记录和跟踪连接的状态. 问:为 ...

  6. 对于数据包的截取,使用linux中的netfilter钩子函数

    http://blog.csdn.net/wswifth/article/details/5115358 在师哥的代码(packet.c)中使用的是Linux2.4内核中的一个子系统:netfilte ...

  7. linux内核netfilter连接跟踪的hash算法

    linux内核netfilter连接跟踪的hash算法 linux内核中的netfilter是一款强大的基于状态的防火墙,具有连接跟踪(conntrack)的实现.conntrack是netfilte ...

  8. Netfilter 之 钩子函数与钩子点关系图

    概述 通过钩子点和优先级的代码追溯,得到如下对应关系图,图中横坐标为钩子点,纵坐标为优先级,每个钩子点上的钩子函数按照优先级排布: 详细分析 5个钩子点如下所示,在这个五个钩子点上的钩子函数按照上面的 ...

  9. Netfilter 之 钩子函数注册

    通过注册流程代码的分析,能够明确钩子函数的注册流程,理解存储钩子函数的数据结构,如下图(点击图片可查看原图): 废话不多说,开始分析: nf_hook_ops是注册的钩子函数的核心结构,字段含义如下所 ...

随机推荐

  1. Ubuntu18.04通过网线共享网络

    Ubuntu18.04通过网线共享网络 这几天要给实验室一个新电脑装系统,但是实验室路由器好像有点问题,所以决定共享我的笔记本的网络,但是搜了很多教程都是基于Ubuntu16.04的,而Ubuntu1 ...

  2. 关于Mybatis的几件小事(一)

    一.Mybatis简介 1.Mybatis简介 MyBatis是支持定制化SQL.存储过程以及高级映射的优秀的持久层框架. MyBatis避免了几乎所有的JDBC代码和手动设置参数以及获取结果集. M ...

  3. DIP原则

    依赖倒置原则(Dependence Inversion Principle,DIP)的原始定义: 高层模块不应该依赖底层模块,两者都应该依赖其抽象: 抽象不应该依赖细节: 细节应该依赖抽象.

  4. eclipse debug 调试找不到资源问题解决

    eclipse debug 的时候,如果使用maven bulid,就可能找不到class,这种情况就需要先停止服务,然后配置 Run configurations-Source,然后remove掉D ...

  5. yii框架下使用redis

    1 首先获取到 yii2-redis-master.zip 压缩包 下载地址https://github.com/yiisoft/yii2-redis/archive/master.zip 2 把下载 ...

  6. selenium检测webdriver封爬虫的解决方法

    有不少朋友在开发爬虫的过程中喜欢使用Selenium + Chromedriver,以为这样就能做到不被网站的反爬虫机制发现. 先不说淘宝这种基于用户行为的反爬虫策略,仅仅是一个普通的小网站,使用一行 ...

  7. EasyUI+JSP之java读取数据库后JSON格式数据的返回及调用

    做作业工程中遇到一些问题,特此记录一下 解决的问题:使用EasyUI框架搭建简单学生管理系统(数据库增删改查)操作时配合JSP,不知道如何把从数据库获得的数据封装成JSON格式并传回前端JSP并进行展 ...

  8. 编译原理实战——使用Lex/Flex进行编写一个有一定词汇量的词法分析器

    编译原理实战--使用Lex/Flex进行编写一个有一定词汇量的词法分析器 by steve yu 2019.9.30 参考文档:1.https://blog.csdn.net/mist14/artic ...

  9. Almost Acyclic Graph CodeForces - 915D (思维+拓扑排序判环)

    Almost Acyclic Graph CodeForces - 915D time limit per test 1 second memory limit per test 256 megaby ...

  10. string::capacity string::size string::length string::max_size

    size_t capacity() const noexcept; #include <iostream>#include <string> using namespace s ...