ICMP 实现
以下代码取自 kernel-2.6.24(原文中版本号末位丢失,此处根据代码内容推断)。 [数据结构]
struct icmp_control {
void (*handler)(struct sk_buff *skb); //icmp处理函数,根据icmp的类型字段
short error; /* This ICMP is classed as an error message */
};
static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+]; //每个icmp类型有一个项 [/数据结构]
[初始化]
文件net/ipv4/af_inet.c中,函数
/*
 * Excerpt of inet_init(): only the two ICMP-related steps are shown, the
 * rest of the function is elided.
 */
static int __init inet_init(void)
{
	/* ...... */

	/* Register icmp_protocol as the handler for IPPROTO_ICMP. */
	if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n");

	/* ...... */

	/* ICMP protocol initialisation. */
	icmp_init(&inet_family_ops);

	/* ...... */
}
icmp初始化函数
/* One ICMP control socket per CPU. */
static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL;

/*
 * Create and configure the per-CPU raw ICMP socket for every possible CPU.
 * Panics if a socket cannot be created.
 */
void __init icmp_init(struct net_proto_family *ops)
{
	struct inet_sock *inet;
	int i;

	for_each_possible_cpu(i) {
		int err;

		/* Create a kernel-owned raw ICMP socket for this CPU. */
		err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP,
				       &per_cpu(__icmp_socket, i));
		if (err < 0)
			panic("Failed to create the ICMP control socket.\n");

		/* Allocations on behalf of this socket use GFP_ATOMIC. */
		per_cpu(__icmp_socket, i)->sk->sk_allocation = GFP_ATOMIC;

		/*
		 * Enough space for 2 * 64K ICMP packets, including
		 * sk_buff struct overhead.
		 */
		per_cpu(__icmp_socket, i)->sk->sk_sndbuf =
			(2 * ((64 * 1024) + sizeof(struct sk_buff)));

		inet = inet_sk(per_cpu(__icmp_socket, i)->sk);
		inet->uc_ttl = -1;
		inet->pmtudisc = IP_PMTUDISC_DONT;

		/*
		 * Unhash it so that IP input processing does not even see it,
		 * we do not wish this socket to see incoming packets.
		 */
		per_cpu(__icmp_socket, i)->sk->sk_prot->unhash(per_cpu(__icmp_socket, i)->sk);
	}
}
[/初始化]
[协议处理实现]
注册的协议处理函数,当ip向上递交数据包时,如果发现是icmp协议就会调用这个函数。
/*
 * Protocol descriptor passed to inet_add_protocol() for IPPROTO_ICMP;
 * its receive handler is icmp_rcv().
 */
static struct net_protocol icmp_protocol = {
.handler = icmp_rcv,
};
处理进入的icmp包
int icmp_rcv(struct sk_buff *skb)
{
struct icmphdr *icmph;
struct rtable *rt = (struct rtable *)skb->dst; //路由缓存 ICMP_INC_STATS_BH(ICMP_MIB_INMSGS); switch (skb->ip_summed) { //skb的ip校验和标志
case CHECKSUM_COMPLETE:
if (!csum_fold(skb->csum)) //没有伪头部的校验和检测
break; /* fall through */
case CHECKSUM_NONE:
skb->csum = ;
if (__skb_checksum_complete(skb)) //全部内容的校验和检测
goto error;
}
if (!pskb_pull(skb, sizeof(struct icmphdr))) //是否有icmp头空间,如果有移动data指针到icmp头后面
goto error; icmph = icmp_hdr(skb); //获取icmp头
ICMPMSGIN_INC_STATS_BH(icmph->type); /*
* 18 is the highest 'known' ICMP type. Anything else is a mystery
* RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently discarded.
*/
if (icmph->type > NR_ICMP_TYPES)
goto error;
//icmp是发送到本地的多播或广播地址
if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
/* RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be silently ignored (we let user decide with a sysctl). * RFC 1122: 3.2.2.8 An ICMP_TIMESTAMP MAY be silently discarded if to broadcast/multicast.*/
if ((icmph->type == ICMP_ECHO || icmph->type == ICMP_TIMESTAMP) && sysctl_icmp_echo_ignore_broadcasts) {
goto error;
}
//除了回显和时间截,地址掩码请求和应答,其他到广播和多播的icmp包全部丢弃
if (icmph->type != ICMP_ECHO && icmph->type != ICMP_TIMESTAMP &&
icmph->type != ICMP_ADDRESS && icmph->type != ICMP_ADDRESSREPLY) {
goto error;
}
}
icmp_pointers[icmph->type].handler(skb); //根据icmp类型调用相应的处理函数
drop:
kfree_skb(skb); //处理完了释放skb
return ;
error:
ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
goto drop;
}
类型处理函数在内核中被静态的初始化.
/*
 * Statically initialised dispatch table, indexed by ICMP type. Types that
 * have no symbolic name (1, 2, 6, 7, 9, 10) are discarded and classed as
 * errors.
 */
static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
	[ICMP_ECHOREPLY] = {
		.handler = icmp_discard,	/* no-op handler */
	},
	[1] = {
		.handler = icmp_discard,
		.error = 1,
	},
	[2] = {
		.handler = icmp_discard,
		.error = 1,
	},
	[ICMP_DEST_UNREACH] = {
		.handler = icmp_unreach,
		.error = 1,
	},
	[ICMP_SOURCE_QUENCH] = {
		.handler = icmp_unreach,
		.error = 1,
	},
	[ICMP_REDIRECT] = {
		.handler = icmp_redirect,
		.error = 1,
	},
	[6] = {
		.handler = icmp_discard,
		.error = 1,
	},
	[7] = {
		.handler = icmp_discard,
		.error = 1,
	},
	[ICMP_ECHO] = {
		.handler = icmp_echo,
	},
	[9] = {
		.handler = icmp_discard,
		.error = 1,
	},
	[10] = {
		.handler = icmp_discard,
		.error = 1,
	},
	[ICMP_TIME_EXCEEDED] = {
		.handler = icmp_unreach,
		.error = 1,
	},
	[ICMP_PARAMETERPROB] = {
		.handler = icmp_unreach,
		.error = 1,
	},
	[ICMP_TIMESTAMP] = {
		.handler = icmp_timestamp,
	},
	[ICMP_TIMESTAMPREPLY] = {
		.handler = icmp_discard,
	},
	[ICMP_INFO_REQUEST] = {
		.handler = icmp_discard,
	},
	[ICMP_INFO_REPLY] = {
		.handler = icmp_discard,
	},
	[ICMP_ADDRESS] = {
		.handler = icmp_address,
	},
	[ICMP_ADDRESSREPLY] = {
		.handler = icmp_address_reply,
	},
};
我们一个一个看。
icmp接收到差错报文的处理函数,它处理的类型包括 ICMP_DEST_UNREACH、ICMP_SOURCE_QUENCH、ICMP_TIME_EXCEEDED 和 ICMP_PARAMETERPROB。
/*
 * Handle ICMP error messages (destination unreachable, source quench, time
 * exceeded, parameter problem): validate the embedded IP header, derive the
 * "info" value, then pass the error to matching raw sockets and to the
 * err_handler of the protocol named in the embedded header.
 */
static void icmp_unreach(struct sk_buff *skb)
{
	struct iphdr *iph;
	struct icmphdr *icmph;
	int hash, protocol;
	struct net_protocol *ipprot;
	struct sock *raw_sk;
	u32 info = 0;

	/* The payload must contain at least the embedded IP header. */
	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto out_err;

	icmph = icmp_hdr(skb);
	iph = (struct iphdr *)skb->data;	/* IP header carried in the ICMP payload */

	if (iph->ihl < 5) /* Mangled header, drop. */
		goto out_err;

	if (icmph->type == ICMP_DEST_UNREACH) {
		switch (icmph->code & 15) {
		case ICMP_NET_UNREACH:
		case ICMP_HOST_UNREACH:
		case ICMP_PROT_UNREACH:
		case ICMP_PORT_UNREACH:
			break;
		case ICMP_FRAG_NEEDED:
			if (ipv4_config.no_pmtu_disc) {
				LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: fragmentation needed and DF set.\n", NIPQUAD(iph->daddr));
			} else {
				/*
				 * Pass the advertised MTU to the routing code;
				 * a zero result means there is nothing to report.
				 */
				info = ip_rt_frag_needed(iph, ntohs(icmph->un.frag.mtu));
				if (!info)
					goto out;
			}
			/* Must not fall into the source-route case below. */
			break;
		case ICMP_SR_FAILED:
			LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: Source Route Failed.\n", NIPQUAD(iph->daddr));
			break;
		default:
			break;
		}
		if (icmph->code > NR_ICMP_UNREACH)	/* unknown unreachable code */
			goto out;
	} else if (icmph->type == ICMP_PARAMETERPROB)
		info = ntohl(icmph->un.gateway) >> 24;

	/*
	 * Some routers send error replies to a broadcast address; log and
	 * drop them unless the sysctl asks to ignore them silently.
	 */
	if (!sysctl_icmp_ignore_bogus_error_responses && inet_addr_type(iph->daddr) == RTN_BROADCAST) {
		if (net_ratelimit())
			printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP type %u, code %u "
					    "error to a broadcast: %u.%u.%u.%u on %s\n", NIPQUAD(ip_hdr(skb)->saddr),
			       icmph->type, icmph->code, NIPQUAD(iph->daddr), skb->dev->name);
		goto out;
	}

	/*
	 * Checkin full IP header plus 8 bytes of protocol to avoid additional
	 * coding at protocol handlers.
	 */
	if (!pskb_may_pull(skb, iph->ihl * 4 + 8))
		goto out;

	/* pskb_may_pull() may have moved the data; reload the pointer. */
	iph = (struct iphdr *)skb->data;
	protocol = iph->protocol;
	hash = protocol & (MAX_INET_PROTOS - 1);

	/* Deliver the ICMP error to every matching raw socket. */
	read_lock(&raw_v4_lock);
	if ((raw_sk = sk_head(&raw_v4_htable[hash])) != NULL) {
		while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr, iph->saddr, skb->dev->ifindex)) != NULL) {
			raw_err(raw_sk, skb, info);
			raw_sk = sk_next(raw_sk);
			iph = (struct iphdr *)skb->data;
		}
	}
	read_unlock(&raw_v4_lock);

	/* Hand the error to the protocol's own error handler, if it has one. */
	rcu_read_lock();
	ipprot = rcu_dereference(inet_protos[hash]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, info);
	rcu_read_unlock();
out:
	return;
out_err:
	ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
	goto out;
}
icmp重定向处理
static void icmp_redirect(struct sk_buff *skb)
{
struct iphdr *iph; if (skb->len < sizeof(struct iphdr)) 长度检测
goto out_err;
/* Get the copied header of the packet that caused the redirect */
if (!pskb_may_pull(skb, sizeof(struct iphdr))) //ip头长度检测
goto out; iph = (struct iphdr *)skb->data; //取出ip头 switch (icmp_hdr(skb)->code & ) { //编码
case ICMP_REDIR_NET: //网络重定向
case ICMP_REDIR_NETTOS:
/* As per RFC recommendations now handle it as a host redirect.*/
case ICMP_REDIR_HOST: //主机重定向
case ICMP_REDIR_HOSTTOS:
//在路由告诉缓存中,更新相同缓存项的rt_gateway字段
ip_rt_redirect(ip_hdr(skb)->saddr, iph->daddr, icmp_hdr(skb)->un.gateway, iph->saddr, skb->dev);
break;
}
out:
return;
out_err:
ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
goto out;
}
icmp回显请求
/*
 * Handle an ICMP echo request: unless sysctl_icmp_echo_ignore_all is set,
 * reply with an echo reply that reuses the request's header and payload.
 */
static void icmp_echo(struct sk_buff *skb)
{
	if (!sysctl_icmp_echo_ignore_all) {
		struct icmp_bxm icmp_param;

		/* Start from the request header and only change the type. */
		icmp_param.data.icmph = *icmp_hdr(skb);
		icmp_param.data.icmph.type = ICMP_ECHOREPLY;
		icmp_param.skb = skb;
		icmp_param.offset = 0;			/* copy the payload from its start */
		icmp_param.data_len = skb->len;
		icmp_param.head_len = sizeof(struct icmphdr);
		icmp_reply(&icmp_param, skb);
	}
}
/*
 * Build and send a reply described by icmp_param in response to skb.
 * Gives up silently if the IP options cannot be echoed, the transmit lock
 * cannot be taken, or no output route is found.
 */
static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
{
	struct sock *sk = icmp_socket->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct ipcm_cookie ipc;
	struct rtable *rt = (struct rtable *)skb->dst;	/* route of the received packet */
	__be32 daddr;

	/* Build the reply options from the options of the received packet. */
	if (ip_options_echo(&icmp_param->replyopts, skb))
		return;

	if (icmp_xmit_lock())
		return;

	icmp_param->data.icmph.checksum = 0;

	inet->tos = ip_hdr(skb)->tos;
	daddr = ipc.addr = rt->rt_src;		/* reply goes back to the sender */
	ipc.opt = NULL;
	if (icmp_param->replyopts.optlen) {
		ipc.opt = &icmp_param->replyopts;
		if (ipc.opt->srr)
			daddr = icmp_param->replyopts.faddr;
	}
	{
		struct flowi fl = { .nl_u = { .ip4_u =
					      { .daddr = daddr,
						.saddr = rt->rt_spec_dst,
						.tos = RT_TOS(ip_hdr(skb)->tos) } },
				    .proto = IPPROTO_ICMP };

		security_skb_classify_flow(skb, &fl);
		/* Without an output route nothing is sent. */
		if (ip_route_output_key(&rt, &fl))
			goto out_unlock;
	}
	/* Send only if the rate limiter allows this reply. */
	if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type, icmp_param->data.icmph.code))
		icmp_push_reply(icmp_param, &ipc, rt);
	/* Drop the route obtained from ip_route_output_key(). */
	ip_rt_put(rt);
out_unlock:
	icmp_xmit_unlock();
}
判断应答是否发送
/*
 * Decide whether an ICMP message of the given type/code may be sent on rt.
 * Returns 1 to allow and 0 to suppress. Only types selected in
 * sysctl_icmp_ratemask are rate limited.
 */
static inline int icmpv4_xrlim_allow(struct rtable *rt, int type, int code)
{
	struct dst_entry *dst = &rt->u.dst;
	int rc = 1;

	/*
	 * Out-of-range types are let through. The article's author suspects
	 * this is a bug and that rc should be 0 here; behaviour is left as is.
	 */
	if (type > NR_ICMP_TYPES)
		goto out;

	/* Don't limit PMTU discovery. */
	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
		goto out;

	/* No rate limit on loopback */
	if (dst->dev && (dst->dev->flags & IFF_LOOPBACK))
		goto out;

	/* Limit if icmp type is enabled in ratemask. */
	if ((1 << type) & sysctl_icmp_ratemask)
		rc = xrlim_allow(dst, sysctl_icmp_ratelimit);
out:
	return rc;
}
#define XRLIM_BURST_FACTOR 6
int xrlim_allow(struct dst_entry *dst, int timeout)
{
unsigned long now;
int rc = ; //不发送 now = jiffies;
dst->rate_tokens += now - dst->rate_last; //累加过去的时间
dst->rate_last = now; //最后使用时间 if (dst->rate_tokens > XRLIM_BURST_FACTOR * timeout) //累加时间超过指定的范围
dst->rate_tokens = XRLIM_BURST_FACTOR * timeout; //设为最大值 if (dst->rate_tokens >= timeout) { //超过用户配置的时间限制
dst->rate_tokens -= timeout; //递减配置的时间限制
rc = ; //发送
}
return rc;
}
发送icmp应答函数
static void icmp_push_reply(struct icmp_bxm *icmp_param, struct ipcm_cookie *ipc, struct rtable *rt)
{
struct sk_buff *skb;
//分配skb拷贝接收的skb数据到新分配的skb内存中,新skb被链入到icmp_socket->sk->sk_write_queue中.
if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param, icmp_param->data_len+icmp_param->head_len,
icmp_param->head_len, ipc, rt, MSG_DONTWAIT) < )
ip_flush_pending_frames(icmp_socket->sk); //拷贝失败
else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { //提取分配的skb
struct icmphdr *icmph = icmp_hdr(skb);
__wsum csum = ;
struct sk_buff *skb1;
//计算校验和
skb_queue_walk(&icmp_socket->sk->sk_write_queue, skb1) {
csum = csum_add(csum, skb1->csum);
}
csum = csum_partial_copy_nocheck((void *)&icmp_param->data, (char *)icmph, icmp_param->head_len, csum);
icmph->checksum = csum_fold(csum);
skb->ip_summed = CHECKSUM_NONE;
ip_push_pending_frames(icmp_socket->sk); //发送队列中的skb
}
}
/*
 * getfrag callback for ip_append_data(): copy `len` bytes of the original
 * packet into `to` and accumulate their checksum into skb->csum.
 * Always returns 0.
 */
static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct icmp_bxm *icmp_param = (struct icmp_bxm *)from;
	__wsum csum;

	/* Copy the data and checksum it in the same pass. */
	csum = skb_copy_and_csum_bits(icmp_param->skb, icmp_param->offset + offset, to, len, 0);
	/* Fold this block's checksum into the buffer's running checksum. */
	skb->csum = csum_block_add(skb->csum, csum, odd);
	if (icmp_pointers[icmp_param->data.icmph.type].error)
		nf_ct_attach(skb, icmp_param->skb);
	return 0;
}
拷贝数据到ip数据负载部分,如果需要将所有碎片链入到sk->sk_write_queue队列中
/*
 * Append `length` bytes, produced by the getfrag callback, to the socket's
 * pending write queue, splitting them into fragment-sized skbs as needed.
 * Returns 0 on success or a negative errno.
 */
int ip_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
		   void *from, int length, int transhdrlen, struct ipcm_cookie *ipc, struct rtable *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;
	struct ip_options *opt = NULL;
	int hh_len;
	int exthdrlen;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	unsigned int maxfraglen, fragheaderlen;
	int csummode = CHECKSUM_NONE;

	if (flags & MSG_PROBE)
		return 0;

	if (skb_queue_empty(&sk->sk_write_queue)) {
		/* First call for this datagram: set up the cork state. */
		opt = ipc->opt;
		if (opt) {
			if (inet->cork.opt == NULL) {
				/* Room for the option struct plus up to 40 bytes of IP options. */
				inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
				if (unlikely(inet->cork.opt == NULL))
					return -ENOBUFS;
			}
			memcpy(inet->cork.opt, opt, sizeof(struct ip_options) + opt->optlen);
			inet->cork.flags |= IPCORK_OPT;
			inet->cork.addr = ipc->addr;
		}
		/* With IP_PMTUDISC_PROBE use the device MTU, otherwise the path MTU. */
		inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		inet->cork.rt = rt;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		if ((exthdrlen = rt->u.dst.header_len) != 0) {
			/* Account for the extra header in front of the IP header. */
			length += exthdrlen;
			transhdrlen += exthdrlen;
		}
	} else {
		/* Queue not empty: continue with the saved cork state. */
		rt = inet->cork.rt;
		if (inet->cork.flags & IPCORK_OPT)
			opt = inet->cork.opt;

		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}
	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);	/* link-layer header room */

	/* IP header length of each fragment, options included. */
	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
	/* Largest fragment whose payload is a multiple of 8 bytes. */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;

	/* The whole datagram must fit in the 16-bit IP total length. */
	if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu - exthdrlen);
		return -EMSGSIZE;
	}

	/*
	 * transhdrlen > 0 means that this is the first fragment and we wish
	 * it won't be fragmented in the future.
	 */
	if (transhdrlen && length + fragheaderlen <= mtu && rt->u.dst.dev->features & NETIF_F_V4_CSUM && !exthdrlen)
		csummode = CHECKSUM_PARTIAL;

	inet->cork.length += length;

	/* Oversized UDP datagram on a device advertising NETIF_F_UFO. */
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && (rt->u.dst.dev->features & NETIF_F_UFO)) {
		err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags);
		if (err)
			goto error;
		return 0;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/*
		 * Check if the remaining data fits into current packet.
		 * (Article author's remark: using maxfraglen instead of mtu
		 * here would make fraggap unnecessary.)
		 */
		copy = mtu - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;
		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;
			/* Bytes of the previous skb that overflow an aligned fragment. */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > mtu - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;
			fraglen = datalen + fragheaderlen;	/* full length of this fragment */

			if ((flags & MSG_MORE) && !(rt->u.dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note, with MSG_MORE we overallocate on fragments,
			 * because we have no idea what fragment will be the last.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk, alloclen + hh_len + 15, (flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <= 2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk, alloclen + hh_len + 15, 1, sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;

			/* Fill in the control structures */
			skb->ip_summed = csummode;
			skb->csum = 0;
			skb_reserve(skb, hh_len);

			/* Find where to start putting bytes. */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			skb->transport_header = (skb->network_header + fragheaderlen);
			data += fragheaderlen;

			if (fraggap) {
				/* Move the overflowing tail of the previous skb into this one. */
				skb->csum = skb_copy_and_csum_bits(skb_prev, maxfraglen, data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum, skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}

			/* datalen covers the transport header and the data. */
			copy = datalen - transhdrlen - fraggap;
			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/* Put the packet on the pending queue. */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features & NETIF_F_SG)) {
			/* No scatter-gather: append to the linear data area. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy), offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: append into page fragments. */
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page) + frag->page_offset + frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	inet->cork.length -= length;
	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
	return err;
}
ip_append_data函数失败时就会调用这个函数释放队列中所有的skb
/*
 * Discard everything still queued on the socket's write queue, taking the
 * buffers from the tail, then release the cork state.
 */
void ip_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *pending;

	for (;;) {
		pending = __skb_dequeue_tail(&sk->sk_write_queue);
		if (pending == NULL)
			break;
		kfree_skb(pending);
	}

	ip_cork_release(inet_sk(sk));
}
icmp_push_reply-> 取出队列中的skb,然后添加完整的ip头然后发送出去
int ip_push_pending_frames(struct sock *sk)
{
struct sk_buff *skb, *tmp_skb;
struct sk_buff **tail_skb;
struct inet_sock *inet = inet_sk(sk);
struct ip_options *opt = NULL;
struct rtable *rt = inet->cork.rt;
struct iphdr *iph;
__be16 df = ;
__u8 ttl;
int err = ; if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) //取出一个skb
goto out;
tail_skb = &(skb_shinfo(skb)->frag_list); //指向分片连表头 /* move skb->data to ip header from ext header */
if (skb->data < skb_network_header(skb))
__skb_pull(skb, skb_network_offset(skb)); //移动data指针到ip头位置 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { //循环出队所有skb
__skb_pull(tmp_skb, skb_network_header_len(skb)); //移动data到传输层头位置
*tail_skb = tmp_skb; //当执行第一次时等于是(skb_shinfo(skb)->frag_list) = tmp_skb
tail_skb = &(tmp_skb->next); //指向了tmp_skb的next
//累加这个包的长度
skb->len += tmp_skb->len;
skb->data_len += tmp_skb->len;
skb->truesize += tmp_skb->truesize;
__sock_put(tmp_skb->sk); //递减sock的引用计数
tmp_skb->destructor = NULL;
tmp_skb->sk = NULL;
}
//到这就是把所有在sk->sk_write_queue中的skb(所有分片)组合到第一个skb的skb_shinfo(skb)->frag_list连表中了。 /* Unless user demanded real pmtu discovery (IP_PMTUDISC_DO), we allow to fragment the frame generated here.
* No matter, what transforms how transforms change size of the packet, it will come out. */
if (inet->pmtudisc < IP_PMTUDISC_DO)
skb->local_df = ; //不分片
/* DF bit is set when we want to see DF on outgoing frames.
* If local_df is set too, we still allow to fragment this frame locally. */
if (inet->pmtudisc >= IP_PMTUDISC_DO || (skb->len <= dst_mtu(&rt->u.dst) && ip_dont_fragment(sk, &rt->u.dst)))
df = htons(IP_DF); //设置不分片标志
if (inet->cork.flags & IPCORK_OPT) //有ip选项
opt = inet->cork.opt; if (rt->rt_type == RTN_MULTICAST) //多播ttl
ttl = inet->mc_ttl;
else
ttl = ip_select_ttl(inet, &rt->u.dst); //单播,需要计算 iph = (struct iphdr *)skb->data; //在第一个skb中添加ip头
iph->version = ;
iph->ihl = ;
if (opt) {
iph->ihl += opt->optlen>>;
ip_options_build(skb, opt, inet->cork.addr, rt, );
}
iph->tos = inet->tos;
iph->tot_len = htons(skb->len);
iph->frag_off = df;
ip_select_ident(iph, &rt->u.dst, sk); //选择一个ip标识
iph->ttl = ttl;
iph->protocol = sk->sk_protocol;
iph->saddr = rt->rt_src;
iph->daddr = rt->rt_dst;
ip_send_check(iph); //校验和 skb->priority = sk->sk_priority;
skb->dst = dst_clone(&rt->u.dst); if (iph->protocol == IPPROTO_ICMP)
icmp_out_count(((struct icmphdr *)skb_transport_header(skb))->type); //更新一些统计信息 //发送这个skb到netfilter的LOCAL_OUT hook
err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
if (err) {
if (err > )
err = inet->recverr ? net_xmit_errno(err) : ; if (err)
goto error;
}
out:
ip_cork_release(inet);
return err;
error:
IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
goto out;
}
到这需要简单说一下,其实我们看的是icmp回显请求相关的流程,其中的ip分片应该根本不会发生,
但这些函数是ip层通用的,所以有些地方看起来十分复杂。
icmp时间戳请求处理
/*
 * Handle an ICMP timestamp request: reply with the originate timestamp
 * copied from the request and the current time as both receive and transmit
 * timestamps. Requests shorter than 4 bytes are counted as errors.
 */
static void icmp_timestamp(struct sk_buff *skb)
{
	struct timeval tv;
	struct icmp_bxm icmp_param;

	/* Too short. */
	if (skb->len < 4)
		goto out_err;

	/* Fill in the current time as ms since midnight UT: */
	do_gettimeofday(&tv);
	icmp_param.data.times[1] = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000);
	icmp_param.data.times[2] = icmp_param.data.times[1];

	/* Copy the 4-byte originate timestamp from the request into times[0]. */
	if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4))
		BUG();

	icmp_param.data.icmph = *icmp_hdr(skb);
	icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY;
	icmp_param.data.icmph.code = 0;
	icmp_param.skb = skb;
	icmp_param.offset = 0;
	icmp_param.data_len = 0;
	/* The header part carries the ICMP header plus three 32-bit timestamps. */
	icmp_param.head_len = sizeof(struct icmphdr) + 12;
	icmp_reply(&icmp_param, skb);
out:
	return;
out_err:
	ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
	goto out;
}
地址掩码请求,linux没有实现它,参考内核中这函数的注释
/*
 * Handler installed for ICMP_ADDRESS (address mask request). The only code
 * in the body is compiled out with #if 0, so the function does nothing.
 */
static void icmp_address(struct sk_buff *skb)
{
#if 0
if (net_ratelimit())
printk(KERN_DEBUG "a guy asks for address mask. Who is it?\n");
#endif
}
地址掩码应答处理
static void icmp_address_reply(struct sk_buff *skb)
{
struct rtable *rt = (struct rtable *)skb->dst; //路由缓存
struct net_device *dev = skb->dev;
struct in_device *in_dev;
struct in_ifaddr *ifa;
//长度不对或没有标志重定向源地址
if (skb->len < || !(rt->rt_flags & RTCF_DIRECTSRC))
goto out;
in_dev = in_dev_get(dev);
if (!in_dev)
goto out;
rcu_read_lock();
//设备有地址,打开调试项,设备允许转发
if (in_dev->ifa_list && IN_DEV_LOG_MARTIANS(in_dev) && IN_DEV_FORWARD(in_dev)) {
__be32 _mask, *mp;
//取出掩码
mp = skb_header_pointer(skb, , sizeof(_mask), &_mask);
BUG_ON(mp == NULL);
for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
//循环所有地址,如果掩码匹配且路由地址也匹配
if (*mp == ifa->ifa_mask && inet_ifa_match(rt->rt_src, ifa))
break;
}
if (!ifa && net_ratelimit()) { //都不匹配
printk(KERN_INFO "Wrong address mask %u.%u.%u.%u from %s/%u.%u.%u.%u\n",
NIPQUAD(*mp), dev->name, NIPQUAD(rt->rt_src));
}
}
rcu_read_unlock();
in_dev_put(in_dev);
out:;
}
[/协议处理实现]
ICMP 实现的更多相关文章
- [协议]ICMP协议剖析
1.ICMP简介 ICMP全名为(INTERNET CONTROL MESSAGE PROTOCOL)网络控制消息协议. ICMP的协议号为1. ICMP报文就像是IP报文的小弟,总顶着IP报文的名头 ...
- 简单了解ICMP协议
ping命令是什么协议? 维基百科: ping是一种电脑网络工具,用来测试数据包能否通过IP协议到达特定主机.ping的运作原理是向目标主机传出一个ICMP echo@要求数据包,并等待接受echo回 ...
- ICMP的应用--Traceroute
Traceroute是用来侦测主机到目的主机之间所经路由情况的重要工具,也是最便利的工具.前面说到,尽管ping工具也可以进行侦测,但是,因为ip头的限制,ping不能完全的记录下所经过的路由器.所以 ...
- ICMP Protocol
[ICMP Protocol] 参考: 1.ICMP Types and Codes:http://www.nthelp.com/icmp.html 2.RFC 792 - Internet Cont ...
- TCP协议学习记录 (一) ICMP时间戳请求
程序只实现了获取时间戳,至于将时间戳转换成具体日期和时间,暂时没有好的办法. #define TIME_STAMP_REQUEST 13 struct iphdr { unsigned ; //包头长 ...
- 002.ICMP--拼接ICMP包,实现简单Ping程序(原始套接字)
一.大致流程: 将ICMP头和时间数据设置好后,通过创建好的原始套接字socket发出去.目的主机计算效验和后会将数据原样返回,用当前时间和返回的数据结算时间差,计算出rtt. 二.数据结构: ICM ...
- linux原始套接字(2)-icmp请求与接收
一.概述 上一篇arp请求使用的是链路层的原始套接字.icmp封装在ip数据报里面,所以icmp请 ...
- 网络错误定位案例 ICMP host *** unreachable - admin prohibited
1. 环境 一台物理服务器 9.115.251.86,上面创建两个虚机,每个虚机两个网卡: vm1:eth0 - 9.*.*.232 eth1:10.0.0.14 vm2: eth0 - 9.8.*. ...
- GO语言练习:网络编程 ICMP 示例
1.代码 2.编译及运行 1.Go语言网络编程:ICMP示例代码 icmptest.go package main import ( "fmt" "net" & ...
- 一个ICMP单元
unit ICMPUtils; interface {$IFDEF VER80} { This source file is *NOT* compatible with Delphi 1 becaus ...
随机推荐
- 安卓百度地图开发so文件引用失败问题研究
博客: 安卓之家 微博: 追风917 CSDN: 蒋朋的家 简书: 追风917 博客园: 追风917 # 问题 首先,下面的问题基本都是在Android Studio下使用不当导致,eclipse是百 ...
- 用友U8按BOM计算销售订单物料需求SQL代码 第一稿
drop table #tmp1999 drop table #tmp2999 drop table #tmp3999 drop table #tmp4999 drop table #tmp5999 ...
- JAVA多线程解惑之多线程返回值
如果有人问题你,多线程可以有返回值吗?你怎么回答? 看下面例子,我定义了一个类实现了Callable 接口 public class MyCallable implements Callable< ...
- C# Flash 图片上传案例(结合网上腾讯头像上传Flash插件)
之前遇到过很多次要上传类似头像图片这种功能需求,这次是要求弄一个flash插件上传图片 感谢主,一个偶然机会在网上找到了一个很好的腾讯头像修改的flash插件:插件下载 这个功能采用Ajax访问支持, ...
- Javase中多态polymorphic的简单介绍
-------------多态----------------- (1)面向对象三大核心思想: 1.封装 2.继承 3.多态 (2)多态定义:父类的引用指向子类的对象. (3)引用指的是父 ...
- 什么是NSTimer
本文主要是介绍什么是NSTimer,具体使用请参考上一篇博客. 1.什么是NSTimer? NSTimer就是timer就是一个能在从现在开始的后面的某一个时刻或者周期性的执行我们指定的方法的对象. ...
- Windows server 2003常用设置
1.禁用配置服务器向导 由于不需要服务器设置功能,首先我们先禁止“配置你的服务器”(Manage Your Server)向导的出现,你可以在控制面板(Control Panel) ...
- avconv转换视频
提取指定stream time avconv -i i.mkv -map 0:0 -map 0:1 -map 0:5 -c:v copy -c:a:0 mp3 -c:s copy o.mkv 合并 a ...
- (转) sphinx 高亮显示搜索词
http://hi.baidu.com/tewuapple/item/7a7bc34adbda24a8df2a9fe5 (转)
- ubuntu thinkphp pathinfo 404等问题
这个问题 困扰了我一天,由于对nginx的配置文件中的各种变量不懂.配置起来很麻烦,从网上搜索的,感觉合适自己的不多!!! 找啊找啊..终于找一篇!!!! 我的环境: php ubuntu 12.04 ...