邻居子系统1.5 neigh output
1.5.1
当邻居项不处于NUD_CONNECTD状态时,不允许快速路径发送报文,函数neigh_resolve_output 用于慢而安全的输出,通常用初始化neigh_ops结构
来实例output函数,当邻居从NUD_CONNECT转到非NUD_CONNECT的时候,使用neigh_suspect 将output设置为neigh_resolve_output ()
/* Neighbour state is suspicious;
disable fast path.
Called with write_locked neigh.
*/
static void neigh_suspect(struct neighbour *neigh)
{
NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
neigh->output = neigh->ops->output;
}
/* Neighbour state is OK;
enable fast path.
Called with write_locked neigh.
*/
static void neigh_connect(struct neighbour *neigh)
{
NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
neigh->output = neigh->ops->connected_output;
}
static const struct neigh_ops arp_generic_ops = {
.family = AF_INET,
.solicit = arp_solicit,
.error_report = arp_error_report,
.output = neigh_resolve_output,
.connected_output = neigh_connected_output,
};
neigh_resolve_output:大概含义为:邻居项的输出设备支持hard_header_cache 同时二层首部没有建立
则为改路由缓存建立硬件首部缓存,然后再输出报文中添加改二层硬件首部.;否则直接在报文首部添加硬件首部
/* Slow and careful. */ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
int rc = 0; if (!dst)
goto discard;
/* 检测邻居项状态有效性 */
if (!neigh_event_send(neigh, skb)) {
int err;
struct net_device *dev = neigh->dev;
unsigned int seq;
///* 有二层头缓存函数,则缓存之 */
if (dev->header_ops->cache && !neigh->hh.hh_len)
neigh_hh_init(neigh, dst); do {/* 填充二层头 */
__skb_pull(skb, skb_network_offset(skb));
seq = read_seqbegin(&neigh->ha_lock);
err = dev_hard_header(skb, dev, ntohs(skb->protocol),
neigh->ha, NULL, skb->len);
} while (read_seqretry(&neigh->ha_lock, seq)); if (err >= 0)//如果添加首部成功调用xmit 输出到网络设备
rc = dev_queue_xmit(skb);/* 数据包发送 */
else
goto out_kfree_skb;
}
out:
return rc;
discard:
NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
dst, neigh);
out_kfree_skb:
rc = -EINVAL;
kfree_skb(skb);
goto out;
}
static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
unsigned long now = jiffies; if (neigh->used != now)
neigh->used = now;
if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE)))
return __neigh_event_send(neigh, skb);
return 0;
}
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
int rc;
bool immediate_probe = false; write_lock_bh(&neigh->lock); rc = 0;
if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
goto out_unlock_bh;
/*
去掉NUD_CONNECT NUD_DELAY NUD_PROBE 状态
那么就只剩下 NUD_STALE NUD_INCOMPLETE NUD_NONE NUD_FAILD
*/
if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { //NUD_NONE状态
if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
//如果允许发送广播请求或者应用程序发送请求解析neigh地址
unsigned long next, now = jiffies; atomic_set(&neigh->probes, neigh->parms->ucast_probes);
neigh->nud_state = NUD_INCOMPLETE;
neigh->updated = now;
next = now + max(neigh->parms->retrans_time, HZ/2);
neigh_add_timer(neigh, next); //启动定时器
immediate_probe = true; //发送arp 请求(ipv4) 请求邻居表项
} else {
neigh->nud_state = NUD_FAILED;//邻居无效 不能输出
neigh->updated = jiffies;
write_unlock_bh(&neigh->lock); kfree_skb(skb);
return 1;
}
} else if (neigh->nud_state & NUD_STALE) {
NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
neigh->nud_state = NUD_DELAY;//转变为delay 状态
neigh->updated = jiffies;
neigh_add_timer(neigh,
jiffies + neigh->parms->delay_probe_time);
} if (neigh->nud_state == NUD_INCOMPLETE) {//说明之前有报文发送
if (skb) {
while (neigh->arp_queue_len_bytes + skb->truesize >
neigh->parms->queue_len_bytes) {//如果请求报文已经满了,但还没有收到应答。
struct sk_buff *buff;//如果缓存队列还没有达到上限,则将报文加入到输出缓存队列中
//否者 丢弃队列中最早加入的报文然后加入队列
//但是返回值都是1 即 不能立即发送
buff = __skb_dequeue(&neigh->arp_queue);
if (!buff)
break;
neigh->arp_queue_len_bytes -= buff->truesize;
kfree_skb(buff);
NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
}
skb_dst_force(skb);
__skb_queue_tail(&neigh->arp_queue, skb);
neigh->arp_queue_len_bytes += skb->truesize;
}
rc = 1;
}
out_unlock_bh:
if (immediate_probe)
neigh_probe(neigh); 发出邻居项请求 solict报文 (arp请求等)
else
write_unlock(&neigh->lock);
local_bh_enable();
return rc;
}
neigh_hh_init :缓存二层头,以eth为例:就是缓存二层mac
/**
* ether_setup - setup Ethernet network device
* @dev: network device
*
* Fill in the fields of the device structure with Ethernet-generic values.
*/
void ether_setup(struct net_device *dev)
{
dev->header_ops = ð_header_ops;
dev->type = ARPHRD_ETHER;
dev->hard_header_len = ETH_HLEN;
dev->mtu = ETH_DATA_LEN;
dev->addr_len = ETH_ALEN;
dev->tx_queue_len = 1000; /* Ethernet wants good queues */
dev->flags = IFF_BROADCAST|IFF_MULTICAST;
dev->priv_flags |= IFF_TX_SKB_SHARING; memset(dev->broadcast, 0xFF, ETH_ALEN); }
const struct header_ops eth_header_ops ____cacheline_aligned = {
.create = eth_header,
.parse = eth_header_parse,
.rebuild = eth_rebuild_header,
.cache = eth_header_cache,
.cache_update = eth_header_cache_update,
}; /**
* eth_header_cache - fill cache entry from neighbour
* @neigh: source neighbour
* @hh: destination cache entry
* @type: Ethernet type field
*
* Create an Ethernet header template from the neighbour.
*/
int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type)
{
struct ethhdr *eth;
const struct net_device *dev = neigh->dev; eth = (struct ethhdr *)
(((u8 *) hh->hh_data) + (HH_DATA_OFF(sizeof(*eth)))); if (type == htons(ETH_P_802_3))
return -1; eth->h_proto = type;
memcpy(eth->h_source, dev->dev_addr, ETH_ALEN);
memcpy(eth->h_dest, neigh->ha, ETH_ALEN);
hh->hh_len = ETH_HLEN;
return 0;
} /* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
{
struct net_device *dev = dst->dev;
__be16 prot = dst->ops->protocol;
struct hh_cache *hh = &n->hh; write_lock_bh(&n->lock); /* Only one thread can come in here and initialize the
* hh_cache entry.
*/
if (!hh->hh_len)
dev->header_ops->cache(n, hh, prot); write_unlock_bh(&n->lock);
}
//根据代码可以看出 直接拷贝二层头
快速发送:
//ip_finish_output2 中会调用dst_neigh_output 输出报文
static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
struct sk_buff *skb)
{
struct hh_cache *hh; if (unlikely(dst->pending_confirm)) {
n->confirmed = jiffies;
dst->pending_confirm = 0;
} hh = &n->hh;
if ((n->nud_state & NUD_CONNECTED) && hh->hh_len)
return neigh_hh_output(hh, skb);//快速发出
else
return n->output(n, skb);// 慢速发出neigh_resolve_output
}
/* 拷贝缓存的二层头部,输出 */
static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb)
{
unsigned int seq;
unsigned int hh_len; /* 拷贝二层头到skb */
do {
seq = read_seqbegin(&hh->hh_lock);
hh_len = hh->hh_len;
/* 二层头部<DATA_MOD,直接使用该长度拷贝 */
if (likely(hh_len <= HH_DATA_MOD)) {
/* this is inlined by gcc */
memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD);
}
/* >=DATA_MOD,对齐头部,拷贝 */
else {
unsigned int hh_alen = HH_DATA_ALIGN(hh_len); memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
}
} while (read_seqretry(&hh->hh_lock, seq)); skb_push(skb, hh_len); /* 发送 */
return dev_queue_xmit(skb);
}
neigh_hh_output-缓存输出,直接拷贝二层头部,然后输出;
neigh_connected_output-快速输出,用于连接状态的输出;需要重新构建二层头部,然后输出;
neigh_resolve_output-慢速输出,用于非连接状态的输出;需要对邻居项状态进行检查,然后重新构造二层头部,最后输出;
neigh_direct_output-直接输出,用于没有二层头部时的输出;
/* CONNECTED状态的发送函数,没有neigh_hh_output快,这个需要重建二层头 */
int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
struct net_device *dev = neigh->dev;
unsigned int seq;
int err; /* 拷贝二层头 */
do {
__skb_pull(skb, skb_network_offset(skb));
seq = read_seqbegin(&neigh->ha_lock);
err = dev_hard_header(skb, dev, ntohs(skb->protocol),
neigh->ha, NULL, skb->len);
} while (read_seqretry(&neigh->ha_lock, seq)); /* 发送数据包 */
if (err >= 0)
err = dev_queue_xmit(skb);
else {
err = -EINVAL;
kfree_skb(skb);
}
return err;
}
邻居子系统1.5 neigh output的更多相关文章
- 邻居子系统 之 状态定时器回调neigh_timer_handler
概述 在分配邻居子系统之后,会设置定时器来处理那些需要定时器处理的状态,定时器回调函数为neigh_timer_handler:函数会根据状态机变换规则对状态进行切换,切换状态后,如果需要更新输出函数 ...
- 邻居子系统 之 更新neigh_update
概述 neigh_update函数用来更新指定的邻居项,更新内容是硬件地址和状态,更新之后,会根据新状态设置其输出函数,CONNECTED状态则使用快速输出,否则使用慢速输出:如果是由原来的无效状态变 ...
- 邻居子系统 之 邻居表的初始化neigh_table_init
概述 邻居子系统支持多种实现,例如ARP,ND等,这些实现需要在其初始化的时候,调用neigh_table_init将邻居表项添加到全局邻居子系统数组中,并对实例中的字段(如hash,定时器等)进行相 ...
- 邻居子系统输出 之 neigh_output、neigh_hh_output
概述 ip层在构造好ip头,检查完分片之后,会调用邻居子系统的输出函数neigh_output进行输出,输出分为有二层头缓存和没有两种情况,有缓存时调用neigh_hh_output进行快速输出,没有 ...
- 邻居子系统 之 邻居项创建__neigh_create
概述 IP层输出数据包会根据路由的下一跳查询邻居项,如果不存在则会调用__neigh_create创建邻居项,然后调用邻居项的output函数进行输出: __neigh_create完成邻居项的创建, ...
- 《深入理解Linux网络技术内幕》阅读笔记 --- 邻居子系统
1.封包从L3至L2的传送过程如下所示: 本地主机的路由子系统选择L3目的地址(下一个跃点). 根据路由表,如果下一个跃点在同一个网络中,邻居层就把目的L3地址解析为跃点的L2地址.这个关联会被放入缓 ...
- 邻居子系统 之 邻居项查找neigh_lookup、___neigh_lookup_noref
概述 邻居项查找是通过neigh_lookup相关函数来进行的: ___neigh_lookup_noref,该函数根据输出设备和主键值(IPv4为下一跳ip地址)在邻居项hash表中查找,找到则返回 ...
- 邻居子系统 arp 状态图
- IP输出 之 ip_output、ip_finish_output、ip_finish_output2
概述 ip_output-设置输出设备和协议,然后经过POST_ROUTING钩子点,最后调用ip_finish_output: ip_finish_output-对skb进行分片判断,需要分片,则分 ...
随机推荐
- 2020已经过去五分之四了,你确定还不来了解一下JS的rAF?
不会吧,不会吧,现在都2020年了不会还真人有人不知道JS的rAF吧??? rAF 简介 rAF是requestAnimationFrame的简称: 我们先从字面意思上理解requestAnimati ...
- lambda函数小结
C++中的lambda函数 lambda函数是函数式编程中的概念,由C++11引入,成为现代C++中重要的特性. 所谓lambda函数就是匿名函数,语法结构: [capture list] (para ...
- docker设置http访问
1 编辑配置文件 vim /etc/docker/daemon.json { "registry-mirrors": ["https://a4fyjv0u.mirr ...
- composer 阿里云加速 转
阿里云 Composer 全量镜像 本镜像与 Packagist 官方实时同步,推荐使用最新的 Composer 版本. 最新版本: 1.10.8 下载地址: https://mirrors.aliy ...
- 监听MySQL的binlog日志工具分析:Canal
Canal是阿里巴巴旗下的一款开源项目,利用Java开发.主要用途是基于MySQL数据库增量日志解析,提供增量数据订阅和消费,目前主要支持MySQL. GitHub地址:https://github. ...
- matplotlib作图 归零编码、曼切斯特编码、非归零编码、差分曼切斯特编码
效果图 代码 import matplotlib.pyplot as plt config = { 'color': 'black', 'lw': 5, } def init(): plt.figur ...
- django—ORM相关
常用的QuerySet方法 1.all() 查询表中所有数据,返回一个QuerySet对象列表 2.filter() 查询满足条件的数据,返回一个QuerySet对象列表 3.get() 查询指定的数 ...
- 安卓日常开发和逆向中常用的shell命令与非shell命令
简述shell 命令与 非shell命令区别 shell命令不用先adb shell进入界面执行 非shell命令必须要 adb shell进入界面执行 基础非shell命令 1.安装app adb ...
- vue学习笔记(一)---- vue指令( v-bind 属性绑定 )
看栗子: <body> <div id="app"> <input type="button" value="按钮&qu ...
- viewpage启动页
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com ...