Linux kernel protocol stack packet-receive flow (1): the ixgbe NIC driver
Everything starts with loading the module: insmod ixgbe.ko
module_init(ixgbe_init_module);

static int __init ixgbe_init_module(void)
{
    int ret;

    pr_info("%s - version %s\n", ixgbe_driver_string, ixgbe_driver_version);
    pr_info("%s\n", ixgbe_copyright);

    ixgbe_dbg_init();

    ret = pci_register_driver(&ixgbe_driver);
    if (ret) {
        ixgbe_dbg_exit();
        return ret;
    }

#ifdef CONFIG_IXGBE_DCA
    dca_register_notify(&dca_notifier);
#endif

    return 0;
}
Now look at the core pci_driver structure for this device:
static struct pci_driver ixgbe_driver = {
    .name             = ixgbe_driver_name,
    .id_table         = ixgbe_pci_tbl,
    .probe            = ixgbe_probe,
    .remove           = ixgbe_remove,
#ifdef CONFIG_PM
    .suspend          = ixgbe_suspend,
    .resume           = ixgbe_resume,
#endif
    .shutdown         = ixgbe_shutdown,
    .sriov_configure  = ixgbe_pci_sriov_configure,
    .err_handler      = &ixgbe_err_handler
};
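pci_register_driver() hands this structure to the PCI core, which calls the probe callback for every device whose vendor/device IDs match an entry in id_table. An abridged sketch of that table follows (only one entry shown for illustration; the real ixgbe_pci_tbl in ixgbe_main.c enumerates every supported Intel 10GbE device):

/* Abridged sketch of the ixgbe PCI id table; the real table lists all
 * supported 82598/82599/X540 devices. */
static const struct pci_device_id ixgbe_pci_tbl[] = {
    { PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP), board_82599 },
    /* required terminating entry */
    { 0, }
};
MODULE_DEVICE_TABLE(pci, ixgbe_pci_tbl);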
When a matching device is successfully bound to the driver, ixgbe_probe is executed:
static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
    /* allocate the struct net_device *netdev */
    netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), indices);
    if (!netdev) {
        err = -ENOMEM;
        goto err_alloc_etherdev;
    }

    SET_NETDEV_DEV(netdev, &pdev->dev);

    /* struct ixgbe_adapter *adapter lives in the private area of netdev */
    adapter = netdev_priv(netdev);

    /* install the net_device ops function-pointer table */
    netdev->netdev_ops = &ixgbe_netdev_ops;

    err = ixgbe_sw_init(adapter);
    err = ixgbe_init_interrupt_scheme(adapter);

    /* register the net device */
    err = register_netdev(netdev);
}
The function to focus on is ixgbe_init_interrupt_scheme(adapter); it initializes the adapter structure and the NAPI-related pieces:
int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
{
    err = ixgbe_alloc_q_vectors(adapter);
}

static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
{
    if (q_vectors >= (rxr_remaining + txr_remaining)) {
        for (; rxr_remaining; v_idx++) {
            err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx,
                                       0, 0, 1, rxr_idx);
            if (err)
                goto err_out;

            /* update counts and index */
            rxr_remaining--;
            rxr_idx++;
        }
    }
}
static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
                                int v_count, int v_idx,
                                int txr_count, int txr_idx,
                                int rxr_count, int rxr_idx)
{
    /* setup affinity mask and node */
    if (cpu != -1)
        cpumask_set_cpu(cpu, &q_vector->affinity_mask);
    q_vector->numa_node = node;

#ifdef CONFIG_IXGBE_DCA
    /* initialize CPU for DCA */
    q_vector->cpu = -1;
#endif

    /* initialize NAPI */
    netif_napi_add(adapter->netdev, &q_vector->napi,
                   ixgbe_poll, 64);
    napi_hash_add(&q_vector->napi);
}
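netif_napi_add() is what ties the q_vector's napi_struct to the net_device and installs ixgbe_poll as its poll callback; the last argument (64) is the NAPI weight, i.e. the per-poll packet budget for this instance. Its prototype, from include/linux/netdevice.h of kernels in this era, is roughly:

/* Prototype only, for reference: registers a NAPI instance on a device and
 * records its poll() callback and weight (per-poll budget). */
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
                    int (*poll)(struct napi_struct *, int), int weight);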
At this point the NIC setup and initialization is complete.
The following structures are involved:
/* board specific private data structure */
struct ixgbe_adapter {
    /* TX rings */
    struct ixgbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp;
    /* RX rings */
    struct ixgbe_ring *rx_ring[MAX_RX_QUEUES];
    /* each q_vector contains a napi_struct; presumably these pair up
     * one-to-one with the msix_entries below, one per interrupt vector */
    struct ixgbe_q_vector *q_vector[MAX_Q_VECTORS];
    /* presumably the MSI-X entries that the individual interrupts map to */
    struct msix_entry *msix_entries;
};

struct ixgbe_q_vector {
    struct ixgbe_adapter *adapter;
#ifdef CONFIG_IXGBE_DCA
    int cpu;        /* CPU for DCA */
#endif
    u16 v_idx;      /* index of q_vector within array, also used for
                     * finding the bit in EICR and friends that
                     * represents the vector for this ring */
    u16 itr;        /* Interrupt throttle rate written to EITR */
    struct ixgbe_ring_container rx, tx;

    struct napi_struct napi;    /* the NAPI context */
    cpumask_t affinity_mask;
    int numa_node;
    struct rcu_head rcu;        /* to avoid race with update stats on free */
    char name[IFNAMSIZ + 9];

    /* for dynamic allocation of rings associated with this q_vector */
    struct ixgbe_ring ring[0] ____cacheline_internodealigned_in_smp;
};

struct napi_struct {
/* The poll_list must only be managed by the entity which
* changes the state of the NAPI_STATE_SCHED bit. This means
* whoever atomically sets that bit can add this napi_struct
* to the per-cpu poll_list, and whoever clears that bit
* can remove from the list right before clearing the bit.
*/
    struct list_head poll_list;

    unsigned long state;
int weight;
unsigned int gro_count;
    int (*poll)(struct napi_struct *, int);    /* the driver's poll callback */
#ifdef CONFIG_NETPOLL
spinlock_t poll_lock;
int poll_owner;
#endif
struct net_device *dev;
struct sk_buff *gro_list;
struct sk_buff *skb;
struct list_head dev_list;
};
Then, when the interface is brought up with ifconfig <dev> up, the open callback registered in netdev_ops (ndo_open, i.e. ixgbe_open for this driver) is executed.
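For reference, an abridged view of the ops table installed in ixgbe_probe above (the real ixgbe_netdev_ops in ixgbe_main.c has many more callbacks):

/* Abridged: only the callbacks relevant here are shown */
static const struct net_device_ops ixgbe_netdev_ops = {
    .ndo_open       = ixgbe_open,       /* invoked when the device is brought up */
    .ndo_stop       = ixgbe_close,
    .ndo_start_xmit = ixgbe_xmit_frame,
    /* ... many more members ... */
};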
static int ixgbe_open(struct net_device *netdev)
{
    /* allocate transmit descriptors */
    err = ixgbe_setup_all_tx_resources(adapter);
    if (err)
        goto err_setup_tx;

    /* allocate receive descriptors */
    err = ixgbe_setup_all_rx_resources(adapter);

    /* register the interrupt handler(s) */
    err = ixgbe_request_irq(adapter);
}

static int ixgbe_request_irq(struct ixgbe_adapter *adapter)
{
    struct net_device *netdev = adapter->netdev;
    int err;

    if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED)
        err = ixgbe_request_msix_irqs(adapter);
    else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED)
        err = request_irq(adapter->pdev->irq, ixgbe_intr, 0,
                          netdev->name, adapter);
    else
        err = request_irq(adapter->pdev->irq, ixgbe_intr, IRQF_SHARED,
                          netdev->name, adapter);

    if (err)
        e_err(probe, "request_irq failed, Error %d\n", err);

    return err;
}

static int ixgbe_request_msix_irqs(struct ixgbe_adapter *adapter)
{
    for (vector = 0; vector < adapter->num_q_vectors; vector++) {
        struct ixgbe_q_vector *q_vector = adapter->q_vector[vector];
        struct msix_entry *entry = &adapter->msix_entries[vector];

        err = request_irq(entry->vector, &ixgbe_msix_clean_rings, 0,
                          q_vector->name, q_vector);
    }
}
From the code flow above, the interrupt handler that ultimately gets registered (in the MSI-X case) is ixgbe_msix_clean_rings:
static irqreturn_t ixgbe_msix_clean_rings(int irq, void *data)
{
    struct ixgbe_q_vector *q_vector = data;

    /* EIAM disabled interrupts (on this vector) for us */
    if (q_vector->rx.ring || q_vector->tx.ring)
        napi_schedule(&q_vector->napi);

    return IRQ_HANDLED;
}
As the code shows, this interrupt handler does nothing more than act as the NAPI scheduler.
When a packet arrives, the hardware interrupt fires first and runs ixgbe_msix_clean_rings; napi_schedule eventually calls __raise_softirq_irqoff to raise the NET_RX_SOFTIRQ softirq, and the corresponding softirq handler then carries out the rest of the protocol-stack processing further up.
Now let's look at what the NAPI scheduling functions actually do:
static inline void napi_schedule(struct napi_struct *n)
{
    if (napi_schedule_prep(n))
        __napi_schedule(n);
}
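napi_schedule_prep() is essentially an atomic test-and-set of the NAPI_STATE_SCHED bit mentioned in the napi_struct comment above, so the same NAPI instance cannot be scheduled twice. In include/linux/netdevice.h of kernels from this era it looks roughly like:

/* Roughly: only schedule if the instance is not being disabled and the
 * SCHED bit was not already set (test_and_set_bit returns the old value). */
static inline bool napi_schedule_prep(struct napi_struct *n)
{
    return !napi_disable_pending(n) &&
        !test_and_set_bit(NAPI_STATE_SCHED, &n->state);
}

If the bit was clear, __napi_schedule() does the real work: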
void __napi_schedule(struct napi_struct *n)
{
    unsigned long flags;

    local_irq_save(flags);
    ____napi_schedule(this_cpu_ptr(&softnet_data), n);
    local_irq_restore(flags);
}
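____napi_schedule() is where the two things described next actually happen: the napi_struct is appended to this CPU's softnet_data poll_list and NET_RX_SOFTIRQ is raised. From net/core/dev.c of kernels in this era, roughly:

/* Called with irqs disabled: queue the napi instance on this CPU's
 * poll_list and raise the receive softirq. */
static inline void ____napi_schedule(struct softnet_data *sd,
                                     struct napi_struct *napi)
{
    list_add_tail(&napi->poll_list, &sd->poll_list);
    __raise_softirq_irqoff(NET_RX_SOFTIRQ);
}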
In other words, the NAPI scheduling path hangs the napi_struct onto the per-CPU private data structure softnet_data, whose layout is:
struct softnet_data {
    struct Qdisc            *output_queue;
    struct Qdisc            **output_queue_tailp;
    struct list_head        poll_list;      /* scheduled napi_structs are chained here */
    struct sk_buff          *completion_queue;
    struct sk_buff_head     process_queue;

    /* stats */
    unsigned int            processed;
    unsigned int            time_squeeze;
    unsigned int            cpu_collision;
    unsigned int            received_rps;

#ifdef CONFIG_RPS
    struct softnet_data     *rps_ipi_list;

    /* Elements below can be accessed between CPUs for RPS */
    struct call_single_data csd ____cacheline_aligned_in_smp;
    struct softnet_data     *rps_ipi_next;
    unsigned int            cpu;
    unsigned int            input_queue_head;
    unsigned int            input_queue_tail;
#endif
    unsigned int            dropped;
    struct sk_buff_head     input_pkt_queue;
    struct napi_struct      backlog;    /* napi_struct for the backlog (enqueue_to_backlog / non-NAPI path) */
};
NET_RX_SOFTIRQ is the softirq raised when packets are received; its handler is net_rx_action.
NET_TX_SOFTIRQ is the softirq raised after packet transmission completes; its handler is net_tx_action.
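Both handlers are registered once at boot time in net_dev_init() (net/core/dev.c); an abridged sketch:

static int __init net_dev_init(void)
{
    /* ... per-cpu softnet_data initialization elided ... */
    open_softirq(NET_TX_SOFTIRQ, net_tx_action);
    open_softirq(NET_RX_SOFTIRQ, net_rx_action);
    /* ... */
    return 0;
}

The receive-side handler, net_rx_action, then looks like this (abridged):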
static void net_rx_action(struct softirq_action *h)
{
    /* get this CPU's softnet_data */
    struct softnet_data *sd = this_cpu_ptr(&softnet_data);

    while (!list_empty(&sd->poll_list)) {
        struct napi_struct *n;

        n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);

        if (test_bit(NAPI_STATE_SCHED, &n->state)) {
            work = n->poll(n, weight);
            trace_napi_poll(n);
        }
    }
}
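The excerpt above omits the budget handling. The real net_rx_action also enforces a global packet budget (the netdev_budget sysctl) and a time limit of roughly two jiffies, and a NAPI instance that consumes its whole weight is rotated to the tail of the list rather than removed; one that finishes early removes itself via napi_complete(). A simplified sketch of that logic (not the verbatim kernel code; the real function also handles irq enable/disable and netpoll locking):

/* Simplified sketch only */
static void net_rx_action_sketch(void)
{
    struct softnet_data *sd = this_cpu_ptr(&softnet_data);
    unsigned long time_limit = jiffies + 2;     /* ~2 jiffies max per run */
    int budget = netdev_budget;                 /* total packets per run */

    while (!list_empty(&sd->poll_list)) {
        struct napi_struct *n;
        int work = 0, weight;

        if (budget <= 0 || time_after_eq(jiffies, time_limit)) {
            /* out of budget: let the softirq run again later */
            __raise_softirq_irqoff(NET_RX_SOFTIRQ);
            break;
        }

        n = list_first_entry(&sd->poll_list,
                             struct napi_struct, poll_list);
        weight = n->weight;

        if (test_bit(NAPI_STATE_SCHED, &n->state))
            work = n->poll(n, weight);          /* ixgbe_poll here */

        budget -= work;

        /* a poller that used its full weight stays scheduled and is
         * rotated to the tail; one that finished has already removed
         * itself from poll_list via napi_complete() */
        if (work == weight)
            list_move_tail(&n->poll_list, &sd->poll_list);
    }
}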
Execution therefore reaches the poll callback that was installed into the napi_struct at initialization time, which here is ixgbe_poll:
int ixgbe_poll(struct napi_struct *napi, int budget)
{
    struct ixgbe_q_vector *q_vector =
                container_of(napi, struct ixgbe_q_vector, napi);
    struct ixgbe_adapter *adapter = q_vector->adapter;
    struct ixgbe_ring *ring;
    int per_ring_budget;
    bool clean_complete = true;

#ifdef CONFIG_IXGBE_DCA
    if (adapter->flags & IXGBE_FLAG_DCA_ENABLED)
        ixgbe_update_dca(q_vector);
#endif

    ixgbe_for_each_ring(ring, q_vector->tx)
        clean_complete &= !!ixgbe_clean_tx_irq(q_vector, ring);

    if (!ixgbe_qv_lock_napi(q_vector))
        return budget;

    /* attempt to distribute budget to each queue fairly, but don't allow
     * the budget to go below 1 because we'll exit polling */
    if (q_vector->rx.count > 1)
        per_ring_budget = max(budget/q_vector->rx.count, 1);
    else
        per_ring_budget = budget;

    ixgbe_for_each_ring(ring, q_vector->rx)
        clean_complete &= (ixgbe_clean_rx_irq(q_vector, ring,
                           per_ring_budget) < per_ring_budget);

    ixgbe_qv_unlock_napi(q_vector);

    /* If all work not completed, return budget and keep polling */
    if (!clean_complete)
        return budget;

    /* all work done, exit the polling mode */
    napi_complete(napi);
    if (adapter->rx_itr_setting & 1)
        ixgbe_set_itr(q_vector);
    if (!test_bit(__IXGBE_DOWN, &adapter->state))
        ixgbe_irq_enable_queues(adapter, ((u64)1 << q_vector->v_idx));

    return 0;
}

static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
                              struct ixgbe_ring *rx_ring,
                              const int budget)
{
    ixgbe_rx_skb(q_vector, skb);
}

static void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector,
                         struct sk_buff *skb)
{
    if (ixgbe_qv_busy_polling(q_vector))
        netif_receive_skb(skb);
    else
        napi_gro_receive(&q_vector->napi, skb);
}

int netif_receive_skb(struct sk_buff *skb)
{
    int ret;

    net_timestamp_check(netdev_tstamp_prequeue, skb);

    if (skb_defer_rx_timestamp(skb))
        return NET_RX_SUCCESS;

    rcu_read_lock();

#ifdef CONFIG_RPS
    if (static_key_false(&rps_needed)) {
        struct rps_dev_flow voidflow, *rflow = &voidflow;
        int cpu = get_rps_cpu(skb->dev, skb, &rflow);

        if (cpu >= 0) {
            ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
            rcu_read_unlock();
            return ret;
        }
    }
#endif
    /* finally, hand the skb to the protocol stack proper */
    ret = __netif_receive_skb(skb);
    rcu_read_unlock();

    return ret;
}
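__netif_receive_skb() eventually walks the registered packet_type handlers and hands each frame to the matching L3 protocol, e.g. ip_rcv() for IPv4. For reference, that handler is registered at boot roughly like this (abridged from net/ipv4/af_inet.c; __netif_receive_skb matches skb->protocol against such entries and calls their .func):

static struct packet_type ip_packet_type __read_mostly = {
    .type = cpu_to_be16(ETH_P_IP),
    .func = ip_rcv,
};

static int __init inet_init(void)
{
    /* ... socket and protocol registration elided ... */
    dev_add_pack(&ip_packet_type);
    return 0;
}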