Linux kernel network stack receive path (Part 1): the ixgbe NIC driver
It starts with module load: `insmod ixgbe.ko` runs the function registered with `module_init`:
```c
module_init(ixgbe_init_module);

static int __init ixgbe_init_module(void)
{
    int ret;

    pr_info("%s - version %s\n", ixgbe_driver_string, ixgbe_driver_version);
    pr_info("%s\n", ixgbe_copyright);

    ixgbe_dbg_init();

    ret = pci_register_driver(&ixgbe_driver);
    if (ret) {
        ixgbe_dbg_exit();
        return ret;
    }

#ifdef CONFIG_IXGBE_DCA
    dca_register_notify(&dca_notifier);
#endif

    return 0;
}
```
Now look at the PCI driver structure at the heart of this:
```c
static struct pci_driver ixgbe_driver = {
    .name      = ixgbe_driver_name,
    .id_table  = ixgbe_pci_tbl,
    .probe     = ixgbe_probe,
    .remove    = ixgbe_remove,
#ifdef CONFIG_PM
    .suspend   = ixgbe_suspend,
    .resume    = ixgbe_resume,
#endif
    .shutdown  = ixgbe_shutdown,
    .sriov_configure = ixgbe_pci_sriov_configure,
    .err_handler = &ixgbe_err_handler
};
```
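Which devices the driver binds to is decided by the `id_table`. A representative excerpt of `ixgbe_pci_tbl` (the exact entries vary by kernel version; shown only to illustrate the matching mechanism):

```c
static const struct pci_device_id ixgbe_pci_tbl[] = {
    { PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598), board_82598 },
    { PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP), board_82599 },
    /* ... many more 82598/82599/X540 device IDs ... */
    { 0 }  /* required terminating entry */
};
MODULE_DEVICE_TABLE(pci, ixgbe_pci_tbl);
```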
Once the PCI core matches one of these IDs, it calls `ixgbe_probe` (abridged):
```c
static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
    /* allocate the net_device, with room for the ixgbe_adapter private data */
    netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), indices);
    if (!netdev) {
        err = -ENOMEM;
        goto err_alloc_etherdev;
    }

    SET_NETDEV_DEV(netdev, &pdev->dev);

    /* netdev_priv() returns the ixgbe_adapter area embedded in netdev */
    adapter = netdev_priv(netdev);

    /* install the net_device_ops function table */
    netdev->netdev_ops = &ixgbe_netdev_ops;

    err = ixgbe_sw_init(adapter);
    err = ixgbe_init_interrupt_scheme(adapter);

    /* device registration complete */
    err = register_netdev(netdev);
}
```
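Note that `netdev_priv()` allocates nothing: `alloc_etherdev_mq()` already reserved `sizeof(struct ixgbe_adapter)` bytes right behind the `net_device`, and `netdev_priv()` simply computes that address (from include/linux/netdevice.h):

```c
static inline void *netdev_priv(const struct net_device *dev)
{
    return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN);
}
```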
The function to focus on is `ixgbe_init_interrupt_scheme(adapter)`, which sets up the adapter's queue vectors and everything NAPI-related:
```c
int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
{
    err = ixgbe_alloc_q_vectors(adapter);
}
```
```c
static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
{
    if (q_vectors >= (rxr_remaining + txr_remaining)) {
        for (; rxr_remaining; v_idx++) {
            err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx,
                                       0, 0, 1, rxr_idx);
            if (err)
                goto err_out;

            /* update counts and index */
            rxr_remaining--;
            rxr_idx++;
        }
    }
}
```
```c
static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
                                int v_count, int v_idx,
                                int txr_count, int txr_idx,
                                int rxr_count, int rxr_idx)
{
    /* setup affinity mask and node */
    if (cpu != -1)
        cpumask_set_cpu(cpu, &q_vector->affinity_mask);
    q_vector->numa_node = node;

#ifdef CONFIG_IXGBE_DCA
    /* initialize CPU for DCA */
    q_vector->cpu = -1;
#endif

    /* initialize NAPI */
    netif_napi_add(adapter->netdev, &q_vector->napi,
                   ixgbe_poll, 64);
    napi_hash_add(&q_vector->napi);
}
```
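`netif_napi_add` is where the poll callback and the NAPI weight (64 here) get recorded into the `napi_struct`; abridged from net/core/dev.c of the same era:

```c
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
                    int (*poll)(struct napi_struct *, int), int weight)
{
    INIT_LIST_HEAD(&napi->poll_list);
    napi->gro_count = 0;
    napi->gro_list = NULL;
    napi->skb = NULL;
    napi->poll = poll;          /* the driver's poll routine, ixgbe_poll here */
    napi->weight = weight;      /* per-poll budget */
    list_add(&napi->dev_list, &dev->napi_list);
    napi->dev = dev;
    set_bit(NAPI_STATE_SCHED, &napi->state);
    /* ... */
}
```

The weight (64) recorded here is what `net_rx_action` will later pass to the poll callback as its budget.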
At this point the NIC's initialization is complete.
The structures involved are the following.
```c
/* board specific private data structure */
struct ixgbe_adapter {
    /* TX rings */
    struct ixgbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp;
    /* RX rings */
    struct ixgbe_ring *rx_ring[MAX_RX_QUEUES];
    /* one queue vector per interrupt vector; each embeds a napi_struct
     * and corresponds one-to-one with the msix_entries below */
    struct ixgbe_q_vector *q_vector[MAX_Q_VECTORS];
    /* the MSI-X entries; ixgbe_request_msix_irqs() below requests
     * one IRQ per entry */
    struct msix_entry *msix_entries;
};
```
```c
struct ixgbe_q_vector {
    struct ixgbe_adapter *adapter;
#ifdef CONFIG_IXGBE_DCA
    int cpu;        /* CPU for DCA */
#endif
    u16 v_idx;      /* index of q_vector within array, also used for
                     * finding the bit in EICR and friends that
                     * represents the vector for this ring */
    u16 itr;        /* Interrupt throttle rate written to EITR */
    struct ixgbe_ring_container rx, tx;

    struct napi_struct napi;    /* the NAPI instance for this vector */
    cpumask_t affinity_mask;
    int numa_node;
    struct rcu_head rcu;        /* to avoid race with update stats on free */
    char name[IFNAMSIZ + 9];

    /* for dynamic allocation of rings associated with this q_vector */
    struct ixgbe_ring ring[0] ____cacheline_internodealigned_in_smp;
};
```
```c
struct napi_struct {
    /* The poll_list must only be managed by the entity which
     * changes the state of the NAPI_STATE_SCHED bit. This means
     * whoever atomically sets that bit can add this napi_struct
     * to the per-cpu poll_list, and whoever clears that bit
     * can remove from the list right before clearing the bit.
     */
    struct list_head poll_list;

    unsigned long state;
    int weight;
    unsigned int gro_count;
    int (*poll)(struct napi_struct *, int);  /* the driver's poll implementation */
#ifdef CONFIG_NETPOLL
    spinlock_t poll_lock;
    int poll_owner;
#endif
    struct net_device *dev;
    struct sk_buff *gro_list;
    struct sk_buff *skb;
    struct list_head dev_list;
};
```
Then, when we bring the interface up with `ifconfig <dev> up`, the kernel invokes the driver's `ndo_open` callback.
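That callback mapping lives in the `ixgbe_netdev_ops` table installed during probe; a representative excerpt (the full table carries many more ops):

```c
static const struct net_device_ops ixgbe_netdev_ops = {
    .ndo_open        = ixgbe_open,
    .ndo_stop        = ixgbe_close,
    .ndo_start_xmit  = ixgbe_xmit_frame,
    /* ... */
};
```

So `ndo_open` resolves to `ixgbe_open` (abridged):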
```c
static int ixgbe_open(struct net_device *netdev)
{
    /* allocate transmit descriptors */
    err = ixgbe_setup_all_tx_resources(adapter);
    if (err)
        goto err_setup_tx;

    /* allocate receive descriptors */
    err = ixgbe_setup_all_rx_resources(adapter);

    /* register the interrupt handler(s) */
    err = ixgbe_request_irq(adapter);
}
```
```c
static int ixgbe_request_irq(struct ixgbe_adapter *adapter)
{
    struct net_device *netdev = adapter->netdev;
    int err;

    if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED)
        err = ixgbe_request_msix_irqs(adapter);
    else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED)
        err = request_irq(adapter->pdev->irq, ixgbe_intr, 0,
                          netdev->name, adapter);
    else
        err = request_irq(adapter->pdev->irq, ixgbe_intr, IRQF_SHARED,
                          netdev->name, adapter);

    if (err)
        e_err(probe, "request_irq failed, Error %d\n", err);

    return err;
}
```
```c
static int ixgbe_request_msix_irqs(struct ixgbe_adapter *adapter)
{
    for (vector = 0; vector < adapter->num_q_vectors; vector++) {
        struct ixgbe_q_vector *q_vector = adapter->q_vector[vector];
        struct msix_entry *entry = &adapter->msix_entries[vector];

        err = request_irq(entry->vector, &ixgbe_msix_clean_rings, 0,
                          q_vector->name, q_vector);
    }
}
```
From the flow above, the interrupt handler ultimately registered (in the MSI-X case) is `ixgbe_msix_clean_rings`:
```c
static irqreturn_t ixgbe_msix_clean_rings(int irq, void *data)
{
    struct ixgbe_q_vector *q_vector = data;

    /* EIAM disabled interrupts (on this vector) for us */
    if (q_vector->rx.ring || q_vector->tx.ring)
        napi_schedule(&q_vector->napi);

    return IRQ_HANDLED;
}
```
As the code shows, the hard-interrupt handler does nothing more than schedule NAPI.
When a packet arrives, the hardware interrupt fires and runs `ixgbe_msix_clean_rings`; `napi_schedule` ultimately calls `__raise_softirq_irqoff` to raise the NET_RX_SOFTIRQ softirq, and the corresponding softirq handler then carries the packet up the protocol stack.
Now look at what the NAPI scheduling functions actually do:
```c
static inline void napi_schedule(struct napi_struct *n)
{
    if (napi_schedule_prep(n))
        __napi_schedule(n);
}
```
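`napi_schedule_prep` is what makes scheduling idempotent: it atomically sets NAPI_STATE_SCHED and returns false if the instance was already scheduled or is being disabled (from include/linux/netdevice.h of this era):

```c
static inline bool napi_schedule_prep(struct napi_struct *n)
{
    return !napi_disable_pending(n) &&
        !test_and_set_bit(NAPI_STATE_SCHED, &n->state);
}
```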
```c
void __napi_schedule(struct napi_struct *n)
{
    unsigned long flags;

    local_irq_save(flags);
    ____napi_schedule(this_cpu_ptr(&softnet_data), n);
    local_irq_restore(flags);
}
```
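`____napi_schedule` (net/core/dev.c) is where the two decisive steps happen: queue the napi_struct on the current CPU's poll list, then raise the softirq:

```c
/* Called with irq disabled */
static inline void ____napi_schedule(struct softnet_data *sd,
                                     struct napi_struct *napi)
{
    list_add_tail(&napi->poll_list, &sd->poll_list);
    __raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
```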
So the NAPI scheduler simply hangs the napi_struct onto the per-CPU private structure softnet_data:
```c
struct softnet_data {
    struct Qdisc        *output_queue;
    struct Qdisc        **output_queue_tailp;
    struct list_head    poll_list;
    struct sk_buff      *completion_queue;
    struct sk_buff_head process_queue;

    /* stats */
    unsigned int        processed;
    unsigned int        time_squeeze;
    unsigned int        cpu_collision;
    unsigned int        received_rps;

#ifdef CONFIG_RPS
    struct softnet_data *rps_ipi_list;
    /* Elements below can be accessed between CPUs for RPS */
    struct call_single_data csd ____cacheline_aligned_in_smp;
    struct softnet_data *rps_ipi_next;
    unsigned int        cpu;
    unsigned int        input_queue_head;
    unsigned int        input_queue_tail;
#endif
    unsigned int        dropped;
    struct sk_buff_head input_pkt_queue;
    struct napi_struct  backlog;    /* per-CPU backlog NAPI instance (used by the RPS/non-NAPI paths) */
};
```
NET_RX_SOFTIRQ is the softirq raised for packet reception; its handler is `net_rx_action`.
NET_TX_SOFTIRQ is the softirq raised after packet transmission; its handler is `net_tx_action`.
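These handlers are wired up once at boot, in `net_dev_init()` (net/core/dev.c):

```c
/* in net_dev_init() */
open_softirq(NET_TX_SOFTIRQ, net_tx_action);
open_softirq(NET_RX_SOFTIRQ, net_rx_action);
```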
```c
static void net_rx_action(struct softirq_action *h)
{
    /* this CPU's softnet_data */
    struct softnet_data *sd = this_cpu_ptr(&softnet_data);

    while (!list_empty(&sd->poll_list)) {
        struct napi_struct *n;

        n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);

        if (test_bit(NAPI_STATE_SCHED, &n->state)) {
            work = n->poll(n, weight);
            trace_napi_poll(n);
        }
    }
}
```
Execution thus reaches the poll function installed into the napi_struct at init time, which here is `ixgbe_poll`:
```c
int ixgbe_poll(struct napi_struct *napi, int budget)
{
    struct ixgbe_q_vector *q_vector =
                container_of(napi, struct ixgbe_q_vector, napi);
    struct ixgbe_adapter *adapter = q_vector->adapter;
    struct ixgbe_ring *ring;
    int per_ring_budget;
    bool clean_complete = true;

#ifdef CONFIG_IXGBE_DCA
    if (adapter->flags & IXGBE_FLAG_DCA_ENABLED)
        ixgbe_update_dca(q_vector);
#endif

    ixgbe_for_each_ring(ring, q_vector->tx)
        clean_complete &= !!ixgbe_clean_tx_irq(q_vector, ring);

    if (!ixgbe_qv_lock_napi(q_vector))
        return budget;

    /* attempt to distribute budget to each queue fairly, but don't allow
     * the budget to go below 1 because we'll exit polling */
    if (q_vector->rx.count > 1)
        per_ring_budget = max(budget/q_vector->rx.count, 1);
    else
        per_ring_budget = budget;

    ixgbe_for_each_ring(ring, q_vector->rx)
        clean_complete &= (ixgbe_clean_rx_irq(q_vector, ring,
                           per_ring_budget) < per_ring_budget);

    ixgbe_qv_unlock_napi(q_vector);

    /* If all work not completed, return budget and keep polling */
    if (!clean_complete)
        return budget;

    /* all work done, exit the polling mode */
    napi_complete(napi);
    if (adapter->rx_itr_setting & 1)
        ixgbe_set_itr(q_vector);
    if (!test_bit(__IXGBE_DOWN, &adapter->state))
        ixgbe_irq_enable_queues(adapter, ((u64)1 << q_vector->v_idx));

    return 0;
}
```
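As a worked example of the budget split: with the default weight of 64 passed in as `budget` and three RX rings on this vector, each ring gets max(64/3, 1) = 21 descriptors to clean per poll. If any ring consumes its full share, `clean_complete` goes false and `ixgbe_poll` returns the whole budget, so `net_rx_action` keeps the vector on the poll list instead of re-enabling its interrupt.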
```c
static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
                              struct ixgbe_ring *rx_ring,
                              const int budget)
{
    ixgbe_rx_skb(q_vector, skb);
}
```
```c
static void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector,
                         struct sk_buff *skb)
{
    if (ixgbe_qv_busy_polling(q_vector))
        netif_receive_skb(skb);
    else
        napi_gro_receive(&q_vector->napi, skb);
}
```
```c
int netif_receive_skb(struct sk_buff *skb)
{
    int ret;

    net_timestamp_check(netdev_tstamp_prequeue, skb);

    if (skb_defer_rx_timestamp(skb))
        return NET_RX_SUCCESS;

    rcu_read_lock();

#ifdef CONFIG_RPS
    if (static_key_false(&rps_needed)) {
        struct rps_dev_flow voidflow, *rflow = &voidflow;
        int cpu = get_rps_cpu(skb->dev, skb, &rflow);

        if (cpu >= 0) {
            ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
            rcu_read_unlock();
            return ret;
        }
    }
#endif
    /* here the protocol stack proper takes over */
    ret = __netif_receive_skb(skb);
    rcu_read_unlock();
    return ret;
}
```